
Commit 8a0d812

Update on "Add formulas and basic tests"
RFC: pytorch/rfcs#11

This PR adds:
- Codegen support to define forward grad formulas, along with a few manual formulas
- Codegen support to automatically generate formulas, along with a few usages
- Tests for the basic forward grad components

Codegen-generated examples are below. For each of them, the only part that changes is the if statement before the return that checks whether a forward grad is defined.

- For a manual entry:

```yaml
- name: max(Tensor self) -> Tensor
  self: evenly_distribute_backward(grad, self, result)
  result: max_forward(self_fw_grad, self, result)
```

```cpp
Tensor max(const Tensor & self) {
  auto& self_ = unpack(self, "self", 0);
  auto _any_requires_grad = compute_requires_grad( self );
  std::shared_ptr<MaxBackward1> grad_fn;
  if (_any_requires_grad) {
    grad_fn = std::shared_ptr<MaxBackward1>(new MaxBackward1(), deleteNode);
    grad_fn->set_next_edges(collect_next_edges( self ));
    grad_fn->self_ = SavedVariable(self, false);
  }
  #ifndef NDEBUG
  c10::optional<Storage> self__storage_saved =
    self_.has_storage() ? c10::optional<Storage>(self_.storage()) : c10::nullopt;
  c10::intrusive_ptr<TensorImpl> self__impl_saved;
  if (self_.defined()) self__impl_saved = self_.getIntrusivePtr();
  #endif
  auto tmp = ([&]() {
    at::AutoNonVariableTypeMode non_var_type_mode(true);
    return at::max(self_);
  })();
  auto result = std::move(tmp);
  #ifndef NDEBUG
  if (self__storage_saved.has_value())
    AT_ASSERT(self__storage_saved.value().is_alias_of(self_.storage()));
  if (self__impl_saved) AT_ASSERT(self__impl_saved == self_.getIntrusivePtr());
  #endif
  if (grad_fn) {
    set_history(flatten_tensor_args( result ), grad_fn);
  }
  throw_error_for_complex_autograd(result, "max");
  if (isFwGradDefined(self)) {
    auto self_fw_grad = toLegacyFwGrad(self);
    auto self_primal = toLegacyPrimal(self);
    auto result_new_fw_grad = max_forward(self_fw_grad, self_primal, result);
    if (result_new_fw_grad.defined()) {
      result.set_fw_grad(result_new_fw_grad, /* level */ 0, /* is_inplace_op */ false);
    }
  }
  if (grad_fn) {
    grad_fn->result_ = SavedVariable(result, true);
  }
  return result;
}
```

- For an element-wise entry:

```yaml
- name: abs(Tensor self) -> Tensor
  self: grad * self.sgn()
  result: auto_element_wise
```

```cpp
Tensor abs(const Tensor & self) {
  auto& self_ = unpack(self, "self", 0);
  auto _any_requires_grad = compute_requires_grad( self );
  std::shared_ptr<AbsBackward> grad_fn;
  if (_any_requires_grad) {
    grad_fn = std::shared_ptr<AbsBackward>(new AbsBackward(), deleteNode);
    grad_fn->set_next_edges(collect_next_edges( self ));
    grad_fn->self_ = SavedVariable(self, false);
  }
  #ifndef NDEBUG
  c10::optional<Storage> self__storage_saved =
    self_.has_storage() ? c10::optional<Storage>(self_.storage()) : c10::nullopt;
  c10::intrusive_ptr<TensorImpl> self__impl_saved;
  if (self_.defined()) self__impl_saved = self_.getIntrusivePtr();
  #endif
  auto tmp = ([&]() {
    at::AutoNonVariableTypeMode non_var_type_mode(true);
    return at::abs(self_);
  })();
  auto result = std::move(tmp);
  #ifndef NDEBUG
  if (self__storage_saved.has_value())
    AT_ASSERT(self__storage_saved.value().is_alias_of(self_.storage()));
  if (self__impl_saved) AT_ASSERT(self__impl_saved == self_.getIntrusivePtr());
  #endif
  if (grad_fn) {
    set_history(flatten_tensor_args( result ), grad_fn);
  }
  throw_error_for_complex_autograd(result, "abs");
  if (isFwGradDefined(self)) {
    auto self_fw_grad = toLegacyFwGrad(self);
    auto self_primal = toLegacyPrimal(self);
    auto result_new_fw_grad = self_fw_grad * self_primal.sgn();
    if (result_new_fw_grad.defined()) {
      result.set_fw_grad(result_new_fw_grad, /* level */ 0, /* is_inplace_op */ false);
    }
  }
  return result;
}
```

- For a linear entry:

```yaml
- name: clone(Tensor self, *, MemoryFormat? memory_format=None) -> Tensor
  self: grad
  result: auto_linear
```

```cpp
Tensor clone(const Tensor & self, c10::optional<MemoryFormat> memory_format) {
  auto& self_ = unpack(self, "self", 0);
  auto _any_requires_grad = compute_requires_grad( self );
  std::shared_ptr<CloneBackward> grad_fn;
  if (_any_requires_grad) {
    grad_fn = std::shared_ptr<CloneBackward>(new CloneBackward(), deleteNode);
    grad_fn->set_next_edges(collect_next_edges( self ));
  }
  #ifndef NDEBUG
  c10::optional<Storage> self__storage_saved =
    self_.has_storage() ? c10::optional<Storage>(self_.storage()) : c10::nullopt;
  c10::intrusive_ptr<TensorImpl> self__impl_saved;
  if (self_.defined()) self__impl_saved = self_.getIntrusivePtr();
  #endif
  auto tmp = ([&]() {
    at::AutoNonVariableTypeMode non_var_type_mode(true);
    return at::clone(self_, memory_format);
  })();
  auto result = std::move(tmp);
  #ifndef NDEBUG
  if (self__storage_saved.has_value())
    AT_ASSERT(self__storage_saved.value().is_alias_of(self_.storage()));
  if (self__impl_saved) AT_ASSERT(self__impl_saved == self_.getIntrusivePtr());
  #endif
  if (grad_fn) {
    set_history(flatten_tensor_args( result ), grad_fn);
  }
  if (isFwGradDefined(self)) {
    auto self_fw_grad = toLegacyFwGrad(self);
    auto result_new_fw_grad = at::clone(self_fw_grad, memory_format);
    if (result_new_fw_grad.defined()) {
      result.set_fw_grad(result_new_fw_grad, /* level */ 0, /* is_inplace_op */ false);
    }
  }
  return result;
}
```

- For no entry:

```yaml
- name: angle(Tensor self) -> Tensor
  self: angle_backward(grad, self)
```

```cpp
Tensor angle(const Tensor & self) {
  auto& self_ = unpack(self, "self", 0);
  auto _any_requires_grad = compute_requires_grad( self );
  std::shared_ptr<AngleBackward> grad_fn;
  if (_any_requires_grad) {
    grad_fn = std::shared_ptr<AngleBackward>(new AngleBackward(), deleteNode);
    grad_fn->set_next_edges(collect_next_edges( self ));
    grad_fn->self_ = SavedVariable(self, false);
  }
  #ifndef NDEBUG
  c10::optional<Storage> self__storage_saved =
    self_.has_storage() ? c10::optional<Storage>(self_.storage()) : c10::nullopt;
  c10::intrusive_ptr<TensorImpl> self__impl_saved;
  if (self_.defined()) self__impl_saved = self_.getIntrusivePtr();
  #endif
  auto tmp = ([&]() {
    at::AutoNonVariableTypeMode non_var_type_mode(true);
    return at::angle(self_);
  })();
  auto result = std::move(tmp);
  #ifndef NDEBUG
  if (self__storage_saved.has_value())
    AT_ASSERT(self__storage_saved.value().is_alias_of(self_.storage()));
  if (self__impl_saved) AT_ASSERT(self__impl_saved == self_.getIntrusivePtr());
  #endif
  if (grad_fn) {
    set_history(flatten_tensor_args( result ), grad_fn);
  }
  throw_error_for_complex_autograd(result, "angle");
  TORCH_CHECK(!(isFwGradDefined(self)), "Trying to use forward prop with angle that does not support it.");
  return result;
}
```

[ghstack-poisoned]
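For reference, a minimal sketch of how these generated formulas would be exercised end to end, assuming the Python-level `torch.autograd.forward_ad` entry points touched later in this commit (`dual_level`, `make_dual`, `unpack_dual`); exact names and defaults may still shift in this stack:

```python
import torch
import torch.autograd.forward_ad as fwAD

primal = torch.randn(4)
tangent = torch.randn(4)

with fwAD.dual_level():
    # Attach the tangent to the primal at the current forward-AD level.
    dual = fwAD.make_dual(primal, tangent)
    out = dual.abs()  # should hit the auto_element_wise formula shown above
    _, out_tangent = fwAD.unpack_dual(out)

# JVP of abs at `primal` applied to `tangent`: tangent * sgn(primal)
print(torch.allclose(out_tangent, tangent * primal.sgn()))
```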
2 parents cbfbd0a + 8eb6fe0 commit 8a0d812

File tree: 10 files changed (+21, -60 lines)

aten/src/ATen/native/native_functions.yaml

Lines changed: 8 additions & 0 deletions

```diff
@@ -111,6 +111,14 @@
   dispatch:
     DefaultBackend: _fw_primal
 
+- func: make_dual(Tensor(a) primal, Tensor tangent, int level) -> Tensor(a)
+  use_c10_dispatcher: full
+  variants: function
+
+- func: unpack_dual(Tensor(a) dual, int level) -> (Tensor(a) primal, Tensor tangent)
+  use_c10_dispatcher: full
+  variants: function
+
 - func: rename_(Tensor(a!) self, Dimname[]? names) -> Tensor(a!)
   variants: method
```

aten/src/ATen/templates/TensorBody.h

Lines changed: 1 addition & 1 deletion

```diff
@@ -609,7 +609,7 @@ class CAFFE2_API Tensor {
 
   /// This function can be used to set the value of the forward grad.
   /// Note that the given value might not be used directly if it is a view of another Tensor.
-  void set_fw_grad(Tensor& new_grad, uint64_t level, bool is_inplace_op) {
+  void set_fw_grad(const Tensor& new_grad, uint64_t level, bool is_inplace_op) {
    impl_->set_fw_grad(new_grad, *this, level, is_inplace_op);
  }
 
```

c10/core/TensorImpl.cpp

Lines changed: 1 addition & 1 deletion

```diff
@@ -50,7 +50,7 @@ const at::Tensor& TensorImpl::fw_grad(uint64_t level, const at::Tensor& self) co
   return autograd_meta_->fw_grad(level, self);
 }
 
-void TensorImpl::set_fw_grad(at::Tensor& new_grad, const at::Tensor& self, uint64_t level, bool is_inplace_op) {
+void TensorImpl::set_fw_grad(const at::Tensor& new_grad, const at::Tensor& self, uint64_t level, bool is_inplace_op) {
   if (!autograd_meta_) autograd_meta_ = impl::GetAutogradMetaFactory()->make();
   autograd_meta_->set_fw_grad(new_grad, self, level, is_inplace_op);
 }
```

c10/core/TensorImpl.h

Lines changed: 2 additions & 2 deletions

```diff
@@ -137,7 +137,7 @@ struct C10_API AutogradMetaInterface {
   virtual at::Tensor& mutable_grad() = 0;
   virtual const at::Tensor& grad() const = 0;
   virtual const at::Tensor& fw_grad(uint64_t level, const at::Tensor& self) const = 0;
-  virtual void set_fw_grad(at::Tensor& new_grad, const at::Tensor& self, uint64_t level, bool is_inplace_op) = 0;
+  virtual void set_fw_grad(const at::Tensor& new_grad, const at::Tensor& self, uint64_t level, bool is_inplace_op) = 0;
   virtual ~AutogradMetaInterface();
 };
 
@@ -634,7 +634,7 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target {
    * - "is_inplace_op" is a boolean flag that tells if this gradient was generated
    *   by an inplace operation or an out of place one. This allows better error checking.
    */
-  void set_fw_grad(at::Tensor& new_grad, const at::Tensor& self, uint64_t level, bool is_inplace_op);
+  void set_fw_grad(const at::Tensor& new_grad, const at::Tensor& self, uint64_t level, bool is_inplace_op);
 
   /**
    * Return a typed data pointer to the actual data which this tensor refers to.
```

torch/autograd/forward_ad.py

Lines changed: 2 additions & 2 deletions

```diff
@@ -65,7 +65,7 @@ def make_dual(tensor, tangent, *, level=None):
         raise RuntimeError("Trying to create a dual Tensor for forward AD but no level "
                            "exists, make sure to enter_dual_level() first.")
 
-    return torch._C._make_dual(tensor, tangent, level=level)
+    return torch.make_dual(tensor, tangent, level=level)
 
 def unpack_dual(tensor, *, level=None):
     r"""Function that unpacks a "dual object" to recover two plain tensors, one representing
@@ -80,7 +80,7 @@ def unpack_dual(tensor, *, level=None):
     if level < 0:
         return tensor, None
 
-    return torch._C._unpack_dual(tensor, level=level)
+    return torch.unpack_dual(tensor, level=level)
 
 class dual_level(_DecoratorContextManager):
     r"""Context-manager that controls the current forward ad level. It
```

torch/csrc/autograd/autograd.cpp

Lines changed: 0 additions & 13 deletions

```diff
@@ -166,19 +166,6 @@ void exit_dual_level(uint64_t level) {
   ForwardADLevel::release_idx(level);
 }
 
-at::Tensor make_dual(const at::Tensor& primal, at::Tensor tangent, uint64_t level) {
-  TORCH_CHECK(!primal.fw_grad(level).defined(), "Making a dual Tensor based on a Tensor that "
-              "already has a forward gradient at the same level ", level, " is not supported.");
-
-  auto dual_tensor = primal.view(primal.sizes());
-  dual_tensor.set_fw_grad(tangent, level, /* is_inplace_op */ false);
-  return dual_tensor;
-}
-
-std::pair<at::Tensor, at::Tensor> unpack_dual(const at::Tensor& tensor, uint64_t level) {
-  return {tensor._fw_primal(level), tensor.fw_grad(level)};
-}
-
 } // namespace forward_ad
 
 } // namespace autograd
```

torch/csrc/autograd/autograd.h

Lines changed: 0 additions & 9 deletions

```diff
@@ -89,15 +89,6 @@ TORCH_API uint64_t enter_dual_level();
 /// reverse order compared to the entering that was done with the function above.
 TORCH_API void exit_dual_level(uint64_t level);
 
-/// This function can be used to create a dual Tensor that holds a tangent to compute forward mode gradients.
-/// Note that the dual Tensor's primal is a view of the given primal and the given tangent is used as-is.
-/// This function is backward differentiable.
-TORCH_API at::Tensor make_dual(const at::Tensor& primal, at::Tensor tangent, uint64_t level);
-/// This function can be used to unpack a given dual Tensor to get its primal and tangent. The returned primal
-/// is a view of the dual and the tangent is returned as is.
-/// This function is backward differentiable.
-TORCH_API std::pair<at::Tensor, at::Tensor> unpack_dual(const at::Tensor& tensor, uint64_t level);
-
 } // namespace forward_ad
 } // namespace autograd
 } // namespace torch
```

torch/csrc/autograd/init.cpp

Lines changed: 0 additions & 28 deletions

```diff
@@ -317,32 +317,6 @@ static PyObject * python_exit_dual_level(PyObject* _unused, PyObject* args, PyOb
   END_HANDLE_TH_ERRORS
 }
 
-static PyObject * python_make_dual(PyObject* _unused, PyObject* args, PyObject* kwargs) {
-  HANDLE_TH_ERRORS
-  static PythonArgParser parser({
-    "make_dual(Tensor tensor, Tensor tangent, *, int64_t level)"
-  });
-
-  ParsedArgs<3> parsed_args;
-  auto _r = parser.parse(args, kwargs, parsed_args);
-
-  return utils::wrap(forward_ad::make_dual(_r.tensor(0), _r.tensor(1), _r.toInt64(2)));
-  END_HANDLE_TH_ERRORS
-}
-
-static PyObject * python_unpack_dual(PyObject* _unused, PyObject* args, PyObject* kwargs) {
-  HANDLE_TH_ERRORS
-  static PythonArgParser parser({
-    "unpack_dual(Tensor tensor, *, int64_t level)"
-  });
-
-  ParsedArgs<2> parsed_args;
-  auto _r = parser.parse(args, kwargs, parsed_args);
-
-  return utils::wrap(forward_ad::unpack_dual(_r.tensor(0), _r.toInt64(1)));
-  END_HANDLE_TH_ERRORS
-}
-
 // autograd methods on torch._C
 static PyMethodDef methods[] = { // NOLINT
   {"_set_grad_enabled", set_grad_enabled, METH_O, nullptr},
@@ -356,8 +330,6 @@ static PyMethodDef methods[] = { // NOLINT
   {"autocast_decrement_nesting", autocast_decrement_nesting, METH_NOARGS, nullptr},
   {"set_anomaly_enabled", set_anomaly_mode_enabled, METH_O, nullptr},
   {"is_anomaly_enabled", is_anomaly_mode_enabled, METH_NOARGS, nullptr},
-  {"_make_dual", castPyCFunctionWithKeywords(python_make_dual), METH_VARARGS | METH_KEYWORDS, nullptr},
-  {"_unpack_dual", castPyCFunctionWithKeywords(python_unpack_dual), METH_VARARGS | METH_KEYWORDS, nullptr},
   {"_enter_dual_level", python_enter_dual_level, METH_NOARGS, nullptr},
   {"_exit_dual_level", castPyCFunctionWithKeywords(python_exit_dual_level), METH_VARARGS | METH_KEYWORDS, nullptr},
   {nullptr, nullptr, 0, nullptr}
```

torch/csrc/autograd/variable.cpp

Lines changed: 6 additions & 3 deletions

```diff
@@ -594,23 +594,26 @@ namespace {
 
 // This function is will ensure that the fw_grad_ is properly a view of the base for inplace ops on
 // Tensors that do not have forward grad originally.
-void AutogradMeta::set_fw_grad(Variable& new_grad, const Variable& self, uint64_t level, bool is_inplace_op) {
+void AutogradMeta::set_fw_grad(const Variable& new_grad_, const Variable& self, uint64_t level, bool is_inplace_op) {
   if (!fw_grad_) {
     // Lazy initialization
     fw_grad_ = std::make_shared<ForwardGrad>();
   }
   if (fw_grad_->contains(level)) {
     // Setting the forward grad again is only allowed if it is a no-op.
     // We do allow this case to simplify writing codegen for inplace ops.
-    TORCH_INTERNAL_ASSERT(new_grad.defined(), "Cannot set a forward grad that is an undefined Tensor. Use "
+    TORCH_INTERNAL_ASSERT(new_grad_.defined(), "Cannot set a forward grad that is an undefined Tensor. Use "
                           "_fw_primal(level) to get a new Tensor with this forward grad unset.");
 
     TORCH_INTERNAL_ASSERT(is_inplace_op, "Only inplace operations can re-set the forward grad of a Tensor that "
                           "already has one.");
 
-    TORCH_INTERNAL_ASSERT(fw_grad_->value(level).is_same(new_grad), "Cannot set a value of a forward grad if it "
+    TORCH_INTERNAL_ASSERT(fw_grad_->value(level).is_same(new_grad_), "Cannot set a value of a forward grad if it "
                           "already exists. Inplace operations should modify it inplace.");
   } else {
+    // TODO(alband) remove this spurious version counter bump
+    auto new_grad = new_grad_;
+
     // For inplace ops on a Tensor that does not already have a forward grad and is a view, we propagate
     // the tangent to the base and ensure that the new_grad is a view of that base's tangent.
     if (is_inplace_op && is_view_) {
```

torch/csrc/autograd/variable.h

Lines changed: 1 addition & 1 deletion

```diff
@@ -244,7 +244,7 @@ struct TORCH_API AutogradMeta : public c10::AutogradMetaInterface {
 
   const Variable& fw_grad(uint64_t level, const Variable& self) const override;
 
-  void set_fw_grad(Variable& new_grad, const Variable& self, uint64_t level, bool is_inplace_op) override;
+  void set_fw_grad(const Variable& new_grad, const Variable& self, uint64_t level, bool is_inplace_op) override;
 
   AutogradMeta(at::TensorImpl* self_impl = nullptr, bool requires_grad = false, Edge gradient_edge = Edge() ) {
     grad_fn_ = std::move(gradient_edge.function);
```
