Skip to content

Commit 7c90913

Browse files
dvolgyes, tchaton, and Borda
authored and committed
Fix for incorrect usage of detach(), cpu(), to() (Lightning-AI#6216)
* Fix for incorrect detach/cpu calls (Lightning-AI#6214) * Fix incorrect use of detach(), to(), and cpu(), Lightning-AI#6214 * Fix incorrect use of detach() and cpu(), Lightning-AI#6214 * update pr * add typing * chlog * more... * revert on module * update on comments * revert changes on model Co-authored-by: tchaton <[email protected]> Co-authored-by: Jirka Borovec <[email protected]>
1 parent 548241b commit 7c90913

File tree

6 files changed

+20
-15
lines changed

6 files changed

+20
-15
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
3232
- Fixed multiple early stopping callbacks ([#6197](https://github.com/PyTorchLightning/pytorch-lightning/pull/6197))
3333

3434

35+
- Fixed incorrect usage of `detach()`, `cpu()`, `to()` ([#6216](https://github.com/PyTorchLightning/pytorch-lightning/pull/6216))
36+
37+
3538
- Fixed LBFGS optimizer support which didn't converge in automatic optimization ([#6147](https://github.com/PyTorchLightning/pytorch-lightning/pull/6147))
3639

3740

pytorch_lightning/core/step_result.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -416,20 +416,22 @@ def get_batch_pbar_metrics(self, include_forked_originals=True, add_dataloader_i
416416

417417
return result
418418

419-
def detach(self):
419+
def detach(self) -> 'Result':
420420
for k, v in self.items():
421421
if isinstance(v, torch.Tensor):
422422
self.__setitem__(k, v.detach())
423+
return self
423424

424-
def to(self, *args, **kwargs):
425+
def to(self, *args, **kwargs) -> 'Result':
425426
"""Move all self attributes to the given device."""
426427
for k, v in self.items():
427428
if isinstance(v, torch.Tensor):
428429
self.__setitem__(k, v.to(*args, **kwargs))
430+
return self
429431

430-
def cpu(self):
432+
def cpu(self) -> 'Result':
431433
"""Move all self attributes to CPU."""
432-
self.to(torch.device("cpu"))
434+
return self.to(torch.device("cpu"))
433435

434436
def __repr__(self):
435437
self_copy = self.copy()

pytorch_lightning/trainer/connectors/logger_connector/epoch_result_store.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -281,11 +281,11 @@ def cache_result(self) -> None:
281281
# attach capture batch_size
282282
Result.attach_batch_size(self._batch_size, hook_result)
283283

284-
hook_result.detach()
284+
hook_result = hook_result.detach()
285285
if self.trainer.move_metrics_to_cpu:
286-
hook_result.cpu()
286+
hook_result = hook_result.cpu()
287287
elif self.trainer._distrib_type == DistributedType.DP:
288-
hook_result.to(torch.device("cuda", self.trainer.root_gpu))
288+
hook_result = hook_result.to(torch.device("cuda", self.trainer.root_gpu))
289289

290290
self._internals[fx_name].append(hook_result, info)
291291

pytorch_lightning/trainer/trainer.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -773,9 +773,9 @@ def run_evaluation(self, max_batches=None, on_epoch=False):
773773
def track_output_for_epoch_end(self, outputs, output):
774774
if output is not None:
775775
if isinstance(output, Result):
776-
output.detach()
776+
output = output.detach()
777777
if self.move_metrics_to_cpu:
778-
output.cpu()
778+
output = output.cpu()
779779
elif isinstance(output, dict):
780780
output = recursive_detach(output, to_cpu=self.move_metrics_to_cpu)
781781
elif isinstance(output, torch.Tensor) and output.is_cuda and self.move_metrics_to_cpu:

pytorch_lightning/trainer/training_loop.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,7 @@ def on_after_backward(self, training_step_output, batch_idx, untouched_loss):
263263
is_result_obj = isinstance(training_step_output, Result)
264264

265265
if is_result_obj:
266-
training_step_output.detach()
266+
training_step_output = training_step_output.detach()
267267
else:
268268
training_step_output.batch_loss = training_step_output.batch_loss.detach()
269269

@@ -397,9 +397,9 @@ def _process_training_step_output_1_0(self, training_step_output, split_batch):
397397

398398
# track metrics without grads for epoch reduction
399399
training_step_output_for_epoch_end = copy(result)
400-
training_step_output_for_epoch_end.detach()
400+
training_step_output_for_epoch_end = training_step_output_for_epoch_end.detach()
401401
if self.trainer.move_metrics_to_cpu:
402-
training_step_output_for_epoch_end.cpu()
402+
training_step_output_for_epoch_end = training_step_output_for_epoch_end.cpu()
403403

404404
# what flows back into the system
405405
training_step_output = result

tests/overrides/test_data_parallel.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -144,9 +144,9 @@ def training_step(self, batch, batch_idx):
144144
output.update({"python scalar": 12.3})
145145
return output
146146

147-
model = TestModel()
148-
model.to(device)
149-
model.running_stage = RunningStage.TRAINING
147+
model = TestModel().to(device)
148+
model.trainer = MagicMock()
149+
model.trainer._running_stage = RunningStage.TRAINING
150150
batch = torch.rand(2, 32).to(device)
151151
batch_idx = 0
152152

0 commit comments

Comments (0)