Lightning-AI · carmocca · Mar 30, 2021 · Mar 29, 2021 · Mar 29, 2021 · Mar 29, 2021
@@ -165,6 +165,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Removed legacy code to include `step` dictionary returns in `callback_metrics`. Use `self.log_dict` instead. ([#6682](https://github.com/PyTorchLightning/pytorch-lightning/pull/6682))
 
 
+- Removed legacy code to log or include metrics in the progress bar by returning them in a dict with the `"log"/"progress_bar"` magic keys. Use `self.log` instead. ([#6734](https://github.com/PyTorchLightning/pytorch-lightning/pull/6734))
+
+
 - Removed `optimizer_idx` argument from `training_step` in manual optimization ([#6093](https://github.com/PyTorchLightning/pytorch-lightning/pull/6093))
 
 

@@ -751,13 +751,8 @@ be customized with PyTorch Lightning since every NeMo model is a LightningModule
 
             l_mle, l_length, logdet, loss, _ = self.step(y, y_lengths, x, x_lengths)
 
-            output = {
-                "loss": loss,  # required
-                "progress_bar": {"l_mle": l_mle, "l_length": l_length, "logdet": logdet},
-                "log": {"loss": loss, "l_mle": l_mle, "l_length": l_length, "logdet": logdet},
-            }
-
-            return output
+            self.log_dict({"l_mle": l_mle, "l_length": l_length, "logdet": logdet}, prog_bar=True)
+            return loss
         ...
 
 Neural Types in NeMo TTS

@@ -526,10 +526,6 @@ def reduce_across_time(cls, time_outputs):
         # auto-reduce across time for tbptt
         meta = time_outputs[0]['meta']
 
-        # in 1.0 the results have 'extra'. Once we deprecate 0.10.0 we may not need this
-        if 'extra' in time_outputs[0]:
-            [x.pop('extra', None) for x in time_outputs]
-
         result = cls()
         result = recursive_gather(time_outputs, result)
         recursive_stack(result)

@@ -394,12 +394,10 @@ def run_batch_from_func_name(self, func_name) -> Dict:
 
     def get_latest_batch_log_metrics(self) -> Dict:
         batch_log_metrics = self.run_batch_from_func_name("get_batch_log_metrics")
-        batch_log_metrics.update(self.legacy_batch_log_metrics)
         return batch_log_metrics
 
     def get_latest_batch_pbar_metrics(self) -> Dict:
         batch_pbar_metrics = self.run_batch_from_func_name("get_batch_pbar_metrics")
-        batch_pbar_metrics.update(self.legacy_batch_pbar_metrics)
         return batch_pbar_metrics
 
     @property
@@ -451,8 +449,6 @@ def reset(self):
         self._opt_idx: Optional[int] = None
         self._batch_size: Optional[int] = None
         self._has_batch_loop_finished = False
-        self.legacy_batch_log_metrics = {}
-        self.legacy_batch_pbar_metrics = {}
 
     def __call__(
         self,

@@ -191,10 +191,7 @@ def cache_training_step_metrics(self, opt_closure_result):
             self.add_progress_bar_metrics(pbar_metrics_tmp)
 
         self._callback_metrics.update(callback_metrics_tmp)
-
-        # save legacy log metrics
         self._logged_metrics.update(logged_metrics_tmp)
-        self.cached_results.legacy_batch_log_metrics.update(logged_metrics_tmp)
 
     def log_metrics(self, metrics, grad_norm_dic, step=None):
         """Logs the metric dict passed in.

@@ -12,25 +12,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import inspect
 from abc import ABC
-from collections import Mapping
 
 import torch
 
-from pytorch_lightning.utilities import DistributedType
-from pytorch_lightning.utilities.distributed import rank_zero_warn
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
-from pytorch_lightning.utilities.memory import recursive_detach
 
 
 class TrainerLoggingMixin(ABC):
 
-    # this is just a summary on variables used in this abstract class,
-    #  the proper values/initialisation should be done in child class
-    _distrib_type: DistributedType
-    num_gpus: int
-
     def metrics_to_scalars(self, metrics):
         new_metrics = {}
         # TODO: this is duplicated in MetricsHolder. should be unified
@@ -49,128 +39,3 @@ def metrics_to_scalars(self, metrics):
             new_metrics[k] = v
 
         return new_metrics
-
-    def process_dict_result(self, output, train=False):
-        """Reduces output according to the training mode.
-
-        Separates loss from logging and progress bar metrics
-        """
-        # --------------------
-        # WARN DEPRECATED KEYS
-        # --------------------
-        # TODO: 1.0.0 remove
-        if isinstance(output, dict):
-            for k, v in output.items():
-                if k in ['log', 'progress_bar']:
-                    m = inspect.cleandoc(
-                        f"The {{{k}:dict keyword}} was deprecated in 0.9.1 and will be removed in 1.0.0\n"
-                        " Please use self.log(...) inside the lightningModule instead.\n"
-                        " # log on a step or aggregate epoch metric to the logger and/or progress bar"
-                        " (inside LightningModule)\n"
-                        " self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True)"
-                    )
-                    rank_zero_warn(m)
-
-        # --------------------------
-        # handle single scalar only
-        # --------------------------
-        # single scalar returned from a xx_step
-        if isinstance(output, torch.Tensor):
-            return output, {}, {}, None
-
-        # ---------------
-        # EXTRACT PROGRESS BAR KEYS
-        # ---------------
-        try:
-            progress_output = output['progress_bar']
-
-            # reduce progress metrics for progress bar when using dp
-            if train and self._distrib_type in (DistributedType.DP, DistributedType.DDP2):
-                num_gpus = self.num_gpus
-                progress_output = self.reduce_distributed_output(progress_output, num_gpus)
-
-            progress_bar_metrics = progress_output
-        # todo: specify the possible exception
-        except Exception:
-            progress_bar_metrics = {}
-
-        # ---------------
-        # EXTRACT LOGGING KEYS
-        # ---------------
-        # extract metrics to log to experiment
-        try:
-            log_output = output['log']
-
-            # reduce progress metrics for progress bar when using dp
-            if train and self._distrib_type in (DistributedType.DP, DistributedType.DDP2):
-                num_gpus = self.num_gpus
-                log_output = self.reduce_distributed_output(log_output, num_gpus)
-
-            log_metrics = log_output
-        # todo: specify the possible exception
-        except Exception:
-            log_metrics = {}
-
-        # ---------------
-        # EXTRACT LOSS
-        # ---------------
-        # if output dict doesn't have the keyword loss
-        # then assume the output=loss if scalar
-        loss = None
-        if train:
-            try:
-                loss = output['loss']
-            # todo: specify the possible exception
-            except Exception as exp:
-                if isinstance(output, torch.Tensor):
-                    loss = output
-                else:
-                    raise RuntimeError(
-                        'No `loss` value in the dictionary returned from `model.training_step()`.'
-                    ) from exp
-
-            # when using dp need to reduce the loss
-            if self._distrib_type in (DistributedType.DP, DistributedType.DDP2):
-                loss = self.reduce_distributed_output(loss, self.num_gpus)
-
-        # ---------------
-        # EXTRACT HIDDEN
-        # ---------------
-        hiddens = output.get('hiddens', None) if isinstance(output, Mapping) else None
-        if hiddens is not None:
-            hiddens = hiddens.detach()
-
-        # detach all metrics for callbacks to prevent memory leaks
-        # no .item() because it will slow things down
-        progress_bar_metrics = recursive_detach(progress_bar_metrics)
-        log_metrics = recursive_detach(log_metrics)
-
-        return loss, progress_bar_metrics, log_metrics, hiddens
-
-    def reduce_distributed_output(self, output, num_gpus):
-        if num_gpus <= 1:
-            return output
-
-        # when using DP, we get one output per gpu
-        # average outputs and return
-        if isinstance(output, torch.Tensor):
-            return output.mean()
-
-        for k, v in output.items():
-            # recurse on nested dics
-            if isinstance(output[k], dict):
-                output[k] = self.reduce_distributed_output(output[k], num_gpus)
-
-            # compute the average of scalars
-            elif isinstance(output[k], list):
-                output[k] = sum(output[k]) / len(output[k])
-
-            # do nothing when there's a scalar
-            elif isinstance(output[k], torch.Tensor) and output[k].dim() == 0:
-                pass
-
-            # do not reduce metrics that have batch size > num gpus
-            elif output[k].size(0) <= num_gpus:
-                output[k] = torch.mean(output[k])
-
-        return output
@@ -28,7 +28,6 @@
 from pytorch_lightning.utilities import _TPU_AVAILABLE, AMPType, DeviceType, parsing
 from pytorch_lightning.utilities.distributed import rank_zero_info
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
-from pytorch_lightning.utilities.memory import recursive_detach
 from pytorch_lightning.utilities.model_helpers import is_overridden
 from pytorch_lightning.utilities.parsing import AttributeDict
 from pytorch_lightning.utilities.warnings import WarningCache
@@ -242,12 +241,7 @@ def get_optimizers_iterable(self):
         return [[opt_idx, self.trainer.optimizers[opt_idx]]]
 
     def on_after_backward(self, training_step_output, batch_idx, untouched_loss):
-        is_result_obj = isinstance(training_step_output, Result)
-
-        if is_result_obj:
-            training_step_output = training_step_output.detach()
-        else:
-            training_step_output.batch_loss = training_step_output.batch_loss.detach()
+        training_step_output.detach()
 
         # insert after step hook
         self.trainer.call_hook("on_after_backward")
@@ -284,24 +278,16 @@ def training_step(self, split_batch, batch_idx, opt_idx, hiddens):
             training_step_output_for_epoch_end, training_step_output = self._process_training_step_output(
                 training_step_output, split_batch
             )
-            is_result_obj = isinstance(training_step_output, Result)
-
             if training_step_output_for_epoch_end is None:
-                return None
+                return
 
         # enable empty loss when using manual opt
         closure_loss = None
         untouched_loss = None
 
         if self.automatic_optimization:
-            # accumulate loss
-            # (if accumulate_grad_batches = 1 no effect)
-            if is_result_obj:
-                closure_loss = training_step_output.minimize
-            else:
-                closure_loss = training_step_output.batch_loss
-
-            closure_loss = closure_loss / self.trainer.accumulate_grad_batches
+            # accumulate loss. if accumulate_grad_batches==1, no effect
+            closure_loss = training_step_output.minimize / self.trainer.accumulate_grad_batches
 
             # the loss will get scaled for amp. avoid any modifications to it
             untouched_loss = closure_loss.detach().clone()
@@ -322,35 +308,6 @@ def _process_training_step_output(self, training_step_output, split_batch):
         if training_step_output_for_epoch_end is None:
             return None, None
 
-        # -----------------------------------------
-        # process hybrid (1.0)
-        # -----------------------------------------
-        # no need for these checks in 1.0.0
-        # TODO: remove checks in 1.0.0
-        is_tensor = isinstance(training_step_output_for_epoch_end, torch.Tensor)
-        is_1_0_output = is_tensor or ("log" not in training_step_output and "progress_bar" not in training_step_output)
-        if is_1_0_output:
-            return self._process_training_step_output_1_0(training_step_output, split_batch)
-
-        # -----------------------------------------
-        # process old dict (deprecate 1.0)
-        # -----------------------------------------
-        training_step_output = self.trainer.process_dict_result(training_step_output, train=True)
-
-        training_step_output = AttributeDict(
-            batch_loss=training_step_output[0],
-            pbar_on_batch_end=training_step_output[1],
-            log_metrics=training_step_output[2],
-        )
-        # if the user decides to finally reduce things in epoch_end, save raw output without graphs
-        if isinstance(training_step_output_for_epoch_end, torch.Tensor):
-            training_step_output_for_epoch_end = training_step_output_for_epoch_end.detach()
-        else:
-            training_step_output_for_epoch_end = recursive_detach(training_step_output_for_epoch_end)
-
-        return training_step_output_for_epoch_end, training_step_output
-
-    def _process_training_step_output_1_0(self, training_step_output, split_batch):
         result = self.trainer.lightning_module._results
 
         loss = None
@@ -361,6 +318,8 @@ def _process_training_step_output_1_0(self, training_step_output, split_batch):
         if isinstance(training_step_output, dict):
             loss = training_step_output.pop("loss", None)
             hiddens = training_step_output.pop("hiddens", None)
+            if hiddens is not None:
+                hiddens = hiddens.detach()
             result["extra"] = training_step_output
 
         # handle scalar return
@@ -380,10 +339,7 @@ def _process_training_step_output_1_0(self, training_step_output, split_batch):
         if self.trainer.move_metrics_to_cpu:
             training_step_output_for_epoch_end = training_step_output_for_epoch_end.cpu()
 
-        # what flows back into the system
-        training_step_output = result
-
-        return training_step_output_for_epoch_end, training_step_output
+        return training_step_output_for_epoch_end, result
 
     def optimizer_step(self, optimizer, opt_idx, batch_idx, train_step_and_backward_closure):
         model_ref = self.trainer.lightning_module

@@ -876,7 +876,6 @@ def validation_epoch_end(self, outputs):
     assert trainer.dev_debugger.checkpoint_callback_history[-1]['epoch'] == len(monitor) - 1
 
 
-@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"})
 def test_checkpoint_repeated_strategy(tmpdir):
     """
     This test validates that the checkpoint can be called when provided to callbacks list
@@ -923,7 +922,6 @@ def validation_step(self, batch, batch_idx):
     assert set(os.listdir(tmpdir.join("lightning_logs"))) == {f'version_{i}' for i in range(4)}
 
 
-@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"})
 def test_checkpoint_repeated_strategy_extended(tmpdir):
     """
     This test validates checkpoint can be called several times without
Original file line number	Diff line number	Diff line change
Expand Up		@@ -165,6 +165,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
		- Removed legacy code to include `step` dictionary returns in `callback_metrics`. Use `self.log_dict` instead. ([#6682](https://github.com/PyTorchLightning/pytorch-lightning/pull/6682))


		- Removed legacy code to log or include metrics in the progress bar by returning them in a dict with the `"log"/"progress_bar"` magic keys. Use `self.log` instead. ([#6734](https://github.com/PyTorchLightning/pytorch-lightning/pull/6734))


		- Removed `optimizer_idx` argument from `training_step` in manual optimization ([#6093](https://github.com/PyTorchLightning/pytorch-lightning/pull/6093))


Expand Down