From fe437edd0d4ac811deae55a9972633f19a8b23ad Mon Sep 17 00:00:00 2001
From: SeanNaren
Date: Tue, 9 Mar 2021 13:04:11 +0000
Subject: [PATCH 1/2] Set find_unused_parameters to True by default to fix
 breaking models; add suggestion to disable it for performance

---
 docs/source/benchmarking/performance.rst       | 16 ++++++++++++++++
 pytorch_lightning/plugins/training_type/ddp.py |  7 +++++++
 .../plugins/training_type/ddp_spawn.py         |  7 +++++++
 3 files changed, 30 insertions(+)

diff --git a/docs/source/benchmarking/performance.rst b/docs/source/benchmarking/performance.rst
index 5f89c759e49bc..d1bc2c9ebc009 100644
--- a/docs/source/benchmarking/performance.rst
+++ b/docs/source/benchmarking/performance.rst
@@ -94,6 +94,22 @@ DP performs three GPU transfers for EVERY batch:
 
 Whereas DDP only performs 1 transfer to sync gradients. Because of this, DDP is MUCH faster than DP.
 
+When using DDP, set find_unused_parameters=False
+------------------------------------------------
+
+By default, ``find_unused_parameters`` is set to ``True``. This guards against compatibility issues that have arisen in the past (see the `discussion `_ for more information).
+This default comes with a performance hit, and it can safely be disabled in most cases.
+
+.. code-block:: python
+
+    import pytorch_lightning as pl
+    from pytorch_lightning.plugins import DDPPlugin
+
+    trainer = pl.Trainer(
+        gpus=2,
+        plugins=DDPPlugin(find_unused_parameters=False),
+    )
+
 ----------
 
 16-bit precision
diff --git a/pytorch_lightning/plugins/training_type/ddp.py b/pytorch_lightning/plugins/training_type/ddp.py
index 3e6c618fcf4e2..f857ad50399cf 100644
--- a/pytorch_lightning/plugins/training_type/ddp.py
+++ b/pytorch_lightning/plugins/training_type/ddp.py
@@ -175,6 +175,13 @@ def set_world_ranks(self):
         self.world_size = self.num_nodes * self.num_processes
 
     def pre_configure_ddp(self):
+        # If unset, default `find_unused_parameters` to `True`.
+        # Many models require setting this parameter to True, as there are corner cases
+        # where not all parameter backward hooks are fired by the autograd engine even if requires_grad is set to True.
+        # This flag comes with a performance hit, so it is suggested to disable it where possible.
+        self._ddp_kwargs["find_unused_parameters"] = self._ddp_kwargs.get(
+            "find_unused_parameters", True
+        )
         # todo: PyTorch 1.7.0 DDP introduces ``self.reducer._rebuild_buckets()`` breaking manual_optimization
         if _TORCH_GREATER_EQUAL_1_7 and not self.lightning_module.automatic_optimization and not self._ddp_kwargs.get(
             "find_unused_parameters", False
diff --git a/pytorch_lightning/plugins/training_type/ddp_spawn.py b/pytorch_lightning/plugins/training_type/ddp_spawn.py
index 9f90ca2cf825b..3636b2fb92fa2 100644
--- a/pytorch_lightning/plugins/training_type/ddp_spawn.py
+++ b/pytorch_lightning/plugins/training_type/ddp_spawn.py
@@ -168,6 +168,13 @@ def post_dispatch(self):
         self.__recover_child_process_weights(best_path, last_path)
 
     def pre_configure_ddp(self):
+        # If unset, default `find_unused_parameters` to `True`.
+        # Many models require setting this parameter to True, as there are corner cases
+        # where not all parameter backward hooks are fired by the autograd engine even if requires_grad is set to True.
+        # This flag comes with a performance hit, so it is suggested to disable it where possible.
+        self._ddp_kwargs["find_unused_parameters"] = self._ddp_kwargs.get(
+            "find_unused_parameters", True
+        )
         # todo: PyTorch 1.7.0 DDP introduces ``self.reducer._rebuild_buckets()`` breaking manual_optimization
         if _TORCH_GREATER_EQUAL_1_7 and not self.lightning_module.automatic_optimization and not self._ddp_kwargs.get(
             "find_unused_parameters", False

From f09c7693054405b81293a555c324725925cb9184 Mon Sep 17 00:00:00 2001
From: SeanNaren
Date: Tue, 9 Mar 2021 13:11:13 +0000
Subject: [PATCH 2/2] Add changelog

---
 CHANGELOG.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d1c347c00a3f1..ac43223ed39bf 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -44,6 +44,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Changed `setup()` and `teardown()` stage argument to take any of `{fit,validate,test,predict}` ([#6386](https://github.com/PyTorchLightning/pytorch-lightning/pull/6386))
 
 
+- Changed the default of `find_unused_parameters` back to `True` in DDP and DDP Spawn ([#6438](https://github.com/PyTorchLightning/pytorch-lightning/pull/6438))
+
+
 ### Deprecated
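
---

For context, a minimal sketch of the corner case the new comments describe (a hypothetical model for illustration, not code from this patch): a parameter can have `requires_grad=True` yet never receive a gradient, because it takes no part in the forward pass. With `find_unused_parameters=False`, DDP's reducer waits for a gradient that never arrives and errors out during `backward()`; the `True` default lets DDP detect and skip such parameters.

```python
import torch.nn as nn


class PartiallyUsedModel(nn.Module):
    """Hypothetical model: `self.unused` has requires_grad=True, but its
    backward hook never fires because it does not contribute to the output."""

    def __init__(self):
        super().__init__()
        self.used = nn.Linear(32, 2)
        self.unused = nn.Linear(32, 2)  # never referenced in forward()

    def forward(self, x):
        # Only `self.used` participates in the loss, so under DDP with
        # find_unused_parameters=False the reducer would wait forever for
        # gradients of `self.unused` and fail at the first backward().
        return self.used(x)
```

A model like this trains fine under the new `find_unused_parameters=True` default, which is why the docs only suggest disabling the flag "where possible" rather than unconditionally.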