
Commit b285087

Author: Sean Naren
Merge c53873d into 2708c39 (2 parents: 2708c39 + c53873d)

File tree: 3 files changed (+50, -6 lines)


pytorch_lightning/plugins/training_type/ddp.py (11 additions, 2 deletions)

@@ -242,8 +242,9 @@ def pre_dispatch(self):
         if self.sync_batchnorm:
             self.model = self.configure_sync_batchnorm(self.model)
 
-        # move the model to the correct device
-        self.model_to_device()
+        if self.call_move_to_device_hook_in_pre_dispatch:
+            # move the model to the correct device
+            self.model_to_device()
 
         self.configure_ddp()
 
@@ -302,3 +303,11 @@ def predict(self, *args, **kwargs):
     def post_training_step(self):
         if not self.lightning_module.automatic_optimization:
             self.model.require_backward_grad_sync = True
+
+    @property
+    def call_move_to_device_hook_in_pre_dispatch(self) -> bool:
+        """
+        Call the ``model_to_device`` function within pre_dispatch if this is set to True.
+        Useful when a plugin would like to call ``model_to_device`` at another time, or to skip the call.
+        """
+        return True
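The property added here is an extension seam: a subclass can override it to defer or skip device placement during ``pre_dispatch``. A minimal sketch of how a custom plugin might use it (the ``DeferredDevicePlugin`` name and its override are hypothetical, not part of this commit):

from pytorch_lightning.plugins import DDPPlugin


class DeferredDevicePlugin(DDPPlugin):
    """Hypothetical plugin that handles device placement itself, outside pre_dispatch."""

    @property
    def call_move_to_device_hook_in_pre_dispatch(self) -> bool:
        # Returning False skips the model_to_device() call in pre_dispatch;
        # this plugin is assumed to move the model at a later point itself.
        return False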

pytorch_lightning/plugins/training_type/ddp_spawn.py (12 additions, 3 deletions)

@@ -54,7 +54,7 @@ def __init__(
         self.sync_batchnorm = sync_batchnorm
         self._ddp_kwargs = kwargs
         self.dist = LightningDistributed()
-        self.num_processes = len(parallel_devices)
+        self.num_processes = len(parallel_devices) if parallel_devices is not None else None
         self.node_rank = 0
         self.mp_queue = None
 
@@ -146,8 +146,9 @@ def new_process(self, process_idx, trainer, mp_queue):
         if self.sync_batchnorm:
             self.model = self.configure_sync_batchnorm(self.model)
 
-        # move the model to the correct device
-        self.model_to_device()
+        if self.call_move_to_device_hook_in_pre_dispatch:
+            # move the model to the correct device
+            self.model_to_device()
 
         self.configure_ddp()
 
@@ -285,3 +286,11 @@ def predict(self, *args, **kwargs):
     def post_training_step(self):
         if not self.lightning_module.automatic_optimization:
            self.model.require_backward_grad_sync = True
+
+    @property
+    def call_move_to_device_hook_in_pre_dispatch(self) -> bool:
+        """
+        Call the ``model_to_device`` function within pre_dispatch if this is set to True.
+        Useful when a plugin would like to call ``model_to_device`` at another time, or to skip the call.
+        """
+        return True
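Note the ``num_processes`` guard in ``__init__``: the spawn plugin can now be constructed before its devices are known. A small illustration of the behaviour this change permits (a sketch, assuming ``parallel_devices`` may be passed as ``None`` at construction time):

from pytorch_lightning.plugins import DDPSpawnPlugin

# Before this change, len(None) raised a TypeError at construction time.
# Now num_processes stays None until parallel_devices is filled in later.
plugin = DDPSpawnPlugin(parallel_devices=None)
assert plugin.num_processes is None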

tests/accelerators/test_ddp.py (27 additions, 1 deletion)

@@ -12,12 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import os
+from unittest import mock
 from unittest.mock import patch
 
 import pytest
 import torch
 
 from pytorch_lightning import Trainer
+from pytorch_lightning.plugins import DDPPlugin, DDPSpawnPlugin
 from tests.accelerators import ddp_model, DDPLauncher
 from tests.helpers.boring_model import BoringModel
 from tests.helpers.runif import RunIf
 
@@ -91,7 +93,6 @@ def test_torch_distributed_backend_env_variables(tmpdir):
     _environ = {"PL_TORCH_DISTRIBUTED_BACKEND": "undefined", "CUDA_VISIBLE_DEVICES": "0,1", "WORLD_SIZE": "2"}
     with patch.dict(os.environ, _environ), \
             patch('torch.cuda.device_count', return_value=2):
-
         with pytest.raises(ValueError, match="Invalid backend: 'undefined'"):
             model = BoringModel()
             trainer = Trainer(
 
@@ -102,3 +103,28 @@
             logger=False,
         )
         trainer.fit(model)
+
+
+@pytest.mark.parametrize('move_to_device_pre_dispatch_enabled', [True, False])
+@mock.patch('pytorch_lightning.plugins.DDPPlugin.model_to_device')
+def test_move_to_device_in_pre_dispatch(mock_model_to_device, tmpdir, move_to_device_pre_dispatch_enabled):
+    """
+    Test that when ``call_move_to_device_hook_in_pre_dispatch`` is disabled, the model is not moved
+    to the device until later in training.
+    """
+
+    with mock.patch(
+        'pytorch_lightning.plugins.DDPPlugin.call_move_to_device_hook_in_pre_dispatch',
+        move_to_device_pre_dispatch_enabled
+    ):
+        model = BoringModel()
+        trainer = Trainer(
+            default_root_dir=tmpdir, fast_dev_run=True, accelerator='ddp', plugins=DDPPlugin(), num_processes=1
+        )
+        trainer.fit(model)
+
+        # Check whether the mocked hook was called. Since we are on CPU, model_to_device is a no-op anyway.
+        if move_to_device_pre_dispatch_enabled:
+            mock_model_to_device.assert_called()
+        else:
+            mock_model_to_device.assert_not_called()
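Note how the test toggles the hook: since ``call_move_to_device_hook_in_pre_dispatch`` is defined on the class, ``mock.patch`` can swap the property out for a plain boolean for the duration of the ``with`` block. The same pattern works in isolation (a minimal sketch, independent of the Trainer; assumes ``DDPPlugin`` accepts an empty ``parallel_devices`` list):

from unittest import mock

from pytorch_lightning.plugins import DDPPlugin

# Patching the class attribute replaces the property object itself,
# so every instance sees the plain value until the patch is undone.
with mock.patch('pytorch_lightning.plugins.DDPPlugin.call_move_to_device_hook_in_pre_dispatch', False):
    plugin = DDPPlugin(parallel_devices=[])
    assert plugin.call_move_to_device_hook_in_pre_dispatch is False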
