Commit 817ffe3

awaelchli authored and carmocca committed
Enable self.device access in setup hook (#18021)
1 parent a6ce061 commit 817ffe3

File tree: 3 files changed, +20 -3 lines

src/lightning/pytorch/CHANGELOG.md

Lines changed: 3 additions & 0 deletions

@@ -111,6 +111,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Dropped support for `wandb` versions older than 0.12.0 in `WandbLogger` ([#17876](https://github.com/Lightning-AI/lightning/pull/17876))
 
 
+- During `LightningModule.setup()`, the `self.device` now returns the device the module will be placed on instead of `cpu` ([#18021](https://github.com/Lightning-AI/lightning/pull/18021))
+
+
 ### Deprecated
 
 - Deprecated the `SingleTPUStrategy` (`strategy="single_tpu"`) in favor of `SingleDeviceXLAStrategy` (`strategy="single_xla"`) ([#17383](https://github.com/Lightning-AI/lightning/pull/17383))
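
The changelog entry above describes a user-facing behavior change. A minimal sketch of what it enables (the class name, layer sizes, and Trainer flags below are illustrative, not taken from the commit): a `LightningModule` can now read `self.device` inside `setup` to learn which device it will be placed on, while its parameters are still on the CPU.

import torch
import lightning.pytorch as pl
from torch.utils.data import DataLoader, TensorDataset


class SetupAwareModel(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.layer = torch.nn.Linear(32, 2)

    def setup(self, stage: str) -> None:
        # With #18021, `self.device` already reports the accelerator's root device here
        # (e.g. cuda:0), even though the parameters have not been moved yet.
        print(f"setup({stage}): self.device={self.device}, weights on {self.layer.weight.device}")
        # Layers created in this hook are built on the CPU and moved by the Trainer afterwards.
        self.head = torch.nn.Linear(2, 2)

    def training_step(self, batch, batch_idx):
        x = batch[0] if isinstance(batch, (list, tuple)) else batch
        return self.head(self.layer(x)).sum()

    def configure_optimizers(self):
        return torch.optim.SGD(self.parameters(), lr=0.1)


# A quick run that triggers `setup("fit")` before the module is moved to the device:
data = DataLoader(TensorDataset(torch.randn(64, 32)), batch_size=8)
trainer = pl.Trainer(accelerator="auto", devices=1, fast_dev_run=True)
trainer.fit(SetupAwareModel(), data)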

src/lightning/pytorch/trainer/call.py

Lines changed: 7 additions & 0 deletions

@@ -18,6 +18,7 @@
 from packaging.version import Version
 
 import lightning.pytorch as pl
+from lightning.fabric.utilities.device_dtype_mixin import _DeviceDtypeModuleMixin
 from lightning.pytorch.callbacks import Checkpoint, EarlyStopping
 from lightning.pytorch.trainer.states import TrainerStatus
 from lightning.pytorch.utilities.exceptions import _TunerExitException
@@ -72,6 +73,12 @@ def _call_setup_hook(trainer: "pl.Trainer") -> None:
     assert trainer.state.fn is not None
     fn = trainer.state.fn
 
+    # It is too early to move the model to the device, but we fake the `LightningModule.device` property
+    # so the user can access it in the `LightningModule.setup` hook
+    for module in trainer.lightning_module.modules():
+        if isinstance(module, _DeviceDtypeModuleMixin):
+            module._device = trainer.strategy.root_device
+
     # Trigger lazy creation of experiment in loggers so loggers have their metadata available
     for logger in trainer.loggers:
         _ = logger.experiment
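
The hunk above works because `LightningModule` inherits from `_DeviceDtypeModuleMixin`, whose `device` property reports the value of the private `_device` attribute; assigning `_device` therefore changes what `device` returns without moving any parameters. A minimal sketch of that mechanism outside the Trainer, under that assumption (the `TinyModule` class is illustrative, not part of the commit):

import torch
from lightning.fabric.utilities.device_dtype_mixin import _DeviceDtypeModuleMixin


class TinyModule(_DeviceDtypeModuleMixin):
    def __init__(self):
        super().__init__()
        self.layer = torch.nn.Linear(2, 2)


module = TinyModule()
print(module.device)               # cpu
# This is what `_call_setup_hook` does for every mixin submodule before `setup` runs:
module._device = torch.device("cuda", 0)
print(module.device)               # cuda:0 is reported ...
print(module.layer.weight.device)  # ... but the weights are still on the cpu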

tests/tests_pytorch/trainer/test_trainer.py

Lines changed: 10 additions & 3 deletions

@@ -1478,16 +1478,23 @@ def configure_optimizers(self):
 @pytest.mark.parametrize(
     "accelerator",
     [
-        pytest.param("gpu", marks=RunIf(min_cuda_gpus=1)),
+        pytest.param("cuda", marks=RunIf(min_cuda_gpus=1)),
         pytest.param("mps", marks=RunIf(mps=True)),
     ],
 )
-def test_setup_hook_move_to_device_correctly(tmpdir, accelerator):
-    """Verify that if a user defines a layer in the setup hook function, this is moved to the correct device."""
+def test_setup_hook_device_and_layers(tmpdir, accelerator):
+    """Test `LightningModule.device` access and creation of layers in `LightningModule.setup` hook."""
+    expected_device = torch.device(accelerator, 0)
 
     class TestModel(BoringModel):
         def setup(self, stage: str) -> None:
+            # The `self.device` attribute already points to what device the model will land on
+            assert self.device == expected_device
+            # However, the model parameters have not yet been moved to that device
+            assert self.layer.weight.device == torch.device("cpu")
+            # Can create new layers in this hook (on CPU)
             self.new_layer = torch.nn.Linear(2, 2)
+            assert self.new_layer.weight.device == torch.device("cpu")
 
         def training_step(self, batch, batch_idx):
             output = self.layer(batch)
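
A side note on the parametrization change in this hunk: the test now builds `expected_device = torch.device(accelerator, 0)` directly from the parameter, and `"gpu"` is not a valid `torch.device` type (Lightning accepts `accelerator="gpu"`, but `torch.device` does not), which is presumably why the values were switched to `"cuda"` and `"mps"`. A small check of that constraint, assuming a reasonably recent PyTorch:

import torch

print(torch.device("cuda", 0))  # device(type='cuda', index=0) -- constructing the device object needs no GPU
print(torch.device("mps", 0))   # device(type='mps', index=0)
try:
    torch.device("gpu", 0)
except RuntimeError as err:
    print(err)                  # "gpu" is not a recognized device type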
