Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Fixed issue where the CLI fails with certain torch objects ([#13153](https://github.com/PyTorchLightning/pytorch-lightning/pull/13153))


- Fixed `estimated_stepping_batches` requiring distributed comms in `configure_optimizers` for the `DeepSpeedStrategy` ([#13350](https://github.com/PyTorchLightning/pytorch-lightning/pull/13350))


-


Expand Down
2 changes: 2 additions & 0 deletions src/pytorch_lightning/strategies/deepspeed.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,8 @@ def setup_distributed(self):

def setup(self, trainer: "pl.Trainer") -> None:
self.accelerator.setup(trainer)
# Set the LightningModule's device before creating the optimizers so that `configure_optimizers` can use distributed comms.
self.lightning_module._device = self.root_device
self.setup_optimizers(trainer)
self.setup_precision_plugin()
optimizers_to_device(self.optimizers, self.root_device)
Expand Down
23 changes: 23 additions & 0 deletions tests/tests_pytorch/strategies/test_deepspeed_strategy.py
Original file line number Diff line number Diff line change
Expand Up @@ -1337,3 +1337,26 @@ def test_error_with_invalid_accelerator(tmpdir):
model = BoringModel()
with pytest.raises(MisconfigurationException, match="DeepSpeed strategy is only supported on GPU"):
trainer.fit(model)


@RunIf(min_cuda_gpus=2, deepspeed=True, standalone=True)
def test_deepspeed_configure_optimizer_device_set(tmpdir):
    """Check that the LightningModule's device is already set when ``configure_optimizers`` runs.

    With the device available, ``estimated_stepping_batches`` can be queried from inside the hook.
    """

    class DeviceCheckingModel(BoringModel):
        def configure_optimizers(self):
            # Both checks run inside the hook; ``SystemExit`` then aborts the fit,
            # which is what the ``pytest.raises`` below expects.
            assert self.trainer.estimated_stepping_batches == 1
            assert self.device.type == "cuda"
            raise SystemExit

    module = DeviceCheckingModel()
    trainer_kwargs = dict(
        default_root_dir=tmpdir,
        fast_dev_run=True,
        accelerator="gpu",
        devices=2,
        strategy=DeepSpeedStrategy(),
    )
    trainer = Trainer(**trainer_kwargs)
    with pytest.raises(SystemExit):
        trainer.fit(module)