From 68af5783f993082cf581c5e7367d4ed19895d2c8 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Tue, 21 Jun 2022 15:35:05 +0100 Subject: [PATCH 1/4] Fixes deepspeed and `estimated_stepping_batches` --- src/pytorch_lightning/strategies/deepspeed.py | 1 + .../strategies/test_deepspeed_strategy.py | 25 +++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/src/pytorch_lightning/strategies/deepspeed.py b/src/pytorch_lightning/strategies/deepspeed.py index 56933e1a23e75..8f7f2a6c58860 100644 --- a/src/pytorch_lightning/strategies/deepspeed.py +++ b/src/pytorch_lightning/strategies/deepspeed.py @@ -357,6 +357,7 @@ def setup_distributed(self): def setup(self, trainer: "pl.Trainer") -> None: self.accelerator.setup(trainer) + self.model_to_device() self.setup_optimizers(trainer) self.setup_precision_plugin() optimizers_to_device(self.optimizers, self.root_device) diff --git a/tests/tests_pytorch/strategies/test_deepspeed_strategy.py b/tests/tests_pytorch/strategies/test_deepspeed_strategy.py index 2306aeb045c20..876815823ea23 100644 --- a/tests/tests_pytorch/strategies/test_deepspeed_strategy.py +++ b/tests/tests_pytorch/strategies/test_deepspeed_strategy.py @@ -1337,3 +1337,28 @@ def test_error_with_invalid_accelerator(tmpdir): model = BoringModel() with pytest.raises(MisconfigurationException, match="DeepSpeed strategy is only supported on GPU"): trainer.fit(model) + + +@RunIf(min_cuda_gpus=2, deepspeed=True, standalone=True) +def test_deepspeed_configure_optimizer_device_set(tmpdir): + """ + Test to ensure that the LM has access to the device within the ``configure_optimizer`` function, + and estimated_stepping_batches works correctly as a result. 
+ """ + + class TestModel(BoringModel): + def configure_optimizers(self): + assert self.trainer.estimated_stepping_batches == 1 + assert self.device.type == 'cuda' + raise SystemExit + + model = TestModel() + trainer = Trainer( + default_root_dir=tmpdir, + fast_dev_run=True, + accelerator="gpu", + devices=2, + strategy=DeepSpeedStrategy(), + ) + with pytest.raises(SystemExit): + trainer.fit(model) From 7262fa99ed919212ce7d7f83d48a9198f821f96e Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Tue, 21 Jun 2022 15:42:20 +0100 Subject: [PATCH 2/4] Swap to setting the device manually to exclude moving weights to device --- src/pytorch_lightning/strategies/deepspeed.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/pytorch_lightning/strategies/deepspeed.py b/src/pytorch_lightning/strategies/deepspeed.py index 8f7f2a6c58860..bae617561f85a 100644 --- a/src/pytorch_lightning/strategies/deepspeed.py +++ b/src/pytorch_lightning/strategies/deepspeed.py @@ -357,7 +357,8 @@ def setup_distributed(self): def setup(self, trainer: "pl.Trainer") -> None: self.accelerator.setup(trainer) - self.model_to_device() + # we set the device so that optimizers can be created with distributed comms. + self.lightning_module._device = self.root_device self.setup_optimizers(trainer) self.setup_precision_plugin() optimizers_to_device(self.optimizers, self.root_device) From fc2bfeaba53cbaa5cf4a471f5a7880ff8aadccd2 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Tue, 21 Jun 2022 15:48:27 +0100 Subject: [PATCH 3/4] Add CHANGELOG.md --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d2590a70e021e..b8f07b63a0ce6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -232,6 +232,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
- Fixed issue where the CLI fails with certain torch objects ([#13153](https://github.com/PyTorchLightning/pytorch-lightning/pull/13153)) +- Fixed `estimated_stepping_batches` requiring distributed comms in `configure_optimizers` for the `DeepSpeedStrategy` ([#13350](https://github.com/PyTorchLightning/pytorch-lightning/pull/13350)) + + - From 519ef0131db7e600862f9c09892e7afa97810714 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 21 Jun 2022 14:49:00 +0000 Subject: [PATCH 4/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/tests_pytorch/strategies/test_deepspeed_strategy.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tests/tests_pytorch/strategies/test_deepspeed_strategy.py b/tests/tests_pytorch/strategies/test_deepspeed_strategy.py index 876815823ea23..601e559ab3c18 100644 --- a/tests/tests_pytorch/strategies/test_deepspeed_strategy.py +++ b/tests/tests_pytorch/strategies/test_deepspeed_strategy.py @@ -1341,15 +1341,13 @@ def test_error_with_invalid_accelerator(tmpdir): @RunIf(min_cuda_gpus=2, deepspeed=True, standalone=True) def test_deepspeed_configure_optimizer_device_set(tmpdir): - """ - Test to ensure that the LM has access to the device within the ``configure_optimizer`` function, - and estimated_stepping_batches works correctly as a result. - """ + """Test to ensure that the LM has access to the device within the ``configure_optimizers`` function, and
+ estimated_stepping_batches works correctly as a result."""

 class TestModel(BoringModel): def configure_optimizers(self): assert self.trainer.estimated_stepping_batches == 1 - assert self.device.type == 'cuda' + assert self.device.type == "cuda" raise SystemExit model = TestModel()