test

awaelchli · awaelchli · commit 86aa452b154f · 2023-08-24T05:52:16.000-04:00
diff --git a/tests/tests_pytorch/strategies/test_fsdp.py b/tests/tests_pytorch/strategies/test_fsdp.py
@@ -294,35 +294,35 @@ def test_fsdp_strategy_full_state_dict(tmpdir, wrap_min_params):
     assert all(_ex == _co for _ex, _co in zip(full_state_dict.keys(), correct_state_dict.keys()))
 
 
-@RunIf(min_cuda_gpus=2, skip_windows=True, standalone=True, min_torch="1.12")
+# @RunIf(min_cuda_gpus=2, skip_windows=True, standalone=True, min_torch="1.12")
 @pytest.mark.parametrize(
     ("model", "strategy", "strategy_cfg"),
     [
         pytest.param(TestFSDPModel(), "fsdp", None, id="manually_wrapped"),
-        pytest.param(
-            TestFSDPModelAutoWrapped(),
-            FSDPStrategy,
-            {"auto_wrap_policy": custom_auto_wrap_policy},
-            marks=RunIf(max_torch="2.0.0"),
-            id="autowrap_1x",
-        ),
-        pytest.param(
-            TestFSDPModelAutoWrapped(),
-            FSDPStrategy,
-            {"auto_wrap_policy": custom_auto_wrap_policy},
-            marks=RunIf(min_torch="2.0.0"),
-            id="autowrap_2x",
-        ),
-        pytest.param(
-            TestFSDPModelAutoWrapped(),
-            FSDPStrategy,
-            {
-                "auto_wrap_policy": ModuleWrapPolicy({nn.Linear}) if _TORCH_GREATER_EQUAL_2_1 else None,
-                "use_orig_params": True,
-            },
-            marks=RunIf(min_torch="2.1.0"),
-            id="autowrap_use_orig_params",
-        ),
+        # pytest.param(
+        #     TestFSDPModelAutoWrapped(),
+        #     FSDPStrategy,
+        #     {"auto_wrap_policy": custom_auto_wrap_policy},
+        #     marks=RunIf(max_torch="2.0.0"),
+        #     id="autowrap_1x",
+        # ),
+        # pytest.param(
+        #     TestFSDPModelAutoWrapped(),
+        #     FSDPStrategy,
+        #     {"auto_wrap_policy": custom_auto_wrap_policy},
+        #     marks=RunIf(min_torch="2.0.0"),
+        #     id="autowrap_2x",
+        # ),
+        # pytest.param(
+        #     TestFSDPModelAutoWrapped(),
+        #     FSDPStrategy,
+        #     {
+        #         "auto_wrap_policy": ModuleWrapPolicy({nn.Linear}) if _TORCH_GREATER_EQUAL_2_1 else None,
+        #         "use_orig_params": True,
+        #     },
+        #     marks=RunIf(min_torch="2.1.0"),
+        #     id="autowrap_use_orig_params",
+        # ),
     ],
 )
 def test_fsdp_checkpoint_multi_gpus(tmpdir, model, strategy, strategy_cfg):
@@ -589,8 +589,8 @@ def test_fsdp_strategy_save_optimizer_states(tmpdir, wrap_min_params):
     trainer.strategy.barrier()
 
 
-@RunIf(min_cuda_gpus=2, skip_windows=True, standalone=True, min_torch="1.12")
-@pytest.mark.parametrize("wrap_min_params", [2, 1024, 100000000])
+# @RunIf(min_cuda_gpus=2, skip_windows=True, standalone=True, min_torch="1.12")
+@pytest.mark.parametrize("wrap_min_params", [1024])
 def test_fsdp_strategy_load_optimizer_states(tmpdir, wrap_min_params):
     """Test to ensure that the full state dict and optimizer states can be load when using FSDP strategy.
 
@@ -807,3 +807,29 @@ def test_save_load_sharded_state_dict(tmp_path):
     strategy = FSDPStrategy(auto_wrap_policy={nn.Linear}, state_dict_type="sharded")
     trainer = Trainer(**trainer_kwargs, strategy=strategy)
     trainer.fit(model, ckpt_path=checkpoint_path)
+
+
+@RunIf(min_torch="1.12")
+@mock.patch("lightning.pytorch.strategies.fsdp.torch.load")
+@mock.patch("lightning.pytorch.strategies.fsdp._lazy_load")
+@mock.patch("lightning.pytorch.strategies.fsdp._load_raw_module_state")
+def test_fsdp_lazy_load_full_state_dict(_, lazy_load_mock, torch_load_mock, tmp_path):
+    """Test that loading a single file (full state) is lazy to reduce peak CPU memory usage."""
+    model = BoringModel()
+    checkpoint = {"state_dict": model.state_dict()}
+    lazy_load_mock.return_value = checkpoint
+
+    strategy = FSDPStrategy()
+    trainer = Trainer()
+    model.trainer = trainer
+    strategy._lightning_module = model
+    strategy.model = model
+
+    file = tmp_path / "test.ckpt"
+    file.touch()
+    
+    strategy.load_checkpoint(checkpoint_path=file)
+    if _TORCH_GREATER_EQUAL_2_0:
+        lazy_load_mock.assert_called_once()
+    else:
+        torch_load_mock.assert_called_once()