@@ -211,9 +211,6 @@ def test_invalid_deepspeed_defaults_no_precision(tmpdir):
211211
212212@pytest .mark .skipif (not torch .cuda .is_available (), reason = "requires GPU machine" )
213213@pytest .mark .skipif (not _DEEPSPEED_AVAILABLE , reason = "DeepSpeed not available." )
214- @pytest .mark .skipif (
215- not os .getenv ("PL_RUNNING_SPECIAL_TESTS" , '0' ) == '1' , reason = "test should be run outside of pytest"
216- )
217214def test_warn_deepspeed_override_backward (tmpdir ):
218215 """
219216 Test to ensure that if the backward hook in the LightningModule is overridden, we throw a warning.
@@ -232,9 +229,6 @@ def backward(self, loss: Tensor, optimizer: Optimizer, optimizer_idx: int, *args
232229
233230@pytest .mark .skipif (not torch .cuda .is_available (), reason = "requires GPU machine" )
234231@pytest .mark .skipif (not _DEEPSPEED_AVAILABLE , reason = "DeepSpeed not available." )
235- @pytest .mark .skipif (
236- not os .getenv ("PL_RUNNING_SPECIAL_TESTS" , '0' ) == '1' , reason = "test should be run outside of pytest"
237- )
238232def test_deepspeed_run_configure_optimizers (tmpdir ):
239233 """
240234 Test end to end that deepspeed works with defaults (without ZeRO as that requires compilation),
@@ -268,9 +262,6 @@ def on_train_start(self) -> None:
268262
269263@pytest .mark .skipif (not torch .cuda .is_available (), reason = "requires GPU machine" )
270264@pytest .mark .skipif (not _DEEPSPEED_AVAILABLE , reason = "DeepSpeed not available." )
271- @pytest .mark .skipif (
272- not os .getenv ("PL_RUNNING_SPECIAL_TESTS" , '0' ) == '1' , reason = "test should be run outside of pytest"
273- )
274265def test_deepspeed_config (tmpdir , deepspeed_zero_config ):
275266 """
276267 Test to ensure deepspeed works correctly when passed a DeepSpeed config object including optimizers/schedulers
@@ -304,6 +295,58 @@ def on_train_start(self) -> None:
304295 _assert_save_model_is_equal (model , tmpdir , trainer )
305296
306297
@pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available.")
@pytest.mark.skipif(not torch.cuda.is_available(), reason="requires GPU machine")
def test_deepspeed_custom_precision_params(tmpdir):
    """
    Ensure if we modify the FP16 parameters via the DeepSpeedPlugin, the deepspeed config contains these changes.

    Each FP16 keyword passed to ``DeepSpeedPlugin`` is expected to appear under the ``'fp16'`` section of the
    plugin's ``config`` by the time ``on_train_start`` runs.
    """
    # Single source of truth: the same mapping is handed to the plugin and checked against the config.
    fp16_overrides = {
        'loss_scale': 10,
        'initial_scale_power': 10,
        'loss_scale_window': 10,
        'hysteresis': 10,
        'min_loss_scale': 10,
    }

    class TestModel(BoringModel):

        def on_train_start(self) -> None:
            fp16_config = self.trainer.training_type_plugin.config['fp16']
            for key, expected in fp16_overrides.items():
                assert fp16_config[key] == expected, f"fp16[{key!r}] is {fp16_config[key]!r}, expected {expected!r}"
            # The config has been verified; abort the run here instead of actually training.
            raise SystemExit()

    model = TestModel()
    trainer = Trainer(
        # Use the pytest-provided temp dir as the Trainer root instead of leaving the fixture unused.
        default_root_dir=tmpdir,
        plugins=[DeepSpeedPlugin(**fp16_overrides)],
        precision=16,
        gpus=1,
    )
    # ``on_train_start`` raises SystemExit once its assertions pass; a failed assertion surfaces instead.
    with pytest.raises(SystemExit):
        trainer.fit(model)
327+
328+
@pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available.")
@pytest.mark.skipif(not torch.cuda.is_available(), reason="requires GPU machine")
def test_deepspeed_assert_config_zero_offload_disabled(tmpdir, deepspeed_zero_config):
    """
    Ensure if we use a config and turn off cpu_offload, that this is set to False within the config.

    The ``deepspeed_zero_config`` fixture is modified in place before it is handed to ``DeepSpeedPlugin``,
    and the plugin's resulting ``config`` is inspected in ``on_train_start``.
    """

    deepspeed_zero_config['zero_optimization']['cpu_offload'] = False

    class TestModel(BoringModel):

        def on_train_start(self) -> None:
            zero_config = self.trainer.training_type_plugin.config['zero_optimization']
            # Identity check on purpose: the value must be exactly ``False``, not merely falsy.
            assert zero_config['cpu_offload'] is False
            # The config has been verified; abort the run here instead of actually training.
            raise SystemExit()

    model = TestModel()
    trainer = Trainer(
        # Use the pytest-provided temp dir as the Trainer root instead of leaving the fixture unused.
        default_root_dir=tmpdir,
        plugins=[DeepSpeedPlugin(config=deepspeed_zero_config)],
        precision=16,
        gpus=1,
    )
    # ``on_train_start`` raises SystemExit once its assertion passes; a failed assertion surfaces instead.
    with pytest.raises(SystemExit):
        trainer.fit(model)
348+
349+
307350@pytest .mark .skipif (not torch .cuda .is_available (), reason = "requires GPU machine" )
308351@pytest .mark .skipif (not _DEEPSPEED_AVAILABLE , reason = "DeepSpeed not available." )
309352@pytest .mark .skipif (torch .cuda .device_count () < 2 , reason = "test requires multi-GPU machine" )
0 commit comments