From 824497b568208a7b1cda4d98a9efcc4bc6f70a62 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adrian=20W=C3=A4lchli?=
Date: Sat, 20 Feb 2021 15:02:12 +0100
Subject: [PATCH 1/9] test

---
 setup.cfg                      |  1 -
 tests/accelerators/test_cpu.py | 19 +++++++++++++++++++
 tests/accelerators/test_gpu.py |  0
 3 files changed, 19 insertions(+), 1 deletion(-)
 create mode 100644 tests/accelerators/test_cpu.py
 create mode 100644 tests/accelerators/test_gpu.py

diff --git a/setup.cfg b/setup.cfg
index be8f7cd50fa2d..68db4ecdda44c 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -53,7 +53,6 @@ omit =
     pytorch_lightning/utilities/distributed.py
     pytorch_lightning/tuner/auto_gpu_select.py
     # TODO: temporary, until accelerator refactor is finished
-    pytorch_lightning/accelerators/accelerator.py
     pytorch_lightning/plugins/training_type/*.py
     pytorch_lightning/plugins/precision/*.py
     pytorch_lightning/plugins/base_plugin.py
diff --git a/tests/accelerators/test_cpu.py b/tests/accelerators/test_cpu.py
new file mode 100644
index 0000000000000..c9ca61a1b0ba1
--- /dev/null
+++ b/tests/accelerators/test_cpu.py
@@ -0,0 +1,19 @@
+from unittest.mock import Mock
+
+import pytest
+import torch
+
+from pytorch_lightning.accelerators import CPUAccelerator
+from pytorch_lightning.plugins import SingleDevicePlugin, PrecisionPlugin
+from pytorch_lightning.utilities.exceptions import MisconfigurationException
+
+
+def test_invalid_root_device():
+    trainer = Mock()
+    model = Mock()
+    accelerator = CPUAccelerator(
+        training_type_plugin=SingleDevicePlugin(torch.device("cuda", 1)),
+        precision_plugin=PrecisionPlugin()
+    )
+    with pytest.raises(MisconfigurationException, match="Device should be CPU"):
+        accelerator.setup(trainer=trainer, model=model)
diff --git a/tests/accelerators/test_gpu.py b/tests/accelerators/test_gpu.py
new file mode 100644
index 0000000000000..e69de29bb2d1d

From fe4a26b0b41c04074717401b4697842124164c44 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adrian=20W=C3=A4lchli?=
Date: Sat, 20 Feb 2021 15:58:53 +0100
Subject: [PATCH 2/9] cpu tests

---
 pytorch_lightning/accelerators/cpu.py |  2 +-
 tests/accelerators/test_cpu.py        | 15 ++++++++++++++-
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/pytorch_lightning/accelerators/cpu.py b/pytorch_lightning/accelerators/cpu.py
index 7c79c470001c3..83389b53f5bcb 100644
--- a/pytorch_lightning/accelerators/cpu.py
+++ b/pytorch_lightning/accelerators/cpu.py
@@ -7,7 +7,7 @@ class CPUAccelerator(Accelerator):
 
     def setup(self, trainer, model):
         if isinstance(self.precision_plugin, MixedPrecisionPlugin):
-            MisconfigurationException("amp + cpu is not supported. Please use a GPU option")
+            raise MisconfigurationException("amp + cpu is not supported. Please use a GPU option")
 
         if "cpu" not in str(self.root_device):
             raise MisconfigurationException(f"Device should be CPU, got {self.root_device} instead")
diff --git a/tests/accelerators/test_cpu.py b/tests/accelerators/test_cpu.py
index c9ca61a1b0ba1..f4a94b7e2d394 100644
--- a/tests/accelerators/test_cpu.py
+++ b/tests/accelerators/test_cpu.py
@@ -4,11 +4,12 @@
 import torch
 
 from pytorch_lightning.accelerators import CPUAccelerator
-from pytorch_lightning.plugins import SingleDevicePlugin, PrecisionPlugin
+from pytorch_lightning.plugins import SingleDevicePlugin, PrecisionPlugin, NativeMixedPrecisionPlugin
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 
 
 def test_invalid_root_device():
+    """ Test that CPU Accelerator has root device on CPU. """
""" trainer = Mock() model = Mock() accelerator = CPUAccelerator( @@ -17,3 +18,15 @@ def test_invalid_root_device(): ) with pytest.raises(MisconfigurationException, match="Device should be CPU"): accelerator.setup(trainer=trainer, model=model) + + +def test_unsupported_precision_plugins(): + """ Test error messages are raised for unsupported precision plugins with CPU. """ + trainer = Mock() + model = Mock() + accelerator = CPUAccelerator( + training_type_plugin=SingleDevicePlugin(torch.device("cpu")), + precision_plugin=NativeMixedPrecisionPlugin() + ) + with pytest.raises(MisconfigurationException, match=r"amp \+ cpu is not supported."): + accelerator.setup(trainer=trainer, model=model) From 6c32d6d6e4bd16020e9afde185d510102b78c690 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sat, 20 Feb 2021 16:58:48 +0100 Subject: [PATCH 3/9] tests gpu --- tests/accelerators/test_gpu.py | 55 ++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/tests/accelerators/test_gpu.py b/tests/accelerators/test_gpu.py index e69de29bb2d1d..196924fee5425 100644 --- a/tests/accelerators/test_gpu.py +++ b/tests/accelerators/test_gpu.py @@ -0,0 +1,55 @@ +import logging +import os +from unittest import mock +from unittest.mock import Mock + +import pytest +import torch + +from pytorch_lightning.accelerators import GPUAccelerator +from pytorch_lightning.plugins import SingleDevicePlugin, PrecisionPlugin +from pytorch_lightning.utilities.exceptions import MisconfigurationException + + +def test_invalid_root_device(): + """ Test that GPU Accelerator has root device on GPU. """ + accelerator = GPUAccelerator( + training_type_plugin=SingleDevicePlugin(torch.device("cpu")), + precision_plugin=PrecisionPlugin() + ) + with pytest.raises(MisconfigurationException, match="Device should be GPU"): + accelerator.setup(trainer=Mock(), model=Mock()) + + +@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="requires multi-GPU machine") +def test_root_device_set(): + """ Test that GPU Accelerator sets the current device to the root device. """ + accelerator = GPUAccelerator( + training_type_plugin=SingleDevicePlugin(torch.device("cuda", 1)), + precision_plugin=PrecisionPlugin() + ) + accelerator.setup(trainer=Mock(), model=Mock()) + assert torch.cuda.current_device() == 1 + + +@mock.patch.dict(os.environ, {"CUDA_DEVICE_ORDER": ""}) +def test_cuda_environment_variables_set(): + """ Test that GPU Accelerator sets NVIDIA environment variables. """ + accelerator = GPUAccelerator( + training_type_plugin=SingleDevicePlugin(torch.device("cuda", 0)), + precision_plugin=PrecisionPlugin() + ) + accelerator.setup(trainer=Mock(), model=Mock()) + assert os.getenv("CUDA_DEVICE_ORDER") == "PCI_BUS_ID" + + +@mock.patch.dict(os.environ, {"CUDA_VISIBLE_DEVICES": "1, 2", "LOCAL_RANK": "3"}) +def test_cuda_visible_devices_logged(caplog): + """ Test that GPU Accelerator logs CUDA_VISIBLE_DEVICES env variable. 
""" + accelerator = GPUAccelerator( + training_type_plugin=SingleDevicePlugin(torch.device("cuda", 0)), + precision_plugin=PrecisionPlugin() + ) + with caplog.at_level(logging.INFO): + accelerator.setup(trainer=Mock(), model=Mock()) + assert "LOCAL_RANK: 3 - CUDA_VISIBLE_DEVICES: [1, 2]" in caplog.text From e36f0a69d1908eb33a2dd6927930d2b67cd684a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sat, 20 Feb 2021 17:59:30 +0100 Subject: [PATCH 4/9] skip gpu tests on cpu --- tests/accelerators/test_gpu.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/accelerators/test_gpu.py b/tests/accelerators/test_gpu.py index 196924fee5425..c5091ac963cb7 100644 --- a/tests/accelerators/test_gpu.py +++ b/tests/accelerators/test_gpu.py @@ -11,6 +11,7 @@ from pytorch_lightning.utilities.exceptions import MisconfigurationException +@pytest.mark.skipif(not torch.cuda.is_available(), reason="requires GPU machine") def test_invalid_root_device(): """ Test that GPU Accelerator has root device on GPU. """ accelerator = GPUAccelerator( @@ -32,6 +33,7 @@ def test_root_device_set(): assert torch.cuda.current_device() == 1 +@pytest.mark.skipif(not torch.cuda.is_available(), reason="requires GPU machine") @mock.patch.dict(os.environ, {"CUDA_DEVICE_ORDER": ""}) def test_cuda_environment_variables_set(): """ Test that GPU Accelerator sets NVIDIA environment variables. """ @@ -43,6 +45,7 @@ def test_cuda_environment_variables_set(): assert os.getenv("CUDA_DEVICE_ORDER") == "PCI_BUS_ID" +@pytest.mark.skipif(not torch.cuda.is_available(), reason="requires GPU machine") @mock.patch.dict(os.environ, {"CUDA_VISIBLE_DEVICES": "1, 2", "LOCAL_RANK": "3"}) def test_cuda_visible_devices_logged(caplog): """ Test that GPU Accelerator logs CUDA_VISIBLE_DEVICES env variable. 
""" From 78a3c689d7a49d276eb2030ee1fc14a294b2a93a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sat, 20 Feb 2021 20:40:36 +0100 Subject: [PATCH 5/9] precision --- tests/accelerators/test_cpu.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/accelerators/test_cpu.py b/tests/accelerators/test_cpu.py index f4a94b7e2d394..1ff1ecacb0bb6 100644 --- a/tests/accelerators/test_cpu.py +++ b/tests/accelerators/test_cpu.py @@ -4,7 +4,8 @@ import torch from pytorch_lightning.accelerators import CPUAccelerator -from pytorch_lightning.plugins import SingleDevicePlugin, PrecisionPlugin, NativeMixedPrecisionPlugin +from pytorch_lightning.plugins import SingleDevicePlugin, PrecisionPlugin +from pytorch_lightning.plugins.precision import MixedPrecisionPlugin from pytorch_lightning.utilities.exceptions import MisconfigurationException @@ -26,7 +27,7 @@ def test_unsupported_precision_plugins(): model = Mock() accelerator = CPUAccelerator( training_type_plugin=SingleDevicePlugin(torch.device("cpu")), - precision_plugin=NativeMixedPrecisionPlugin() + precision_plugin=MixedPrecisionPlugin() ) with pytest.raises(MisconfigurationException, match=r"amp \+ cpu is not supported."): accelerator.setup(trainer=trainer, model=model) From 83953aff0e0a85621fc5b6ae34c834cfcc66487b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sat, 20 Feb 2021 21:07:19 +0100 Subject: [PATCH 6/9] fix weird test --- tests/plugins/test_amp_plugin.py | 67 ++------------------------------ 1 file changed, 4 insertions(+), 63 deletions(-) diff --git a/tests/plugins/test_amp_plugin.py b/tests/plugins/test_amp_plugin.py index 80a06b0072e1e..8236a0990335a 100644 --- a/tests/plugins/test_amp_plugin.py +++ b/tests/plugins/test_amp_plugin.py @@ -5,10 +5,8 @@ import torch from pytorch_lightning import Trainer -from pytorch_lightning.callbacks import Callback from pytorch_lightning.plugins import NativeMixedPrecisionPlugin from pytorch_lightning.utilities import _NATIVE_AMP_AVAILABLE -from pytorch_lightning.utilities.exceptions import MisconfigurationException from tests.helpers.boring_model import BoringModel @@ -25,78 +23,21 @@ ) @mock.patch('torch.cuda.device_count', return_value=2) @pytest.mark.parametrize( - ['ddp_backend', 'gpus', 'num_processes'], - [('ddp_cpu', None, 2), ('ddp', 2, 0), ('ddp2', 2, 0), ('ddp_spawn', 2, 0)], + ['ddp_backend', 'gpus'], + [('ddp', 2), ('ddp2', 2), ('ddp_spawn', 2)], ) -def on_fit_start(tmpdir, ddp_backend, gpus, num_processes): - - class CB(Callback): - - def on_fit_start(self, trainer, pl_module): - assert isinstance(trainer.precision_plugin, NativeMixedPrecisionPlugin) - raise SystemExit() - - def train(): - model = BoringModel() - trainer = Trainer( - fast_dev_run=True, - precision=16, - amp_backend='native', - gpus=gpus, - num_processes=num_processes, - accelerator=ddp_backend, - callbacks=[CB()], - ) - trainer.fit(model) - - if ddp_backend == "ddp_cpu": - with pytest.raises(MisconfigurationException, match="MP is only available on GPU"): - train() - else: - with pytest.raises(SystemExit): - train() - - -@pytest.mark.skipif(not _NATIVE_AMP_AVAILABLE, reason="Minimal PT version is set to 1.6") -@mock.patch.dict( - os.environ, { - "CUDA_VISIBLE_DEVICES": "0,1", - "SLURM_NTASKS": "2", - "SLURM_JOB_NAME": "SOME_NAME", - "SLURM_NODEID": "0", - "LOCAL_RANK": "0", - "SLURM_LOCALID": "0" - } -) -@mock.patch('torch.cuda.device_count', return_value=2) -@pytest.mark.parametrize( - ['ddp_backend', 'gpus', 'num_processes'], - [('ddp_cpu', None, 2), 
-)
-def test_amp_choice_custom_ddp_cpu(tmpdir, ddp_backend, gpus, num_processes):
+def test_amp_choice_custom_ddp_cpu(device_count_mock, ddp_backend, gpus):
 
     class MyNativeAMP(NativeMixedPrecisionPlugin):
         pass
 
-    class CB(Callback):
-
-        def on_fit_start(self, trainer, pl_module):
-            assert isinstance(trainer.precision_plugin, MyNativeAMP)
-            raise SystemExit()
-
-    model = BoringModel()
     trainer = Trainer(
-        fast_dev_run=True,
         precision=16,
         amp_backend='native',
-        num_processes=num_processes,
         accelerator=ddp_backend,
         plugins=[MyNativeAMP()],
-        callbacks=[CB()],
     )
-
-    with pytest.raises(SystemExit):
-        trainer.fit(model)
+    assert isinstance(trainer.precision_plugin, MyNativeAMP)
 
 
 class GradientUnscaleBoringModel(BoringModel):

From a953f5342c3ebfd24629e28d71999f819e72b848 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adrian=20W=C3=A4lchli?=
Date: Sun, 21 Feb 2021 02:27:37 +0100
Subject: [PATCH 7/9] fix apex plugin test

---
 tests/plugins/test_apex_plugin.py | 40 +++++++------------------------
 1 file changed, 8 insertions(+), 32 deletions(-)

diff --git a/tests/plugins/test_apex_plugin.py b/tests/plugins/test_apex_plugin.py
index 91d42822db57b..dd6c3f266928b 100644
--- a/tests/plugins/test_apex_plugin.py
+++ b/tests/plugins/test_apex_plugin.py
@@ -4,10 +4,8 @@
 import pytest
 
 from pytorch_lightning import Trainer
-from pytorch_lightning.callbacks import Callback
 from pytorch_lightning.plugins import ApexMixedPrecisionPlugin
 from pytorch_lightning.utilities import _APEX_AVAILABLE
-from tests.helpers.boring_model import BoringModel
 
 
 @pytest.mark.skipif(not _APEX_AVAILABLE, reason="test requires apex")
@@ -23,30 +21,19 @@
 )
 @mock.patch('torch.cuda.device_count', return_value=2)
 @pytest.mark.parametrize(
-    ['ddp_backend', 'gpus', 'num_processes'],
-    [('ddp_cpu', None, 2), ('ddp', 2, 0), ('ddp2', 2, 0), ('ddp_spawn', 2, 0)],
+    ['ddp_backend', 'gpus'],
+    [('ddp', 2), ('ddp2', 2), ('ddp_spawn', 2)],
 )
-def test_amp_choice_default_ddp_cpu(tmpdir, ddp_backend, gpus, num_processes):
+def test_amp_choice_default_ddp(mocked_device_count, ddp_backend, gpus):
 
-    class CB(Callback):
-
-        def on_fit_start(self, trainer, pl_module):
-            assert isinstance(trainer.precision_plugin, ApexMixedPrecisionPlugin)
-            raise SystemExit()
-
-    model = BoringModel()
     trainer = Trainer(
         fast_dev_run=True,
         precision=16,
         amp_backend='apex',
         gpus=gpus,
-        num_processes=num_processes,
         accelerator=ddp_backend,
-        callbacks=[CB()],
     )
-
-    with pytest.raises(SystemExit):
-        trainer.fit(model)
+    assert isinstance(trainer.precision_plugin, ApexMixedPrecisionPlugin)
 
 
 @pytest.mark.skipif(not _APEX_AVAILABLE, reason="test requires apex")
@@ -62,31 +49,20 @@ def on_fit_start(self, trainer, pl_module):
 )
 @mock.patch('torch.cuda.device_count', return_value=2)
 @pytest.mark.parametrize(
-    ['ddp_backend', 'gpus', 'num_processes'],
-    [('ddp_cpu', None, 2), ('ddp', 2, 0), ('ddp2', 2, 0), ('ddp_spawn', 2, 0)],
+    ['ddp_backend', 'gpus'],
+    [('ddp', 2), ('ddp2', 2), ('ddp_spawn', 2)],
 )
-def test_amp_choice_custom_ddp_cpu(tmpdir, ddp_backend, gpus, num_processes):
+def test_amp_choice_custom_ddp(mocked_device_count, ddp_backend, gpus):
 
     class MyApexPlugin(ApexMixedPrecisionPlugin):
         pass
 
-    class CB(Callback):
-
-        def on_fit_start(self, trainer, pl_module):
-            assert isinstance(trainer.precision_plugin, MyApexPlugin)
-            raise SystemExit()
-
-    model = BoringModel()
     trainer = Trainer(
         fast_dev_run=True,
         precision=16,
         amp_backend='apex',
         gpus=gpus,
-        num_processes=num_processes,
         accelerator=ddp_backend,
         plugins=[MyApexPlugin(amp_level="O2")],
-        callbacks=[CB()],
     )
-
-    with pytest.raises(SystemExit):
-        trainer.fit(model)
+    assert isinstance(trainer.precision_plugin, MyApexPlugin)

From 071493386def8cdff23fd46b6736ed132ee2fcff Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adrian=20W=C3=A4lchli?=
Date: Mon, 22 Feb 2021 01:06:22 +0100
Subject: [PATCH 8/9] clean up

---
 tests/accelerators/test_cpu.py | 3 ---
 tests/accelerators/test_gpu.py | 2 +-
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/tests/accelerators/test_cpu.py b/tests/accelerators/test_cpu.py
index 41a2ffe409b78..ce973f03ef4d5 100644
--- a/tests/accelerators/test_cpu.py
+++ b/tests/accelerators/test_cpu.py
@@ -9,9 +9,6 @@
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 
 
-
-
-
 def test_invalid_root_device():
     """ Test that CPU Accelerator has root device on CPU. """
     trainer = Mock()
diff --git a/tests/accelerators/test_gpu.py b/tests/accelerators/test_gpu.py
index c5091ac963cb7..a798530523e12 100644
--- a/tests/accelerators/test_gpu.py
+++ b/tests/accelerators/test_gpu.py
@@ -7,7 +7,7 @@
 import torch
 
 from pytorch_lightning.accelerators import GPUAccelerator
-from pytorch_lightning.plugins import SingleDevicePlugin, PrecisionPlugin
+from pytorch_lightning.plugins import PrecisionPlugin, SingleDevicePlugin
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 
 

From 01f26b4ccd076de70668656cc01b4f1beb3c7caf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adrian=20W=C3=A4lchli?=
Date: Mon, 22 Feb 2021 03:05:20 +0100
Subject: [PATCH 9/9] tpu

---
 pytorch_lightning/accelerators/tpu.py |  4 ++--
 tests/accelerators/test_tpu.py        | 32 +++++++++++++++++++++++++++
 2 files changed, 34 insertions(+), 2 deletions(-)
 create mode 100644 tests/accelerators/test_tpu.py

diff --git a/pytorch_lightning/accelerators/tpu.py b/pytorch_lightning/accelerators/tpu.py
index 8f63bc7b86b11..f235aab54a256 100644
--- a/pytorch_lightning/accelerators/tpu.py
+++ b/pytorch_lightning/accelerators/tpu.py
@@ -19,8 +19,8 @@ class TPUAccelerator(Accelerator):
     def setup(self, trainer, model):
         if isinstance(self.precision_plugin, MixedPrecisionPlugin):
             raise MisconfigurationException(
-                "amp + tpu is not supported. "
-                "Only bfloats are supported on TPU. Consider using TPUHalfPrecisionPlugin"
+                "amp + tpu is not supported."
+                " Only bfloats are supported on TPU. Consider using TPUHalfPrecisionPlugin"
             )
 
         if not isinstance(self.training_type_plugin, (SingleTPUPlugin, TPUSpawnPlugin)):
diff --git a/tests/accelerators/test_tpu.py b/tests/accelerators/test_tpu.py
new file mode 100644
index 0000000000000..336f0eddab7c0
--- /dev/null
+++ b/tests/accelerators/test_tpu.py
@@ -0,0 +1,32 @@
+from unittest.mock import Mock
+
+import pytest
+
+from pytorch_lightning.accelerators import TPUAccelerator
+from pytorch_lightning.plugins import SingleTPUPlugin, DDPPlugin, PrecisionPlugin
+from pytorch_lightning.plugins.precision import MixedPrecisionPlugin
+from pytorch_lightning.utilities.exceptions import MisconfigurationException
+
+
+def test_unsupported_precision_plugins():
+    """ Test error messages are raised for unsupported precision plugins with TPU. """
""" + trainer = Mock() + model = Mock() + accelerator = TPUAccelerator( + training_type_plugin=SingleTPUPlugin(device=Mock()), + precision_plugin=MixedPrecisionPlugin(), + ) + with pytest.raises(MisconfigurationException, match=r"amp \+ tpu is not supported."): + accelerator.setup(trainer=trainer, model=model) + + +def test_unsupported_training_type_plugins(): + """ Test error messages are raised for unsupported training type with TPU. """ + trainer = Mock() + model = Mock() + accelerator = TPUAccelerator( + training_type_plugin=DDPPlugin(), + precision_plugin=PrecisionPlugin(), + ) + with pytest.raises(MisconfigurationException, match="TPUs only support a single tpu core or tpu spawn training"): + accelerator.setup(trainer=trainer, model=model)