From 731fb0c7789b54c6ff9ac10a31d39569c9eea246 Mon Sep 17 00:00:00 2001
From: ananthsub
Date: Mon, 7 Feb 2022 09:14:57 -0800
Subject: [PATCH 01/22] gpu-cuda-check

---
 CHANGELOG.md                          | 3 +++
 pytorch_lightning/accelerators/gpu.py | 6 ++++++
 2 files changed, 9 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 65bfeee76bc15..aaeb2821174b4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -101,6 +101,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Added a `_Stateful` support for `LightningDataModule` ([#11637](https://github.com/PyTorchLightning/pytorch-lightning/pull/11637))
 
+- Added checks to `GPUAccelerator` to assert CUDA availability ([#]())
+
+
 ### Changed
 
 - Implemented a new native and rich format in `_print_results` method of the `EvaluationLoop` ([#11332](https://github.com/PyTorchLightning/pytorch-lightning/pull/11332))
 
diff --git a/pytorch_lightning/accelerators/gpu.py b/pytorch_lightning/accelerators/gpu.py
index 3ccf2e4a7f919..dbbfca6a533a5 100644
--- a/pytorch_lightning/accelerators/gpu.py
+++ b/pytorch_lightning/accelerators/gpu.py
@@ -37,8 +37,14 @@ def setup_environment(self, root_device: torch.device) -> None:
         """
         Raises:
             MisconfigurationException:
+                If torch.cuda isn't available
+                If no CUDA devices are found
                 If the selected device is not GPU.
         """
+        if not torch.cuda.is_available():
+            raise MisconfigurationException("GPU Accelerator used, but CUDA isn't available.")
+        if torch.cuda.device_count() == 0:
+            raise MisconfigurationException("GPU Accelerator used, but found no CUDA devices available.")
         if root_device.type != "cuda":
             raise MisconfigurationException(f"Device should be GPU, got {root_device} instead")
         torch.cuda.set_device(root_device)

From 6b3c9a4c8d2118fe414f9abfa5db8b51474328f6 Mon Sep 17 00:00:00 2001
From: ananthsub
Date: Mon, 7 Feb 2022 09:28:16 -0800
Subject: [PATCH 02/22] Update CHANGELOG.md

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index aaeb2821174b4..36ec23c9156f6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -101,7 +101,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Added a `_Stateful` support for `LightningDataModule` ([#11637](https://github.com/PyTorchLightning/pytorch-lightning/pull/11637))
 
-- Added checks to `GPUAccelerator` to assert CUDA availability ([#]())
+- Added checks to `GPUAccelerator` to assert CUDA availability ([#11797](https://github.com/PyTorchLightning/pytorch-lightning/pull/11797))
 
 
 ### Changed

From 407b6b23b3886b373b1d58f9301cdedff946dbab Mon Sep 17 00:00:00 2001
From: ananthsub
Date: Mon, 7 Feb 2022 09:30:03 -0800
Subject: [PATCH 03/22] Update pytorch_lightning/accelerators/gpu.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Carlos Mocholí
---
 pytorch_lightning/accelerators/gpu.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pytorch_lightning/accelerators/gpu.py b/pytorch_lightning/accelerators/gpu.py
index dbbfca6a533a5..7daf2da1b5e6c 100644
--- a/pytorch_lightning/accelerators/gpu.py
+++ b/pytorch_lightning/accelerators/gpu.py
@@ -37,8 +37,8 @@ def setup_environment(self, root_device: torch.device) -> None:
         """
         Raises:
             MisconfigurationException:
-                If torch.cuda isn't available
-                If no CUDA devices are found
+                If torch.cuda isn't available.
+                If no CUDA devices are found.
                 If the selected device is not GPU.
         """
         if not torch.cuda.is_available():

From e57a1408031de0368abb1c44e2e9429b0d1805d3 Mon Sep 17 00:00:00 2001
From: ananthsub
Date: Mon, 7 Feb 2022 18:12:50 -0800
Subject: [PATCH 04/22] Update gpu.py

---
 pytorch_lightning/accelerators/gpu.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/pytorch_lightning/accelerators/gpu.py b/pytorch_lightning/accelerators/gpu.py
index 7daf2da1b5e6c..bac1f7c76b985 100644
--- a/pytorch_lightning/accelerators/gpu.py
+++ b/pytorch_lightning/accelerators/gpu.py
@@ -33,18 +33,24 @@ class GPUAccelerator(Accelerator):
     """Accelerator for GPU devices."""
 
-    def setup_environment(self, root_device: torch.device) -> None:
+    def __init__(self) -> None:
         """
         Raises:
             MisconfigurationException:
                 If torch.cuda isn't available.
-                If no CUDA devices are found.
-                If the selected device is not GPU.
+                If no CUDA deices are found.
         """
         if not torch.cuda.is_available():
             raise MisconfigurationException("GPU Accelerator used, but CUDA isn't available.")
         if torch.cuda.device_count() == 0:
             raise MisconfigurationException("GPU Accelerator used, but found no CUDA devices available.")
+
+    def setup_environment(self, root_device: torch.device) -> None:
+        """
+        Raises:
+            MisconfigurationException:
+                If the selected device is not GPU.
+        """
         if root_device.type != "cuda":
             raise MisconfigurationException(f"Device should be GPU, got {root_device} instead")
         torch.cuda.set_device(root_device)
From bf23e1314b4954be70617d09b9b8250c41a2a638 Mon Sep 17 00:00:00 2001
From: ananthsub
Date: Mon, 7 Feb 2022 18:48:59 -0800
Subject: [PATCH 05/22] Update CHANGELOG.md

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 36ec23c9156f6..507ad0ea9bf18 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -101,7 +101,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Added a `_Stateful` support for `LightningDataModule` ([#11637](https://github.com/PyTorchLightning/pytorch-lightning/pull/11637))
 
-- Added checks to `GPUAccelerator` to assert CUDA availability ([#11797](https://github.com/PyTorchLightning/pytorch-lightning/pull/11797))
+- Added checks to `GPUAccelerator` to assert CUDA availability at initialization ([#11797](https://github.com/PyTorchLightning/pytorch-lightning/pull/11797))
 
 
 ### Changed

From e8ef16d55a6d098438d1db6c44037eb444b04432 Mon Sep 17 00:00:00 2001
From: ananthsub
Date: Mon, 7 Feb 2022 22:16:17 -0800
Subject: [PATCH 06/22] Update test_accelerator_connector.py

---
 tests/accelerators/test_accelerator_connector.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py
index 3e2ec15216841..edc967d06ebd8 100644
--- a/tests/accelerators/test_accelerator_connector.py
+++ b/tests/accelerators/test_accelerator_connector.py
@@ -98,6 +98,7 @@ def test_accelerator_choice_ddp_spawn(cuda_available_mock, device_count_mock):
 @mock.patch("torch.cuda.set_device")
 @mock.patch("torch.cuda.device_count", return_value=2)
 @mock.patch("pytorch_lightning.strategies.DDPStrategy.setup_distributed", autospec=True)
+@mock.path("torch.cuda.is_available", return_value=True)
 def test_accelerator_choice_ddp_slurm(*_):
     with pytest.deprecated_call(match=r"accelerator='ddp'\)` has been deprecated in v1.5"):
         trainer = Trainer(fast_dev_run=True, accelerator="ddp", gpus=2)
@@ -123,6 +124,7 @@ def test_accelerator_choice_ddp_slurm(*_):
 @mock.patch("torch.cuda.set_device")
 @mock.patch("torch.cuda.device_count", return_value=2)
 @mock.patch("pytorch_lightning.strategies.DDPStrategy.setup_distributed", autospec=True)
+@mock.path("torch.cuda.is_available", return_value=True)
 def test_accelerator_choice_ddp2_slurm(*_):
     with pytest.deprecated_call(match=r"accelerator='ddp2'\)` has been deprecated in v1.5"):
         trainer = Trainer(fast_dev_run=True, accelerator="ddp2", gpus=2)
@@ -148,6 +150,7 @@ def test_accelerator_choice_ddp2_slurm(*_):
 @mock.patch("torch.cuda.set_device")
 @mock.patch("torch.cuda.device_count", return_value=1)
 @mock.patch("pytorch_lightning.strategies.DDPStrategy.setup_distributed", autospec=True)
+@mock.path("torch.cuda.is_available", return_value=True)
 def test_accelerator_choice_ddp_te(*_):
     with pytest.deprecated_call(match=r"accelerator='ddp'\)` has been deprecated in v1.5"):
         trainer = Trainer(fast_dev_run=True, accelerator="ddp", gpus=2)
@@ -172,6 +175,7 @@ def test_accelerator_choice_ddp_te(*_):
 @mock.patch("torch.cuda.set_device")
 @mock.patch("torch.cuda.device_count", return_value=1)
 @mock.patch("pytorch_lightning.strategies.DDPStrategy.setup_distributed", autospec=True)
+@mock.path("torch.cuda.is_available", return_value=True)
 def test_accelerator_choice_ddp2_te(*_):
     with pytest.deprecated_call(match=r"accelerator='ddp2'\)` has been deprecated in v1.5"):
         trainer = Trainer(fast_dev_run=True, accelerator="ddp2", gpus=2)
@@ -210,6 +214,7 @@ def test_accelerator_choice_ddp_cpu_te(*_):
 @mock.patch("torch.cuda.set_device")
 @mock.patch("torch.cuda.device_count", return_value=1)
 @mock.patch("pytorch_lightning.strategies.DDPStrategy.setup_distributed", autospec=True)
+@mock.path("torch.cuda.is_available", return_value=True)
 def test_accelerator_choice_ddp_kubeflow(*_):
     with pytest.deprecated_call(match=r"accelerator='ddp'\)` has been deprecated in v1.5"):
         trainer = Trainer(fast_dev_run=True, accelerator="ddp", gpus=1)

From e77b75e7bef8aea0880301d867b184e4d0177573 Mon Sep 17 00:00:00 2001
From: ananthsub
Date: Mon, 7 Feb 2022 22:17:39 -0800
Subject: [PATCH 07/22] Update test_accelerator_connector.py

---
 tests/accelerators/test_accelerator_connector.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py
index edc967d06ebd8..d14e1412834ea 100644
--- a/tests/accelerators/test_accelerator_connector.py
+++ b/tests/accelerators/test_accelerator_connector.py
@@ -741,6 +741,7 @@ def test_strategy_choice_ddp_slurm(setup_distributed_mock, strategy):
 @mock.patch("torch.cuda.device_count", return_value=2)
 @mock.patch("pytorch_lightning.strategies.DDPStrategy.setup_distributed", autospec=True)
 @pytest.mark.parametrize("strategy", ["ddp2", DDP2Strategy()])
+@mock.patch("torch.cuda.is_available", return_value=True)
 def test_strategy_choice_ddp2_slurm(set_device_mock, device_count_mock, setup_distributed_mock, strategy):
     trainer = Trainer(fast_dev_run=True, strategy=strategy, gpus=2)
     assert trainer._accelerator_connector._is_slurm_managing_tasks()
@@ -765,6 +766,7 @@ def test_strategy_choice_ddp2_slurm(set_device_mock, device_count_mock, setup_di
 @mock.patch("torch.cuda.set_device")
 @mock.patch("torch.cuda.device_count", return_value=2)
 @mock.patch("pytorch_lightning.strategies.DDPStrategy.setup_distributed", autospec=True)
+@mock.patch("torch.cuda.is_available", return_value=True)
 def test_strategy_choice_ddp_te(*_):
     trainer = Trainer(fast_dev_run=True, strategy="ddp", gpus=2)
     assert isinstance(trainer.accelerator, GPUAccelerator)
@@ -788,6 +790,7 @@ def test_strategy_choice_ddp_te(*_):
 @mock.patch("torch.cuda.set_device")
 @mock.patch("torch.cuda.device_count", return_value=2)
 @mock.patch("pytorch_lightning.strategies.DDPStrategy.setup_distributed", autospec=True)
+@mock.patch("torch.cuda.is_available", return_value=True)
 def test_strategy_choice_ddp2_te(*_):
     trainer = Trainer(fast_dev_run=True, strategy="ddp2", gpus=2)
     assert isinstance(trainer.accelerator, GPUAccelerator)
@@ -825,6 +828,7 @@ def test_strategy_choice_ddp_cpu_te(*_):
 @mock.patch("torch.cuda.set_device")
 @mock.patch("torch.cuda.device_count", return_value=1)
 @mock.patch("pytorch_lightning.strategies.DDPStrategy.setup_distributed", autospec=True)
+@mock.patch("torch.cuda.is_available", return_value=True)
 def test_strategy_choice_ddp_kubeflow(*_):
     trainer = Trainer(fast_dev_run=True, strategy="ddp", gpus=1)
     assert isinstance(trainer.accelerator, GPUAccelerator)

From a8cb3692dd41b3b5f1966a5aae5bc5c0aa99da12 Mon Sep 17 00:00:00 2001
From: ananthsub
Date: Mon, 7 Feb 2022 22:27:42 -0800
Subject: [PATCH 08/22] update test mocks

---
 tests/accelerators/test_ddp.py    | 1 +
 tests/accelerators/test_dp.py     | 3 +++
 tests/models/test_gpu.py          | 1 +
 tests/plugins/test_amp_plugins.py | 1 +
 tests/utilities/test_cli.py       | 1 +
 5 files changed, 7 insertions(+)

diff --git a/tests/accelerators/test_ddp.py b/tests/accelerators/test_ddp.py
index c356ecf935ae1..bc311ad7476f1 100644
--- a/tests/accelerators/test_ddp.py
+++ b/tests/accelerators/test_ddp.py
@@ -79,6 +79,7 @@ def test_multi_gpu_model_ddp_fit_test(tmpdir, as_module):
 
 @RunIf(skip_windows=True)
 @pytest.mark.skipif(torch.cuda.is_available(), reason="test doesn't requires GPU machine")
+@mock.patch("torch.cuda.is_available", return_value=True)
 def test_torch_distributed_backend_env_variables(tmpdir):
     """This test set `undefined` as torch backend and should raise an `Backend.UNDEFINED` ValueError."""
     _environ = {"PL_TORCH_DISTRIBUTED_BACKEND": "undefined", "CUDA_VISIBLE_DEVICES": "0,1", "WORLD_SIZE": "2"}
diff --git a/tests/accelerators/test_dp.py b/tests/accelerators/test_dp.py
index f24876197a5f4..4b3879f87059e 100644
--- a/tests/accelerators/test_dp.py
+++ b/tests/accelerators/test_dp.py
@@ -11,6 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from unittest import mock
+
 import pytest
 import torch
 import torch.nn.functional as F
@@ -154,6 +156,7 @@ def _assert_extra_outputs(self, outputs):
         assert out.dtype is torch.float
 
 
+@mock.patch("torch.cuda.is_available", return_value=True)
 def test_dp_raise_exception_with_batch_transfer_hooks(tmpdir, monkeypatch):
     """Test that an exception is raised when overriding batch_transfer_hooks in DP model."""
     monkeypatch.setattr("torch.cuda.device_count", lambda: 2)
diff --git a/tests/models/test_gpu.py b/tests/models/test_gpu.py
index a3d9977b31c80..6ea5fa17cad79 100644
--- a/tests/models/test_gpu.py
+++ b/tests/models/test_gpu.py
@@ -235,6 +235,7 @@ def test_parse_gpu_returns_none_when_no_devices_are_available(mocked_device_coun
     },
 )
 @mock.patch("torch.cuda.device_count", return_value=1)
+@mock.patch("torch.cuda.is_available", return_value=True)
 @pytest.mark.parametrize("gpus", [[0, 1, 2], 2, "0"])
 def test_torchelastic_gpu_parsing(mocked_device_count, gpus):
     """Ensure when using torchelastic and nproc_per_node is set to the default of 1 per GPU device That we omit
diff --git a/tests/plugins/test_amp_plugins.py b/tests/plugins/test_amp_plugins.py
index 24c04de6604ef..a3a5c70aca45b 100644
--- a/tests/plugins/test_amp_plugins.py
+++ b/tests/plugins/test_amp_plugins.py
@@ -45,6 +45,7 @@ class MyApexPlugin(ApexMixedPrecisionPlugin):
         "SLURM_LOCALID": "0",
     },
 )
+@mock.patch("torch.cuda.is_available", return_value=True)
 @mock.patch("torch.cuda.device_count", return_value=2)
 @pytest.mark.parametrize("strategy,gpus", [("ddp", 2), ("ddp2", 2), ("ddp_spawn", 2)])
 @pytest.mark.parametrize(
diff --git a/tests/utilities/test_cli.py b/tests/utilities/test_cli.py
index 3809b8b3e2eb6..9fc994e2e4338 100644
--- a/tests/utilities/test_cli.py
+++ b/tests/utilities/test_cli.py
@@ -184,6 +184,7 @@ def test_parse_args_parsing_complex_types(cli_args, expected, instantiate):
 def test_parse_args_parsing_gpus(monkeypatch, cli_args, expected_gpu):
     """Test parsing of gpus and instantiation of Trainer."""
     monkeypatch.setattr("torch.cuda.device_count", lambda: 2)
+    monkeypatch.setattr("torch.cuda.is_available", lambda: True)
     cli_args = cli_args.split(" ") if cli_args else []
     with mock.patch("sys.argv", ["any.py"] + cli_args):
         parser = LightningArgumentParser(add_help=False, parse_as_dict=False)
@mock.patch("pytorch_lightning.strategies.DDPStrategy.setup_distributed", autospec=True) -@mock.path("torch.cuda.is_available", return_value=True) +@mock.patch("torch.cuda.is_available", return_value=True) def test_accelerator_choice_ddp_slurm(*_): with pytest.deprecated_call(match=r"accelerator='ddp'\)` has been deprecated in v1.5"): trainer = Trainer(fast_dev_run=True, accelerator="ddp", gpus=2) @@ -124,7 +124,7 @@ def test_accelerator_choice_ddp_slurm(*_): @mock.patch("torch.cuda.set_device") @mock.patch("torch.cuda.device_count", return_value=2) @mock.patch("pytorch_lightning.strategies.DDPStrategy.setup_distributed", autospec=True) -@mock.path("torch.cuda.is_available", return_value=True) +@mock.patch("torch.cuda.is_available", return_value=True) def test_accelerator_choice_ddp2_slurm(*_): with pytest.deprecated_call(match=r"accelerator='ddp2'\)` has been deprecated in v1.5"): trainer = Trainer(fast_dev_run=True, accelerator="ddp2", gpus=2) @@ -150,7 +150,7 @@ def test_accelerator_choice_ddp2_slurm(*_): @mock.patch("torch.cuda.set_device") @mock.patch("torch.cuda.device_count", return_value=1) @mock.patch("pytorch_lightning.strategies.DDPStrategy.setup_distributed", autospec=True) -@mock.path("torch.cuda.is_available", return_value=True) +@mock.patch("torch.cuda.is_available", return_value=True) def test_accelerator_choice_ddp_te(*_): with pytest.deprecated_call(match=r"accelerator='ddp'\)` has been deprecated in v1.5"): trainer = Trainer(fast_dev_run=True, accelerator="ddp", gpus=2) @@ -175,7 +175,7 @@ def test_accelerator_choice_ddp_te(*_): @mock.patch("torch.cuda.set_device") @mock.patch("torch.cuda.device_count", return_value=1) @mock.patch("pytorch_lightning.strategies.DDPStrategy.setup_distributed", autospec=True) -@mock.path("torch.cuda.is_available", return_value=True) +@mock.patch("torch.cuda.is_available", return_value=True) def test_accelerator_choice_ddp2_te(*_): with pytest.deprecated_call(match=r"accelerator='ddp2'\)` has been deprecated in v1.5"): trainer = Trainer(fast_dev_run=True, accelerator="ddp2", gpus=2) @@ -214,7 +214,7 @@ def test_accelerator_choice_ddp_cpu_te(*_): @mock.patch("torch.cuda.set_device") @mock.patch("torch.cuda.device_count", return_value=1) @mock.patch("pytorch_lightning.strategies.DDPStrategy.setup_distributed", autospec=True) -@mock.path("torch.cuda.is_available", return_value=True) +@mock.patch("torch.cuda.is_available", return_value=True) def test_accelerator_choice_ddp_kubeflow(*_): with pytest.deprecated_call(match=r"accelerator='ddp'\)` has been deprecated in v1.5"): trainer = Trainer(fast_dev_run=True, accelerator="ddp", gpus=1) From fde9491eadd0d687c62cbb3ed6364ec18067c6d0 Mon Sep 17 00:00:00 2001 From: ananthsub Date: Mon, 7 Feb 2022 23:10:12 -0800 Subject: [PATCH 10/22] Update test_accelerator_connector.py --- tests/accelerators/test_accelerator_connector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py index 78125b6a8542d..04ad1bca25945 100644 --- a/tests/accelerators/test_accelerator_connector.py +++ b/tests/accelerators/test_accelerator_connector.py @@ -740,8 +740,8 @@ def test_strategy_choice_ddp_slurm(setup_distributed_mock, strategy): @mock.patch("torch.cuda.set_device") @mock.patch("torch.cuda.device_count", return_value=2) @mock.patch("pytorch_lightning.strategies.DDPStrategy.setup_distributed", autospec=True) -@pytest.mark.parametrize("strategy", ["ddp2", DDP2Strategy()]) 
@mock.patch("torch.cuda.is_available", return_value=True) +@pytest.mark.parametrize("strategy", ["ddp2", DDP2Strategy()]) def test_strategy_choice_ddp2_slurm(set_device_mock, device_count_mock, setup_distributed_mock, strategy): trainer = Trainer(fast_dev_run=True, strategy=strategy, gpus=2) assert trainer._accelerator_connector._is_slurm_managing_tasks() From 6128f2e622429536abb02b63c4f0dbba932d35a1 Mon Sep 17 00:00:00 2001 From: ananthsub Date: Tue, 8 Feb 2022 09:24:13 -0800 Subject: [PATCH 11/22] update mocks --- tests/accelerators/test_accelerator_connector.py | 4 +++- tests/models/test_gpu.py | 2 +- tests/plugins/test_amp_plugins.py | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py index 04ad1bca25945..234dc28e1a432 100644 --- a/tests/accelerators/test_accelerator_connector.py +++ b/tests/accelerators/test_accelerator_connector.py @@ -742,7 +742,9 @@ def test_strategy_choice_ddp_slurm(setup_distributed_mock, strategy): @mock.patch("pytorch_lightning.strategies.DDPStrategy.setup_distributed", autospec=True) @mock.patch("torch.cuda.is_available", return_value=True) @pytest.mark.parametrize("strategy", ["ddp2", DDP2Strategy()]) -def test_strategy_choice_ddp2_slurm(set_device_mock, device_count_mock, setup_distributed_mock, strategy): +def test_strategy_choice_ddp2_slurm( + set_device_mock, device_count_mock, setup_distributed_mock, is_available_mock, strategy +): trainer = Trainer(fast_dev_run=True, strategy=strategy, gpus=2) assert trainer._accelerator_connector._is_slurm_managing_tasks() assert isinstance(trainer.accelerator, GPUAccelerator) diff --git a/tests/models/test_gpu.py b/tests/models/test_gpu.py index 6ea5fa17cad79..c494c0c1c18e6 100644 --- a/tests/models/test_gpu.py +++ b/tests/models/test_gpu.py @@ -237,7 +237,7 @@ def test_parse_gpu_returns_none_when_no_devices_are_available(mocked_device_coun @mock.patch("torch.cuda.device_count", return_value=1) @mock.patch("torch.cuda.is_available", return_value=True) @pytest.mark.parametrize("gpus", [[0, 1, 2], 2, "0"]) -def test_torchelastic_gpu_parsing(mocked_device_count, gpus): +def test_torchelastic_gpu_parsing(mocked_device_count, mocked_is_available, gpus): """Ensure when using torchelastic and nproc_per_node is set to the default of 1 per GPU device That we omit sanitizing the gpus as only one of the GPUs is visible.""" trainer = Trainer(gpus=gpus) diff --git a/tests/plugins/test_amp_plugins.py b/tests/plugins/test_amp_plugins.py index a3a5c70aca45b..f79529224b735 100644 --- a/tests/plugins/test_amp_plugins.py +++ b/tests/plugins/test_amp_plugins.py @@ -57,7 +57,7 @@ class MyApexPlugin(ApexMixedPrecisionPlugin): pytest.param("apex", True, MyApexPlugin, marks=RunIf(amp_apex=True)), ], ) -def test_amp_apex_ddp(mocked_device_count, strategy, gpus, amp, custom_plugin, plugin_cls): +def test_amp_apex_ddp(mocked_is_available, mocked_device_count, strategy, gpus, amp, custom_plugin, plugin_cls): plugin = None if custom_plugin: plugin = plugin_cls(16, "cpu") if amp == "native" else plugin_cls() From fb60736933e52ef437482ab71a07c3e8653bcf09 Mon Sep 17 00:00:00 2001 From: ananthsub Date: Tue, 8 Feb 2022 09:26:05 -0800 Subject: [PATCH 12/22] Update gpu.py --- pytorch_lightning/accelerators/gpu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/accelerators/gpu.py b/pytorch_lightning/accelerators/gpu.py index bac1f7c76b985..3811512d52aa1 100644 --- a/pytorch_lightning/accelerators/gpu.py 
+++ b/pytorch_lightning/accelerators/gpu.py @@ -38,7 +38,7 @@ def __init__(self) -> None: Raises: MisconfigurationException: If torch.cuda isn't available. - If no CUDA deices are found. + If no CUDA devices are found. """ if not torch.cuda.is_available(): raise MisconfigurationException("GPU Accelerator used, but CUDA isn't available.") From 79c85f5bde534ee75812e85333d7f453c0fd8732 Mon Sep 17 00:00:00 2001 From: ananthsub Date: Tue, 8 Feb 2022 09:44:43 -0800 Subject: [PATCH 13/22] tests --- tests/accelerators/test_dp.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/accelerators/test_dp.py b/tests/accelerators/test_dp.py index 4b3879f87059e..208e080fce225 100644 --- a/tests/accelerators/test_dp.py +++ b/tests/accelerators/test_dp.py @@ -157,9 +157,10 @@ def _assert_extra_outputs(self, outputs): @mock.patch("torch.cuda.is_available", return_value=True) -def test_dp_raise_exception_with_batch_transfer_hooks(tmpdir, monkeypatch): +@mock.patch("torch.cuda.device_count", return_value=2) +def test_dp_raise_exception_with_batch_transfer_hooks(tmpdir, mock_is_available, mock_device_count): """Test that an exception is raised when overriding batch_transfer_hooks in DP model.""" - monkeypatch.setattr("torch.cuda.device_count", lambda: 2) + # monkeypatch.setattr("torch.cuda.device_count", lambda: 2) class CustomModel(BoringModel): def transfer_batch_to_device(self, batch, device, dataloader_idx): From f58e582393e2843eaba7a221d1f2a1f0d4263cc5 Mon Sep 17 00:00:00 2001 From: ananthsub Date: Wed, 9 Feb 2022 00:35:23 -0800 Subject: [PATCH 14/22] is_available --- CHANGELOG.md | 2 +- pytorch_lightning/accelerators/accelerator.py | 11 +++++++++++ pytorch_lightning/accelerators/cpu.py | 6 ++++++ pytorch_lightning/accelerators/gpu.py | 17 +++++------------ pytorch_lightning/accelerators/ipu.py | 5 +++++ pytorch_lightning/accelerators/tpu.py | 6 +++++- tests/accelerators/test_cpu.py | 4 ++++ tests/accelerators/test_dp.py | 1 - tests/accelerators/test_gpu.py | 17 +++++++++++++++++ tests/accelerators/test_ipu.py | 1 + tests/accelerators/test_tpu.py | 1 + 11 files changed, 56 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 507ad0ea9bf18..ab35a28d14766 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -101,7 +101,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Added a `_Stateful` support for `LightningDataModule` ([#11637](https://github.com/PyTorchLightning/pytorch-lightning/pull/11637)) -- Added checks to `GPUAccelerator` to assert CUDA availability at initialization ([#11797](https://github.com/PyTorchLightning/pytorch-lightning/pull/11797)) +- Added `Accelerator.is_available` to assert device availability ([#11797](https://github.com/PyTorchLightning/pytorch-lightning/pull/11797)) ### Changed diff --git a/pytorch_lightning/accelerators/accelerator.py b/pytorch_lightning/accelerators/accelerator.py index 724b5b6f244c1..0937cb842ff2e 100644 --- a/pytorch_lightning/accelerators/accelerator.py +++ b/pytorch_lightning/accelerators/accelerator.py @@ -35,7 +35,13 @@ def setup_environment(self, root_device: torch.device) -> None: This is called before the LightningModule/DataModule setup hook which allows the user to access the accelerator environment before setup is complete. + + Raises: + RuntimeError: + If corresponding hardware is not found. 
""" + if not self.is_available(): + raise RuntimeError(f"{self.__class__.__qualname__} is not configured to run on this hardware.") def setup(self, trainer: "pl.Trainer") -> None: """Setup plugins for the trainer fit and creates optimizers. @@ -59,3 +65,8 @@ def get_device_stats(self, device: Union[str, torch.device]) -> Dict[str, Any]: @abstractmethod def auto_device_count() -> int: """Get the device count when set to auto.""" + + @staticmethod + @abstractmethod + def is_available() -> bool: + """Detect if the hardware is available.""" diff --git a/pytorch_lightning/accelerators/cpu.py b/pytorch_lightning/accelerators/cpu.py index 75c55fdf5f047..2fbe3bf18b079 100644 --- a/pytorch_lightning/accelerators/cpu.py +++ b/pytorch_lightning/accelerators/cpu.py @@ -31,6 +31,7 @@ def setup_environment(self, root_device: torch.device) -> None: MisconfigurationException: If the selected device is not CPU. """ + super().setup_environment(root_device) if root_device.type != "cpu": raise MisconfigurationException(f"Device should be CPU, got {root_device} instead.") @@ -42,3 +43,8 @@ def get_device_stats(self, device: _DEVICE) -> dict[str, Any]: def auto_device_count() -> int: """Get the devices when set to auto.""" return 1 + + @staticmethod + def is_available() -> bool: + """CPU is always available for execution.""" + return True diff --git a/pytorch_lightning/accelerators/gpu.py b/pytorch_lightning/accelerators/gpu.py index 3811512d52aa1..a333f0627e966 100644 --- a/pytorch_lightning/accelerators/gpu.py +++ b/pytorch_lightning/accelerators/gpu.py @@ -33,24 +33,13 @@ class GPUAccelerator(Accelerator): """Accelerator for GPU devices.""" - def __init__(self) -> None: - """ - Raises: - MisconfigurationException: - If torch.cuda isn't available. - If no CUDA devices are found. - """ - if not torch.cuda.is_available(): - raise MisconfigurationException("GPU Accelerator used, but CUDA isn't available.") - if torch.cuda.device_count() == 0: - raise MisconfigurationException("GPU Accelerator used, but found no CUDA devices available.") - def setup_environment(self, root_device: torch.device) -> None: """ Raises: MisconfigurationException: If the selected device is not GPU. """ + super().setup_environment(root_device) if root_device.type != "cuda": raise MisconfigurationException(f"Device should be GPU, got {root_device} instead") torch.cuda.set_device(root_device) @@ -91,6 +80,10 @@ def auto_device_count() -> int: """Get the devices when set to auto.""" return torch.cuda.device_count() + @staticmethod + def is_available() -> bool: + return torch.cuda.is_available() and torch.cuda.device_count > 0 + def get_nvidia_gpu_stats(device: _DEVICE) -> dict[str, float]: """Get GPU stats including memory, fan speed, and temperature from nvidia-smi. diff --git a/pytorch_lightning/accelerators/ipu.py b/pytorch_lightning/accelerators/ipu.py index 155dce5275a9b..6928546cf8c50 100644 --- a/pytorch_lightning/accelerators/ipu.py +++ b/pytorch_lightning/accelerators/ipu.py @@ -16,6 +16,7 @@ import torch from pytorch_lightning.accelerators.accelerator import Accelerator +from pytorch_lightning.utilities import _IPU_AVAILABLE class IPUAccelerator(Accelerator): @@ -31,3 +32,7 @@ def auto_device_count() -> int: # TODO (@kaushikb11): 4 is the minimal unit they are shipped in. # Update this when api is exposed by the Graphcore team. 
return 4 + + @staticmethod + def is_available() -> bool: + return _IPU_AVAILABLE diff --git a/pytorch_lightning/accelerators/tpu.py b/pytorch_lightning/accelerators/tpu.py index 34c37dcd95e7f..f1f598c3f1b3c 100644 --- a/pytorch_lightning/accelerators/tpu.py +++ b/pytorch_lightning/accelerators/tpu.py @@ -16,7 +16,7 @@ import torch from pytorch_lightning.accelerators.accelerator import Accelerator -from pytorch_lightning.utilities import _XLA_AVAILABLE +from pytorch_lightning.utilities.imports import _TPU_AVAILABLE, _XLA_AVAILABLE if _XLA_AVAILABLE: import torch_xla.core.xla_model as xm @@ -47,3 +47,7 @@ def get_device_stats(self, device: Union[str, torch.device]) -> Dict[str, Any]: def auto_device_count() -> int: """Get the devices when set to auto.""" return 8 + + @staticmethod + def is_available() -> bool: + return _TPU_AVAILABLE diff --git a/tests/accelerators/test_cpu.py b/tests/accelerators/test_cpu.py index 28011aa497eaa..ccc56d42316dd 100644 --- a/tests/accelerators/test_cpu.py +++ b/tests/accelerators/test_cpu.py @@ -22,6 +22,10 @@ def test_restore_checkpoint_after_pre_setup_default(): assert not plugin.restore_checkpoint_after_setup +def test_availability(): + assert CPUAccelerator.is_available + + @pytest.mark.parametrize("restore_after_pre_setup", [True, False]) def test_restore_checkpoint_after_pre_setup(tmpdir, restore_after_pre_setup): """Test to ensure that if restore_checkpoint_after_setup is True, then we only load the state after pre- diff --git a/tests/accelerators/test_dp.py b/tests/accelerators/test_dp.py index 208e080fce225..47635f70d567a 100644 --- a/tests/accelerators/test_dp.py +++ b/tests/accelerators/test_dp.py @@ -160,7 +160,6 @@ def _assert_extra_outputs(self, outputs): @mock.patch("torch.cuda.device_count", return_value=2) def test_dp_raise_exception_with_batch_transfer_hooks(tmpdir, mock_is_available, mock_device_count): """Test that an exception is raised when overriding batch_transfer_hooks in DP model.""" - # monkeypatch.setattr("torch.cuda.device_count", lambda: 2) class CustomModel(BoringModel): def transfer_batch_to_device(self, batch, device, dataloader_idx): diff --git a/tests/accelerators/test_gpu.py b/tests/accelerators/test_gpu.py index 110ba1be9a82c..2b1082a3f35e3 100644 --- a/tests/accelerators/test_gpu.py +++ b/tests/accelerators/test_gpu.py @@ -60,3 +60,20 @@ def test_set_cuda_device(set_device_mock, tmpdir): ) trainer.fit(model) set_device_mock.assert_called_once() + + +@RunIf(min_gpus=1) +def test_gpu_availability(): + assert GPUAccelerator.is_available() + + +@mock.patch("torch.cuda.is_available", return_value=True) +@mock.patch("torch.cuda.device_count", return_value=2) +def test_mocked_gpu_available(*_): + assert GPUAccelerator.is_available() + + +@mock.patch("torch.cuda.is_available", return_value=False) +@mock.patch("torch.cuda.device_count", return_value=0) +def test_mocked_gpu_availability(*_): + assert not GPUAccelerator.is_available() diff --git a/tests/accelerators/test_ipu.py b/tests/accelerators/test_ipu.py index 3a250de38a7a8..861b149733c0c 100644 --- a/tests/accelerators/test_ipu.py +++ b/tests/accelerators/test_ipu.py @@ -106,6 +106,7 @@ def test_fail_if_no_ipus(tmpdir): @RunIf(ipu=True) def test_accelerator_selected(tmpdir): + assert IPUAccelerator.is_available() trainer = Trainer(default_root_dir=tmpdir, ipus=1) assert isinstance(trainer.accelerator, IPUAccelerator) trainer = Trainer(default_root_dir=tmpdir, ipus=1, accelerator="ipu") diff --git a/tests/accelerators/test_tpu.py b/tests/accelerators/test_tpu.py index 
a4eb26a4bc505..608d98304c757 100644 --- a/tests/accelerators/test_tpu.py +++ b/tests/accelerators/test_tpu.py @@ -83,6 +83,7 @@ def test_if_test_works_after_train(tmpdir): @RunIf(tpu=True) def test_accelerator_tpu(): + assert TPUAccelerator.is_available() trainer = Trainer(accelerator="tpu", tpu_cores=8) From 2002944959572c7f4e51f36540fbd1d671f89078 Mon Sep 17 00:00:00 2001 From: ananthsub Date: Wed, 9 Feb 2022 00:40:57 -0800 Subject: [PATCH 15/22] Update test_gpu.py --- tests/accelerators/test_gpu.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/accelerators/test_gpu.py b/tests/accelerators/test_gpu.py index 2b1082a3f35e3..0317ea83004ac 100644 --- a/tests/accelerators/test_gpu.py +++ b/tests/accelerators/test_gpu.py @@ -63,17 +63,17 @@ def test_set_cuda_device(set_device_mock, tmpdir): @RunIf(min_gpus=1) -def test_gpu_availability(): +def test_real_gpu_availability(): assert GPUAccelerator.is_available() @mock.patch("torch.cuda.is_available", return_value=True) @mock.patch("torch.cuda.device_count", return_value=2) -def test_mocked_gpu_available(*_): +def test_gpu_available(*_): assert GPUAccelerator.is_available() @mock.patch("torch.cuda.is_available", return_value=False) @mock.patch("torch.cuda.device_count", return_value=0) -def test_mocked_gpu_availability(*_): +def test_gpu_not_available(*_): assert not GPUAccelerator.is_available() From 9cc7c00848237d928ab21988a2d983e3ba40aa58 Mon Sep 17 00:00:00 2001 From: ananthsub Date: Wed, 9 Feb 2022 01:36:47 -0800 Subject: [PATCH 16/22] tests --- tests/accelerators/test_accelerator_connector.py | 4 ++++ tests/accelerators/test_ddp.py | 9 ++++++--- tests/accelerators/test_gpu.py | 14 +------------- 3 files changed, 11 insertions(+), 16 deletions(-) diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py index 234dc28e1a432..8ceb2de96c59c 100644 --- a/tests/accelerators/test_accelerator_connector.py +++ b/tests/accelerators/test_accelerator_connector.py @@ -345,6 +345,10 @@ class Accel(Accelerator): def auto_device_count() -> int: return 1 + @staticmethod + def is_available() -> bool: + return True + class Prec(PrecisionPlugin): pass diff --git a/tests/accelerators/test_ddp.py b/tests/accelerators/test_ddp.py index bc311ad7476f1..b3bcc22afd634 100644 --- a/tests/accelerators/test_ddp.py +++ b/tests/accelerators/test_ddp.py @@ -91,11 +91,14 @@ def test_torch_distributed_backend_env_variables(tmpdir): @RunIf(skip_windows=True) -@mock.patch("torch.cuda.device_count", return_value=1) -@mock.patch("torch.cuda.is_available", return_value=True) @mock.patch("torch.cuda.set_device") +@mock.patch("torch.cuda.is_available", return_value=True) +@mock.patch("torch.cuda.device_count", return_value=1) +@mock.patch("pytorch_lightning.accelerators.gpu.GPUAccelerator.is_available", return_value=True) @mock.patch.dict(os.environ, {"PL_TORCH_DISTRIBUTED_BACKEND": "gloo"}, clear=True) -def test_ddp_torch_dist_is_available_in_setup(mock_set_device, mock_is_available, mock_device_count, tmpdir): +def test_ddp_torch_dist_is_available_in_setup( + mock_set_device, mock_cuda_available, mock_device_count, mock_gpu_is_available, tmpdir +): """Test to ensure torch distributed is available within the setup hook using ddp.""" class TestModel(BoringModel): diff --git a/tests/accelerators/test_gpu.py b/tests/accelerators/test_gpu.py index 0317ea83004ac..dc3d76cd866e7 100644 --- a/tests/accelerators/test_gpu.py +++ b/tests/accelerators/test_gpu.py @@ -63,17 +63,5 @@ def 
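
Patch 14 makes availability part of the `Accelerator` interface itself. A rough sketch of what that asks of a custom accelerator (the class name and detection logic are hypothetical, and other abstract hooks such as `get_device_stats` are omitted for brevity):

```python
import torch

from pytorch_lightning.accelerators.accelerator import Accelerator


class MyAccelerator(Accelerator):
    @staticmethod
    def auto_device_count() -> int:
        # Hypothetical: report however many devices the backend exposes.
        return max(torch.cuda.device_count(), 1)

    @staticmethod
    def is_available() -> bool:
        # Hypothetical detection logic; a real accelerator queries its runtime here.
        return torch.cuda.is_available()
```

At this point in the series `Accelerator.setup_environment` also calls `is_available()` and raises a `RuntimeError` when it returns `False`; patch 21 later removes that check again.
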
From 2002944959572c7f4e51f36540fbd1d671f89078 Mon Sep 17 00:00:00 2001
From: ananthsub
Date: Wed, 9 Feb 2022 00:40:57 -0800
Subject: [PATCH 15/22] Update test_gpu.py

---
 tests/accelerators/test_gpu.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/accelerators/test_gpu.py b/tests/accelerators/test_gpu.py
index 2b1082a3f35e3..0317ea83004ac 100644
--- a/tests/accelerators/test_gpu.py
+++ b/tests/accelerators/test_gpu.py
@@ -63,17 +63,17 @@ def test_set_cuda_device(set_device_mock, tmpdir):
 
 
 @RunIf(min_gpus=1)
-def test_gpu_availability():
+def test_real_gpu_availability():
     assert GPUAccelerator.is_available()
 
 
 @mock.patch("torch.cuda.is_available", return_value=True)
 @mock.patch("torch.cuda.device_count", return_value=2)
-def test_mocked_gpu_available(*_):
+def test_gpu_available(*_):
     assert GPUAccelerator.is_available()
 
 
 @mock.patch("torch.cuda.is_available", return_value=False)
 @mock.patch("torch.cuda.device_count", return_value=0)
-def test_mocked_gpu_availability(*_):
+def test_gpu_not_available(*_):
     assert not GPUAccelerator.is_available()

From 9cc7c00848237d928ab21988a2d983e3ba40aa58 Mon Sep 17 00:00:00 2001
From: ananthsub
Date: Wed, 9 Feb 2022 01:36:47 -0800
Subject: [PATCH 16/22] tests

---
 tests/accelerators/test_accelerator_connector.py |  4 ++++
 tests/accelerators/test_ddp.py                   |  9 ++++++---
 tests/accelerators/test_gpu.py                   | 14 +-------------
 3 files changed, 11 insertions(+), 16 deletions(-)

diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py
index 234dc28e1a432..8ceb2de96c59c 100644
--- a/tests/accelerators/test_accelerator_connector.py
+++ b/tests/accelerators/test_accelerator_connector.py
@@ -345,6 +345,10 @@ class Accel(Accelerator):
         def auto_device_count() -> int:
             return 1
 
+        @staticmethod
+        def is_available() -> bool:
+            return True
+
     class Prec(PrecisionPlugin):
         pass
 
diff --git a/tests/accelerators/test_ddp.py b/tests/accelerators/test_ddp.py
index bc311ad7476f1..b3bcc22afd634 100644
--- a/tests/accelerators/test_ddp.py
+++ b/tests/accelerators/test_ddp.py
@@ -91,11 +91,14 @@ def test_torch_distributed_backend_env_variables(tmpdir):
 
 
 @RunIf(skip_windows=True)
-@mock.patch("torch.cuda.device_count", return_value=1)
-@mock.patch("torch.cuda.is_available", return_value=True)
 @mock.patch("torch.cuda.set_device")
+@mock.patch("torch.cuda.is_available", return_value=True)
+@mock.patch("torch.cuda.device_count", return_value=1)
+@mock.patch("pytorch_lightning.accelerators.gpu.GPUAccelerator.is_available", return_value=True)
 @mock.patch.dict(os.environ, {"PL_TORCH_DISTRIBUTED_BACKEND": "gloo"}, clear=True)
-def test_ddp_torch_dist_is_available_in_setup(mock_set_device, mock_is_available, mock_device_count, tmpdir):
+def test_ddp_torch_dist_is_available_in_setup(
+    mock_set_device, mock_cuda_available, mock_device_count, mock_gpu_is_available, tmpdir
+):
     """Test to ensure torch distributed is available within the setup hook using ddp."""
 
     class TestModel(BoringModel):
diff --git a/tests/accelerators/test_gpu.py b/tests/accelerators/test_gpu.py
index 0317ea83004ac..dc3d76cd866e7 100644
--- a/tests/accelerators/test_gpu.py
+++ b/tests/accelerators/test_gpu.py
@@ -63,17 +63,5 @@ def test_set_cuda_device(set_device_mock, tmpdir):
 
 
 @RunIf(min_gpus=1)
-def test_real_gpu_availability():
+def test_gpu_availability():
     assert GPUAccelerator.is_available()
-
-
-@mock.patch("torch.cuda.is_available", return_value=True)
-@mock.patch("torch.cuda.device_count", return_value=2)
-def test_gpu_available(*_):
-    assert GPUAccelerator.is_available()
-
-
-@mock.patch("torch.cuda.is_available", return_value=False)
-@mock.patch("torch.cuda.device_count", return_value=0)
-def test_gpu_not_available(*_):
-    assert not GPUAccelerator.is_available()

From c1ff5ab438510a9c1de34894ff107f1dcc4e09a2 Mon Sep 17 00:00:00 2001
From: ananthsub
Date: Wed, 9 Feb 2022 08:47:06 -0800
Subject: [PATCH 17/22] Update test_dp.py

---
 tests/accelerators/test_dp.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/accelerators/test_dp.py b/tests/accelerators/test_dp.py
index 47635f70d567a..0b50d0740da7b 100644
--- a/tests/accelerators/test_dp.py
+++ b/tests/accelerators/test_dp.py
@@ -158,7 +158,7 @@ def _assert_extra_outputs(self, outputs):
 
 @mock.patch("torch.cuda.is_available", return_value=True)
 @mock.patch("torch.cuda.device_count", return_value=2)
-def test_dp_raise_exception_with_batch_transfer_hooks(tmpdir, mock_is_available, mock_device_count):
+def test_dp_raise_exception_with_batch_transfer_hooks(mock_is_available, mock_device_count, tmpdir):
     """Test that an exception is raised when overriding batch_transfer_hooks in DP model."""
 
     class CustomModel(BoringModel):

From 7a3702b6b8933651a4b1e58bcecbb23ad5645416 Mon Sep 17 00:00:00 2001
From: ananthsub
Date: Wed, 9 Feb 2022 09:27:51 -0800
Subject: [PATCH 18/22] Update gpu.py

---
 pytorch_lightning/accelerators/gpu.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pytorch_lightning/accelerators/gpu.py b/pytorch_lightning/accelerators/gpu.py
index a333f0627e966..60869a3028908 100644
--- a/pytorch_lightning/accelerators/gpu.py
+++ b/pytorch_lightning/accelerators/gpu.py
@@ -82,7 +82,7 @@ def auto_device_count() -> int:
 
     @staticmethod
     def is_available() -> bool:
-        return torch.cuda.is_available() and torch.cuda.device_count > 0
+        return torch.cuda.is_available() and torch.cuda.device_count() > 0
 
 
 def get_nvidia_gpu_stats(device: _DEVICE) -> dict[str, float]:

From 946ad8a104f9218a90562a12ae4f76bea5c6387b Mon Sep 17 00:00:00 2001
From: ananthsub
Date: Wed, 9 Feb 2022 09:54:31 -0800
Subject: [PATCH 19/22] Update gpu.py

---
 pytorch_lightning/accelerators/gpu.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pytorch_lightning/accelerators/gpu.py b/pytorch_lightning/accelerators/gpu.py
index 60869a3028908..aa8b0d56dbf63 100644
--- a/pytorch_lightning/accelerators/gpu.py
+++ b/pytorch_lightning/accelerators/gpu.py
@@ -82,7 +82,7 @@ def auto_device_count() -> int:
 
     @staticmethod
     def is_available() -> bool:
-        return torch.cuda.is_available() and torch.cuda.device_count() > 0
+        return torch.cuda.device_count() > 0
 
 
 def get_nvidia_gpu_stats(device: _DEVICE) -> dict[str, float]:

From e2d4432611e388a283e9b176603bbe50157d2dcb Mon Sep 17 00:00:00 2001
From: ananthsub
Date: Wed, 9 Feb 2022 10:49:39 -0800
Subject: [PATCH 20/22] address comments

---
 tests/accelerators/test_cpu.py    | 2 +-
 tests/accelerators/test_ddp.py    | 2 +-
 tests/accelerators/test_dp.py     | 2 +-
 tests/plugins/test_amp_plugins.py | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/accelerators/test_cpu.py b/tests/accelerators/test_cpu.py
index ccc56d42316dd..bb3ebfe487fd9 100644
--- a/tests/accelerators/test_cpu.py
+++ b/tests/accelerators/test_cpu.py
@@ -23,7 +23,7 @@ def test_restore_checkpoint_after_pre_setup_default():
 
 
 def test_availability():
-    assert CPUAccelerator.is_available
+    assert CPUAccelerator.is_available()
 
 
 @pytest.mark.parametrize("restore_after_pre_setup", [True, False])
diff --git a/tests/accelerators/test_ddp.py b/tests/accelerators/test_ddp.py
index b3bcc22afd634..342b001abf92c 100644
--- a/tests/accelerators/test_ddp.py
+++ b/tests/accelerators/test_ddp.py
@@ -97,7 +97,7 @@ def test_torch_distributed_backend_env_variables(tmpdir):
 @mock.patch("pytorch_lightning.accelerators.gpu.GPUAccelerator.is_available", return_value=True)
 @mock.patch.dict(os.environ, {"PL_TORCH_DISTRIBUTED_BACKEND": "gloo"}, clear=True)
 def test_ddp_torch_dist_is_available_in_setup(
-    mock_set_device, mock_cuda_available, mock_device_count, mock_gpu_is_available, tmpdir
+    mock_gpu_is_available, mock_device_count, mock_cuda_available, mock_set_device, tmpdir
 ):
     """Test to ensure torch distributed is available within the setup hook using ddp."""
 
diff --git a/tests/accelerators/test_dp.py b/tests/accelerators/test_dp.py
index 0b50d0740da7b..9173db2644d77 100644
--- a/tests/accelerators/test_dp.py
+++ b/tests/accelerators/test_dp.py
@@ -156,8 +156,8 @@ def _assert_extra_outputs(self, outputs):
         assert out.dtype is torch.float
 
 
-@mock.patch("torch.cuda.is_available", return_value=True)
 @mock.patch("torch.cuda.device_count", return_value=2)
+@mock.patch("torch.cuda.is_available", return_value=True)
 def test_dp_raise_exception_with_batch_transfer_hooks(mock_is_available, mock_device_count, tmpdir):
     """Test that an exception is raised when overriding batch_transfer_hooks in DP model."""
 
diff --git a/tests/plugins/test_amp_plugins.py b/tests/plugins/test_amp_plugins.py
index f79529224b735..f3a5504f398ed 100644
--- a/tests/plugins/test_amp_plugins.py
+++ b/tests/plugins/test_amp_plugins.py
@@ -57,7 +57,7 @@ class MyApexPlugin(ApexMixedPrecisionPlugin):
         pytest.param("apex", True, MyApexPlugin, marks=RunIf(amp_apex=True)),
     ],
 )
-def test_amp_apex_ddp(mocked_is_available, mocked_device_count, strategy, gpus, amp, custom_plugin, plugin_cls):
+def test_amp_apex_ddp(mocked_device_count, mocked_is_available, strategy, gpus, amp, custom_plugin, plugin_cls):
     plugin = None
     if custom_plugin:
         plugin = plugin_cls(16, "cpu") if amp == "native" else plugin_cls()

From d6e324849feae3b4d48ae1735bf2b654d62925db Mon Sep 17 00:00:00 2001
From: ananthsub
Date: Wed, 9 Feb 2022 13:36:09 -0800
Subject: [PATCH 21/22] Update accelerator.py

---
 pytorch_lightning/accelerators/accelerator.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/pytorch_lightning/accelerators/accelerator.py b/pytorch_lightning/accelerators/accelerator.py
index 0937cb842ff2e..06f82fb8d4b96 100644
--- a/pytorch_lightning/accelerators/accelerator.py
+++ b/pytorch_lightning/accelerators/accelerator.py
@@ -35,13 +35,7 @@ def setup_environment(self, root_device: torch.device) -> None:
 
         This is called before the LightningModule/DataModule setup hook which allows the user to
         access the accelerator environment before setup is complete.
-
-        Raises:
-            RuntimeError:
-                If corresponding hardware is not found.
         """
-        if not self.is_available():
-            raise RuntimeError(f"{self.__class__.__qualname__} is not configured to run on this hardware.")
 
     def setup(self, trainer: "pl.Trainer") -> None:
         """Setup plugins for the trainer fit and creates optimizers.

From 2a111dc22cdacf47a000eeb9aac9d0a742325fe3 Mon Sep 17 00:00:00 2001
From: ananthsub
Date: Wed, 9 Feb 2022 13:58:18 -0800
Subject: [PATCH 22/22] Update CHANGELOG.md

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ab35a28d14766..d98dbc4cde2db 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -101,7 +101,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Added a `_Stateful` support for `LightningDataModule` ([#11637](https://github.com/PyTorchLightning/pytorch-lightning/pull/11637))
 
-- Added `Accelerator.is_available` to assert device availability ([#11797](https://github.com/PyTorchLightning/pytorch-lightning/pull/11797))
+- Added `Accelerator.is_available` to check device availability ([#11797](https://github.com/PyTorchLightning/pytorch-lightning/pull/11797))
 
 
 ### Changed
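
With the full series applied, availability can be queried directly from the accelerator classes before a `Trainer` is configured. A small usage sketch of the final API (the CPU fallback shown here is an illustrative choice, not Lightning behaviour):

```python
from pytorch_lightning import Trainer
from pytorch_lightning.accelerators import CPUAccelerator, GPUAccelerator

if GPUAccelerator.is_available():
    # Use every CUDA device the accelerator reports.
    trainer = Trainer(gpus=GPUAccelerator.auto_device_count())
else:
    assert CPUAccelerator.is_available()  # the CPU accelerator always reports True
    trainer = Trainer()
```
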