Skip to content

Commit a6d1246

Browse files
awaelchli and rohitgr7
committed
Fix validation when accelerator is a string (#13417)
Co-authored-by: Rohit Gupta <[email protected]>
1 parent 9e0cc17 commit a6d1246

File tree

3 files changed

+16
-247
lines changed

3 files changed

+16
-247
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
247247
- Fixed Model Summary when using DeepSpeed Stage 3 ([#13427](https://github.com/PyTorchLightning/pytorch-lightning/pull/13427))
248248
- Fixed `pytorch_lightning.utilities.distributed.gather_all_tensors` to handle tensors of different dimensions ([#12630](https://github.com/PyTorchLightning/pytorch-lightning/pull/12630))
249249
- The loops now call `.set_epoch()` also on batch samplers if the dataloader has one wrapped in a distributed sampler ([#13396](https://github.com/PyTorchLightning/pytorch-lightning/pull/13396))
250+
- Fixed the input validation for the accelerator Trainer argument when passed as a string ([#13417](https://github.com/PyTorchLightning/pytorch-lightning/pull/13417))
250251

251252

252253
## [1.6.4] - 2022-06-01

pytorch_lightning/trainer/connectors/accelerator_connector.py

Lines changed: 12 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -290,17 +290,18 @@ def _check_config_and_set_final_flags(
290290
f" and you can only specify one strategy, but you have passed {plugin} as a plugin."
291291
)
292292

293-
if accelerator is not None:
294-
if accelerator in self._accelerator_types or accelerator == "auto" or isinstance(accelerator, Accelerator):
295-
self._accelerator_flag = accelerator
296-
elif accelerator in self._registered_strategies or isinstance(accelerator, Strategy):
297-
rank_zero_deprecation(
298-
f"Passing `Trainer(accelerator={accelerator!r})` has been deprecated"
299-
f" in v1.5 and will be removed in v1.7. Use `Trainer(strategy={accelerator!r})` instead."
300-
)
301-
self._strategy_flag = accelerator
302-
elif accelerator == "ddp_cpu" and not self._strategy_flag:
303-
self._strategy_flag = accelerator
293+
if (
294+
accelerator is not None
295+
and accelerator not in self._accelerator_types
296+
and accelerator != "auto"
297+
and not isinstance(accelerator, Accelerator)
298+
):
299+
raise ValueError(
300+
f"You selected an invalid accelerator name: `accelerator={accelerator!r}`."
301+
f" Available names are: {', '.join(self._accelerator_types)}."
302+
)
303+
304+
self._accelerator_flag = accelerator
304305

305306
if precision is not None:
306307
if str(precision) not in self._precision_types:
@@ -504,12 +505,6 @@ def _set_parallel_devices_and_init_accelerator(self) -> None:
504505
self.accelerator: Accelerator = self._accelerator_flag
505506
else:
506507
assert self._accelerator_flag is not None
507-
self._accelerator_flag = self._accelerator_flag.lower()
508-
if self._accelerator_flag not in AcceleratorRegistry:
509-
raise MisconfigurationException(
510-
"When passing string value for the `accelerator` argument of `Trainer`,"
511-
f" it can only be one of {self._accelerator_types}."
512-
)
513508
self.accelerator = AcceleratorRegistry.get(self._accelerator_flag)
514509

515510
if not self.accelerator.is_available():

tests/accelerators/test_accelerator_connector.py

Lines changed: 3 additions & 230 deletions
Original file line numberDiff line numberDiff line change
@@ -56,229 +56,9 @@ def test_accelerator_choice_cpu(tmpdir):
5656
assert isinstance(trainer.strategy, SingleDeviceStrategy)
5757

5858

59-
@pytest.mark.parametrize(("devices", "num_nodes"), ([(1, 1), (1, 2), (2, 1), (2, 2)]))
60-
def test_accelerator_choice_ddp_cpu(tmpdir, devices: int, num_nodes: int):
61-
trainer = Trainer(fast_dev_run=True, accelerator="ddp_cpu", devices=devices, num_nodes=num_nodes)
62-
assert isinstance(trainer.accelerator, CPUAccelerator)
63-
no_spawn = devices == 1 and num_nodes > 1
64-
assert isinstance(trainer.strategy, DDPStrategy if no_spawn else DDPSpawnStrategy)
65-
assert isinstance(trainer.strategy.cluster_environment, LightningEnvironment)
66-
67-
68-
@mock.patch.dict(os.environ, {"CUDA_VISIBLE_DEVICES": "0,1"})
69-
@mock.patch("torch.cuda.device_count", return_value=2)
70-
@mock.patch("torch.cuda.is_available", return_value=True)
71-
def test_accelerator_choice_ddp(cuda_available_mock, device_count_mock):
72-
with pytest.deprecated_call(match=r"accelerator='ddp'\)` has been deprecated"):
73-
trainer = Trainer(fast_dev_run=True, accelerator="ddp", gpus=1)
74-
assert isinstance(trainer.accelerator, GPUAccelerator)
75-
assert isinstance(trainer.strategy, DDPStrategy)
76-
assert isinstance(trainer.strategy.cluster_environment, LightningEnvironment)
77-
78-
79-
@mock.patch.dict(os.environ, {"CUDA_VISIBLE_DEVICES": "0,1"})
80-
@mock.patch("torch.cuda.device_count", return_value=2)
81-
@mock.patch("torch.cuda.is_available", return_value=True)
82-
def test_accelerator_choice_ddp_spawn(cuda_available_mock, device_count_mock):
83-
with pytest.deprecated_call(match=r"accelerator='ddp_spawn'\)` has been deprecated"):
84-
trainer = Trainer(fast_dev_run=True, accelerator="ddp_spawn", gpus=1)
85-
assert isinstance(trainer.accelerator, GPUAccelerator)
86-
assert isinstance(trainer.strategy, DDPSpawnStrategy)
87-
assert isinstance(trainer.strategy.cluster_environment, LightningEnvironment)
88-
89-
90-
@mock.patch.dict(
91-
os.environ,
92-
{
93-
"CUDA_VISIBLE_DEVICES": "0,1",
94-
"SLURM_NTASKS": "2",
95-
"SLURM_JOB_NAME": "SOME_NAME",
96-
"SLURM_NODEID": "0",
97-
"SLURM_PROCID": "1",
98-
"SLURM_LOCALID": "1",
99-
},
100-
)
101-
@mock.patch("torch.cuda.set_device")
102-
@mock.patch("torch.cuda.device_count", return_value=2)
103-
@mock.patch("pytorch_lightning.strategies.DDPStrategy.setup_distributed", autospec=True)
104-
@mock.patch("torch.cuda.is_available", return_value=True)
105-
def test_accelerator_choice_ddp_slurm(*_):
106-
with pytest.deprecated_call(match=r"accelerator='ddp'\)` has been deprecated in v1.5"):
107-
trainer = Trainer(fast_dev_run=True, accelerator="ddp", gpus=2)
108-
assert trainer._accelerator_connector._is_slurm_managing_tasks()
109-
assert isinstance(trainer.accelerator, GPUAccelerator)
110-
assert isinstance(trainer.strategy, DDPStrategy)
111-
assert isinstance(trainer.strategy.cluster_environment, SLURMEnvironment)
112-
assert trainer.strategy.cluster_environment.local_rank() == 1
113-
assert trainer.strategy.local_rank == 1
114-
115-
116-
@mock.patch.dict(
117-
os.environ,
118-
{
119-
"CUDA_VISIBLE_DEVICES": "0,1",
120-
"SLURM_NTASKS": "2",
121-
"SLURM_JOB_NAME": "SOME_NAME",
122-
"SLURM_NODEID": "0",
123-
"SLURM_PROCID": "1",
124-
"SLURM_LOCALID": "1",
125-
},
126-
)
127-
@mock.patch("torch.cuda.set_device")
128-
@mock.patch("torch.cuda.device_count", return_value=2)
129-
@mock.patch("pytorch_lightning.strategies.DDPStrategy.setup_distributed", autospec=True)
130-
@mock.patch("torch.cuda.is_available", return_value=True)
131-
def test_accelerator_choice_ddp2_slurm(*_):
132-
with pytest.deprecated_call(match=r"accelerator='ddp2'\)` has been deprecated in v1.5"):
133-
trainer = Trainer(fast_dev_run=True, accelerator="ddp2", gpus=2)
134-
assert trainer._accelerator_connector._is_slurm_managing_tasks()
135-
assert isinstance(trainer.accelerator, GPUAccelerator)
136-
assert isinstance(trainer.strategy, DDP2Strategy)
137-
assert isinstance(trainer.strategy.cluster_environment, SLURMEnvironment)
138-
assert trainer.strategy.cluster_environment.local_rank() == 1
139-
assert trainer.strategy.local_rank == 1
140-
141-
142-
@mock.patch.dict(
143-
os.environ,
144-
{
145-
"CUDA_VISIBLE_DEVICES": "0,1",
146-
"WORLD_SIZE": "2",
147-
"LOCAL_WORLD_SIZE": "2",
148-
"RANK": "1",
149-
"LOCAL_RANK": "1",
150-
"GROUP_RANK": "0",
151-
"TORCHELASTIC_RUN_ID": "1", # present for torch >= 1.9.1
152-
},
153-
)
154-
@mock.patch("torch.cuda.set_device")
155-
@mock.patch("torch.cuda.device_count", return_value=1)
156-
@mock.patch("pytorch_lightning.strategies.DDPStrategy.setup_distributed", autospec=True)
157-
@mock.patch("torch.cuda.is_available", return_value=True)
158-
def test_accelerator_choice_ddp_te(*_):
159-
with pytest.deprecated_call(match=r"accelerator='ddp'\)` has been deprecated in v1.5"):
160-
trainer = Trainer(fast_dev_run=True, accelerator="ddp", gpus=2)
161-
assert isinstance(trainer.accelerator, GPUAccelerator)
162-
assert isinstance(trainer.strategy, DDPStrategy)
163-
assert isinstance(trainer.strategy.cluster_environment, TorchElasticEnvironment)
164-
assert trainer.strategy.cluster_environment.local_rank() == 1
165-
assert trainer.strategy.local_rank == 1
166-
167-
168-
@mock.patch.dict(
169-
os.environ,
170-
{
171-
"CUDA_VISIBLE_DEVICES": "0,1",
172-
"WORLD_SIZE": "2",
173-
"LOCAL_WORLD_SIZE": "2",
174-
"RANK": "1",
175-
"LOCAL_RANK": "1",
176-
"GROUP_RANK": "0",
177-
"TORCHELASTIC_RUN_ID": "1",
178-
},
179-
)
180-
@mock.patch("torch.cuda.set_device")
181-
@mock.patch("torch.cuda.device_count", return_value=1)
182-
@mock.patch("pytorch_lightning.strategies.DDPStrategy.setup_distributed", autospec=True)
183-
@mock.patch("torch.cuda.is_available", return_value=True)
184-
def test_accelerator_choice_ddp2_te(*_):
185-
with pytest.deprecated_call(match=r"accelerator='ddp2'\)` has been deprecated in v1.5"):
186-
trainer = Trainer(fast_dev_run=True, accelerator="ddp2", gpus=2)
187-
assert isinstance(trainer.accelerator, GPUAccelerator)
188-
assert isinstance(trainer.strategy, DDP2Strategy)
189-
assert isinstance(trainer.strategy.cluster_environment, TorchElasticEnvironment)
190-
assert trainer.strategy.cluster_environment.local_rank() == 1
191-
assert trainer.strategy.local_rank == 1
192-
193-
194-
@mock.patch.dict(
195-
os.environ,
196-
{
197-
"WORLD_SIZE": "2",
198-
"LOCAL_WORLD_SIZE": "2",
199-
"RANK": "1",
200-
"LOCAL_RANK": "1",
201-
"GROUP_RANK": "0",
202-
"TORCHELASTIC_RUN_ID": "1",
203-
},
204-
)
205-
@mock.patch("torch.cuda.device_count", return_value=0)
206-
@mock.patch("pytorch_lightning.strategies.DDPStrategy.setup_distributed", autospec=True)
207-
def test_accelerator_choice_ddp_cpu_te(*_):
208-
trainer = Trainer(fast_dev_run=True, accelerator="ddp_cpu", devices=2)
209-
assert isinstance(trainer.accelerator, CPUAccelerator)
210-
assert isinstance(trainer.strategy, DDPStrategy)
211-
assert isinstance(trainer.strategy.cluster_environment, TorchElasticEnvironment)
212-
assert trainer.strategy.cluster_environment.local_rank() == 1
213-
assert trainer.strategy.local_rank == 1
214-
215-
216-
@mock.patch.dict(
217-
os.environ,
218-
{
219-
"CUDA_VISIBLE_DEVICES": "0",
220-
"KUBERNETES_PORT": "tcp://127.0.0.1:443",
221-
"MASTER_ADDR": "1.2.3.4",
222-
"MASTER_PORT": "500",
223-
"WORLD_SIZE": "20",
224-
"RANK": "1",
225-
},
226-
)
227-
@mock.patch("torch.cuda.set_device")
228-
@mock.patch("torch.cuda.device_count", return_value=1)
229-
@mock.patch("pytorch_lightning.strategies.DDPStrategy.setup_distributed", autospec=True)
230-
@mock.patch("torch.cuda.is_available", return_value=True)
231-
def test_accelerator_choice_ddp_kubeflow(*_):
232-
with pytest.deprecated_call(match=r"accelerator='ddp'\)` has been deprecated in v1.5"):
233-
trainer = Trainer(fast_dev_run=True, accelerator="ddp", gpus=1)
234-
assert isinstance(trainer.accelerator, GPUAccelerator)
235-
assert isinstance(trainer.strategy, DDPStrategy)
236-
assert isinstance(trainer.strategy.cluster_environment, KubeflowEnvironment)
237-
assert trainer.strategy.cluster_environment.local_rank() == 0
238-
assert trainer.strategy.local_rank == 0
239-
240-
241-
@mock.patch.dict(
242-
os.environ,
243-
{
244-
"KUBERNETES_PORT": "tcp://127.0.0.1:443",
245-
"MASTER_ADDR": "1.2.3.4",
246-
"MASTER_PORT": "500",
247-
"WORLD_SIZE": "20",
248-
"RANK": "1",
249-
},
250-
)
251-
@mock.patch("torch.cuda.device_count", return_value=0)
252-
@mock.patch("pytorch_lightning.strategies.DDPStrategy.setup_distributed", autospec=True)
253-
def test_accelerator_choice_ddp_cpu_kubeflow(*_):
254-
trainer = Trainer(fast_dev_run=True, accelerator="ddp_cpu", devices=1)
255-
assert isinstance(trainer.accelerator, CPUAccelerator)
256-
assert isinstance(trainer.strategy, DDPStrategy)
257-
assert isinstance(trainer.strategy.cluster_environment, KubeflowEnvironment)
258-
assert trainer.strategy.cluster_environment.local_rank() == 0
259-
assert trainer.strategy.local_rank == 0
260-
261-
262-
@mock.patch.dict(
263-
os.environ,
264-
{
265-
"SLURM_NTASKS": "2",
266-
"SLURM_JOB_NAME": "SOME_NAME",
267-
"SLURM_NODEID": "0",
268-
"LOCAL_RANK": "0",
269-
"SLURM_PROCID": "0",
270-
"SLURM_LOCALID": "0",
271-
},
272-
)
273-
@mock.patch("torch.cuda.device_count", return_value=0)
274-
@mock.patch("pytorch_lightning.strategies.DDPStrategy.setup_distributed", autospec=True)
275-
def test_accelerator_choice_ddp_cpu_slurm(*_):
276-
trainer = Trainer(fast_dev_run=True, accelerator="ddp_cpu", devices=2)
277-
assert trainer._accelerator_connector._is_slurm_managing_tasks()
278-
assert isinstance(trainer.accelerator, CPUAccelerator)
279-
assert isinstance(trainer.strategy, DDPStrategy)
280-
assert isinstance(trainer.strategy.cluster_environment, SLURMEnvironment)
281-
assert trainer.strategy.local_rank == 0
59+
def test_accelerator_invalid_choice():
60+
with pytest.raises(ValueError, match="You selected an invalid accelerator name: `accelerator='invalid'`"):
61+
Trainer(accelerator="invalid")
28262

28363

28464
@RunIf(skip_windows=True, standalone=True)
@@ -551,13 +331,6 @@ def test_accelerator_auto_with_devices_gpu():
551331
assert trainer.num_devices == 1
552332

553333

554-
def test_validate_accelerator_and_devices():
555-
556-
trainer = Trainer(accelerator="ddp_cpu", devices=2)
557-
assert isinstance(trainer.accelerator, CPUAccelerator)
558-
assert trainer.num_devices == 2
559-
560-
561334
def test_set_devices_if_none_cpu():
562335

563336
trainer = Trainer(accelerator="cpu", devices=3)

0 commit comments

Comments (0)