Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Added `state_id` property to the `Callback` base class ([#6886](https://github.com/PyTorchLightning/pytorch-lightning/pull/6886))


- Added check for unique GPU ids ([#8666](https://github.com/PyTorchLightning/pytorch-lightning/pull/8666))


- Added `ResultCollection` state_dict to Loop `state_dict` and support for distributed reload. ([#8641](https://github.com/PyTorchLightning/pytorch-lightning/pull/8641))


Expand Down
21 changes: 20 additions & 1 deletion pytorch_lightning/utilities/device_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def parse_gpu_ids(gpus: Optional[Union[int, str, List[int]]]) -> Optional[List[i

Args:
gpus: An int -1 or string '-1' indicate that all available GPUs should be used.
A list of ints or a string containing list of comma separated integers
A list of unique ints or a string containing list of comma separated unique integers
indicates specific GPUs to use.
An int 0 means that no GPUs should be used.
Any int N > 0 indicates that GPUs [0..N) should be used.
Expand Down Expand Up @@ -88,6 +88,10 @@ def parse_gpu_ids(gpus: Optional[Union[int, str, List[int]]]) -> Optional[List[i
if TorchElasticEnvironment.is_using_torchelastic() and len(gpus) != 1 and len(_get_all_available_gpus()) == 1:
# omit sanity check on torchelastic as by default shows one visible GPU per process
return gpus

# Check that gpus are unique. Duplicate gpus are not supported by the backend.
_check_unique(gpus)

return _sanitize_gpu_ids(gpus)


Expand Down Expand Up @@ -188,6 +192,21 @@ def _get_all_available_gpus() -> List[int]:
return list(range(torch.cuda.device_count()))


def _check_unique(device_ids: List[int]) -> None:
"""
Checks that the device_ids are unique.

Args:
device_ids: list of ints corresponding to gpus indices

Raises:
MisconfigurationException:
If ``device_ids`` of GPUs aren't unique
"""
if len(device_ids) != len(set(device_ids)):
raise MisconfigurationException("Device ID's (GPU) must be unique.")


def _check_data_type(device_ids: Any) -> None:
"""
Checks that the device_ids argument is one of: None, Int, String or List.
Expand Down
1 change: 1 addition & 0 deletions tests/models/test_gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,7 @@ def test_parse_gpu_ids(mocked_device_count, gpus, expected_gpu_ids):
pytest.param([-1]),
pytest.param([None]),
pytest.param(["0"]),
pytest.param([0, 0]),
],
)
def test_parse_gpu_fail_on_unsupported_inputs(mocked_device_count, gpus):
Expand Down