Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
7e12ea2
Rename GPUAccelerator to CUDAAccelerator
rohitgr7 Jul 19, 2022
d630a2c
Add back GPUAccelerator and deprecate it
rohitgr7 Jul 19, 2022
94b68ec
Remove temporary registration
justusschock Jul 19, 2022
53b6b08
Merge branch 'master' into merge_different_gpus
justusschock Jul 20, 2022
c145755
accelerator connector reroute
justusschock Jul 20, 2022
953d551
accelerator_connector tests
justusschock Jul 20, 2022
7d443cf
update enums
justusschock Jul 20, 2022
729a8bc
lite support + tests
justusschock Jul 20, 2022
a170ae5
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 20, 2022
7ddc024
typo
justusschock Jul 20, 2022
2575c01
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 20, 2022
708b4b4
move "gpu" support up before actual accelerator flag checks
justusschock Jul 21, 2022
315fd05
Stupid arguments
justusschock Jul 21, 2022
d7365ff
fix tests
justusschock Jul 21, 2022
50bcbde
change exception type
justusschock Jul 21, 2022
b0f18f2
fix registry test
justusschock Jul 21, 2022
7fe75bf
pre-commit
justusschock Jul 21, 2022
c84cbe9
CI: debug HPU flow (#13419)
Borda Jul 20, 2022
c1a13b2
Update typing-extensions requirement from <4.2.1,>=4.0.0 to >=4.0.0,<…
dependabot[bot] Jul 20, 2022
039d3dd
[pre-commit.ci] pre-commit suggestions (#13540)
pre-commit-ci[bot] Jul 20, 2022
bb15521
[FIX] Native FSDP precision + tests (#12985)
Jul 20, 2022
a49e8c5
Simplify fetching's loader types (#13111)
carmocca Jul 20, 2022
3129d97
Include app templates to the lightning and app packages (#13731)
manskx Jul 20, 2022
74ab878
Fix mypy typing errors in pytorch_lightning/callbacks/model_checkpoin…
Jungwon-Lee Jul 20, 2022
588e831
Fix typos initialize in docs (#13557)
NathanielDamours Jul 20, 2022
94cb590
Fix main progress bar counter when `val_check_interval=int` and `chec…
rohitgr7 Jul 20, 2022
054bf13
Fix mypy errors attributed to `pytorch_lightning.loggers.tensorboard.…
jxtngx Jul 21, 2022
e36fd77
Fix mypy errors attributed to `pytorch_lightning.loggers.mlflow` (#13…
jxtngx Jul 21, 2022
86341ba
fix mypy errors for loggers/wandb.py (#13483)
gautierdag Jul 21, 2022
2181a16
Fix gatekeeper minimum check (#13769)
carmocca Jul 21, 2022
3565ce2
Merge branch 'master' into merge_different_gpus
justusschock Jul 21, 2022
9f257e5
changelog
justusschock Jul 21, 2022
323271c
changelog
justusschock Jul 21, 2022
90c996b
fix order
justusschock Jul 22, 2022
a0c76b9
move up again
justusschock Jul 22, 2022
c9dc306
add missing test
justusschock Jul 25, 2022
92de866
Merge branch 'master' into merge_different_gpus
justusschock Jul 25, 2022
fe66ab3
fix pickling issue
awaelchli Jul 25, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions src/pytorch_lightning/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

### Changed

- `accelerator="gpu"` now automatically selects an available GPU backend (CUDA and MPS currently) ([#13642](https://github.com/Lightning-AI/lightning/pull/13642))


- Enable validation during overfitting ([#12527](https://github.com/PyTorchLightning/pytorch-lightning/pull/12527))


Expand Down Expand Up @@ -166,6 +169,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

### Deprecated

- Deprecated `pytorch_lightning.accelerators.gpu.GPUAccelerator` in favor of `pytorch_lightning.accelerators.cuda.CUDAAccelerator` ([#13636](https://github.com/Lightning-AI/lightning/pull/13636))


- Deprecated `pytorch_lightning.loggers.base.LightningLoggerBase` in favor of `pytorch_lightning.loggers.logger.Logger`, and deprecated `pytorch_lightning.loggers.base` in favor of `pytorch_lightning.loggers.logger` ([#12014](https://github.com/PyTorchLightning/pytorch-lightning/pull/12014))


Expand Down
6 changes: 0 additions & 6 deletions src/pytorch_lightning/accelerators/cuda.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,12 +97,6 @@ def register_accelerators(cls, accelerator_registry: Dict) -> None:
cls,
description=f"{cls.__class__.__name__}",
)
# temporarily enable "gpu" to point to the CUDA Accelerator
accelerator_registry.register(
"gpu",
cls,
description=f"{cls.__class__.__name__}",
)

def teardown(self) -> None:
# clean up memory
Expand Down
7 changes: 4 additions & 3 deletions src/pytorch_lightning/lite/lite.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ class LightningLite(ABC):
- Multi-node support.

Args:
accelerator: The hardware to run on. Possible choices are: ``"cpu"``, ``"gpu"``, ``"tpu"``, ``"auto"``.
accelerator: The hardware to run on. Possible choices are:
``"cpu"``, ``"cuda"``, ``"mps"``, ``"gpu"``, ``"tpu"``, ``"auto"``.
strategy: Strategy for how to run across multiple devices. Possible choices are:
``"dp"``, ``"ddp"``, ``"ddp_spawn"``, ``"deepspeed"``, ``"ddp_sharded"``.
devices: Number of devices to train on (``int``), which GPUs to train on (``list`` or ``str``), or ``"auto"``.
Expand Down Expand Up @@ -436,7 +437,7 @@ def _get_distributed_sampler(dataloader: DataLoader, **kwargs: Any) -> Distribut
return DistributedSamplerWrapper(dataloader.sampler, **kwargs)

def _check_accelerator_support(self, accelerator: Optional[Union[str, Accelerator]]) -> None:
supported = [t.value.lower() for t in self._supported_device_types()] + ["auto"]
supported = [t.value.lower() for t in self._supported_device_types()] + ["gpu", "auto"]
valid = accelerator is None or isinstance(accelerator, Accelerator) or accelerator in supported
if not valid:
raise MisconfigurationException(
Expand All @@ -457,7 +458,7 @@ def _check_strategy_support(self, strategy: Optional[Union[str, Strategy]]) -> N
def _supported_device_types() -> Sequence[_AcceleratorType]:
return (
_AcceleratorType.CPU,
_AcceleratorType.GPU,
_AcceleratorType.CUDA,
_AcceleratorType.TPU,
_AcceleratorType.MPS,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -201,10 +201,14 @@ def __init__(
devices=devices, num_nodes=num_nodes, num_processes=num_processes, gpus=gpus, ipus=ipus, tpu_cores=tpu_cores
)
# 2. Instantiate Accelerator
# handle `auto` and `None`
self._set_accelerator_if_ipu_strategy_is_passed()

# handle `auto`, `None` and `gpu`
if self._accelerator_flag == "auto" or self._accelerator_flag is None:
self._accelerator_flag = self._choose_accelerator()
self._accelerator_flag = self._choose_auto_accelerator()
elif self._accelerator_flag == "gpu":
self._accelerator_flag = self._choose_gpu_accelerator_backend()

self._set_parallel_devices_and_init_accelerator()

# 3. Instantiate ClusterEnvironment
Expand Down Expand Up @@ -280,7 +284,7 @@ def _check_config_and_set_final_flags(
if (
accelerator is not None
and accelerator not in self._accelerator_types
and accelerator != "auto"
and accelerator not in ("auto", "gpu")
and not isinstance(accelerator, Accelerator)
):
raise ValueError(
Expand Down Expand Up @@ -487,7 +491,7 @@ def _set_accelerator_if_ipu_strategy_is_passed(self) -> None:
if isinstance(self._strategy_flag, IPUStrategy):
self._accelerator_flag = "ipu"

def _choose_accelerator(self) -> str:
def _choose_auto_accelerator(self) -> str:
"""Choose the accelerator type (str) based on availability when ``accelerator='auto'``."""
if self._accelerator_flag == "auto":
if _TPU_AVAILABLE:
Expand All @@ -502,6 +506,15 @@ def _choose_accelerator(self) -> str:
return "cuda"
return "cpu"

@staticmethod
def _choose_gpu_accelerator_backend() -> str:
if MPSAccelerator.is_available():
return "mps"
if CUDAAccelerator.is_available():
return "cuda"

raise MisconfigurationException("No supported gpu backend found!")

def _set_parallel_devices_and_init_accelerator(self) -> None:
if isinstance(self._accelerator_flag, Accelerator):
self.accelerator: Accelerator = self._accelerator_flag
Expand Down
4 changes: 2 additions & 2 deletions src/pytorch_lightning/utilities/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,15 +244,15 @@ class _AcceleratorType(LightningEnum):
>>> _AcceleratorType.CPU == _AcceleratorType.from_str('cpu')
True
>>> # you can match the type with string
>>> _AcceleratorType.GPU == 'GPU'
>>> _AcceleratorType.CUDA == 'CUDA'
True
>>> # which is case invariant
>>> _AcceleratorType.TPU in ('tpu', 'CPU')
True
"""

CPU = "CPU"
GPU = "GPU"
CUDA = "CUDA"
IPU = "IPU"
TPU = "TPU"
HPU = "HPU"
Expand Down
44 changes: 41 additions & 3 deletions tests/tests_pytorch/accelerators/test_accelerator_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ def test_accelerator_cpu(_):
MisconfigurationException,
match="CUDAAccelerator can not run on your system since the accelerator is not available.",
):
Trainer(accelerator="gpu")
Trainer(accelerator="cuda")

with pytest.deprecated_call(match=r"is deprecated in v1.7 and will be removed"):
Trainer(accelerator="cpu", gpus=1)
Expand Down Expand Up @@ -671,7 +671,7 @@ def test_devices_auto_choice_mps():

@pytest.mark.parametrize(
["parallel_devices", "accelerator"],
[([torch.device("cpu")], "gpu"), ([torch.device("cuda", i) for i in range(8)], ("tpu"))],
[([torch.device("cpu")], "cuda"), ([torch.device("cuda", i) for i in range(8)], ("tpu"))],
)
def test_parallel_devices_in_strategy_confilict_with_accelerator(parallel_devices, accelerator):
with pytest.raises(MisconfigurationException, match=r"parallel_devices set through"):
Expand Down Expand Up @@ -746,13 +746,51 @@ def test_plugin_only_one_instance_for_one_type(plugins, expected):
Trainer(plugins=plugins)


@pytest.mark.parametrize("accelerator", ("cpu", "gpu", "tpu", "ipu"))
@pytest.mark.parametrize("accelerator", ("cpu", "cuda", "mps", "tpu", "ipu"))
@pytest.mark.parametrize("devices", ("0", 0, []))
def test_passing_zero_and_empty_list_to_devices_flag(accelerator, devices):
with pytest.raises(MisconfigurationException, match="value is not a valid input using"):
Trainer(accelerator=accelerator, devices=devices)


@pytest.mark.parametrize(
"expected_accelerator_flag,expected_accelerator_class",
[
pytest.param("cuda", CUDAAccelerator, marks=RunIf(min_cuda_gpus=1)),
pytest.param("mps", MPSAccelerator, marks=RunIf(mps=True)),
],
)
def test_gpu_accelerator_backend_choice(expected_accelerator_flag, expected_accelerator_class):

trainer = Trainer(accelerator="gpu")
assert trainer._accelerator_connector._accelerator_flag == expected_accelerator_flag
assert isinstance(trainer.accelerator, expected_accelerator_class)


@mock.patch("pytorch_lightning.utilities.device_parser.num_cuda_devices", return_value=1)
def test_gpu_accelerator_backend_choice_cuda(_):
trainer = Trainer(accelerator="gpu")

assert trainer._accelerator_connector._accelerator_flag == "cuda"
assert isinstance(trainer.accelerator, CUDAAccelerator)


@mock.patch("pytorch_lightning.accelerators.mps._MPS_AVAILABLE", return_value=True)
@mock.patch("torch.device", return_value="mps") # necessary because torch doesn't allow creation of mps devices
def test_gpu_accelerator_backend_choice_mps(*_):
trainer = Trainer(accelerator="gpu")

assert trainer._accelerator_connector._accelerator_flag == "mps"
assert isinstance(trainer.accelerator, MPSAccelerator)


@mock.patch("pytorch_lightning.accelerators.mps.MPSAccelerator.is_available", return_value=False)
@mock.patch("pytorch_lightning.accelerators.cuda.CUDAAccelerator.is_available", return_value=False)
def test_gpu_accelerator_misconfiguration_exception(*_):
with pytest.raises(MisconfigurationException, match="No supported gpu backend found!"):
Trainer(accelerator="gpu")


@mock.patch("pytorch_lightning.accelerators.hpu.HPUAccelerator.is_available", return_value=True)
@mock.patch("pytorch_lightning.strategies.hpu_parallel._HPU_AVAILABLE", return_value=True)
@mock.patch("pytorch_lightning.plugins.precision.hpu._HPU_AVAILABLE", return_value=True)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,4 +63,4 @@ def is_available():


def test_available_accelerators_in_registry():
assert AcceleratorRegistry.available_accelerators() == ["cpu", "cuda", "gpu", "hpu", "ipu", "mps", "tpu"]
assert AcceleratorRegistry.available_accelerators() == ["cpu", "cuda", "hpu", "ipu", "mps", "tpu"]
2 changes: 2 additions & 0 deletions tests/tests_pytorch/lite/test_lite.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,9 +315,11 @@ def test_setup_dataloaders_replace_standard_sampler(shuffle, strategy):
"accelerator, expected",
[
("cpu", "cpu"),
pytest.param("cuda", "cuda:0", marks=RunIf(min_cuda_gpus=1)),
pytest.param("gpu", "cuda:0", marks=RunIf(min_cuda_gpus=1)),
pytest.param("tpu", "xla:0", marks=RunIf(tpu=True)),
pytest.param("mps", "mps:0", marks=RunIf(mps=True)),
pytest.param("gpu", "mps:0", marks=RunIf(mps=True)),
],
)
def test_to_device(accelerator, expected):
Expand Down