diff --git a/CHANGELOG.md b/CHANGELOG.md
index c70b3477befa8..dc6ded46af4e9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -234,6 +234,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 - Changed `MisconfigurationException` to `ModuleNotFoundError` when `rich` isn't available ([#11360](https://github.com/PyTorchLightning/pytorch-lightning/pull/11360))
 
+- Changed `parallel_devices` property in `ParallelStrategy` to be lazy initialized ([#11572](https://github.com/PyTorchLightning/pytorch-lightning/pull/11572))
+
+
 - Sorted `SimpleProfiler(extended=False)` summary based on mean duration for each hook ([#11671](https://github.com/PyTorchLightning/pytorch-lightning/pull/11671))
 
diff --git a/pytorch_lightning/strategies/ddp.py b/pytorch_lightning/strategies/ddp.py
index 4aa67baaed422..b5c986cd5c15f 100644
--- a/pytorch_lightning/strategies/ddp.py
+++ b/pytorch_lightning/strategies/ddp.py
@@ -106,7 +106,6 @@ def __init__(
         self.interactive_ddp_procs = []
         self._num_nodes = 1
         self.sync_batchnorm = False
-        self.num_processes = len(self.parallel_devices) if self.parallel_devices is not None else 0
         self._ddp_kwargs = kwargs
         self._ddp_comm_state = ddp_comm_state
         self._ddp_comm_hook = ddp_comm_hook
@@ -135,6 +134,10 @@ def num_nodes(self, num_nodes: int) -> None:
         self._num_nodes = num_nodes
         self.set_world_ranks()
 
+    @property
+    def num_processes(self):
+        return len(self.parallel_devices) if self.parallel_devices is not None else 0
+
     @property
     def distributed_sampler_kwargs(self):
         distributed_sampler_kwargs = dict(num_replicas=(self.num_nodes * self.num_processes), rank=self.global_rank)
diff --git a/pytorch_lightning/strategies/ddp_spawn.py b/pytorch_lightning/strategies/ddp_spawn.py
index 097992dc1975e..faddfd9b27a0f 100644
--- a/pytorch_lightning/strategies/ddp_spawn.py
+++ b/pytorch_lightning/strategies/ddp_spawn.py
@@ -82,7 +82,6 @@ def __init__(
         self._num_nodes = 1
         self.sync_batchnorm = False
         self._ddp_kwargs = kwargs
-        self.num_processes = len(parallel_devices) if parallel_devices is not None else 0
         self._ddp_comm_state = ddp_comm_state
         self._ddp_comm_hook = ddp_comm_hook
         self._ddp_comm_wrapper = ddp_comm_wrapper
@@ -107,6 +106,10 @@ def local_rank(self) -> int:
     def root_device(self):
         return self.parallel_devices[self.local_rank]
 
+    @property
+    def num_processes(self):
+        return len(self.parallel_devices) if self.parallel_devices is not None else 0
+
     @property
     def distributed_sampler_kwargs(self):
         distributed_sampler_kwargs = dict(num_replicas=(self.num_nodes * self.num_processes), rank=self.global_rank)
diff --git a/pytorch_lightning/strategies/parallel.py b/pytorch_lightning/strategies/parallel.py
index 5d7d487a214e3..5840e7816fc75 100644
--- a/pytorch_lightning/strategies/parallel.py
+++ b/pytorch_lightning/strategies/parallel.py
@@ -72,6 +72,14 @@ def world_size(self) -> int:
     def is_global_zero(self) -> bool:
         return self.global_rank == 0
 
+    @property
+    def parallel_devices(self):
+        return self._parallel_devices
+
+    @parallel_devices.setter
+    def parallel_devices(self, parallel_devices):
+        self._parallel_devices = parallel_devices
+
     @property
     def distributed_sampler_kwargs(self):
         distributed_sampler_kwargs = dict(num_replicas=len(self.parallel_devices), rank=self.global_rank)
diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py
index d476bc5f0ca6e..dff6d3b32eac5 100644
--- a/pytorch_lightning/trainer/connectors/accelerator_connector.py
+++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py
@@ -759,8 +759,6 @@ def resolve_strategy(self, training_type: Strategy) -> Strategy:
         # necessary for when the user has passed in a plugin
         if hasattr(training_type, "parallel_devices") and getattr(training_type, "parallel_devices") is None:
             training_type.parallel_devices = self.parallel_devices
-        if hasattr(training_type, "num_processes"):
-            training_type.num_processes = len(self.parallel_devices)
 
         if hasattr(training_type, "cluster_environment") and getattr(training_type, "cluster_environment") is None:
             # transfer ownership of the cluster environment to the training type