From 8eaafe25453dd3fa11e8d9fe4b5096399d77ef3e Mon Sep 17 00:00:00 2001 From: Siyu Wang Date: Fri, 21 Jan 2022 17:01:44 -0800 Subject: [PATCH 1/3] Lazy initialize Strategy.parallel_devices --- CHANGELOG.md | 3 +++ pytorch_lightning/strategies/ddp.py | 5 ++++- pytorch_lightning/strategies/ddp_spawn.py | 5 ++++- pytorch_lightning/strategies/parallel.py | 8 ++++++++ .../trainer/connectors/accelerator_connector.py | 2 -- 5 files changed, 19 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index aa7c4f9b056bc..6e7c85db33687 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -222,6 +222,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Changed `MisconfigurationException` to `ModuleNotFoundError` when `rich` isn't available ([#11360](https://github.com/PyTorchLightning/pytorch-lightning/pull/11360)) +- Changed `parallel_devices` property in `ParallelStrategy` to be lazy initialized ([#](https://github.com/PyTorchLightning/pytorch-lightning/pull/)) + + ### Deprecated - Deprecated `ClusterEnvironment.master_{address,port}` in favor of `ClusterEnvironment.main_{address,port}` ([#10103](https://github.com/PyTorchLightning/pytorch-lightning/pull/10103)) diff --git a/pytorch_lightning/strategies/ddp.py b/pytorch_lightning/strategies/ddp.py index 4aa67baaed422..b5c986cd5c15f 100644 --- a/pytorch_lightning/strategies/ddp.py +++ b/pytorch_lightning/strategies/ddp.py @@ -106,7 +106,6 @@ def __init__( self.interactive_ddp_procs = [] self._num_nodes = 1 self.sync_batchnorm = False - self.num_processes = len(self.parallel_devices) if self.parallel_devices is not None else 0 self._ddp_kwargs = kwargs self._ddp_comm_state = ddp_comm_state self._ddp_comm_hook = ddp_comm_hook @@ -135,6 +134,10 @@ def num_nodes(self, num_nodes: int) -> None: self._num_nodes = num_nodes self.set_world_ranks() + @property + def num_processes(self): + return len(self.parallel_devices) if self.parallel_devices is not None else 0 + @property def distributed_sampler_kwargs(self): distributed_sampler_kwargs = dict(num_replicas=(self.num_nodes * self.num_processes), rank=self.global_rank) diff --git a/pytorch_lightning/strategies/ddp_spawn.py b/pytorch_lightning/strategies/ddp_spawn.py index 097992dc1975e..faddfd9b27a0f 100644 --- a/pytorch_lightning/strategies/ddp_spawn.py +++ b/pytorch_lightning/strategies/ddp_spawn.py @@ -82,7 +82,6 @@ def __init__( self._num_nodes = 1 self.sync_batchnorm = False self._ddp_kwargs = kwargs - self.num_processes = len(parallel_devices) if parallel_devices is not None else 0 self._ddp_comm_state = ddp_comm_state self._ddp_comm_hook = ddp_comm_hook self._ddp_comm_wrapper = ddp_comm_wrapper @@ -107,6 +106,10 @@ def local_rank(self) -> int: def root_device(self): return self.parallel_devices[self.local_rank] + @property + def num_processes(self): + return len(self.parallel_devices) if self.parallel_devices is not None else 0 + @property def distributed_sampler_kwargs(self): distributed_sampler_kwargs = dict(num_replicas=(self.num_nodes * self.num_processes), rank=self.global_rank) diff --git a/pytorch_lightning/strategies/parallel.py b/pytorch_lightning/strategies/parallel.py index 5d7d487a214e3..5840e7816fc75 100644 --- a/pytorch_lightning/strategies/parallel.py +++ b/pytorch_lightning/strategies/parallel.py @@ -72,6 +72,14 @@ def world_size(self) -> int: def is_global_zero(self) -> bool: return self.global_rank == 0 + @property + def parallel_devices(self): + return self._parallel_devices + + @parallel_devices.setter + def parallel_devices(self, parallel_devices): + self._parallel_devices = parallel_devices + @property def distributed_sampler_kwargs(self): distributed_sampler_kwargs = dict(num_replicas=len(self.parallel_devices), rank=self.global_rank) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index d476bc5f0ca6e..dff6d3b32eac5 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -759,8 +759,6 @@ def resolve_strategy(self, training_type: Strategy) -> Strategy: # necessary for when the user has passed in a plugin if hasattr(training_type, "parallel_devices") and getattr(training_type, "parallel_devices") is None: training_type.parallel_devices = self.parallel_devices - if hasattr(training_type, "num_processes"): - training_type.num_processes = len(self.parallel_devices) if hasattr(training_type, "cluster_environment") and getattr(training_type, "cluster_environment") is None: # transfer ownership of the cluster environment to the training type From d67fb210c03b1e36b7217411b697055a10c64e85 Mon Sep 17 00:00:00 2001 From: Aki Nitta Date: Tue, 25 Jan 2022 03:55:54 +0900 Subject: [PATCH 2/3] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6e7c85db33687..4447174e73f07 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -222,7 +222,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Changed `MisconfigurationException` to `ModuleNotFoundError` when `rich` isn't available ([#11360](https://github.com/PyTorchLightning/pytorch-lightning/pull/11360)) -- Changed `parallel_devices` property in `ParallelStrategy` to be lazy initialized ([#](https://github.com/PyTorchLightning/pytorch-lightning/pull/)) +- Changed `parallel_devices` property in `ParallelStrategy` to be lazy initialized ([#11572](https://github.com/PyTorchLightning/pytorch-lightning/pull/11572)) ### Deprecated From 3b4e55742e4b7ad165737cd10181fee400100062 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 3 Feb 2022 03:13:11 +0000 Subject: [PATCH 3/3] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f9a76351ee603..dc6ded46af4e9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -236,7 +236,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Changed `parallel_devices` property in `ParallelStrategy` to be lazy initialized ([#11572](https://github.com/PyTorchLightning/pytorch-lightning/pull/11572)) - + - Sorted `SimpleProfiler(extended=False)` summary based on mean duration for each hook ([#11671](https://github.com/PyTorchLightning/pytorch-lightning/pull/11671))