From c5687833b464c9ffec7a5bd8331d00e3dda7c50d Mon Sep 17 00:00:00 2001 From: Siyu Wang Date: Fri, 24 Sep 2021 11:43:42 -0700 Subject: [PATCH 01/13] Deprecate LightningDistributed and keep logic in ddp/ddpSpawn directly --- pytorch_lightning/distributed/dist.py | 6 +++++ .../plugins/training_type/ddp.py | 25 +++++++++---------- .../plugins/training_type/ddp_spawn.py | 23 +++++++---------- .../plugins/training_type/deepspeed.py | 3 --- 4 files changed, 27 insertions(+), 30 deletions(-) diff --git a/pytorch_lightning/distributed/dist.py b/pytorch_lightning/distributed/dist.py index d4e41f6e7cc4d..3f5e529fe86b7 100644 --- a/pytorch_lightning/distributed/dist.py +++ b/pytorch_lightning/distributed/dist.py @@ -18,6 +18,12 @@ class LightningDistributed: + """ + .. deprecated:: v1.5 + This method is deprecated in v1.5and will be removed in v1.7. + Logic moved to ddp and ddpspawning + """ + def __init__(self, rank=None, device=None): self.rank = rank self.device = device diff --git a/pytorch_lightning/plugins/training_type/ddp.py b/pytorch_lightning/plugins/training_type/ddp.py index df0f658bf712a..b377468140d6b 100644 --- a/pytorch_lightning/plugins/training_type/ddp.py +++ b/pytorch_lightning/plugins/training_type/ddp.py @@ -31,9 +31,9 @@ import pytorch_lightning as pl from pytorch_lightning.core.optimizer import LightningOptimizer -from pytorch_lightning.distributed import LightningDistributed from pytorch_lightning.overrides import LightningDistributedModule from pytorch_lightning.overrides.distributed import prepare_for_backward +from pytorch_lightning.overrides.torch_distributed import broadcast_object_list from pytorch_lightning.plugins.environments.cluster_environment import ClusterEnvironment from pytorch_lightning.plugins.io.checkpoint_plugin import CheckpointIO from pytorch_lightning.plugins.training_type.parallel import ParallelPlugin @@ -48,13 +48,9 @@ rank_zero_deprecation, rank_zero_warn, ) -from pytorch_lightning.utilities.distributed import ( - distributed_available, - init_ddp_connection, - rank_zero_only, - ReduceOp, - sync_ddp_if_available, -) +from pytorch_lightning.utilities.distributed import distributed_available +from pytorch_lightning.utilities.distributed import group as _group +from pytorch_lightning.utilities.distributed import init_ddp_connection, rank_zero_only, ReduceOp, sync_ddp_if_available from pytorch_lightning.utilities.exceptions import DeadlockDetectedException, MisconfigurationException from pytorch_lightning.utilities.seed import reset_seed from pytorch_lightning.utilities.types import STEP_OUTPUT @@ -116,7 +112,6 @@ def __init__( " Notice that it will be overriden by the trainer setting." 
) self._sync_batchnorm = sync_batchnorm or False - self.dist = LightningDistributed() self.num_processes = len(self.parallel_devices) if self.parallel_devices is not None else 0 self._ddp_kwargs = kwargs self._task_idx = None @@ -270,8 +265,6 @@ def setup_distributed(self): init_ddp_connection(self.cluster_environment, self.torch_distributed_backend) # set the ranks and devices - self.dist.rank = self.global_rank - self.dist.device = self.root_device def _check_can_spawn_children(self): if self.local_rank != 0: @@ -396,14 +389,20 @@ def post_dispatch(self, trainer: "pl.Trainer") -> None: def barrier(self, *args, **kwargs) -> None: if not distributed_available(): - return + raise RuntimeError("DDP is not initialized and torch.distributed is not avalible, can not broadcast object") if _TORCH_GREATER_EQUAL_1_8 and torch.distributed.get_backend() == "nccl": torch.distributed.barrier(device_ids=self.determine_ddp_device_ids()) else: torch.distributed.barrier() def broadcast(self, obj: object, src: int = 0) -> object: - return self.dist.broadcast(obj) + if not distributed_available(): + raise RuntimeError("DDP is not initialized and torch.distributed is not avalible, can not broadcast object") + obj = [obj] + if self.global_rank != 0: + obj = [None] * len(obj) + broadcast_object_list(obj, src, group=_group.WORLD) + return obj[0] def pre_backward(self, closure_loss: torch.Tensor) -> None: """Run before precision plugin executes backward.""" diff --git a/pytorch_lightning/plugins/training_type/ddp_spawn.py b/pytorch_lightning/plugins/training_type/ddp_spawn.py index 5f493001341d6..e46606402d8ca 100644 --- a/pytorch_lightning/plugins/training_type/ddp_spawn.py +++ b/pytorch_lightning/plugins/training_type/ddp_spawn.py @@ -24,9 +24,9 @@ from torch.nn.parallel.distributed import DistributedDataParallel import pytorch_lightning as pl -from pytorch_lightning.distributed.dist import LightningDistributed from pytorch_lightning.overrides import LightningDistributedModule from pytorch_lightning.overrides.distributed import prepare_for_backward +from pytorch_lightning.overrides.torch_distributed import broadcast_object_list from pytorch_lightning.plugins.environments.cluster_environment import ClusterEnvironment from pytorch_lightning.plugins.io.checkpoint_plugin import CheckpointIO from pytorch_lightning.plugins.training_type.parallel import ParallelPlugin @@ -40,13 +40,9 @@ from pytorch_lightning.utilities.apply_func import apply_to_collection from pytorch_lightning.utilities.cloud_io import atomic_save from pytorch_lightning.utilities.cloud_io import load as pl_load -from pytorch_lightning.utilities.distributed import ( - distributed_available, - init_ddp_connection, - rank_zero_only, - ReduceOp, - sync_ddp_if_available, -) +from pytorch_lightning.utilities.distributed import distributed_available +from pytorch_lightning.utilities.distributed import group as _group +from pytorch_lightning.utilities.distributed import init_ddp_connection, rank_zero_only, ReduceOp, sync_ddp_if_available from pytorch_lightning.utilities.model_helpers import is_overridden from pytorch_lightning.utilities.seed import reset_seed from pytorch_lightning.utilities.types import STEP_OUTPUT @@ -93,7 +89,6 @@ def __init__( ) self._sync_batchnorm = sync_batchnorm or False self._ddp_kwargs = kwargs - self.dist = LightningDistributed() self.num_processes = len(parallel_devices) if parallel_devices is not None else 0 self.mp_queue = None self._ddp_comm_state = ddp_comm_state @@ -193,10 +188,6 @@ def new_process(self, process_idx: int, 
trainer: "pl.Trainer", mp_queue: SimpleQ # ... need to double check that it is the correct place # self.trainer.call_setup_hook(self.model) - # set the ranks and devices - self.dist.rank = self.global_rank - self.dist.device = self.root_device - # move the model to the correct device self.model_to_device() @@ -324,7 +315,11 @@ def barrier(self, *args, **kwargs) -> None: def broadcast(self, obj: object, src: int = 0) -> object: if not distributed_available(): return obj - return self.dist.broadcast(obj) + obj = [obj] + if self.global_rank != 0: + obj = [None] * len(obj) + broadcast_object_list(obj, src, group=_group.WORLD) + return obj[0] def model_to_device(self): if self.root_device.type == "cuda": diff --git a/pytorch_lightning/plugins/training_type/deepspeed.py b/pytorch_lightning/plugins/training_type/deepspeed.py index cb3b007b712ff..978152506d0e3 100644 --- a/pytorch_lightning/plugins/training_type/deepspeed.py +++ b/pytorch_lightning/plugins/training_type/deepspeed.py @@ -342,9 +342,6 @@ def setup_distributed(self): self._init_deepspeed_distributed() - # set the ranks and devices - self.dist.rank = self.global_rank - self.dist.device = self.root_device if not self._config_initialized: self._format_config() self._config_initialized = True From 86a886d7fbc0b3a6176c3ca236c931025fef975f Mon Sep 17 00:00:00 2001 From: Siyu Wang Date: Fri, 24 Sep 2021 12:07:21 -0700 Subject: [PATCH 02/13] Deprecate LightningDistributed and keep logic in ddp/ddpSpawn directly --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c77a3409ddb2b..3592f15b82038 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -267,6 +267,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Deprecated passing `progress_bar_refresh_rate` to the `Trainer` constructor in favor of adding the `ProgressBar` callback with `refresh_rate` directly to the list of callbacks ([#9616](https://github.com/PyTorchLightning/pytorch-lightning/pull/9616)) +- Deprecate `LightningDistributed` and move the broadcast logic to ddp/ddpSpawn directly ([#9691](https://github.com/PyTorchLightning/pytorch-lightning/pull/9691)) + + ### Removed - Removed deprecated `metrics` ([#8586](https://github.com/PyTorchLightning/pytorch-lightning/pull/8586/)) From 913630d4cb6f3352ad56feee42bc468dc1d18e10 Mon Sep 17 00:00:00 2001 From: Siyu Wang Date: Fri, 24 Sep 2021 12:41:13 -0700 Subject: [PATCH 03/13] Deprecate LightningDistributed and keep logic in ddp/ddpSpawn directly --- pytorch_lightning/plugins/training_type/ddp.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pytorch_lightning/plugins/training_type/ddp.py b/pytorch_lightning/plugins/training_type/ddp.py index b377468140d6b..0523d56caa646 100644 --- a/pytorch_lightning/plugins/training_type/ddp.py +++ b/pytorch_lightning/plugins/training_type/ddp.py @@ -396,8 +396,6 @@ def barrier(self, *args, **kwargs) -> None: torch.distributed.barrier() def broadcast(self, obj: object, src: int = 0) -> object: - if not distributed_available(): - raise RuntimeError("DDP is not initialized and torch.distributed is not avalible, can not broadcast object") obj = [obj] if self.global_rank != 0: obj = [None] * len(obj) From ee06af099bea0f9976bb09bd022d28301bfa437f Mon Sep 17 00:00:00 2001 From: Siyu Wang Date: Fri, 24 Sep 2021 13:10:44 -0700 Subject: [PATCH 04/13] Deprecate LightningDistributed and keep logic in ddp/ddpSpawn directly --- pytorch_lightning/plugins/training_type/ddp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/pytorch_lightning/plugins/training_type/ddp.py b/pytorch_lightning/plugins/training_type/ddp.py index 0523d56caa646..6824143941b65 100644 --- a/pytorch_lightning/plugins/training_type/ddp.py +++ b/pytorch_lightning/plugins/training_type/ddp.py @@ -389,7 +389,7 @@ def post_dispatch(self, trainer: "pl.Trainer") -> None: def barrier(self, *args, **kwargs) -> None: if not distributed_available(): - raise RuntimeError("DDP is not initialized and torch.distributed is not avalible, can not broadcast object") + return if _TORCH_GREATER_EQUAL_1_8 and torch.distributed.get_backend() == "nccl": torch.distributed.barrier(device_ids=self.determine_ddp_device_ids()) else: From ba1e6203c86d951d827761b6e9b157581e10f14e Mon Sep 17 00:00:00 2001 From: four4fish <88516121+four4fish@users.noreply.github.com> Date: Fri, 24 Sep 2021 14:19:19 -0700 Subject: [PATCH 05/13] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Adrian Wälchli --- pytorch_lightning/distributed/dist.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/distributed/dist.py b/pytorch_lightning/distributed/dist.py index 3f5e529fe86b7..1658295bf37b6 100644 --- a/pytorch_lightning/distributed/dist.py +++ b/pytorch_lightning/distributed/dist.py @@ -20,8 +20,8 @@ class LightningDistributed: """ .. deprecated:: v1.5 - This method is deprecated in v1.5and will be removed in v1.7. - Logic moved to ddp and ddpspawning + This class is deprecated in v1.5 and will be removed in v1.7. + The broadcast logic will be moved to the :class:`DDPPlugin` and :class`DDPSpawnPlugin` classes. """ def __init__(self, rank=None, device=None): From de22526bc8607313aac9d73b8a2b0884ef662536 Mon Sep 17 00:00:00 2001 From: four4fish <88516121+four4fish@users.noreply.github.com> Date: Fri, 24 Sep 2021 14:23:05 -0700 Subject: [PATCH 06/13] Apply suggestions from code review Co-authored-by: ananthsub --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3592f15b82038..985c5d0990d50 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -267,7 +267,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
- Deprecated passing `progress_bar_refresh_rate` to the `Trainer` constructor in favor of adding the `ProgressBar` callback with `refresh_rate` directly to the list of callbacks ([#9616](https://github.com/PyTorchLightning/pytorch-lightning/pull/9616)) -- Deprecate `LightningDistributed` and move the broadcast logic to ddp/ddpSpawn directly ([#9691](https://github.com/PyTorchLightning/pytorch-lightning/pull/9691)) +- Deprecate `LightningDistributed` and move the broadcast logic to `DDPPlugin` and `DDPSpawnPlugin` directly ([#9691](https://github.com/PyTorchLightning/pytorch-lightning/pull/9691)) ### Removed From 3cbcb449c006e6914cb63fecd2256ee11e09916c Mon Sep 17 00:00:00 2001 From: Siyu Wang Date: Fri, 24 Sep 2021 14:56:29 -0700 Subject: [PATCH 07/13] Deprecate LightningDistributed and keep logic in ddp/ddpSpawn directly --- pytorch_lightning/distributed/dist.py | 7 ++++++- pytorch_lightning/plugins/training_type/ddp.py | 2 +- pytorch_lightning/plugins/training_type/ddp_spawn.py | 2 +- setup.cfg | 1 + tests/deprecated_api/test_remove_1-7.py | 7 +++++++ 5 files changed, 16 insertions(+), 3 deletions(-) diff --git a/pytorch_lightning/distributed/dist.py b/pytorch_lightning/distributed/dist.py index 1658295bf37b6..fe47eb6d0297c 100644 --- a/pytorch_lightning/distributed/dist.py +++ b/pytorch_lightning/distributed/dist.py @@ -14,6 +14,7 @@ from typing import Any from pytorch_lightning.overrides.torch_distributed import broadcast_object_list +from pytorch_lightning.utilities import rank_zero_deprecation from pytorch_lightning.utilities.distributed import group as _group @@ -21,10 +22,14 @@ class LightningDistributed: """ .. deprecated:: v1.5 This class is deprecated in v1.5 and will be removed in v1.7. - The broadcast logic will be moved to the :class:`DDPPlugin` and :class`DDPSpawnPlugin` classes. + The broadcast logic will be moved to the :class:`DDPPlugin` and :class`DDPSpawnPlugin` classes. """ def __init__(self, rank=None, device=None): + rank_zero_deprecation( + "This class is deprecated in v1.5 and will be removed in v1.7." + "he broadcast logic will be moved to the :class:`DDPPlugin` and :class`DDPSpawnPlugin` classes." 
+ ) self.rank = rank self.device = device diff --git a/pytorch_lightning/plugins/training_type/ddp.py b/pytorch_lightning/plugins/training_type/ddp.py index 6824143941b65..1fb7b6732f97d 100644 --- a/pytorch_lightning/plugins/training_type/ddp.py +++ b/pytorch_lightning/plugins/training_type/ddp.py @@ -397,7 +397,7 @@ def barrier(self, *args, **kwargs) -> None: def broadcast(self, obj: object, src: int = 0) -> object: obj = [obj] - if self.global_rank != 0: + if self.global_rank != src: obj = [None] * len(obj) broadcast_object_list(obj, src, group=_group.WORLD) return obj[0] diff --git a/pytorch_lightning/plugins/training_type/ddp_spawn.py b/pytorch_lightning/plugins/training_type/ddp_spawn.py index e46606402d8ca..5f2b31ac1a53f 100644 --- a/pytorch_lightning/plugins/training_type/ddp_spawn.py +++ b/pytorch_lightning/plugins/training_type/ddp_spawn.py @@ -316,7 +316,7 @@ def broadcast(self, obj: object, src: int = 0) -> object: if not distributed_available(): return obj obj = [obj] - if self.global_rank != 0: + if self.global_rank != src: obj = [None] * len(obj) broadcast_object_list(obj, src, group=_group.WORLD) return obj[0] diff --git a/setup.cfg b/setup.cfg index 86890f08e2c68..99f3a513b0914 100644 --- a/setup.cfg +++ b/setup.cfg @@ -46,6 +46,7 @@ omit = pytorch_lightning/cluster_environments/*.py pytorch_lightning/utilities/distributed.py pytorch_lightning/tuner/auto_gpu_select.py + pytorch_lightning/distributed/dist.py [flake8] diff --git a/tests/deprecated_api/test_remove_1-7.py b/tests/deprecated_api/test_remove_1-7.py index 2fa8f96e77148..dbc47d4ddcb19 100644 --- a/tests/deprecated_api/test_remove_1-7.py +++ b/tests/deprecated_api/test_remove_1-7.py @@ -243,3 +243,10 @@ def test_v1_7_0_lightning_logger_base_close(tmpdir): ): logger = LoggerCollection([logger]) logger.close() + + +def test_v1_7_0_deprecate_lightning_distributed(tmpdir): + with pytest.deprecated_call(match="This class is deprecated in v1.5 and will be removed in v1.7."): + from pytorch_lightning.distributed.dist import LightningDistributed + + _ = LightningDistributed() From 7e52d866db6f662478da2a38f977996215528da4 Mon Sep 17 00:00:00 2001 From: four4fish <88516121+four4fish@users.noreply.github.com> Date: Fri, 24 Sep 2021 15:05:06 -0700 Subject: [PATCH 08/13] Update pytorch_lightning/distributed/dist.py Co-authored-by: ananthsub --- pytorch_lightning/distributed/dist.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/distributed/dist.py b/pytorch_lightning/distributed/dist.py index fe47eb6d0297c..07b25e9b1f730 100644 --- a/pytorch_lightning/distributed/dist.py +++ b/pytorch_lightning/distributed/dist.py @@ -28,7 +28,7 @@ class LightningDistributed: def __init__(self, rank=None, device=None): rank_zero_deprecation( "This class is deprecated in v1.5 and will be removed in v1.7." - "he broadcast logic will be moved to the :class:`DDPPlugin` and :class`DDPSpawnPlugin` classes." + "Broadcast logic is implemented directly in the :class:`TrainingTypePlugin` implementations." 
) self.rank = rank self.device = device From 2f2d28e12c676f7e08493796cc46cd36271447db Mon Sep 17 00:00:00 2001 From: Siyu Wang Date: Fri, 24 Sep 2021 15:55:16 -0700 Subject: [PATCH 09/13] Deprecate LightningDistributed and keep logic in ddp/ddpSpawn directly --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 985c5d0990d50..d440db181d77a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -398,6 +398,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed `trainer.accumulate_grad_batches` to be an int on init. Default value for it is now `None` inside Trainer ([#9652](https://github.com/PyTorchLightning/pytorch-lightning/pull/9652)) +- Fixed `broadcast` in `DDPPlugin` and ``DDPSpawnPlugin` to respects the src input ([#9691](https://github.com/PyTorchLightning/pytorch-lightning/pull/9691)) + + ## [1.4.8] - 2021-09-22 - Fixed error reporting in DDP process reconciliation when processes are launched by an external agent ([#9389](https://github.com/PyTorchLightning/pytorch-lightning/pull/9389)) From 3a1d92ced076fbc8d4c6257b32039b4f38626cc2 Mon Sep 17 00:00:00 2001 From: four4fish <88516121+four4fish@users.noreply.github.com> Date: Fri, 24 Sep 2021 16:18:42 -0700 Subject: [PATCH 10/13] Apply suggestions from code review Co-authored-by: ananthsub --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d440db181d77a..d351fe2e97f47 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -398,7 +398,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed `trainer.accumulate_grad_batches` to be an int on init. Default value for it is now `None` inside Trainer ([#9652](https://github.com/PyTorchLightning/pytorch-lightning/pull/9652)) -- Fixed `broadcast` in `DDPPlugin` and ``DDPSpawnPlugin` to respects the src input ([#9691](https://github.com/PyTorchLightning/pytorch-lightning/pull/9691)) +- Fixed `broadcast` in `DDPPlugin` and ``DDPSpawnPlugin` to respect the `src` input ([#9691](https://github.com/PyTorchLightning/pytorch-lightning/pull/9691)) ## [1.4.8] - 2021-09-22 From db5e4e3fffd9405ceed8ff1fd104c8b7920a4b27 Mon Sep 17 00:00:00 2001 From: ananthsub Date: Fri, 24 Sep 2021 19:36:58 -0700 Subject: [PATCH 11/13] Apply suggestions from code review --- pytorch_lightning/distributed/dist.py | 2 +- tests/deprecated_api/test_remove_1-7.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/distributed/dist.py b/pytorch_lightning/distributed/dist.py index 07b25e9b1f730..082e0c617a5f7 100644 --- a/pytorch_lightning/distributed/dist.py +++ b/pytorch_lightning/distributed/dist.py @@ -27,7 +27,7 @@ class LightningDistributed: def __init__(self, rank=None, device=None): rank_zero_deprecation( - "This class is deprecated in v1.5 and will be removed in v1.7." + "LightningDistributed is deprecated in v1.5 and will be removed in v1.7." "Broadcast logic is implemented directly in the :class:`TrainingTypePlugin` implementations." 
) self.rank = rank diff --git a/tests/deprecated_api/test_remove_1-7.py b/tests/deprecated_api/test_remove_1-7.py index dbc47d4ddcb19..dbbba95a6de4c 100644 --- a/tests/deprecated_api/test_remove_1-7.py +++ b/tests/deprecated_api/test_remove_1-7.py @@ -246,7 +246,7 @@ def test_v1_7_0_lightning_logger_base_close(tmpdir): def test_v1_7_0_deprecate_lightning_distributed(tmpdir): - with pytest.deprecated_call(match="This class is deprecated in v1.5 and will be removed in v1.7."): + with pytest.deprecated_call(match="LightningDistributed is deprecated in v1.5 and will be removed in v1.7."): from pytorch_lightning.distributed.dist import LightningDistributed _ = LightningDistributed() From 8c4fe83780fdffbde24380343d9fac2531bf2412 Mon Sep 17 00:00:00 2001 From: ananthsub Date: Fri, 24 Sep 2021 19:40:41 -0700 Subject: [PATCH 12/13] Apply suggestions from code review --- pytorch_lightning/plugins/training_type/ddp.py | 2 +- pytorch_lightning/plugins/training_type/ddp_spawn.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/plugins/training_type/ddp.py b/pytorch_lightning/plugins/training_type/ddp.py index 1fb7b6732f97d..1e0927f94e48d 100644 --- a/pytorch_lightning/plugins/training_type/ddp.py +++ b/pytorch_lightning/plugins/training_type/ddp.py @@ -398,7 +398,7 @@ def barrier(self, *args, **kwargs) -> None: def broadcast(self, obj: object, src: int = 0) -> object: obj = [obj] if self.global_rank != src: - obj = [None] * len(obj) + obj = [None] broadcast_object_list(obj, src, group=_group.WORLD) return obj[0] diff --git a/pytorch_lightning/plugins/training_type/ddp_spawn.py b/pytorch_lightning/plugins/training_type/ddp_spawn.py index 5f2b31ac1a53f..eb1acaec4100b 100644 --- a/pytorch_lightning/plugins/training_type/ddp_spawn.py +++ b/pytorch_lightning/plugins/training_type/ddp_spawn.py @@ -317,7 +317,7 @@ def broadcast(self, obj: object, src: int = 0) -> object: return obj obj = [obj] if self.global_rank != src: - obj = [None] * len(obj) + obj = [None] broadcast_object_list(obj, src, group=_group.WORLD) return obj[0] From e6980f24fe8164cfa50bd19689e5353f89e08885 Mon Sep 17 00:00:00 2001 From: Siyu Wang Date: Fri, 24 Sep 2021 20:03:44 -0700 Subject: [PATCH 13/13] Deprecate LightningDistributed and keep logic in ddp/ddpSpawn directly --- pytorch_lightning/plugins/training_type/ddp.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pytorch_lightning/plugins/training_type/ddp.py b/pytorch_lightning/plugins/training_type/ddp.py index 1e0927f94e48d..a26b63151f5a8 100644 --- a/pytorch_lightning/plugins/training_type/ddp.py +++ b/pytorch_lightning/plugins/training_type/ddp.py @@ -264,8 +264,6 @@ def setup_distributed(self): # where to store ip_table init_ddp_connection(self.cluster_environment, self.torch_distributed_backend) - # set the ranks and devices - def _check_can_spawn_children(self): if self.local_rank != 0: raise RuntimeError(
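
The broadcast logic this series converges on (introduced in PATCH 01, corrected to honor `src` in PATCH 07, and simplified in PATCH 12) can be exercised as a standalone snippet. The sketch below is a minimal illustration under stated assumptions, not part of the patches: it presumes a default process group is already initialized (for example via `torch.distributed.init_process_group`) and it calls `torch.distributed.broadcast_object_list` directly, which requires PyTorch >= 1.8; the patches themselves import `broadcast_object_list` from `pytorch_lightning.overrides.torch_distributed` instead, as the import hunks in PATCH 01 show.

    import torch.distributed as dist

    def broadcast(obj: object, src: int = 0) -> object:
        # broadcast_object_list fills its argument in place, so wrap the
        # single payload in a one-element list.
        buf = [obj]
        if dist.get_rank() != src:
            # Every rank must pass a list of the same length; receiving
            # ranks provide a placeholder that gets overwritten.
            buf = [None]
        dist.broadcast_object_list(buf, src, group=dist.group.WORLD)
        return buf[0]

The `!= src` comparison from PATCH 07 is the behavioral fix here: with the original hard-coded `!= 0`, any broadcast from a non-zero `src` would have replaced the source rank's own payload with `None` before sending it. PATCH 12's `[None]` in place of `[None] * len(obj)` is a pure simplification, since `obj` has just been wrapped in a one-element list.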