From db65aa1a952ee355f9a60e5e95433215b7688493 Mon Sep 17 00:00:00 2001 From: Danielle Pintz Date: Fri, 20 Aug 2021 22:08:34 -0700 Subject: [PATCH 01/19] Move add_to_queue/get_from_queue to ddp_spawn.py --- pytorch_lightning/core/lightning.py | 22 +++++++------- pytorch_lightning/loggers/wandb.py | 2 +- .../plugins/training_type/ddp_spawn.py | 29 +++++++++++++++++++ tests/loggers/test_wandb.py | 1 - tests/plugins/test_ddp_spawn_plugin.py | 2 +- 5 files changed, 43 insertions(+), 13 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index c847eea57ccd0..ae80c985de899 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -24,7 +24,6 @@ from pathlib import Path from typing import Any, Callable, Dict, List, Mapping, Optional, Tuple, Union -import numpy as np import torch from torch import ScriptModule, Tensor from torch.nn import Module @@ -1949,11 +1948,13 @@ def add_to_queue(self, queue: torch.multiprocessing.SimpleQueue) -> None: Args: queue: the instance of the queue to append the data. + + .. deprecated:: v1.5 + This method was deprecated in v1.5 in favor of + `pytorch_lightning.plugins.training_type.ddp_spawn.add_to_queue` + and will be removed in v1.7. """ - callback_metrics: dict = apply_to_collection( - self.trainer.callback_metrics, torch.Tensor, lambda x: x.cpu().numpy() - ) # send as numpy to avoid issues with memory sharing - queue.put(callback_metrics) + self.trainer.training_type_plugin.add_to_queue(queue) def get_from_queue(self, queue: torch.multiprocessing.SimpleQueue) -> None: """ @@ -1962,12 +1963,13 @@ def get_from_queue(self, queue: torch.multiprocessing.SimpleQueue) -> None: Args: queue: the instance of the queue from where to get the data. + + .. deprecated:: v1.5 + This method was deprecated in v1.5 in favor of + `pytorch_lightning.plugins.training_type.ddp_spawn.get_from_queue` + and will be removed in v1.7. """ - # NOTE: `add_to_queue` needs to be called before - callback_metrics: dict = queue.get() - self.trainer.callback_metrics.update( - apply_to_collection(callback_metrics, np.ndarray, lambda x: torch.tensor(x)) - ) + self.trainer.training_type_plugin.get_from_queue(queue) @contextmanager def _prevent_trainer_and_dataloaders_deepcopy(self) -> None: diff --git a/pytorch_lightning/loggers/wandb.py b/pytorch_lightning/loggers/wandb.py index 94baf1781e7ab..0299cb522e59f 100644 --- a/pytorch_lightning/loggers/wandb.py +++ b/pytorch_lightning/loggers/wandb.py @@ -29,7 +29,7 @@ from pytorch_lightning.utilities import _module_available, rank_zero_only from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.imports import _compare_version -from pytorch_lightning.utilities.warnings import rank_zero_deprecation, rank_zero_warn +from pytorch_lightning.utilities.warnings import rank_zero_warn _WANDB_AVAILABLE = _module_available("wandb") _WANDB_GREATER_EQUAL_0_10_22 = _compare_version("wandb", operator.ge, "0.10.22") diff --git a/pytorch_lightning/plugins/training_type/ddp_spawn.py b/pytorch_lightning/plugins/training_type/ddp_spawn.py index dce7f6e58c3ab..ea55a57ddd0e4 100644 --- a/pytorch_lightning/plugins/training_type/ddp_spawn.py +++ b/pytorch_lightning/plugins/training_type/ddp_spawn.py @@ -16,6 +16,7 @@ import re from multiprocessing.queues import SimpleQueue from typing import Any, Dict, List, Optional, Union +import numpy as np import torch import torch.distributed @@ -36,6 +37,7 @@ rank_zero_deprecation, rank_zero_warn, ) +from pytorch_lightning.utilities.apply_func import apply_to_collection from pytorch_lightning.utilities.cloud_io import atomic_save from pytorch_lightning.utilities.cloud_io import load as pl_load from pytorch_lightning.utilities.distributed import ( @@ -385,3 +387,30 @@ def register_plugins(cls, plugin_registry: Dict) -> None: description="DDPSpawn Plugin with `find_unused_parameters` as False", find_unused_parameters=False, ) + + def add_to_queue(self, queue: torch.multiprocessing.SimpleQueue) -> None: + """ + Appends the :attr:`trainer.callback_metrics` dictionary to the given queue. + To avoid issues with memory sharing, we cast the data to numpy. + + Args: + queue: the instance of the queue to append the data. + """ + callback_metrics: dict = apply_to_collection( + self.trainer.callback_metrics, torch.Tensor, lambda x: x.cpu().numpy() + ) # send as numpy to avoid issues with memory sharing + queue.put(callback_metrics) + + def get_from_queue(self, queue: torch.multiprocessing.SimpleQueue) -> None: + """ + Retrieve the :attr:`trainer.callback_metrics` dictionary from the given queue. + To preserve consistency, we cast back the data to ``torch.Tensor``. + + Args: + queue: the instance of the queue from where to get the data. + """ + # NOTE: `add_to_queue` needs to be called before + callback_metrics: dict = queue.get() + self.trainer.callback_metrics.update( + apply_to_collection(callback_metrics, np.ndarray, lambda x: torch.tensor(x)) + ) diff --git a/tests/loggers/test_wandb.py b/tests/loggers/test_wandb.py index e5b80993b392c..0684727e84ac8 100644 --- a/tests/loggers/test_wandb.py +++ b/tests/loggers/test_wandb.py @@ -18,7 +18,6 @@ import pytest -import pytorch_lightning from pytorch_lightning import Trainer from pytorch_lightning.loggers import WandbLogger from pytorch_lightning.utilities.exceptions import MisconfigurationException diff --git a/tests/plugins/test_ddp_spawn_plugin.py b/tests/plugins/test_ddp_spawn_plugin.py index 1ab94446c8176..bc2aad72a72c2 100644 --- a/tests/plugins/test_ddp_spawn_plugin.py +++ b/tests/plugins/test_ddp_spawn_plugin.py @@ -62,7 +62,7 @@ def test_ddp_cpu(): @RunIf(min_gpus=2) def test_ddp_spawn_extra_parameters(tmpdir): - """Tests if device is set correctely when training for DDPSpawnPlugin.""" + """Tests if device is set correctly when training for DDPSpawnPlugin.""" trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, gpus=2, accelerator="ddp_spawn") assert isinstance(trainer.training_type_plugin, DDPSpawnPlugin) From 4ca811c9669750b6e5536fe9ebea03ea0dabc8ed Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 25 Aug 2021 21:48:36 +0000 Subject: [PATCH 02/19] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pytorch_lightning/plugins/training_type/ddp_spawn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/plugins/training_type/ddp_spawn.py b/pytorch_lightning/plugins/training_type/ddp_spawn.py index ea55a57ddd0e4..1650e65a69d88 100644 --- a/pytorch_lightning/plugins/training_type/ddp_spawn.py +++ b/pytorch_lightning/plugins/training_type/ddp_spawn.py @@ -16,8 +16,8 @@ import re from multiprocessing.queues import SimpleQueue from typing import Any, Dict, List, Optional, Union -import numpy as np +import numpy as np import torch import torch.distributed import torch.multiprocessing as mp From 785be9ee8f1a915a7eaad4624eb04e7456bc9681 Mon Sep 17 00:00:00 2001 From: Danielle Pintz Date: Wed, 25 Aug 2021 22:42:13 +0000 Subject: [PATCH 03/19] Update changelog and add todos --- CHANGELOG.md | 2 +- pytorch_lightning/core/lightning.py | 4 ++-- .../plugins/training_type/ddp_spawn.py | 13 +++++++------ tests/plugins/test_ddp_spawn_plugin.py | 6 ++++-- 4 files changed, 14 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 00ab2503a86b1..753eca26be2b6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -111,7 +111,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Deprecated `DataModule` properties: `train_transforms`, `val_transforms`, `test_transforms`, `size`, `dims` ([#8851](https://github.com/PyTorchLightning/pytorch-lightning/pull/8851)) -- +- Deprecated `add_to_queue`, `get_from_queue` from Lightning Module([9118](https://github.com/PyTorchLightning/pytorch-lightning/pull/9118)) - diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index ae80c985de899..98b9eba530d42 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1954,7 +1954,7 @@ def add_to_queue(self, queue: torch.multiprocessing.SimpleQueue) -> None: `pytorch_lightning.plugins.training_type.ddp_spawn.add_to_queue` and will be removed in v1.7. """ - self.trainer.training_type_plugin.add_to_queue(queue) + self.trainer.training_type_plugin.add_to_queue(self.trainer, queue) def get_from_queue(self, queue: torch.multiprocessing.SimpleQueue) -> None: """ @@ -1969,7 +1969,7 @@ def get_from_queue(self, queue: torch.multiprocessing.SimpleQueue) -> None: `pytorch_lightning.plugins.training_type.ddp_spawn.get_from_queue` and will be removed in v1.7. """ - self.trainer.training_type_plugin.get_from_queue(queue) + self.trainer.training_type_plugin.get_from_queue(self.trainer, queue) @contextmanager def _prevent_trainer_and_dataloaders_deepcopy(self) -> None: diff --git a/pytorch_lightning/plugins/training_type/ddp_spawn.py b/pytorch_lightning/plugins/training_type/ddp_spawn.py index ea55a57ddd0e4..9450746396856 100644 --- a/pytorch_lightning/plugins/training_type/ddp_spawn.py +++ b/pytorch_lightning/plugins/training_type/ddp_spawn.py @@ -215,6 +215,7 @@ def new_process(self, process_idx: int, trainer: "pl.Trainer", mp_queue: SimpleQ # ensure that spawned processes go through teardown before joining trainer._call_teardown_hook() + # TODO(@daniellepintz): add trainer as an argument in v1.7 def post_dispatch(self): # restore main state with best weights best_path = self.mp_queue.get() @@ -222,6 +223,7 @@ def post_dispatch(self): self._results = self.mp_queue.get() # get the `callback_metrics` and set it to the trainer # only in case the user does not override it. + # TODO(@daniellepintz): update line to `self.get_from_queue(trainer, self.mp_queue)` in v1.7 self.lightning_module.get_from_queue(self.mp_queue) # recover the weights of the processes trained in the children @@ -309,6 +311,7 @@ def __transfer_distrib_spawn_state_on_fit_end(self, trainer: "pl.Trainer", resul self.mp_queue.put(best_model_path) self.mp_queue.put(last_path) self.mp_queue.put(results) + # TODO(@daniellepintz): update line to `self.add_to_queue(trainer, self.mp_queue)` in v1.7 self.lightning_module.add_to_queue(self.mp_queue) # adds the `callback_metrics` to the queue def __recover_child_process_weights(self, best_path, last_path): @@ -388,7 +391,7 @@ def register_plugins(cls, plugin_registry: Dict) -> None: find_unused_parameters=False, ) - def add_to_queue(self, queue: torch.multiprocessing.SimpleQueue) -> None: + def add_to_queue(self, trainer: "pl.Trainer", queue: torch.multiprocessing.SimpleQueue) -> None: """ Appends the :attr:`trainer.callback_metrics` dictionary to the given queue. To avoid issues with memory sharing, we cast the data to numpy. @@ -397,11 +400,11 @@ def add_to_queue(self, queue: torch.multiprocessing.SimpleQueue) -> None: queue: the instance of the queue to append the data. """ callback_metrics: dict = apply_to_collection( - self.trainer.callback_metrics, torch.Tensor, lambda x: x.cpu().numpy() + trainer.callback_metrics, torch.Tensor, lambda x: x.cpu().numpy() ) # send as numpy to avoid issues with memory sharing queue.put(callback_metrics) - def get_from_queue(self, queue: torch.multiprocessing.SimpleQueue) -> None: + def get_from_queue(self, trainer: "pl.Trainer", queue: torch.multiprocessing.SimpleQueue) -> None: """ Retrieve the :attr:`trainer.callback_metrics` dictionary from the given queue. To preserve consistency, we cast back the data to ``torch.Tensor``. @@ -411,6 +414,4 @@ def get_from_queue(self, queue: torch.multiprocessing.SimpleQueue) -> None: """ # NOTE: `add_to_queue` needs to be called before callback_metrics: dict = queue.get() - self.trainer.callback_metrics.update( - apply_to_collection(callback_metrics, np.ndarray, lambda x: torch.tensor(x)) - ) + trainer.callback_metrics.update(apply_to_collection(callback_metrics, np.ndarray, lambda x: torch.tensor(x))) diff --git a/tests/plugins/test_ddp_spawn_plugin.py b/tests/plugins/test_ddp_spawn_plugin.py index bc2aad72a72c2..8f38d86417221 100644 --- a/tests/plugins/test_ddp_spawn_plugin.py +++ b/tests/plugins/test_ddp_spawn_plugin.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import pytest import torch from pytorch_lightning import Trainer @@ -73,7 +74,8 @@ def test_ddp_spawn_extra_parameters(tmpdir): val_name: str = "val_acc" model = BoringCallbackDDPSpawnModel(val_name, val) dm = BoringDataModule() - - trainer.fit(model, datamodule=dm) + # TODO(@daniellepintz): delete pytest.deprecated_call in v1.7 + with pytest.deprecated_call(match=r"DataModule property `dims` was deprecated in v1.5"): + trainer.fit(model, datamodule=dm) assert trainer.callback_metrics[val_name] == torch.tensor(val) assert model.test_val == "test_val" From edc0ad6bc05e2fac55b585cf5526776803af1ade Mon Sep 17 00:00:00 2001 From: Danielle Pintz Date: Wed, 1 Sep 2021 06:25:08 +0000 Subject: [PATCH 04/19] wip --- pytorch_lightning/core/lightning.py | 7 +++++-- tests/deprecated_api/test_remove_1-7.py | 11 +++++++++++ tests/plugins/test_ddp_spawn_plugin.py | 4 +--- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 98b9eba530d42..022e0fdd3e55e 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -30,6 +30,7 @@ from torch.optim.optimizer import Optimizer from torchmetrics import Metric +from pytorch_lightning.plugins import DDPSpawnPlugin from pytorch_lightning.core.hooks import CheckpointHooks, DataHooks, ModelHooks from pytorch_lightning.core.mixins import DeviceDtypeModuleMixin, HyperparametersMixin from pytorch_lightning.core.optimizer import LightningOptimizer @@ -1954,7 +1955,8 @@ def add_to_queue(self, queue: torch.multiprocessing.SimpleQueue) -> None: `pytorch_lightning.plugins.training_type.ddp_spawn.add_to_queue` and will be removed in v1.7. """ - self.trainer.training_type_plugin.add_to_queue(self.trainer, queue) + if type(self.trainer.training_type_plugin) == DDPSpawnPlugin: + self.trainer.training_type_plugin.add_to_queue(self.trainer, queue) def get_from_queue(self, queue: torch.multiprocessing.SimpleQueue) -> None: """ @@ -1969,7 +1971,8 @@ def get_from_queue(self, queue: torch.multiprocessing.SimpleQueue) -> None: `pytorch_lightning.plugins.training_type.ddp_spawn.get_from_queue` and will be removed in v1.7. """ - self.trainer.training_type_plugin.get_from_queue(self.trainer, queue) + if type(self.trainer.training_type_plugin) == DDPSpawnPlugin: + self.trainer.training_type_plugin.get_from_queue(self.trainer, queue) @contextmanager def _prevent_trainer_and_dataloaders_deepcopy(self) -> None: diff --git a/tests/deprecated_api/test_remove_1-7.py b/tests/deprecated_api/test_remove_1-7.py index d836f1427a110..a4f8b9dfe6f97 100644 --- a/tests/deprecated_api/test_remove_1-7.py +++ b/tests/deprecated_api/test_remove_1-7.py @@ -19,6 +19,7 @@ from tests.deprecated_api import _soft_unimport_module from tests.helpers import BoringModel from tests.helpers.datamodules import MNISTDataModule +from tests.helpers.runif import RunIf def test_v1_7_0_deprecated_lightning_module_summarize(tmpdir): @@ -80,3 +81,13 @@ def test_v1_7_0_datamodule_dims_property(tmpdir): _ = dm.dims with pytest.deprecated_call(match=r"DataModule property `dims` was deprecated in v1.5"): _ = LightningDataModule(dims=(1, 1, 1)) + + +@RunIf(min_gpus=2) +def test_v1_7_0_deprecate_add_get_queue(tmpdir): + """Tests if device is set correctly when training for DDPSpawnPlugin.""" + with pytest.deprecated_call(match=r"`LightningModule.add_to_queue` method was deprecated in v1.5"): + _ = Trainer(default_root_dir=tmpdir, fast_dev_run=True, gpus=2, accelerator="ddp_spawn") + + with pytest.deprecated_call(match=r"`LightningModule.get_from_queue` method was deprecated in v1.5"): + _ = Trainer(default_root_dir=tmpdir, fast_dev_run=True, gpus=2, accelerator="ddp_spawn") diff --git a/tests/plugins/test_ddp_spawn_plugin.py b/tests/plugins/test_ddp_spawn_plugin.py index 8f38d86417221..e4436378323a9 100644 --- a/tests/plugins/test_ddp_spawn_plugin.py +++ b/tests/plugins/test_ddp_spawn_plugin.py @@ -74,8 +74,6 @@ def test_ddp_spawn_extra_parameters(tmpdir): val_name: str = "val_acc" model = BoringCallbackDDPSpawnModel(val_name, val) dm = BoringDataModule() - # TODO(@daniellepintz): delete pytest.deprecated_call in v1.7 - with pytest.deprecated_call(match=r"DataModule property `dims` was deprecated in v1.5"): - trainer.fit(model, datamodule=dm) + trainer.fit(model, datamodule=dm) assert trainer.callback_metrics[val_name] == torch.tensor(val) assert model.test_val == "test_val" From ccf44e4c77b51b2d17ab81632f68836b1fc17a60 Mon Sep 17 00:00:00 2001 From: Danielle Pintz Date: Wed, 1 Sep 2021 19:13:54 +0000 Subject: [PATCH 05/19] address comments --- CHANGELOG.md | 2 +- pytorch_lightning/accelerators/accelerator.py | 2 +- pytorch_lightning/core/lightning.py | 10 ++--- .../plugins/training_type/ddp.py | 3 +- .../plugins/training_type/ddp_spawn.py | 36 ++++++++++-------- .../training_type/training_type_plugin.py | 2 +- .../trainer/configuration_validator.py | 19 ++++++++++ tests/deprecated_api/test_remove_1-7.py | 23 ++++++++--- tests/plugins/test_ddp_spawn_plugin.py | 38 +++++++++++++++++-- 9 files changed, 103 insertions(+), 32 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 34ef9b21685c4..c1392f48ad285 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -160,7 +160,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Deprecated `DataModule` properties: `train_transforms`, `val_transforms`, `test_transforms`, `size`, `dims` ([#8851](https://github.com/PyTorchLightning/pytorch-lightning/pull/8851)) -- Deprecated `add_to_queue`, `get_from_queue` from Lightning Module([9118](https://github.com/PyTorchLightning/pytorch-lightning/pull/9118)) +- Deprecated `add_to_queue`, `get_from_queue` from Lightning Module in favor of overriding the same functions in the DDPSpawnPlugins ([9118](https://github.com/PyTorchLightning/pytorch-lightning/pull/9118)) - Deprecated `prepare_data_per_node` flag on Trainer and set it as a property of `DataHooks`, accessible in the `LightningModule` and `LightningDataModule` ([#8958](https://github.com/PyTorchLightning/pytorch-lightning/pull/8958)) diff --git a/pytorch_lightning/accelerators/accelerator.py b/pytorch_lightning/accelerators/accelerator.py index 6038a8abc8f5c..546f79a31adc9 100644 --- a/pytorch_lightning/accelerators/accelerator.py +++ b/pytorch_lightning/accelerators/accelerator.py @@ -120,7 +120,7 @@ def dispatch(self, trainer: "pl.Trainer") -> None: def post_dispatch(self, trainer: "pl.Trainer") -> None: """Hook to do something after the training/evaluation/prediction starts.""" - self.training_type_plugin.post_dispatch() + self.training_type_plugin.post_dispatch(trainer) self.precision_plugin.post_dispatch() @property diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 763b33289c53b..468734346b7e9 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -30,7 +30,6 @@ from torch.optim.optimizer import Optimizer from torchmetrics import Metric -from pytorch_lightning.plugins import DDPSpawnPlugin from pytorch_lightning.core.hooks import CheckpointHooks, DataHooks, ModelHooks from pytorch_lightning.core.mixins import DeviceDtypeModuleMixin, HyperparametersMixin from pytorch_lightning.core.optimizer import LightningOptimizer @@ -47,6 +46,7 @@ from pytorch_lightning.utilities.signature_utils import is_param_in_hook_signature from pytorch_lightning.utilities.types import _METRIC_COLLECTION, EPOCH_OUTPUT, STEP_OUTPUT from pytorch_lightning.utilities.warnings import WarningCache +from pytorch_lightning.plugins import DDPSpawnPlugin warning_cache = WarningCache() log = logging.getLogger(__name__) @@ -1963,10 +1963,10 @@ def add_to_queue(self, queue: torch.multiprocessing.SimpleQueue) -> None: .. deprecated:: v1.5 This method was deprecated in v1.5 in favor of - `pytorch_lightning.plugins.training_type.ddp_spawn.add_to_queue` + `DDPSpawnPlugin.add_to_queue` and will be removed in v1.7. """ - if type(self.trainer.training_type_plugin) == DDPSpawnPlugin: + if self.trainer and isinstance(self.trainer.training_type_plugin, DDPSpawnPlugin): self.trainer.training_type_plugin.add_to_queue(self.trainer, queue) def get_from_queue(self, queue: torch.multiprocessing.SimpleQueue) -> None: @@ -1979,10 +1979,10 @@ def get_from_queue(self, queue: torch.multiprocessing.SimpleQueue) -> None: .. deprecated:: v1.5 This method was deprecated in v1.5 in favor of - `pytorch_lightning.plugins.training_type.ddp_spawn.get_from_queue` + `DDPSpawnPlugin.get_from_queue` and will be removed in v1.7. """ - if type(self.trainer.training_type_plugin) == DDPSpawnPlugin: + if self.trainer and isinstance(self.trainer.training_type_plugin, DDPSpawnPlugin): self.trainer.training_type_plugin.get_from_queue(self.trainer, queue) @contextmanager diff --git a/pytorch_lightning/plugins/training_type/ddp.py b/pytorch_lightning/plugins/training_type/ddp.py index 6d96a443e391a..c726bbf2e2ade 100644 --- a/pytorch_lightning/plugins/training_type/ddp.py +++ b/pytorch_lightning/plugins/training_type/ddp.py @@ -29,6 +29,7 @@ import torch.distributed from torch.nn.parallel.distributed import DistributedDataParallel +import pytorch_lightning as pl from pytorch_lightning.core.optimizer import LightningOptimizer from pytorch_lightning.distributed import LightningDistributed from pytorch_lightning.overrides import LightningDistributedModule @@ -385,7 +386,7 @@ def pre_dispatch(self): # share ddp pids to all processes self._share_information_to_prevent_deadlock() - def post_dispatch(self) -> None: + def post_dispatch(self, trainer: "pl.Trainer") -> None: self.cluster_environment.teardown() def barrier(self, *args, **kwargs) -> None: diff --git a/pytorch_lightning/plugins/training_type/ddp_spawn.py b/pytorch_lightning/plugins/training_type/ddp_spawn.py index 2d67e7289c676..a7dd930931d0a 100644 --- a/pytorch_lightning/plugins/training_type/ddp_spawn.py +++ b/pytorch_lightning/plugins/training_type/ddp_spawn.py @@ -23,6 +23,7 @@ import torch.multiprocessing as mp from torch.nn.parallel.distributed import DistributedDataParallel +from pytorch_lightning.utilities.model_helpers import is_overridden import pytorch_lightning as pl from pytorch_lightning.distributed.dist import LightningDistributed from pytorch_lightning.overrides import LightningDistributedModule @@ -215,16 +216,18 @@ def new_process(self, process_idx: int, trainer: "pl.Trainer", mp_queue: SimpleQ # ensure that spawned processes go through teardown before joining trainer._call_teardown_hook() - # TODO(@daniellepintz): add trainer as an argument in v1.7 - def post_dispatch(self): + def post_dispatch(self, trainer: "pl.Trainer"): # restore main state with best weights best_path = self.mp_queue.get() last_path = self.mp_queue.get() self._results = self.mp_queue.get() # get the `callback_metrics` and set it to the trainer # only in case the user does not override it. - # TODO(@daniellepintz): update line to `self.get_from_queue(trainer, self.mp_queue)` in v1.7 - self.lightning_module.get_from_queue(self.mp_queue) + # TODO(@daniellepintz): Remove the else in v1.7 + if is_overridden("get_from_queue", trainer.training_type_plugin, DDPSpawnPlugin): + self.get_from_queue(trainer, self.mp_queue) + else: + self.lightning_module.get_from_queue(self.mp_queue) # recover the weights of the processes trained in the children self.__recover_child_process_weights(best_path, last_path) @@ -290,8 +293,11 @@ def __transfer_distrib_spawn_state_on_fit_end(self, trainer: "pl.Trainer", resul self.mp_queue.put(best_model_path) self.mp_queue.put(last_path) self.mp_queue.put(results) - # TODO(@daniellepintz): update line to `self.add_to_queue(trainer, self.mp_queue)` in v1.7 - self.lightning_module.add_to_queue(self.mp_queue) # adds the `callback_metrics` to the queue + # TODO(@daniellepintz): Remove the else in v1.7 + if is_overridden("add_to_queue", trainer.training_type_plugin, DDPSpawnPlugin): + self.add_to_queue(trainer, self.mp_queue) + else: + self.lightning_module.add_to_queue(self.mp_queue) # adds the `callback_metrics` to the queue def __recover_child_process_weights(self, best_path, last_path): # transfer back the best path to the trainer @@ -361,15 +367,6 @@ def post_training_step(self): if not self.lightning_module.automatic_optimization: self.model.require_backward_grad_sync = True - @classmethod - def register_plugins(cls, plugin_registry: Dict) -> None: - plugin_registry.register( - "ddp_spawn_find_unused_parameters_false", - cls, - description="DDPSpawn Plugin with `find_unused_parameters` as False", - find_unused_parameters=False, - ) - def add_to_queue(self, trainer: "pl.Trainer", queue: torch.multiprocessing.SimpleQueue) -> None: """ Appends the :attr:`trainer.callback_metrics` dictionary to the given queue. @@ -395,6 +392,15 @@ def get_from_queue(self, trainer: "pl.Trainer", queue: torch.multiprocessing.Sim callback_metrics: dict = queue.get() trainer.callback_metrics.update(apply_to_collection(callback_metrics, np.ndarray, lambda x: torch.tensor(x))) + @classmethod + def register_plugins(cls, plugin_registry: Dict) -> None: + plugin_registry.register( + "ddp_spawn_find_unused_parameters_false", + cls, + description="DDPSpawn Plugin with `find_unused_parameters` as False", + find_unused_parameters=False, + ) + def teardown(self) -> None: if isinstance(self.model, DistributedDataParallel): self.model = self.lightning_module diff --git a/pytorch_lightning/plugins/training_type/training_type_plugin.py b/pytorch_lightning/plugins/training_type/training_type_plugin.py index 6ee1ce77c8c24..f388c0148206c 100644 --- a/pytorch_lightning/plugins/training_type/training_type_plugin.py +++ b/pytorch_lightning/plugins/training_type/training_type_plugin.py @@ -358,5 +358,5 @@ def pre_dispatch(self) -> None: def dispatch(self, trainer: "pl.Trainer") -> None: """Hook to do something at trainer run_stage starts.""" - def post_dispatch(self) -> None: + def post_dispatch(self, trainer: "pl.Trainer") -> None: """Hook to do something after the training/evaluation/prediction finishes.""" diff --git a/pytorch_lightning/trainer/configuration_validator.py b/pytorch_lightning/trainer/configuration_validator.py index e4a3c6f8c00eb..3d10fd5c1a2a4 100644 --- a/pytorch_lightning/trainer/configuration_validator.py +++ b/pytorch_lightning/trainer/configuration_validator.py @@ -43,6 +43,7 @@ def verify_loop_configurations(self, model: "pl.LightningModule") -> None: elif self.trainer.state.fn == TrainerFn.PREDICTING: self.__verify_predict_loop_configuration(model) self.__verify_dp_batch_transfer_support(model) + self._check_add_get_queue(model) def __verify_train_loop_configuration(self, model: "pl.LightningModule") -> None: # ----------------------------------- @@ -201,3 +202,21 @@ def __check_training_step_requires_dataloader_iter(self, model: "pl.LightningMod "The model taking a `dataloader_iter` argument in your `training_step` " "is incompatible with `truncated_bptt_steps > 0`." ) + + def _check_add_get_queue(self, model: "pl.LightningModule"): + r""" + Checks if add_to_queue or get_from_queue is overriden and sends a deprecation warning. + + Args: + model: The lightning module + """ + if is_overridden("add_to_queue", model): + rank_zero_deprecation( + "The `LightningModule.add_to_queue` method was deprecated in v1.5 and will be removed in v1.7 in " + "favor of `DDPSpawnPlugin.add_to_queue`" + ) + if is_overridden("get_from_queue", model): + rank_zero_deprecation( + "The `LightningModule.get_from_queue` method was deprecated in v1.5 and will be removed in v1.7 in " + "favor of `DDPSpawnPlugin.get_from_queue`" + ) diff --git a/tests/deprecated_api/test_remove_1-7.py b/tests/deprecated_api/test_remove_1-7.py index 5bb276c630f9d..507f7afe4d53b 100644 --- a/tests/deprecated_api/test_remove_1-7.py +++ b/tests/deprecated_api/test_remove_1-7.py @@ -13,7 +13,7 @@ # limitations under the License. """ Test deprecated functionality which will be removed in v1.7.0 """ from unittest import mock - +import torch import pytest from pytorch_lightning import LightningDataModule, Trainer @@ -124,11 +124,24 @@ def test_v1_7_0_process_position_trainer_constructor(tmpdir): _ = Trainer(process_position=5) +class BoringCallbackDDPSpawnModel(BoringModel): + def __init__(self): + super().__init__() + + def add_to_queue(self, queue: torch.multiprocessing.SimpleQueue) -> None: + queue.put("test_val") + return super().add_to_queue(queue) + + def get_from_queue(self, queue: torch.multiprocessing.SimpleQueue) -> None: + self.test_val = queue.get() + return super().get_from_queue(queue) + + @RunIf(min_gpus=2) def test_v1_7_0_deprecate_add_get_queue(tmpdir): """Tests if device is set correctly when training for DDPSpawnPlugin.""" - with pytest.deprecated_call(match=r"`LightningModule.add_to_queue` method was deprecated in v1.5"): - _ = Trainer(default_root_dir=tmpdir, fast_dev_run=True, gpus=2, accelerator="ddp_spawn") + model = BoringCallbackDDPSpawnModel() + trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, gpus=2, accelerator="ddp_spawn") - with pytest.deprecated_call(match=r"`LightningModule.get_from_queue` method was deprecated in v1.5"): - _ = Trainer(default_root_dir=tmpdir, fast_dev_run=True, gpus=2, accelerator="ddp_spawn") + with pytest.deprecated_call(match=r"`LightningModule.add_to_queue` method was deprecated in v1.5"): + trainer.fit(model) diff --git a/tests/plugins/test_ddp_spawn_plugin.py b/tests/plugins/test_ddp_spawn_plugin.py index e4436378323a9..18c76b823a2b1 100644 --- a/tests/plugins/test_ddp_spawn_plugin.py +++ b/tests/plugins/test_ddp_spawn_plugin.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import pytest import torch from pytorch_lightning import Trainer @@ -47,7 +46,7 @@ def get_from_queue(self, queue: torch.multiprocessing.SimpleQueue) -> None: @RunIf(skip_windows=True) def test_ddp_cpu(): - """Tests if device is set correctely when training for DDPSpawnPlugin.""" + """Tests if device is set correctly when training for DDPSpawnPlugin.""" trainer = Trainer(num_processes=2, fast_dev_run=True) # assert training type plugin attributes for device setting @@ -63,7 +62,8 @@ def test_ddp_cpu(): @RunIf(min_gpus=2) def test_ddp_spawn_extra_parameters(tmpdir): - """Tests if device is set correctly when training for DDPSpawnPlugin.""" + """Tests if device is set correctly when training for DDPSpawnPlugin + and tests add_to_queue/get_from_queue with Lightning Module (deprecated way).""" trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, gpus=2, accelerator="ddp_spawn") assert isinstance(trainer.training_type_plugin, DDPSpawnPlugin) @@ -77,3 +77,35 @@ def test_ddp_spawn_extra_parameters(tmpdir): trainer.fit(model, datamodule=dm) assert trainer.callback_metrics[val_name] == torch.tensor(val) assert model.test_val == "test_val" + + +class TestDDPSpawnPlugin(DDPSpawnPlugin): + def add_to_queue(self, trainer: Trainer, queue: torch.multiprocessing.SimpleQueue) -> None: + queue.put("new_test_val") + return super().add_to_queue(trainer, queue) + + def get_from_queue(self, trainer: Trainer, queue: torch.multiprocessing.SimpleQueue) -> None: + self.new_test_val = queue.get() + return super().get_from_queue(trainer, queue) + + +@RunIf(min_gpus=2) +def test_ddp_spawn_add_get_queue(tmpdir): + """Tests add_to_queue/get_from_queue with DDPSpawnPlugin.""" + + ddp_spawn_plugin = TestDDPSpawnPlugin() + trainer = Trainer( + default_root_dir=tmpdir, fast_dev_run=True, gpus=2, accelerator="ddp_spawn", plugins=[ddp_spawn_plugin] + ) + + assert isinstance(trainer.training_type_plugin, DDPSpawnPlugin) + assert trainer.training_type_plugin.on_gpu + assert trainer.training_type_plugin.root_device == torch.device("cuda:0") + + val: float = 1.0 + val_name: str = "val_acc" + model = BoringCallbackDDPSpawnModel(val_name, val) + dm = BoringDataModule() + trainer.fit(model, datamodule=dm) + assert trainer.callback_metrics[val_name] == torch.tensor(val) + assert ddp_spawn_plugin.new_test_val == "new_test_val" From 11e9292847fd7b034b33cd786468775a42a6cdf4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 2 Sep 2021 00:12:16 +0000 Subject: [PATCH 06/19] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pytorch_lightning/core/lightning.py | 2 +- pytorch_lightning/plugins/training_type/ddp_spawn.py | 2 +- tests/deprecated_api/test_remove_1-7.py | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 41f51e8ebfa8f..625a773ce1992 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -34,6 +34,7 @@ from pytorch_lightning.core.mixins import DeviceDtypeModuleMixin, HyperparametersMixin from pytorch_lightning.core.optimizer import LightningOptimizer from pytorch_lightning.core.saving import ModelIO +from pytorch_lightning.plugins import DDPSpawnPlugin from pytorch_lightning.trainer.connectors.logger_connector.fx_validator import FxValidator from pytorch_lightning.utilities import _TORCH_SHARDED_TENSOR_AVAILABLE, rank_zero_deprecation, rank_zero_warn from pytorch_lightning.utilities.apply_func import apply_to_collection, convert_to_tensors @@ -46,7 +47,6 @@ from pytorch_lightning.utilities.signature_utils import is_param_in_hook_signature from pytorch_lightning.utilities.types import _METRIC_COLLECTION, EPOCH_OUTPUT, STEP_OUTPUT from pytorch_lightning.utilities.warnings import WarningCache -from pytorch_lightning.plugins import DDPSpawnPlugin warning_cache = WarningCache() log = logging.getLogger(__name__) diff --git a/pytorch_lightning/plugins/training_type/ddp_spawn.py b/pytorch_lightning/plugins/training_type/ddp_spawn.py index a7dd930931d0a..2ba09dfeaec4d 100644 --- a/pytorch_lightning/plugins/training_type/ddp_spawn.py +++ b/pytorch_lightning/plugins/training_type/ddp_spawn.py @@ -23,7 +23,6 @@ import torch.multiprocessing as mp from torch.nn.parallel.distributed import DistributedDataParallel -from pytorch_lightning.utilities.model_helpers import is_overridden import pytorch_lightning as pl from pytorch_lightning.distributed.dist import LightningDistributed from pytorch_lightning.overrides import LightningDistributedModule @@ -48,6 +47,7 @@ ReduceOp, sync_ddp_if_available, ) +from pytorch_lightning.utilities.model_helpers import is_overridden from pytorch_lightning.utilities.seed import reset_seed if _TORCH_GREATER_EQUAL_1_8: diff --git a/tests/deprecated_api/test_remove_1-7.py b/tests/deprecated_api/test_remove_1-7.py index 507f7afe4d53b..e6f02e5c39094 100644 --- a/tests/deprecated_api/test_remove_1-7.py +++ b/tests/deprecated_api/test_remove_1-7.py @@ -13,8 +13,9 @@ # limitations under the License. """ Test deprecated functionality which will be removed in v1.7.0 """ from unittest import mock -import torch + import pytest +import torch from pytorch_lightning import LightningDataModule, Trainer from pytorch_lightning.loggers import TestTubeLogger From 7edf7f8b0dccd4db066d411b86ec40bc76b0cfa9 Mon Sep 17 00:00:00 2001 From: Danielle Pintz Date: Thu, 2 Sep 2021 06:18:42 +0000 Subject: [PATCH 07/19] update import path --- pytorch_lightning/core/lightning.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 2aca3f67c9b04..5e04bedfe1f04 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -34,7 +34,7 @@ from pytorch_lightning.core.mixins import DeviceDtypeModuleMixin, HyperparametersMixin from pytorch_lightning.core.optimizer import LightningOptimizer from pytorch_lightning.core.saving import ModelIO -from pytorch_lightning.plugins import DDPSpawnPlugin +from pytorch_lightning.plugins.training_type import DDPSpawnPlugin from pytorch_lightning.trainer.connectors.logger_connector.fx_validator import FxValidator from pytorch_lightning.utilities import _TORCH_SHARDED_TENSOR_AVAILABLE, rank_zero_deprecation, rank_zero_warn from pytorch_lightning.utilities.apply_func import apply_to_collection, convert_to_tensors From 9117350bcfee4db44a5e698d42915ad8abb1366e Mon Sep 17 00:00:00 2001 From: Danielle Pintz Date: Thu, 2 Sep 2021 06:41:00 +0000 Subject: [PATCH 08/19] update import path --- pytorch_lightning/accelerators/accelerator.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pytorch_lightning/accelerators/accelerator.py b/pytorch_lightning/accelerators/accelerator.py index 546f79a31adc9..412d623caf73a 100644 --- a/pytorch_lightning/accelerators/accelerator.py +++ b/pytorch_lightning/accelerators/accelerator.py @@ -21,9 +21,8 @@ from torch.utils.data import DataLoader import pytorch_lightning as pl -from pytorch_lightning.plugins import DataParallelPlugin from pytorch_lightning.plugins.precision import ApexMixedPrecisionPlugin, NativeMixedPrecisionPlugin, PrecisionPlugin -from pytorch_lightning.plugins.training_type import TrainingTypePlugin +from pytorch_lightning.plugins.training_type import TrainingTypePlugin, DataParallelPlugin from pytorch_lightning.trainer.states import TrainerFn from pytorch_lightning.utilities import _NATIVE_AMP_AVAILABLE from pytorch_lightning.utilities.apply_func import apply_to_collection, move_data_to_device From a177c4918e178b3765aebc3b77dece73e8a02423 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 2 Sep 2021 06:42:08 +0000 Subject: [PATCH 09/19] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pytorch_lightning/accelerators/accelerator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/accelerators/accelerator.py b/pytorch_lightning/accelerators/accelerator.py index 412d623caf73a..a7d743bf242df 100644 --- a/pytorch_lightning/accelerators/accelerator.py +++ b/pytorch_lightning/accelerators/accelerator.py @@ -22,7 +22,7 @@ import pytorch_lightning as pl from pytorch_lightning.plugins.precision import ApexMixedPrecisionPlugin, NativeMixedPrecisionPlugin, PrecisionPlugin -from pytorch_lightning.plugins.training_type import TrainingTypePlugin, DataParallelPlugin +from pytorch_lightning.plugins.training_type import DataParallelPlugin, TrainingTypePlugin from pytorch_lightning.trainer.states import TrainerFn from pytorch_lightning.utilities import _NATIVE_AMP_AVAILABLE from pytorch_lightning.utilities.apply_func import apply_to_collection, move_data_to_device From 12cc5d26ce849d113cbcbe9e7fab936117d4b3f0 Mon Sep 17 00:00:00 2001 From: tchaton Date: Thu, 2 Sep 2021 09:17:10 +0100 Subject: [PATCH 10/19] resolve circular reference --- pytorch_lightning/core/lightning.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 5e04bedfe1f04..80c1358b9f9bc 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -30,11 +30,11 @@ from torch.optim.optimizer import Optimizer from torchmetrics import Metric +import pytorch_lightning as pl from pytorch_lightning.core.hooks import CheckpointHooks, DataHooks, ModelHooks from pytorch_lightning.core.mixins import DeviceDtypeModuleMixin, HyperparametersMixin from pytorch_lightning.core.optimizer import LightningOptimizer from pytorch_lightning.core.saving import ModelIO -from pytorch_lightning.plugins.training_type import DDPSpawnPlugin from pytorch_lightning.trainer.connectors.logger_connector.fx_validator import FxValidator from pytorch_lightning.utilities import _TORCH_SHARDED_TENSOR_AVAILABLE, rank_zero_deprecation, rank_zero_warn from pytorch_lightning.utilities.apply_func import apply_to_collection, convert_to_tensors @@ -1954,7 +1954,7 @@ def add_to_queue(self, queue: torch.multiprocessing.SimpleQueue) -> None: `DDPSpawnPlugin.add_to_queue` and will be removed in v1.7. """ - if self.trainer and isinstance(self.trainer.training_type_plugin, DDPSpawnPlugin): + if self.trainer and isinstance(self.trainer.training_type_plugin, pl.plugins.training_type.DDPSpawnPlugin): self.trainer.training_type_plugin.add_to_queue(self.trainer, queue) def get_from_queue(self, queue: torch.multiprocessing.SimpleQueue) -> None: @@ -1970,7 +1970,7 @@ def get_from_queue(self, queue: torch.multiprocessing.SimpleQueue) -> None: `DDPSpawnPlugin.get_from_queue` and will be removed in v1.7. """ - if self.trainer and isinstance(self.trainer.training_type_plugin, DDPSpawnPlugin): + if self.trainer and isinstance(self.trainer.training_type_plugin, pl.plugins.training_type.DDPSpawnPlugin): self.trainer.training_type_plugin.get_from_queue(self.trainer, queue) @contextmanager From e2a7a51e3439462d38fc4a493161b7f0d11720d2 Mon Sep 17 00:00:00 2001 From: Danielle Pintz Date: Fri, 3 Sep 2021 00:02:58 +0000 Subject: [PATCH 11/19] address comment --- .../plugins/training_type/ddp_spawn.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/pytorch_lightning/plugins/training_type/ddp_spawn.py b/pytorch_lightning/plugins/training_type/ddp_spawn.py index 6c39351fcff20..8f4e66a0d7aad 100644 --- a/pytorch_lightning/plugins/training_type/ddp_spawn.py +++ b/pytorch_lightning/plugins/training_type/ddp_spawn.py @@ -227,11 +227,11 @@ def post_dispatch(self, trainer: "pl.Trainer"): self._results = self.mp_queue.get() # get the `callback_metrics` and set it to the trainer # only in case the user does not override it. - # TODO(@daniellepintz): Remove the else in v1.7 - if is_overridden("get_from_queue", trainer.training_type_plugin, DDPSpawnPlugin): - self.get_from_queue(trainer, self.mp_queue) - else: + # TODO: Remove the if in v1.7 + if is_overridden("get_from_queue", self.lightning_module): self.lightning_module.get_from_queue(self.mp_queue) + else: + self.get_from_queue(trainer, self.mp_queue) # recover the weights of the processes trained in the children self.__recover_child_process_weights(best_path, last_path) @@ -297,11 +297,12 @@ def __transfer_distrib_spawn_state_on_fit_end(self, trainer: "pl.Trainer", resul self.mp_queue.put(best_model_path) self.mp_queue.put(last_path) self.mp_queue.put(results) - # TODO(@daniellepintz): Remove the else in v1.7 - if is_overridden("add_to_queue", trainer.training_type_plugin, DDPSpawnPlugin): - self.add_to_queue(trainer, self.mp_queue) + # adds the `callback_metrics` to the queue + # TODO: Remove the if in v1.7 + if is_overridden("add_to_queue", self.lightning_module): + self.lightning_module.add_to_queue(self.mp_queue) else: - self.lightning_module.add_to_queue(self.mp_queue) # adds the `callback_metrics` to the queue + self.add_to_queue(trainer, self.mp_queue) def __recover_child_process_weights(self, best_path, last_path): # transfer back the best path to the trainer From 7f48f268eb1d449eee59449f2d1c4095132a3d73 Mon Sep 17 00:00:00 2001 From: Danielle Pintz <38207072+daniellepintz@users.noreply.github.com> Date: Thu, 2 Sep 2021 22:22:06 -0700 Subject: [PATCH 12/19] Update CHANGELOG.md Co-authored-by: ananthsub --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 63861e4079a97..48430c68b2ba5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -164,7 +164,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Deprecated `DataModule` properties: `train_transforms`, `val_transforms`, `test_transforms`, `size`, `dims` ([#8851](https://github.com/PyTorchLightning/pytorch-lightning/pull/8851)) -- Deprecated `add_to_queue`, `get_from_queue` from Lightning Module in favor of overriding the same functions in the DDPSpawnPlugins ([9118](https://github.com/PyTorchLightning/pytorch-lightning/pull/9118)) +- Deprecated `add_to_queue`, `get_from_queue` from `LightningModule` in favor of corresponding methods in the `DDPSpawnPlugins` ([9118](https://github.com/PyTorchLightning/pytorch-lightning/pull/9118)) - Deprecated `prepare_data_per_node` flag on Trainer and set it as a property of `DataHooks`, accessible in the `LightningModule` and `LightningDataModule` ([#8958](https://github.com/PyTorchLightning/pytorch-lightning/pull/8958)) From 4d591e89b00111c4f2ea50d0030bc97cf0ed96a5 Mon Sep 17 00:00:00 2001 From: Danielle Pintz <38207072+daniellepintz@users.noreply.github.com> Date: Thu, 2 Sep 2021 22:46:12 -0700 Subject: [PATCH 13/19] Update CHANGELOG.md Co-authored-by: ananthsub --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 48430c68b2ba5..447b1ded2ddf7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -164,7 +164,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Deprecated `DataModule` properties: `train_transforms`, `val_transforms`, `test_transforms`, `size`, `dims` ([#8851](https://github.com/PyTorchLightning/pytorch-lightning/pull/8851)) -- Deprecated `add_to_queue`, `get_from_queue` from `LightningModule` in favor of corresponding methods in the `DDPSpawnPlugins` ([9118](https://github.com/PyTorchLightning/pytorch-lightning/pull/9118)) +- Deprecated `add_to_queue`, `get_from_queue` from `LightningModule` in favor of corresponding methods in the `DDPSpawnPlugin` ([9118](https://github.com/PyTorchLightning/pytorch-lightning/pull/9118)) - Deprecated `prepare_data_per_node` flag on Trainer and set it as a property of `DataHooks`, accessible in the `LightningModule` and `LightningDataModule` ([#8958](https://github.com/PyTorchLightning/pytorch-lightning/pull/8958)) From a3936e6c4afe37caa8456f4040777b3d03f330b2 Mon Sep 17 00:00:00 2001 From: Danielle Pintz <38207072+daniellepintz@users.noreply.github.com> Date: Sun, 5 Sep 2021 21:46:42 -0700 Subject: [PATCH 14/19] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Adrian Wälchli --- pytorch_lightning/trainer/configuration_validator.py | 2 +- tests/deprecated_api/test_remove_1-7.py | 3 +-- tests/plugins/test_ddp_spawn_plugin.py | 3 +-- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/pytorch_lightning/trainer/configuration_validator.py b/pytorch_lightning/trainer/configuration_validator.py index 9eae337a0cea0..ab3aa81e6d69e 100644 --- a/pytorch_lightning/trainer/configuration_validator.py +++ b/pytorch_lightning/trainer/configuration_validator.py @@ -203,7 +203,7 @@ def __check_training_step_requires_dataloader_iter(self, model: "pl.LightningMod "is incompatible with `truncated_bptt_steps > 0`." ) - def _check_add_get_queue(self, model: "pl.LightningModule"): + def _check_add_get_queue(self, model: "pl.LightningModule") -> None: r""" Checks if add_to_queue or get_from_queue is overriden and sends a deprecation warning. diff --git a/tests/deprecated_api/test_remove_1-7.py b/tests/deprecated_api/test_remove_1-7.py index e6f02e5c39094..187bead429171 100644 --- a/tests/deprecated_api/test_remove_1-7.py +++ b/tests/deprecated_api/test_remove_1-7.py @@ -138,11 +138,10 @@ def get_from_queue(self, queue: torch.multiprocessing.SimpleQueue) -> None: return super().get_from_queue(queue) -@RunIf(min_gpus=2) def test_v1_7_0_deprecate_add_get_queue(tmpdir): """Tests if device is set correctly when training for DDPSpawnPlugin.""" model = BoringCallbackDDPSpawnModel() - trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, gpus=2, accelerator="ddp_spawn") + trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, num_processes=2, accelerator="ddp_cpu") with pytest.deprecated_call(match=r"`LightningModule.add_to_queue` method was deprecated in v1.5"): trainer.fit(model) diff --git a/tests/plugins/test_ddp_spawn_plugin.py b/tests/plugins/test_ddp_spawn_plugin.py index 985cdf7f0c532..7b6e49703ab03 100644 --- a/tests/plugins/test_ddp_spawn_plugin.py +++ b/tests/plugins/test_ddp_spawn_plugin.py @@ -91,13 +91,12 @@ def get_from_queue(self, trainer: Trainer, queue: torch.multiprocessing.SimpleQu return super().get_from_queue(trainer, queue) -@RunIf(min_gpus=2) def test_ddp_spawn_add_get_queue(tmpdir): """Tests add_to_queue/get_from_queue with DDPSpawnPlugin.""" ddp_spawn_plugin = TestDDPSpawnPlugin() trainer = Trainer( - default_root_dir=tmpdir, fast_dev_run=True, gpus=2, accelerator="ddp_spawn", plugins=[ddp_spawn_plugin] + default_root_dir=tmpdir, fast_dev_run=True, num_processes=2, accelerator="ddp_cpu", plugins=[ddp_spawn_plugin] ) assert isinstance(trainer.training_type_plugin, DDPSpawnPlugin) From 546a28b32650b5b2495863344bc0fe5eaaebfadc Mon Sep 17 00:00:00 2001 From: Danielle Pintz Date: Mon, 6 Sep 2021 05:33:25 +0000 Subject: [PATCH 15/19] address comments --- tests/deprecated_api/test_remove_1-7.py | 1 - tests/plugins/test_ddp_spawn_plugin.py | 4 ---- 2 files changed, 5 deletions(-) diff --git a/tests/deprecated_api/test_remove_1-7.py b/tests/deprecated_api/test_remove_1-7.py index 187bead429171..187e5b0c7465c 100644 --- a/tests/deprecated_api/test_remove_1-7.py +++ b/tests/deprecated_api/test_remove_1-7.py @@ -139,7 +139,6 @@ def get_from_queue(self, queue: torch.multiprocessing.SimpleQueue) -> None: def test_v1_7_0_deprecate_add_get_queue(tmpdir): - """Tests if device is set correctly when training for DDPSpawnPlugin.""" model = BoringCallbackDDPSpawnModel() trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, num_processes=2, accelerator="ddp_cpu") diff --git a/tests/plugins/test_ddp_spawn_plugin.py b/tests/plugins/test_ddp_spawn_plugin.py index 7b6e49703ab03..b4ea518eab81e 100644 --- a/tests/plugins/test_ddp_spawn_plugin.py +++ b/tests/plugins/test_ddp_spawn_plugin.py @@ -99,10 +99,6 @@ def test_ddp_spawn_add_get_queue(tmpdir): default_root_dir=tmpdir, fast_dev_run=True, num_processes=2, accelerator="ddp_cpu", plugins=[ddp_spawn_plugin] ) - assert isinstance(trainer.training_type_plugin, DDPSpawnPlugin) - assert trainer.training_type_plugin.on_gpu - assert trainer.training_type_plugin.root_device == torch.device("cuda:0") - val: float = 1.0 val_name: str = "val_acc" model = BoringCallbackDDPSpawnModel(val_name, val) From e5d22892ed67dea0b463383989a87657e3d468eb Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Mon, 6 Sep 2021 09:52:06 +0200 Subject: [PATCH 16/19] Apply suggestions from code review --- pytorch_lightning/core/lightning.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 80c1358b9f9bc..2a0dec9101265 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1950,8 +1950,7 @@ def add_to_queue(self, queue: torch.multiprocessing.SimpleQueue) -> None: queue: the instance of the queue to append the data. .. deprecated:: v1.5 - This method was deprecated in v1.5 in favor of - `DDPSpawnPlugin.add_to_queue` + This method was deprecated in v1.5 in favor of `DDPSpawnPlugin.add_to_queue` and will be removed in v1.7. """ if self.trainer and isinstance(self.trainer.training_type_plugin, pl.plugins.training_type.DDPSpawnPlugin): @@ -1966,8 +1965,7 @@ def get_from_queue(self, queue: torch.multiprocessing.SimpleQueue) -> None: queue: the instance of the queue from where to get the data. .. deprecated:: v1.5 - This method was deprecated in v1.5 in favor of - `DDPSpawnPlugin.get_from_queue` + This method was deprecated in v1.5 in favor of `DDPSpawnPlugin.get_from_queue` and will be removed in v1.7. """ if self.trainer and isinstance(self.trainer.training_type_plugin, pl.plugins.training_type.DDPSpawnPlugin): From 446fafd71c051d03669e82d1c0ebfc5cafff3f91 Mon Sep 17 00:00:00 2001 From: Danielle Pintz Date: Mon, 6 Sep 2021 23:38:17 +0000 Subject: [PATCH 17/19] update test --- tests/deprecated_api/test_remove_1-7.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/deprecated_api/test_remove_1-7.py b/tests/deprecated_api/test_remove_1-7.py index 64efdacd626dc..56b229227c8c4 100644 --- a/tests/deprecated_api/test_remove_1-7.py +++ b/tests/deprecated_api/test_remove_1-7.py @@ -192,3 +192,6 @@ def test_v1_7_0_deprecate_add_get_queue(tmpdir): with pytest.deprecated_call(match=r"`LightningModule.add_to_queue` method was deprecated in v1.5"): trainer.fit(model) + + with pytest.deprecated_call(match=r"`LightningModule.get_from_queue` method was deprecated in v1.5"): + trainer.fit(model) From 113645debd6fc3d541d688003fda2616c3ee24af Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 6 Sep 2021 23:39:48 +0000 Subject: [PATCH 18/19] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pytorch_lightning/plugins/training_type/ddp_spawn.py | 10 ++++------ tests/plugins/test_ddp_spawn_plugin.py | 4 ++-- tests/trainer/loops/test_training_loop.py | 2 +- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/pytorch_lightning/plugins/training_type/ddp_spawn.py b/pytorch_lightning/plugins/training_type/ddp_spawn.py index 74b5098140265..5f493001341d6 100644 --- a/pytorch_lightning/plugins/training_type/ddp_spawn.py +++ b/pytorch_lightning/plugins/training_type/ddp_spawn.py @@ -375,9 +375,8 @@ def post_training_step(self): self.model.require_backward_grad_sync = True def add_to_queue(self, trainer: "pl.Trainer", queue: torch.multiprocessing.SimpleQueue) -> None: - """ - Appends the :attr:`trainer.callback_metrics` dictionary to the given queue. - To avoid issues with memory sharing, we cast the data to numpy. + """Appends the :attr:`trainer.callback_metrics` dictionary to the given queue. To avoid issues with memory + sharing, we cast the data to numpy. Args: queue: the instance of the queue to append the data. @@ -388,9 +387,8 @@ def add_to_queue(self, trainer: "pl.Trainer", queue: torch.multiprocessing.Simpl queue.put(callback_metrics) def get_from_queue(self, trainer: "pl.Trainer", queue: torch.multiprocessing.SimpleQueue) -> None: - """ - Retrieve the :attr:`trainer.callback_metrics` dictionary from the given queue. - To preserve consistency, we cast back the data to ``torch.Tensor``. + """Retrieve the :attr:`trainer.callback_metrics` dictionary from the given queue. To preserve consistency, + we cast back the data to ``torch.Tensor``. Args: queue: the instance of the queue from where to get the data. diff --git a/tests/plugins/test_ddp_spawn_plugin.py b/tests/plugins/test_ddp_spawn_plugin.py index b4ea518eab81e..2f887fbe0e520 100644 --- a/tests/plugins/test_ddp_spawn_plugin.py +++ b/tests/plugins/test_ddp_spawn_plugin.py @@ -64,8 +64,8 @@ def test_ddp_cpu(): @RunIf(min_gpus=2) def test_ddp_spawn_extra_parameters(tmpdir): - """Tests if device is set correctly when training for DDPSpawnPlugin - and tests add_to_queue/get_from_queue with Lightning Module (deprecated way).""" + """Tests if device is set correctly when training for DDPSpawnPlugin and tests add_to_queue/get_from_queue with + Lightning Module (deprecated way).""" trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, gpus=2, accelerator="ddp_spawn") assert isinstance(trainer.training_type_plugin, DDPSpawnPlugin) diff --git a/tests/trainer/loops/test_training_loop.py b/tests/trainer/loops/test_training_loop.py index c37681e4831ca..d21e8efc7a5cb 100644 --- a/tests/trainer/loops/test_training_loop.py +++ b/tests/trainer/loops/test_training_loop.py @@ -191,7 +191,7 @@ def training_epoch_end(self, outputs) -> None: def test_batch_loop_releases_loss(tmpdir): - """Test that loss/graph is released so that it can be garbage collected before the next training step""" + """Test that loss/graph is released so that it can be garbage collected before the next training step.""" class TestModel(BoringModel): def training_step(self, batch, batch_idx): From e73b4195e90538fe96fc7a080bcafa2001f83c7f Mon Sep 17 00:00:00 2001 From: Danielle Pintz Date: Fri, 10 Sep 2021 19:08:50 +0000 Subject: [PATCH 19/19] RunIf(skip_windows=True) --- tests/deprecated_api/test_remove_1-7.py | 1 + tests/plugins/test_ddp_spawn_plugin.py | 1 + 2 files changed, 2 insertions(+) diff --git a/tests/deprecated_api/test_remove_1-7.py b/tests/deprecated_api/test_remove_1-7.py index 734b795ff4966..5dffd8501f241 100644 --- a/tests/deprecated_api/test_remove_1-7.py +++ b/tests/deprecated_api/test_remove_1-7.py @@ -209,6 +209,7 @@ def get_from_queue(self, queue: torch.multiprocessing.SimpleQueue) -> None: return super().get_from_queue(queue) +@RunIf(skip_windows=True) def test_v1_7_0_deprecate_add_get_queue(tmpdir): model = BoringCallbackDDPSpawnModel() trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, num_processes=2, accelerator="ddp_cpu") diff --git a/tests/plugins/test_ddp_spawn_plugin.py b/tests/plugins/test_ddp_spawn_plugin.py index 2f887fbe0e520..a89ddd3aaa50b 100644 --- a/tests/plugins/test_ddp_spawn_plugin.py +++ b/tests/plugins/test_ddp_spawn_plugin.py @@ -91,6 +91,7 @@ def get_from_queue(self, trainer: Trainer, queue: torch.multiprocessing.SimpleQu return super().get_from_queue(trainer, queue) +@RunIf(skip_windows=True) def test_ddp_spawn_add_get_queue(tmpdir): """Tests add_to_queue/get_from_queue with DDPSpawnPlugin."""