From 52a1c57b59fe72cea6e9de97381475a0fcda5dfb Mon Sep 17 00:00:00 2001 From: Ian Hales Date: Tue, 19 Jul 2022 18:57:17 +0100 Subject: [PATCH 01/15] Fixed all mypy typing errors for the IPU strategy. --- pyproject.toml | 1 - src/pytorch_lightning/strategies/ipu.py | 72 ++++++++++++++----------- 2 files changed, 40 insertions(+), 33 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index eb9b025e36811..e1954c5f6d6a3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -67,7 +67,6 @@ module = [ "pytorch_lightning.strategies.ddp_spawn", "pytorch_lightning.strategies.deepspeed", "pytorch_lightning.strategies.fully_sharded", - "pytorch_lightning.strategies.ipu", "pytorch_lightning.strategies.sharded", "pytorch_lightning.strategies.sharded_spawn", "pytorch_lightning.strategies.strategy", diff --git a/src/pytorch_lightning/strategies/ipu.py b/src/pytorch_lightning/strategies/ipu.py index 5413756c15271..cff917323a15c 100644 --- a/src/pytorch_lightning/strategies/ipu.py +++ b/src/pytorch_lightning/strategies/ipu.py @@ -13,11 +13,12 @@ # limitations under the License. import json import os -from typing import Any, Callable, Dict, List, Optional, Union +from typing import Any, Callable, Dict, List, Optional, Tuple, Union +from pytorch_lightning.strategies.strategy import TBroadcast import torch from torch import FloatTensor, Tensor -from torch.utils.data import DataLoader +from torch.utils.data import DataLoader, Sampler import pytorch_lightning as pl from pytorch_lightning.overrides.base import _LightningModuleWrapperBase, _LightningPrecisionModuleWrapperBase @@ -110,12 +111,12 @@ def __init__( self.device_iterations = device_iterations self.autoreport = autoreport self.autoreport_dir = autoreport_dir - self.poptorch_models = {} + self.poptorch_models : Dict[Union[RunningStage, str], "poptorch.PoplarExecutor"] = {} self._training_opts = training_opts self._inference_opts = inference_opts if self.autoreport: - options = {"autoReport.all": self.autoreport} + options : Dict[str, Any] = {"autoReport.all": self.autoreport} if self.autoreport_dir: self._fs = get_filesystem(str(self.autoreport_dir)) self._fs.makedirs(self.autoreport_dir, exist_ok=True) @@ -137,6 +138,8 @@ def setup(self, trainer: "pl.Trainer") -> None: super().setup(trainer) + assert self.lightning_module.trainer is not None + # disable the `optimizer_zero_grad` function by setting it to `None`. 
# this is because the IPU zeros the gradients internally self._optimizer_zero_grad_original = self.lightning_module.optimizer_zero_grad @@ -159,7 +162,7 @@ def setup(self, trainer: "pl.Trainer") -> None: model = poptorch.trainingModel(model=model, options=training_opts, optimizer=optimizer) self.poptorch_models[RunningStage.TRAINING] = model - if self.lightning_module.trainer.enable_validation: + if self.lightning_module.trainer and self.lightning_module.trainer.enable_validation: model = poptorch.inferenceModel(model=model, options=inference_opts) self.poptorch_models[RunningStage.VALIDATING] = model elif trainer_fn == TrainerFn.VALIDATING: @@ -189,12 +192,14 @@ def replication_factor(self) -> int: if self._inference_opts: return self._inference_opts.replication_factor - return len(self.parallel_devices) - + return len(self.parallel_devices) if self.parallel_devices else 0 + assert self.lightning_module.trainer is not None stage = self.lightning_module.trainer.state.stage + assert stage is not None return self.poptorch_models[stage]._options.toDict()["replication_factor"] def _create_opts(self, training: bool) -> "poptorch.Options": + assert self.lightning_module.trainer is not None opts = poptorch.Options() opts.deviceIterations(self.device_iterations) opts.replicationFactor(self.replication_factor) @@ -218,11 +223,13 @@ def inference_opts(self) -> "poptorch.Options": return self._inference_opts @property - def lightning_module(self) -> Optional["pl.LightningModule"]: - return self.model.module if isinstance(self.model, LightningIPUModule) else self.model + def lightning_module(self) -> pl.LightningModule: + model = self.model.module if isinstance(self.model, LightningIPUModule) else self.model + assert model is not None + return model def _convert_to_poptorch_loader( - self, dataloader: DataLoader, sampler, mode: Optional[RunningStage] = None + self, dataloader: DataLoader, sampler: Sampler, mode: Optional[RunningStage] = None ) -> "poptorch.DataLoader": if isinstance(dataloader, poptorch.DataLoader): # the user is returning the `poptorch.DataLoader` directly, don't change anything. @@ -239,6 +246,7 @@ def _handle_gradient_accumulation_steps(self) -> None: ``optimizer_step`` will be called on every batch, and the IPU will handle grad accumulation internally. 
""" + assert self.lightning_module.trainer is not None accumulation_scheduler = self.lightning_module.trainer.accumulation_scheduler if accumulation_scheduler.epochs != [0]: @@ -250,18 +258,18 @@ def _handle_gradient_accumulation_steps(self) -> None: accumulation_scheduler.scheduling.update({0: 1}) @property - def _n_replicate(self): + def _n_replicate(self) -> int: opts = self.training_opts if self.lightning_module.training else self.inference_opts accumulate_grad_batches = opts.Training.gradient_accumulation device_iterations = opts.device_iterations replication_factor = opts.replication_factor return replication_factor * device_iterations * accumulate_grad_batches - def _prepare_input(self, args: Any): - def to_tuple(x): + def _prepare_input(self, args: Any) -> Any: + def to_tuple(x: Any) -> Tuple: return tuple(x) - def to_tensor(x): + def to_tensor(x: Any) -> Tensor: return torch.tensor(x).unsqueeze(0).repeat(self._n_replicate) args = apply_to_collection(args, dtype=list, function=to_tuple) @@ -278,7 +286,7 @@ def _disable_zero_grad(self) -> None: ) lightning_module.optimizer_zero_grad = None # type: ignore[assignment] - def _step(self, stage: RunningStage, *args: Any, **kwargs: Any): + def _step(self, stage: RunningStage, *args: Any, **kwargs: Any) -> STEP_OUTPUT: args = self._prepare_input(args) poptorch_model = self.poptorch_models[stage] self.lightning_module._running_torchscript = True @@ -286,19 +294,19 @@ def _step(self, stage: RunningStage, *args: Any, **kwargs: Any): self.lightning_module._running_torchscript = False return out - def training_step(self, *args, **kwargs) -> STEP_OUTPUT: + def training_step(self, *args: Any, **kwargs: Any) -> STEP_OUTPUT: with self.precision_plugin.train_step_context(): return self._step(RunningStage.TRAINING, *args, **kwargs) - def validation_step(self, *args, **kwargs) -> Optional[STEP_OUTPUT]: + def validation_step(self, *args: Any, **kwargs: Any) -> Optional[STEP_OUTPUT]: with self.precision_plugin.val_step_context(): return self._step(RunningStage.VALIDATING, *args, **kwargs) - def test_step(self, *args, **kwargs) -> Optional[STEP_OUTPUT]: + def test_step(self, *args: Any, **kwargs: Any) -> Optional[STEP_OUTPUT]: with self.precision_plugin.test_step_context(): return self._step(RunningStage.TESTING, *args, **kwargs) - def predict_step(self, *args, **kwargs) -> STEP_OUTPUT: + def predict_step(self, *args: Any, **kwargs: Any) -> STEP_OUTPUT: with self.precision_plugin.predict_step_context(): return self._step(RunningStage.PREDICTING, *args, **kwargs) @@ -309,24 +317,24 @@ def teardown(self) -> None: if self._optimizer_zero_grad_original is not None: # re-enable `optimizer_zero_grad` - self.lightning_module.optimizer_zero_grad = self._optimizer_zero_grad_original + self.lightning_module.optimizer_zero_grad = self._optimizer_zero_grad_original # type: ignore[assignment] for model in self.poptorch_models.values(): model.destroy() super().teardown() - def _compiled(self, model: Any): + def _compiled(self, model: Any) -> bool: # Required to ensure we only attach compiled models, as they are compiled lazily. 
return model._executable is not None - def _detach_models(self): + def _detach_models(self) -> None: """Detaches all stage specific models from IPU devices.""" for k, model in self.poptorch_models.items(): if self._compiled(model) and model.isAttachedToDevice(): model.detachFromDevice() - def _load_model(self, stage: str): + def _load_model(self, stage: str) -> None: """Loads the stage specific accelerator model onto device if compiled and not attached to IPU devices. Args: @@ -337,28 +345,28 @@ def _load_model(self, stage: str): if self._compiled(model) and not model.isAttachedToDevice(): model.attachToDevice() - def on_train_start(self): + def on_train_start(self) -> None: self._load_model(RunningStage.TRAINING) - def on_validation_start(self): + def on_validation_start(self) -> None: self._load_model(RunningStage.VALIDATING) - def on_test_start(self): + def on_test_start(self) -> None: self._load_model(RunningStage.TESTING) - def on_predict_start(self): + def on_predict_start(self) -> None: self._load_model(RunningStage.PREDICTING) - def on_train_end(self): + def on_train_end(self) -> None: self._detach_models() - def on_validation_end(self): + def on_validation_end(self) -> None: self._detach_models() - def on_test_end(self): + def on_test_end(self) -> None: self._detach_models() - def on_predict_end(self): + def on_predict_end(self) -> None: self._detach_models() def on_train_batch_start(self, batch: Any, batch_idx: int) -> None: @@ -386,7 +394,7 @@ def barrier(self, name: Optional[str] = None) -> None: def all_gather(self, tensor: Tensor, group: Optional[Any] = None, sync_grads: bool = False) -> Tensor: return tensor - def broadcast(self, obj: object, src: int = 0) -> object: + def broadcast(self, obj: TBroadcast, src: int = 0) -> TBroadcast: return obj @classmethod From a6d559380b184ba3a7457f868a8f8cb81f4aad6c Mon Sep 17 00:00:00 2001 From: Ian Hales Date: Tue, 19 Jul 2022 19:00:24 +0100 Subject: [PATCH 02/15] Remove extra typing check on the trainer validation --- src/pytorch_lightning/strategies/ipu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pytorch_lightning/strategies/ipu.py b/src/pytorch_lightning/strategies/ipu.py index cff917323a15c..f472d51f25719 100644 --- a/src/pytorch_lightning/strategies/ipu.py +++ b/src/pytorch_lightning/strategies/ipu.py @@ -162,7 +162,7 @@ def setup(self, trainer: "pl.Trainer") -> None: model = poptorch.trainingModel(model=model, options=training_opts, optimizer=optimizer) self.poptorch_models[RunningStage.TRAINING] = model - if self.lightning_module.trainer and self.lightning_module.trainer.enable_validation: + if self.lightning_module.trainer.enable_validation: model = poptorch.inferenceModel(model=model, options=inference_opts) self.poptorch_models[RunningStage.VALIDATING] = model elif trainer_fn == TrainerFn.VALIDATING: From e9949055ce85d033f1fc353286a9e1a523bcde8c Mon Sep 17 00:00:00 2001 From: Ian Hales Date: Wed, 20 Jul 2022 16:24:02 +0100 Subject: [PATCH 03/15] Fix circular dependency from unquoted type hint --- src/pytorch_lightning/strategies/ipu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pytorch_lightning/strategies/ipu.py b/src/pytorch_lightning/strategies/ipu.py index f472d51f25719..fd40e507f6d9e 100644 --- a/src/pytorch_lightning/strategies/ipu.py +++ b/src/pytorch_lightning/strategies/ipu.py @@ -223,7 +223,7 @@ def inference_opts(self) -> "poptorch.Options": return self._inference_opts @property - def lightning_module(self) -> pl.LightningModule: + def lightning_module(self) -> 
"pl.LightningModule": model = self.model.module if isinstance(self.model, LightningIPUModule) else self.model assert model is not None return model From 36e48becd33bed2a0bb10b6e921a91151d8c0554 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 21 Jul 2022 12:08:32 +0000 Subject: [PATCH 04/15] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/pytorch_lightning/strategies/ipu.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/pytorch_lightning/strategies/ipu.py b/src/pytorch_lightning/strategies/ipu.py index fd40e507f6d9e..ad328e11b6b4d 100644 --- a/src/pytorch_lightning/strategies/ipu.py +++ b/src/pytorch_lightning/strategies/ipu.py @@ -14,7 +14,6 @@ import json import os from typing import Any, Callable, Dict, List, Optional, Tuple, Union -from pytorch_lightning.strategies.strategy import TBroadcast import torch from torch import FloatTensor, Tensor @@ -26,6 +25,7 @@ from pytorch_lightning.plugins.io.checkpoint_plugin import CheckpointIO from pytorch_lightning.plugins.precision import PrecisionPlugin from pytorch_lightning.strategies.parallel import ParallelStrategy +from pytorch_lightning.strategies.strategy import TBroadcast from pytorch_lightning.trainer.states import RunningStage, TrainerFn from pytorch_lightning.utilities import _IPU_AVAILABLE, _POPTORCH_AVAILABLE, rank_zero_warn from pytorch_lightning.utilities.apply_func import apply_to_collection @@ -111,12 +111,12 @@ def __init__( self.device_iterations = device_iterations self.autoreport = autoreport self.autoreport_dir = autoreport_dir - self.poptorch_models : Dict[Union[RunningStage, str], "poptorch.PoplarExecutor"] = {} + self.poptorch_models: Dict[Union[RunningStage, str], "poptorch.PoplarExecutor"] = {} self._training_opts = training_opts self._inference_opts = inference_opts if self.autoreport: - options : Dict[str, Any] = {"autoReport.all": self.autoreport} + options: Dict[str, Any] = {"autoReport.all": self.autoreport} if self.autoreport_dir: self._fs = get_filesystem(str(self.autoreport_dir)) self._fs.makedirs(self.autoreport_dir, exist_ok=True) @@ -294,7 +294,7 @@ def _step(self, stage: RunningStage, *args: Any, **kwargs: Any) -> STEP_OUTPUT: self.lightning_module._running_torchscript = False return out - def training_step(self, *args: Any, **kwargs: Any) -> STEP_OUTPUT: + def training_step(self, *args: Any, **kwargs: Any) -> STEP_OUTPUT: with self.precision_plugin.train_step_context(): return self._step(RunningStage.TRAINING, *args, **kwargs) @@ -302,11 +302,11 @@ def validation_step(self, *args: Any, **kwargs: Any) -> Optional[STEP_OUTPUT]: with self.precision_plugin.val_step_context(): return self._step(RunningStage.VALIDATING, *args, **kwargs) - def test_step(self, *args: Any, **kwargs: Any) -> Optional[STEP_OUTPUT]: + def test_step(self, *args: Any, **kwargs: Any) -> Optional[STEP_OUTPUT]: with self.precision_plugin.test_step_context(): return self._step(RunningStage.TESTING, *args, **kwargs) - def predict_step(self, *args: Any, **kwargs: Any) -> STEP_OUTPUT: + def predict_step(self, *args: Any, **kwargs: Any) -> STEP_OUTPUT: with self.precision_plugin.predict_step_context(): return self._step(RunningStage.PREDICTING, *args, **kwargs) From 44bb884029276471c214bc4c097a096a901c0e35 Mon Sep 17 00:00:00 2001 From: Ian Hales Date: Thu, 21 Jul 2022 14:05:40 +0100 Subject: [PATCH 05/15] Fix issues that came up after rebasing master --- 
src/pytorch_lightning/strategies/ipu.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/pytorch_lightning/strategies/ipu.py b/src/pytorch_lightning/strategies/ipu.py index ad328e11b6b4d..b5f2127f8f8ad 100644 --- a/src/pytorch_lightning/strategies/ipu.py +++ b/src/pytorch_lightning/strategies/ipu.py @@ -13,7 +13,7 @@ # limitations under the License. import json import os -from typing import Any, Callable, Dict, List, Optional, Tuple, Union +from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union import torch from torch import FloatTensor, Tensor @@ -225,11 +225,12 @@ def inference_opts(self) -> "poptorch.Options": @property def lightning_module(self) -> "pl.LightningModule": model = self.model.module if isinstance(self.model, LightningIPUModule) else self.model - assert model is not None + if not isinstance(model, pl.LightningModule): + raise TypeError(f"Unwrapping the module did not yield a `LightningModule`, got {type(model)} instead.") return model def _convert_to_poptorch_loader( - self, dataloader: DataLoader, sampler: Sampler, mode: Optional[RunningStage] = None + self, dataloader: DataLoader, sampler: Union[Sampler, Iterable], mode: Optional[RunningStage] = None ) -> "poptorch.DataLoader": if isinstance(dataloader, poptorch.DataLoader): # the user is returning the `poptorch.DataLoader` directly, don't change anything. From 587477de004758f20e68ddff816ef48239119b6b Mon Sep 17 00:00:00 2001 From: Ian Hales Date: Fri, 29 Jul 2022 15:26:30 +0100 Subject: [PATCH 06/15] Still not quite working - down to the last error on unwrapping return type --- src/pytorch_lightning/strategies/ipu.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/pytorch_lightning/strategies/ipu.py b/src/pytorch_lightning/strategies/ipu.py index ee7e8ecc5b267..a0ec0b02001fe 100644 --- a/src/pytorch_lightning/strategies/ipu.py +++ b/src/pytorch_lightning/strategies/ipu.py @@ -20,7 +20,8 @@ from torch.utils.data import DataLoader, Sampler import pytorch_lightning as pl -from pytorch_lightning.overrides.base import _LightningModuleWrapperBase, _LightningPrecisionModuleWrapperBase +from pytorch_lightning.overrides.base import _LightningModuleWrapperBase, _LightningPrecisionModuleWrapperBase, unwrap_lightning_module + from pytorch_lightning.plugins.environments.cluster_environment import ClusterEnvironment from pytorch_lightning.plugins.io.checkpoint_plugin import CheckpointIO from pytorch_lightning.plugins.precision import PrecisionPlugin @@ -138,6 +139,7 @@ def setup(self, trainer: "pl.Trainer") -> None: super().setup(trainer) + assert self.lightning_module is not None assert self.lightning_module.trainer is not None # disable the `optimizer_zero_grad` function by setting it to `None`. 
@@ -227,9 +229,7 @@ def inference_opts(self) -> "poptorch.Options": @property def lightning_module(self) -> "pl.LightningModule": model = self.model.module if isinstance(self.model, LightningIPUModule) else self.model - if not isinstance(model, pl.LightningModule): - raise TypeError(f"Unwrapping the module did not yield a `LightningModule`, got {type(model)} instead.") - return model + return unwrap_lightning_module(model) def _convert_to_poptorch_loader( self, dataloader: DataLoader, sampler: Union[Sampler, Iterable], mode: Optional[RunningStage] = None From d3568bb001fd55e3768e0de97f87c4a10f99596c Mon Sep 17 00:00:00 2001 From: otaj Date: Tue, 2 Aug 2022 15:07:16 +0200 Subject: [PATCH 07/15] fix mypy errors --- src/pytorch_lightning/strategies/ipu.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/pytorch_lightning/strategies/ipu.py b/src/pytorch_lightning/strategies/ipu.py index 300193bcebf77..3f7aed496fb24 100644 --- a/src/pytorch_lightning/strategies/ipu.py +++ b/src/pytorch_lightning/strategies/ipu.py @@ -233,6 +233,7 @@ def inference_opts(self) -> "poptorch.Options": @property def lightning_module(self) -> "pl.LightningModule": model = self.model.module if isinstance(self.model, LightningIPUModule) else self.model + assert model is not None return unwrap_lightning_module(model) def _convert_to_poptorch_loader( @@ -243,7 +244,7 @@ def _convert_to_poptorch_loader( return dataloader dl_args, dl_kwargs = _get_dataloader_init_args_and_kwargs( - dataloader, sampler, mode, self.replication_factor > 1 + dataloader, sampler, mode, self.replication_factor > 1 # type:ignore [arg-type] ) opts = self.training_opts if mode == RunningStage.TRAINING else self.inference_opts dataloader = poptorch.DataLoader(opts, *dl_args, **dl_kwargs) From 35a4dc5fc7a677b9dbaafb975469e3737ad1fff5 Mon Sep 17 00:00:00 2001 From: otaj Date: Tue, 2 Aug 2022 15:28:14 +0200 Subject: [PATCH 08/15] surgical asserts --- src/pytorch_lightning/strategies/ipu.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/pytorch_lightning/strategies/ipu.py b/src/pytorch_lightning/strategies/ipu.py index 3f7aed496fb24..3f5e158f31f4e 100644 --- a/src/pytorch_lightning/strategies/ipu.py +++ b/src/pytorch_lightning/strategies/ipu.py @@ -207,6 +207,7 @@ def replication_factor(self) -> int: return self.poptorch_models[stage]._options.toDict()["replication_factor"] def _create_opts(self, training: bool) -> "poptorch.Options": + assert self.lightning_module is not None assert self.lightning_module.trainer is not None opts = poptorch.Options() opts.deviceIterations(self.device_iterations) @@ -231,10 +232,9 @@ def inference_opts(self) -> "poptorch.Options": return self._inference_opts @property - def lightning_module(self) -> "pl.LightningModule": - model = self.model.module if isinstance(self.model, LightningIPUModule) else self.model - assert model is not None - return unwrap_lightning_module(model) + def lightning_module(self) -> Optional["pl.LightningModule"]: + if self.model is not None: + return unwrap_lightning_module(self.model) def _convert_to_poptorch_loader( self, dataloader: DataLoader, sampler: Union[Sampler, Iterable], mode: Optional[RunningStage] = None @@ -256,6 +256,7 @@ def _handle_gradient_accumulation_steps(self) -> None: ``optimizer_step`` will be called on every batch, and the IPU will handle grad accumulation internally. 
""" + assert self.lightning_module is not None assert self.lightning_module.trainer is not None accumulation_scheduler = self.lightning_module.trainer.accumulation_scheduler @@ -269,6 +270,7 @@ def _handle_gradient_accumulation_steps(self) -> None: @property def _n_replicate(self) -> int: + assert self.lightning_module is not None opts = self.training_opts if self.lightning_module.training else self.inference_opts accumulate_grad_batches = opts.Training.gradient_accumulation device_iterations = opts.device_iterations @@ -298,6 +300,7 @@ def batch_to_device(self, batch: Any, device: Optional[torch.device] = None, dat def _disable_zero_grad(self) -> None: lightning_module = self.lightning_module + assert lightning_module is not None if is_overridden("optimizer_zero_grad", lightning_module): assert lightning_module is not None # `is_overridden` returns False otherwise rank_zero_warn( @@ -308,6 +311,7 @@ def _disable_zero_grad(self) -> None: def _step(self, stage: RunningStage, *args: Any, **kwargs: Any) -> STEP_OUTPUT: args = self._prepare_input(args) + assert self.lightning_module is not None poptorch_model = self.poptorch_models[stage] self.lightning_module._running_torchscript = True out = poptorch_model(*args, **kwargs) @@ -335,7 +339,7 @@ def teardown(self) -> None: # undo dataloader patching pl.trainer.connectors.data_connector._update_dataloader = self._update_dataloader_original - if self._optimizer_zero_grad_original is not None: + if self._optimizer_zero_grad_original is not None and self.lightning_module is not None: # re-enable `optimizer_zero_grad` self.lightning_module.optimizer_zero_grad = self._optimizer_zero_grad_original # type: ignore[assignment] From 199a69b241e58e69cec75eeb485856050bf5fb5b Mon Sep 17 00:00:00 2001 From: Ian Hales Date: Tue, 2 Aug 2022 18:05:21 +0100 Subject: [PATCH 09/15] Change stage from str to RunningStage and simplify dict definition for models --- src/pytorch_lightning/strategies/ipu.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pytorch_lightning/strategies/ipu.py b/src/pytorch_lightning/strategies/ipu.py index 3f5e158f31f4e..5f9760f7d2f30 100644 --- a/src/pytorch_lightning/strategies/ipu.py +++ b/src/pytorch_lightning/strategies/ipu.py @@ -117,7 +117,7 @@ def __init__( self.device_iterations = device_iterations self.autoreport = autoreport self.autoreport_dir = autoreport_dir - self.poptorch_models: Dict[Union[RunningStage, str], "poptorch.PoplarExecutor"] = {} + self.poptorch_models: Dict[RunningStage, "poptorch.PoplarExecutor"] = {} self._training_opts = training_opts self._inference_opts = inference_opts @@ -358,7 +358,7 @@ def _detach_models(self) -> None: if self._compiled(model) and model.isAttachedToDevice(): model.detachFromDevice() - def _load_model(self, stage: str) -> None: + def _load_model(self, stage: RunningStage) -> None: """Loads the stage specific accelerator model onto device if compiled and not attached to IPU devices. 
Args: From 88a2efd257aa21a96325a15abe1d8a97e5ac4a12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Wed, 3 Aug 2022 06:14:03 -0400 Subject: [PATCH 10/15] Update src/pytorch_lightning/strategies/ipu.py --- src/pytorch_lightning/strategies/ipu.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/pytorch_lightning/strategies/ipu.py b/src/pytorch_lightning/strategies/ipu.py index 5f9760f7d2f30..d38dbcef51114 100644 --- a/src/pytorch_lightning/strategies/ipu.py +++ b/src/pytorch_lightning/strategies/ipu.py @@ -244,7 +244,10 @@ def _convert_to_poptorch_loader( return dataloader dl_args, dl_kwargs = _get_dataloader_init_args_and_kwargs( - dataloader, sampler, mode, self.replication_factor > 1 # type:ignore [arg-type] + dataloader, + sampler, # type: ignore[arg-type] + mode, + self.replication_factor > 1 ) opts = self.training_opts if mode == RunningStage.TRAINING else self.inference_opts dataloader = poptorch.DataLoader(opts, *dl_args, **dl_kwargs) From ed96cef1416f82c3b6cdc3eac90880dd43bdbfa9 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 3 Aug 2022 10:15:44 +0000 Subject: [PATCH 11/15] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/pytorch_lightning/strategies/ipu.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/pytorch_lightning/strategies/ipu.py b/src/pytorch_lightning/strategies/ipu.py index d38dbcef51114..cabc5a8f98318 100644 --- a/src/pytorch_lightning/strategies/ipu.py +++ b/src/pytorch_lightning/strategies/ipu.py @@ -244,10 +244,7 @@ def _convert_to_poptorch_loader( return dataloader dl_args, dl_kwargs = _get_dataloader_init_args_and_kwargs( - dataloader, - sampler, # type: ignore[arg-type] - mode, - self.replication_factor > 1 + dataloader, sampler, mode, self.replication_factor > 1 # type: ignore[arg-type] ) opts = self.training_opts if mode == RunningStage.TRAINING else self.inference_opts dataloader = poptorch.DataLoader(opts, *dl_args, **dl_kwargs) From f1f0061fcb28157bf41dea30e009a0ec2759537e Mon Sep 17 00:00:00 2001 From: Ian Hales Date: Wed, 3 Aug 2022 17:19:12 +0100 Subject: [PATCH 12/15] Remove unnecessary asserts and the lightning_module property as no longer needed --- src/pytorch_lightning/strategies/ipu.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/pytorch_lightning/strategies/ipu.py b/src/pytorch_lightning/strategies/ipu.py index cabc5a8f98318..9e76059e9c7d4 100644 --- a/src/pytorch_lightning/strategies/ipu.py +++ b/src/pytorch_lightning/strategies/ipu.py @@ -145,7 +145,6 @@ def setup(self, trainer: "pl.Trainer") -> None: super().setup(trainer) assert self.lightning_module is not None - assert self.lightning_module.trainer is not None # disable the `optimizer_zero_grad` function by setting it to `None`. 
# this is because the IPU zeros the gradients internally @@ -201,14 +200,12 @@ def replication_factor(self) -> int: return self._inference_opts.replication_factor return len(self.parallel_devices) if self.parallel_devices else 0 - assert self.lightning_module.trainer is not None stage = self.lightning_module.trainer.state.stage assert stage is not None return self.poptorch_models[stage]._options.toDict()["replication_factor"] def _create_opts(self, training: bool) -> "poptorch.Options": assert self.lightning_module is not None - assert self.lightning_module.trainer is not None opts = poptorch.Options() opts.deviceIterations(self.device_iterations) opts.replicationFactor(self.replication_factor) @@ -231,11 +228,6 @@ def inference_opts(self) -> "poptorch.Options": self._inference_opts = self._create_opts(training=False) return self._inference_opts - @property - def lightning_module(self) -> Optional["pl.LightningModule"]: - if self.model is not None: - return unwrap_lightning_module(self.model) - def _convert_to_poptorch_loader( self, dataloader: DataLoader, sampler: Union[Sampler, Iterable], mode: Optional[RunningStage] = None ) -> "poptorch.DataLoader": @@ -257,7 +249,6 @@ def _handle_gradient_accumulation_steps(self) -> None: ``optimizer_step`` will be called on every batch, and the IPU will handle grad accumulation internally. """ assert self.lightning_module is not None - assert self.lightning_module.trainer is not None accumulation_scheduler = self.lightning_module.trainer.accumulation_scheduler if accumulation_scheduler.epochs != [0]: From 3e90ae1d15fb0c63c3b67954faf25e55102fc041 Mon Sep 17 00:00:00 2001 From: Ian Hales Date: Wed, 3 Aug 2022 17:41:05 +0100 Subject: [PATCH 13/15] Remove unused input after taking out lightning_module --- src/pytorch_lightning/strategies/ipu.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/pytorch_lightning/strategies/ipu.py b/src/pytorch_lightning/strategies/ipu.py index 9e76059e9c7d4..9d3021c43479e 100644 --- a/src/pytorch_lightning/strategies/ipu.py +++ b/src/pytorch_lightning/strategies/ipu.py @@ -22,8 +22,7 @@ import pytorch_lightning as pl from pytorch_lightning.overrides.base import ( _LightningModuleWrapperBase, - _LightningPrecisionModuleWrapperBase, - unwrap_lightning_module, + _LightningPrecisionModuleWrapperBase ) from pytorch_lightning.plugins.environments.cluster_environment import ClusterEnvironment from pytorch_lightning.plugins.io.checkpoint_plugin import CheckpointIO From bc6f3148b78e119692387a941bc9492425cfe9fe Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 3 Aug 2022 16:43:09 +0000 Subject: [PATCH 14/15] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/pytorch_lightning/strategies/ipu.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/pytorch_lightning/strategies/ipu.py b/src/pytorch_lightning/strategies/ipu.py index 9d3021c43479e..e7bb158b4caa4 100644 --- a/src/pytorch_lightning/strategies/ipu.py +++ b/src/pytorch_lightning/strategies/ipu.py @@ -20,10 +20,7 @@ from torch.utils.data import DataLoader, Sampler import pytorch_lightning as pl -from pytorch_lightning.overrides.base import ( - _LightningModuleWrapperBase, - _LightningPrecisionModuleWrapperBase -) +from pytorch_lightning.overrides.base import _LightningModuleWrapperBase, _LightningPrecisionModuleWrapperBase from pytorch_lightning.plugins.environments.cluster_environment import 
ClusterEnvironment from pytorch_lightning.plugins.io.checkpoint_plugin import CheckpointIO from pytorch_lightning.plugins.precision import PrecisionPlugin From d9b8eef4628658124fc44b12fc8afe2cdaf133eb Mon Sep 17 00:00:00 2001 From: Ian <31828525+HalestormAI@users.noreply.github.com> Date: Wed, 3 Aug 2022 20:22:40 +0100 Subject: [PATCH 15/15] Switch conditionals for assertions Co-authored-by: Rohit Gupta --- src/pytorch_lightning/strategies/ipu.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/pytorch_lightning/strategies/ipu.py b/src/pytorch_lightning/strategies/ipu.py index e7bb158b4caa4..0b5d8e835ad1d 100644 --- a/src/pytorch_lightning/strategies/ipu.py +++ b/src/pytorch_lightning/strategies/ipu.py @@ -195,7 +195,8 @@ def replication_factor(self) -> int: if self._inference_opts: return self._inference_opts.replication_factor - return len(self.parallel_devices) if self.parallel_devices else 0 + assert self.parallel_devices + return len(self.parallel_devices) stage = self.lightning_module.trainer.state.stage assert stage is not None return self.poptorch_models[stage]._options.toDict()["replication_factor"] @@ -326,7 +327,8 @@ def teardown(self) -> None: # undo dataloader patching pl.trainer.connectors.data_connector._update_dataloader = self._update_dataloader_original - if self._optimizer_zero_grad_original is not None and self.lightning_module is not None: + assert self.lightning_module is not None + if self._optimizer_zero_grad_original is not None: # re-enable `optimizer_zero_grad` self.lightning_module.optimizer_zero_grad = self._optimizer_zero_grad_original # type: ignore[assignment]
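
Note on the typing pattern used throughout this series: most of the changes satisfy mypy's strict checks by narrowing `Optional` attributes with `assert` (for example `assert self.lightning_module is not None` before touching the trainer, or `assert self.parallel_devices` in place of the old conditional fallback) rather than widening the declared types. The sketch below is a minimal, self-contained illustration of that narrowing pattern only -- the `Strategy`/`Module` names are placeholders, not the Lightning API:

    from typing import Optional


    class Module:
        def training_step(self) -> float:
            return 0.0


    class Strategy:
        def __init__(self, module: Optional[Module] = None) -> None:
            # Optional because the module is only attached during setup.
            self.module: Optional[Module] = module

        def run(self) -> float:
            # Without the assert, strict mypy reports:
            #   Item "None" of "Optional[Module]" has no attribute "training_step"
            # The assert narrows Optional[Module] to Module for the rest of this
            # scope and fails fast at runtime if setup never attached a module.
            assert self.module is not None
            return self.module.training_step()


    print(Strategy(Module()).run())  # 0.0

The quoted return annotation introduced in patch 03 (`-> "pl.LightningModule"`) serves the companion goal noted in its commit message: a string annotation is not evaluated at import time, so the hint type-checks statically without re-triggering the circular import between the strategy module and the `pytorch_lightning` package.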