From 0a2aee2c35b48cc15e9f30edcd89b7bf1eada1bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Tue, 19 Oct 2021 17:34:26 +0200 Subject: [PATCH 01/26] all changes --- pytorch_lightning/accelerators/accelerator.py | 19 +++++++++++++------ pytorch_lightning/accelerators/tpu.py | 4 +--- .../plugins/precision/deepspeed_precision.py | 14 ++++++++------ .../plugins/precision/native_amp.py | 6 +++--- .../plugins/precision/precision_plugin.py | 5 +++-- .../plugins/training_type/tpu_spawn.py | 3 +++ 6 files changed, 31 insertions(+), 20 deletions(-) diff --git a/pytorch_lightning/accelerators/accelerator.py b/pytorch_lightning/accelerators/accelerator.py index 541cf5de3be2b..7e86f9f700986 100644 --- a/pytorch_lightning/accelerators/accelerator.py +++ b/pytorch_lightning/accelerators/accelerator.py @@ -314,23 +314,30 @@ def backward(self, closure_loss: Tensor, *args: Any, **kwargs: Any) -> Tensor: return closure_loss - def optimizer_step(self, optimizer: Optimizer, opt_idx: int, lambda_closure: Callable, **kwargs: Any) -> None: + def optimizer_step( + self, + optimizer: Optimizer, + opt_idx: int = 0, + lambda_closure: Optional[Callable] = None, + model: Optional[Union[Module, "pl.LightningModule"]] = None, + **kwargs: Any + ) -> None: """performs the actual optimizer step. Args: optimizer: the optimizer performing the step opt_idx: index of the current optimizer lambda_closure: closure calculating the loss value + model: reference to the model, optionally defining optimizer step related hooks """ + model = model if model is not None else self.lightning_module make_optimizer_step = self.precision_plugin.pre_optimizer_step( - self.lightning_module, optimizer, opt_idx, lambda_closure, **kwargs + model, optimizer, opt_idx, lambda_closure, **kwargs ) if make_optimizer_step: - self.run_optimizer_step(optimizer, opt_idx, lambda_closure, **kwargs) + self.run_optimizer_step(optimizer, lambda_closure, **kwargs) - def run_optimizer_step( - self, optimizer: Optimizer, optimizer_idx: int, lambda_closure: Callable, **kwargs: Any - ) -> None: + def run_optimizer_step(self, optimizer: Optimizer, lambda_closure: Callable, **kwargs: Any) -> None: self.training_type_plugin.optimizer_step(optimizer, lambda_closure=lambda_closure, **kwargs) def optimizer_zero_grad(self, current_epoch: int, batch_idx: int, optimizer: Optimizer, opt_idx: int) -> None: diff --git a/pytorch_lightning/accelerators/tpu.py b/pytorch_lightning/accelerators/tpu.py index 68925ab67aca9..bd47fd11a5591 100644 --- a/pytorch_lightning/accelerators/tpu.py +++ b/pytorch_lightning/accelerators/tpu.py @@ -49,9 +49,7 @@ def setup(self, trainer: "pl.Trainer") -> None: raise MisconfigurationException("TPUs only support a single tpu core or tpu spawn training.") return super().setup(trainer) - def run_optimizer_step( - self, optimizer: Optimizer, optimizer_idx: int, lambda_closure: Callable, **kwargs: Any - ) -> None: + def run_optimizer_step(self, optimizer: Optimizer, lambda_closure: Callable, **kwargs: Any) -> None: xm.optimizer_step(optimizer, optimizer_args={"closure": lambda_closure, **kwargs}) def _move_optimizer_state(self, device: Optional[torch.device] = None) -> None: diff --git a/pytorch_lightning/plugins/precision/deepspeed_precision.py b/pytorch_lightning/plugins/precision/deepspeed_precision.py index 6954adcbef164..d6ad215d3e486 100644 --- a/pytorch_lightning/plugins/precision/deepspeed_precision.py +++ b/pytorch_lightning/plugins/precision/deepspeed_precision.py @@ -36,24 +36,26 @@ def __init__(self, precision: int) -> 
None: def pre_optimizer_step( self, - model: "pl.LightningModule", + model: Union[Module, "pl.LightningModule"], optimizer: Optimizer, optimizer_idx: int, lambda_closure: Callable, **kwargs: Any, ) -> bool: """Hook to do something before each optimizer step.""" - result = lambda_closure() # DeepSpeed does not support closures + result = lambda_closure() if lambda_closure is not None else None # DeepSpeed does not support closures super().pre_optimizer_step(model, optimizer, optimizer_idx, lambda_closure, **kwargs) # in manual optimization, the closure does not return a value - if model.automatic_optimization and result is None: + if isinstance(model, pl.LightningModule) and model.automatic_optimization and result is None: raise MisconfigurationException( "Skipping backward by returning `None` from your `training_step` is not supported by `DeepSpeed`" ) # the following should be in a `optimizer_step` hook but we don't have one in the precision plugin. - deepspeed_engine = model.trainer.model - deepspeed_engine.step() - return False + if isinstance(model, pl.LightningModule): + deepspeed_engine = model.trainer.model + deepspeed_engine.step() + return False + return True def backward(self, model: "pl.LightningModule", closure_loss: Tensor, *args: Any, **kwargs: Any) -> None: if is_overridden("backward", model): diff --git a/pytorch_lightning/plugins/precision/native_amp.py b/pytorch_lightning/plugins/precision/native_amp.py index e298569996274..08b8080715d84 100644 --- a/pytorch_lightning/plugins/precision/native_amp.py +++ b/pytorch_lightning/plugins/precision/native_amp.py @@ -77,7 +77,7 @@ def _run_backward(self, tensor: Tensor, model: Module, *args: Any, **kwargs: Any def pre_optimizer_step( self, - model: "pl.LightningModule", + model: Union["pl.LightningModule", Module], optimizer: Optimizer, optimizer_idx: int, lambda_closure: Callable, @@ -90,12 +90,12 @@ def pre_optimizer_step( raise MisconfigurationException( f"Native AMP and the LBFGS optimizer are not compatible (optimizer {optimizer_idx})." 
) - result = lambda_closure() # native amp does not support closures + result = lambda_closure() if lambda_closure is not None else None # native amp does not support closures self.scaler.unscale_(optimizer) super().pre_optimizer_step(model, optimizer, optimizer_idx, lambda_closure, **kwargs) skipped_backward = result is None # in manual optimization, the closure does not return a value - if not model.automatic_optimization or not skipped_backward: + if not isinstance(model, pl.LightningModule) or not model.automatic_optimization or not skipped_backward: # note: the scaler will skip the `optimizer.step` if nonfinite gradients are found self.scaler.step(optimizer) self.scaler.update() diff --git a/pytorch_lightning/plugins/precision/precision_plugin.py b/pytorch_lightning/plugins/precision/precision_plugin.py index 9ec127886396c..dc378e9cb195c 100644 --- a/pytorch_lightning/plugins/precision/precision_plugin.py +++ b/pytorch_lightning/plugins/precision/precision_plugin.py @@ -99,14 +99,15 @@ def _run_backward(self, tensor: Tensor, model: Module, *args: Any, **kwargs: Any def pre_optimizer_step( self, - model: "pl.LightningModule", + model: Union["pl.LightningModule", Module], optimizer: Optimizer, optimizer_idx: int, lambda_closure: Callable, **kwargs: Any, ) -> bool: """Hook to do something before each optimizer step.""" - model.trainer.call_hook("on_before_optimizer_step", optimizer, optimizer_idx) + if isinstance(model, pl.LightningModule): + model.trainer.call_hook("on_before_optimizer_step", optimizer, optimizer_idx) return True def clip_gradients( diff --git a/pytorch_lightning/plugins/training_type/tpu_spawn.py b/pytorch_lightning/plugins/training_type/tpu_spawn.py index 55e62aade809d..30e36a245eff1 100644 --- a/pytorch_lightning/plugins/training_type/tpu_spawn.py +++ b/pytorch_lightning/plugins/training_type/tpu_spawn.py @@ -268,6 +268,9 @@ def get_mp_spawn_kwargs(self, trainer: "pl.Trainer") -> dict: "start_method": self.start_method, } + def optimizer_step(self, optimizer: Optimizer, lambda_closure: Callable, **kwargs): + xm.optimizer_step(optimizer, barrier=False, optimizer_args={"closure": lambda_closure, **kwargs}) + def start_training(self, trainer: "pl.Trainer") -> None: # todo: precision pluging is call in accelerator setup and should be moved if "XLA_USE_BF16" in os.environ: From b10017ecd8f600fafee85ea6d56a1e7b64133f08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Tue, 19 Oct 2021 17:39:01 +0200 Subject: [PATCH 02/26] changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8668f8ae50b0b..205ceba395775 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -202,6 +202,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
- LightningLite: * Added `PrecisionPlugin.forward_context`, making it the default implementation for all `{train,val,test,predict}_step_context()` methods ([#9988](https://github.com/PyTorchLightning/pytorch-lightning/pull/9988)) * Added `DDPSpawnPlugin.spawn()` for spawning new processes of a given function ([#10018](https://github.com/PyTorchLightning/pytorch-lightning/pull/10018)) + * Added optional `model` argument to the `optimizer_step` methods in accelerators and plugins ([#10023](https://github.com/PyTorchLightning/pytorch-lightning/pull/10023)) + ### Changed From 840161192a5c75aaa2bcb53a0147c0bf24c579b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Tue, 19 Oct 2021 17:39:33 +0200 Subject: [PATCH 03/26] remove --- pytorch_lightning/plugins/training_type/tpu_spawn.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pytorch_lightning/plugins/training_type/tpu_spawn.py b/pytorch_lightning/plugins/training_type/tpu_spawn.py index 30e36a245eff1..55e62aade809d 100644 --- a/pytorch_lightning/plugins/training_type/tpu_spawn.py +++ b/pytorch_lightning/plugins/training_type/tpu_spawn.py @@ -268,9 +268,6 @@ def get_mp_spawn_kwargs(self, trainer: "pl.Trainer") -> dict: "start_method": self.start_method, } - def optimizer_step(self, optimizer: Optimizer, lambda_closure: Callable, **kwargs): - xm.optimizer_step(optimizer, barrier=False, optimizer_args={"closure": lambda_closure, **kwargs}) - def start_training(self, trainer: "pl.Trainer") -> None: # todo: precision pluging is call in accelerator setup and should be moved if "XLA_USE_BF16" in os.environ: From 3ba3bcffd62ec92f307f460b2811cbb469bf4373 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Tue, 19 Oct 2021 17:44:43 +0200 Subject: [PATCH 04/26] update tpu --- pytorch_lightning/accelerators/tpu.py | 3 --- pytorch_lightning/plugins/training_type/tpu_spawn.py | 6 +++++- .../plugins/training_type/training_type_plugin.py | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/pytorch_lightning/accelerators/tpu.py b/pytorch_lightning/accelerators/tpu.py index bd47fd11a5591..0628edf340326 100644 --- a/pytorch_lightning/accelerators/tpu.py +++ b/pytorch_lightning/accelerators/tpu.py @@ -49,9 +49,6 @@ def setup(self, trainer: "pl.Trainer") -> None: raise MisconfigurationException("TPUs only support a single tpu core or tpu spawn training.") return super().setup(trainer) - def run_optimizer_step(self, optimizer: Optimizer, lambda_closure: Callable, **kwargs: Any) -> None: - xm.optimizer_step(optimizer, optimizer_args={"closure": lambda_closure, **kwargs}) - def _move_optimizer_state(self, device: Optional[torch.device] = None) -> None: """Moves the state of the optimizers to the TPU if needed.""" # TODO: `self.root_device` would raise error if called outside the spawn process diff --git a/pytorch_lightning/plugins/training_type/tpu_spawn.py b/pytorch_lightning/plugins/training_type/tpu_spawn.py index 55e62aade809d..ff2f34963620c 100644 --- a/pytorch_lightning/plugins/training_type/tpu_spawn.py +++ b/pytorch_lightning/plugins/training_type/tpu_spawn.py @@ -15,11 +15,12 @@ import os import re import time -from typing import Any, Dict, List, Optional, Union +from typing import Any, Dict, List, Optional, Union, Callable import torch import torch.multiprocessing as mp from torch.nn import Module +from torch.optim import Optimizer from torch.utils.data import DataLoader import pytorch_lightning as pl @@ -188,6 +189,9 @@ def new_process(self, process_idx: int, trainer, mp_queue) -> None: # 
ensure that spawned processes go through teardown before joining trainer._call_teardown_hook() + def optimizer_step(self, optimizer: Optimizer, lambda_closure: Callable, **kwargs) -> None: + xm.optimizer_step(optimizer, optimizer_args={"closure": lambda_closure, **kwargs}) + def model_to_device(self) -> None: self.model = self.wrapped_model.to(self.root_device) diff --git a/pytorch_lightning/plugins/training_type/training_type_plugin.py b/pytorch_lightning/plugins/training_type/training_type_plugin.py index 9c53069063a52..5648fecb4038a 100644 --- a/pytorch_lightning/plugins/training_type/training_type_plugin.py +++ b/pytorch_lightning/plugins/training_type/training_type_plugin.py @@ -225,7 +225,7 @@ def process_dataloader(self, dataloader: Union[Iterable, DataLoader]) -> Union[I def init_optimizers(self, trainer: "pl.Trainer", model: "pl.LightningModule"): return trainer.init_optimizers(model) - def optimizer_step(self, optimizer: torch.optim.Optimizer, lambda_closure: Callable, **kwargs): + def optimizer_step(self, optimizer: torch.optim.Optimizer, lambda_closure: Callable, **kwargs) -> None: optimizer.step(closure=lambda_closure, **kwargs) @property From 295137ee7bc097648862c61b2823930db90f67b8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 19 Oct 2021 15:46:07 +0000 Subject: [PATCH 05/26] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pytorch_lightning/plugins/training_type/tpu_spawn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/plugins/training_type/tpu_spawn.py b/pytorch_lightning/plugins/training_type/tpu_spawn.py index ff2f34963620c..553a927f16e30 100644 --- a/pytorch_lightning/plugins/training_type/tpu_spawn.py +++ b/pytorch_lightning/plugins/training_type/tpu_spawn.py @@ -15,7 +15,7 @@ import os import re import time -from typing import Any, Dict, List, Optional, Union, Callable +from typing import Any, Callable, Dict, List, Optional, Union import torch import torch.multiprocessing as mp From 98b27a48968f31f83cefa5c0c60f82eb443d092c Mon Sep 17 00:00:00 2001 From: Carlos Mocholi Date: Tue, 19 Oct 2021 22:27:01 +0200 Subject: [PATCH 06/26] Isolate optimizer step logic to the `PrecisionPlugin` --- pytorch_lightning/accelerators/accelerator.py | 7 +--- pytorch_lightning/accelerators/ipu.py | 7 +--- pytorch_lightning/accelerators/tpu.py | 8 +--- .../plugins/precision/apex_amp.py | 10 ++++- .../plugins/precision/deepspeed_precision.py | 39 ++++++++++-------- .../plugins/precision/ipu_precision.py | 41 +++++++++++++++---- pytorch_lightning/plugins/precision/tpu.py | 29 ++++++++++++- .../plugins/training_type/deepspeed.py | 7 +--- 8 files changed, 94 insertions(+), 54 deletions(-) diff --git a/pytorch_lightning/accelerators/accelerator.py b/pytorch_lightning/accelerators/accelerator.py index 541cf5de3be2b..f23e01c5fdd10 100644 --- a/pytorch_lightning/accelerators/accelerator.py +++ b/pytorch_lightning/accelerators/accelerator.py @@ -326,12 +326,7 @@ def optimizer_step(self, optimizer: Optimizer, opt_idx: int, lambda_closure: Cal self.lightning_module, optimizer, opt_idx, lambda_closure, **kwargs ) if make_optimizer_step: - self.run_optimizer_step(optimizer, opt_idx, lambda_closure, **kwargs) - - def run_optimizer_step( - self, optimizer: Optimizer, optimizer_idx: int, lambda_closure: Callable, **kwargs: Any - ) -> None: - self.training_type_plugin.optimizer_step(optimizer, lambda_closure=lambda_closure, **kwargs) + 
self.training_type_plugin.optimizer_step(optimizer, lambda_closure=lambda_closure, **kwargs) def optimizer_zero_grad(self, current_epoch: int, batch_idx: int, optimizer: Optimizer, opt_idx: int) -> None: """Zeros all model parameter's gradients.""" diff --git a/pytorch_lightning/accelerators/ipu.py b/pytorch_lightning/accelerators/ipu.py index fbd23b5f2a217..ef38c9ed0bb16 100644 --- a/pytorch_lightning/accelerators/ipu.py +++ b/pytorch_lightning/accelerators/ipu.py @@ -11,10 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import Any, Callable, Dict, Union +from typing import Any, Dict, Union import torch -from torch.optim import Optimizer import pytorch_lightning as pl from pytorch_lightning.accelerators.accelerator import Accelerator @@ -35,10 +34,6 @@ def setup_optimizers(self, trainer: "pl.Trainer") -> None: if len(self.optimizers) > 1: raise MisconfigurationException("IPUs currently only support one optimizer.") - def optimizer_step(self, optimizer: Optimizer, opt_idx: int, lambda_closure: Callable, **kwargs: Any) -> None: - # Optimizer step is handled by the IPU accelerator. - lambda_closure() - def get_device_stats(self, device: Union[str, torch.device]) -> Dict[str, Any]: """IPU device stats aren't supported yet.""" return {} diff --git a/pytorch_lightning/accelerators/tpu.py b/pytorch_lightning/accelerators/tpu.py index b85a92794e316..7c7680c32f8c9 100644 --- a/pytorch_lightning/accelerators/tpu.py +++ b/pytorch_lightning/accelerators/tpu.py @@ -11,10 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Any, Callable, Dict, Optional, Union +from typing import Any, Dict, Optional, Union import torch -from torch.optim import Optimizer import pytorch_lightning as pl from pytorch_lightning.accelerators.accelerator import Accelerator @@ -49,11 +48,6 @@ def setup(self, trainer: "pl.Trainer") -> None: ) return super().setup(trainer) - def run_optimizer_step( - self, optimizer: Optimizer, optimizer_idx: int, lambda_closure: Callable, **kwargs: Any - ) -> None: - xm.optimizer_step(optimizer, optimizer_args={"closure": lambda_closure, **kwargs}) - def _move_optimizer_state(self, device: Optional[torch.device] = None) -> None: """Moves the state of the optimizers to the TPU if needed.""" # TODO: `self.root_device` would raise error if called outside the spawn process diff --git a/pytorch_lightning/plugins/precision/apex_amp.py b/pytorch_lightning/plugins/precision/apex_amp.py index 27fa856d0f4b9..3af949e649681 100644 --- a/pytorch_lightning/plugins/precision/apex_amp.py +++ b/pytorch_lightning/plugins/precision/apex_amp.py @@ -15,11 +15,12 @@ import torch from torch import Tensor -from torch.optim import Optimizer +from torch.optim import LBFGS, Optimizer import pytorch_lightning as pl from pytorch_lightning.plugins.precision.mixed import MixedPrecisionPlugin from pytorch_lightning.utilities import _APEX_AVAILABLE, AMPType +from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.types import _PARAMETERS if _APEX_AVAILABLE: @@ -29,9 +30,10 @@ class ApexMixedPrecisionPlugin(MixedPrecisionPlugin): """Mixed Precision Plugin based on Nvidia/Apex (https://github.com/NVIDIA/apex)""" + backend = AMPType.APEX + def __init__(self, amp_level: str = "O2") -> None: super().__init__() - self.backend = AMPType.APEX self.amp_level = amp_level self._connected = False @@ -97,6 +99,10 @@ def pre_optimizer_step( **kwargs: Any, ) -> bool: """Hook to do something before each optimizer step.""" + if isinstance(optimizer, LBFGS): + raise MisconfigurationException( + f"apex AMP and the LBFGS optimizer are not compatible (optimizer {optimizer_idx})." + ) result = lambda_closure() # APEX amp does not support closures super().pre_optimizer_step(model, optimizer, optimizer_idx, lambda_closure, **kwargs) skipped_backward = result is None diff --git a/pytorch_lightning/plugins/precision/deepspeed_precision.py b/pytorch_lightning/plugins/precision/deepspeed_precision.py index 6954adcbef164..8af0d069a508c 100644 --- a/pytorch_lightning/plugins/precision/deepspeed_precision.py +++ b/pytorch_lightning/plugins/precision/deepspeed_precision.py @@ -15,7 +15,7 @@ from torch import Tensor from torch.nn import Module -from torch.optim import Optimizer +from torch.optim import LBFGS, Optimizer import pytorch_lightning as pl from pytorch_lightning.plugins.precision.precision_plugin import PrecisionPlugin @@ -34,6 +34,18 @@ def __init__(self, precision: int) -> None: super().__init__() self.precision = precision + def backward(self, model: "pl.LightningModule", closure_loss: Tensor, *args: Any, **kwargs: Any) -> None: + if is_overridden("backward", model): + warning_cache.warn( + "You have overridden the `LightningModule.backward` hook but it will be ignored since DeepSpeed handles" + " the backward logic internally." 
+ ) + deepspeed_engine = model.trainer.model + deepspeed_engine.backward(closure_loss, *args, **kwargs) + + def _run_backward(self, tensor: Tensor, model: Module, *args: Any, **kwargs: Any) -> None: + model.backward(tensor, *args, **kwargs) + def pre_optimizer_step( self, model: "pl.LightningModule", @@ -43,31 +55,23 @@ def pre_optimizer_step( **kwargs: Any, ) -> bool: """Hook to do something before each optimizer step.""" + if isinstance(optimizer, LBFGS): + raise MisconfigurationException( + f"apex AMP and the LBFGS optimizer are not compatible (optimizer {optimizer_idx})." + ) result = lambda_closure() # DeepSpeed does not support closures super().pre_optimizer_step(model, optimizer, optimizer_idx, lambda_closure, **kwargs) + skipped_backward = result is None # in manual optimization, the closure does not return a value - if model.automatic_optimization and result is None: + if model.automatic_optimization and skipped_backward: raise MisconfigurationException( "Skipping backward by returning `None` from your `training_step` is not supported by `DeepSpeed`" ) - # the following should be in a `optimizer_step` hook but we don't have one in the precision plugin. + # DeepSpeed handles the optimizer step internally deepspeed_engine = model.trainer.model deepspeed_engine.step() return False - def backward(self, model: "pl.LightningModule", closure_loss: Tensor, *args: Any, **kwargs: Any) -> None: - if is_overridden("backward", model): - warning_cache.warn( - "You have overridden the `LightningModule.backward` hook but it will be ignored since DeepSpeed handles" - " the backward logic internally." - ) - # todo: hack around for deepspeed engine to call backward - deepspeed_engine = model.trainer.model - deepspeed_engine.backward(closure_loss, *args, **kwargs) - - def _run_backward(self, tensor: Tensor, model: Module, *args: Any, **kwargs: Any) -> None: - model.backward(tensor, *args, **kwargs) - def clip_gradients( self, optimizer: Optimizer, @@ -75,5 +79,4 @@ def clip_gradients( gradient_clip_algorithm: GradClipAlgorithmType = GradClipAlgorithmType.NORM, model: Optional[Module] = None, ) -> None: - """DeepSpeed handles clipping gradients internally via the training type plugin.""" - pass + """DeepSpeed handles gradient clipping internally.""" diff --git a/pytorch_lightning/plugins/precision/ipu_precision.py b/pytorch_lightning/plugins/precision/ipu_precision.py index d950dfe7cc553..89f272f7e55c1 100644 --- a/pytorch_lightning/plugins/precision/ipu_precision.py +++ b/pytorch_lightning/plugins/precision/ipu_precision.py @@ -11,10 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Any, Optional, Union +from typing import Any, Callable, Optional, Union from torch.nn import Module -from torch.optim import Optimizer +from torch.optim import LBFGS, Optimizer import pytorch_lightning as pl from pytorch_lightning.plugins.precision.precision_plugin import PrecisionPlugin @@ -27,8 +27,10 @@ class IPUPrecisionPlugin(PrecisionPlugin): - def __init__(self, precision: int) -> None: + def __init__(self, precision: Union[int, str]) -> None: super().__init__() + if precision == "bf16": + raise MisconfigurationException("`Trainer(accelerator='ipu', precision='bf16')` is not supported.") self.precision = precision def backward(self, model: "pl.LightningModule", *args: Any, **kwargs: Any) -> None: @@ -38,6 +40,33 @@ def backward(self, model: "pl.LightningModule", *args: Any, **kwargs: Any) -> No " the backward logic internally." ) + def pre_optimizer_step( + self, + model: "pl.LightningModule", + optimizer: Optimizer, + optimizer_idx: int, + lambda_closure: Callable[[], Any], + **kwargs: Any, + ) -> bool: + """IPUs handle the optimizer step internally.""" + super().pre_optimizer_step(model, optimizer, optimizer_idx, lambda_closure, **kwargs) + if isinstance(optimizer, LBFGS): + # IPU does not support closures + raise MisconfigurationException( + f"IPUs and the LBFGS optimizer are not compatible (optimizer {optimizer_idx})." + ) + closure_result = lambda_closure() + skipped_backward = closure_result is None + # in manual optimization, the closure does not return a value + if model.automatic_optimization and skipped_backward: + # we lack coverage here and IPUs are (currently) limited - something to explore if there's demand + raise MisconfigurationException( + "Skipping backward by returning `None` from your `training_step` is not implemented for IPUs." + " Please, open an issue in `https://github.com/PyTorchLightning/pytorch-lightning/issues`" + " requesting this feature." + ) + return False + def clip_gradients( self, optimizer: Optimizer, @@ -46,11 +75,7 @@ def clip_gradients( model: Optional[Module] = None, ) -> None: """Clips the gradients.""" - if clip_val is None: - return - - clip_val = float(clip_val) - if clip_val <= 0: + if clip_val is None or float(clip_val) <= 0: return raise MisconfigurationException("IPUs currently do not support clipping gradients.") diff --git a/pytorch_lightning/plugins/precision/tpu.py b/pytorch_lightning/plugins/precision/tpu.py index 6df9404d82307..e846e316caea4 100644 --- a/pytorch_lightning/plugins/precision/tpu.py +++ b/pytorch_lightning/plugins/precision/tpu.py @@ -11,8 +11,35 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from collections import Callable +from typing import Any + +from torch.optim import Optimizer + +import pytorch_lightning as pl from pytorch_lightning.plugins.precision.precision_plugin import PrecisionPlugin +from pytorch_lightning.utilities import _XLA_AVAILABLE +from pytorch_lightning.utilities.exceptions import MisconfigurationException + +if _XLA_AVAILABLE: + import torch_xla.core.xla_model as xm class TPUPrecisionPlugin(PrecisionPlugin): - ... 
+ def pre_optimizer_step( + self, + model: "pl.LightningModule", + optimizer: Optimizer, + optimizer_idx: int, + lambda_closure: Callable[[], Any], + **kwargs: Any, + ) -> bool: + super().pre_optimizer_step(model, optimizer, optimizer_idx, lambda_closure, **kwargs) + closure_result = xm.optimizer_step(optimizer, optimizer_args={"closure": lambda_closure, **kwargs}) + skipped_backward = closure_result is None + # in manual optimization, the closure does not return a value + if model.automatic_optimization and skipped_backward: + raise MisconfigurationException( + "Skipping backward by returning `None` from your `training_step` is not supported by TPUs" + ) + return False diff --git a/pytorch_lightning/plugins/training_type/deepspeed.py b/pytorch_lightning/plugins/training_type/deepspeed.py index 019fd41d5d1cc..97b49347c4f5c 100644 --- a/pytorch_lightning/plugins/training_type/deepspeed.py +++ b/pytorch_lightning/plugins/training_type/deepspeed.py @@ -19,7 +19,7 @@ import platform from collections import OrderedDict from pathlib import Path -from typing import Any, Callable, Dict, Generator, List, Mapping, Optional, Tuple, Union +from typing import Any, Dict, Generator, List, Mapping, Optional, Tuple, Union import torch from torch.nn import Module @@ -583,11 +583,6 @@ def init_optimizers(self, trainer: "pl.Trainer", model: "pl.LightningModule") -> # via `_initialize_deepspeed_train` return [], [], [] # empty optimizers, schedulers and frequencies - def optimizer_step(self, optimizer: torch.optim.Optimizer, lambda_closure: Callable, **kwargs): - # note: We rely on the deepspeed engine to carry out the step rather than the optimizer. - # internally, the engine has a reference to the optimizer already. - self.model.step(**kwargs) - @property def handles_gradient_accumulation(self) -> bool: """Whether the plugin handles gradient accumulation internally.""" From 4391425033bddab9362e89f8ba39875ecc03a38a Mon Sep 17 00:00:00 2001 From: Carlos Mocholi Date: Tue, 19 Oct 2021 22:33:58 +0200 Subject: [PATCH 07/26] Update --- pytorch_lightning/plugins/precision/deepspeed_precision.py | 2 +- pytorch_lightning/plugins/precision/ipu_precision.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/pytorch_lightning/plugins/precision/deepspeed_precision.py b/pytorch_lightning/plugins/precision/deepspeed_precision.py index 8af0d069a508c..ff33ccc690ef2 100644 --- a/pytorch_lightning/plugins/precision/deepspeed_precision.py +++ b/pytorch_lightning/plugins/precision/deepspeed_precision.py @@ -57,7 +57,7 @@ def pre_optimizer_step( """Hook to do something before each optimizer step.""" if isinstance(optimizer, LBFGS): raise MisconfigurationException( - f"apex AMP and the LBFGS optimizer are not compatible (optimizer {optimizer_idx})." + f"DeepSpeed and the LBFGS optimizer are not compatible (optimizer {optimizer_idx})." 
) result = lambda_closure() # DeepSpeed does not support closures super().pre_optimizer_step(model, optimizer, optimizer_idx, lambda_closure, **kwargs) diff --git a/pytorch_lightning/plugins/precision/ipu_precision.py b/pytorch_lightning/plugins/precision/ipu_precision.py index 89f272f7e55c1..76c12ff90990e 100644 --- a/pytorch_lightning/plugins/precision/ipu_precision.py +++ b/pytorch_lightning/plugins/precision/ipu_precision.py @@ -51,7 +51,6 @@ def pre_optimizer_step( """IPUs handle the optimizer step internally.""" super().pre_optimizer_step(model, optimizer, optimizer_idx, lambda_closure, **kwargs) if isinstance(optimizer, LBFGS): - # IPU does not support closures raise MisconfigurationException( f"IPUs and the LBFGS optimizer are not compatible (optimizer {optimizer_idx})." ) From cb258ab588dbcc52ff5a2c291a01fe1542046639 Mon Sep 17 00:00:00 2001 From: Carlos Mocholi Date: Tue, 19 Oct 2021 22:34:50 +0200 Subject: [PATCH 08/26] Docs --- pytorch_lightning/core/lightning.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index ca4b2af7eee17..441fa1f5a0116 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -646,7 +646,7 @@ def training_step(self, *args, **kwargs) -> STEP_OUTPUT: - :class:`~torch.Tensor` - The loss tensor - ``dict`` - A dictionary. Can include any keys, but must include the key ``'loss'`` - ``None`` - Training will skip to the next batch. This is only for automatic optimization. - This is not supported for multi-GPU or TPU, or using ``DeepSpeed``. + This is not supported for multi-GPU, TPU, IPU, or using DeepSpeed. In this step you'd normally do the forward pass and calculate the loss for a batch. You can also do fancier things like multiple forward passes or something model specific. From 6fff125436591ba541fd66c761035ba9d4f896fc Mon Sep 17 00:00:00 2001 From: Carlos Mocholi Date: Tue, 19 Oct 2021 22:38:55 +0200 Subject: [PATCH 09/26] Docs --- docs/source/common/optimizers.rst | 3 +++ pytorch_lightning/core/lightning.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/source/common/optimizers.rst b/docs/source/common/optimizers.rst index 0405b9a4365af..8080d12e2b6fe 100644 --- a/docs/source/common/optimizers.rst +++ b/docs/source/common/optimizers.rst @@ -284,6 +284,9 @@ Here is an example using a closure function. opt.step(closure=closure) +.. warning:: + The :class:`torch.optim.LBFGS` optimizer is not supported for apex AMP, native AMP, IPUs, or DeepSpeed. + ------ Access your own optimizer [manual] diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 441fa1f5a0116..76ea7f4c5acf1 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -646,7 +646,7 @@ def training_step(self, *args, **kwargs) -> STEP_OUTPUT: - :class:`~torch.Tensor` - The loss tensor - ``dict`` - A dictionary. Can include any keys, but must include the key ``'loss'`` - ``None`` - Training will skip to the next batch. This is only for automatic optimization. - This is not supported for multi-GPU, TPU, IPU, or using DeepSpeed. + This is not supported for multi-GPU, TPU, IPU, or DeepSpeed. In this step you'd normally do the forward pass and calculate the loss for a batch. You can also do fancier things like multiple forward passes or something model specific. 
From 84ee13eb5072bea6f26fa0b3a1b2581e9c3d0547 Mon Sep 17 00:00:00 2001 From: Carlos Mocholi Date: Tue, 19 Oct 2021 22:56:07 +0200 Subject: [PATCH 10/26] Add test --- .../plugins/precision/ipu_precision.py | 4 ++-- pytorch_lightning/plugins/precision/tpu.py | 3 +-- pytorch_lightning/plugins/training_type/ipu.py | 4 ++-- tests/accelerators/test_accelerator_connector.py | 14 ++++++++++++++ 4 files changed, 19 insertions(+), 6 deletions(-) diff --git a/pytorch_lightning/plugins/precision/ipu_precision.py b/pytorch_lightning/plugins/precision/ipu_precision.py index 76c12ff90990e..e2df2da66075e 100644 --- a/pytorch_lightning/plugins/precision/ipu_precision.py +++ b/pytorch_lightning/plugins/precision/ipu_precision.py @@ -29,8 +29,8 @@ class IPUPrecisionPlugin(PrecisionPlugin): def __init__(self, precision: Union[int, str]) -> None: super().__init__() - if precision == "bf16": - raise MisconfigurationException("`Trainer(accelerator='ipu', precision='bf16')` is not supported.") + if precision in ("bf16", 64): + raise MisconfigurationException(f"`Trainer(accelerator='ipu', precision={precision!r})` is not supported.") self.precision = precision def backward(self, model: "pl.LightningModule", *args: Any, **kwargs: Any) -> None: diff --git a/pytorch_lightning/plugins/precision/tpu.py b/pytorch_lightning/plugins/precision/tpu.py index e846e316caea4..480e1c5f06c53 100644 --- a/pytorch_lightning/plugins/precision/tpu.py +++ b/pytorch_lightning/plugins/precision/tpu.py @@ -11,8 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from collections import Callable -from typing import Any +from typing import Any, Callable from torch.optim import Optimizer diff --git a/pytorch_lightning/plugins/training_type/ipu.py b/pytorch_lightning/plugins/training_type/ipu.py index b6728b0551081..3a74d3d47a368 100644 --- a/pytorch_lightning/plugins/training_type/ipu.py +++ b/pytorch_lightning/plugins/training_type/ipu.py @@ -24,7 +24,7 @@ from pytorch_lightning.plugins.io.checkpoint_plugin import CheckpointIO from pytorch_lightning.plugins.training_type.parallel import ParallelPlugin from pytorch_lightning.trainer.states import RunningStage -from pytorch_lightning.utilities import _POPTORCH_AVAILABLE +from pytorch_lightning.utilities import _IPU_AVAILABLE, _POPTORCH_AVAILABLE from pytorch_lightning.utilities.apply_func import apply_to_collection from pytorch_lightning.utilities.cloud_io import get_filesystem from pytorch_lightning.utilities.exceptions import MisconfigurationException @@ -85,7 +85,7 @@ def __init__( cluster_environment=cluster_environment, checkpoint_io=checkpoint_io, ) - if not _POPTORCH_AVAILABLE or not poptorch.ipuHardwareIsAvailable(): + if not _IPU_AVAILABLE: raise MisconfigurationException( "The IPU Accelerator requires IPU devices to run. 
" "Learn more or get started with IPUs at https://www.graphcore.ai/getstarted" diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py index 64239399fdfe7..8ddfcf26d0f20 100644 --- a/tests/accelerators/test_accelerator_connector.py +++ b/tests/accelerators/test_accelerator_connector.py @@ -986,3 +986,17 @@ def test_unsupported_tpu_choice(monkeypatch): monkeypatch.setattr(AcceleratorConnector, "has_tpu", True) with pytest.raises(MisconfigurationException, match=r"accelerator='tpu', precision=64\)` is not implemented"): Trainer(accelerator="tpu", precision=64) + + +def test_unsupported_ipu_choice(monkeypatch): + import pytorch_lightning.plugins.training_type.ipu as ipu + import pytorch_lightning.utilities.imports as imports + from pytorch_lightning.trainer.connectors.accelerator_connector import AcceleratorConnector + + monkeypatch.setattr(imports, "_IPU_AVAILABLE", True) + monkeypatch.setattr(ipu, "_IPU_AVAILABLE", True) + monkeypatch.setattr(AcceleratorConnector, "has_ipu", True) + with pytest.raises(MisconfigurationException, match=r"accelerator='ipu', precision='bf16'\)` is not supported"): + Trainer(accelerator="ipu", precision="bf16") + with pytest.raises(MisconfigurationException, match=r"accelerator='ipu', precision=64\)` is not supported"): + Trainer(accelerator="ipu", precision=64) From 50e6e1bfa7ea3c8ee368c132700440b6f094cd6c Mon Sep 17 00:00:00 2001 From: Carlos Mocholi Date: Tue, 19 Oct 2021 23:08:19 +0200 Subject: [PATCH 11/26] Undo changes --- .../plugins/precision/ipu_precision.py | 4 +--- pytorch_lightning/plugins/training_type/ipu.py | 4 ++-- tests/accelerators/test_accelerator_connector.py | 14 -------------- 3 files changed, 3 insertions(+), 19 deletions(-) diff --git a/pytorch_lightning/plugins/precision/ipu_precision.py b/pytorch_lightning/plugins/precision/ipu_precision.py index e2df2da66075e..f8dd77dcefcbe 100644 --- a/pytorch_lightning/plugins/precision/ipu_precision.py +++ b/pytorch_lightning/plugins/precision/ipu_precision.py @@ -27,10 +27,8 @@ class IPUPrecisionPlugin(PrecisionPlugin): - def __init__(self, precision: Union[int, str]) -> None: + def __init__(self, precision: int) -> None: super().__init__() - if precision in ("bf16", 64): - raise MisconfigurationException(f"`Trainer(accelerator='ipu', precision={precision!r})` is not supported.") self.precision = precision def backward(self, model: "pl.LightningModule", *args: Any, **kwargs: Any) -> None: diff --git a/pytorch_lightning/plugins/training_type/ipu.py b/pytorch_lightning/plugins/training_type/ipu.py index 3a74d3d47a368..b6728b0551081 100644 --- a/pytorch_lightning/plugins/training_type/ipu.py +++ b/pytorch_lightning/plugins/training_type/ipu.py @@ -24,7 +24,7 @@ from pytorch_lightning.plugins.io.checkpoint_plugin import CheckpointIO from pytorch_lightning.plugins.training_type.parallel import ParallelPlugin from pytorch_lightning.trainer.states import RunningStage -from pytorch_lightning.utilities import _IPU_AVAILABLE, _POPTORCH_AVAILABLE +from pytorch_lightning.utilities import _POPTORCH_AVAILABLE from pytorch_lightning.utilities.apply_func import apply_to_collection from pytorch_lightning.utilities.cloud_io import get_filesystem from pytorch_lightning.utilities.exceptions import MisconfigurationException @@ -85,7 +85,7 @@ def __init__( cluster_environment=cluster_environment, checkpoint_io=checkpoint_io, ) - if not _IPU_AVAILABLE: + if not _POPTORCH_AVAILABLE or not poptorch.ipuHardwareIsAvailable(): raise MisconfigurationException( "The IPU 
Accelerator requires IPU devices to run. " "Learn more or get started with IPUs at https://www.graphcore.ai/getstarted" diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py index 8ddfcf26d0f20..64239399fdfe7 100644 --- a/tests/accelerators/test_accelerator_connector.py +++ b/tests/accelerators/test_accelerator_connector.py @@ -986,17 +986,3 @@ def test_unsupported_tpu_choice(monkeypatch): monkeypatch.setattr(AcceleratorConnector, "has_tpu", True) with pytest.raises(MisconfigurationException, match=r"accelerator='tpu', precision=64\)` is not implemented"): Trainer(accelerator="tpu", precision=64) - - -def test_unsupported_ipu_choice(monkeypatch): - import pytorch_lightning.plugins.training_type.ipu as ipu - import pytorch_lightning.utilities.imports as imports - from pytorch_lightning.trainer.connectors.accelerator_connector import AcceleratorConnector - - monkeypatch.setattr(imports, "_IPU_AVAILABLE", True) - monkeypatch.setattr(ipu, "_IPU_AVAILABLE", True) - monkeypatch.setattr(AcceleratorConnector, "has_ipu", True) - with pytest.raises(MisconfigurationException, match=r"accelerator='ipu', precision='bf16'\)` is not supported"): - Trainer(accelerator="ipu", precision="bf16") - with pytest.raises(MisconfigurationException, match=r"accelerator='ipu', precision=64\)` is not supported"): - Trainer(accelerator="ipu", precision=64) From 4a5d3360f76ebd16ea3539c68d5c9151100a777d Mon Sep 17 00:00:00 2001 From: Carlos Mocholi Date: Wed, 20 Oct 2021 02:31:04 +0200 Subject: [PATCH 12/26] Update error --- pytorch_lightning/plugins/precision/tpu.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pytorch_lightning/plugins/precision/tpu.py b/pytorch_lightning/plugins/precision/tpu.py index 480e1c5f06c53..dc4c7c856cbc2 100644 --- a/pytorch_lightning/plugins/precision/tpu.py +++ b/pytorch_lightning/plugins/precision/tpu.py @@ -38,7 +38,10 @@ def pre_optimizer_step( skipped_backward = closure_result is None # in manual optimization, the closure does not return a value if model.automatic_optimization and skipped_backward: + # we lack coverage here so disable this - something to explore if there's demand raise MisconfigurationException( - "Skipping backward by returning `None` from your `training_step` is not supported by TPUs" + "Skipping backward by returning `None` from your `training_step` is not implemented for TPUs." + " Please, open an issue in `https://github.com/PyTorchLightning/pytorch-lightning/issues`" + " requesting this feature." 
) return False From 877823da91e174231348eac47c3bb9f3786ae3b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Wed, 20 Oct 2021 05:18:51 +0200 Subject: [PATCH 13/26] revert removal of opt_idx --- pytorch_lightning/accelerators/accelerator.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/accelerators/accelerator.py b/pytorch_lightning/accelerators/accelerator.py index 7e86f9f700986..753bde4b14452 100644 --- a/pytorch_lightning/accelerators/accelerator.py +++ b/pytorch_lightning/accelerators/accelerator.py @@ -335,9 +335,11 @@ def optimizer_step( model, optimizer, opt_idx, lambda_closure, **kwargs ) if make_optimizer_step: - self.run_optimizer_step(optimizer, lambda_closure, **kwargs) + self.run_optimizer_step(optimizer, opt_idx, lambda_closure, **kwargs) - def run_optimizer_step(self, optimizer: Optimizer, lambda_closure: Callable, **kwargs: Any) -> None: + def run_optimizer_step( + self, optimizer: Optimizer, optimizer_idx: int, lambda_closure: Callable, **kwargs: Any + ) -> None: self.training_type_plugin.optimizer_step(optimizer, lambda_closure=lambda_closure, **kwargs) def optimizer_zero_grad(self, current_epoch: int, batch_idx: int, optimizer: Optimizer, opt_idx: int) -> None: From f2801b2cc5c016d8259f1733de89887e414f741e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Wed, 20 Oct 2021 05:42:52 +0200 Subject: [PATCH 14/26] remove unused arguments --- pytorch_lightning/accelerators/tpu.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pytorch_lightning/accelerators/tpu.py b/pytorch_lightning/accelerators/tpu.py index 3be6d12461c39..7c7680c32f8c9 100644 --- a/pytorch_lightning/accelerators/tpu.py +++ b/pytorch_lightning/accelerators/tpu.py @@ -11,10 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Any, Callable, Dict, Optional, Union +from typing import Any, Dict, Optional, Union import torch -from torch.optim import Optimizer import pytorch_lightning as pl from pytorch_lightning.accelerators.accelerator import Accelerator From d38e98a9d9d6dcfb87ed6ad42e3a3eff4a0ec977 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Wed, 20 Oct 2021 05:57:16 +0200 Subject: [PATCH 15/26] update all type hints --- pytorch_lightning/accelerators/accelerator.py | 8 ++++---- pytorch_lightning/plugins/precision/apex_amp.py | 11 ++++++----- .../plugins/precision/deepspeed_precision.py | 4 ++-- pytorch_lightning/plugins/precision/native_amp.py | 4 ++-- .../plugins/precision/precision_plugin.py | 2 +- pytorch_lightning/plugins/training_type/deepspeed.py | 4 +++- pytorch_lightning/plugins/training_type/tpu_spawn.py | 2 +- .../plugins/training_type/training_type_plugin.py | 4 +++- 8 files changed, 22 insertions(+), 17 deletions(-) diff --git a/pytorch_lightning/accelerators/accelerator.py b/pytorch_lightning/accelerators/accelerator.py index 753bde4b14452..80428967ab075 100644 --- a/pytorch_lightning/accelerators/accelerator.py +++ b/pytorch_lightning/accelerators/accelerator.py @@ -317,9 +317,9 @@ def backward(self, closure_loss: Tensor, *args: Any, **kwargs: Any) -> Tensor: def optimizer_step( self, optimizer: Optimizer, - opt_idx: int = 0, - lambda_closure: Optional[Callable] = None, - model: Optional[Union[Module, "pl.LightningModule"]] = None, + opt_idx: int, + lambda_closure: Optional[Callable], + model: Optional[Union["pl.LightningModule", Module]] = None, **kwargs: Any ) -> None: """performs the actual optimizer step. @@ -338,7 +338,7 @@ def optimizer_step( self.run_optimizer_step(optimizer, opt_idx, lambda_closure, **kwargs) def run_optimizer_step( - self, optimizer: Optimizer, optimizer_idx: int, lambda_closure: Callable, **kwargs: Any + self, optimizer: Optimizer, optimizer_idx: int, lambda_closure: Optional[Callable] = None, **kwargs: Any ) -> None: self.training_type_plugin.optimizer_step(optimizer, lambda_closure=lambda_closure, **kwargs) diff --git a/pytorch_lightning/plugins/precision/apex_amp.py b/pytorch_lightning/plugins/precision/apex_amp.py index 27fa856d0f4b9..0927b39632bcc 100644 --- a/pytorch_lightning/plugins/precision/apex_amp.py +++ b/pytorch_lightning/plugins/precision/apex_amp.py @@ -11,10 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Any, Callable, Dict, Optional, Sequence +from typing import Any, Callable, Dict, Optional, Sequence, Union import torch from torch import Tensor +from torch.nn import Module from torch.optim import Optimizer import pytorch_lightning as pl @@ -90,18 +91,18 @@ def reinit_scheduler_properties(optimizers: Sequence[Optimizer], schedulers: Seq def pre_optimizer_step( self, - model: "pl.LightningModule", + model: Union["pl.LightningModule", Module], optimizer: Optimizer, optimizer_idx: int, - lambda_closure: Callable, + lambda_closure: Optional[Callable], **kwargs: Any, ) -> bool: """Hook to do something before each optimizer step.""" - result = lambda_closure() # APEX amp does not support closures + result = lambda_closure() if lambda_closure is not None else None # APEX amp does not support closures super().pre_optimizer_step(model, optimizer, optimizer_idx, lambda_closure, **kwargs) skipped_backward = result is None # in manual optimization, the closure does not return a value - if not model.automatic_optimization or not skipped_backward: + if not isinstance(model, pl.LightningModule) or not model.automatic_optimization or not skipped_backward: # the following should be in a `optimizer_step` hook but we don't have one in the precision plugin. optimizer.step(**kwargs) return False diff --git a/pytorch_lightning/plugins/precision/deepspeed_precision.py b/pytorch_lightning/plugins/precision/deepspeed_precision.py index d6ad215d3e486..dbbd33d1caaae 100644 --- a/pytorch_lightning/plugins/precision/deepspeed_precision.py +++ b/pytorch_lightning/plugins/precision/deepspeed_precision.py @@ -36,10 +36,10 @@ def __init__(self, precision: int) -> None: def pre_optimizer_step( self, - model: Union[Module, "pl.LightningModule"], + model: Union["pl.LightningModule", Module], optimizer: Optimizer, optimizer_idx: int, - lambda_closure: Callable, + lambda_closure: Optional[Callable], **kwargs: Any, ) -> bool: """Hook to do something before each optimizer step.""" diff --git a/pytorch_lightning/plugins/precision/native_amp.py b/pytorch_lightning/plugins/precision/native_amp.py index 08b8080715d84..d9b18acd33477 100644 --- a/pytorch_lightning/plugins/precision/native_amp.py +++ b/pytorch_lightning/plugins/precision/native_amp.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
from contextlib import contextmanager -from typing import Any, Callable, Dict, Generator, Union +from typing import Any, Callable, Dict, Generator, Union, Optional import torch from torch import Tensor @@ -80,7 +80,7 @@ def pre_optimizer_step( model: Union["pl.LightningModule", Module], optimizer: Optimizer, optimizer_idx: int, - lambda_closure: Callable, + lambda_closure: Optional[Callable], **kwargs: Any, ) -> bool: if self.is_bfloat16: diff --git a/pytorch_lightning/plugins/precision/precision_plugin.py b/pytorch_lightning/plugins/precision/precision_plugin.py index dc378e9cb195c..4bbb8d8ef2155 100644 --- a/pytorch_lightning/plugins/precision/precision_plugin.py +++ b/pytorch_lightning/plugins/precision/precision_plugin.py @@ -102,7 +102,7 @@ def pre_optimizer_step( model: Union["pl.LightningModule", Module], optimizer: Optimizer, optimizer_idx: int, - lambda_closure: Callable, + lambda_closure: Optional[Callable], **kwargs: Any, ) -> bool: """Hook to do something before each optimizer step.""" diff --git a/pytorch_lightning/plugins/training_type/deepspeed.py b/pytorch_lightning/plugins/training_type/deepspeed.py index 019fd41d5d1cc..c0aa697e74928 100644 --- a/pytorch_lightning/plugins/training_type/deepspeed.py +++ b/pytorch_lightning/plugins/training_type/deepspeed.py @@ -583,7 +583,9 @@ def init_optimizers(self, trainer: "pl.Trainer", model: "pl.LightningModule") -> # via `_initialize_deepspeed_train` return [], [], [] # empty optimizers, schedulers and frequencies - def optimizer_step(self, optimizer: torch.optim.Optimizer, lambda_closure: Callable, **kwargs): + def optimizer_step( + self, optimizer: torch.optim.Optimizer, lambda_closure: Optional[Callable], **kwargs: Any + ) -> None: # note: We rely on the deepspeed engine to carry out the step rather than the optimizer. # internally, the engine has a reference to the optimizer already. 
self.model.step(**kwargs) diff --git a/pytorch_lightning/plugins/training_type/tpu_spawn.py b/pytorch_lightning/plugins/training_type/tpu_spawn.py index a4d53457136b6..0409f799a2fb9 100644 --- a/pytorch_lightning/plugins/training_type/tpu_spawn.py +++ b/pytorch_lightning/plugins/training_type/tpu_spawn.py @@ -182,7 +182,7 @@ def new_process(self, trainer: "pl.Trainer", mp_queue: SimpleQueue) -> None: # ensure that spawned processes go through teardown before joining trainer._call_teardown_hook() - def optimizer_step(self, optimizer: Optimizer, lambda_closure: Callable, **kwargs) -> None: + def optimizer_step(self, optimizer: Optimizer, lambda_closure: Optional[Callable], **kwargs: Any) -> None: xm.optimizer_step(optimizer, optimizer_args={"closure": lambda_closure, **kwargs}) def model_to_device(self) -> None: diff --git a/pytorch_lightning/plugins/training_type/training_type_plugin.py b/pytorch_lightning/plugins/training_type/training_type_plugin.py index f660a9d3ee964..868ca079adf36 100644 --- a/pytorch_lightning/plugins/training_type/training_type_plugin.py +++ b/pytorch_lightning/plugins/training_type/training_type_plugin.py @@ -249,7 +249,9 @@ def process_dataloader(self, dataloader: Union[Iterable, DataLoader]) -> Union[I def init_optimizers(self, trainer: "pl.Trainer", model: "pl.LightningModule"): return trainer.init_optimizers(model) - def optimizer_step(self, optimizer: torch.optim.Optimizer, lambda_closure: Callable, **kwargs) -> None: + def optimizer_step( + self, optimizer: torch.optim.Optimizer, lambda_closure: Optional[Callable], **kwargs: Any + ) -> None: optimizer.step(closure=lambda_closure, **kwargs) @property From 18b09bc647dff8e81ae7e1c00db911c17168b144 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Wed, 20 Oct 2021 06:01:01 +0200 Subject: [PATCH 16/26] mypy --- pytorch_lightning/accelerators/ipu.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/accelerators/ipu.py b/pytorch_lightning/accelerators/ipu.py index fbd23b5f2a217..500e0aab0dec6 100644 --- a/pytorch_lightning/accelerators/ipu.py +++ b/pytorch_lightning/accelerators/ipu.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import Any, Callable, Dict, Union +from typing import Any, Callable, Dict, Union, Optional import torch from torch.optim import Optimizer @@ -35,7 +35,9 @@ def setup_optimizers(self, trainer: "pl.Trainer") -> None: if len(self.optimizers) > 1: raise MisconfigurationException("IPUs currently only support one optimizer.") - def optimizer_step(self, optimizer: Optimizer, opt_idx: int, lambda_closure: Callable, **kwargs: Any) -> None: + def optimizer_step( + self, optimizer: Optimizer, opt_idx: int, lambda_closure: Optional[Callable], **kwargs: Any + ) -> None: # Optimizer step is handled by the IPU accelerator. 
lambda_closure() From c15504f2c9837de21e17cbcc91dc42d9d0b7a6e1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 20 Oct 2021 04:02:35 +0000 Subject: [PATCH 17/26] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pytorch_lightning/accelerators/ipu.py | 2 +- pytorch_lightning/plugins/precision/native_amp.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/accelerators/ipu.py b/pytorch_lightning/accelerators/ipu.py index 500e0aab0dec6..4b5fd9449bfce 100644 --- a/pytorch_lightning/accelerators/ipu.py +++ b/pytorch_lightning/accelerators/ipu.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import Any, Callable, Dict, Union, Optional +from typing import Any, Callable, Dict, Optional, Union import torch from torch.optim import Optimizer diff --git a/pytorch_lightning/plugins/precision/native_amp.py b/pytorch_lightning/plugins/precision/native_amp.py index d9b18acd33477..aeb97d3f7743f 100644 --- a/pytorch_lightning/plugins/precision/native_amp.py +++ b/pytorch_lightning/plugins/precision/native_amp.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. from contextlib import contextmanager -from typing import Any, Callable, Dict, Generator, Union, Optional +from typing import Any, Callable, Dict, Generator, Optional, Union import torch from torch import Tensor From 80a7eb5b6cd2fd1ef34a10dab162eda66349a9e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Wed, 20 Oct 2021 06:15:11 +0200 Subject: [PATCH 18/26] none checks and signature fix (mypy) --- pytorch_lightning/accelerators/ipu.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/accelerators/ipu.py b/pytorch_lightning/accelerators/ipu.py index 4b5fd9449bfce..69ec24cc172fd 100644 --- a/pytorch_lightning/accelerators/ipu.py +++ b/pytorch_lightning/accelerators/ipu.py @@ -14,6 +14,7 @@ from typing import Any, Callable, Dict, Optional, Union import torch +from torch.nn import Module from torch.optim import Optimizer import pytorch_lightning as pl @@ -36,10 +37,16 @@ def setup_optimizers(self, trainer: "pl.Trainer") -> None: raise MisconfigurationException("IPUs currently only support one optimizer.") def optimizer_step( - self, optimizer: Optimizer, opt_idx: int, lambda_closure: Optional[Callable], **kwargs: Any + self, + optimizer: Optimizer, + opt_idx: int, + lambda_closure: Optional[Callable], + model: Optional[Union["pl.LightningModule", Module]] = None, + **kwargs: Any ) -> None: # Optimizer step is handled by the IPU accelerator. 
- lambda_closure() + if lambda_closure is not None: + lambda_closure() def get_device_stats(self, device: Union[str, torch.device]) -> Dict[str, Any]: """IPU device stats aren't supported yet.""" From e363968a405170047c8be49e2aa728b5546c982a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Wed, 20 Oct 2021 15:47:42 +0200 Subject: [PATCH 19/26] updates --- pytorch_lightning/accelerators/accelerator.py | 4 ++-- pytorch_lightning/plugins/precision/apex_amp.py | 4 ++-- pytorch_lightning/plugins/precision/deepspeed_precision.py | 4 ++-- pytorch_lightning/plugins/precision/native_amp.py | 6 +++--- pytorch_lightning/plugins/precision/precision_plugin.py | 2 +- pytorch_lightning/plugins/training_type/deepspeed.py | 4 +--- .../plugins/training_type/training_type_plugin.py | 4 +--- 7 files changed, 12 insertions(+), 16 deletions(-) diff --git a/pytorch_lightning/accelerators/accelerator.py b/pytorch_lightning/accelerators/accelerator.py index 80428967ab075..4ed91d979bd1a 100644 --- a/pytorch_lightning/accelerators/accelerator.py +++ b/pytorch_lightning/accelerators/accelerator.py @@ -318,7 +318,7 @@ def optimizer_step( self, optimizer: Optimizer, opt_idx: int, - lambda_closure: Optional[Callable], + lambda_closure: Callable, model: Optional[Union["pl.LightningModule", Module]] = None, **kwargs: Any ) -> None: @@ -338,7 +338,7 @@ def optimizer_step( self.run_optimizer_step(optimizer, opt_idx, lambda_closure, **kwargs) def run_optimizer_step( - self, optimizer: Optimizer, optimizer_idx: int, lambda_closure: Optional[Callable] = None, **kwargs: Any + self, optimizer: Optimizer, optimizer_idx: int, lambda_closure: Callable, **kwargs: Any ) -> None: self.training_type_plugin.optimizer_step(optimizer, lambda_closure=lambda_closure, **kwargs) diff --git a/pytorch_lightning/plugins/precision/apex_amp.py b/pytorch_lightning/plugins/precision/apex_amp.py index 0927b39632bcc..d915209fb4f11 100644 --- a/pytorch_lightning/plugins/precision/apex_amp.py +++ b/pytorch_lightning/plugins/precision/apex_amp.py @@ -94,11 +94,11 @@ def pre_optimizer_step( model: Union["pl.LightningModule", Module], optimizer: Optimizer, optimizer_idx: int, - lambda_closure: Optional[Callable], + lambda_closure: Callable, **kwargs: Any, ) -> bool: """Hook to do something before each optimizer step.""" - result = lambda_closure() if lambda_closure is not None else None # APEX amp does not support closures + result = lambda_closure() # APEX amp does not support closures super().pre_optimizer_step(model, optimizer, optimizer_idx, lambda_closure, **kwargs) skipped_backward = result is None # in manual optimization, the closure does not return a value diff --git a/pytorch_lightning/plugins/precision/deepspeed_precision.py b/pytorch_lightning/plugins/precision/deepspeed_precision.py index dbbd33d1caaae..32d799fd14465 100644 --- a/pytorch_lightning/plugins/precision/deepspeed_precision.py +++ b/pytorch_lightning/plugins/precision/deepspeed_precision.py @@ -39,11 +39,11 @@ def pre_optimizer_step( model: Union["pl.LightningModule", Module], optimizer: Optimizer, optimizer_idx: int, - lambda_closure: Optional[Callable], + lambda_closure: Callable, **kwargs: Any, ) -> bool: """Hook to do something before each optimizer step.""" - result = lambda_closure() if lambda_closure is not None else None # DeepSpeed does not support closures + result = lambda_closure() # DeepSpeed does not support closures super().pre_optimizer_step(model, optimizer, optimizer_idx, lambda_closure, **kwargs) # in manual optimization, the closure does not 
return a value if isinstance(model, pl.LightningModule) and model.automatic_optimization and result is None: diff --git a/pytorch_lightning/plugins/precision/native_amp.py b/pytorch_lightning/plugins/precision/native_amp.py index aeb97d3f7743f..74651ce3e4a32 100644 --- a/pytorch_lightning/plugins/precision/native_amp.py +++ b/pytorch_lightning/plugins/precision/native_amp.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. from contextlib import contextmanager -from typing import Any, Callable, Dict, Generator, Optional, Union +from typing import Any, Callable, Dict, Generator, Union import torch from torch import Tensor @@ -80,7 +80,7 @@ def pre_optimizer_step( model: Union["pl.LightningModule", Module], optimizer: Optimizer, optimizer_idx: int, - lambda_closure: Optional[Callable], + lambda_closure: Callable, **kwargs: Any, ) -> bool: if self.is_bfloat16: @@ -90,7 +90,7 @@ def pre_optimizer_step( raise MisconfigurationException( f"Native AMP and the LBFGS optimizer are not compatible (optimizer {optimizer_idx})." ) - result = lambda_closure() if lambda_closure is not None else None # native amp does not support closures + result = lambda_closure() # native amp does not support closures self.scaler.unscale_(optimizer) super().pre_optimizer_step(model, optimizer, optimizer_idx, lambda_closure, **kwargs) skipped_backward = result is None diff --git a/pytorch_lightning/plugins/precision/precision_plugin.py b/pytorch_lightning/plugins/precision/precision_plugin.py index 4bbb8d8ef2155..dc378e9cb195c 100644 --- a/pytorch_lightning/plugins/precision/precision_plugin.py +++ b/pytorch_lightning/plugins/precision/precision_plugin.py @@ -102,7 +102,7 @@ def pre_optimizer_step( model: Union["pl.LightningModule", Module], optimizer: Optimizer, optimizer_idx: int, - lambda_closure: Optional[Callable], + lambda_closure: Callable, **kwargs: Any, ) -> bool: """Hook to do something before each optimizer step.""" diff --git a/pytorch_lightning/plugins/training_type/deepspeed.py b/pytorch_lightning/plugins/training_type/deepspeed.py index c0aa697e74928..a394e07855090 100644 --- a/pytorch_lightning/plugins/training_type/deepspeed.py +++ b/pytorch_lightning/plugins/training_type/deepspeed.py @@ -583,9 +583,7 @@ def init_optimizers(self, trainer: "pl.Trainer", model: "pl.LightningModule") -> # via `_initialize_deepspeed_train` return [], [], [] # empty optimizers, schedulers and frequencies - def optimizer_step( - self, optimizer: torch.optim.Optimizer, lambda_closure: Optional[Callable], **kwargs: Any - ) -> None: + def optimizer_step(self, optimizer: torch.optim.Optimizer, lambda_closure: Callable, **kwargs) -> None: # note: We rely on the deepspeed engine to carry out the step rather than the optimizer. # internally, the engine has a reference to the optimizer already. 
self.model.step(**kwargs) diff --git a/pytorch_lightning/plugins/training_type/training_type_plugin.py b/pytorch_lightning/plugins/training_type/training_type_plugin.py index 868ca079adf36..95c74d4a87b70 100644 --- a/pytorch_lightning/plugins/training_type/training_type_plugin.py +++ b/pytorch_lightning/plugins/training_type/training_type_plugin.py @@ -249,9 +249,7 @@ def process_dataloader(self, dataloader: Union[Iterable, DataLoader]) -> Union[I def init_optimizers(self, trainer: "pl.Trainer", model: "pl.LightningModule"): return trainer.init_optimizers(model) - def optimizer_step( - self, optimizer: torch.optim.Optimizer, lambda_closure: Optional[Callable], **kwargs: Any - ) -> None: + def optimizer_step(self, optimizer: torch.optim.Optimizer, lambda_closure: Callable, **kwargs: Any) -> None: optimizer.step(closure=lambda_closure, **kwargs) @property From c700c126af318c8eff2f081544b4017d397ebd7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Wed, 20 Oct 2021 15:48:57 +0200 Subject: [PATCH 20/26] update ipu --- pytorch_lightning/accelerators/ipu.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/pytorch_lightning/accelerators/ipu.py b/pytorch_lightning/accelerators/ipu.py index 69ec24cc172fd..fbd23b5f2a217 100644 --- a/pytorch_lightning/accelerators/ipu.py +++ b/pytorch_lightning/accelerators/ipu.py @@ -11,10 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import Any, Callable, Dict, Optional, Union +from typing import Any, Callable, Dict, Union import torch -from torch.nn import Module from torch.optim import Optimizer import pytorch_lightning as pl @@ -36,17 +35,9 @@ def setup_optimizers(self, trainer: "pl.Trainer") -> None: if len(self.optimizers) > 1: raise MisconfigurationException("IPUs currently only support one optimizer.") - def optimizer_step( - self, - optimizer: Optimizer, - opt_idx: int, - lambda_closure: Optional[Callable], - model: Optional[Union["pl.LightningModule", Module]] = None, - **kwargs: Any - ) -> None: + def optimizer_step(self, optimizer: Optimizer, opt_idx: int, lambda_closure: Callable, **kwargs: Any) -> None: # Optimizer step is handled by the IPU accelerator. 
- if lambda_closure is not None: - lambda_closure() + lambda_closure() def get_device_stats(self, device: Union[str, torch.device]) -> Dict[str, Any]: """IPU device stats aren't supported yet.""" From d0c3fa4cf4b3b1c1f6edfed53c4c5925255e1016 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Wed, 20 Oct 2021 16:03:46 +0200 Subject: [PATCH 21/26] update carlos --- pytorch_lightning/plugins/precision/ipu_precision.py | 4 ++-- pytorch_lightning/plugins/precision/tpu.py | 7 ++++--- pytorch_lightning/plugins/training_type/tpu_spawn.py | 3 --- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/pytorch_lightning/plugins/precision/ipu_precision.py b/pytorch_lightning/plugins/precision/ipu_precision.py index f8dd77dcefcbe..c9f5318e1b882 100644 --- a/pytorch_lightning/plugins/precision/ipu_precision.py +++ b/pytorch_lightning/plugins/precision/ipu_precision.py @@ -40,7 +40,7 @@ def backward(self, model: "pl.LightningModule", *args: Any, **kwargs: Any) -> No def pre_optimizer_step( self, - model: "pl.LightningModule", + model: Union["pl.LightningModule", Module],, optimizer: Optimizer, optimizer_idx: int, lambda_closure: Callable[[], Any], @@ -55,7 +55,7 @@ def pre_optimizer_step( closure_result = lambda_closure() skipped_backward = closure_result is None # in manual optimization, the closure does not return a value - if model.automatic_optimization and skipped_backward: + if isinstance(model, pl.LightningModule) and model.automatic_optimization and skipped_backward: # we lack coverage here and IPUs are (currently) limited - something to explore if there's demand raise MisconfigurationException( "Skipping backward by returning `None` from your `training_step` is not implemented for IPUs." diff --git a/pytorch_lightning/plugins/precision/tpu.py b/pytorch_lightning/plugins/precision/tpu.py index dc4c7c856cbc2..b6bed35f5944e 100644 --- a/pytorch_lightning/plugins/precision/tpu.py +++ b/pytorch_lightning/plugins/precision/tpu.py @@ -11,8 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import Any, Callable +from typing import Any, Callable, Union +from torch.nn import Module from torch.optim import Optimizer import pytorch_lightning as pl @@ -27,7 +28,7 @@ class TPUPrecisionPlugin(PrecisionPlugin): def pre_optimizer_step( self, - model: "pl.LightningModule", + model: Union["pl.LightningModule", Module], optimizer: Optimizer, optimizer_idx: int, lambda_closure: Callable[[], Any], @@ -37,7 +38,7 @@ def pre_optimizer_step( closure_result = xm.optimizer_step(optimizer, optimizer_args={"closure": lambda_closure, **kwargs}) skipped_backward = closure_result is None # in manual optimization, the closure does not return a value - if model.automatic_optimization and skipped_backward: + if isinstance(model, pl.LightningModule) and model.automatic_optimization and skipped_backward: # we lack coverage here so disable this - something to explore if there's demand raise MisconfigurationException( "Skipping backward by returning `None` from your `training_step` is not implemented for TPUs." 
diff --git a/pytorch_lightning/plugins/training_type/tpu_spawn.py b/pytorch_lightning/plugins/training_type/tpu_spawn.py index 696ff42814aab..c75cd4c74470e 100644 --- a/pytorch_lightning/plugins/training_type/tpu_spawn.py +++ b/pytorch_lightning/plugins/training_type/tpu_spawn.py @@ -181,9 +181,6 @@ def new_process(self, trainer: "pl.Trainer", mp_queue: SimpleQueue) -> None: # ensure that spawned processes go through teardown before joining trainer._call_teardown_hook() - def optimizer_step(self, optimizer: Optimizer, lambda_closure: Optional[Callable], **kwargs: Any) -> None: - xm.optimizer_step(optimizer, optimizer_args={"closure": lambda_closure, **kwargs}) - def model_to_device(self) -> None: self.model = self.wrapped_model.to(self.root_device) From 5b6233caf6682d5d5eae09a5e97d017ea96378b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Wed, 20 Oct 2021 16:05:00 +0200 Subject: [PATCH 22/26] remove unused --- pytorch_lightning/plugins/training_type/tpu_spawn.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pytorch_lightning/plugins/training_type/tpu_spawn.py b/pytorch_lightning/plugins/training_type/tpu_spawn.py index c75cd4c74470e..6d18612b94f50 100644 --- a/pytorch_lightning/plugins/training_type/tpu_spawn.py +++ b/pytorch_lightning/plugins/training_type/tpu_spawn.py @@ -20,8 +20,6 @@ import torch import torch.multiprocessing as mp -from torch.nn import Module -from torch.optim import Optimizer from torch.utils.data import DataLoader import pytorch_lightning as pl From 9df48874e34dcd9a21d9bfcfccc8c7043882424e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Wed, 20 Oct 2021 17:11:06 +0200 Subject: [PATCH 23/26] Update pytorch_lightning/plugins/precision/ipu_precision.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Carlos Mocholí --- pytorch_lightning/plugins/precision/ipu_precision.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/plugins/precision/ipu_precision.py b/pytorch_lightning/plugins/precision/ipu_precision.py index c9f5318e1b882..092ba56ad44f6 100644 --- a/pytorch_lightning/plugins/precision/ipu_precision.py +++ b/pytorch_lightning/plugins/precision/ipu_precision.py @@ -40,7 +40,7 @@ def backward(self, model: "pl.LightningModule", *args: Any, **kwargs: Any) -> No def pre_optimizer_step( self, - model: Union["pl.LightningModule", Module],, + model: Union["pl.LightningModule", Module], optimizer: Optimizer, optimizer_idx: int, lambda_closure: Callable[[], Any], From 8866a082d40b8dd3a82662746369523dbce49a3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Wed, 20 Oct 2021 17:11:58 +0200 Subject: [PATCH 24/26] Update pytorch_lightning/accelerators/accelerator.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Carlos Mocholí --- pytorch_lightning/accelerators/accelerator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/accelerators/accelerator.py b/pytorch_lightning/accelerators/accelerator.py index 19b317b67cf5a..e1d0dbd8ea024 100644 --- a/pytorch_lightning/accelerators/accelerator.py +++ b/pytorch_lightning/accelerators/accelerator.py @@ -330,7 +330,7 @@ def optimizer_step( lambda_closure: closure calculating the loss value model: reference to the model, optionally defining optimizer step related hooks """ - model = model if model is not None else self.lightning_module + model = model or self.lightning_module 
make_optimizer_step = self.precision_plugin.pre_optimizer_step( model, optimizer, opt_idx, lambda_closure, **kwargs ) From e602f0085b81a8e6045a35c3a17fbcf8c0694a36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Wed, 20 Oct 2021 17:12:38 +0200 Subject: [PATCH 25/26] Update pytorch_lightning/accelerators/accelerator.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Carlos Mocholí --- pytorch_lightning/accelerators/accelerator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/accelerators/accelerator.py b/pytorch_lightning/accelerators/accelerator.py index e1d0dbd8ea024..28145db13be00 100644 --- a/pytorch_lightning/accelerators/accelerator.py +++ b/pytorch_lightning/accelerators/accelerator.py @@ -318,7 +318,7 @@ def optimizer_step( self, optimizer: Optimizer, opt_idx: int, - lambda_closure: Callable, + lambda_closure: Callable[[], Any], model: Optional[Union["pl.LightningModule", Module]] = None, **kwargs: Any ) -> None: From 5610612fa2900a094c067926b398b0703758e879 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Wed, 20 Oct 2021 17:15:49 +0200 Subject: [PATCH 26/26] update deepspeed logic for Lite --- .../plugins/precision/deepspeed_precision.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pytorch_lightning/plugins/precision/deepspeed_precision.py b/pytorch_lightning/plugins/precision/deepspeed_precision.py index 436b68e402114..bd92607fd3b17 100644 --- a/pytorch_lightning/plugins/precision/deepspeed_precision.py +++ b/pytorch_lightning/plugins/precision/deepspeed_precision.py @@ -68,11 +68,9 @@ def pre_optimizer_step( "Skipping backward by returning `None` from your `training_step` is not supported by `DeepSpeed`" ) # DeepSpeed handles the optimizer step internally - if isinstance(model, pl.LightningModule): - deepspeed_engine = model.trainer.model - deepspeed_engine.step() - return False - return True + deepspeed_engine = model.trainer.model if isinstance(model, pl.LightningModule) else model + deepspeed_engine.step() + return False def clip_gradients( self,
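
After patch 26/26, `DeepSpeedPrecisionPlugin.pre_optimizer_step` drives the engine step for both entry points: when `model` is a `LightningModule`, the DeepSpeed engine is reached through `model.trainer.model`; when a plain `nn.Module` is passed (the engine handed over by Lite), it is stepped directly. In both cases `False` is returned, so the accelerator's `make_optimizer_step` check skips the regular `optimizer.step()` call. The sketch below restates that dispatch as a standalone function for illustration only; it is not code from the repository, the helper name `pre_deepspeed_step` is made up for the example, and the real hook raises `MisconfigurationException` rather than `RuntimeError`.

    from typing import Any, Callable, Union

    import pytorch_lightning as pl
    from torch.nn import Module


    def pre_deepspeed_step(
        model: Union[pl.LightningModule, Module],
        lambda_closure: Callable[[], Any],
    ) -> bool:
        # Run the closure up front: DeepSpeed does not accept a closure argument.
        result = lambda_closure()
        if isinstance(model, pl.LightningModule) and model.automatic_optimization and result is None:
            # In automatic optimization a None result means backward was skipped,
            # which DeepSpeed cannot handle (the real plugin raises MisconfigurationException).
            raise RuntimeError("Skipping backward by returning `None` is not supported by DeepSpeed")
        # LightningModule path: the trainer wraps the module in a DeepSpeed engine.
        # Plain nn.Module path (Lite): the caller already passes the engine itself.
        deepspeed_engine = model.trainer.model if isinstance(model, pl.LightningModule) else model
        deepspeed_engine.step()
        # Returning False tells the accelerator not to call `optimizer.step()` again.
        return False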