From d0316bafb939eeda9a3412179e7fc3cd2761641b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adrian=20W=C3=A4lchli?=
Date: Sun, 21 Nov 2021 02:17:38 +0100
Subject: [PATCH 1/9] update deepspeed precision handling

---
 pytorch_lightning/lite/lite.py                 |  9 -----
 .../plugins/training_type/deepspeed.py         | 39 +++++--------
 2 files changed, 10 insertions(+), 38 deletions(-)

diff --git a/pytorch_lightning/lite/lite.py b/pytorch_lightning/lite/lite.py
index ca88095dfc673..09c5cd5ac2215 100644
--- a/pytorch_lightning/lite/lite.py
+++ b/pytorch_lightning/lite/lite.py
@@ -407,8 +407,6 @@ def _run_with_sharded_context(self, run_method: Callable, *args: Any, **kwargs:
 
     def _set_plugin_specific_precision_variables(self) -> None:
         # todo: these are hacks as plugins rely on access to the precision plugin
-        if isinstance(self._strategy, DeepSpeedPlugin):
-            self._set_deepspeed_precision_variables()
         if isinstance(self._strategy, DDPShardedPlugin):
             self._strategy._precision = self._accelerator_connector.precision
 
@@ -430,13 +428,6 @@ def _move_model_to_device(self, model: nn.Module, optimizers: List[Optimizer]) -
             model = self.to_device(model)
         return model
 
-    def _set_deepspeed_precision_variables(self) -> None:
-        # TODO: Refactor this once precision pluging is part of the strategy.
-        amp_type = self._accelerator_connector.amp_type
-        amp_level = self._accelerator_connector.amp_level
-        precision = self._accelerator_connector.precision
-        self._strategy._amp_level, self._strategy._amp_type, self._strategy._precision = amp_level, amp_type, precision
-
     def _requires_distributed_sampler(self, dataloader: DataLoader) -> bool:
         return (
             self._accelerator_connector.is_distributed
diff --git a/pytorch_lightning/plugins/training_type/deepspeed.py b/pytorch_lightning/plugins/training_type/deepspeed.py
index 01959bdcee212..8d3f8c184629d 100644
--- a/pytorch_lightning/plugins/training_type/deepspeed.py
+++ b/pytorch_lightning/plugins/training_type/deepspeed.py
@@ -28,6 +28,7 @@
 
 import pytorch_lightning as pl
 from pytorch_lightning.overrides.base import _LightningModuleWrapperBase
+from pytorch_lightning.plugins import ApexMixedPrecisionPlugin, NativeMixedPrecisionPlugin
 from pytorch_lightning.plugins.environments.cluster_environment import ClusterEnvironment
 from pytorch_lightning.plugins.io.checkpoint_plugin import CheckpointIO
 from pytorch_lightning.plugins.precision import PrecisionPlugin
@@ -327,24 +328,6 @@ def __init__(
         self.hysteresis = hysteresis
         self.min_loss_scale = min_loss_scale
 
-        # optionally set by Lite
-        self._precision: Optional[Union[str, int]] = None
-        self._amp_level: Optional[str] = None
-        self._amp_type: Optional[str] = None
-
-    @property
-    def precision(self) -> Union[str, int]:
-        return self._precision or self.precision_plugin.precision
-
-    @property
-    def amp_level(self) -> Optional[str]:
-        if self._amp_type == AMPType.APEX:
-            return self._amp_level or self.lightning_module.trainer._accelerator_connector.amp_level
-
-    @property
-    def amp_type(self) -> Optional[str]:
-        return self._amp_type or self.lightning_module.trainer._accelerator_connector.amp_type
-
     def _load_config(self, config):
         if config is None and self.DEEPSPEED_ENV_VAR in os.environ:
             rank_zero_info(f"Loading DeepSpeed config from set {self.DEEPSPEED_ENV_VAR} environment variable")
@@ -459,11 +442,11 @@ def init_deepspeed(self):
                 "DeepSpeed currently does not support different `accumulate_grad_batches` at different epochs."
             )
 
-        model = LightningDeepSpeedModule(pl_module=self.model, precision=self.precision)
+        model = LightningDeepSpeedModule(pl_module=self.model, precision=self.precision_plugin.precision)
 
         if self.zero_stage_3 and self.partition_module:
             # Ensure the entire model has been moved to the appropriate device
-            dtype = torch.float16 if self.precision in (16, "mixed") else torch.float32
+            dtype = torch.float16 if self.precision_plugin.precision in (16, "mixed") else torch.float32
             deepspeed.zero.Init(
                 module=model, remote_device=self.remote_device, pin_memory=True, config=self.config, dtype=dtype
             )
@@ -520,7 +503,7 @@ def _initialize_deepspeed_train(self, model):
     def model_sharded_context(self) -> Generator[None, None, None]:
         if self.zero_stage_3:
             assert self._config_initialized
-            dtype = torch.float16 if self.precision in (16, "mixed") else torch.float32
+            dtype = torch.float16 if self.precision_plugin.precision in (16, "mixed") else torch.float32
             model_parallel_context = deepspeed.zero.Init(
                 remote_device=self.remote_device, pin_memory=True, config=self.config, dtype=dtype
             )
@@ -646,11 +629,9 @@ def _auto_select_batch_size(self):
             )
         return batch_size
 
-    def _format_precision_config(self):
-        if self.amp_type == AMPType.APEX:
-            amp_level = self.amp_level
-        if self.precision in (16, "mixed"):
-            if "fp16" not in self.config and self.amp_type == AMPType.NATIVE:
+    def _format_precision_config(self) -> None:
+        if self.precision_plugin.precision in (16, "mixed"):
+            if "fp16" not in self.config and isinstance(self.precision_plugin, NativeMixedPrecisionPlugin):
                 # FP16 is a DeepSpeed standalone AMP implementation
                 rank_zero_info("Enabling DeepSpeed FP16.")
                 self.config["fp16"] = {
@@ -661,9 +642,9 @@ def _format_precision_config(self):
                     "hysteresis": self.hysteresis,
                     "min_loss_scale": self.min_loss_scale,
                 }
-            elif "amp" not in self.config and self.amp_type == AMPType.APEX:
-                rank_zero_only("Enabling DeepSpeed APEX Implementation.")
-                self.config["amp"] = {"enabled": True, "opt_level": amp_level}
+            elif "amp" not in self.config and isinstance(self.precision_plugin, ApexMixedPrecisionPlugin):
+                rank_zero_info("Enabling DeepSpeed APEX Implementation.")
+                self.config["amp"] = {"enabled": True, "opt_level": self.precision_plugin.amp_level}
 
     def _create_default_config(
         self,

From 379eef8f7e0117aa54e5cb7d1e27aeff779874c0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adrian=20W=C3=A4lchli?=
Date: Sun, 21 Nov 2021 03:11:13 +0100
Subject: [PATCH 2/9] cyclic import

---
 pytorch_lightning/plugins/training_type/deepspeed.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/pytorch_lightning/plugins/training_type/deepspeed.py b/pytorch_lightning/plugins/training_type/deepspeed.py
index 8d3f8c184629d..ce2161f13eaca 100644
--- a/pytorch_lightning/plugins/training_type/deepspeed.py
+++ b/pytorch_lightning/plugins/training_type/deepspeed.py
@@ -27,17 +27,17 @@
 from torch.optim.lr_scheduler import _LRScheduler
 
 import pytorch_lightning as pl
+import pytorch_lightning.plugins as plugins
 from pytorch_lightning.overrides.base import _LightningModuleWrapperBase
-from pytorch_lightning.plugins import ApexMixedPrecisionPlugin, NativeMixedPrecisionPlugin
 from pytorch_lightning.plugins.environments.cluster_environment import ClusterEnvironment
 from pytorch_lightning.plugins.io.checkpoint_plugin import CheckpointIO
 from pytorch_lightning.plugins.precision import PrecisionPlugin
 from pytorch_lightning.plugins.training_type.ddp import DDPPlugin
 from pytorch_lightning.trainer.optimizers import _get_default_scheduler_config
 from pytorch_lightning.trainer.states import TrainerFn
-from pytorch_lightning.utilities import AMPType, GradClipAlgorithmType
+from pytorch_lightning.utilities import GradClipAlgorithmType
 from pytorch_lightning.utilities.apply_func import apply_to_collection
-from pytorch_lightning.utilities.distributed import log, rank_zero_info, rank_zero_only
+from pytorch_lightning.utilities.distributed import log, rank_zero_info
 from pytorch_lightning.utilities.enums import _StrategyType
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from pytorch_lightning.utilities.imports import _DEEPSPEED_AVAILABLE
@@ -631,7 +631,7 @@ def _auto_select_batch_size(self):
 
     def _format_precision_config(self) -> None:
         if self.precision_plugin.precision in (16, "mixed"):
-            if "fp16" not in self.config and isinstance(self.precision_plugin, NativeMixedPrecisionPlugin):
+            if "fp16" not in self.config and isinstance(self.precision_plugin, plugins.NativeMixedPrecisionPlugin):
                 # FP16 is a DeepSpeed standalone AMP implementation
                 rank_zero_info("Enabling DeepSpeed FP16.")
                 self.config["fp16"] = {
@@ -642,7 +642,7 @@ def _format_precision_config(self) -> None:
                     "hysteresis": self.hysteresis,
                     "min_loss_scale": self.min_loss_scale,
                 }
-            elif "amp" not in self.config and isinstance(self.precision_plugin, ApexMixedPrecisionPlugin):
+            elif "amp" not in self.config and isinstance(self.precision_plugin, plugins.ApexMixedPrecisionPlugin):
                 rank_zero_info("Enabling DeepSpeed APEX Implementation.")
                 self.config["amp"] = {"enabled": True, "opt_level": self.precision_plugin.amp_level}
 

From 5df18b4b14599981c5fe23784192ce3b4111cd69 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adrian=20W=C3=A4lchli?=
Date: Tue, 23 Nov 2021 03:31:23 +0100
Subject: [PATCH 3/9] fix import nonsense

---
 pytorch_lightning/plugins/training_type/deepspeed.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/pytorch_lightning/plugins/training_type/deepspeed.py b/pytorch_lightning/plugins/training_type/deepspeed.py
index ce2161f13eaca..1ebd948920051 100644
--- a/pytorch_lightning/plugins/training_type/deepspeed.py
+++ b/pytorch_lightning/plugins/training_type/deepspeed.py
@@ -27,7 +27,6 @@
 from torch.optim.lr_scheduler import _LRScheduler
 
 import pytorch_lightning as pl
-import pytorch_lightning.plugins as plugins
 from pytorch_lightning.overrides.base import _LightningModuleWrapperBase
 from pytorch_lightning.plugins.environments.cluster_environment import ClusterEnvironment
 from pytorch_lightning.plugins.io.checkpoint_plugin import CheckpointIO
@@ -631,7 +630,7 @@ def _auto_select_batch_size(self):
 
     def _format_precision_config(self) -> None:
         if self.precision_plugin.precision in (16, "mixed"):
-            if "fp16" not in self.config and isinstance(self.precision_plugin, plugins.NativeMixedPrecisionPlugin):
+            if "fp16" not in self.config and isinstance(self.precision_plugin, pl.plugins.NativeMixedPrecisionPlugin):
                 # FP16 is a DeepSpeed standalone AMP implementation
                 rank_zero_info("Enabling DeepSpeed FP16.")
                 self.config["fp16"] = {
@@ -642,7 +641,7 @@ def _format_precision_config(self) -> None:
                     "hysteresis": self.hysteresis,
                     "min_loss_scale": self.min_loss_scale,
                 }
-            elif "amp" not in self.config and isinstance(self.precision_plugin, plugins.ApexMixedPrecisionPlugin):
+            elif "amp" not in self.config and isinstance(self.precision_plugin, pl.plugins.ApexMixedPrecisionPlugin):
                 rank_zero_info("Enabling DeepSpeed APEX Implementation.")
                 self.config["amp"] = {"enabled": True, "opt_level": self.precision_plugin.amp_level}
 
From 5697b3840216e16d4da750ee9c276d08b15e3f7e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adrian=20W=C3=A4lchli?=
Date: Tue, 23 Nov 2021 04:05:58 +0100
Subject: [PATCH 4/9] revert

---
 pytorch_lightning/plugins/training_type/deepspeed.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/pytorch_lightning/plugins/training_type/deepspeed.py b/pytorch_lightning/plugins/training_type/deepspeed.py
index 1ebd948920051..26d2b8d833de8 100644
--- a/pytorch_lightning/plugins/training_type/deepspeed.py
+++ b/pytorch_lightning/plugins/training_type/deepspeed.py
@@ -37,7 +37,7 @@
 from pytorch_lightning.utilities import GradClipAlgorithmType
 from pytorch_lightning.utilities.apply_func import apply_to_collection
 from pytorch_lightning.utilities.distributed import log, rank_zero_info
-from pytorch_lightning.utilities.enums import _StrategyType
+from pytorch_lightning.utilities.enums import _StrategyType, AMPType
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from pytorch_lightning.utilities.imports import _DEEPSPEED_AVAILABLE
 from pytorch_lightning.utilities.model_helpers import is_overridden
@@ -629,8 +629,9 @@ def _auto_select_batch_size(self):
         return batch_size
 
     def _format_precision_config(self) -> None:
+        amp_type = self.lightning_module.trainer._accelerator_connector.amp_type
         if self.precision_plugin.precision in (16, "mixed"):
-            if "fp16" not in self.config and isinstance(self.precision_plugin, pl.plugins.NativeMixedPrecisionPlugin):
+            if "fp16" not in self.config and amp_type == AMPType.NATIVE:
                 # FP16 is a DeepSpeed standalone AMP implementation
                 rank_zero_info("Enabling DeepSpeed FP16.")
                 self.config["fp16"] = {
@@ -641,7 +642,7 @@ def _format_precision_config(self) -> None:
                     "hysteresis": self.hysteresis,
                     "min_loss_scale": self.min_loss_scale,
                 }
-            elif "amp" not in self.config and isinstance(self.precision_plugin, pl.plugins.ApexMixedPrecisionPlugin):
+            elif "amp" not in self.config and amp_type == AMPType.APEX:
                 rank_zero_info("Enabling DeepSpeed APEX Implementation.")
                 self.config["amp"] = {"enabled": True, "opt_level": self.precision_plugin.amp_level}
 

From c2dc37d03ac782aa0a02de6f894770317769cc43 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adrian=20W=C3=A4lchli?=
Date: Tue, 23 Nov 2021 06:39:38 +0100
Subject: [PATCH 5/9] changelog

---
 CHANGELOG.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index adb1b070dc386..ad5029a09bcae 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -162,6 +162,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 - Removed the `precision_plugin` attribute from `Accelerator` in favor of its equivalent attribute `precision_plugin` in the `TrainingTypePlugin` ([#10570](https://github.com/PyTorchLightning/pytorch-lightning/pull/10570))
 
+
+- Removed `DeepSpeedPlugin.{precision,amp_type,amp_level}` properties ([#10657](https://github.com/PyTorchLightning/pytorch-lightning/pull/10657))
+
+
 ### Fixed
 
 - When a tensor is logged with `self.log`, run its computation with the same `dtype` ([#10076](https://github.com/PyTorchLightning/pytorch-lightning/pull/10076))

From 255b9604ffa8fa69ff56fc6cc9d5094e9ad5ef6f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adrian=20W=C3=A4lchli?=
Date: Tue, 23 Nov 2021 07:43:12 +0100
Subject: [PATCH 6/9] deepspeed precision access

---
 pytorch_lightning/plugins/precision/deepspeed.py     | 8 +++++---
 pytorch_lightning/plugins/training_type/deepspeed.py | 8 ++++----
 .../trainer/connectors/accelerator_connector.py      | 2 +-
 3 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/pytorch_lightning/plugins/precision/deepspeed.py b/pytorch_lightning/plugins/precision/deepspeed.py
index 27ac384d25303..13be5cfe62456 100644
--- a/pytorch_lightning/plugins/precision/deepspeed.py
+++ b/pytorch_lightning/plugins/precision/deepspeed.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Any, Callable, Union
+from typing import Any, Callable, Union, Optional
 
 from torch import Tensor
 from torch.nn import Module
@@ -19,7 +19,7 @@
 
 import pytorch_lightning as pl
 from pytorch_lightning.plugins.precision.precision_plugin import PrecisionPlugin
-from pytorch_lightning.utilities import GradClipAlgorithmType
+from pytorch_lightning.utilities import GradClipAlgorithmType, AMPType
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from pytorch_lightning.utilities.imports import _DEEPSPEED_AVAILABLE
 from pytorch_lightning.utilities.model_helpers import is_overridden
@@ -34,9 +34,11 @@ class DeepSpeedPrecisionPlugin(PrecisionPlugin):
     """Precision plugin for DeepSpeed integration."""
 
-    def __init__(self, precision: int) -> None:
+    def __init__(self, precision: Union[str, int], amp_type: str, amp_level: Optional[str] = None) -> None:
         super().__init__()
         self.precision = precision
+        self.amp_type = amp_type
+        self.amp_level = amp_level
 
     def backward(self, model: "pl.LightningModule", closure_loss: Tensor, *args: Any, **kwargs: Any) -> None:
         if is_overridden("backward", model):
diff --git a/pytorch_lightning/plugins/training_type/deepspeed.py b/pytorch_lightning/plugins/training_type/deepspeed.py
index 26d2b8d833de8..57e7a9200b9a8 100644
--- a/pytorch_lightning/plugins/training_type/deepspeed.py
+++ b/pytorch_lightning/plugins/training_type/deepspeed.py
@@ -28,6 +28,7 @@
 
 import pytorch_lightning as pl
 from pytorch_lightning.overrides.base import _LightningModuleWrapperBase
+from pytorch_lightning.plugins import DeepSpeedPrecisionPlugin
 from pytorch_lightning.plugins.environments.cluster_environment import ClusterEnvironment
 from pytorch_lightning.plugins.io.checkpoint_plugin import CheckpointIO
 from pytorch_lightning.plugins.precision import PrecisionPlugin
@@ -130,7 +131,7 @@ def __init__(
         synchronize_checkpoint_boundary: bool = False,
         load_full_weights: bool = False,
         partition_module: bool = True,
-        precision_plugin: Optional[PrecisionPlugin] = None,
+        precision_plugin: Optional[DeepSpeedPrecisionPlugin] = None,
     ) -> None:
"""Provides capabilities to run training using the DeepSpeed library, with training optimizations for large billion parameter models. `For more information: https://pytorch- @@ -629,9 +630,8 @@ def _auto_select_batch_size(self): return batch_size def _format_precision_config(self) -> None: - amp_type = self.lightning_module.trainer._accelerator_connector.amp_type if self.precision_plugin.precision in (16, "mixed"): - if "fp16" not in self.config and amp_type == AMPType.NATIVE: + if "fp16" not in self.config and self.precision_plugin.amp_type == AMPType.NATIVE: # FP16 is a DeepSpeed standalone AMP implementation rank_zero_info("Enabling DeepSpeed FP16.") self.config["fp16"] = { @@ -642,7 +642,7 @@ def _format_precision_config(self) -> None: "hysteresis": self.hysteresis, "min_loss_scale": self.min_loss_scale, } - elif "amp" not in self.config and amp_type == AMPType.APEX: + elif "amp" not in self.config and self.precision_plugin.amp_type == AMPType.APEX: rank_zero_info("Enabling DeepSpeed APEX Implementation.") self.config["amp"] = {"enabled": True, "opt_level": self.precision_plugin.amp_level} diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 7136437bbc69d..c95d46e77b977 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -637,7 +637,7 @@ def select_precision_plugin(self) -> PrecisionPlugin: return TPUBf16PrecisionPlugin() if self._distrib_type == _StrategyType.DEEPSPEED or isinstance(self._training_type_plugin, DeepSpeedPlugin): - return DeepSpeedPrecisionPlugin(self.precision) + return DeepSpeedPrecisionPlugin(self.precision, self.amp_type, self.amp_level) if self.precision == 32: return PrecisionPlugin() From 19c7ee1dbfa740b94db6aaf404f89f67dc50e1a6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 23 Nov 2021 06:44:31 +0000 Subject: [PATCH 7/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pytorch_lightning/plugins/precision/deepspeed.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/plugins/precision/deepspeed.py b/pytorch_lightning/plugins/precision/deepspeed.py index 13be5cfe62456..6b1c6f072813c 100644 --- a/pytorch_lightning/plugins/precision/deepspeed.py +++ b/pytorch_lightning/plugins/precision/deepspeed.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Any, Callable, Union, Optional
+from typing import Any, Callable, Optional, Union
 
 from torch import Tensor
 from torch.nn import Module
@@ -19,7 +19,7 @@
 
 import pytorch_lightning as pl
 from pytorch_lightning.plugins.precision.precision_plugin import PrecisionPlugin
-from pytorch_lightning.utilities import GradClipAlgorithmType, AMPType
+from pytorch_lightning.utilities import AMPType, GradClipAlgorithmType
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from pytorch_lightning.utilities.imports import _DEEPSPEED_AVAILABLE
 from pytorch_lightning.utilities.model_helpers import is_overridden

From ed32e18b0e7a6bd31ee2c09fa81956c81a23fd73 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adrian=20W=C3=A4lchli?=
Date: Tue, 23 Nov 2021 07:49:56 +0100
Subject: [PATCH 8/9] undo typing chnage

---
 pytorch_lightning/plugins/training_type/deepspeed.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pytorch_lightning/plugins/training_type/deepspeed.py b/pytorch_lightning/plugins/training_type/deepspeed.py
index 57e7a9200b9a8..86d380ac24ce8 100644
--- a/pytorch_lightning/plugins/training_type/deepspeed.py
+++ b/pytorch_lightning/plugins/training_type/deepspeed.py
@@ -28,7 +28,6 @@
 
 import pytorch_lightning as pl
 from pytorch_lightning.overrides.base import _LightningModuleWrapperBase
-from pytorch_lightning.plugins import DeepSpeedPrecisionPlugin
 from pytorch_lightning.plugins.environments.cluster_environment import ClusterEnvironment
 from pytorch_lightning.plugins.io.checkpoint_plugin import CheckpointIO
 from pytorch_lightning.plugins.precision import PrecisionPlugin
@@ -130,7 +130,7 @@ def __init__(
         synchronize_checkpoint_boundary: bool = False,
         load_full_weights: bool = False,
         partition_module: bool = True,
-        precision_plugin: Optional[DeepSpeedPrecisionPlugin] = None,
+        precision_plugin: Optional[PrecisionPlugin] = None,
     ) -> None:
         """Provides capabilities to run training using the DeepSpeed library, with training optimizations for large
         billion parameter models. `For more information: https://pytorch-

From 63e1681d226bbd8c33e168014272513f1e437389 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adrian=20W=C3=A4lchli?=
Date: Tue, 23 Nov 2021 11:06:26 +0100
Subject: [PATCH 9/9] unused import

---
 pytorch_lightning/plugins/precision/deepspeed.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pytorch_lightning/plugins/precision/deepspeed.py b/pytorch_lightning/plugins/precision/deepspeed.py
index 6b1c6f072813c..46cf023fc5d32 100644
--- a/pytorch_lightning/plugins/precision/deepspeed.py
+++ b/pytorch_lightning/plugins/precision/deepspeed.py
@@ -19,7 +19,7 @@
 
 import pytorch_lightning as pl
 from pytorch_lightning.plugins.precision.precision_plugin import PrecisionPlugin
-from pytorch_lightning.utilities import AMPType, GradClipAlgorithmType
+from pytorch_lightning.utilities import GradClipAlgorithmType
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from pytorch_lightning.utilities.imports import _DEEPSPEED_AVAILABLE
 from pytorch_lightning.utilities.model_helpers import is_overridden
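
Taken together, the series ends with `DeepSpeedPrecisionPlugin` carrying `precision`, `amp_type`, and `amp_level` (constructed by the accelerator connector as `DeepSpeedPrecisionPlugin(self.precision, self.amp_type, self.amp_level)`), and with `DeepSpeedPlugin._format_precision_config()` reading those attributes to fill in the DeepSpeed `fp16` or `amp` sections of the config. The minimal, self-contained sketch below only mimics that final decision flow so it can be read in isolation: `PrecisionSettings` and `format_precision_config` are illustrative stand-ins invented for this sketch (not pytorch_lightning APIs), plain strings stand in for the `AMPType` enum, and the `fp16` keys shown are a subset of what the real plugin writes.

# Illustrative stand-in for the behaviour after this series: the DeepSpeed strategy
# derives its fp16/apex config from the precision plugin's `precision`, `amp_type`
# and `amp_level` instead of privately cached copies. Hypothetical names, not
# pytorch_lightning APIs.
from dataclasses import dataclass
from typing import Dict, Optional, Union


@dataclass
class PrecisionSettings:
    precision: Union[str, int]       # e.g. 16, 32, or "mixed"
    amp_type: str                    # "native" or "apex" (stand-in for AMPType)
    amp_level: Optional[str] = None  # e.g. "O2", only meaningful for apex


def format_precision_config(settings: PrecisionSettings, config: Dict) -> Dict:
    # Mirrors the branch structure of the patched _format_precision_config:
    # only touch the config in 16-bit/mixed mode and when the user has not
    # already provided the corresponding section.
    if settings.precision in (16, "mixed"):
        if "fp16" not in config and settings.amp_type == "native":
            # DeepSpeed's own fp16 loss-scaling implementation
            config["fp16"] = {"enabled": True, "loss_scale": 0, "initial_scale_power": 16}
        elif "amp" not in config and settings.amp_type == "apex":
            # Defer mixed precision to NVIDIA Apex at the requested optimization level
            config["amp"] = {"enabled": True, "opt_level": settings.amp_level}
    return config


if __name__ == "__main__":
    print(format_precision_config(PrecisionSettings(16, "native"), {}))
    print(format_precision_config(PrecisionSettings(16, "apex", amp_level="O2"), {}))
    print(format_precision_config(PrecisionSettings(32, "native"), {}))

The last call shows the 32-bit case falling through with the config left untouched, matching the guard in the patched method.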