Merged

Commits (42)
9d0a88f
remove training_step() from accelerator
four4fish Dec 4, 2021
ac313dc
remove test, val, predict step
four4fish Dec 4, 2021
c22ce58
move
awaelchli Dec 6, 2021
8ae530c
wip
awaelchli Dec 7, 2021
da00425
accelerator references
awaelchli Dec 8, 2021
7db6742
cpu training
awaelchli Dec 8, 2021
8c7fc95
rename occurrences in tests
awaelchli Dec 8, 2021
4afbf5c
update tests
awaelchli Dec 8, 2021
8fdce97
pull from adrian's commit
four4fish Dec 10, 2021
1c7bf4d
fix changelog merge pro
four4fish Dec 10, 2021
59920f7
fix accelerator_connector and other updates
four4fish Dec 10, 2021
7637a7c
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 10, 2021
d378475
fix doc build and some mypy
four4fish Dec 10, 2021
2810731
fix lite
four4fish Dec 10, 2021
0f9d245
fix gpu setup environment
four4fish Dec 10, 2021
cc2648a
support customized ttp and accelerator
four4fish Dec 10, 2021
34b9544
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 10, 2021
46283e2
fix tpu error check
four4fish Dec 10, 2021
e6dfafe
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 10, 2021
347a4f1
fix precision_plugin initialization to recognize customized plugin
four4fish Dec 11, 2021
c0120d0
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 11, 2021
f08163b
Update bug_report_model.py
four4fish Dec 14, 2021
34c13ff
Update accelerator_connector.py
four4fish Dec 14, 2021
6bdb464
update changelog
awaelchli Dec 15, 2021
c039c68
allow shorthand typing references to pl.Accelerator
awaelchli Dec 15, 2021
0976c50
rename helper method and add docstring
awaelchli Dec 15, 2021
7b1738c
fix typing
awaelchli Dec 15, 2021
2f18893
Update pytorch_lightning/trainer/connectors/accelerator_connector.py
awaelchli Dec 15, 2021
bf97a58
Update tests/accelerators/test_accelerator_connector.py
awaelchli Dec 15, 2021
e0f4a77
Update tests/accelerators/test_cpu.py
awaelchli Dec 15, 2021
5488519
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 15, 2021
b69537e
fix pre commit complaint
awaelchli Dec 15, 2021
94fe8f8
update typing to long ugly path
awaelchli Dec 15, 2021
19bcf3f
spacing in flow diagram
awaelchli Dec 15, 2021
5862afe
remove todo comments
four4fish Dec 15, 2021
2dfc443
docformatter
awaelchli Dec 15, 2021
92cc262
Update pytorch_lightning/plugins/training_type/training_type_plugin.py
awaelchli Dec 15, 2021
ff3e2dc
revert test changes
four4fish Dec 15, 2021
9f1eade
improve custom plugin examples
four4fish Dec 15, 2021
a74f4c1
remove redundant call to ttp attribute
awaelchli Dec 16, 2021
292c640
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 16, 2021
448b524
Apply suggestions from code review
four4fish Dec 16, 2021
Files changed
2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -114,6 +114,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Removed duplicated file extension when uploading model checkpoints with `NeptuneLogger` ([#11015](https://github.com/PyTorchLightning/pytorch-lightning/pull/11015))


- Moved ownership of the `Accelerator` instance to the `TrainingTypePlugin`; all training-type plugins now take an optional parameter `accelerator` ([#11022](https://github.com/PyTorchLightning/pytorch-lightning/pull/11022))


### Deprecated

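In code, the move described in the changelog entry above amounts to the following construction pattern, mirroring the documentation updates in this PR. This is a sketch only; it assumes a CUDA-capable machine and the imports shown in docs/source/extensions/accelerators.rst:

from pytorch_lightning import Trainer
from pytorch_lightning.accelerators import GPUAccelerator
from pytorch_lightning.plugins import DDPPlugin, NativeMixedPrecisionPlugin

# Previously the Accelerator was built with both plugins and passed as
# Trainer(accelerator=...). After this PR, the TrainingTypePlugin owns the
# accelerator (and the precision plugin) and is passed as Trainer(strategy=...).
accelerator = GPUAccelerator()
precision_plugin = NativeMixedPrecisionPlugin(precision=16, device="cuda")
training_type_plugin = DDPPlugin(accelerator=accelerator, precision_plugin=precision_plugin)
trainer = Trainer(strategy=training_type_plugin)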
9 changes: 4 additions & 5 deletions docs/source/extensions/accelerators.rst
@@ -25,11 +25,10 @@ One to handle differences from the training routine and one to handle different precisions.
from pytorch_lightning.accelerators import GPUAccelerator
from pytorch_lightning.plugins import NativeMixedPrecisionPlugin, DDPPlugin

accelerator = GPUAccelerator(
precision_plugin=NativeMixedPrecisionPlugin(precision=16, device="cuda"),
training_type_plugin=DDPPlugin(),
)
trainer = Trainer(accelerator=accelerator)
accelerator = GPUAccelerator()
precision_plugin = NativeMixedPrecisionPlugin(precision=16, device="cuda")
training_type_plugin = DDPPlugin(accelerator=accelerator, precision_plugin=precision_plugin)
trainer = Trainer(strategy=training_type_plugin)


We expose Accelerators and Plugins mainly for expert users who want to extend Lightning to work with new
9 changes: 4 additions & 5 deletions docs/source/extensions/plugins.rst
@@ -80,11 +80,10 @@ can then be passed into the Trainer directly or via a (custom) accelerator:
trainer = Trainer(strategy=CustomDDPPlugin(), plugins=[CustomPrecisionPlugin()])

# fully custom accelerator and plugins
accelerator = MyAccelerator(
precision_plugin=CustomPrecisionPlugin(),
training_type_plugin=CustomDDPPlugin(),
)
trainer = Trainer(accelerator=accelerator)
accelerator = MyAccelerator()
precision_plugin = CustomPrecisionPlugin()
training_type_plugin = CustomDDPPlugin(accelerator=accelerator, precision_plugin=precision_plugin)
trainer = Trainer(strategy=training_type_plugin)


The full list of built-in plugins is listed below.
62 changes: 2 additions & 60 deletions pytorch_lightning/accelerators/accelerator.py
@@ -12,14 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from abc import abstractmethod
from typing import Any, Dict, Optional, Union
from typing import Any, Dict, Union

import torch
from torch.nn import Module

import pytorch_lightning as pl
from pytorch_lightning.plugins.precision import PrecisionPlugin
from pytorch_lightning.plugins.training_type import TrainingTypePlugin


class Accelerator:
@@ -31,76 +28,21 @@ class Accelerator:
- GPU
- TPU
- IPU
Each Accelerator gets two plugins upon initialization:
One to handle differences from the training routine and one to handle different precisions.
"""

def __init__(self, precision_plugin: Optional[PrecisionPlugin], training_type_plugin: TrainingTypePlugin) -> None:
"""
Args:
precision_plugin: the plugin to handle precision-specific parts
.. deprecated::
The ``precision_plugin`` parameter has been deprecated and will be removed soon.
Pass the precision plugin as a parameter to the ``TrainingTypePlugin`` instead.
training_type_plugin: the plugin to handle different training routines
"""

self.training_type_plugin = training_type_plugin

if precision_plugin is not None:
self.training_type_plugin._precision_plugin = precision_plugin

def setup_environment(self) -> None:
def setup_environment(self, root_device: torch.device) -> None:
"""Setup any processes or distributed connections.
This is called before the LightningModule/DataModule setup hook which allows the user to access the accelerator
environment before setup is complete.
"""
self.training_type_plugin.setup_environment()

def setup(self, trainer: "pl.Trainer") -> None:
"""Setup plugins for the trainer fit and creates optimizers.
Args:
trainer: the trainer instance
"""
self.training_type_plugin.setup(trainer)

@property
def model(self) -> Module:
"""Returns the model.
This can also be a wrapped LightningModule. For retrieving the pure LightningModule use
:attr:`Accelerator.lightning_module`
"""
return self.training_type_plugin.model

@model.setter
def model(self, new_model: Module) -> None:
self.training_type_plugin.model = new_model

@property
def lightning_module(self) -> "pl.LightningModule":
"""Returns the pure LightningModule.
To get the potentially wrapped model use :attr:`Accelerator.model`
"""
return self.training_type_plugin.lightning_module

@property
def root_device(self) -> torch.device:
"""Returns the root device."""
return self.training_type_plugin.root_device

def teardown(self) -> None:
"""This method is called to teardown the training process.
It is the right place to release memory and free other resources.
"""
self.training_type_plugin.teardown()

def get_device_stats(self, device: Union[str, torch.device]) -> Dict[str, Any]:
"""Gets stats for a given device.
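For custom accelerator authors, a minimal sketch of the slimmed-down interface above. The class name is hypothetical, and the real base class may require additional overrides beyond the hooks visible in these hunks (for example auto_device_count, which appears later in the gpu.py diff):

from typing import Any, Dict, Union

import torch

from pytorch_lightning.accelerators.accelerator import Accelerator


class CPUOnlyAccelerator(Accelerator):
    """Hypothetical accelerator written against the hooks visible in this diff."""

    def setup_environment(self, root_device: torch.device) -> None:
        # The root device now arrives as an argument from the owning
        # TrainingTypePlugin instead of being read from
        # self.training_type_plugin.root_device, which no longer exists.
        if root_device.type != "cpu":
            raise RuntimeError(f"Expected a CPU device, got {root_device}")

    def get_device_stats(self, device: Union[str, torch.device]) -> Dict[str, Any]:
        # No device stats are collected in this sketch.
        return {}

    @staticmethod
    def auto_device_count() -> int:
        return 1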
11 changes: 3 additions & 8 deletions pytorch_lightning/accelerators/cpu.py
@@ -15,26 +15,21 @@

import torch

import pytorch_lightning as pl
from pytorch_lightning.accelerators.accelerator import Accelerator
from pytorch_lightning.utilities.exceptions import MisconfigurationException


class CPUAccelerator(Accelerator):
"""Accelerator for CPU devices."""

def setup(self, trainer: "pl.Trainer") -> None:
def setup_environment(self, root_device: torch.device) -> None:
"""
Raises:
MisconfigurationException:
If the selected device is not CPU.
"""
if "cpu" not in str(self.training_type_plugin.root_device):
raise MisconfigurationException(
f"Device should be CPU, got {self.training_type_plugin.root_device} instead."
)

return super().setup(trainer)
if "cpu" not in str(root_device):
raise MisconfigurationException(f"Device should be CPU, got {root_device} instead.")

def get_device_stats(self, device: Union[str, torch.device]) -> Dict[str, Any]:
"""CPU device stats aren't supported yet."""
17 changes: 5 additions & 12 deletions pytorch_lightning/accelerators/gpu.py
@@ -30,22 +30,19 @@
class GPUAccelerator(Accelerator):
"""Accelerator for GPU devices."""

def setup_environment(self) -> None:
def setup_environment(self, root_device: torch.device) -> None:
"""
Raises:
MisconfigurationException:
If the selected device is not GPU.
"""
super().setup_environment()
if "cuda" not in str(self.training_type_plugin.root_device):
raise MisconfigurationException(
f"Device should be GPU, got {self.training_type_plugin.root_device} instead"
)
torch.cuda.set_device(self.training_type_plugin.root_device)
if "cuda" not in str(root_device):
raise MisconfigurationException(f"Device should be GPU, got {self.root_device} instead")
torch.cuda.set_device(root_device)

def setup(self, trainer: "pl.Trainer") -> None:
# TODO refactor input from trainer to local_rank @four4fish
self.set_nvidia_flags(trainer.local_rank)
super().setup(trainer)
# clear cache before training
torch.cuda.empty_cache()

@@ -74,10 +71,6 @@ def get_device_stats(self, device: Union[str, torch.device]) -> Dict[str, Any]:
return torch.cuda.memory_stats(device)
return get_nvidia_gpu_stats(device)

def teardown(self) -> None:
super().teardown()
self.training_type_plugin._move_optimizer_state(torch.device("cpu"))

@staticmethod
def auto_device_count() -> int:
"""Get the devices when set to auto."""
23 changes: 0 additions & 23 deletions pytorch_lightning/accelerators/tpu.py
@@ -15,11 +15,7 @@

import torch

import pytorch_lightning as pl
from pytorch_lightning.accelerators.accelerator import Accelerator
from pytorch_lightning.plugins.precision import TPUPrecisionPlugin
from pytorch_lightning.plugins.training_type.single_tpu import SingleTPUPlugin
from pytorch_lightning.plugins.training_type.tpu_spawn import TPUSpawnPlugin
from pytorch_lightning.utilities import _XLA_AVAILABLE

if _XLA_AVAILABLE:
@@ -29,25 +25,6 @@
class TPUAccelerator(Accelerator):
"""Accelerator for TPU devices."""

def setup(self, trainer: "pl.Trainer") -> None:
"""
Raises:
ValueError:
If the precision or training type plugin are unsupported.
"""
if not isinstance(self.training_type_plugin.precision_plugin, TPUPrecisionPlugin):
# this configuration should have been avoided in the accelerator connector
raise ValueError(
f"The `TPUAccelerator` can only be used with a `TPUPrecisionPlugin`,"
f" found: {self.training_type_plugin.precision_plugin}."
)
if not isinstance(self.training_type_plugin, (SingleTPUPlugin, TPUSpawnPlugin)):
raise ValueError(
"The `TPUAccelerator` can only be used with a `SingleTPUPlugin` or `TPUSpawnPlugin,"
f" found {self.training_type_plugin}."
)
return super().setup(trainer)

def get_device_stats(self, device: Union[str, torch.device]) -> Dict[str, Any]:
"""Gets stats for the given TPU device.

6 changes: 3 additions & 3 deletions pytorch_lightning/lite/lite.py
@@ -99,8 +99,8 @@ def __init__(
amp_level=None,
plugins=plugins,
)
self._accelerator = self._accelerator_connector.accelerator
self._strategy = self._accelerator.training_type_plugin
self._strategy = self._accelerator_connector.training_type_plugin
self._accelerator = self._strategy.accelerator
self._precision_plugin = self._strategy.precision_plugin
self._models_setup: int = 0

@@ -398,7 +398,7 @@ def seed_everything(seed: Optional[int] = None, workers: Optional[bool] = None)
return seed_everything(seed=seed, workers=workers)

def _run_impl(self, run_method: Callable, *args: Any, **kwargs: Any) -> Any:
self._accelerator.setup_environment()
self._strategy.setup_environment()

# apply sharded context to prevent OOM
run_method = partial(self._run_with_sharded_context, run_method)
3 changes: 3 additions & 0 deletions pytorch_lightning/plugins/training_type/ddp.py
@@ -84,6 +84,7 @@ class DDPPlugin(ParallelPlugin):

def __init__(
self,
accelerator: Optional["pl.accelerators.accelerator.Accelerator"] = None,
parallel_devices: Optional[List[torch.device]] = None,
cluster_environment: Optional[ClusterEnvironment] = None,
checkpoint_io: Optional[CheckpointIO] = None,
@@ -95,6 +96,7 @@ def __init__(
**kwargs: Union[Any, Dict[str, Any]],
) -> None:
super().__init__(
accelerator=accelerator,
parallel_devices=parallel_devices,
cluster_environment=cluster_environment,
checkpoint_io=checkpoint_io,
@@ -147,6 +149,7 @@ def setup_environment(self) -> None:
self._call_children_scripts()

self.setup_distributed()
super().setup_environment()

def _setup_model(self, model: Module) -> DistributedDataParallel:
"""Wraps the model into a :class:`~torch.nn.parallel.distributed.DistributedDataParallel` module."""
2 changes: 2 additions & 0 deletions pytorch_lightning/plugins/training_type/ddp_spawn.py
@@ -62,6 +62,7 @@ class DDPSpawnPlugin(ParallelPlugin):

def __init__(
self,
accelerator: Optional["pl.accelerators.accelerator.Accelerator"] = None,
parallel_devices: Optional[List[torch.device]] = None,
cluster_environment: Optional[ClusterEnvironment] = None,
checkpoint_io: Optional[CheckpointIO] = None,
@@ -72,6 +73,7 @@ def __init__(
**kwargs: Any,
):
super().__init__(
accelerator=accelerator,
parallel_devices=parallel_devices,
cluster_environment=cluster_environment,
checkpoint_io=checkpoint_io,
2 changes: 2 additions & 0 deletions pytorch_lightning/plugins/training_type/deepspeed.py
@@ -88,6 +88,7 @@ class DeepSpeedPlugin(DDPPlugin):

def __init__(
self,
accelerator: Optional["pl.accelerators.accelerator.Accelerator"] = None,
zero_optimization: bool = True,
stage: int = 2,
remote_device: str = "cpu",
@@ -273,6 +274,7 @@ def __init__(
)

super().__init__(
accelerator=accelerator,
parallel_devices=parallel_devices,
cluster_environment=cluster_environment,
precision_plugin=precision_plugin,
2 changes: 2 additions & 0 deletions pytorch_lightning/plugins/training_type/dp.py
@@ -35,11 +35,13 @@ class DataParallelPlugin(ParallelPlugin):

def __init__(
self,
accelerator: Optional["pl.accelerators.accelerator.Accelerator"] = None,
parallel_devices: Optional[List[torch.device]] = None,
checkpoint_io: Optional[CheckpointIO] = None,
precision_plugin: Optional[PrecisionPlugin] = None,
):
super().__init__(
accelerator=accelerator,
parallel_devices=parallel_devices,
cluster_environment=None,
checkpoint_io=checkpoint_io,
2 changes: 2 additions & 0 deletions pytorch_lightning/plugins/training_type/fully_sharded.py
@@ -37,6 +37,7 @@ class DDPFullyShardedPlugin(DDPPlugin):

def __init__(
self,
accelerator: Optional["pl.accelerators.accelerator.Accelerator"] = None,
cpu_offload: bool = False,
flatten_parameters: bool = True,
reshard_after_forward: bool = True,
@@ -98,6 +99,7 @@ def __init__(
"""

super().__init__(
accelerator=accelerator,
parallel_devices=parallel_devices,
cluster_environment=cluster_environment,
checkpoint_io=checkpoint_io,
2 changes: 2 additions & 0 deletions pytorch_lightning/plugins/training_type/horovod.py
@@ -41,11 +41,13 @@ class HorovodPlugin(ParallelPlugin):

def __init__(
self,
accelerator: Optional["pl.accelerators.accelerator.Accelerator"] = None,
parallel_devices: Optional[List[torch.device]] = None,
checkpoint_io: Optional[CheckpointIO] = None,
precision_plugin: Optional[PrecisionPlugin] = None,
):
super().__init__(
accelerator=accelerator,
parallel_devices=parallel_devices,
cluster_environment=None,
checkpoint_io=checkpoint_io,
2 changes: 2 additions & 0 deletions pytorch_lightning/plugins/training_type/ipu.py
@@ -62,6 +62,7 @@ class IPUPlugin(ParallelPlugin):

def __init__(
self,
accelerator: Optional["pl.accelerators.accelerator.Accelerator"] = None,
device_iterations: int = 1,
autoreport: bool = False,
autoreport_dir: Optional[str] = None,
@@ -86,6 +87,7 @@ def __init__(
created options for validation/testing and predicting.
"""
super().__init__(
accelerator=accelerator,
parallel_devices=parallel_devices,
cluster_environment=cluster_environment,
checkpoint_io=checkpoint_io,
3 changes: 2 additions & 1 deletion pytorch_lightning/plugins/training_type/parallel.py
@@ -34,12 +34,13 @@ class ParallelPlugin(TrainingTypePlugin, ABC):

def __init__(
self,
accelerator: Optional["pl.accelerators.accelerator.Accelerator"] = None,
parallel_devices: Optional[List[torch.device]] = None,
cluster_environment: Optional[ClusterEnvironment] = None,
checkpoint_io: Optional[CheckpointIO] = None,
precision_plugin: Optional[PrecisionPlugin] = None,
):
super().__init__(checkpoint_io=checkpoint_io, precision_plugin=precision_plugin)
super().__init__(accelerator=accelerator, checkpoint_io=checkpoint_io, precision_plugin=precision_plugin)
self.parallel_devices = parallel_devices
self.cluster_environment = cluster_environment

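Because every built-in plugin constructor above gains the same optional accelerator argument and forwards it to its parent (DDPPlugin/ParallelPlugin -> TrainingTypePlugin), a third-party plugin only needs to do the same. A hedged sketch; the subclass and its extra flag are hypothetical, and the remaining DDPPlugin keyword arguments pass through **kwargs:

from typing import Any, Optional

import pytorch_lightning as pl
from pytorch_lightning.plugins import DDPPlugin


class MyClusterDDPPlugin(DDPPlugin):
    """Hypothetical custom plugin following the new constructor pattern."""

    def __init__(
        self,
        accelerator: Optional["pl.accelerators.accelerator.Accelerator"] = None,
        my_flag: bool = False,  # hypothetical extra option
        **kwargs: Any,
    ) -> None:
        # Forward `accelerator` (and everything else) up the chain so that the
        # TrainingTypePlugin ends up owning the accelerator instance.
        super().__init__(accelerator=accelerator, **kwargs)
        self.my_flag = my_flag

It can then be passed to the Trainer via the strategy argument, exactly as in the docs example above, e.g. Trainer(strategy=MyClusterDDPPlugin(accelerator=GPUAccelerator())).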