From eb3a03c4552edfc6b82521fd75c81390236ab02b Mon Sep 17 00:00:00 2001 From: Siyu Wang Date: Wed, 12 Jan 2022 10:51:11 -0800 Subject: [PATCH 01/69] Rewrite accelerator_connector --- pytorch_lightning/strategies/ddp.py | 7 +- pytorch_lightning/strategies/ddp2.py | 11 +- pytorch_lightning/strategies/ddp_spawn.py | 7 +- pytorch_lightning/strategies/deepspeed.py | 2 +- .../connectors/accelerator_connector_new.py | 569 ++++++++++++++++++ pytorch_lightning/trainer/trainer.py | 92 +-- pytorch_lightning/utilities/exceptions.py | 6 + pytorch_lightning/utilities/imports.py | 2 + 8 files changed, 646 insertions(+), 50 deletions(-) create mode 100644 pytorch_lightning/trainer/connectors/accelerator_connector_new.py diff --git a/pytorch_lightning/strategies/ddp.py b/pytorch_lightning/strategies/ddp.py index feff575719ad4..fac1cbe2dc288 100644 --- a/pytorch_lightning/strategies/ddp.py +++ b/pytorch_lightning/strategies/ddp.py @@ -75,7 +75,7 @@ class DDPStrategy(ParallelStrategy): devices (e.g. GPU) per node. It is very similar to how :mod:`torch.distributed.launch` launches processes. """ - distributed_backend = _StrategyType.DDP + distributed_backend = "ddp" def __init__( self, @@ -428,6 +428,11 @@ def register_strategies(cls, strategy_registry: Dict) -> None: description="DDP Strategy with `find_unused_parameters` as False", find_unused_parameters=False, ) + strategy_registry.register( + cls.distributed_backend, + cls, + description="Strategy", + ) def _should_run_deadlock_detection(self) -> bool: """Determines whether the plugin will perform process reconciliation in case of errors. diff --git a/pytorch_lightning/strategies/ddp2.py b/pytorch_lightning/strategies/ddp2.py index 9bde0f67e1b1a..5e1a349bd910d 100644 --- a/pytorch_lightning/strategies/ddp2.py +++ b/pytorch_lightning/strategies/ddp2.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import torch +from typing import Dict from pytorch_lightning.strategies.ddp import DDPStrategy from pytorch_lightning.utilities.apply_func import apply_to_collection @@ -22,7 +23,7 @@ class DDP2Strategy(DDPStrategy): """DDP2 behaves like DP in one node, but synchronization across nodes behaves like in DDP.""" - distributed_backend = _StrategyType.DDP2 + distributed_backend = "ddp2" @property def global_rank(self) -> int: @@ -73,3 +74,11 @@ def set_world_ranks(self) -> None: return self.cluster_environment.set_global_rank(self.node_rank) self.cluster_environment.set_world_size(self.num_nodes) + + @classmethod + def register_strategies(cls, strategy_registry: Dict) -> None: + strategy_registry.register( + cls.distributed_backend, + cls, + description="Strategy", + ) diff --git a/pytorch_lightning/strategies/ddp_spawn.py b/pytorch_lightning/strategies/ddp_spawn.py index 03407e1c14232..501fb018a0fca 100644 --- a/pytorch_lightning/strategies/ddp_spawn.py +++ b/pytorch_lightning/strategies/ddp_spawn.py @@ -53,7 +53,7 @@ class DDPSpawnStrategy(ParallelStrategy): """Spawns processes using the :func:`torch.multiprocessing.spawn` method and joins processes after training finishes.""" - distributed_backend = _StrategyType.DDP_SPAWN + distributed_backend = "ddp_spawn" def __init__( self, @@ -367,6 +367,11 @@ def register_strategies(cls, strategy_registry: Dict) -> None: description="DDPSpawn Strategy with `find_unused_parameters` as False", find_unused_parameters=False, ) + strategy_registry.register( + cls.distributed_backend, + cls, + description="Strategy", + ) def teardown(self) -> None: super().teardown() diff --git a/pytorch_lightning/strategies/deepspeed.py b/pytorch_lightning/strategies/deepspeed.py index fa9c4d5376ff8..530ede34ec899 100644 --- a/pytorch_lightning/strategies/deepspeed.py +++ b/pytorch_lightning/strategies/deepspeed.py @@ -82,7 +82,7 @@ def _move_float_tensors_to_half(self, batch: Any): class DeepSpeedStrategy(DDPStrategy): - distributed_backend = _StrategyType.DEEPSPEED + distributed_backend = "deepspeed" DEEPSPEED_ENV_VAR = "PL_DEEPSPEED_CONFIG_PATH" def __init__( diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector_new.py b/pytorch_lightning/trainer/connectors/accelerator_connector_new.py new file mode 100644 index 0000000000000..186d175f33d64 --- /dev/null +++ b/pytorch_lightning/trainer/connectors/accelerator_connector_new.py @@ -0,0 +1,569 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
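+"""Rewrite of the accelerator connector.
+
+``AcceleratorConnector`` resolves the Trainer's ``accelerator``, ``strategy``, ``devices``,
+``precision`` and ``plugins`` arguments into concrete ``Accelerator``, ``Strategy``,
+``ClusterEnvironment`` and ``PrecisionPlugin`` objects and attaches them to the chosen strategy.
+Illustrative examples of the flag forms this connector accepts::
+
+    Trainer(accelerator="gpu", devices=2, strategy="ddp_spawn", precision=16)
+    Trainer(strategy=DDPStrategy())  # a Strategy instance takes priority over the other flags
+"""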
+ +import logging +import os +from typing import List, Optional, Sequence, Union +from weakref import proxy + +import torch + +from pytorch_lightning.accelerators.accelerator import Accelerator +from pytorch_lightning.accelerators.cpu import CPUAccelerator +from pytorch_lightning.accelerators.gpu import GPUAccelerator +from pytorch_lightning.accelerators.ipu import IPUAccelerator +from pytorch_lightning.accelerators.tpu import TPUAccelerator +from pytorch_lightning.plugins import ( + ApexMixedPrecisionPlugin, + CheckpointIO, + DeepSpeedPrecisionPlugin, + DoublePrecisionPlugin, + FullyShardedNativeMixedPrecisionPlugin, + IPUPrecisionPlugin, + NativeMixedPrecisionPlugin, + PrecisionPlugin, + ShardedNativeMixedPrecisionPlugin, + TPUBf16PrecisionPlugin, + TPUPrecisionPlugin, +) +from pytorch_lightning.plugins.environments import ( + ClusterEnvironment, + KubeflowEnvironment, + LightningEnvironment, + LSFEnvironment, + SLURMEnvironment, + TorchElasticEnvironment, +) +from pytorch_lightning.strategies import ( + DataParallelStrategy, + DDP2Strategy, + DDPFullyShardedStrategy, + DDPShardedStrategy, + DDPSpawnShardedStrategy, + DDPSpawnStrategy, + DDPStrategy, + DeepSpeedStrategy, + HorovodStrategy, + IPUStrategy, + SingleDeviceStrategy, + SingleTPUStrategy, + Strategy, + StrategyRegistry, + TPUSpawnStrategy, +) +from pytorch_lightning.utilities import ( + _AcceleratorType, + _StrategyType, + AMPType, + device_parser, + rank_zero_deprecation, + rank_zero_info, + rank_zero_warn, +) +from pytorch_lightning.utilities.enums import PrecisionType +from pytorch_lightning.utilities.exceptions import MisconfigurationException, DeviceNotAvailibleException, ImpactableConfigurationException +from pytorch_lightning.utilities.imports import ( + _HOROVOD_AVAILABLE, + _IPU_AVAILABLE, + _GPU_AVAILABLE, + _TORCH_GREATER_EQUAL_1_8, + _TPU_AVAILABLE, +) + +if _HOROVOD_AVAILABLE: + import horovod.torch as hvd + +log = logging.getLogger(__name__) + + +class AcceleratorConnector: + def __init__( + self, + devices, + num_nodes, + accelerator, # reduce typing + strategy: Optional[Union[str, Strategy]], + plugins, + precision, + amp_type, + amp_level, + sync_batchnorm, + benchmark, + replace_sampler_ddp, + deterministic: bool, + num_processes, # deprecated + tpu_cores, # deprecated + ipus, # deprecated + gpus, # deprecated + gpu_ids, + ): + """ + A. accelerator could be: + 1. strategy class (deprecated in 1.5 will be removed in 1.7) + 2. strategy str (deprecated in 1.5 will be removed in 1.7) + 3. accelerator class + 4. accelerator str + 5. accelerator auto + + B. strategy could be : + 1. strategy class + 2. strategy str registered with strategyRegister + 3. strategy str in _strategy_type enum which listed in each strategy as backend (registed these too, and _strategy_type could be deprecated) + + C. plugins could be: + 1. List of str, which could contains: + i. strategy str + ii. precision str (Not supported in the old accelerator_connector version) + iii. checkpoint_io str (Not supported in the old accelerator_connector version) + iv. cluster_environment str (Not supported in the old accelerator_connector version) + 2. List of class, which could contains: + i. strategy class (deprecated in 1.5 will be removed in 1.7) + ii. precision class (should be removed, and precision flag should allow user pass classes) + iii. checkpoint_io class + iv. cluster_environment class + + + priorities which to take when: + A. Class > str + B. Strategy > Accelerator/precision/plugins + C. When multiple flag set to the same thing? (ignore? 
not handled for now) + + """ + + # Get registered strategies, existing accelerators and precision plugins + self._existing_strategies_str = StrategyRegistry.available_strategies() + print(self._existing_strategies_str) + self._existing_accelerator_type = ["tpu", "ipu", "gpu", "cpu"] + self._supported_precision = PrecisionType.supported_types() + + # raise misconfig exceptions if their is conflict between flags + # set the valid flag to self._x_flag after validation + # for example: if accelerator is strategy class, set self._strategy_flag = accelerator + # for devices: assign gpus ipus and etcs to accelerator_flag and devices_flag + self._config_check_and_set_final_flags(strategy, accelerator, precision, plugins, amp_type, amp_level) + self._device_config_check_and_set_final_flags(devices=devices, num_nodes=num_nodes, num_processes=num_processes, gpus=gpus, ipus=ipus, tpu_cores=tpu_cores) + + # handle auto and choose flag when user hasn't set it up. + if self._accelerator_flag == 'auto' or self._accelerator_flag is None: + self._choose_accelerator() + else: + # [RFC] move to XAccelerator class init? + self._check_device_availibility() + + # Accelerator initialization + # TODO devices logic handling still in process, not ready for reviews + self._set_parallel_devices_and_init_accelerator() + + # handle strategy flag is not set, choose for user + if self._strategy_flag is None: + self._choose_strategy() + + self._choose_and_init_cluster_environment() + self._check_capatibility_and_init_precision() + self._init_strategy() + + + def _config_check_and_set_final_flags(self, strategy, accelerator, precision, plugins, amp_type, amp_level): + """ + This method checks: + 1. strategy flag: strategy, accelerator and plugin can all set strategies + 2. accelerator: if accelerator flag is Accelerator related flag or class, set self._acceelrator_flag; + If accelerator is strategy related, logic handled in 1 above + 3. precision could be set by precision and plugins flag + 4. 
plugins could be duplicated in strategy (handled by 1), precision (handled by 3), set checkpoint_io and cluster_environment + """ + self._strategy_flag, self._accelerator_flag, self._precision_flag, self._cluster_environment, self.checkpoint_io, self._amp_level_flag, self._amp_type_flag = None, None, None, None, None, amp_type, amp_level + if strategy: + self._strategy_flag = strategy + # handle duplications and conflict + if isinstance(accelerator, Strategy) and strategy != accelerator: + raise MisconfigurationException("strategy already set through strategy flag, duplicated in accelerator") + if isinstance(accelerator, str) and accelerator in self._existing_strategies_str and strategy != accelerator: + raise MisconfigurationException("strategy str already set through strategy flag, duplicated in accelerator") + if plugins: + for plugin in plugins: + if isinstance(plugin, Strategy) and strategy != plugin: + raise MisconfigurationException("strategy already set through strategy flag, duplicated in plugins") + if isinstance(plugin, str) and plugin in self._existing_strategies_str: + raise MisconfigurationException("strategy already set through strategy flag, duplicated in plugins") + + + if accelerator in self._existing_accelerator_type or accelerator=="auto" or isinstance(accelerator, Accelerator): + self._accelerator_flag = accelerator + elif accelerator in self._existing_strategies_str or isinstance(accelerator, Strategy): + rank_zero_deprecation( + f"Passing `Trainer(accelerator={accelerator!r})` has been deprecated" + f" in v1.5 and will be removed in v1.7. Use `Trainer(strategy={accelerator!r})` instead." + ) + self._strategy_flag = accelerator + + + if precision: + self._precision_flag = precision + # handle duplications and conflict + if plugins: + for plugin in plugins: + if isinstance(plugin, PrecisionPlugin): + raise MisconfigurationException("precision set in both precision flag and plugin flag") + + if plugins: + for plugin in plugins: + if isinstance(plugin, Strategy) or isinstance(plugin, str) and plugin in self._existing_strategies_str: + self._strategy_flag = plugin + elif isinstance(plugin, PrecisionPlugin) or isinstance(plugin, str) and plugin in self._supported_precision: + self._precision_flag = plugin + elif isinstance(plugin, CheckpointIO): + self.checkpoint_io = plugin + elif isinstance(plugin, ClusterEnvironment): + self._cluster_environment = plugin + else: + raise MisconfigurationException(f"Does not recognize flag {plugin}") + + + # if user pass in a strategy class which has accelerator, precision, checkpoint or cluster env set up + if self._strategy_flag and isinstance(self._strategy_flag, Strategy): + if self._strategy_flag.accelerator: + if self._accelerator_flag: + raise MisconfigurationException("accelerator set through both strategy class and accelerator flag, choose one") + else: + self._accelerator_flag = self._strategy_flag.accelerator + if self._strategy_flag.precision_plugin: + # precision has default value 32, we can not tell whether user set it or not [RFC] remove default from trainer? 
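+                # so the precision plugin attached to the strategy always wins over the `precision` argument here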
+ # if self._precision_flag: + # raise MisconfigurationException("precision set through both strategy class and flags, choose one place to set") + # else: + self._precision_flag = self._strategy_flag.precision_plugin + if self._strategy_flag.checkpoint_io: + if self.checkpoint_io: + raise MisconfigurationException("checkpoint_io set through both strategy class and plugins, choose one") + else: + self.checkpoint_io = self._strategy_flag.checkpoint_io + if getattr(self._strategy_flag, "cluster_environment", None): + if self._cluster_environment: + raise MisconfigurationException("cluster_environment set through both strategy class and plugins, choose one") + else: + self._cluster_environment = getattr(self._strategy_flag, "cluster_environment") + + + amp_type = amp_type.lower() if isinstance(amp_type, str) else None + self._amp_type_flag = AMPType.from_str(amp_type) if amp_type is not None else None + + # TODO still working on these flags + # if amp_level is not None and self._amp_type_flag != AMPType.APEX: + # raise MisconfigurationException( + # f"You have asked for `amp_level={self._amp_level_flag!r}` but it's only supported with `amp_backend='apex'`." + # ) + self._amp_level_flag = amp_level + + + def _device_config_check_and_set_final_flags(self, devices, num_nodes, num_processes, gpus, ipus, tpu_cores): + if num_nodes == "auto": + self._num_nodes_flag = 1 + else : + self._num_nodes_flag = int(num_nodes) if num_nodes is not None else 1 + + ##### to be deleted v1.7 + deprecated_devices_specific_nums = num_processes or gpus or ipus or tpu_cores + self._mapping_deprecated_devices_specfic_info_to_accelerator_and_device_flag(devices, deprecated_devices_specific_nums, num_processes, gpus, ipus, tpu_cores) + ##### deleted end + if devices == "auto": + if self._accelerator_flag is None: + raise MisconfigurationException( + f"You passed `devices={devices}` but haven't specified" + " `accelerator=('auto'|'tpu'|'gpu'|'ipu'|'cpu')` for the devices mapping" + ) + if not self._device_flag: + self._device_flag = devices + + + + def _mapping_deprecated_devices_specfic_info_to_accelerator_and_device_flag(self, devices, deprecated_devices_specific_nums, num_processes, gpus, ipus, tpu_cores): + ##### to be deleted v1.7vbg + # set devices base on num_processes, gpus, ipus, tpu_cores + if devices: + rank_zero_warn(f"will be ignored, instand the device specific number {deprecated_devices_specific_nums} will be used") + if [(num_processes is not None), (gpus is not None), (ipus is not None), (tpu_cores is not None)].count(True) > 1: + rank_zero_warn(f"more than one device specifc flag has been set") + self._device_flag = deprecated_devices_specific_nums + + if not self._accelerator_flag: + # set accelerator type base on num_processes, gpus, ipus, tpu_cores + if num_processes: + self._accelerator_flag = "cpu" + if gpus: + self._accelerator_flag = "gpu" + if tpu_cores: + self._accelerator_flag = "tpu" + if ipus: + self._accelerator_flag = "ipu" + #### delete end + + def _choose_accelerator(self): + if self._accelerator_flag == "auto": + if _TPU_AVAILABLE: + self._accelerator_flag = "tpu" + elif _IPU_AVAILABLE: + self._accelerator_flag = "ipu" + elif _GPU_AVAILABLE: + self._accelerator_flag = "gpu" + else: + self._accelerator_flag = "cpu" + # [RFC] this is current logic, if accelerator not set, default cpu? 
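+        # reached only when accelerator=None (the "auto" case is handled above): fall back to CPU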
+ else: + self._accelerator_flag = "cpu" + + + def _check_device_availibility(self): + for accelerator_flag, available in zip(self._existing_accelerator_type, [_TPU_AVAILABLE, _IPU_AVAILABLE, _GPU_AVAILABLE, True]): + if self._accelerator_flag == accelerator_flag: + if not available: + raise DeviceNotAvailibleException(f"{accelerator_flag} not avalible") + + # TODO in progress for setting up devices + def _set_parallel_devices_and_init_accelerator(self): + self._parallel_devices = [] + + if isinstance(self._accelerator_flag, Accelerator): + self.accelerator = self._accelerator_flag() + elif self._accelerator_flag == "tpu": + self.accelerator = TPUAccelerator() + if self._device_flag == "auto" or not self._device_flag: + self._device_flag = TPUAccelerator.auto_device_count() + if isinstance(self._device_flag, int): + self._parallel_devices = list(range(self._device_flag)) + + elif self._accelerator_flag == "ipu": + self.accelerator = IPUAccelerator() + if self._device_flag == "auto" or not self._device_flag: + self._device_flag = IPUAccelerator.auto_device_count() + if isinstance(self._device_flag, int): + self._parallel_devices = list(range(self._device_flag)) + + elif self._accelerator_flag == "gpu": + self.accelerator = GPUAccelerator() + if self._device_flag == "auto" or not self._device_flag: + self._device_flag = GPUAccelerator.auto_device_count() + if isinstance(self._device_flag, int): + self._parallel_devices = [torch.device("cuda", i) for i in device_parser.parse_gpu_ids(self._device_flag)] + + elif self._accelerator_flag == "cpu": + self.accelerator = CPUAccelerator() + if self._device_flag == "auto" or not self._device_flag: + self._device_flag = CPUAccelerator.auto_device_count() + if isinstance(self._device_flag, int): + self._parallel_devices = [torch.device("cpu")] * self._device_flag + + + def _choose_and_init_cluster_environment(self): + self.cluster_environment = LightningEnvironment() + if isinstance(self._cluster_environment, ClusterEnvironment): + self.cluster_environment = self._cluster_environment + elif self._is_slurm_managing_tasks(): + rank_zero_info("Multiprocessing is handled by SLURM.") + self.cluster_environment = SLURMEnvironment() + else: + for env_type in (TorchElasticEnvironment, KubeflowEnvironment, LSFEnvironment): + if env_type.detect(): + self.cluster_environment = env_type() + + + def _is_slurm_managing_tasks(self): + """ + used by choosing cluster enviroment + """ + if ( + (not self._strategy_flag=="ddp" and not self._strategy_flag=="ddp2") + or not SLURMEnvironment.detect() + or SLURMEnvironment.job_name() == "bash" # in interactive mode we don't manage tasks + ): + return False + + total_requested_devices = len(self._parallel_devices) * self._num_nodes_flag + num_slurm_tasks = int(os.environ["SLURM_NTASKS"], 0) + return num_slurm_tasks == total_requested_devices + + def _choose_strategy(self): + if _HOROVOD_AVAILABLE and ("OMPI_COMM_WORLD_RANK" in os.environ or "HOROVOD_RANK" in os.environ): + self._strategy_flag = HorovodStrategy() + + if self._accelerator_flag == "ipu": + self._strategy_flag = IPUStrategy() + elif self._accelerator_flag == "tpu": + if self._parallel_devices and len(self._parallel_devices)>1: + self._strategy_flag = TPUSpawnStrategy() + else: + self._srategy_flag = SingleTPUStrategy() + + # [RFC] in existing logic SingleDevice strategy choice diverge between cpu and gpu, should we merge? 
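+        # GPU: multi-node or a single requested device falls back to DDP, several local devices use spawn-based DDP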
+ elif self._accelerator_flag == "gpu": + if self._num_nodes_flag > 1: + self._strategy_flag = DDPStrategy() + elif len(self._parallel_devices) == 1: + self._strategy_flag = DDPStrategy() + elif len(self._parallel_devices) > 1: + self._strategy_flag = DDPSpawnStrategy() + else: + self._strategy_flag = DDPStrategy() + else: + if self._num_nodes_flag > 1: + self._strategy_flag = DDPStrategy() + elif len(self._parallel_devices) <= 1: + device = torch.device("cuda") if self._accelerator_flag == "gpu" else "cpu" + self._strategy_flag = SingleDeviceStrategy(device = device) + elif len(self._parallel_devices) > 1: + self._strategy_flag = DDPSpawnStrategy() + else: + self._strategy_flag = DDPStrategy() + + + def _check_capatibility_and_init_precision(self): + self._precision_misconfig_check() + if isinstance(self._precision_flag, PrecisionPlugin): + self.precision_plugin = self._precision_flag + + if self._accelerator_flag =="ipu": + self.precision_plugin = IPUPrecisionPlugin(self._precision_flag) + if self._accelerator_flag == "tpu": + if self._precision_flag == 32: + self.precision_plugin = TPUPrecisionPlugin() + elif self._precision_flag in (16, "bf16"): + if self._precision_flag == 16: + # this is not deprecated to ease transition between accelerator environments + rank_zero_warn( + f"You passed `Trainer(accelerator='tpu', precision=16)` but {self._amp_type_flag.value} AMP" + f" is not supported with TPUs. Using `precision='bf16'` instead." + ) + self.precision_plugin = TPUBf16PrecisionPlugin() + if self._strategy_flag == "deepspeed" or isinstance(self._strategy_flag, DeepSpeedStrategy): + self.precision_plugin = DeepSpeedPrecisionPlugin(self._precision_flag, self._amp_type_flag, self._amp_level_flag) + + if self._precision_flag == 32: + self.precision_plugin = PrecisionPlugin() + if self._precision_flag == 64: + self.precision_plugin = DoublePrecisionPlugin() + + # maybe convert the precision value + if self._precision_flag == 16 and self._accelerator_flag == "cpu": + # this automatic switch is to ease transition between accelerator environments + rank_zero_warn( + "You passed `Trainer(accelerator='cpu', precision=16)` but native AMP is not supported on CPU." + " Using `precision='bf16'` instead." + ) + self._precision_flag = "bf16" + + if self._precision_flag in (16, "bf16"): + rank_zero_info( + f"Using 16bit {self._amp_type_flag.value} Automatic Mixed Precision (AMP)" + if self._precision_flag == 16 + else "Using bfloat16 Automatic Mixed Precision (AMP)" + ) + + if self._amp_type_flag == AMPType.NATIVE: + device = "cpu" if self._accelerator_flag=="cpu" else "cuda" + + # TODO in progress implement the two following shard types + # if self._is_sharded_training_type: + # return ShardedNativeMixedPrecisionPlugin(self._precision_flag, device) + # if self._is_fully_sharded_training_type: + # return FullyShardedNativeMixedPrecisionPlugin(self._precision_flag, device) + # return NativeMixedPrecisionPlugin(self._precision_flag, device) + + + self._amp_level_flag = self._amp_level_flag or "O2" + self.precision_plugin = ApexMixedPrecisionPlugin(self._amp_level_flag) + self.precision_plugin = PrecisionPlugin() + + def _precision_misconfig_check(self): + if self._accelerator_flag == "ipu": + if self._precision_flag not in (16, 32): + raise MisconfigurationException( + f"`Trainer(accelerator='ipu', precision={self._precision_flag!r})` is not supported." 
+ ) + if self._accelerator_flag == "tpu" and self._precision_flag == 64: + raise MisconfigurationException( + "`Trainer(accelerator='tpu', precision=64)` is not implemented." + " Please, open an issue in `https://github.com/PyTorchLightning/pytorch-lightning/issues`" + " requesting this feature." + ) + if self._precision_flag == 16 and self._accelerator_flag == "cpu" and self._amp_type_flag == AMPType.APEX: + # apex was explicitly passed, not a good idea to silently switch to native AMP + raise MisconfigurationException( + "You passed `Trainer(accelerator='cpu', precision=16, amp_type='apex')`" + " but apex AMP not supported on CPU." + ) + if self._precision_flag == "bf16" and self._amp_type_flag != AMPType.NATIVE: + raise MisconfigurationException( + f"You passed `Trainer(amp_type={self._amp_type_flag.value!r}, precision='bf16')` but it's not supported." + " Try using `amp_type='native'` instead." + ) + + # if self._precision_flag in (16, "bf16") and self._amp_type_flag == AMPType.APEX: + # if self._is_sharded_training_type or self._is_fully_sharded_training_type: + # raise MisconfigurationException( + # "Sharded plugins are not supported with apex, please switch to `amp_backend='native'`." + # ) + + + def _init_strategy(self): + if isinstance(self._strategy_flag, str): + self.strategy = StrategyRegistry.get(self._strategy_flag) + else: + self.strategy = self._strategy_flag + self.strategy.accelerator = self.accelerator + if self.precision_plugin: + self.strategy.precision_plugin = self.precision_plugin + if self.checkpoint_io: + self.strategy.checkpoint_io = self.checkpoint_io + self.strategy.cluster_environment = self.cluster_environment + + + + + + ############################################################################## + # the following logic should be deprecated/removed + # Added here to keep backward compabilities + + # @property + # def parallel_devices(self) -> List[Union[torch.device, int]]: + # return self._parallel_device + + # @property + # def replace_sampler_ddp(): + # return self.replace_sampler_ddp + + # def _distrib_type(): + + # def _device_type(): + + # def num_nodes(): + + # def num_processes(): + + # def root_gpu(): + + def devices(self): + return len(self._parallel_devices) + + # def parallel_device_ids(): + + # def gpus(): + + # def is_distributed(): + + def has_ipu(self): + return self._accelerator_flag == "ipu" + + def has_tpu(self): + return self._accelerator_flag == "tpu" diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index b6a0d7fa452e0..bb3dde1e893a3 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -138,7 +138,7 @@ def __init__( gradient_clip_algorithm: Optional[str] = None, process_position: int = 0, num_nodes: int = 1, - num_processes: int = 1, + num_processes: int = None, devices: Optional[Union[List[int], str, int]] = None, gpus: Optional[Union[List[int], str, int]] = None, auto_select_gpus: bool = False, @@ -435,23 +435,23 @@ def __init__( self._data_connector = DataConnector(self, multiple_trainloader_mode) self._accelerator_connector = AcceleratorConnector( - num_processes, - devices, - tpu_cores, - ipus, - accelerator, - strategy, - gpus, - gpu_ids, - num_nodes, - sync_batchnorm, - benchmark, - replace_sampler_ddp, - deterministic, - precision, - amp_backend, - amp_level, - plugins, + num_processes = num_processes, + devices = devices, + tpu_cores = tpu_cores, + ipus = ipus, + accelerator = accelerator, + strategy = strategy, + gpus = gpus, + gpu_ids = 
gpu_ids, + num_nodes = num_nodes, + sync_batchnorm = sync_batchnorm, + benchmark = benchmark, + replace_sampler_ddp = replace_sampler_ddp, + deterministic = deterministic, + precision = precision, + amp_type = amp_backend, + amp_level = amp_level, + plugins = plugins, ) self.logger_connector = LoggerConnector(self, log_gpu_memory) self._callback_connector = CallbackConnector(self) @@ -636,7 +636,7 @@ def _determine_data_use_amount(self, overfit_batches: float) -> None: self.limit_val_batches = 0 def _setup_on_init(self, num_sanity_val_steps: int) -> None: - self._log_device_info() + # self._log_device_info() self.should_stop = False self.state = TrainerState() @@ -1968,45 +1968,45 @@ def should_rank_save_checkpoint(self) -> bool: isinstance(strategy, pl.strategies.TPUSpawnStrategy) and strategy.local_rank == 0 or strategy.is_global_zero ) - @property - def _strategy_type(self) -> _StrategyType: - return self._accelerator_connector._strategy_type + # @property + # def _strategy_type(self) -> _StrategyType: + # return self._accelerator_connector._strategy_type - @property - def _device_type(self) -> _AcceleratorType: - return self._accelerator_connector._device_type + # @property + # def _device_type(self) -> _AcceleratorType: + # return self._accelerator_connector._device_type - @property - def num_nodes(self) -> int: - return self._accelerator_connector.num_nodes + # @property + # def num_nodes(self) -> int: + # return self._accelerator_connector.num_nodes - @property - def num_processes(self) -> int: - return self._accelerator_connector.num_processes + # @property + # def num_processes(self) -> int: + # return self._accelerator_connector.num_processes - @property - def root_gpu(self) -> Optional[int]: - return self._accelerator_connector.root_gpu + # @property + # def root_gpu(self) -> Optional[int]: + # return self._accelerator_connector.root_gpu - @property - def tpu_cores(self) -> int: - return self._accelerator_connector.tpu_cores + # @property + # def tpu_cores(self) -> int: + # return self._accelerator_connector.tpu_cores - @property - def ipus(self) -> int: - return self._accelerator_connector.num_ipus + # @property + # def ipus(self) -> int: + # return self._accelerator_connector.num_ipus - @property - def num_gpus(self) -> int: - return self._accelerator_connector.num_gpus + # @property + # def num_gpus(self) -> int: + # return self._accelerator_connector.num_gpus @property def devices(self) -> Optional[Union[List[int], str, int]]: return self._accelerator_connector.devices - @property - def data_parallel_device_ids(self) -> Optional[List[int]]: - return self._accelerator_connector.parallel_device_ids + # @property + # def data_parallel_device_ids(self) -> Optional[List[int]]: + # return self._accelerator_connector.parallel_device_ids @property def lightning_module(self) -> "pl.LightningModule": diff --git a/pytorch_lightning/utilities/exceptions.py b/pytorch_lightning/utilities/exceptions.py index ece4629819b33..24fbbac44d156 100644 --- a/pytorch_lightning/utilities/exceptions.py +++ b/pytorch_lightning/utilities/exceptions.py @@ -16,6 +16,12 @@ class MisconfigurationException(Exception): """Exception used to inform users of misuse with PyTorch Lightning.""" +class DeviceNotAvailibleException(Exception): + """Exception used to inform users that requested devices are not availible.""" + +class ImpactableConfigurationException(Exception): + """Exception used to inform users that configuration impactable with each other.""" + class DeadlockDetectedException(Exception): """Exception 
used when a deadlock has been detected and processes are being killed.""" diff --git a/pytorch_lightning/utilities/imports.py b/pytorch_lightning/utilities/imports.py index 6c20d90e01646..602c8b50c92e9 100644 --- a/pytorch_lightning/utilities/imports.py +++ b/pytorch_lightning/utilities/imports.py @@ -133,6 +133,8 @@ def _compare_version(package: str, op: Callable, version: str, use_base_version: else: _IPU_AVAILABLE = False +_GPU_AVAILABLE = torch.cuda.is_available() and torch.cuda.device_count()>0 + # experimental feature within PyTorch Lightning. def _fault_tolerant_training() -> bool: From 50a82d2f7c4b2960a15dec00b4bbf061e678fab0 Mon Sep 17 00:00:00 2001 From: Siyu Wang Date: Mon, 24 Jan 2022 17:09:08 -0800 Subject: [PATCH 02/69] update --- pytorch_lightning/strategies/ddp2.py | 2 +- pytorch_lightning/strategies/ddp_spawn.py | 4 + pytorch_lightning/strategies/dp.py | 13 +- pytorch_lightning/strategies/fully_sharded.py | 8 +- pytorch_lightning/strategies/horovod.py | 12 +- pytorch_lightning/strategies/ipu.py | 12 +- pytorch_lightning/strategies/parallel.py | 8 + pytorch_lightning/strategies/sharded.py | 7 +- pytorch_lightning/strategies/sharded_spawn.py | 7 +- pytorch_lightning/strategies/single_device.py | 13 +- pytorch_lightning/strategies/single_tpu.py | 11 +- pytorch_lightning/strategies/strategy.py | 7 +- pytorch_lightning/strategies/tpu_spawn.py | 8 + .../connectors/accelerator_connector_new.py | 231 +++++++++++++----- pytorch_lightning/trainer/trainer.py | 54 ++-- 15 files changed, 297 insertions(+), 100 deletions(-) diff --git a/pytorch_lightning/strategies/ddp2.py b/pytorch_lightning/strategies/ddp2.py index 5e1a349bd910d..2633508e6bd82 100644 --- a/pytorch_lightning/strategies/ddp2.py +++ b/pytorch_lightning/strategies/ddp2.py @@ -80,5 +80,5 @@ def register_strategies(cls, strategy_registry: Dict) -> None: strategy_registry.register( cls.distributed_backend, cls, - description="Strategy", + description=f"{cls.__class__.__name__} Strategy", ) diff --git a/pytorch_lightning/strategies/ddp_spawn.py b/pytorch_lightning/strategies/ddp_spawn.py index 501fb018a0fca..2e73c64a1b207 100644 --- a/pytorch_lightning/strategies/ddp_spawn.py +++ b/pytorch_lightning/strategies/ddp_spawn.py @@ -87,6 +87,10 @@ def __init__( def num_nodes(self) -> int: return self._num_nodes + @property + def num_processes(self): + return len(self.parallel_devices) if self.parallel_devices is not None else 0 + @num_nodes.setter def num_nodes(self, num_nodes: int) -> None: # note that world ranks is related to num_nodes, when resetting it, need to reset world ranks diff --git a/pytorch_lightning/strategies/dp.py b/pytorch_lightning/strategies/dp.py index 0c9723c183a5e..bcac4f4f156d5 100644 --- a/pytorch_lightning/strategies/dp.py +++ b/pytorch_lightning/strategies/dp.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Any, List, Optional +from typing import Any, List, Optional, Dict import torch from torch.nn import DataParallel, Module @@ -31,7 +31,7 @@ class DataParallelStrategy(ParallelStrategy): """Implements data-parallel training in a single process, i.e., the model gets replicated to each device and each gets a split of the data.""" - distributed_backend = _StrategyType.DP + distributed_backend = "dp" def __init__( self, @@ -149,6 +149,15 @@ def training_step_end(self, output): return output + @classmethod + def register_strategies(cls, strategy_registry: Dict) -> None: + strategy_registry.register( + cls.distributed_backend, + cls, + description=f"{cls.__class__.__name__} Strategy", + ) + + def teardown(self) -> None: super().teardown() if self.root_device.type == "cuda": diff --git a/pytorch_lightning/strategies/fully_sharded.py b/pytorch_lightning/strategies/fully_sharded.py index 9a24197c6c33d..4a05abd0dd9d8 100644 --- a/pytorch_lightning/strategies/fully_sharded.py +++ b/pytorch_lightning/strategies/fully_sharded.py @@ -36,7 +36,7 @@ class DDPFullyShardedStrategy(DDPStrategy): - distributed_backend = _StrategyType.DDP_FULLY_SHARDED + distributed_backend = "ddp_fully_sharded" def __init__( self, @@ -212,3 +212,9 @@ def register_strategies(cls, strategy_registry: Dict) -> None: strategy_registry.register( "fsdp", cls, description="Fully sharded training with checkpointing the full state dict." ) + + strategy_registry.register( + cls.distributed_backend, + cls, + description=f"{cls.__class__.__name__} Strategy", + ) diff --git a/pytorch_lightning/strategies/horovod.py b/pytorch_lightning/strategies/horovod.py index a69850b60f9c0..90b091a9eee18 100644 --- a/pytorch_lightning/strategies/horovod.py +++ b/pytorch_lightning/strategies/horovod.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. from contextlib import ExitStack -from typing import Any, List, Optional, Tuple, Union +from typing import Any, List, Optional, Tuple, Union, Dict import torch import torch.nn as nn @@ -37,7 +37,7 @@ class HorovodStrategy(ParallelStrategy): """Plugin for Horovod distributed training integration.""" - distributed_backend = _StrategyType.HOROVOD + distributed_backend = "horovod" def __init__( self, @@ -196,6 +196,14 @@ def _filter_named_parameters(model: nn.Module, optimizer: Optimizer) -> List[Tup opt_params = {p for group in optimizer.param_groups for p in group.get("params", [])} return [(name, p) for name, p in model.named_parameters() if p in opt_params] + @classmethod + def register_strategies(cls, strategy_registry: Dict) -> None: + strategy_registry.register( + cls.distributed_backend, + cls, + description=f"{cls.__class__.__name__} Strategy", + ) + def teardown(self) -> None: super().teardown() # teardown may be called before `_exit_stack` is set diff --git a/pytorch_lightning/strategies/ipu.py b/pytorch_lightning/strategies/ipu.py index 6b6433841d5ae..c13431d1ad8d8 100644 --- a/pytorch_lightning/strategies/ipu.py +++ b/pytorch_lightning/strategies/ipu.py @@ -13,7 +13,7 @@ # limitations under the License. 
import json import os -from typing import Any, Callable, List, Optional, Union +from typing import Any, Callable, List, Optional, Union, Dict import torch from torch.utils.data import DataLoader @@ -62,6 +62,8 @@ def _move_float_tensors_to_half(self, batch: Any) -> Any: class IPUStrategy(ParallelStrategy): """Plugin for training on IPU devices.""" + distributed_backend = "ipu" + def __init__( self, accelerator: Optional["pl.accelerators.accelerator.Accelerator"] = None, @@ -360,3 +362,11 @@ def all_gather(self, tensor: torch.Tensor, group: Optional[Any] = None, sync_gra def broadcast(self, obj: object, src: int = 0) -> object: return obj + + @classmethod + def register_strategies(cls, strategy_registry: Dict) -> None: + strategy_registry.register( + cls.distributed_backend, + cls, + description=f"{cls.__class__.__name__} Strategy", + ) diff --git a/pytorch_lightning/strategies/parallel.py b/pytorch_lightning/strategies/parallel.py index 11207065b7e21..d8a8ab50abe2d 100644 --- a/pytorch_lightning/strategies/parallel.py +++ b/pytorch_lightning/strategies/parallel.py @@ -85,6 +85,14 @@ def distributed_sampler_kwargs(self): distributed_sampler_kwargs = dict(num_replicas=len(self.parallel_devices), rank=self.global_rank) return distributed_sampler_kwargs + @property + def parallel_devices(self): + return self._parallel_devices + + @parallel_devices.setter + def parallel_devices(self, parallel_devices): + self._parallel_devices = parallel_devices + def reconciliate_processes(self, trace: str): """Function to re-conciliate processes on failure.""" diff --git a/pytorch_lightning/strategies/sharded.py b/pytorch_lightning/strategies/sharded.py index 2d1584a2e15e5..1f402126b6efe 100644 --- a/pytorch_lightning/strategies/sharded.py +++ b/pytorch_lightning/strategies/sharded.py @@ -37,7 +37,7 @@ class DDPShardedStrategy(DDPStrategy): """Optimizer and gradient sharded training provided by FairScale.""" - distributed_backend = _StrategyType.DDP_SHARDED + distributed_backend = "ddp_sharded" _REDUCE_BUFFER_SIZE_DEFAULT: int = 2 ** 23 # 8M def configure_ddp(self) -> None: @@ -135,3 +135,8 @@ def register_strategies(cls, strategy_registry: Dict) -> None: description="DDP Sharded Strategy with `find_unused_parameters` as False", find_unused_parameters=False, ) + strategy_registry.register( + cls.distributed_backend, + cls, + description=f"{cls.__class__.__name__} Strategy", + ) diff --git a/pytorch_lightning/strategies/sharded_spawn.py b/pytorch_lightning/strategies/sharded_spawn.py index 289e3491be0b4..1a7c6b6e00d1c 100644 --- a/pytorch_lightning/strategies/sharded_spawn.py +++ b/pytorch_lightning/strategies/sharded_spawn.py @@ -36,7 +36,7 @@ class DDPSpawnShardedStrategy(DDPSpawnStrategy): """Optimizer sharded training provided by FairScale.""" - distributed_backend = _StrategyType.DDP_SHARDED_SPAWN + distributed_backend = "ddp_sharded_spawn" def configure_ddp(self) -> None: self.model, self.optimizers = self._setup_model_and_optimizers( @@ -118,3 +118,8 @@ def register_strategies(cls, strategy_registry: Dict) -> None: description="DDP Spawn Sharded Strategy with `find_unused_parameters` as False", find_unused_parameters=False, ) + strategy_registry.register( + cls.distributed_backend, + cls, + description=f"{cls.__class__.__name__} Strategy", + ) diff --git a/pytorch_lightning/strategies/single_device.py b/pytorch_lightning/strategies/single_device.py index 440c73afce8fc..f866dfe204ade 100644 --- a/pytorch_lightning/strategies/single_device.py +++ b/pytorch_lightning/strategies/single_device.py @@ -13,7 
+13,7 @@ # limitations under the License. from __future__ import annotations -from typing import Any +from typing import Any, Dict import torch @@ -26,10 +26,11 @@ class SingleDeviceStrategy(Strategy): """Strategy that handles communication on a single device.""" + distributed_backend = "single_device" def __init__( self, - device: _DEVICE, + device: _DEVICE = "cpu", accelerator: pl.accelerators.accelerator.Accelerator | None = None, checkpoint_io: CheckpointIO | None = None, precision_plugin: PrecisionPlugin | None = None, @@ -79,6 +80,14 @@ def barrier(self, *args, **kwargs) -> None: def broadcast(self, obj: object, src: int = 0) -> object: return obj + @classmethod + def register_strategies(cls, strategy_registry: Dict) -> None: + strategy_registry.register( + cls.distributed_backend, + cls, + description=f"{cls.__class__.__name__} Strategy", + ) + def teardown(self) -> None: super().teardown() if self.root_device.type == "cuda": diff --git a/pytorch_lightning/strategies/single_tpu.py b/pytorch_lightning/strategies/single_tpu.py index 8465656f034ab..3d471f2dabd24 100644 --- a/pytorch_lightning/strategies/single_tpu.py +++ b/pytorch_lightning/strategies/single_tpu.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import os -from typing import Optional +from typing import Optional, Dict import pytorch_lightning as pl from pytorch_lightning.plugins.io.xla_plugin import XLACheckpointIO @@ -27,6 +27,7 @@ class SingleTPUStrategy(SingleDeviceStrategy): """Strategy for training on a single TPU device.""" + distributed_backend = "single_tpu" def __init__( self, @@ -71,6 +72,14 @@ def setup(self, trainer: "pl.Trainer") -> None: def model_to_device(self) -> None: self.model.to(self.root_device) + @classmethod + def register_strategies(cls, strategy_registry: Dict) -> None: + strategy_registry.register( + cls.distributed_backend, + cls, + description=f"{cls.__class__.__name__} Strategy", + ) + def teardown(self) -> None: super().teardown() # TPU teardown diff --git a/pytorch_lightning/strategies/strategy.py b/pytorch_lightning/strategies/strategy.py index 629911911b780..4b339e0b0efb4 100644 --- a/pytorch_lightning/strategies/strategy.py +++ b/pytorch_lightning/strategies/strategy.py @@ -441,7 +441,12 @@ def teardown(self) -> None: @classmethod def register_strategies(cls, strategies_registry) -> None: - pass + if cls.distributed_backend: + strategy_registry.register( + cls.distributed_backend, + cls, + description=f"{cls.__class__.__name__} Strategy", + ) def on_train_start(self) -> None: """Called when train begins.""" diff --git a/pytorch_lightning/strategies/tpu_spawn.py b/pytorch_lightning/strategies/tpu_spawn.py index a6e82441da296..4bcf0d1ef31b6 100644 --- a/pytorch_lightning/strategies/tpu_spawn.py +++ b/pytorch_lightning/strategies/tpu_spawn.py @@ -52,6 +52,8 @@ class TPUSpawnStrategy(DDPSpawnStrategy): """Strategy for training multiple TPU devices using the :func:`torch.multiprocessing.spawn` method.""" + distributed_backend = "tpu_spawn" + def __init__( self, accelerator: Optional["pl.accelerators.accelerator.Accelerator"] = None, @@ -346,3 +348,9 @@ def register_strategies(cls, strategy_registry: Dict) -> None: strategy_registry.register( "tpu_spawn_debug", cls, description="TPUSpawn Strategy with `debug` as True", debug=True ) + + strategy_registry.register( + cls.distributed_backend, + cls, + description=f"{cls.__class__.__name__} Strategy", + ) diff --git 
a/pytorch_lightning/trainer/connectors/accelerator_connector_new.py b/pytorch_lightning/trainer/connectors/accelerator_connector_new.py index 186d175f33d64..8c69ef6b8ad5a 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector_new.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector_new.py @@ -109,19 +109,19 @@ def __init__( gpu_ids, ): """ - A. accelerator could be: + A. accelerator flag could be: 1. strategy class (deprecated in 1.5 will be removed in 1.7) 2. strategy str (deprecated in 1.5 will be removed in 1.7) 3. accelerator class 4. accelerator str 5. accelerator auto - B. strategy could be : + B. strategy flag could be : 1. strategy class 2. strategy str registered with strategyRegister 3. strategy str in _strategy_type enum which listed in each strategy as backend (registed these too, and _strategy_type could be deprecated) - C. plugins could be: + C. plugins flag could be: 1. List of str, which could contains: i. strategy str ii. precision str (Not supported in the old accelerator_connector version) @@ -141,6 +141,7 @@ def __init__( """ + # --Parsing_flags------------------------------------------------------ # Get registered strategies, existing accelerators and precision plugins self._existing_strategies_str = StrategyRegistry.available_strategies() print(self._existing_strategies_str) @@ -154,26 +155,40 @@ def __init__( self._config_check_and_set_final_flags(strategy, accelerator, precision, plugins, amp_type, amp_level) self._device_config_check_and_set_final_flags(devices=devices, num_nodes=num_nodes, num_processes=num_processes, gpus=gpus, ipus=ipus, tpu_cores=tpu_cores) - # handle auto and choose flag when user hasn't set it up. + + # --Accelerator------------------------------------------------------------- + # handle `auto` and `None` if self._accelerator_flag == 'auto' or self._accelerator_flag is None: self._choose_accelerator() - else: - # [RFC] move to XAccelerator class init? - self._check_device_availibility() - - # Accelerator initialization - # TODO devices logic handling still in process, not ready for reviews + # else: + # # [RFC] move to XAccelerator class init? + # self._check_device_availibility() self._set_parallel_devices_and_init_accelerator() - # handle strategy flag is not set, choose for user + + # --Cluster_environment----------------------------------------------------- + self._choose_and_init_cluster_environment() + + + # --Strategy Part 1 : choose strategy --------------------------------------- if self._strategy_flag is None: self._choose_strategy() + # Reset strategy even user has specificed one + self._strategy_fallbacks() - self._choose_and_init_cluster_environment() + + # --Precision---------------------------------------------------------------- self._check_capatibility_and_init_precision() + + + # --Strategy Part 2 : init Strategy and set Strategy properties ------------- self._init_strategy() + # set properties not used in accelerator_connector. TODO move out of this file + # self.gpus = gpus or devices + self.replace_sampler_ddp = replace_sampler_ddp + def _config_check_and_set_final_flags(self, strategy, accelerator, precision, plugins, amp_type, amp_level): """ This method checks: @@ -207,6 +222,11 @@ def _config_check_and_set_final_flags(self, strategy, accelerator, precision, pl f" in v1.5 and will be removed in v1.7. Use `Trainer(strategy={accelerator!r})` instead." 
) self._strategy_flag = accelerator + elif accelerator == "ddp_cpu": + rank_zero_warn( + "You requested one or more GPUs, but set `accelerator='ddp_cpu'`. Training will not use GPUs." + ) + self._strategy_flag = accelerator if precision: @@ -243,6 +263,7 @@ def _config_check_and_set_final_flags(self, strategy, accelerator, precision, pl # if self._precision_flag: # raise MisconfigurationException("precision set through both strategy class and flags, choose one place to set") # else: + print("here") self._precision_flag = self._strategy_flag.precision_plugin if self._strategy_flag.checkpoint_io: if self.checkpoint_io: @@ -273,9 +294,11 @@ def _device_config_check_and_set_final_flags(self, devices, num_nodes, num_proce else : self._num_nodes_flag = int(num_nodes) if num_nodes is not None else 1 + self._device_flag = devices ##### to be deleted v1.7 - deprecated_devices_specific_nums = num_processes or gpus or ipus or tpu_cores - self._mapping_deprecated_devices_specfic_info_to_accelerator_and_device_flag(devices, deprecated_devices_specific_nums, num_processes, gpus, ipus, tpu_cores) + deprecated_devices_specific_flag = num_processes or gpus or ipus or tpu_cores + if deprecated_devices_specific_flag: + self._mapping_deprecated_devices_specfic_info_to_accelerator_and_device_flag(devices, deprecated_devices_specific_flag, num_processes, gpus, ipus, tpu_cores) ##### deleted end if devices == "auto": if self._accelerator_flag is None: @@ -283,19 +306,16 @@ def _device_config_check_and_set_final_flags(self, devices, num_nodes, num_proce f"You passed `devices={devices}` but haven't specified" " `accelerator=('auto'|'tpu'|'gpu'|'ipu'|'cpu')` for the devices mapping" ) - if not self._device_flag: - self._device_flag = devices - - def _mapping_deprecated_devices_specfic_info_to_accelerator_and_device_flag(self, devices, deprecated_devices_specific_nums, num_processes, gpus, ipus, tpu_cores): + def _mapping_deprecated_devices_specfic_info_to_accelerator_and_device_flag(self, devices, deprecated_devices_specific_flag, num_processes, gpus, ipus, tpu_cores): ##### to be deleted v1.7vbg # set devices base on num_processes, gpus, ipus, tpu_cores if devices: - rank_zero_warn(f"will be ignored, instand the device specific number {deprecated_devices_specific_nums} will be used") + rank_zero_warn(f"will be ignored, instand the device specific number {deprecated_devices_specific_flag} will be used") if [(num_processes is not None), (gpus is not None), (ipus is not None), (tpu_cores is not None)].count(True) > 1: rank_zero_warn(f"more than one device specifc flag has been set") - self._device_flag = deprecated_devices_specific_nums + self._device_flag = deprecated_devices_specific_flag if not self._accelerator_flag: # set accelerator type base on num_processes, gpus, ipus, tpu_cores @@ -319,6 +339,8 @@ def _choose_accelerator(self): self._accelerator_flag = "gpu" else: self._accelerator_flag = "cpu" + if self._device_flag == "auto": + self._device_flag = 1 # [RFC] this is current logic, if accelerator not set, default cpu? 
else: self._accelerator_flag = "cpu" @@ -335,7 +357,7 @@ def _set_parallel_devices_and_init_accelerator(self): self._parallel_devices = [] if isinstance(self._accelerator_flag, Accelerator): - self.accelerator = self._accelerator_flag() + self.accelerator = self._accelerator_flag elif self._accelerator_flag == "tpu": self.accelerator = TPUAccelerator() if self._device_flag == "auto" or not self._device_flag: @@ -354,8 +376,12 @@ def _set_parallel_devices_and_init_accelerator(self): self.accelerator = GPUAccelerator() if self._device_flag == "auto" or not self._device_flag: self._device_flag = GPUAccelerator.auto_device_count() - if isinstance(self._device_flag, int): + if isinstance(self._device_flag, int) or isinstance(self._device_flag, str): + self._device_flag = int(self._device_flag) self._parallel_devices = [torch.device("cuda", i) for i in device_parser.parse_gpu_ids(self._device_flag)] + elif isinstance(self._device_flag, list): + self._parallel_devices = [torch.device("cuda", i) for i in self._device_flag] + elif self._accelerator_flag == "cpu": self.accelerator = CPUAccelerator() @@ -364,6 +390,8 @@ def _set_parallel_devices_and_init_accelerator(self): if isinstance(self._device_flag, int): self._parallel_devices = [torch.device("cpu")] * self._device_flag + self._gpus = self._device_flag + def _choose_and_init_cluster_environment(self): self.cluster_environment = LightningEnvironment() @@ -383,8 +411,10 @@ def _is_slurm_managing_tasks(self): used by choosing cluster enviroment """ if ( - (not self._strategy_flag=="ddp" and not self._strategy_flag=="ddp2") - or not SLURMEnvironment.detect() + #(not self._strategy_flag=="ddp" and not self._strategy_flag=="ddp2") + # the above logic moved to _select_strategy(), only check _is_slurm_managing_tasks() + # when strategy flag is ddp or ddp2 + not SLURMEnvironment.detect() or SLURMEnvironment.job_name() == "bash" # in interactive mode we don't manage tasks ): return False @@ -398,39 +428,70 @@ def _choose_strategy(self): self._strategy_flag = HorovodStrategy() if self._accelerator_flag == "ipu": - self._strategy_flag = IPUStrategy() + self._strategy_flag = "ipu" elif self._accelerator_flag == "tpu": if self._parallel_devices and len(self._parallel_devices)>1: - self._strategy_flag = TPUSpawnStrategy() + self._strategy_flag = "tpu_spawn" else: self._srategy_flag = SingleTPUStrategy() # [RFC] in existing logic SingleDevice strategy choice diverge between cpu and gpu, should we merge? 
- elif self._accelerator_flag == "gpu": - if self._num_nodes_flag > 1: - self._strategy_flag = DDPStrategy() - elif len(self._parallel_devices) == 1: - self._strategy_flag = DDPStrategy() - elif len(self._parallel_devices) > 1: - self._strategy_flag = DDPSpawnStrategy() - else: - self._strategy_flag = DDPStrategy() + # elif self._accelerator_flag == "gpu": + # if self._num_nodes_flag > 1: + # self._strategy_flag = "ddp" + # elif TorchElasticEnvironment.detect() or KubeflowEnvironment.detect() or self._is_slurm_managing_tasks(): + # self._strategy_flag = "ddp" + # elif len(self._parallel_devices) == 1: + # self._strategy_flag = "ddp" + # elif len(self._parallel_devices) > 1: + # self._strategy_flag = "ddp_spawn" + # else: + # self._strategy_flag = "ddp" else: if self._num_nodes_flag > 1: - self._strategy_flag = DDPStrategy() + self._strategy_flag = "ddp" + elif TorchElasticEnvironment.detect() or KubeflowEnvironment.detect() or self._is_slurm_managing_tasks(): + self._strategy_flag = "ddp" elif len(self._parallel_devices) <= 1: device = torch.device("cuda") if self._accelerator_flag == "gpu" else "cpu" self._strategy_flag = SingleDeviceStrategy(device = device) elif len(self._parallel_devices) > 1: - self._strategy_flag = DDPSpawnStrategy() + self._strategy_flag = "ddp_spawn" else: - self._strategy_flag = DDPStrategy() + self._strategy_flag = "ddp" + + def _strategy_fallbacks(self): + _strategy_flag = "" if isinstance(self._strategy_flag, Strategy) else self._strategy_flag + if _strategy_flag == "ddp_cpu": + if _TPU_AVAILABLE: + raise MisconfigurationException( + "`accelerator='ddp_cpu'` is not supported on TPU machines. " + "Learn more: https://github.com/PyTorchLightning/pytorch-lightning/issues/7810" + ) + if self._device_flag ==1 and self._num_nodes_flag > 1: + _strategy_flag = "ddp" + else: + _strategy_flag = "ddp_spawn" + if self._accelerator_flag == "gpu": + rank_zero_warn( + "You requested one or more GPUs, but set `accelerator='ddp_cpu'`. Training will not use GPUs." 
+ ) + # if self._accelerator_flag == "cpu": + # self._parallel_devices = os.cpu_count() + + if "ddp_spawn" in _strategy_flag and (TorchElasticEnvironment.detect() or KubeflowEnvironment.detect() or self._is_slurm_managing_tasks()): + _strategy_flag = "ddp" + + if _strategy_flag: + self._strategy_flag = _strategy_flag def _check_capatibility_and_init_precision(self): + print(self._precision_flag) self._precision_misconfig_check() if isinstance(self._precision_flag, PrecisionPlugin): self.precision_plugin = self._precision_flag + return if self._accelerator_flag =="ipu": self.precision_plugin = IPUPrecisionPlugin(self._precision_flag) @@ -485,6 +546,7 @@ def _check_capatibility_and_init_precision(self): self.precision_plugin = PrecisionPlugin() def _precision_misconfig_check(self): + if self._accelerator_flag == "ipu": if self._precision_flag not in (16, 32): raise MisconfigurationException( @@ -516,6 +578,7 @@ def _precision_misconfig_check(self): def _init_strategy(self): + print(self._strategy_flag) if isinstance(self._strategy_flag, str): self.strategy = StrategyRegistry.get(self._strategy_flag) else: @@ -526,44 +589,92 @@ def _init_strategy(self): if self.checkpoint_io: self.strategy.checkpoint_io = self.checkpoint_io self.strategy.cluster_environment = self.cluster_environment + if hasattr(self.strategy, "parallel_devices"): + self.strategy.parallel_devices = self._parallel_devices ############################################################################## - # the following logic should be deprecated/removed + # the following logic should be deprecated/removed, and these information should be + # retrive from strategies and accelerators # Added here to keep backward compabilities - # @property - # def parallel_devices(self) -> List[Union[torch.device, int]]: - # return self._parallel_device - - # @property - # def replace_sampler_ddp(): - # return self.replace_sampler_ddp + @property + def parallel_devices(self) -> List[Union[torch.device, int]]: + return self._parallel_devices # def _distrib_type(): - - # def _device_type(): - - # def num_nodes(): - - # def num_processes(): - - # def root_gpu(): - + @property + def device_type(self): + if isinstance(self.accelerator, CPUAccelerator): + return "cpu" + if isinstance(self.accelerator, GPUAccelerator): + return "gpu" + if isinstance(self.accelerator, TPUAccelerator): + return "tpu" + if isinstance(self.accelerator, IPUAccelerator): + return "ipu" + + @property + def num_nodes(self): + return self._num_nodes + + @property + def num_processes(self): + return self.devices + + @property + def root_gpu(self) -> Optional[int]: + return ( + self.strategy.root_device.index + if not isinstance(self.accelerator, (IPUAccelerator, TPUAccelerator)) + else None + ) + + @property def devices(self): return len(self._parallel_devices) - # def parallel_device_ids(): + @property + def tpu_cores(self) -> int: + return self.devices + + @property + def ipus(self) -> int: + return self.devices - # def gpus(): + @property + def num_gpus(self) -> int: + return self.devices - # def is_distributed(): + # def parallel_device_ids(): + @property + def gpus(self): + return self._gpus if isinstance(self.accelerator, GPUAccelerator) else None + + + def is_distributed(self): + # Used for custom plugins. + # Custom plugins should implement is_distributed property. 
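+        # TPU runs skip this shortcut and also consult the strategy's own flag after the isinstance check below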
+ if hasattr(self.strategy, "is_distributed") and not isinstance(self.accelerator, TPUAccelerator): + return self.strategy.is_distributed + distributed_strategy = (DDP2Strategy, DDPStrategy, DDPSpawnShardedStrategy, DDPShardedStrategy, DDPFullyShardedStrategy, DDPSpawnStrategy, DeepSpeedStrategy, TPUSpawnStrategy, HorovodStrategy) + is_distributed = isinstance(self.strategy, distributed_strategy) + if isinstance(self.accelerator, TPUAccelerator): + is_distributed |= self.strategy.is_distributed + return is_distributed def has_ipu(self): - return self._accelerator_flag == "ipu" + return isinstance(self.accelerator, IPUAccelerator) def has_tpu(self): - return self._accelerator_flag == "tpu" + return isinstance(self.accelerator, TPUAccelerator) + + def use_dp(self): + return isinstance(self.strategy, DataParallelStrategy) + + @property + def _strategy_type(self) -> _StrategyType: + return self.strategy.distributed_backend diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index bb3dde1e893a3..93fd6187be1ea 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -1968,45 +1968,45 @@ def should_rank_save_checkpoint(self) -> bool: isinstance(strategy, pl.strategies.TPUSpawnStrategy) and strategy.local_rank == 0 or strategy.is_global_zero ) - # @property - # def _strategy_type(self) -> _StrategyType: - # return self._accelerator_connector._strategy_type + @property + def _strategy_type(self) -> _StrategyType: + return self.strategy.distributed_backend - # @property - # def _device_type(self) -> _AcceleratorType: - # return self._accelerator_connector._device_type + @property + def _device_type(self) -> _AcceleratorType: + return self._accelerator_connector.device_type - # @property - # def num_nodes(self) -> int: - # return self._accelerator_connector.num_nodes + @property + def num_nodes(self) -> int: + return self._accelerator_connector.num_nodes - # @property - # def num_processes(self) -> int: - # return self._accelerator_connector.num_processes + @property + def num_processes(self) -> int: + return self._accelerator_connector.num_processes - # @property - # def root_gpu(self) -> Optional[int]: - # return self._accelerator_connector.root_gpu + @property + def root_gpu(self) -> Optional[int]: + return self._accelerator_connector.root_gpu - # @property - # def tpu_cores(self) -> int: - # return self._accelerator_connector.tpu_cores + @property + def tpu_cores(self) -> int: + return self._accelerator_connector.tpu_cores - # @property - # def ipus(self) -> int: - # return self._accelerator_connector.num_ipus + @property + def ipus(self) -> int: + return self._accelerator_connector.num_ipus - # @property - # def num_gpus(self) -> int: - # return self._accelerator_connector.num_gpus + @property + def num_gpus(self) -> int: + return self._accelerator_connector.num_gpus @property def devices(self) -> Optional[Union[List[int], str, int]]: return self._accelerator_connector.devices - # @property - # def data_parallel_device_ids(self) -> Optional[List[int]]: - # return self._accelerator_connector.parallel_device_ids + @property + def data_parallel_device_ids(self) -> Optional[List[int]]: + return self._accelerator_connector.parallel_devices @property def lightning_module(self) -> "pl.LightningModule": From 7307969152cbf83f2fa58f816e1ee6d9d23e7563 Mon Sep 17 00:00:00 2001 From: Siyu Wang Date: Mon, 24 Jan 2022 22:20:25 -0800 Subject: [PATCH 03/69] update --- .../connectors/accelerator_connector.py | 1327 +++++++---------- 
.../connectors/accelerator_connector_new.py | 680 --------- .../test_accelerator_connector.py | 14 +- 3 files changed, 520 insertions(+), 1501 deletions(-) delete mode 100644 pytorch_lightning/trainer/connectors/accelerator_connector_new.py diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index fd65975618f02..72c9a78f06602 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -66,10 +66,11 @@ ) from pytorch_lightning.utilities import _AcceleratorType, _StrategyType, AMPType, device_parser from pytorch_lightning.utilities.enums import PrecisionType -from pytorch_lightning.utilities.exceptions import MisconfigurationException +from pytorch_lightning.utilities.exceptions import MisconfigurationException, DeviceNotAvailibleException, ImpactableConfigurationException from pytorch_lightning.utilities.imports import ( _HOROVOD_AVAILABLE, _IPU_AVAILABLE, + _GPU_AVAILABLE, _TORCH_GREATER_EQUAL_1_8, _TPU_AVAILABLE, ) @@ -84,421 +85,326 @@ class AcceleratorConnector: def __init__( self, - num_processes, devices, - tpu_cores, - ipus, - accelerator, - strategy: Optional[Union[str, Strategy]], - gpus, - gpu_ids, num_nodes, + accelerator, # reduce typing + strategy: Optional[Union[str, Strategy]], + plugins, + precision, + amp_type, + amp_level, sync_batchnorm, benchmark, replace_sampler_ddp, deterministic: bool, - precision, - amp_type, - amp_level, - plugins, + num_processes, # deprecated + tpu_cores, # deprecated + ipus, # deprecated + gpus, # deprecated + gpu_ids, ): - # initialization - self._device_type = _AcceleratorType.CPU - self._strategy_type = None - self._accelerator_type = None - - self._strategy_flag = strategy.lower() if isinstance(strategy, str) else strategy - # TODO: Rename this to something else once all the distributed flags are moved to strategy - self.distributed_backend = accelerator - - self._init_deterministic(deterministic) - - self.num_processes = num_processes - self.devices = devices - # `gpus` is the input passed to the Trainer, whereas `gpu_ids` is a list of parsed gpu ids. - self.gpus = gpus - self.parallel_device_ids = gpu_ids - self.tpu_cores = tpu_cores - self.ipus = ipus - self.num_nodes = num_nodes - self.sync_batchnorm = sync_batchnorm - self.benchmark = benchmark - self.replace_sampler_ddp = replace_sampler_ddp - if not PrecisionType.supported_type(precision): - raise MisconfigurationException( - f"Precision {repr(precision)} is invalid. Allowed precision values: {PrecisionType.supported_types()}" - ) - self.precision = precision - self.amp_type = amp_type.lower() if isinstance(amp_type, str) else None - self.amp_level = amp_level - - self._precision_plugin: Optional[PrecisionPlugin] = None - self._strategy: Optional[Strategy] = None - self._cluster_environment: Optional[ClusterEnvironment] = None - self._checkpoint_io: Optional[CheckpointIO] = None - - plugins = plugins if plugins is not None else [] + """ + A. accelerator flag could be: + 1. strategy class (deprecated in 1.5 will be removed in 1.7) + 2. strategy str (deprecated in 1.5 will be removed in 1.7) + 3. accelerator class + 4. accelerator str + 5. accelerator auto + + B. strategy flag could be : + 1. strategy class + 2. strategy str registered with strategyRegister + 3. strategy str in _strategy_type enum which listed in each strategy as backend (registed these too, and _strategy_type could be deprecated) + + C. 
plugins flag could be: + 1. List of str, which could contains: + i. strategy str + ii. precision str (Not supported in the old accelerator_connector version) + iii. checkpoint_io str (Not supported in the old accelerator_connector version) + iv. cluster_environment str (Not supported in the old accelerator_connector version) + 2. List of class, which could contains: + i. strategy class (deprecated in 1.5 will be removed in 1.7) + ii. precision class (should be removed, and precision flag should allow user pass classes) + iii. checkpoint_io class + iv. cluster_environment class + + + priorities which to take when: + A. Class > str + B. Strategy > Accelerator/precision/plugins + C. When multiple flag set to the same thing? (ignore? not handled for now) - if isinstance(plugins, str): - plugins = [plugins] + """ - if not isinstance(plugins, Sequence): - plugins = [plugins] + # --Parsing_flags------------------------------------------------------ + # Get registered strategies, existing accelerators and precision plugins + self._existing_strategies_str = StrategyRegistry.available_strategies() + print(self._existing_strategies_str) + self._existing_accelerator_type = ["tpu", "ipu", "gpu", "cpu"] + self._supported_precision = PrecisionType.supported_types() - self.plugins = plugins + # raise misconfig exceptions if their is conflict between flags + # set the valid flag to self._x_flag after validation + # for example: if accelerator is strategy class, set self._strategy_flag = accelerator + # for devices: assign gpus ipus and etcs to accelerator_flag and devices_flag + self._config_check_and_set_final_flags(strategy, accelerator, precision, plugins, amp_type, amp_level) + self._device_config_check_and_set_final_flags(devices=devices, num_nodes=num_nodes, num_processes=num_processes, gpus=gpus, ipus=ipus, tpu_cores=tpu_cores) - self._handle_accelerator_and_strategy() - self._validate_accelerator_and_devices() + # --Accelerator------------------------------------------------------------- + # handle `auto` and `None` + if self._accelerator_flag == 'auto' or self._accelerator_flag is None: + self._choose_accelerator() + # else: + # # [RFC] move to XAccelerator class init? + # self._check_device_availibility() + self._set_parallel_devices_and_init_accelerator() - self._warn_if_devices_flag_ignored() - self.select_accelerator_type() + # --Cluster_environment----------------------------------------------------- + self._choose_and_init_cluster_environment() - if self._strategy_flag is not None: - self._set_strategy() - else: - self.set_distributed_mode() - self.handle_given_plugins() - self._set_strategy_type_if_strategy_passed() + # --Strategy Part 1 : choose strategy --------------------------------------- + if self._strategy_flag is None: + self._choose_strategy() + # Reset strategy even user has specificed one + self._strategy_fallbacks() + self._init_strategy() - self._cluster_environment = self.select_cluster_environment() + # --Precision---------------------------------------------------------------- + self._check_capatibility_and_init_precision() - self.update_device_type_if_ipu_plugin() - self.update_device_type_if_strategy_passed() - self._validate_accelerator_type() - self._set_devices_if_none() + # --Strategy Part 2 : init Strategy and set Strategy properties ------------- + self._lazy_init_strategy() - self.strategy = self.final_strategy() - self.accelerator = self.strategy.accelerator - self._check_plugin_compatibility() - # benchmarking - # TODO: should this be moved to GPU accelerator? 
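The precedence rules stated in the docstring above (class beats string, an explicit `strategy` wins over strategy values passed through `accelerator` or `plugins`, conflicting duplicates raise) can be summarized in a few lines. The sketch below is illustrative only: it uses plain strings where the connector checks `Strategy` instances and the `StrategyRegistry`.

    KNOWN_STRATEGIES = {"ddp", "ddp_spawn", "ddp2", "deepspeed"}   # stand-in for StrategyRegistry
    KNOWN_ACCELERATORS = {"cpu", "gpu", "tpu", "ipu", "auto"}

    def resolve_strategy_flag(strategy=None, accelerator=None, plugins=()):
        """Return the single strategy value, raising if it was set in more than one way."""
        candidates = []
        if strategy is not None:
            candidates.append(strategy)
        if accelerator in KNOWN_STRATEGIES:          # deprecated path: accelerator carries a strategy
            candidates.append(accelerator)
        candidates += [p for p in plugins if p in KNOWN_STRATEGIES]   # deprecated path via plugins
        if len(set(candidates)) > 1:
            raise ValueError(f"strategy set more than once: {candidates}")
        return candidates[0] if candidates else None

    assert resolve_strategy_flag(strategy="ddp") == "ddp"
    assert resolve_strategy_flag(accelerator="ddp_spawn") == "ddp_spawn"   # deprecated but accepted
    try:
        resolve_strategy_flag(strategy="ddp", plugins=["deepspeed"])
    except ValueError:
        pass   # duplicate strategy settings are rejected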
- torch.backends.cudnn.benchmark = self.benchmark + # set properties not used in accelerator_connector. TODO move out of this file + # self.gpus = gpus or devices self.replace_sampler_ddp = replace_sampler_ddp - def _init_deterministic(self, deterministic: bool) -> None: - self.deterministic = deterministic - if _TORCH_GREATER_EQUAL_1_8: - torch.use_deterministic_algorithms(deterministic) - else: - torch.set_deterministic(deterministic) - if deterministic: - # fixing non-deterministic part of horovod - # https://github.com/PyTorchLightning/pytorch-lightning/pull/1572/files#r420279383 - os.environ["HOROVOD_FUSION_THRESHOLD"] = str(0) - # https://docs.nvidia.com/cuda/cublas/index.html#cublasApi_reproducibility - os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8" - - def select_accelerator_type(self) -> None: - if self.distributed_backend == "auto": - if self.has_tpu: - self._accelerator_type = _AcceleratorType.TPU - elif self.has_ipu: - self._accelerator_type = _AcceleratorType.IPU - elif self.has_gpu: - self._accelerator_type = _AcceleratorType.GPU - else: - self._set_devices_to_cpu_num_processes() - self._accelerator_type = _AcceleratorType.CPU - elif self.distributed_backend == _AcceleratorType.TPU: - if not self.has_tpu: - msg = "TPUs are not available" if not _TPU_AVAILABLE else "you didn't pass `tpu_cores` to `Trainer`" - raise MisconfigurationException(f"You passed `accelerator='tpu'`, but {msg}.") - self._accelerator_type = _AcceleratorType.TPU - elif self.distributed_backend == _AcceleratorType.IPU: - if not self.has_ipu: - msg = "IPUs are not available" if not _IPU_AVAILABLE else "you didn't pass `ipus` to `Trainer`" - raise MisconfigurationException(f"You passed `accelerator='ipu'`, but {msg}.") - self._accelerator_type = _AcceleratorType.IPU - elif self.distributed_backend == _AcceleratorType.GPU: - if not self.has_gpu: - msg = "you didn't pass `gpus` to `Trainer`" if torch.cuda.is_available() else "GPUs are not available" - raise MisconfigurationException(f"You passed `accelerator='gpu'`, but {msg}.") - self._accelerator_type = _AcceleratorType.GPU - elif self.distributed_backend == _AcceleratorType.CPU: - self._set_devices_to_cpu_num_processes() - self._accelerator_type = _AcceleratorType.CPU - - if self.distributed_backend in self.accelerator_types: - self.distributed_backend = None - - def _validate_accelerator_and_devices(self) -> None: - if self.distributed_backend not in self.accelerator_types and self.devices is not None: - raise MisconfigurationException( - f"You passed `devices={self.devices}` but haven't specified" - " `accelerator=('auto'|'tpu'|'gpu'|'ipu'|'cpu')` for the devices mapping," - f" got `accelerator={self.distributed_backend!r}`." - ) - - def _validate_accelerator_type(self) -> None: - if self._accelerator_type and self._accelerator_type != self._device_type: - # internal error: should not happen. - raise ValueError( - f"Mismatch between the requested accelerator type ({self._accelerator_type})" - f" and assigned device type ({self._device_type})." 
- ) - self._accelerator_type = self._device_type - - def _warn_if_devices_flag_ignored(self) -> None: - if self.devices is None: - return - devices_warning = f"The flag `devices={self.devices}` will be ignored, as you have set" - if self.distributed_backend in ("auto", _AcceleratorType.TPU): - if self.tpu_cores is not None: - rank_zero_warn(f"{devices_warning} `tpu_cores={self.tpu_cores}`") - elif self.distributed_backend in ("auto", _AcceleratorType.IPU): - if self.ipus is not None: - rank_zero_warn(f"{devices_warning} `ipus={self.ipus}`") - elif self.distributed_backend in ("auto", _AcceleratorType.GPU): - if self.gpus is not None: - rank_zero_warn(f"{devices_warning} `gpus={self.gpus}`") - elif self.distributed_backend in ("auto", _AcceleratorType.CPU): - if self.num_processes != 1: - rank_zero_warn(f"{devices_warning} `num_processes={self.num_processes}`") - - def _set_devices_if_none(self) -> None: - if self.devices is not None: - return - if self._accelerator_type == _AcceleratorType.TPU: - self.devices = self.tpu_cores - elif self._accelerator_type == _AcceleratorType.IPU: - self.devices = self.ipus - elif self._accelerator_type == _AcceleratorType.GPU: - self.devices = self.gpus - elif self._accelerator_type == _AcceleratorType.CPU: - self.devices = self.num_processes - - def _handle_accelerator_and_strategy(self) -> None: - deprecated_types = [t for t in _StrategyType if t not in (_StrategyType.TPU_SPAWN, _StrategyType.DDP_CPU)] - if self.distributed_backend is not None and self.distributed_backend in deprecated_types: - rank_zero_deprecation( - f"Passing `Trainer(accelerator={self.distributed_backend!r})` has been deprecated" - f" in v1.5 and will be removed in v1.7. Use `Trainer(strategy={self.distributed_backend!r})` instead." - ) - if self._strategy_flag is not None: + def _config_check_and_set_final_flags(self, strategy, accelerator, precision, plugins, amp_type, amp_level): + """ + This method checks: + 1. strategy flag: strategy, accelerator and plugin can all set strategies + 2. accelerator: if accelerator flag is Accelerator related flag or class, set self._acceelrator_flag; + If accelerator is strategy related, logic handled in 1 above + 3. precision could be set by precision and plugins flag + 4. plugins could be duplicated in strategy (handled by 1), precision (handled by 3), set checkpoint_io and cluster_environment + """ + self._strategy_flag, self._accelerator_flag, self._precision_flag, self._cluster_environment, self.checkpoint_io, self._amp_level_flag, self._amp_type_flag = None, None, None, None, None, amp_type, amp_level + if strategy: + self._strategy_flag = strategy + if strategy == "ddp_cpu": raise MisconfigurationException( - f"You have passed `Trainer(strategy={self._strategy_flag!r})` but have" - f" also passed `Trainer(accelerator={self.distributed_backend!r})`." - f" HINT: Use just `Trainer(strategy={self._strategy_flag!r})` instead." + "`Trainer(strategy='ddp_cpu')` is not a valid strategy," + " you can use `Trainer(strategy='ddp'|'ddp_spawn', accelerator='cpu')` instead." ) - if self._strategy_flag == _StrategyType.TPU_SPAWN: - raise MisconfigurationException( + if strategy == "tpu_spawn": + raise MisconfigurationException( "`Trainer(strategy='tpu_spawn')` is not a valid strategy," " you can use `Trainer(strategy='ddp_spawn', accelerator='tpu')` instead." 
) - if self._strategy_flag == _StrategyType.DDP_CPU: - raise MisconfigurationException( - "`Trainer(strategy='ddp_cpu')` is not a valid strategy," - " you can use `Trainer(strategy='ddp'|'ddp_spawn', accelerator='cpu')` instead." + # handle duplications and conflict + if isinstance(accelerator, Strategy) and strategy != accelerator: + raise MisconfigurationException("strategy already set through strategy flag, duplicated in accelerator") + if isinstance(accelerator, str) and accelerator in self._existing_strategies_str and strategy != accelerator: + raise MisconfigurationException("strategy str already set through strategy flag, duplicated in accelerator") + if plugins: + for plugin in plugins: + if isinstance(plugin, Strategy) and strategy != plugin: + raise MisconfigurationException("strategy already set through strategy flag, duplicated in plugins") + if isinstance(plugin, str) and plugin in self._existing_strategies_str: + raise MisconfigurationException("strategy already set through strategy flag, duplicated in plugins") + + + if accelerator in self._existing_accelerator_type or accelerator=="auto" or isinstance(accelerator, Accelerator): + self._accelerator_flag = accelerator + elif accelerator in self._existing_strategies_str or isinstance(accelerator, Strategy): + rank_zero_deprecation( + f"Passing `Trainer(accelerator={accelerator!r})` has been deprecated" + f" in v1.5 and will be removed in v1.7. Use `Trainer(strategy={accelerator!r})` instead." ) - - def _set_strategy(self) -> None: - if isinstance(self._strategy_flag, str) and self._strategy_flag in StrategyRegistry: - self._strategy = StrategyRegistry.get(self._strategy_flag) - if isinstance(self._strategy_flag, str): - self.set_distributed_mode(self._strategy_flag) - elif isinstance(self._strategy_flag, Strategy): - self._strategy = self._strategy_flag - - def handle_given_plugins(self) -> None: - - for plug in self.plugins: - if self._strategy_flag is not None and self._is_plugin_training_type(plug): - raise MisconfigurationException( - f"You have passed `Trainer(strategy={self._strategy_flag!r})`" - f" and you can only specify one training type plugin, but you have passed {plug} as a plugin." - ) - if self._is_plugin_training_type(plug): - rank_zero_deprecation( - f"Passing {plug} `strategy` to the `plugins` flag in Trainer has been deprecated" - f" in v1.5 and will be removed in v1.7. Use `Trainer(strategy={plug})` instead." + self._strategy_flag = accelerator + elif accelerator == "ddp_cpu": + rank_zero_warn( + "You requested one or more GPUs, but set `accelerator='ddp_cpu'`. Training will not use GPUs." 
) - - strategy = self._strategy or None - checkpoint = None - precision = None - cluster_environment = None - - for plug in self.plugins: - if isinstance(plug, str) and plug in StrategyRegistry: - if strategy is None: - strategy = StrategyRegistry.get(plug) + self._strategy_flag = accelerator + + if precision: + self._precision_flag = precision + # handle duplications and conflict + if plugins: + for plugin in plugins: + if isinstance(plugin, PrecisionPlugin): + raise MisconfigurationException("precision set in both precision flag and plugin flag") + + if plugins: + plugins = [plugins] if not isinstance(plugins, list) else plugins + for plugin in plugins: + print(plugin) + if isinstance(plugin, Strategy) or isinstance(plugin, str) and plugin in self._existing_strategies_str: + self._strategy_flag = plugin + rank_zero_deprecation( + f"Passing {plugin} `strategy` to the `plugins` flag in Trainer has been deprecated" + f" in v1.5 and will be removed in v1.7. Use `Trainer(strategy={plugin})` instead." + ) + + elif isinstance(plugin, PrecisionPlugin) or isinstance(plugin, str) and plugin in self._supported_precision: + self._precision_flag = plugin + elif isinstance(plugin, CheckpointIO): + self.checkpoint_io = plugin + elif isinstance(plugin, ClusterEnvironment): + self._cluster_environment = plugin else: - raise MisconfigurationException( - "You can only specify one precision and one training type plugin." - " Found more than 1 training type plugin:" - f' {StrategyRegistry[plug]["strategy"]} registered to {plug}' - ) - if isinstance(plug, str): - # Reset the distributed type as the user has overridden training type - # via the plugins argument - self._strategy_type = None - self.set_distributed_mode(plug) + raise MisconfigurationException(f"Does not recognize flag {plugin}") - elif isinstance(plug, Strategy): - if strategy is None: - strategy = plug + + # if user pass in a strategy class which has accelerator, precision, checkpoint or cluster env set up + if self._strategy_flag and isinstance(self._strategy_flag, Strategy): + if self._strategy_flag.accelerator: + if self._accelerator_flag: + raise MisconfigurationException("accelerator set through both strategy class and accelerator flag, choose one") else: - raise MisconfigurationException( - "You can only specify one training type plugin." - f" Available: {type(strategy).__name__}, given: {type(plug).__name__}" - ) - elif isinstance(plug, PrecisionPlugin): - if precision is None: - precision = plug - else: - raise MisconfigurationException( - "You can only specify one precision plugin." - f" Available: {type(precision).__name__}, given: {type(plug).__name__}" - ) - elif isinstance(plug, CheckpointIO): - if checkpoint is None: - checkpoint = plug + self._accelerator_flag = self._strategy_flag.accelerator + if self._strategy_flag.precision_plugin: + # precision has default value 32, we can not tell whether user set it or not [RFC] remove default from trainer? + # if self._precision_flag: + # raise MisconfigurationException("precision set through both strategy class and flags, choose one place to set") + # else: + self._precision_flag = self._strategy_flag.precision_plugin + if self._strategy_flag.checkpoint_io: + if self.checkpoint_io: + raise MisconfigurationException("checkpoint_io set through both strategy class and plugins, choose one") else: - raise MisconfigurationException( - "You can only specify one checkpoint plugin." 
- f" Available: {type(checkpoint).__name__}, given: {type(plug).__name__}" - ) - elif isinstance(plug, ClusterEnvironment): - if cluster_environment is None: - cluster_environment = plug + self.checkpoint_io = self._strategy_flag.checkpoint_io + if getattr(self._strategy_flag, "cluster_environment", None): + if self._cluster_environment: + raise MisconfigurationException("cluster_environment set through both strategy class and plugins, choose one") else: - raise MisconfigurationException( - "You can only specify one cluster environment. Found more than 1 cluster environment plugin" - ) - else: + self._cluster_environment = getattr(self._strategy_flag, "cluster_environment") + + + amp_type = amp_type.lower() if isinstance(amp_type, str) else None + self._amp_type_flag = AMPType.from_str(amp_type) if amp_type is not None else None + + # TODO still working on these flags + # if amp_level is not None and self._amp_type_flag != AMPType.APEX: + # raise MisconfigurationException( + # f"You have asked for `amp_level={self._amp_level_flag!r}` but it's only supported with `amp_backend='apex'`." + # ) + self._amp_level_flag = amp_level + + + def _device_config_check_and_set_final_flags(self, devices, num_nodes, num_processes, gpus, ipus, tpu_cores): + if num_nodes == "auto": + self._num_nodes_flag = 1 + else : + self._num_nodes_flag = int(num_nodes) if num_nodes is not None else 1 + + self._device_flag = devices + ##### to be deleted v1.7 + deprecated_devices_specific_flag = num_processes or gpus or ipus or tpu_cores + if deprecated_devices_specific_flag: + self._mapping_deprecated_devices_specfic_info_to_accelerator_and_device_flag(devices, deprecated_devices_specific_flag, num_processes, gpus, ipus, tpu_cores) + ##### deleted end + if devices == "auto": + if self._accelerator_flag is None: raise MisconfigurationException( - f"Found invalid type for plugin {plug}. Expected a precision or training type plugin." 
+ f"You passed `devices={devices}` but haven't specified" + " `accelerator=('auto'|'tpu'|'gpu'|'ipu'|'cpu')` for the devices mapping" ) - self._strategy = strategy - self._precision_plugin = precision - self._checkpoint_io = checkpoint - self._cluster_environment = cluster_environment - - @property - def accelerator_types(self) -> List[str]: - return ["auto"] + list(_AcceleratorType) - - @property - def precision_plugin(self) -> PrecisionPlugin: - if self._precision_plugin is None: - self._precision_plugin = self.select_precision_plugin() - return self._precision_plugin - - def final_strategy(self) -> Strategy: - if self._strategy is None: - self._strategy = self.select_strategy() - self._strategy = self.resolve_strategy(self._strategy) - # attach checkpoint plugin to the training type plugin - if self._checkpoint_io is not None: - self._strategy.checkpoint_io = self._checkpoint_io - if ( - isinstance(self._strategy_flag, Strategy) and self._strategy_flag._precision_plugin is None - ) or not isinstance(self._strategy_flag, Strategy): - precision_plugin = self.precision_plugin - if precision_plugin is not None: - self._strategy.precision_plugin = precision_plugin - if (isinstance(self._strategy_flag, Strategy) and self._strategy_flag.accelerator is None) or not isinstance( - self._strategy_flag, Strategy - ): - self._strategy.accelerator = self.select_accelerator() - return self._strategy - - @property - def cluster_environment(self) -> ClusterEnvironment: - if self._cluster_environment is None: - self._cluster_environment = self.select_cluster_environment() - return self._cluster_environment - - @property - def has_cpu(self) -> bool: - return True - - @property - def use_cpu(self) -> bool: - return self._accelerator_type == _AcceleratorType.CPU - - @property - def has_gpu(self) -> bool: - # Here, we are not checking for GPU availability, but instead if User has passed - # `gpus` to Trainer for training. - gpus = self.parallel_device_ids - if gpus is not None and len(gpus) > 0: - return True - return self._map_devices_to_accelerator(_AcceleratorType.GPU) - - @property - def use_gpu(self) -> bool: - return self._accelerator_type == _AcceleratorType.GPU and self.has_gpu - - @property - def has_tpu(self) -> bool: - # Here, we are not checking for TPU availability, but instead if User has passed - # `tpu_cores` to Trainer for training. - if self.tpu_cores is not None: - return True - return self._map_devices_to_accelerator(_AcceleratorType.TPU) - @property - def use_tpu(self) -> bool: - return self._accelerator_type == _AcceleratorType.TPU and self.has_tpu - - @property - def tpu_id(self) -> Optional[int]: - if self.use_tpu and isinstance(self.tpu_cores, list): - return self.tpu_cores[0] - return None - - @property - def has_ipu(self) -> bool: - # Here, we are not checking for IPU availability, but instead if User has passed - # `ipus` to Trainer for training. 
- if self.ipus is not None or isinstance(self._strategy, IPUStrategy): - return True - return self._map_devices_to_accelerator(_AcceleratorType.IPU) - - @property - def use_ipu(self) -> bool: - return self._accelerator_type == _AcceleratorType.IPU and self.has_ipu - - def _set_devices_to_cpu_num_processes(self) -> None: - if self.num_processes == 1: - self._map_devices_to_accelerator(_AcceleratorType.CPU) - - def _map_devices_to_accelerator(self, accelerator: str) -> bool: - if self.devices is None: - return False - if accelerator == _AcceleratorType.TPU and _TPU_AVAILABLE: - if self.devices == "auto": - self.devices = TPUAccelerator.auto_device_count() - self.tpu_cores = device_parser.parse_tpu_cores(self.devices) - return True - if accelerator == _AcceleratorType.IPU and _IPU_AVAILABLE: - if self.devices == "auto": - self.devices = IPUAccelerator.auto_device_count() - self.ipus = self.devices - return True - if accelerator == _AcceleratorType.GPU and torch.cuda.is_available(): - if self.devices == "auto": - self.devices = GPUAccelerator.auto_device_count() - self.gpus = self.devices - self.parallel_device_ids = device_parser.parse_gpu_ids(self.devices) - return True - if accelerator == _AcceleratorType.CPU: - if self.devices == "auto": - self.devices = CPUAccelerator.auto_device_count() - if not isinstance(self.devices, int): + def _mapping_deprecated_devices_specfic_info_to_accelerator_and_device_flag(self, devices, deprecated_devices_specific_flag, num_processes, gpus, ipus, tpu_cores): + ##### to be deleted v1.7vbg + # set devices base on num_processes, gpus, ipus, tpu_cores + if devices: + rank_zero_warn(f"The flag `devices={devices}` will be ignored, instand the device specific number {deprecated_devices_specific_flag} will be used") + if [(num_processes is not None), (gpus is not None), (ipus is not None), (tpu_cores is not None)].count(True) > 1: + rank_zero_warn(f"more than one device specifc flag has been set") + self._device_flag = deprecated_devices_specific_flag + + if not self._accelerator_flag: + # set accelerator type base on num_processes, gpus, ipus, tpu_cores + if num_processes: + self._accelerator_flag = "cpu" + if gpus: + self._accelerator_flag = "gpu" + if tpu_cores: + self._accelerator_flag = "tpu" + if ipus: + self._accelerator_flag = "ipu" + #### delete end + + def _choose_accelerator(self): + if self._accelerator_flag == "auto": + if _TPU_AVAILABLE: + self._accelerator_flag = "tpu" + elif _IPU_AVAILABLE: + self._accelerator_flag = "ipu" + elif _GPU_AVAILABLE: + self._accelerator_flag = "gpu" + else: + self._accelerator_flag = "cpu" + if self._device_flag == "auto": + self._device_flag = 1 + # [RFC] this is current logic, if accelerator not set, default cpu? 
+ else: + self._accelerator_flag = "cpu" + + + def _check_device_availibility(self): + for accelerator_flag, available in zip(self._existing_accelerator_type, [_TPU_AVAILABLE, _IPU_AVAILABLE, _GPU_AVAILABLE, True]): + if self._accelerator_flag == accelerator_flag: + if not available: + raise DeviceNotAvailibleException(f"{accelerator_flag} not avalible") + + # TODO in progress for setting up devices + def _set_parallel_devices_and_init_accelerator(self): + self._parallel_devices = [] + + if isinstance(self._accelerator_flag, Accelerator): + self.accelerator = self._accelerator_flag + elif self._accelerator_flag == "tpu": + self.accelerator = TPUAccelerator() + if self._device_flag == "auto" or not self._device_flag: + self._device_flag = TPUAccelerator.auto_device_count() + if isinstance(self._device_flag, int): + self._parallel_devices = list(range(self._device_flag)) + + elif self._accelerator_flag == "ipu": + self.accelerator = IPUAccelerator() + if self._device_flag == "auto" or not self._device_flag: + self._device_flag = IPUAccelerator.auto_device_count() + if isinstance(self._device_flag, int): + self._parallel_devices = list(range(self._device_flag)) + + elif self._accelerator_flag == "gpu": + self.accelerator = GPUAccelerator() + if self._device_flag == "auto" or not self._device_flag: + self._device_flag = GPUAccelerator.auto_device_count() + if isinstance(self._device_flag, int) or isinstance(self._device_flag, str): + self._device_flag = int(self._device_flag) + self._parallel_devices = [torch.device("cuda", i) for i in device_parser.parse_gpu_ids(self._device_flag)] + elif isinstance(self._device_flag, list): + self._parallel_devices = [torch.device("cuda", i) for i in self._device_flag] + + + elif self._accelerator_flag == "cpu": + self.accelerator = CPUAccelerator() + if self._device_flag == "auto" or not self._device_flag: + self._device_flag = CPUAccelerator.auto_device_count() + if not isinstance(self._device_flag, int): raise MisconfigurationException( "The flag `devices` must be an int with `accelerator='cpu'`," - f" got `devices={self.devices}` instead." + f" got `devices={self._device_flag}` instead." 
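A condensed sketch of the two steps shown above, `_choose_accelerator` followed by the per-accelerator device setup, with availability passed in as booleans instead of the `_TPU_AVAILABLE`/`_IPU_AVAILABLE`/`_GPU_AVAILABLE` flags and GPU ids taken from a simple `range` rather than `device_parser.parse_gpu_ids`:

    import torch

    def choose_accelerator(flag, tpu=False, ipu=False, gpu=False):
        # "auto" resolves in a fixed preference order; an unset flag currently defaults to CPU.
        if flag not in ("auto", None):
            return flag
        if flag is None:
            return "cpu"
        if tpu:
            return "tpu"
        if ipu:
            return "ipu"
        if gpu:
            return "gpu"
        return "cpu"

    def build_parallel_devices(accelerator, device_flag):
        # Map the accelerator string plus a device count (or explicit GPU id list) to device objects.
        if accelerator in ("tpu", "ipu"):
            return list(range(device_flag))
        if accelerator == "gpu":
            ids = device_flag if isinstance(device_flag, list) else range(int(device_flag))
            return [torch.device("cuda", i) for i in ids]
        if accelerator == "cpu":
            if not isinstance(device_flag, int):
                raise ValueError("`devices` must be an int with `accelerator='cpu'`")
            return [torch.device("cpu")] * device_flag
        raise ValueError(f"unknown accelerator {accelerator!r}")

    assert choose_accelerator("auto", gpu=True) == "gpu"
    assert choose_accelerator(None, gpu=True) == "cpu"   # the [RFC] default noted above
    assert build_parallel_devices("gpu", 2) == [torch.device("cuda", 0), torch.device("cuda", 1)]
    assert len(build_parallel_devices("cpu", 4)) == 4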
) +<<<<<<< HEAD self.num_processes = self.devices return True return False @@ -519,19 +425,26 @@ def use_ddp(self) -> bool: _StrategyType.DEEPSPEED, _StrategyType.TPU_SPAWN, ) +======= + self._parallel_devices = [torch.device("cpu")] * self._device_flag +>>>>>>> dccae1d6f (update) - @property - def use_ddp2(self) -> bool: - return self._strategy_type == _StrategyType.DDP2 + self._gpus = self._device_flag - @property - def use_horovod(self) -> bool: - return self._strategy_type == _StrategyType.HOROVOD - @property - def use_deepspeed(self) -> bool: - return self._strategy_type == _StrategyType.DEEPSPEED + def _choose_and_init_cluster_environment(self): + self.cluster_environment = LightningEnvironment() + if isinstance(self._cluster_environment, ClusterEnvironment): + self.cluster_environment = self._cluster_environment + elif self._is_slurm_managing_tasks(): + rank_zero_info("Multiprocessing is handled by SLURM.") + self.cluster_environment = SLURMEnvironment() + else: + for env_type in (TorchElasticEnvironment, KubeflowEnvironment, LSFEnvironment): + if env_type.detect(): + self.cluster_environment = env_type() +<<<<<<< HEAD @property def use_bagua(self) -> bool: return self._strategy_type == _StrategyType.BAGUA @@ -539,493 +452,277 @@ def use_bagua(self) -> bool: @property def _is_sharded_training_type(self) -> bool: return isinstance(self._strategy, (DDPShardedStrategy, DDPSpawnShardedStrategy)) +======= +>>>>>>> dccae1d6f (update) - @property - def _is_fully_sharded_training_type(self) -> bool: - return isinstance(self._strategy, DDPFullyShardedStrategy) - - @property - def is_distributed(self) -> bool: - # Used for custom plugins. - # Custom plugins should implement is_distributed property. - if hasattr(self.strategy, "is_distributed") and not self.use_tpu: - return self.strategy.is_distributed - is_distributed = self.use_ddp or self.use_ddp2 or self.use_horovod - if self.use_tpu: - is_distributed |= self.strategy.is_distributed - return is_distributed + def _is_slurm_managing_tasks(self): + """ + used by choosing cluster enviroment + """ + if ( + #(not self._strategy_flag=="ddp" and not self._strategy_flag=="ddp2") + # the above logic moved to _select_strategy(), only check _is_slurm_managing_tasks() + # when strategy flag is ddp or ddp2 + not SLURMEnvironment.detect() + or SLURMEnvironment.job_name() == "bash" + ): + return False - @property - def num_gpus(self) -> int: - gpus = self.parallel_device_ids - if gpus is None: - return 0 - return len(gpus) + total_requested_devices = len(self._parallel_devices) * self._num_nodes_flag + num_slurm_tasks = int(os.environ["SLURM_NTASKS"], 0) + return num_slurm_tasks == total_requested_devices - @property - def num_ipus(self) -> int: - if isinstance(self.ipus, int): - return self.ipus - if isinstance(self._strategy, IPUStrategy): - return self._strategy.replication_factor - return 0 + def _choose_strategy(self): + if _HOROVOD_AVAILABLE and ("OMPI_COMM_WORLD_RANK" in os.environ or "HOROVOD_RANK" in os.environ): + self._strategy_flag = HorovodStrategy() - @property - def parallel_devices(self) -> List[Union[torch.device, int]]: - if self.use_gpu: - devices = [torch.device("cuda", i) for i in self.parallel_device_ids] - elif self.use_tpu: - # explicitly don't make a tpu device here! 
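The SLURM branch of the cluster-environment selection above reduces to one rule: SLURM manages the processes only when the scheduler created exactly one task per requested device. A self-contained approximation follows; the real code goes through `SLURMEnvironment.detect()` and `SLURMEnvironment.job_name()` rather than reading the environment directly.

    import os

    def slurm_manages_tasks(num_parallel_devices, num_nodes):
        job_name = os.environ.get("SLURM_JOB_NAME")
        # "bash" indicates an interactive srun session, where Lightning launches processes itself.
        if "SLURM_NTASKS" not in os.environ or job_name == "bash":
            return False
        return int(os.environ["SLURM_NTASKS"]) == num_parallel_devices * num_nodes

    os.environ.update({"SLURM_NTASKS": "8", "SLURM_JOB_NAME": "train"})
    assert slurm_manages_tasks(4, 2) is True
    assert slurm_manages_tasks(4, 1) is False   # task count does not match devices * nodes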
- # https://github.com/PyTorchLightning/pytorch-lightning/issues/3169 - if isinstance(self.tpu_cores, int): - devices = list(range(self.tpu_cores)) - elif self.use_ipu: - devices = list(range(self.num_ipus)) + if self._accelerator_flag == "ipu": + self._strategy_flag = "ipu" + elif self._accelerator_flag == "tpu": + if self._parallel_devices and len(self._parallel_devices)>1: + self._strategy_flag = "tpu_spawn" + else: + self._srategy_flag = SingleTPUStrategy() else: - devices = [torch.device("cpu")] * self.num_processes - return devices - - @property - def root_gpu(self) -> Optional[int]: - return ( - self.strategy.root_device.index - if not isinstance(self.accelerator, (IPUAccelerator, TPUAccelerator)) - else None - ) - - @staticmethod - def _is_plugin_training_type(plugin: Union[str, Strategy]) -> bool: - if isinstance(plugin, str) and (plugin in StrategyRegistry or plugin in list(_StrategyType)): - return True - return isinstance(plugin, Strategy) - - @property - def is_training_type_in_plugins(self) -> bool: - return any( - (isinstance(plug, str) and plug in StrategyRegistry) or isinstance(plug, Strategy) for plug in self.plugins - ) - - def select_precision_plugin(self) -> PrecisionPlugin: - # set precision type - self.amp_type = AMPType.from_str(self.amp_type) + if self._num_nodes_flag > 1: + self._strategy_flag = "ddp" + elif TorchElasticEnvironment.detect() or KubeflowEnvironment.detect() or self._is_slurm_managing_tasks(): + self._strategy_flag = "ddp" + elif len(self._parallel_devices) <= 1: + device = torch.device("cuda") if self._accelerator_flag == "gpu" else "cpu" + self._strategy_flag = SingleDeviceStrategy(device = device) + elif len(self._parallel_devices) > 1: + self._strategy_flag = "ddp_spawn" + else: + self._strategy_flag = "ddp" - # validation for all plugins - if self.amp_level is not None and self.amp_type != AMPType.APEX: - raise MisconfigurationException( - f"You have asked for `amp_level={self.amp_level!r}` but it's only supported with `amp_backend='apex'`." - ) - if self.use_ipu: - if self.precision not in (16, 32): + def _strategy_fallbacks(self): + _strategy_flag = "" if isinstance(self._strategy_flag, Strategy) else self._strategy_flag + if _strategy_flag == "ddp_cpu": + if _TPU_AVAILABLE: raise MisconfigurationException( - f"`Trainer(accelerator='ipu', precision={self.precision!r})` is not supported." + "`accelerator='ddp_cpu'` is not supported on TPU machines. " + "Learn more: https://github.com/PyTorchLightning/pytorch-lightning/issues/7810" ) - return IPUPrecisionPlugin(self.precision) - if self.use_tpu: - if self.precision == 32: - return TPUPrecisionPlugin() - elif self.precision == 64: - raise MisconfigurationException( - "`Trainer(accelerator='tpu', precision=64)` is not implemented." - " Please, open an issue in `https://github.com/PyTorchLightning/pytorch-lightning/issues`" - " requesting this feature." + if self._device_flag ==1 and self._num_nodes_flag > 1: + _strategy_flag = "ddp" + else: + _strategy_flag = "ddp_spawn" + if self._accelerator_flag == "gpu": + rank_zero_warn( + "You requested one or more GPUs, but set `accelerator='ddp_cpu'`. Training will not use GPUs." 
) - elif self.precision in (16, "bf16"): - if self.precision == 16: + if "ddp_spawn" in _strategy_flag and (TorchElasticEnvironment.detect() or KubeflowEnvironment.detect() or self._is_slurm_managing_tasks()): + _strategy_flag = "ddp" + if _strategy_flag in ("dp", "ddp2") and self._accelerator_flag == "cpu": + rank_zero_warn( + f"{_strategy_flag!r} is not supported on CPUs, hence setting `strategy='ddp'`." + ) + _strategy_flag = "ddp" + if _strategy_flag: + self._strategy_flag = _strategy_flag + + def _init_strategy(self): + print(self._strategy_flag) + if isinstance(self._strategy_flag, str): + self.strategy = StrategyRegistry.get(self._strategy_flag) + else: + self.strategy = self._strategy_flag + + def _check_capatibility_and_init_precision(self): + print(self._precision_flag) + self._precision_misconfig_check() + if isinstance(self._precision_flag, PrecisionPlugin): + self.precision_plugin = self._precision_flag + return + + if self._accelerator_flag =="ipu": + self.precision_plugin = IPUPrecisionPlugin(self._precision_flag) + if self._accelerator_flag == "tpu": + if self._precision_flag == 32: + self.precision_plugin = TPUPrecisionPlugin() + elif self._precision_flag in (16, "bf16"): + if self._precision_flag == 16: # this is not deprecated to ease transition between accelerator environments rank_zero_warn( - f"You passed `Trainer(accelerator='tpu', precision=16)` but {self.amp_type.value} AMP" + f"You passed `Trainer(accelerator='tpu', precision=16)` but {self._amp_type_flag.value} AMP" f" is not supported with TPUs. Using `precision='bf16'` instead." ) - return TPUBf16PrecisionPlugin() + self.precision_plugin = TPUBf16PrecisionPlugin() + if self._strategy_flag == "deepspeed" or isinstance(self._strategy_flag, DeepSpeedStrategy): + self.precision_plugin = DeepSpeedPrecisionPlugin(self._precision_flag, self._amp_type_flag, self._amp_level_flag) - if self._strategy_type == _StrategyType.DEEPSPEED or isinstance(self._strategy, DeepSpeedStrategy): - return DeepSpeedPrecisionPlugin(self.precision, self.amp_type, self.amp_level) - - if self.precision == 32: - return PrecisionPlugin() - if self.precision == 64: - return DoublePrecisionPlugin() + if self._precision_flag == 32: + self.precision_plugin = PrecisionPlugin() + if self._precision_flag == 64: + self.precision_plugin = DoublePrecisionPlugin() # maybe convert the precision value - if self.precision == 16 and self.use_cpu: - if self.amp_type == AMPType.APEX: - # apex was explicitly passed, not a good idea to silently switch to native AMP - raise MisconfigurationException( - "You passed `Trainer(accelerator='cpu', precision=16, amp_type='apex')`" - " but apex AMP not supported on CPU." - ) + if self._precision_flag == 16 and self._accelerator_flag == "cpu": # this automatic switch is to ease transition between accelerator environments rank_zero_warn( "You passed `Trainer(accelerator='cpu', precision=16)` but native AMP is not supported on CPU." " Using `precision='bf16'` instead." ) - self.precision = "bf16" - - if self.precision in (16, "bf16"): - if self.precision == "bf16" and self.amp_type != AMPType.NATIVE: - raise MisconfigurationException( - f"You passed `Trainer(amp_type={self.amp_type.value!r}, precision='bf16')` but it's not supported." - " Try using `amp_type='native'` instead." 
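The precision selection in this hunk (with the native/apex handling continued just below) amounts to a small decision table. The sketch returns plugin names as strings rather than instantiating Lightning classes, and folds the sharded variants into the plain native case:

    def choose_precision(precision, accelerator, amp_type="native", strategy=""):
        """Illustrative: name of the precision plugin the flags above would select."""
        if accelerator == "ipu":
            return "IPUPrecisionPlugin"
        if accelerator == "tpu":
            return "TPUPrecisionPlugin" if precision == 32 else "TPUBf16PrecisionPlugin"
        if strategy == "deepspeed":
            return "DeepSpeedPrecisionPlugin"
        if precision == 32:
            return "PrecisionPlugin"
        if precision == 64:
            return "DoublePrecisionPlugin"
        if precision == 16 and accelerator == "cpu":
            precision = "bf16"                      # native AMP on CPU falls back to bfloat16
        if precision in (16, "bf16"):
            return "NativeMixedPrecisionPlugin" if amp_type == "native" else "ApexMixedPrecisionPlugin"
        raise ValueError(f"unsupported precision {precision!r}")

    assert choose_precision(16, "gpu") == "NativeMixedPrecisionPlugin"
    assert choose_precision(16, "cpu") == "NativeMixedPrecisionPlugin"   # after the bf16 fallback
    assert choose_precision(32, "tpu") == "TPUPrecisionPlugin"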
- ) + self._precision_flag = "bf16" + if self._precision_flag in (16, "bf16"): rank_zero_info( - f"Using 16bit {self.amp_type.value} Automatic Mixed Precision (AMP)" - if self.precision == 16 + f"Using 16bit {self._amp_type_flag.value} Automatic Mixed Precision (AMP)" + if self._precision_flag == 16 else "Using bfloat16 Automatic Mixed Precision (AMP)" ) - if self.amp_type == AMPType.NATIVE: - device = "cpu" if self.use_cpu else "cuda" - - if self._is_sharded_training_type: - return ShardedNativeMixedPrecisionPlugin(self.precision, device) - if self._is_fully_sharded_training_type: - return FullyShardedNativeMixedPrecisionPlugin(self.precision, device) - return NativeMixedPrecisionPlugin(self.precision, device) - - if self.amp_type == AMPType.APEX: - if self._is_sharded_training_type or self._is_fully_sharded_training_type: - raise MisconfigurationException( - "Sharded plugins are not supported with apex, please switch to `amp_backend='native'`." - ) - self.amp_level = self.amp_level or "O2" - return ApexMixedPrecisionPlugin(self.amp_level) - - raise RuntimeError("No precision set") - - def select_strategy(self) -> Strategy: - if isinstance(self.distributed_backend, Accelerator) and self.distributed_backend.strategy is not None: - plugin = self.distributed_backend.strategy - elif self.use_ddp2: - plugin = DDP2Strategy(parallel_devices=self.parallel_devices, cluster_environment=self.cluster_environment) - elif self.use_ddp and self.use_deepspeed: - plugin = DeepSpeedStrategy( - cluster_environment=self.select_cluster_environment(), parallel_devices=self.parallel_devices - ) - elif self.use_ddp and self.use_bagua: - plugin = BaguaStrategy(parallel_devices=self.parallel_devices, cluster_environment=self.cluster_environment) - elif self.use_ddp: - use_slurm_ddp = self.use_ddp and self._is_slurm_managing_tasks() - use_torchelastic_ddp = self.use_ddp and TorchElasticEnvironment.detect() - use_kubeflow_ddp = self.use_ddp and KubeflowEnvironment.detect() - use_ddp_spawn = self._strategy_type == _StrategyType.DDP_SPAWN - use_ddp_cpu_spawn = use_ddp_spawn and self.use_cpu - use_tpu_spawn = self.use_tpu and self._strategy_type == _StrategyType.TPU_SPAWN - use_ddp_cpu_torch_elastic = use_ddp_cpu_spawn and TorchElasticEnvironment.detect() - use_ddp_cpu_kubeflow = use_ddp_cpu_spawn and KubeflowEnvironment.detect() - use_ddp_cpu_slurm = use_ddp_cpu_spawn and self._is_slurm_managing_tasks() - use_ddp_sharded = self._strategy_type == _StrategyType.DDP_SHARDED - use_ddp_sharded_spawn = self._strategy_type == _StrategyType.DDP_SHARDED_SPAWN - use_ddp_fully_sharded = self._strategy_type == _StrategyType.DDP_FULLY_SHARDED - - if use_tpu_spawn: - ddp_strategy_cls = TPUSpawnStrategy - elif use_ddp_sharded: - ddp_strategy_cls = DDPShardedStrategy - elif use_ddp_sharded_spawn: - ddp_strategy_cls = DDPSpawnShardedStrategy - elif ( - use_ddp_cpu_slurm - or use_slurm_ddp - or use_ddp_cpu_torch_elastic - or use_torchelastic_ddp - or use_kubeflow_ddp - or use_ddp_cpu_kubeflow - ): - ddp_strategy_cls = DDPStrategy - elif use_ddp_spawn or use_ddp_cpu_spawn: - ddp_strategy_cls = DDPSpawnStrategy - elif use_ddp_fully_sharded: - ddp_strategy_cls = DDPFullyShardedStrategy - else: - ddp_strategy_cls = DDPStrategy - - plugin = ddp_strategy_cls( - parallel_devices=self.parallel_devices, cluster_environment=self.cluster_environment - ) - elif self.use_dp: - plugin = DataParallelStrategy(parallel_devices=self.parallel_devices) - elif self.use_horovod: - plugin = HorovodStrategy(parallel_devices=self.parallel_devices) - elif 
self.use_tpu and isinstance(self.tpu_cores, list): - plugin = SingleTPUStrategy(self.tpu_id) - elif self.use_ipu: - plugin = IPUStrategy(parallel_devices=self.parallel_devices) - else: - single_gpu_ordinal = device_parser.determine_root_gpu_device(self.parallel_device_ids) - plugin = SingleDeviceStrategy(device=single_gpu_ordinal if self.use_gpu else "cpu") - return plugin - - def resolve_strategy(self, training_type: Strategy) -> Strategy: - # necessary for when the user has passed in a plugin - if hasattr(training_type, "parallel_devices") and getattr(training_type, "parallel_devices") is None: - training_type.parallel_devices = self.parallel_devices - - if hasattr(training_type, "cluster_environment") and getattr(training_type, "cluster_environment") is None: - # transfer ownership of the cluster environment to the training type - training_type.cluster_environment = self.cluster_environment - self._cluster_environment = proxy(self.cluster_environment) - - if hasattr(training_type, "num_nodes"): - # set num_nodes for training_type from trainer setting - training_type.num_nodes = self.num_nodes - - if hasattr(training_type, "sync_batchnorm"): - # set sync_batchnorm for training_type from trainer setting - training_type.sync_batchnorm = self.sync_batchnorm - - return training_type - - def select_accelerator(self) -> Accelerator: - if isinstance(self.distributed_backend, Accelerator): - # custom accelerator from user - if self._precision_plugin is not None or self._strategy is not None: - # plugins also specified by user - rank_zero_warn( - "Specified `Precision` and `TrainingType` plugins will be ignored," - " since an `Accelerator` instance was provided." - ) - return self.distributed_backend - - if self.use_gpu: - acc_cls = GPUAccelerator - elif self.use_tpu: - acc_cls = TPUAccelerator - elif self.use_ipu: - acc_cls = IPUAccelerator - else: - acc_cls = CPUAccelerator + if self._amp_type_flag == AMPType.NATIVE: + device = "cpu" if self._accelerator_flag=="cpu" else "cuda" - accelerator = acc_cls() - return accelerator + if isinstance(self.strategy, (DDPShardedStrategy, DDPSpawnShardedStrategy)): + return ShardedNativeMixedPrecisionPlugin(self._precision_flag, device) + if isinstance(self.strategy, DDPFullyShardedStrategy): + return FullyShardedNativeMixedPrecisionPlugin(self._precision_flag, device) + return NativeMixedPrecisionPlugin(self._precision_flag, device) - def select_cluster_environment(self) -> ClusterEnvironment: - if self._cluster_environment is not None: - return self._cluster_environment - if self._is_slurm_managing_tasks(): - rank_zero_info("Multiprocessing is handled by SLURM.") - return SLURMEnvironment() + self._amp_level_flag = self._amp_level_flag or "O2" + self.precision_plugin = ApexMixedPrecisionPlugin(self._amp_level_flag) + self.precision_plugin = PrecisionPlugin() - for env_type in (BaguaEnvironment, TorchElasticEnvironment, KubeflowEnvironment, LSFEnvironment): - if env_type.detect(): - return env_type() + def _precision_misconfig_check(self): - return LightningEnvironment() - - def set_distributed_mode(self, strategy: Optional[str] = None): - - if strategy is None and self.is_training_type_in_plugins: - return - - if strategy is not None and strategy in StrategyRegistry: - self.distributed_backend = StrategyRegistry[strategy]["distributed_backend"] - elif strategy is not None: - self.distributed_backend = strategy - - if isinstance(self.distributed_backend, Accelerator): - return - - is_cpu_accelerator_type = self._accelerator_type and self._accelerator_type 
== _AcceleratorType.CPU - _use_cpu = is_cpu_accelerator_type or self.distributed_backend and "cpu" in self.distributed_backend - - if self.distributed_backend is None: - if self.has_horovodrun(): - self._set_horovod_backend() - elif self.num_gpus == 0 and self.num_nodes > 1: - self._strategy_type = _StrategyType.DDP - elif self.num_gpus == 0 and self.num_processes > 1: - self.distributed_backend = _StrategyType.DDP_SPAWN - elif self.num_gpus > 1 and not _use_cpu: - rank_zero_warn( - "You requested multiple GPUs but did not specify a backend, e.g." - ' `Trainer(strategy="dp"|"ddp"|"ddp2")`. Setting `strategy="ddp_spawn"` for you.' + if self._accelerator_flag == "ipu": + if self._precision_flag not in (16, 32): + raise MisconfigurationException( + f"`Trainer(accelerator='ipu', precision={self._precision_flag!r})` is not supported." ) - self.distributed_backend = _StrategyType.DDP_SPAWN - - # special case with DDP on CPUs - if self.distributed_backend == _StrategyType.DDP_CPU: - if _TPU_AVAILABLE: + if self._accelerator_flag == "tpu" and self._precision_flag == 64: raise MisconfigurationException( - "`accelerator='ddp_cpu'` is not supported on TPU machines. " - "Learn more: https://github.com/PyTorchLightning/pytorch-lightning/issues/7810" + "`Trainer(accelerator='tpu', precision=64)` is not implemented." + " Please, open an issue in `https://github.com/PyTorchLightning/pytorch-lightning/issues`" + " requesting this feature." ) - if self.num_processes == 1 and self.num_nodes > 1: - self._strategy_type = _StrategyType.DDP - else: - self._strategy_type = _StrategyType.DDP_SPAWN - if self.num_gpus > 0: - rank_zero_warn( - "You requested one or more GPUs, but set `accelerator='ddp_cpu'`. Training will not use GPUs." + if self._precision_flag == 16 and self._accelerator_flag == "cpu" and self._amp_type_flag == AMPType.APEX: + # apex was explicitly passed, not a good idea to silently switch to native AMP + raise MisconfigurationException( + "You passed `Trainer(accelerator='cpu', precision=16, amp_type='apex')`" + " but apex AMP not supported on CPU." ) - self.parallel_device_ids = None - if self.num_processes is None: - # define the max CPU available - self.num_processes = os.cpu_count() - # special case with TPUs - elif self.has_tpu and not _use_cpu: - self._device_type = _AcceleratorType.TPU - if isinstance(self.tpu_cores, int): - self._strategy_type = _StrategyType.TPU_SPAWN - elif self.has_ipu and not _use_cpu: - self._device_type = _AcceleratorType.IPU - elif self.distributed_backend and self._strategy_type is None: - self._strategy_type = _StrategyType(self.distributed_backend) - - if self.num_gpus > 0 and not _use_cpu: - self._device_type = _AcceleratorType.GPU - - _gpu_strategy_types = (_StrategyType.DP, _StrategyType.DDP, _StrategyType.DDP_SPAWN, _StrategyType.DDP2) - # DP and DDP2 cannot run without GPU - if self.num_gpus == 0 and self._strategy_type in _gpu_strategy_types and not _use_cpu: - - if (self.num_nodes and self.num_nodes > 1) or (self.num_processes and self.num_processes > 1): - if self._strategy_type in (_StrategyType.DP, _StrategyType.DDP2): - rank_zero_warn( - f"{self._strategy_type.value!r} is not supported on CPUs, hence setting `strategy='ddp'`." 
- ) - self._strategy_type = _StrategyType.DDP - else: - rank_zero_warn("You are running on single node with no parallelization, so distributed has no effect.") - self._strategy_type = None - - # finished configuring self._strategy_type, check ipython environment - self.check_interactive_compatibility() - - # for DDP overwrite nb processes by requested GPUs - if self._device_type == _AcceleratorType.GPU and self._strategy_type in ( - _StrategyType.DDP, - _StrategyType.DDP_SPAWN, - ): - self.num_processes = self.num_gpus - - if self._device_type == _AcceleratorType.GPU and self._strategy_type == _StrategyType.DDP2: - self.num_processes = self.num_nodes - - # Horovod is an extra case... - if self.distributed_backend == _StrategyType.HOROVOD: - self._set_horovod_backend() - - using_valid_distributed = self.use_ddp or self.use_ddp2 - if self.num_nodes > 1 and not using_valid_distributed: - # throw error to force user to choose a supported strategy type such as ddp or ddp2 + if self._precision_flag == "bf16" and self._amp_type_flag != AMPType.NATIVE: raise MisconfigurationException( - "Your chosen strategy does not support `num_nodes > 1`. Please set `strategy=('ddp'|'ddp2')`." + f"You passed `Trainer(amp_type={self._amp_type_flag.value!r}, precision='bf16')` but it's not supported." + " Try using `amp_type='native'` instead." ) - def _set_horovod_backend(self): - self.check_horovod() - self._strategy_type = _StrategyType.HOROVOD + if self._precision_flag in (16, "bf16") and self._amp_type_flag == AMPType.APEX: + if isinstance(self.strategy, (DDPShardedStrategy, DDPSpawnShardedStrategy, DDPFullyShardedStrategy)): + raise MisconfigurationException( + "Sharded plugins are not supported with apex, please switch to `amp_backend='native'`." + ) - # Initialize Horovod to get rank / size info - hvd.init() - if self.has_gpu: - # Horovod assigns one local GPU per process - self.parallel_device_ids = list(range(hvd.local_size())) - else: - self.num_processes = hvd.local_size() - def check_interactive_compatibility(self): - """Raises a `MisconfigurationException` if the accelerator and/or plugin is not compatible with an - interactive environment.""" - from pytorch_lightning.utilities import _IS_INTERACTIVE + def _lazy_init_strategy(self): + # set strategy properties + self.strategy.accelerator = self.accelerator + if self.precision_plugin: + self.strategy.precision_plugin = self.precision_plugin + if self.checkpoint_io: + self.strategy.checkpoint_io = self.checkpoint_io + self.strategy.cluster_environment = self.cluster_environment + if hasattr(self.strategy, "parallel_devices"): + self.strategy.parallel_devices = self._parallel_devices - if _IS_INTERACTIVE and self._strategy_type is not None and not self._strategy_type.is_interactive_compatible(): + from pytorch_lightning.utilities import _IS_INTERACTIVE + interactive_compatible_strategy = ("dp", "ddp_spawn", "ddp_sharded_spawn", "tpu_spawn") + if _IS_INTERACTIVE and self.strategy.distributed_backend not in interactive_compatible_strategy: raise MisconfigurationException( - f"`Trainer(strategy={self._strategy_type.value!r})` or" - f" `Trainer(accelerator={self._strategy_type.value!r})` is not compatible with an interactive" + f"`Trainer(strategy={self.strategy.distributed_backend!r})` or" + f" `Trainer(accelerator={self.strategy.distributed_backend!r})` is not compatible with an interactive" " environment. Run your code as a script, or choose one of the compatible backends:" - f" {', '.join(_StrategyType.interactive_compatible_types())}." 
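The `_precision_misconfig_check` guards spread across the last two hunks condense to five rules. A dependency-free restatement, raising `ValueError` in place of `MisconfigurationException` and taking a boolean for the sharded-strategy case:

    def validate_precision(precision, accelerator, amp_type="native", sharded=False):
        if accelerator == "ipu" and precision not in (16, 32):
            raise ValueError("IPU supports only precision 16 or 32")
        if accelerator == "tpu" and precision == 64:
            raise ValueError("precision=64 is not implemented on TPU")
        if accelerator == "cpu" and precision == 16 and amp_type == "apex":
            raise ValueError("apex AMP is not supported on CPU")
        if precision == "bf16" and amp_type != "native":
            raise ValueError("precision='bf16' requires amp_type='native'")
        if precision in (16, "bf16") and amp_type == "apex" and sharded:
            raise ValueError("sharded strategies are not supported with apex")

    validate_precision(16, "gpu")                       # passes silently
    try:
        validate_precision(64, "tpu")
    except ValueError:
        pass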
+ f" {', '.join(interactive_compatible_strategy)}." " In case you are spawning processes yourself, make sure to include the Trainer" " creation inside the worker function." ) - def check_horovod(self): - """Raises a `MisconfigurationException` if the Trainer is not configured correctly for Horovod.""" - if not _HOROVOD_AVAILABLE: - raise MisconfigurationException( - 'Requested `accelerator="horovod"`, but Horovod is not installed.' - "Install with \n $HOROVOD_WITH_PYTORCH=1 pip install horovod[pytorch]" - ) - if self.num_gpus > 1 or self.num_nodes > 1: - raise MisconfigurationException( - "Horovod does not support setting num_nodes / num_gpus explicitly. Use " - "horovodrun / mpirun to configure the number of processes." - ) - @staticmethod - def has_horovodrun() -> bool: - """Returns True if running with `horovodrun` using Gloo or OpenMPI.""" - return _HOROVOD_AVAILABLE and ("OMPI_COMM_WORLD_RANK" in os.environ or "HOROVOD_RANK" in os.environ) - - def update_device_type_if_ipu_plugin(self) -> None: - # This allows the poptorch.Options that are passed into the IPUStrategy to be the source of truth, - # which gives users the flexibility to not have to pass `ipus` flag directly to Trainer - if isinstance(self._strategy, IPUStrategy) and self._device_type != _AcceleratorType.IPU: - self._device_type = _AcceleratorType.IPU - - def update_device_type_if_strategy_passed(self) -> None: - if isinstance(self._strategy_flag, Strategy) or any(isinstance(plug, Strategy) for plug in self.plugins): - if self._accelerator_type is not None: - if self.use_ipu: - self._device_type = _AcceleratorType.IPU - elif self.use_tpu: - self._device_type = _AcceleratorType.TPU - elif self.use_gpu: - self._device_type = _AcceleratorType.GPU - else: - if self.has_ipu: - self._device_type = _AcceleratorType.IPU - elif self.has_tpu: - self._device_type = _AcceleratorType.TPU - elif self.has_gpu: - self._device_type = _AcceleratorType.GPU - - def _set_strategy_type_if_strategy_passed(self): - # This is required as when `Strategy` instance is passed to either `strategy` - # or `plugins` flag, `AcceleratorConnector.set_distributed_mode` is not required to be - # called and `_strategy_type` is not set. - if self._strategy_type is not None: - return - if self._strategy is not None: - self._strategy_type = getattr(self._strategy, "distributed_backend", None) + ############################################################################## + # the following logic should be deprecated/removed, and these information should be + # retrive from strategies and accelerators + # Added here to keep backward compabilities - def _is_slurm_managing_tasks(self) -> bool: - """Returns whether we let SLURM manage the processes or not. 
+ @property + def parallel_devices(self) -> List[Union[torch.device, int]]: + return self._parallel_devices - Returns ``True`` if and only if these conditions match: + # def _distrib_type(): + @property + def device_type(self): + if isinstance(self.accelerator, CPUAccelerator): + return "cpu" + if isinstance(self.accelerator, GPUAccelerator): + return "gpu" + if isinstance(self.accelerator, TPUAccelerator): + return "tpu" + if isinstance(self.accelerator, IPUAccelerator): + return "ipu" - - A SLURM cluster is detected - - A distributed plugin is being used - - The process is not launching in interactive mode - - The number of tasks in SLURM matches the requested number of devices and nodes in the Trainer - """ - if ( - (not self.use_ddp and not self.use_ddp2) - or not SLURMEnvironment.detect() - or SLURMEnvironment.job_name() == "bash" # in interactive mode we don't manage tasks - ): - return False + @property + def num_nodes(self): + return self._num_nodes - total_requested_devices = (self.num_gpus or self.num_processes) * self.num_nodes - num_slurm_tasks = int(os.environ["SLURM_NTASKS"], 0) - return num_slurm_tasks == total_requested_devices + @property + def num_processes(self): + return self.devices - def _check_plugin_compatibility(self) -> None: - """Checks that selected plugins are compatible with each other. + @property + def root_gpu(self) -> Optional[int]: + return ( + self.strategy.root_device.index + if not isinstance(self.accelerator, (IPUAccelerator, TPUAccelerator)) + else None + ) - Raises: - ValueError: If an invalid combination of Accelerator, Strategy, PrecisionPlugin is found. - """ + @property + def devices(self): + return len(self._parallel_devices) + + @property + def tpu_cores(self) -> int: + return self.devices + + @property + def ipus(self) -> int: + return self.devices + + @property + def num_gpus(self) -> int: + return self.devices + + # def parallel_device_ids(): + @property + def gpus(self): + return self._gpus if isinstance(self.accelerator, GPUAccelerator) else None + + + def is_distributed(self): + # Used for custom plugins. + # Custom plugins should implement is_distributed property. + if hasattr(self.strategy, "is_distributed") and not isinstance(self.accelerator, TPUAccelerator): + return self.strategy.is_distributed + distributed_strategy = (DDP2Strategy, DDPStrategy, DDPSpawnShardedStrategy, DDPShardedStrategy, DDPFullyShardedStrategy, DDPSpawnStrategy, DeepSpeedStrategy, TPUSpawnStrategy, HorovodStrategy) + is_distributed = isinstance(self.strategy, distributed_strategy) if isinstance(self.accelerator, TPUAccelerator): - if not isinstance(self.strategy.precision_plugin, TPUPrecisionPlugin): - raise ValueError( - f"The `TPUAccelerator` can only be used with a `TPUPrecisionPlugin`," - f" found: {self.strategy.precision_plugin}." - ) - if not isinstance(self.strategy, (SingleTPUStrategy, TPUSpawnStrategy)): - raise ValueError( - "The `TPUAccelerator` can only be used with a `SingleTPUStrategy` or `TPUSpawnStrategy`," - f" found {self.strategy}." 
- ) + is_distributed |= self.strategy.is_distributed + return is_distributed + + def has_ipu(self): + return isinstance(self.accelerator, IPUAccelerator) + + def has_tpu(self): + return isinstance(self.accelerator, TPUAccelerator) + + def use_dp(self): + return isinstance(self.strategy, DataParallelStrategy) + + @property + def _strategy_type(self) -> _StrategyType: + return self.strategy.distributed_backend diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector_new.py b/pytorch_lightning/trainer/connectors/accelerator_connector_new.py deleted file mode 100644 index 8c69ef6b8ad5a..0000000000000 --- a/pytorch_lightning/trainer/connectors/accelerator_connector_new.py +++ /dev/null @@ -1,680 +0,0 @@ -# Copyright The PyTorch Lightning team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import os -from typing import List, Optional, Sequence, Union -from weakref import proxy - -import torch - -from pytorch_lightning.accelerators.accelerator import Accelerator -from pytorch_lightning.accelerators.cpu import CPUAccelerator -from pytorch_lightning.accelerators.gpu import GPUAccelerator -from pytorch_lightning.accelerators.ipu import IPUAccelerator -from pytorch_lightning.accelerators.tpu import TPUAccelerator -from pytorch_lightning.plugins import ( - ApexMixedPrecisionPlugin, - CheckpointIO, - DeepSpeedPrecisionPlugin, - DoublePrecisionPlugin, - FullyShardedNativeMixedPrecisionPlugin, - IPUPrecisionPlugin, - NativeMixedPrecisionPlugin, - PrecisionPlugin, - ShardedNativeMixedPrecisionPlugin, - TPUBf16PrecisionPlugin, - TPUPrecisionPlugin, -) -from pytorch_lightning.plugins.environments import ( - ClusterEnvironment, - KubeflowEnvironment, - LightningEnvironment, - LSFEnvironment, - SLURMEnvironment, - TorchElasticEnvironment, -) -from pytorch_lightning.strategies import ( - DataParallelStrategy, - DDP2Strategy, - DDPFullyShardedStrategy, - DDPShardedStrategy, - DDPSpawnShardedStrategy, - DDPSpawnStrategy, - DDPStrategy, - DeepSpeedStrategy, - HorovodStrategy, - IPUStrategy, - SingleDeviceStrategy, - SingleTPUStrategy, - Strategy, - StrategyRegistry, - TPUSpawnStrategy, -) -from pytorch_lightning.utilities import ( - _AcceleratorType, - _StrategyType, - AMPType, - device_parser, - rank_zero_deprecation, - rank_zero_info, - rank_zero_warn, -) -from pytorch_lightning.utilities.enums import PrecisionType -from pytorch_lightning.utilities.exceptions import MisconfigurationException, DeviceNotAvailibleException, ImpactableConfigurationException -from pytorch_lightning.utilities.imports import ( - _HOROVOD_AVAILABLE, - _IPU_AVAILABLE, - _GPU_AVAILABLE, - _TORCH_GREATER_EQUAL_1_8, - _TPU_AVAILABLE, -) - -if _HOROVOD_AVAILABLE: - import horovod.torch as hvd - -log = logging.getLogger(__name__) - - -class AcceleratorConnector: - def __init__( - self, - devices, - num_nodes, - accelerator, # reduce typing - strategy: Optional[Union[str, Strategy]], - plugins, - precision, - amp_type, - amp_level, - sync_batchnorm, - benchmark, - replace_sampler_ddp, - deterministic: bool, - 
num_processes, # deprecated - tpu_cores, # deprecated - ipus, # deprecated - gpus, # deprecated - gpu_ids, - ): - """ - A. accelerator flag could be: - 1. strategy class (deprecated in 1.5 will be removed in 1.7) - 2. strategy str (deprecated in 1.5 will be removed in 1.7) - 3. accelerator class - 4. accelerator str - 5. accelerator auto - - B. strategy flag could be : - 1. strategy class - 2. strategy str registered with strategyRegister - 3. strategy str in _strategy_type enum which listed in each strategy as backend (registed these too, and _strategy_type could be deprecated) - - C. plugins flag could be: - 1. List of str, which could contains: - i. strategy str - ii. precision str (Not supported in the old accelerator_connector version) - iii. checkpoint_io str (Not supported in the old accelerator_connector version) - iv. cluster_environment str (Not supported in the old accelerator_connector version) - 2. List of class, which could contains: - i. strategy class (deprecated in 1.5 will be removed in 1.7) - ii. precision class (should be removed, and precision flag should allow user pass classes) - iii. checkpoint_io class - iv. cluster_environment class - - - priorities which to take when: - A. Class > str - B. Strategy > Accelerator/precision/plugins - C. When multiple flag set to the same thing? (ignore? not handled for now) - - """ - - # --Parsing_flags------------------------------------------------------ - # Get registered strategies, existing accelerators and precision plugins - self._existing_strategies_str = StrategyRegistry.available_strategies() - print(self._existing_strategies_str) - self._existing_accelerator_type = ["tpu", "ipu", "gpu", "cpu"] - self._supported_precision = PrecisionType.supported_types() - - # raise misconfig exceptions if their is conflict between flags - # set the valid flag to self._x_flag after validation - # for example: if accelerator is strategy class, set self._strategy_flag = accelerator - # for devices: assign gpus ipus and etcs to accelerator_flag and devices_flag - self._config_check_and_set_final_flags(strategy, accelerator, precision, plugins, amp_type, amp_level) - self._device_config_check_and_set_final_flags(devices=devices, num_nodes=num_nodes, num_processes=num_processes, gpus=gpus, ipus=ipus, tpu_cores=tpu_cores) - - - # --Accelerator------------------------------------------------------------- - # handle `auto` and `None` - if self._accelerator_flag == 'auto' or self._accelerator_flag is None: - self._choose_accelerator() - # else: - # # [RFC] move to XAccelerator class init? - # self._check_device_availibility() - self._set_parallel_devices_and_init_accelerator() - - - # --Cluster_environment----------------------------------------------------- - self._choose_and_init_cluster_environment() - - - # --Strategy Part 1 : choose strategy --------------------------------------- - if self._strategy_flag is None: - self._choose_strategy() - # Reset strategy even user has specificed one - self._strategy_fallbacks() - - - # --Precision---------------------------------------------------------------- - self._check_capatibility_and_init_precision() - - - # --Strategy Part 2 : init Strategy and set Strategy properties ------------- - self._init_strategy() - - - # set properties not used in accelerator_connector. 
TODO move out of this file - # self.gpus = gpus or devices - self.replace_sampler_ddp = replace_sampler_ddp - - def _config_check_and_set_final_flags(self, strategy, accelerator, precision, plugins, amp_type, amp_level): - """ - This method checks: - 1. strategy flag: strategy, accelerator and plugin can all set strategies - 2. accelerator: if accelerator flag is Accelerator related flag or class, set self._acceelrator_flag; - If accelerator is strategy related, logic handled in 1 above - 3. precision could be set by precision and plugins flag - 4. plugins could be duplicated in strategy (handled by 1), precision (handled by 3), set checkpoint_io and cluster_environment - """ - self._strategy_flag, self._accelerator_flag, self._precision_flag, self._cluster_environment, self.checkpoint_io, self._amp_level_flag, self._amp_type_flag = None, None, None, None, None, amp_type, amp_level - if strategy: - self._strategy_flag = strategy - # handle duplications and conflict - if isinstance(accelerator, Strategy) and strategy != accelerator: - raise MisconfigurationException("strategy already set through strategy flag, duplicated in accelerator") - if isinstance(accelerator, str) and accelerator in self._existing_strategies_str and strategy != accelerator: - raise MisconfigurationException("strategy str already set through strategy flag, duplicated in accelerator") - if plugins: - for plugin in plugins: - if isinstance(plugin, Strategy) and strategy != plugin: - raise MisconfigurationException("strategy already set through strategy flag, duplicated in plugins") - if isinstance(plugin, str) and plugin in self._existing_strategies_str: - raise MisconfigurationException("strategy already set through strategy flag, duplicated in plugins") - - - if accelerator in self._existing_accelerator_type or accelerator=="auto" or isinstance(accelerator, Accelerator): - self._accelerator_flag = accelerator - elif accelerator in self._existing_strategies_str or isinstance(accelerator, Strategy): - rank_zero_deprecation( - f"Passing `Trainer(accelerator={accelerator!r})` has been deprecated" - f" in v1.5 and will be removed in v1.7. Use `Trainer(strategy={accelerator!r})` instead." - ) - self._strategy_flag = accelerator - elif accelerator == "ddp_cpu": - rank_zero_warn( - "You requested one or more GPUs, but set `accelerator='ddp_cpu'`. Training will not use GPUs." 
- ) - self._strategy_flag = accelerator - - - if precision: - self._precision_flag = precision - # handle duplications and conflict - if plugins: - for plugin in plugins: - if isinstance(plugin, PrecisionPlugin): - raise MisconfigurationException("precision set in both precision flag and plugin flag") - - if plugins: - for plugin in plugins: - if isinstance(plugin, Strategy) or isinstance(plugin, str) and plugin in self._existing_strategies_str: - self._strategy_flag = plugin - elif isinstance(plugin, PrecisionPlugin) or isinstance(plugin, str) and plugin in self._supported_precision: - self._precision_flag = plugin - elif isinstance(plugin, CheckpointIO): - self.checkpoint_io = plugin - elif isinstance(plugin, ClusterEnvironment): - self._cluster_environment = plugin - else: - raise MisconfigurationException(f"Does not recognize flag {plugin}") - - - # if user pass in a strategy class which has accelerator, precision, checkpoint or cluster env set up - if self._strategy_flag and isinstance(self._strategy_flag, Strategy): - if self._strategy_flag.accelerator: - if self._accelerator_flag: - raise MisconfigurationException("accelerator set through both strategy class and accelerator flag, choose one") - else: - self._accelerator_flag = self._strategy_flag.accelerator - if self._strategy_flag.precision_plugin: - # precision has default value 32, we can not tell whether user set it or not [RFC] remove default from trainer? - # if self._precision_flag: - # raise MisconfigurationException("precision set through both strategy class and flags, choose one place to set") - # else: - print("here") - self._precision_flag = self._strategy_flag.precision_plugin - if self._strategy_flag.checkpoint_io: - if self.checkpoint_io: - raise MisconfigurationException("checkpoint_io set through both strategy class and plugins, choose one") - else: - self.checkpoint_io = self._strategy_flag.checkpoint_io - if getattr(self._strategy_flag, "cluster_environment", None): - if self._cluster_environment: - raise MisconfigurationException("cluster_environment set through both strategy class and plugins, choose one") - else: - self._cluster_environment = getattr(self._strategy_flag, "cluster_environment") - - - amp_type = amp_type.lower() if isinstance(amp_type, str) else None - self._amp_type_flag = AMPType.from_str(amp_type) if amp_type is not None else None - - # TODO still working on these flags - # if amp_level is not None and self._amp_type_flag != AMPType.APEX: - # raise MisconfigurationException( - # f"You have asked for `amp_level={self._amp_level_flag!r}` but it's only supported with `amp_backend='apex'`." 
- # ) - self._amp_level_flag = amp_level - - - def _device_config_check_and_set_final_flags(self, devices, num_nodes, num_processes, gpus, ipus, tpu_cores): - if num_nodes == "auto": - self._num_nodes_flag = 1 - else : - self._num_nodes_flag = int(num_nodes) if num_nodes is not None else 1 - - self._device_flag = devices - ##### to be deleted v1.7 - deprecated_devices_specific_flag = num_processes or gpus or ipus or tpu_cores - if deprecated_devices_specific_flag: - self._mapping_deprecated_devices_specfic_info_to_accelerator_and_device_flag(devices, deprecated_devices_specific_flag, num_processes, gpus, ipus, tpu_cores) - ##### deleted end - if devices == "auto": - if self._accelerator_flag is None: - raise MisconfigurationException( - f"You passed `devices={devices}` but haven't specified" - " `accelerator=('auto'|'tpu'|'gpu'|'ipu'|'cpu')` for the devices mapping" - ) - - - def _mapping_deprecated_devices_specfic_info_to_accelerator_and_device_flag(self, devices, deprecated_devices_specific_flag, num_processes, gpus, ipus, tpu_cores): - ##### to be deleted v1.7vbg - # set devices base on num_processes, gpus, ipus, tpu_cores - if devices: - rank_zero_warn(f"will be ignored, instand the device specific number {deprecated_devices_specific_flag} will be used") - if [(num_processes is not None), (gpus is not None), (ipus is not None), (tpu_cores is not None)].count(True) > 1: - rank_zero_warn(f"more than one device specifc flag has been set") - self._device_flag = deprecated_devices_specific_flag - - if not self._accelerator_flag: - # set accelerator type base on num_processes, gpus, ipus, tpu_cores - if num_processes: - self._accelerator_flag = "cpu" - if gpus: - self._accelerator_flag = "gpu" - if tpu_cores: - self._accelerator_flag = "tpu" - if ipus: - self._accelerator_flag = "ipu" - #### delete end - - def _choose_accelerator(self): - if self._accelerator_flag == "auto": - if _TPU_AVAILABLE: - self._accelerator_flag = "tpu" - elif _IPU_AVAILABLE: - self._accelerator_flag = "ipu" - elif _GPU_AVAILABLE: - self._accelerator_flag = "gpu" - else: - self._accelerator_flag = "cpu" - if self._device_flag == "auto": - self._device_flag = 1 - # [RFC] this is current logic, if accelerator not set, default cpu? 
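Editor's note: the `auto` handling in `_choose_accelerator` above is a straight availability cascade (TPU, then IPU, then GPU, then CPU, with `devices='auto'` falling back to a single device). A minimal, self-contained sketch of that order, with the `_TPU_AVAILABLE`/`_IPU_AVAILABLE`/`_GPU_AVAILABLE` flags passed in as plain booleans, might look like:

    # Illustrative sketch only, not part of the patch: the "auto" accelerator cascade.
    def choose_accelerator(accelerator_flag, device_flag,
                           tpu_available, ipu_available, gpu_available):
        if accelerator_flag == "auto":
            if tpu_available:
                accelerator_flag = "tpu"
            elif ipu_available:
                accelerator_flag = "ipu"
            elif gpu_available:
                accelerator_flag = "gpu"
            else:
                accelerator_flag = "cpu"
            if device_flag == "auto":
                device_flag = 1
        else:
            # current behaviour: an unset accelerator defaults to CPU
            accelerator_flag = accelerator_flag or "cpu"
        return accelerator_flag, device_flag
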
- else: - self._accelerator_flag = "cpu" - - - def _check_device_availibility(self): - for accelerator_flag, available in zip(self._existing_accelerator_type, [_TPU_AVAILABLE, _IPU_AVAILABLE, _GPU_AVAILABLE, True]): - if self._accelerator_flag == accelerator_flag: - if not available: - raise DeviceNotAvailibleException(f"{accelerator_flag} not avalible") - - # TODO in progress for setting up devices - def _set_parallel_devices_and_init_accelerator(self): - self._parallel_devices = [] - - if isinstance(self._accelerator_flag, Accelerator): - self.accelerator = self._accelerator_flag - elif self._accelerator_flag == "tpu": - self.accelerator = TPUAccelerator() - if self._device_flag == "auto" or not self._device_flag: - self._device_flag = TPUAccelerator.auto_device_count() - if isinstance(self._device_flag, int): - self._parallel_devices = list(range(self._device_flag)) - - elif self._accelerator_flag == "ipu": - self.accelerator = IPUAccelerator() - if self._device_flag == "auto" or not self._device_flag: - self._device_flag = IPUAccelerator.auto_device_count() - if isinstance(self._device_flag, int): - self._parallel_devices = list(range(self._device_flag)) - - elif self._accelerator_flag == "gpu": - self.accelerator = GPUAccelerator() - if self._device_flag == "auto" or not self._device_flag: - self._device_flag = GPUAccelerator.auto_device_count() - if isinstance(self._device_flag, int) or isinstance(self._device_flag, str): - self._device_flag = int(self._device_flag) - self._parallel_devices = [torch.device("cuda", i) for i in device_parser.parse_gpu_ids(self._device_flag)] - elif isinstance(self._device_flag, list): - self._parallel_devices = [torch.device("cuda", i) for i in self._device_flag] - - - elif self._accelerator_flag == "cpu": - self.accelerator = CPUAccelerator() - if self._device_flag == "auto" or not self._device_flag: - self._device_flag = CPUAccelerator.auto_device_count() - if isinstance(self._device_flag, int): - self._parallel_devices = [torch.device("cpu")] * self._device_flag - - self._gpus = self._device_flag - - - def _choose_and_init_cluster_environment(self): - self.cluster_environment = LightningEnvironment() - if isinstance(self._cluster_environment, ClusterEnvironment): - self.cluster_environment = self._cluster_environment - elif self._is_slurm_managing_tasks(): - rank_zero_info("Multiprocessing is handled by SLURM.") - self.cluster_environment = SLURMEnvironment() - else: - for env_type in (TorchElasticEnvironment, KubeflowEnvironment, LSFEnvironment): - if env_type.detect(): - self.cluster_environment = env_type() - - - def _is_slurm_managing_tasks(self): - """ - used by choosing cluster enviroment - """ - if ( - #(not self._strategy_flag=="ddp" and not self._strategy_flag=="ddp2") - # the above logic moved to _select_strategy(), only check _is_slurm_managing_tasks() - # when strategy flag is ddp or ddp2 - not SLURMEnvironment.detect() - or SLURMEnvironment.job_name() == "bash" # in interactive mode we don't manage tasks - ): - return False - - total_requested_devices = len(self._parallel_devices) * self._num_nodes_flag - num_slurm_tasks = int(os.environ["SLURM_NTASKS"], 0) - return num_slurm_tasks == total_requested_devices - - def _choose_strategy(self): - if _HOROVOD_AVAILABLE and ("OMPI_COMM_WORLD_RANK" in os.environ or "HOROVOD_RANK" in os.environ): - self._strategy_flag = HorovodStrategy() - - if self._accelerator_flag == "ipu": - self._strategy_flag = "ipu" - elif self._accelerator_flag == "tpu": - if self._parallel_devices and 
len(self._parallel_devices)>1: - self._strategy_flag = "tpu_spawn" - else: - self._srategy_flag = SingleTPUStrategy() - - # [RFC] in existing logic SingleDevice strategy choice diverge between cpu and gpu, should we merge? - # elif self._accelerator_flag == "gpu": - # if self._num_nodes_flag > 1: - # self._strategy_flag = "ddp" - # elif TorchElasticEnvironment.detect() or KubeflowEnvironment.detect() or self._is_slurm_managing_tasks(): - # self._strategy_flag = "ddp" - # elif len(self._parallel_devices) == 1: - # self._strategy_flag = "ddp" - # elif len(self._parallel_devices) > 1: - # self._strategy_flag = "ddp_spawn" - # else: - # self._strategy_flag = "ddp" - else: - if self._num_nodes_flag > 1: - self._strategy_flag = "ddp" - elif TorchElasticEnvironment.detect() or KubeflowEnvironment.detect() or self._is_slurm_managing_tasks(): - self._strategy_flag = "ddp" - elif len(self._parallel_devices) <= 1: - device = torch.device("cuda") if self._accelerator_flag == "gpu" else "cpu" - self._strategy_flag = SingleDeviceStrategy(device = device) - elif len(self._parallel_devices) > 1: - self._strategy_flag = "ddp_spawn" - else: - self._strategy_flag = "ddp" - - def _strategy_fallbacks(self): - _strategy_flag = "" if isinstance(self._strategy_flag, Strategy) else self._strategy_flag - if _strategy_flag == "ddp_cpu": - if _TPU_AVAILABLE: - raise MisconfigurationException( - "`accelerator='ddp_cpu'` is not supported on TPU machines. " - "Learn more: https://github.com/PyTorchLightning/pytorch-lightning/issues/7810" - ) - if self._device_flag ==1 and self._num_nodes_flag > 1: - _strategy_flag = "ddp" - else: - _strategy_flag = "ddp_spawn" - if self._accelerator_flag == "gpu": - rank_zero_warn( - "You requested one or more GPUs, but set `accelerator='ddp_cpu'`. Training will not use GPUs." - ) - # if self._accelerator_flag == "cpu": - # self._parallel_devices = os.cpu_count() - - if "ddp_spawn" in _strategy_flag and (TorchElasticEnvironment.detect() or KubeflowEnvironment.detect() or self._is_slurm_managing_tasks()): - _strategy_flag = "ddp" - - if _strategy_flag: - self._strategy_flag = _strategy_flag - - - def _check_capatibility_and_init_precision(self): - print(self._precision_flag) - self._precision_misconfig_check() - if isinstance(self._precision_flag, PrecisionPlugin): - self.precision_plugin = self._precision_flag - return - - if self._accelerator_flag =="ipu": - self.precision_plugin = IPUPrecisionPlugin(self._precision_flag) - if self._accelerator_flag == "tpu": - if self._precision_flag == 32: - self.precision_plugin = TPUPrecisionPlugin() - elif self._precision_flag in (16, "bf16"): - if self._precision_flag == 16: - # this is not deprecated to ease transition between accelerator environments - rank_zero_warn( - f"You passed `Trainer(accelerator='tpu', precision=16)` but {self._amp_type_flag.value} AMP" - f" is not supported with TPUs. Using `precision='bf16'` instead." 
- ) - self.precision_plugin = TPUBf16PrecisionPlugin() - if self._strategy_flag == "deepspeed" or isinstance(self._strategy_flag, DeepSpeedStrategy): - self.precision_plugin = DeepSpeedPrecisionPlugin(self._precision_flag, self._amp_type_flag, self._amp_level_flag) - - if self._precision_flag == 32: - self.precision_plugin = PrecisionPlugin() - if self._precision_flag == 64: - self.precision_plugin = DoublePrecisionPlugin() - - # maybe convert the precision value - if self._precision_flag == 16 and self._accelerator_flag == "cpu": - # this automatic switch is to ease transition between accelerator environments - rank_zero_warn( - "You passed `Trainer(accelerator='cpu', precision=16)` but native AMP is not supported on CPU." - " Using `precision='bf16'` instead." - ) - self._precision_flag = "bf16" - - if self._precision_flag in (16, "bf16"): - rank_zero_info( - f"Using 16bit {self._amp_type_flag.value} Automatic Mixed Precision (AMP)" - if self._precision_flag == 16 - else "Using bfloat16 Automatic Mixed Precision (AMP)" - ) - - if self._amp_type_flag == AMPType.NATIVE: - device = "cpu" if self._accelerator_flag=="cpu" else "cuda" - - # TODO in progress implement the two following shard types - # if self._is_sharded_training_type: - # return ShardedNativeMixedPrecisionPlugin(self._precision_flag, device) - # if self._is_fully_sharded_training_type: - # return FullyShardedNativeMixedPrecisionPlugin(self._precision_flag, device) - # return NativeMixedPrecisionPlugin(self._precision_flag, device) - - - self._amp_level_flag = self._amp_level_flag or "O2" - self.precision_plugin = ApexMixedPrecisionPlugin(self._amp_level_flag) - self.precision_plugin = PrecisionPlugin() - - def _precision_misconfig_check(self): - - if self._accelerator_flag == "ipu": - if self._precision_flag not in (16, 32): - raise MisconfigurationException( - f"`Trainer(accelerator='ipu', precision={self._precision_flag!r})` is not supported." - ) - if self._accelerator_flag == "tpu" and self._precision_flag == 64: - raise MisconfigurationException( - "`Trainer(accelerator='tpu', precision=64)` is not implemented." - " Please, open an issue in `https://github.com/PyTorchLightning/pytorch-lightning/issues`" - " requesting this feature." - ) - if self._precision_flag == 16 and self._accelerator_flag == "cpu" and self._amp_type_flag == AMPType.APEX: - # apex was explicitly passed, not a good idea to silently switch to native AMP - raise MisconfigurationException( - "You passed `Trainer(accelerator='cpu', precision=16, amp_type='apex')`" - " but apex AMP not supported on CPU." - ) - if self._precision_flag == "bf16" and self._amp_type_flag != AMPType.NATIVE: - raise MisconfigurationException( - f"You passed `Trainer(amp_type={self._amp_type_flag.value!r}, precision='bf16')` but it's not supported." - " Try using `amp_type='native'` instead." - ) - - # if self._precision_flag in (16, "bf16") and self._amp_type_flag == AMPType.APEX: - # if self._is_sharded_training_type or self._is_fully_sharded_training_type: - # raise MisconfigurationException( - # "Sharded plugins are not supported with apex, please switch to `amp_backend='native'`." 
- # ) - - - def _init_strategy(self): - print(self._strategy_flag) - if isinstance(self._strategy_flag, str): - self.strategy = StrategyRegistry.get(self._strategy_flag) - else: - self.strategy = self._strategy_flag - self.strategy.accelerator = self.accelerator - if self.precision_plugin: - self.strategy.precision_plugin = self.precision_plugin - if self.checkpoint_io: - self.strategy.checkpoint_io = self.checkpoint_io - self.strategy.cluster_environment = self.cluster_environment - if hasattr(self.strategy, "parallel_devices"): - self.strategy.parallel_devices = self._parallel_devices - - - - - - ############################################################################## - # the following logic should be deprecated/removed, and these information should be - # retrive from strategies and accelerators - # Added here to keep backward compabilities - - @property - def parallel_devices(self) -> List[Union[torch.device, int]]: - return self._parallel_devices - - # def _distrib_type(): - @property - def device_type(self): - if isinstance(self.accelerator, CPUAccelerator): - return "cpu" - if isinstance(self.accelerator, GPUAccelerator): - return "gpu" - if isinstance(self.accelerator, TPUAccelerator): - return "tpu" - if isinstance(self.accelerator, IPUAccelerator): - return "ipu" - - @property - def num_nodes(self): - return self._num_nodes - - @property - def num_processes(self): - return self.devices - - @property - def root_gpu(self) -> Optional[int]: - return ( - self.strategy.root_device.index - if not isinstance(self.accelerator, (IPUAccelerator, TPUAccelerator)) - else None - ) - - @property - def devices(self): - return len(self._parallel_devices) - - @property - def tpu_cores(self) -> int: - return self.devices - - @property - def ipus(self) -> int: - return self.devices - - @property - def num_gpus(self) -> int: - return self.devices - - # def parallel_device_ids(): - @property - def gpus(self): - return self._gpus if isinstance(self.accelerator, GPUAccelerator) else None - - - def is_distributed(self): - # Used for custom plugins. - # Custom plugins should implement is_distributed property. 
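Editor's note: stripped of the surrounding bookkeeping, the strategy initialisation above reduces to resolving a string through the registry and then attaching the already-chosen components. A condensed sketch, with `registry` standing in for `StrategyRegistry`:

    # Condensed, illustrative version of `_init_strategy` / `_lazy_init_strategy`.
    def init_strategy(strategy_flag, registry, accelerator, precision_plugin,
                      checkpoint_io, cluster_environment, parallel_devices):
        # a registered name resolves to an instance; an instance is used as-is
        strategy = registry.get(strategy_flag) if isinstance(strategy_flag, str) else strategy_flag
        strategy.accelerator = accelerator
        if precision_plugin:
            strategy.precision_plugin = precision_plugin
        if checkpoint_io:
            strategy.checkpoint_io = checkpoint_io
        strategy.cluster_environment = cluster_environment
        if hasattr(strategy, "parallel_devices"):
            strategy.parallel_devices = parallel_devices
        return strategy
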
- if hasattr(self.strategy, "is_distributed") and not isinstance(self.accelerator, TPUAccelerator): - return self.strategy.is_distributed - distributed_strategy = (DDP2Strategy, DDPStrategy, DDPSpawnShardedStrategy, DDPShardedStrategy, DDPFullyShardedStrategy, DDPSpawnStrategy, DeepSpeedStrategy, TPUSpawnStrategy, HorovodStrategy) - is_distributed = isinstance(self.strategy, distributed_strategy) - if isinstance(self.accelerator, TPUAccelerator): - is_distributed |= self.strategy.is_distributed - return is_distributed - - def has_ipu(self): - return isinstance(self.accelerator, IPUAccelerator) - - def has_tpu(self): - return isinstance(self.accelerator, TPUAccelerator) - - def use_dp(self): - return isinstance(self.strategy, DataParallelStrategy) - - @property - def _strategy_type(self) -> _StrategyType: - return self.strategy.distributed_backend diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py index 8ceb2de96c59c..8617b5a2c8095 100644 --- a/tests/accelerators/test_accelerator_connector.py +++ b/tests/accelerators/test_accelerator_connector.py @@ -468,10 +468,10 @@ def test_accelerator_gpu(): assert trainer._device_type == "gpu" assert isinstance(trainer.accelerator, GPUAccelerator) - with pytest.raises( - MisconfigurationException, match="You passed `accelerator='gpu'`, but you didn't pass `gpus` to `Trainer`" - ): - trainer = Trainer(accelerator="gpu") + # with pytest.raises( + # MisconfigurationException, match="You passed `accelerator='gpu'`, but you didn't pass `gpus` to `Trainer`" + # ): + trainer = Trainer(accelerator="gpu") trainer = Trainer(accelerator="auto", gpus=1) @@ -552,8 +552,10 @@ def test_accelerator_gpu_with_gpus_priority(): def test_validate_accelerator_and_devices(): - with pytest.raises(MisconfigurationException, match="You passed `devices=2` but haven't specified"): - Trainer(accelerator="ddp_cpu", devices=2) + # with pytest.raises(MisconfigurationException, match="You passed `devices=2` but haven't specified"): + trainer = Trainer(accelerator="ddp_cpu", devices=2) + assert isinstance(trainer.accelerator, CPUAccelerator) + assert trainer.num_processes == 2 def test_set_devices_if_none_cpu(): From 3999d80b1f320d83121f6b3b580b6e94e6dd5bc4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 25 Jan 2022 06:22:30 +0000 Subject: [PATCH 04/69] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pytorch_lightning/strategies/ddp2.py | 3 +- pytorch_lightning/strategies/dp.py | 3 +- pytorch_lightning/strategies/horovod.py | 2 +- pytorch_lightning/strategies/single_device.py | 3 +- pytorch_lightning/strategies/single_tpu.py | 3 +- .../connectors/accelerator_connector.py | 244 +++++++++--------- pytorch_lightning/trainer/trainer.py | 34 +-- pytorch_lightning/utilities/exceptions.py | 2 + pytorch_lightning/utilities/imports.py | 2 +- 9 files changed, 157 insertions(+), 139 deletions(-) diff --git a/pytorch_lightning/strategies/ddp2.py b/pytorch_lightning/strategies/ddp2.py index 2633508e6bd82..ba8e769c35772 100644 --- a/pytorch_lightning/strategies/ddp2.py +++ b/pytorch_lightning/strategies/ddp2.py @@ -11,9 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
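Editor's note: the relaxed accelerator-connector tests above sketch the user-facing behaviour this rewrite is aiming for. Roughly, and assuming a machine that actually has the hardware each accelerator needs:

    # Illustrative usage implied by the updated tests (subject to later commits in this series).
    from pytorch_lightning import Trainer

    trainer = Trainer(accelerator="gpu")                 # no longer requires `gpus=...` to be passed
    trainer = Trainer(accelerator="ddp_cpu", devices=2)  # CPUAccelerator with num_processes == 2
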
-import torch from typing import Dict +import torch + from pytorch_lightning.strategies.ddp import DDPStrategy from pytorch_lightning.utilities.apply_func import apply_to_collection from pytorch_lightning.utilities.enums import _StrategyType diff --git a/pytorch_lightning/strategies/dp.py b/pytorch_lightning/strategies/dp.py index bcac4f4f156d5..01066a21c0e71 100644 --- a/pytorch_lightning/strategies/dp.py +++ b/pytorch_lightning/strategies/dp.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import Any, List, Optional, Dict +from typing import Any, Dict, List, Optional import torch from torch.nn import DataParallel, Module @@ -157,7 +157,6 @@ def register_strategies(cls, strategy_registry: Dict) -> None: description=f"{cls.__class__.__name__} Strategy", ) - def teardown(self) -> None: super().teardown() if self.root_device.type == "cuda": diff --git a/pytorch_lightning/strategies/horovod.py b/pytorch_lightning/strategies/horovod.py index 90b091a9eee18..1e99dbc429ed8 100644 --- a/pytorch_lightning/strategies/horovod.py +++ b/pytorch_lightning/strategies/horovod.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. from contextlib import ExitStack -from typing import Any, List, Optional, Tuple, Union, Dict +from typing import Any, Dict, List, Optional, Tuple, Union import torch import torch.nn as nn diff --git a/pytorch_lightning/strategies/single_device.py b/pytorch_lightning/strategies/single_device.py index f866dfe204ade..adbd3d71371b5 100644 --- a/pytorch_lightning/strategies/single_device.py +++ b/pytorch_lightning/strategies/single_device.py @@ -26,6 +26,7 @@ class SingleDeviceStrategy(Strategy): """Strategy that handles communication on a single device.""" + distributed_backend = "single_device" def __init__( @@ -81,7 +82,7 @@ def broadcast(self, obj: object, src: int = 0) -> object: return obj @classmethod - def register_strategies(cls, strategy_registry: Dict) -> None: + def register_strategies(cls, strategy_registry: dict) -> None: strategy_registry.register( cls.distributed_backend, cls, diff --git a/pytorch_lightning/strategies/single_tpu.py b/pytorch_lightning/strategies/single_tpu.py index 3d471f2dabd24..942f9ebfa9a41 100644 --- a/pytorch_lightning/strategies/single_tpu.py +++ b/pytorch_lightning/strategies/single_tpu.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
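Editor's note: the recurring pattern in these strategy modules is self-registration keyed on `distributed_backend`. A hypothetical custom strategy could plug into the same mechanism; the class, key, and description below are invented for illustration, and the `register()` signature is assumed to match the calls used elsewhere in this series:

    # Hypothetical example of the registration pattern used throughout this series.
    from typing import Dict

    from pytorch_lightning.strategies import SingleDeviceStrategy


    class MyStrategy(SingleDeviceStrategy):
        distributed_backend = "my_strategy"  # the key the connector resolves strategy strings against

        @classmethod
        def register_strategies(cls, strategy_registry: Dict) -> None:
            strategy_registry.register(
                cls.distributed_backend,
                cls,
                description="Example custom strategy",
            )
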
import os -from typing import Optional, Dict +from typing import Dict, Optional import pytorch_lightning as pl from pytorch_lightning.plugins.io.xla_plugin import XLACheckpointIO @@ -27,6 +27,7 @@ class SingleTPUStrategy(SingleDeviceStrategy): """Strategy for training on a single TPU device.""" + distributed_backend = "single_tpu" def __init__( diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 72c9a78f06602..ec30e3469d451 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -66,11 +66,15 @@ ) from pytorch_lightning.utilities import _AcceleratorType, _StrategyType, AMPType, device_parser from pytorch_lightning.utilities.enums import PrecisionType -from pytorch_lightning.utilities.exceptions import MisconfigurationException, DeviceNotAvailibleException, ImpactableConfigurationException +from pytorch_lightning.utilities.exceptions import ( + DeviceNotAvailibleException, + ImpactableConfigurationException, + MisconfigurationException, +) from pytorch_lightning.utilities.imports import ( + _GPU_AVAILABLE, _HOROVOD_AVAILABLE, _IPU_AVAILABLE, - _GPU_AVAILABLE, _TORCH_GREATER_EQUAL_1_8, _TPU_AVAILABLE, ) @@ -87,7 +91,7 @@ def __init__( self, devices, num_nodes, - accelerator, # reduce typing + accelerator, # reduce typing strategy: Optional[Union[str, Strategy]], plugins, precision, @@ -97,10 +101,10 @@ def __init__( benchmark, replace_sampler_ddp, deterministic: bool, - num_processes, # deprecated - tpu_cores, # deprecated - ipus, # deprecated - gpus, # deprecated + num_processes, # deprecated + tpu_cores, # deprecated + ipus, # deprecated + gpus, # deprecated gpu_ids, ): """ @@ -148,23 +152,22 @@ def __init__( # for example: if accelerator is strategy class, set self._strategy_flag = accelerator # for devices: assign gpus ipus and etcs to accelerator_flag and devices_flag self._config_check_and_set_final_flags(strategy, accelerator, precision, plugins, amp_type, amp_level) - self._device_config_check_and_set_final_flags(devices=devices, num_nodes=num_nodes, num_processes=num_processes, gpus=gpus, ipus=ipus, tpu_cores=tpu_cores) - + self._device_config_check_and_set_final_flags( + devices=devices, num_nodes=num_nodes, num_processes=num_processes, gpus=gpus, ipus=ipus, tpu_cores=tpu_cores + ) # --Accelerator------------------------------------------------------------- # handle `auto` and `None` - if self._accelerator_flag == 'auto' or self._accelerator_flag is None: + if self._accelerator_flag == "auto" or self._accelerator_flag is None: self._choose_accelerator() # else: # # [RFC] move to XAccelerator class init? # self._check_device_availibility() self._set_parallel_devices_and_init_accelerator() - # --Cluster_environment----------------------------------------------------- self._choose_and_init_cluster_environment() - # --Strategy Part 1 : choose strategy --------------------------------------- if self._strategy_flag is None: self._choose_strategy() @@ -175,26 +178,31 @@ def __init__( # --Precision---------------------------------------------------------------- self._check_capatibility_and_init_precision() - # --Strategy Part 2 : init Strategy and set Strategy properties ------------- self._lazy_init_strategy() - - # set properties not used in accelerator_connector. 
TODO move out of this file # self.gpus = gpus or devices self.replace_sampler_ddp = replace_sampler_ddp def _config_check_and_set_final_flags(self, strategy, accelerator, precision, plugins, amp_type, amp_level): + """This method checks: + + 1. strategy flag: strategy, accelerator and plugin can all set strategies + 2. accelerator: if accelerator flag is Accelerator related flag or class, set self._acceelrator_flag; + If accelerator is strategy related, logic handled in 1 above + 3. precision could be set by precision and plugins flag + 4. plugins could be duplicated in strategy (handled by 1), precision (handled by 3), set checkpoint_io and cluster_environment """ - This method checks: - 1. strategy flag: strategy, accelerator and plugin can all set strategies - 2. accelerator: if accelerator flag is Accelerator related flag or class, set self._acceelrator_flag; - If accelerator is strategy related, logic handled in 1 above - 3. precision could be set by precision and plugins flag - 4. plugins could be duplicated in strategy (handled by 1), precision (handled by 3), set checkpoint_io and cluster_environment - """ - self._strategy_flag, self._accelerator_flag, self._precision_flag, self._cluster_environment, self.checkpoint_io, self._amp_level_flag, self._amp_type_flag = None, None, None, None, None, amp_type, amp_level + ( + self._strategy_flag, + self._accelerator_flag, + self._precision_flag, + self._cluster_environment, + self.checkpoint_io, + self._amp_level_flag, + self._amp_type_flag, + ) = (None, None, None, None, None, amp_type, amp_level) if strategy: self._strategy_flag = strategy if strategy == "ddp_cpu": @@ -204,23 +212,36 @@ def _config_check_and_set_final_flags(self, strategy, accelerator, precision, pl ) if strategy == "tpu_spawn": raise MisconfigurationException( - "`Trainer(strategy='tpu_spawn')` is not a valid strategy," - " you can use `Trainer(strategy='ddp_spawn', accelerator='tpu')` instead." - ) + "`Trainer(strategy='tpu_spawn')` is not a valid strategy," + " you can use `Trainer(strategy='ddp_spawn', accelerator='tpu')` instead." 
+ ) # handle duplications and conflict if isinstance(accelerator, Strategy) and strategy != accelerator: raise MisconfigurationException("strategy already set through strategy flag, duplicated in accelerator") - if isinstance(accelerator, str) and accelerator in self._existing_strategies_str and strategy != accelerator: - raise MisconfigurationException("strategy str already set through strategy flag, duplicated in accelerator") + if ( + isinstance(accelerator, str) + and accelerator in self._existing_strategies_str + and strategy != accelerator + ): + raise MisconfigurationException( + "strategy str already set through strategy flag, duplicated in accelerator" + ) if plugins: for plugin in plugins: if isinstance(plugin, Strategy) and strategy != plugin: - raise MisconfigurationException("strategy already set through strategy flag, duplicated in plugins") + raise MisconfigurationException( + "strategy already set through strategy flag, duplicated in plugins" + ) if isinstance(plugin, str) and plugin in self._existing_strategies_str: - raise MisconfigurationException("strategy already set through strategy flag, duplicated in plugins") - + raise MisconfigurationException( + "strategy already set through strategy flag, duplicated in plugins" + ) - if accelerator in self._existing_accelerator_type or accelerator=="auto" or isinstance(accelerator, Accelerator): + if ( + accelerator in self._existing_accelerator_type + or accelerator == "auto" + or isinstance(accelerator, Accelerator) + ): self._accelerator_flag = accelerator elif accelerator in self._existing_strategies_str or isinstance(accelerator, Strategy): rank_zero_deprecation( @@ -230,8 +251,8 @@ def _config_check_and_set_final_flags(self, strategy, accelerator, precision, pl self._strategy_flag = accelerator elif accelerator == "ddp_cpu": rank_zero_warn( - "You requested one or more GPUs, but set `accelerator='ddp_cpu'`. Training will not use GPUs." - ) + "You requested one or more GPUs, but set `accelerator='ddp_cpu'`. Training will not use GPUs." + ) self._strategy_flag = accelerator if precision: @@ -251,24 +272,28 @@ def _config_check_and_set_final_flags(self, strategy, accelerator, precision, pl rank_zero_deprecation( f"Passing {plugin} `strategy` to the `plugins` flag in Trainer has been deprecated" f" in v1.5 and will be removed in v1.7. Use `Trainer(strategy={plugin})` instead." 
- ) + ) - elif isinstance(plugin, PrecisionPlugin) or isinstance(plugin, str) and plugin in self._supported_precision: + elif ( + isinstance(plugin, PrecisionPlugin) + or isinstance(plugin, str) + and plugin in self._supported_precision + ): self._precision_flag = plugin elif isinstance(plugin, CheckpointIO): - self.checkpoint_io = plugin + self.checkpoint_io = plugin elif isinstance(plugin, ClusterEnvironment): self._cluster_environment = plugin else: raise MisconfigurationException(f"Does not recognize flag {plugin}") - - # if user pass in a strategy class which has accelerator, precision, checkpoint or cluster env set up if self._strategy_flag and isinstance(self._strategy_flag, Strategy): if self._strategy_flag.accelerator: if self._accelerator_flag: - raise MisconfigurationException("accelerator set through both strategy class and accelerator flag, choose one") + raise MisconfigurationException( + "accelerator set through both strategy class and accelerator flag, choose one" + ) else: self._accelerator_flag = self._strategy_flag.accelerator if self._strategy_flag.precision_plugin: @@ -279,16 +304,19 @@ def _config_check_and_set_final_flags(self, strategy, accelerator, precision, pl self._precision_flag = self._strategy_flag.precision_plugin if self._strategy_flag.checkpoint_io: if self.checkpoint_io: - raise MisconfigurationException("checkpoint_io set through both strategy class and plugins, choose one") + raise MisconfigurationException( + "checkpoint_io set through both strategy class and plugins, choose one" + ) else: self.checkpoint_io = self._strategy_flag.checkpoint_io if getattr(self._strategy_flag, "cluster_environment", None): if self._cluster_environment: - raise MisconfigurationException("cluster_environment set through both strategy class and plugins, choose one") + raise MisconfigurationException( + "cluster_environment set through both strategy class and plugins, choose one" + ) else: self._cluster_environment = getattr(self._strategy_flag, "cluster_environment") - amp_type = amp_type.lower() if isinstance(amp_type, str) else None self._amp_type_flag = AMPType.from_str(amp_type) if amp_type is not None else None @@ -299,18 +327,19 @@ def _config_check_and_set_final_flags(self, strategy, accelerator, precision, pl # ) self._amp_level_flag = amp_level - def _device_config_check_and_set_final_flags(self, devices, num_nodes, num_processes, gpus, ipus, tpu_cores): if num_nodes == "auto": self._num_nodes_flag = 1 - else : + else: self._num_nodes_flag = int(num_nodes) if num_nodes is not None else 1 self._device_flag = devices ##### to be deleted v1.7 deprecated_devices_specific_flag = num_processes or gpus or ipus or tpu_cores if deprecated_devices_specific_flag: - self._mapping_deprecated_devices_specfic_info_to_accelerator_and_device_flag(devices, deprecated_devices_specific_flag, num_processes, gpus, ipus, tpu_cores) + self._mapping_deprecated_devices_specfic_info_to_accelerator_and_device_flag( + devices, deprecated_devices_specific_flag, num_processes, gpus, ipus, tpu_cores + ) ##### deleted end if devices == "auto": if self._accelerator_flag is None: @@ -319,18 +348,23 @@ def _device_config_check_and_set_final_flags(self, devices, num_nodes, num_proce " `accelerator=('auto'|'tpu'|'gpu'|'ipu'|'cpu')` for the devices mapping" ) - - def _mapping_deprecated_devices_specfic_info_to_accelerator_and_device_flag(self, devices, deprecated_devices_specific_flag, num_processes, gpus, ipus, tpu_cores): + def _mapping_deprecated_devices_specfic_info_to_accelerator_and_device_flag( + 
self, devices, deprecated_devices_specific_flag, num_processes, gpus, ipus, tpu_cores + ): ##### to be deleted v1.7vbg # set devices base on num_processes, gpus, ipus, tpu_cores if devices: - rank_zero_warn(f"The flag `devices={devices}` will be ignored, instand the device specific number {deprecated_devices_specific_flag} will be used") - if [(num_processes is not None), (gpus is not None), (ipus is not None), (tpu_cores is not None)].count(True) > 1: + rank_zero_warn( + f"The flag `devices={devices}` will be ignored, instand the device specific number {deprecated_devices_specific_flag} will be used" + ) + if [(num_processes is not None), (gpus is not None), (ipus is not None), (tpu_cores is not None)].count( + True + ) > 1: rank_zero_warn(f"more than one device specifc flag has been set") self._device_flag = deprecated_devices_specific_flag if not self._accelerator_flag: - # set accelerator type base on num_processes, gpus, ipus, tpu_cores + # set accelerator type base on num_processes, gpus, ipus, tpu_cores if num_processes: self._accelerator_flag = "cpu" if gpus: @@ -357,9 +391,10 @@ def _choose_accelerator(self): else: self._accelerator_flag = "cpu" - def _check_device_availibility(self): - for accelerator_flag, available in zip(self._existing_accelerator_type, [_TPU_AVAILABLE, _IPU_AVAILABLE, _GPU_AVAILABLE, True]): + for accelerator_flag, available in zip( + self._existing_accelerator_type, [_TPU_AVAILABLE, _IPU_AVAILABLE, _GPU_AVAILABLE, True] + ): if self._accelerator_flag == accelerator_flag: if not available: raise DeviceNotAvailibleException(f"{accelerator_flag} not avalible") @@ -387,51 +422,28 @@ def _set_parallel_devices_and_init_accelerator(self): elif self._accelerator_flag == "gpu": self.accelerator = GPUAccelerator() if self._device_flag == "auto" or not self._device_flag: - self._device_flag = GPUAccelerator.auto_device_count() + self._device_flag = GPUAccelerator.auto_device_count() if isinstance(self._device_flag, int) or isinstance(self._device_flag, str): self._device_flag = int(self._device_flag) - self._parallel_devices = [torch.device("cuda", i) for i in device_parser.parse_gpu_ids(self._device_flag)] + self._parallel_devices = [ + torch.device("cuda", i) for i in device_parser.parse_gpu_ids(self._device_flag) + ] elif isinstance(self._device_flag, list): self._parallel_devices = [torch.device("cuda", i) for i in self._device_flag] - elif self._accelerator_flag == "cpu": self.accelerator = CPUAccelerator() if self._device_flag == "auto" or not self._device_flag: - self._device_flag = CPUAccelerator.auto_device_count() + self._device_flag = CPUAccelerator.auto_device_count() if not isinstance(self._device_flag, int): raise MisconfigurationException( "The flag `devices` must be an int with `accelerator='cpu'`," f" got `devices={self._device_flag}` instead." 
) -<<<<<<< HEAD - self.num_processes = self.devices - return True - return False - - @property - def use_dp(self) -> bool: - return self._strategy_type == _StrategyType.DP - - @property - def use_ddp(self) -> bool: - return self._strategy_type in ( - _StrategyType.BAGUA, - _StrategyType.DDP, - _StrategyType.DDP_SPAWN, - _StrategyType.DDP_SHARDED, - _StrategyType.DDP_SHARDED_SPAWN, - _StrategyType.DDP_FULLY_SHARDED, - _StrategyType.DEEPSPEED, - _StrategyType.TPU_SPAWN, - ) -======= self._parallel_devices = [torch.device("cpu")] * self._device_flag ->>>>>>> dccae1d6f (update) self._gpus = self._device_flag - def _choose_and_init_cluster_environment(self): self.cluster_environment = LightningEnvironment() if isinstance(self._cluster_environment, ClusterEnvironment): @@ -444,23 +456,15 @@ def _choose_and_init_cluster_environment(self): if env_type.detect(): self.cluster_environment = env_type() -<<<<<<< HEAD - @property - def use_bagua(self) -> bool: - return self._strategy_type == _StrategyType.BAGUA @property def _is_sharded_training_type(self) -> bool: return isinstance(self._strategy, (DDPShardedStrategy, DDPSpawnShardedStrategy)) -======= ->>>>>>> dccae1d6f (update) def _is_slurm_managing_tasks(self): - """ - used by choosing cluster enviroment - """ + """used by choosing cluster enviroment.""" if ( - #(not self._strategy_flag=="ddp" and not self._strategy_flag=="ddp2") + # (not self._strategy_flag=="ddp" and not self._strategy_flag=="ddp2") # the above logic moved to _select_strategy(), only check _is_slurm_managing_tasks() # when strategy flag is ddp or ddp2 not SLURMEnvironment.detect() @@ -479,7 +483,7 @@ def _choose_strategy(self): if self._accelerator_flag == "ipu": self._strategy_flag = "ipu" elif self._accelerator_flag == "tpu": - if self._parallel_devices and len(self._parallel_devices)>1: + if self._parallel_devices and len(self._parallel_devices) > 1: self._strategy_flag = "tpu_spawn" else: self._srategy_flag = SingleTPUStrategy() @@ -490,13 +494,12 @@ def _choose_strategy(self): self._strategy_flag = "ddp" elif len(self._parallel_devices) <= 1: device = torch.device("cuda") if self._accelerator_flag == "gpu" else "cpu" - self._strategy_flag = SingleDeviceStrategy(device = device) + self._strategy_flag = SingleDeviceStrategy(device=device) elif len(self._parallel_devices) > 1: self._strategy_flag = "ddp_spawn" else: self._strategy_flag = "ddp" - def _strategy_fallbacks(self): _strategy_flag = "" if isinstance(self._strategy_flag, Strategy) else self._strategy_flag if _strategy_flag == "ddp_cpu": @@ -505,7 +508,7 @@ def _strategy_fallbacks(self): "`accelerator='ddp_cpu'` is not supported on TPU machines. " "Learn more: https://github.com/PyTorchLightning/pytorch-lightning/issues/7810" ) - if self._device_flag ==1 and self._num_nodes_flag > 1: + if self._device_flag == 1 and self._num_nodes_flag > 1: _strategy_flag = "ddp" else: _strategy_flag = "ddp_spawn" @@ -513,12 +516,12 @@ def _strategy_fallbacks(self): rank_zero_warn( "You requested one or more GPUs, but set `accelerator='ddp_cpu'`. Training will not use GPUs." 
) - if "ddp_spawn" in _strategy_flag and (TorchElasticEnvironment.detect() or KubeflowEnvironment.detect() or self._is_slurm_managing_tasks()): + if "ddp_spawn" in _strategy_flag and ( + TorchElasticEnvironment.detect() or KubeflowEnvironment.detect() or self._is_slurm_managing_tasks() + ): _strategy_flag = "ddp" if _strategy_flag in ("dp", "ddp2") and self._accelerator_flag == "cpu": - rank_zero_warn( - f"{_strategy_flag!r} is not supported on CPUs, hence setting `strategy='ddp'`." - ) + rank_zero_warn(f"{_strategy_flag!r} is not supported on CPUs, hence setting `strategy='ddp'`.") _strategy_flag = "ddp" if _strategy_flag: self._strategy_flag = _strategy_flag @@ -537,7 +540,7 @@ def _check_capatibility_and_init_precision(self): self.precision_plugin = self._precision_flag return - if self._accelerator_flag =="ipu": + if self._accelerator_flag == "ipu": self.precision_plugin = IPUPrecisionPlugin(self._precision_flag) if self._accelerator_flag == "tpu": if self._precision_flag == 32: @@ -551,7 +554,9 @@ def _check_capatibility_and_init_precision(self): ) self.precision_plugin = TPUBf16PrecisionPlugin() if self._strategy_flag == "deepspeed" or isinstance(self._strategy_flag, DeepSpeedStrategy): - self.precision_plugin = DeepSpeedPrecisionPlugin(self._precision_flag, self._amp_type_flag, self._amp_level_flag) + self.precision_plugin = DeepSpeedPrecisionPlugin( + self._precision_flag, self._amp_type_flag, self._amp_level_flag + ) if self._precision_flag == 32: self.precision_plugin = PrecisionPlugin() @@ -575,7 +580,7 @@ def _check_capatibility_and_init_precision(self): ) if self._amp_type_flag == AMPType.NATIVE: - device = "cpu" if self._accelerator_flag=="cpu" else "cuda" + device = "cpu" if self._accelerator_flag == "cpu" else "cuda" if isinstance(self.strategy, (DDPShardedStrategy, DDPSpawnShardedStrategy)): return ShardedNativeMixedPrecisionPlugin(self._precision_flag, device) @@ -595,17 +600,17 @@ def _precision_misconfig_check(self): f"`Trainer(accelerator='ipu', precision={self._precision_flag!r})` is not supported." ) if self._accelerator_flag == "tpu" and self._precision_flag == 64: - raise MisconfigurationException( - "`Trainer(accelerator='tpu', precision=64)` is not implemented." - " Please, open an issue in `https://github.com/PyTorchLightning/pytorch-lightning/issues`" - " requesting this feature." - ) + raise MisconfigurationException( + "`Trainer(accelerator='tpu', precision=64)` is not implemented." + " Please, open an issue in `https://github.com/PyTorchLightning/pytorch-lightning/issues`" + " requesting this feature." + ) if self._precision_flag == 16 and self._accelerator_flag == "cpu" and self._amp_type_flag == AMPType.APEX: - # apex was explicitly passed, not a good idea to silently switch to native AMP - raise MisconfigurationException( - "You passed `Trainer(accelerator='cpu', precision=16, amp_type='apex')`" - " but apex AMP not supported on CPU." - ) + # apex was explicitly passed, not a good idea to silently switch to native AMP + raise MisconfigurationException( + "You passed `Trainer(accelerator='cpu', precision=16, amp_type='apex')`" + " but apex AMP not supported on CPU." + ) if self._precision_flag == "bf16" and self._amp_type_flag != AMPType.NATIVE: raise MisconfigurationException( f"You passed `Trainer(amp_type={self._amp_type_flag.value!r}, precision='bf16')` but it's not supported." @@ -618,7 +623,6 @@ def _precision_misconfig_check(self): "Sharded plugins are not supported with apex, please switch to `amp_backend='native'`." 
) - def _lazy_init_strategy(self): # set strategy properties self.strategy.accelerator = self.accelerator @@ -631,6 +635,7 @@ def _lazy_init_strategy(self): self.strategy.parallel_devices = self._parallel_devices from pytorch_lightning.utilities import _IS_INTERACTIVE + interactive_compatible_strategy = ("dp", "ddp_spawn", "ddp_sharded_spawn", "tpu_spawn") if _IS_INTERACTIVE and self.strategy.distributed_backend not in interactive_compatible_strategy: raise MisconfigurationException( @@ -702,13 +707,22 @@ def num_gpus(self) -> int: def gpus(self): return self._gpus if isinstance(self.accelerator, GPUAccelerator) else None - def is_distributed(self): # Used for custom plugins. # Custom plugins should implement is_distributed property. if hasattr(self.strategy, "is_distributed") and not isinstance(self.accelerator, TPUAccelerator): return self.strategy.is_distributed - distributed_strategy = (DDP2Strategy, DDPStrategy, DDPSpawnShardedStrategy, DDPShardedStrategy, DDPFullyShardedStrategy, DDPSpawnStrategy, DeepSpeedStrategy, TPUSpawnStrategy, HorovodStrategy) + distributed_strategy = ( + DDP2Strategy, + DDPStrategy, + DDPSpawnShardedStrategy, + DDPShardedStrategy, + DDPFullyShardedStrategy, + DDPSpawnStrategy, + DeepSpeedStrategy, + TPUSpawnStrategy, + HorovodStrategy, + ) is_distributed = isinstance(self.strategy, distributed_strategy) if isinstance(self.accelerator, TPUAccelerator): is_distributed |= self.strategy.is_distributed diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 93fd6187be1ea..bd648dd99d332 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -435,23 +435,23 @@ def __init__( self._data_connector = DataConnector(self, multiple_trainloader_mode) self._accelerator_connector = AcceleratorConnector( - num_processes = num_processes, - devices = devices, - tpu_cores = tpu_cores, - ipus = ipus, - accelerator = accelerator, - strategy = strategy, - gpus = gpus, - gpu_ids = gpu_ids, - num_nodes = num_nodes, - sync_batchnorm = sync_batchnorm, - benchmark = benchmark, - replace_sampler_ddp = replace_sampler_ddp, - deterministic = deterministic, - precision = precision, - amp_type = amp_backend, - amp_level = amp_level, - plugins = plugins, + num_processes=num_processes, + devices=devices, + tpu_cores=tpu_cores, + ipus=ipus, + accelerator=accelerator, + strategy=strategy, + gpus=gpus, + gpu_ids=gpu_ids, + num_nodes=num_nodes, + sync_batchnorm=sync_batchnorm, + benchmark=benchmark, + replace_sampler_ddp=replace_sampler_ddp, + deterministic=deterministic, + precision=precision, + amp_type=amp_backend, + amp_level=amp_level, + plugins=plugins, ) self.logger_connector = LoggerConnector(self, log_gpu_memory) self._callback_connector = CallbackConnector(self) diff --git a/pytorch_lightning/utilities/exceptions.py b/pytorch_lightning/utilities/exceptions.py index 24fbbac44d156..a0de06036792f 100644 --- a/pytorch_lightning/utilities/exceptions.py +++ b/pytorch_lightning/utilities/exceptions.py @@ -16,9 +16,11 @@ class MisconfigurationException(Exception): """Exception used to inform users of misuse with PyTorch Lightning.""" + class DeviceNotAvailibleException(Exception): """Exception used to inform users that requested devices are not availible.""" + class ImpactableConfigurationException(Exception): """Exception used to inform users that configuration impactable with each other.""" diff --git a/pytorch_lightning/utilities/imports.py b/pytorch_lightning/utilities/imports.py index 602c8b50c92e9..24355097ce34f 
100644 --- a/pytorch_lightning/utilities/imports.py +++ b/pytorch_lightning/utilities/imports.py @@ -133,7 +133,7 @@ def _compare_version(package: str, op: Callable, version: str, use_base_version: else: _IPU_AVAILABLE = False -_GPU_AVAILABLE = torch.cuda.is_available() and torch.cuda.device_count()>0 +_GPU_AVAILABLE = torch.cuda.is_available() and torch.cuda.device_count() > 0 # experimental feature within PyTorch Lightning. From c2730f941ac3c78ee2e3b25b09a32232b6cd011b Mon Sep 17 00:00:00 2001 From: Siyu Wang Date: Tue, 25 Jan 2022 16:40:39 -0800 Subject: [PATCH 05/69] update --- .../connectors/accelerator_connector.py | 126 ++++++++++-------- 1 file changed, 72 insertions(+), 54 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index ec30e3469d451..344d32dcfb310 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -14,8 +14,7 @@ import logging import os -from typing import List, Optional, Sequence, Union -from weakref import proxy +from typing import List, Optional, Union import torch @@ -57,31 +56,23 @@ DDPStrategy, DeepSpeedStrategy, HorovodStrategy, - IPUStrategy, SingleDeviceStrategy, SingleTPUStrategy, Strategy, StrategyRegistry, TPUSpawnStrategy, ) -from pytorch_lightning.utilities import _AcceleratorType, _StrategyType, AMPType, device_parser -from pytorch_lightning.utilities.enums import PrecisionType -from pytorch_lightning.utilities.exceptions import ( - DeviceNotAvailibleException, - ImpactableConfigurationException, - MisconfigurationException, -) -from pytorch_lightning.utilities.imports import ( - _GPU_AVAILABLE, - _HOROVOD_AVAILABLE, - _IPU_AVAILABLE, - _TORCH_GREATER_EQUAL_1_8, - _TPU_AVAILABLE, +from pytorch_lightning.utilities import ( + _StrategyType, + AMPType, + device_parser, + rank_zero_deprecation, + rank_zero_info, + rank_zero_warn, ) -from pytorch_lightning.utilities.rank_zero import rank_zero_deprecation, rank_zero_info, rank_zero_warn - -if _HOROVOD_AVAILABLE: - import horovod.torch as hvd +from pytorch_lightning.utilities.enums import PrecisionType +from pytorch_lightning.utilities.exceptions import MisconfigurationException +from pytorch_lightning.utilities.imports import _GPU_AVAILABLE, _HOROVOD_AVAILABLE, _IPU_AVAILABLE, _TPU_AVAILABLE log = logging.getLogger(__name__) @@ -118,7 +109,8 @@ def __init__( B. strategy flag could be : 1. strategy class 2. strategy str registered with strategyRegister - 3. strategy str in _strategy_type enum which listed in each strategy as backend (registed these too, and _strategy_type could be deprecated) + 3. strategy str in _strategy_type enum which listed in each strategy as + backend (registed these too, and _strategy_type could be deprecated) C. plugins flag could be: 1. List of str, which could contains: @@ -168,7 +160,7 @@ def __init__( # --Cluster_environment----------------------------------------------------- self._choose_and_init_cluster_environment() - # --Strategy Part 1 : choose strategy --------------------------------------- + # --Strategy Part 1 : choose strategy and init strategy --------------------------------------- if self._strategy_flag is None: self._choose_strategy() # Reset strategy even user has specificed one @@ -192,7 +184,8 @@ def _config_check_and_set_final_flags(self, strategy, accelerator, precision, pl 2. 
accelerator: if accelerator flag is Accelerator related flag or class, set self._acceelrator_flag; If accelerator is strategy related, logic handled in 1 above 3. precision could be set by precision and plugins flag - 4. plugins could be duplicated in strategy (handled by 1), precision (handled by 3), set checkpoint_io and cluster_environment + 4. plugins could be duplicated in strategy (handled by 1), precision (handled by 3), + set checkpoint_io and cluster_environment """ ( self._strategy_flag, @@ -256,6 +249,11 @@ def _config_check_and_set_final_flags(self, strategy, accelerator, precision, pl self._strategy_flag = accelerator if precision: + if not PrecisionType.supported_type(precision): + raise MisconfigurationException( + f"Precision {repr(precision)} is invalid. " + f"Allowed precision values: {PrecisionType.supported_types()}" + ) self._precision_flag = precision # handle duplications and conflict if plugins: @@ -285,7 +283,9 @@ def _config_check_and_set_final_flags(self, strategy, accelerator, precision, pl elif isinstance(plugin, ClusterEnvironment): self._cluster_environment = plugin else: - raise MisconfigurationException(f"Does not recognize flag {plugin}") + raise MisconfigurationException( + f"Found invalid type for plugin {plugin}. Expected a precision or training type plugin." + ) # if user pass in a strategy class which has accelerator, precision, checkpoint or cluster env set up if self._strategy_flag and isinstance(self._strategy_flag, Strategy): @@ -297,9 +297,11 @@ def _config_check_and_set_final_flags(self, strategy, accelerator, precision, pl else: self._accelerator_flag = self._strategy_flag.accelerator if self._strategy_flag.precision_plugin: - # precision has default value 32, we can not tell whether user set it or not [RFC] remove default from trainer? + # precision has default value 32, we can not tell whether user set it or not + # [RFC] remove default from trainer? # if self._precision_flag: - # raise MisconfigurationException("precision set through both strategy class and flags, choose one place to set") + # raise MisconfigurationException("precision set through both strategy class and flags, + # choose one place to set") # else: self._precision_flag = self._strategy_flag.precision_plugin if self._strategy_flag.checkpoint_io: @@ -318,13 +320,13 @@ def _config_check_and_set_final_flags(self, strategy, accelerator, precision, pl self._cluster_environment = getattr(self._strategy_flag, "cluster_environment") amp_type = amp_type.lower() if isinstance(amp_type, str) else None - self._amp_type_flag = AMPType.from_str(amp_type) if amp_type is not None else None + self._amp_type_flag = AMPType.from_str(amp_type) + print(f"a:{amp_type}, b{self._amp_type_flag}") - # TODO still working on these flags - # if amp_level is not None and self._amp_type_flag != AMPType.APEX: - # raise MisconfigurationException( - # f"You have asked for `amp_level={self._amp_level_flag!r}` but it's only supported with `amp_backend='apex'`." - # ) + if amp_level is not None and self._amp_type_flag != AMPType.APEX: + raise MisconfigurationException( + f"You have asked for `amp_level={amp_level!r}` but it's only supported with `amp_backend='apex'`." 
+ ) self._amp_level_flag = amp_level def _device_config_check_and_set_final_flags(self, devices, num_nodes, num_processes, gpus, ipus, tpu_cores): @@ -334,13 +336,13 @@ def _device_config_check_and_set_final_flags(self, devices, num_nodes, num_proce self._num_nodes_flag = int(num_nodes) if num_nodes is not None else 1 self._device_flag = devices - ##### to be deleted v1.7 + # --- to be deleted v1.7 deprecated_devices_specific_flag = num_processes or gpus or ipus or tpu_cores if deprecated_devices_specific_flag: self._mapping_deprecated_devices_specfic_info_to_accelerator_and_device_flag( devices, deprecated_devices_specific_flag, num_processes, gpus, ipus, tpu_cores ) - ##### deleted end + # --- deleted end if devices == "auto": if self._accelerator_flag is None: raise MisconfigurationException( @@ -351,16 +353,17 @@ def _device_config_check_and_set_final_flags(self, devices, num_nodes, num_proce def _mapping_deprecated_devices_specfic_info_to_accelerator_and_device_flag( self, devices, deprecated_devices_specific_flag, num_processes, gpus, ipus, tpu_cores ): - ##### to be deleted v1.7vbg + # ---- to be deleted v1.7vbg # set devices base on num_processes, gpus, ipus, tpu_cores if devices: rank_zero_warn( - f"The flag `devices={devices}` will be ignored, instand the device specific number {deprecated_devices_specific_flag} will be used" + f"The flag `devices={devices}` will be ignored, " + f"instand the device specific number {deprecated_devices_specific_flag} will be used" ) if [(num_processes is not None), (gpus is not None), (ipus is not None), (tpu_cores is not None)].count( True ) > 1: - rank_zero_warn(f"more than one device specifc flag has been set") + rank_zero_warn("more than one device specifc flag has been set") self._device_flag = deprecated_devices_specific_flag if not self._accelerator_flag: @@ -373,7 +376,7 @@ def _mapping_deprecated_devices_specfic_info_to_accelerator_and_device_flag( self._accelerator_flag = "tpu" if ipus: self._accelerator_flag = "ipu" - #### delete end + # --- delete end def _choose_accelerator(self): if self._accelerator_flag == "auto": @@ -391,13 +394,13 @@ def _choose_accelerator(self): else: self._accelerator_flag = "cpu" - def _check_device_availibility(self): - for accelerator_flag, available in zip( - self._existing_accelerator_type, [_TPU_AVAILABLE, _IPU_AVAILABLE, _GPU_AVAILABLE, True] - ): - if self._accelerator_flag == accelerator_flag: - if not available: - raise DeviceNotAvailibleException(f"{accelerator_flag} not avalible") + # def _check_device_availibility(self): + # for accelerator_flag, available in zip( + # self._existing_accelerator_type, [_TPU_AVAILABLE, _IPU_AVAILABLE, _GPU_AVAILABLE, True] + # ): + # if self._accelerator_flag == accelerator_flag: + # if not available: + # raise DeviceNotAvailibleException(f"{accelerator_flag} not avalible") # TODO in progress for setting up devices def _set_parallel_devices_and_init_accelerator(self): @@ -493,7 +496,12 @@ def _choose_strategy(self): elif TorchElasticEnvironment.detect() or KubeflowEnvironment.detect() or self._is_slurm_managing_tasks(): self._strategy_flag = "ddp" elif len(self._parallel_devices) <= 1: - device = torch.device("cuda") if self._accelerator_flag == "gpu" else "cpu" + # device = torch.device("cuda", 1) if self._accelerator_flag == "gpu" else "cpu" + device = ( + device_parser.determine_root_gpu_device(self._parallel_devices) + if self._accelerator_flag == "gpu" + else "cpu" + ) self._strategy_flag = SingleDeviceStrategy(device=device) elif len(self._parallel_devices) > 
1: self._strategy_flag = "ddp_spawn" @@ -583,14 +591,17 @@ def _check_capatibility_and_init_precision(self): device = "cpu" if self._accelerator_flag == "cpu" else "cuda" if isinstance(self.strategy, (DDPShardedStrategy, DDPSpawnShardedStrategy)): - return ShardedNativeMixedPrecisionPlugin(self._precision_flag, device) + self.precision_plugin = ShardedNativeMixedPrecisionPlugin(self._precision_flag, device) if isinstance(self.strategy, DDPFullyShardedStrategy): - return FullyShardedNativeMixedPrecisionPlugin(self._precision_flag, device) - return NativeMixedPrecisionPlugin(self._precision_flag, device) + self.precision_plugin = FullyShardedNativeMixedPrecisionPlugin(self._precision_flag, device) + self.precision_plugin = NativeMixedPrecisionPlugin(self._precision_flag, device) + if self._amp_type_flag == AMPType.APEX: self._amp_level_flag = self._amp_level_flag or "O2" self.precision_plugin = ApexMixedPrecisionPlugin(self._amp_level_flag) - self.precision_plugin = PrecisionPlugin() + + if not self.precision_plugin: + self.precision_plugin = PrecisionPlugin() def _precision_misconfig_check(self): @@ -613,10 +624,9 @@ def _precision_misconfig_check(self): ) if self._precision_flag == "bf16" and self._amp_type_flag != AMPType.NATIVE: raise MisconfigurationException( - f"You passed `Trainer(amp_type={self._amp_type_flag.value!r}, precision='bf16')` but it's not supported." - " Try using `amp_type='native'` instead." + f"You passed `Trainer(amp_type={self._amp_type_flag.value!r}, precision='bf16')` but " + "it's not supported. Try using `amp_type='native'` instead." ) - if self._precision_flag in (16, "bf16") and self._amp_type_flag == AMPType.APEX: if isinstance(self.strategy, (DDPShardedStrategy, DDPSpawnShardedStrategy, DDPFullyShardedStrategy)): raise MisconfigurationException( @@ -676,7 +686,7 @@ def num_nodes(self): @property def num_processes(self): - return self.devices + return self.devices if self.devices is not None else 1 @property def root_gpu(self) -> Optional[int]: @@ -707,6 +717,7 @@ def num_gpus(self) -> int: def gpus(self): return self._gpus if isinstance(self.accelerator, GPUAccelerator) else None + @property def is_distributed(self): # Used for custom plugins. # Custom plugins should implement is_distributed property. 
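
The hunk above reworks how `_check_capatibility_and_init_precision` picks `self.precision_plugin` from the accelerator, precision and AMP flags. As a reading aid, here is a minimal, self-contained sketch of that kind of flag-to-plugin resolution; the `Precision` class and the plugin names are stand-ins for illustration, not Lightning's real plugin classes, and a pure function with early returns is used so that no later assignment can silently overwrite an earlier, more specific choice:

```python
from dataclasses import dataclass


@dataclass
class Precision:
    """Stand-in for a precision plugin; only the chosen name matters here."""
    name: str


def choose_precision(accelerator: str, precision, amp_type: str = "native") -> Precision:
    # IPU and TPU accelerators own their precision handling.
    if accelerator == "ipu":
        return Precision("ipu")
    if accelerator == "tpu":
        return Precision("tpu_bf16" if precision in (16, "bf16") else "tpu")
    if precision == 32:
        return Precision("fp32")
    if precision == 64:
        return Precision("fp64")
    if precision in (16, "bf16"):
        if amp_type == "apex":
            return Precision("apex")  # an amp_level such as "O2" would apply here
        device = "cpu" if accelerator == "cpu" else "cuda"
        return Precision(f"native_amp[{device}]")
    raise RuntimeError(f"No precision plugin for precision={precision!r}")


if __name__ == "__main__":
    assert choose_precision("gpu", 16).name == "native_amp[cuda]"
    assert choose_precision("tpu", 16).name == "tpu_bf16"
    assert choose_precision("cpu", 32).name == "fp32"
```
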
@@ -728,12 +739,19 @@ def is_distributed(self): is_distributed |= self.strategy.is_distributed return is_distributed + @property def has_ipu(self): return isinstance(self.accelerator, IPUAccelerator) + @property + def use_ipu(self): + return self.has_ipu + + @property def has_tpu(self): return isinstance(self.accelerator, TPUAccelerator) + @property def use_dp(self): return isinstance(self.strategy, DataParallelStrategy) From c01aee5af7c4fc39c108a801e9f0abbe66c3a337 Mon Sep 17 00:00:00 2001 From: Siyu Wang Date: Tue, 25 Jan 2022 16:45:40 -0800 Subject: [PATCH 06/69] remove print --- .../trainer/connectors/accelerator_connector.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 344d32dcfb310..9eaab976b8a38 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -135,7 +135,7 @@ def __init__( # --Parsing_flags------------------------------------------------------ # Get registered strategies, existing accelerators and precision plugins self._existing_strategies_str = StrategyRegistry.available_strategies() - print(self._existing_strategies_str) + # print(self._existing_strategies_str) self._existing_accelerator_type = ["tpu", "ipu", "gpu", "cpu"] self._supported_precision = PrecisionType.supported_types() @@ -264,7 +264,6 @@ def _config_check_and_set_final_flags(self, strategy, accelerator, precision, pl if plugins: plugins = [plugins] if not isinstance(plugins, list) else plugins for plugin in plugins: - print(plugin) if isinstance(plugin, Strategy) or isinstance(plugin, str) and plugin in self._existing_strategies_str: self._strategy_flag = plugin rank_zero_deprecation( @@ -321,7 +320,6 @@ def _config_check_and_set_final_flags(self, strategy, accelerator, precision, pl amp_type = amp_type.lower() if isinstance(amp_type, str) else None self._amp_type_flag = AMPType.from_str(amp_type) - print(f"a:{amp_type}, b{self._amp_type_flag}") if amp_level is not None and self._amp_type_flag != AMPType.APEX: raise MisconfigurationException( @@ -535,14 +533,12 @@ def _strategy_fallbacks(self): self._strategy_flag = _strategy_flag def _init_strategy(self): - print(self._strategy_flag) if isinstance(self._strategy_flag, str): self.strategy = StrategyRegistry.get(self._strategy_flag) else: self.strategy = self._strategy_flag def _check_capatibility_and_init_precision(self): - print(self._precision_flag) self._precision_misconfig_check() if isinstance(self._precision_flag, PrecisionPlugin): self.precision_plugin = self._precision_flag From d45eba0b56ca934f7fdbdccca3e9c424caf51aa5 Mon Sep 17 00:00:00 2001 From: Siyu Wang Date: Wed, 26 Jan 2022 17:17:46 -0800 Subject: [PATCH 07/69] fix more tests --- .../connectors/accelerator_connector.py | 82 ++++++++---- .../test_accelerator_connector.py | 10 +- tests/trainer/test_trainer.py | 123 +++++++----------- 3 files changed, 112 insertions(+), 103 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 9eaab976b8a38..e89cb5d2ebf48 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -56,6 +56,7 @@ DDPStrategy, DeepSpeedStrategy, HorovodStrategy, + ParallelStrategy, SingleDeviceStrategy, SingleTPUStrategy, Strategy, @@ -72,7 +73,7 @@ ) 
from pytorch_lightning.utilities.enums import PrecisionType from pytorch_lightning.utilities.exceptions import MisconfigurationException -from pytorch_lightning.utilities.imports import _GPU_AVAILABLE, _HOROVOD_AVAILABLE, _IPU_AVAILABLE, _TPU_AVAILABLE +from pytorch_lightning.utilities.imports import _HOROVOD_AVAILABLE, _IPU_AVAILABLE, _TPU_AVAILABLE log = logging.getLogger(__name__) @@ -131,7 +132,7 @@ def __init__( C. When multiple flag set to the same thing? (ignore? not handled for now) """ - + torch.backends.cudnn.benchmark = benchmark # --Parsing_flags------------------------------------------------------ # Get registered strategies, existing accelerators and precision plugins self._existing_strategies_str = StrategyRegistry.available_strategies() @@ -164,7 +165,7 @@ def __init__( if self._strategy_flag is None: self._choose_strategy() # Reset strategy even user has specificed one - self._strategy_fallbacks() + self._strategy_check_and_fallbacks() self._init_strategy() # --Precision---------------------------------------------------------------- @@ -196,6 +197,9 @@ def _config_check_and_set_final_flags(self, strategy, accelerator, precision, pl self._amp_level_flag, self._amp_type_flag, ) = (None, None, None, None, None, amp_type, amp_level) + if plugins: + plugins = [plugins] if not isinstance(plugins, list) else plugins + if strategy: self._strategy_flag = strategy if strategy == "ddp_cpu": @@ -210,24 +214,28 @@ def _config_check_and_set_final_flags(self, strategy, accelerator, precision, pl ) # handle duplications and conflict if isinstance(accelerator, Strategy) and strategy != accelerator: - raise MisconfigurationException("strategy already set through strategy flag, duplicated in accelerator") + raise MisconfigurationException( + "strategy already set through strategy flag, but have also passed in through accelerator" + ) if ( isinstance(accelerator, str) and accelerator in self._existing_strategies_str and strategy != accelerator ): raise MisconfigurationException( - "strategy str already set through strategy flag, duplicated in accelerator" + "strategy str already set through strategy flag, but have also passed in through accelerator" ) if plugins: for plugin in plugins: - if isinstance(plugin, Strategy) and strategy != plugin: + if isinstance(plugin, Strategy): raise MisconfigurationException( - "strategy already set through strategy flag, duplicated in plugins" + f"You have passed `Trainer(strategy)`" + f" and you can only specify one strategy, but you have passed {plugin} as a plugin." ) if isinstance(plugin, str) and plugin in self._existing_strategies_str: raise MisconfigurationException( - "strategy already set through strategy flag, duplicated in plugins" + f"You have passed `Trainer(strategy)`" + f" and you can only specify one strategy, but you have passed {plugin} as a plugin." 
) if ( @@ -262,7 +270,6 @@ def _config_check_and_set_final_flags(self, strategy, accelerator, precision, pl raise MisconfigurationException("precision set in both precision flag and plugin flag") if plugins: - plugins = [plugins] if not isinstance(plugins, list) else plugins for plugin in plugins: if isinstance(plugin, Strategy) or isinstance(plugin, str) and plugin in self._existing_strategies_str: self._strategy_flag = plugin @@ -334,13 +341,13 @@ def _device_config_check_and_set_final_flags(self, devices, num_nodes, num_proce self._num_nodes_flag = int(num_nodes) if num_nodes is not None else 1 self._device_flag = devices - # --- to be deleted v1.7 + # Delete when remove num_processes, gpus, ipus and tpu_cores deprecated_devices_specific_flag = num_processes or gpus or ipus or tpu_cores if deprecated_devices_specific_flag: self._mapping_deprecated_devices_specfic_info_to_accelerator_and_device_flag( devices, deprecated_devices_specific_flag, num_processes, gpus, ipus, tpu_cores ) - # --- deleted end + # Delete end if devices == "auto": if self._accelerator_flag is None: raise MisconfigurationException( @@ -351,7 +358,6 @@ def _device_config_check_and_set_final_flags(self, devices, num_nodes, num_proce def _mapping_deprecated_devices_specfic_info_to_accelerator_and_device_flag( self, devices, deprecated_devices_specific_flag, num_processes, gpus, ipus, tpu_cores ): - # ---- to be deleted v1.7vbg # set devices base on num_processes, gpus, ipus, tpu_cores if devices: rank_zero_warn( @@ -366,15 +372,14 @@ def _mapping_deprecated_devices_specfic_info_to_accelerator_and_device_flag( if not self._accelerator_flag: # set accelerator type base on num_processes, gpus, ipus, tpu_cores - if num_processes: - self._accelerator_flag = "cpu" - if gpus: - self._accelerator_flag = "gpu" - if tpu_cores: - self._accelerator_flag = "tpu" if ipus: self._accelerator_flag = "ipu" - # --- delete end + if tpu_cores: + self._accelerator_flag = "tpu" + if gpus: + self._accelerator_flag = "gpu" + if num_processes: + self._accelerator_flag = "cpu" def _choose_accelerator(self): if self._accelerator_flag == "auto": @@ -382,7 +387,7 @@ def _choose_accelerator(self): self._accelerator_flag = "tpu" elif _IPU_AVAILABLE: self._accelerator_flag = "ipu" - elif _GPU_AVAILABLE: + elif torch.cuda.is_available() and torch.cuda.device_count() > 0: self._accelerator_flag = "gpu" else: self._accelerator_flag = "cpu" @@ -487,7 +492,7 @@ def _choose_strategy(self): if self._parallel_devices and len(self._parallel_devices) > 1: self._strategy_flag = "tpu_spawn" else: - self._srategy_flag = SingleTPUStrategy() + self._srategy_flag = SingleTPUStrategy(device=self._parallel_devices[0]) else: if self._num_nodes_flag > 1: self._strategy_flag = "ddp" @@ -506,8 +511,10 @@ def _choose_strategy(self): else: self._strategy_flag = "ddp" - def _strategy_fallbacks(self): + def _strategy_check_and_fallbacks(self): + # fallback apply to user pass in object as well, so get the _strategy_flag first _strategy_flag = "" if isinstance(self._strategy_flag, Strategy) else self._strategy_flag + if _strategy_flag == "ddp_cpu": if _TPU_AVAILABLE: raise MisconfigurationException( @@ -529,6 +536,12 @@ def _strategy_fallbacks(self): if _strategy_flag in ("dp", "ddp2") and self._accelerator_flag == "cpu": rank_zero_warn(f"{_strategy_flag!r} is not supported on CPUs, hence setting `strategy='ddp'`.") _strategy_flag = "ddp" + if isinstance(self.accelerator, TPUAccelerator) and "tpu" not in _strategy_flag: + raise ValueError( + "The `TPUAccelerator` can only be 
used with a `SingleTPUStrategy` or `TPUSpawnStrategy`," + f" found {_strategy_flag}." + ) + if _strategy_flag: self._strategy_flag = _strategy_flag @@ -600,7 +613,7 @@ def _check_capatibility_and_init_precision(self): self.precision_plugin = PrecisionPlugin() def _precision_misconfig_check(self): - + # TODO change exception type to ImpactableConfigurationException if self._accelerator_flag == "ipu": if self._precision_flag not in (16, 32): raise MisconfigurationException( @@ -612,6 +625,13 @@ def _precision_misconfig_check(self): " Please, open an issue in `https://github.com/PyTorchLightning/pytorch-lightning/issues`" " requesting this feature." ) + if self._accelerator_flag == "tpu" and isinstance( + self._precision_flag, (TPUPrecisionPlugin, TPUBf16PrecisionPlugin) + ): + raise ValueError( + f"The `TPUAccelerator` can only be used with a `TPUPrecisionPlugin`," + f" found: {self.strategy.precision_plugin}." + ) if self._precision_flag == 16 and self._accelerator_flag == "cpu" and self._amp_type_flag == AMPType.APEX: # apex was explicitly passed, not a good idea to silently switch to native AMP raise MisconfigurationException( @@ -694,7 +714,12 @@ def root_gpu(self) -> Optional[int]: @property def devices(self): - return len(self._parallel_devices) + if isinstance(self.strategy, SingleDeviceStrategy): + return 1 + elif isinstance(self.strategy, ParallelStrategy): + return len(self.strategy.parallel_devices) + else: + return 0 @property def tpu_cores(self) -> int: @@ -706,13 +731,20 @@ def ipus(self) -> int: @property def num_gpus(self) -> int: - return self.devices + if isinstance(self.accelerator, GPUAccelerator): + return self.devices + else: + return 0 # def parallel_device_ids(): @property def gpus(self): return self._gpus if isinstance(self.accelerator, GPUAccelerator) else None + @property + def parallel_device_ids(self): + return [i for i in range(len(self.parallel_devices))] + @property def is_distributed(self): # Used for custom plugins. 
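
The properties in the hunks above (`devices`, `num_gpus`, `gpus`, `parallel_device_ids`) now derive device information from the already-initialized strategy and accelerator instead of echoing the raw Trainer flags. The following is a minimal sketch of that derivation pattern, using hypothetical stand-in classes rather than Lightning's real strategy hierarchy:

```python
from typing import List, Optional


class Strategy:
    ...


class SingleDeviceStrategy(Strategy):
    ...


class ParallelStrategy(Strategy):
    def __init__(self, parallel_devices: List[str]):
        self.parallel_devices = parallel_devices


def device_count(strategy: Strategy) -> int:
    # Derive the count from the strategy itself rather than from the raw flag.
    if isinstance(strategy, SingleDeviceStrategy):
        return 1
    if isinstance(strategy, ParallelStrategy):
        return len(strategy.parallel_devices)
    return 0


def parallel_device_ids(strategy: Strategy, on_gpu: bool) -> Optional[List[int]]:
    # GPU runs report local device indices; other accelerators report None.
    if not on_gpu or not isinstance(strategy, ParallelStrategy):
        return None
    return list(range(len(strategy.parallel_devices)))


if __name__ == "__main__":
    ddp_like = ParallelStrategy(parallel_devices=["cuda:0", "cuda:1"])
    assert device_count(ddp_like) == 2
    assert parallel_device_ids(ddp_like, on_gpu=True) == [0, 1]
    assert device_count(SingleDeviceStrategy()) == 1
```
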
diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py index 8617b5a2c8095..a6b65e9542f0c 100644 --- a/tests/accelerators/test_accelerator_connector.py +++ b/tests/accelerators/test_accelerator_connector.py @@ -601,8 +601,9 @@ def test_exception_when_strategy_used_with_accelerator(): def test_exception_when_strategy_used_with_plugins(): - with pytest.raises(MisconfigurationException, match="only specify one training type plugin, but you have passed"): - Trainer(plugins="ddp_find_unused_parameters_false", strategy="ddp_spawn") + with pytest.raises(MisconfigurationException, match="only specify one strategy, but you have passed"): + with pytest.deprecated_call(match=r"`strategy` to the `plugins` flag in Trainer has been deprecated"): + Trainer(plugins="ddp_find_unused_parameters_false", strategy="ddp_spawn") def test_exception_invalid_strategy(): @@ -898,13 +899,14 @@ def test_unsupported_tpu_choice(monkeypatch): with pytest.raises(MisconfigurationException, match=r"accelerator='tpu', precision=64\)` is not implemented"): Trainer(accelerator="tpu", precision=64) + # if user haven't set strategy, accelerator_connector will choose the TPUSingleStrategy or TPUSpawnStrategy with pytest.raises(ValueError, match="TPUAccelerator` can only be used with a `SingleTPUStrategy`"): with pytest.warns(UserWarning, match=r"accelerator='tpu', precision=16\)` but native AMP is not supported"): - Trainer(accelerator="tpu", precision=16) + Trainer(accelerator="tpu", precision=16, strategy="ddp") with pytest.raises(ValueError, match="TPUAccelerator` can only be used with a `SingleTPUStrategy`"): with pytest.warns(UserWarning, match=r"accelerator='tpu', precision=16\)` but apex AMP is not supported"): - Trainer(accelerator="tpu", precision=16, amp_backend="apex") + Trainer(accelerator="tpu", precision=16, amp_backend="apex", strategy="single_device") def test_unsupported_ipu_choice(monkeypatch): diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 587ff0b7b9f72..32aa94b8e0b2c 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -1177,81 +1177,75 @@ def val_dataloader(self): [ ( dict(accelerator=None, gpus=None), - dict(_strategy_type=None, _device_type=_AcceleratorType.CPU, num_gpus=0, num_processes=1), + dict(_strategy_type="single_device", _device_type=_AcceleratorType.CPU, num_gpus=0), ), ( dict(accelerator="dp", gpus=None), - dict(_strategy_type=None, _device_type=_AcceleratorType.CPU, num_gpus=0, num_processes=1), + dict(_strategy_type="ddp", _device_type=_AcceleratorType.CPU, num_gpus=0), ), ( dict(accelerator="ddp", gpus=None), - dict(_strategy_type=None, _device_type=_AcceleratorType.CPU, num_gpus=0, num_processes=1), + dict(_strategy_type="ddp", _device_type=_AcceleratorType.CPU, num_gpus=0), ), ( dict(accelerator="ddp", num_processes=2, gpus=None), - dict(_strategy_type=_StrategyType.DDP, _device_type=_AcceleratorType.CPU, num_gpus=0, num_processes=2), + dict(_strategy_type=_StrategyType.DDP, _device_type=_AcceleratorType.CPU, num_gpus=0), ), ( dict(accelerator="ddp", num_nodes=2, gpus=None), - dict(_strategy_type=_StrategyType.DDP, _device_type=_AcceleratorType.CPU, num_gpus=0, num_processes=1), + dict(_strategy_type=_StrategyType.DDP, _device_type=_AcceleratorType.CPU, num_gpus=0), ), ( dict(accelerator="ddp_cpu", num_processes=2, gpus=None), - dict( - _strategy_type=_StrategyType.DDP_SPAWN, _device_type=_AcceleratorType.CPU, num_gpus=0, num_processes=2 - ), + 
dict(_strategy_type=_StrategyType.DDP_SPAWN, _device_type=_AcceleratorType.CPU, num_gpus=0), ), ( dict(accelerator="ddp2", gpus=None), - dict(_strategy_type=None, _device_type=_AcceleratorType.CPU, num_gpus=0, num_processes=1), + dict(_strategy_type="ddp", _device_type=_AcceleratorType.CPU, num_gpus=0), ), ( dict(accelerator=None, gpus=1), - dict(_strategy_type=None, _device_type=_AcceleratorType.GPU, num_gpus=1, num_processes=1), + dict(_strategy_type="single_device", _device_type=_AcceleratorType.GPU, num_gpus=1), ), ( dict(accelerator="dp", gpus=1), - dict(_strategy_type=_StrategyType.DP, _device_type=_AcceleratorType.GPU, num_gpus=1, num_processes=1), + dict(_strategy_type=_StrategyType.DP, _device_type=_AcceleratorType.GPU, num_gpus=1), ), ( dict(accelerator="ddp", gpus=1), - dict(_strategy_type=_StrategyType.DDP, _device_type=_AcceleratorType.GPU, num_gpus=1, num_processes=1), + dict(_strategy_type=_StrategyType.DDP, _device_type=_AcceleratorType.GPU, num_gpus=1), ), ( dict(accelerator="ddp_cpu", num_processes=2, gpus=1), - dict( - _strategy_type=_StrategyType.DDP_SPAWN, _device_type=_AcceleratorType.CPU, num_gpus=0, num_processes=2 - ), + dict(_strategy_type=_StrategyType.DDP_SPAWN, _device_type=_AcceleratorType.CPU, num_gpus=0), ), ( dict(accelerator="ddp2", gpus=1), - dict(_strategy_type=_StrategyType.DDP2, _device_type=_AcceleratorType.GPU, num_gpus=1, num_processes=1), + dict(_strategy_type=_StrategyType.DDP2, _device_type=_AcceleratorType.GPU, num_gpus=1), ), ( dict(accelerator=None, gpus=2), - dict( - _strategy_type=_StrategyType.DDP_SPAWN, _device_type=_AcceleratorType.GPU, num_gpus=2, num_processes=2 - ), + dict(_strategy_type=_StrategyType.DDP_SPAWN, _device_type=_AcceleratorType.GPU, num_gpus=2), ), ( dict(accelerator="dp", gpus=2), - dict(_strategy_type=_StrategyType.DP, _device_type=_AcceleratorType.GPU, num_gpus=2, num_processes=1), + dict(_strategy_type=_StrategyType.DP, _device_type=_AcceleratorType.GPU, num_gpus=2), ), ( dict(accelerator="ddp", gpus=2), - dict(_strategy_type=_StrategyType.DDP, _device_type=_AcceleratorType.GPU, num_gpus=2, num_processes=2), + dict(_strategy_type=_StrategyType.DDP, _device_type=_AcceleratorType.GPU, num_gpus=2), ), ( dict(accelerator="ddp2", gpus=2), - dict(_strategy_type=_StrategyType.DDP2, _device_type=_AcceleratorType.GPU, num_gpus=2, num_processes=1), + dict(_strategy_type=_StrategyType.DDP2, _device_type=_AcceleratorType.GPU, num_gpus=2), ), ( dict(accelerator="ddp2", num_processes=2, gpus=None), - dict(_strategy_type=_StrategyType.DDP, _device_type=_AcceleratorType.CPU, num_gpus=0, num_processes=2), + dict(_strategy_type=_StrategyType.DDP, _device_type=_AcceleratorType.CPU, num_gpus=0), ), ( dict(accelerator="dp", num_processes=2, gpus=None), - dict(_strategy_type=_StrategyType.DDP, _device_type=_AcceleratorType.CPU, num_gpus=0, num_processes=2), + dict(_strategy_type=_StrategyType.DDP, _device_type=_AcceleratorType.CPU, num_gpus=0), ), ], ) @@ -1264,9 +1258,9 @@ def test_trainer_config(trainer_kwargs, expected, monkeypatch): else: with pytest.deprecated_call(match=r"accelerator='.*'\)` has been deprecated in v1.5"): trainer = Trainer(**trainer_kwargs) - assert len(expected) == 4 + assert len(expected) == 3 for k, v in expected.items(): - assert getattr(trainer, k) == v, f"Failed {k}: {v}" + assert getattr(trainer, k) == v, f"Failed on {trainer_kwargs}, where {k}={ getattr(trainer, k)}, not {v}" def test_trainer_subclassing(): @@ -2103,122 +2097,107 @@ def training_step(self, batch, batch_idx): [ ( dict(strategy=None, 
gpus=None), - dict(_strategy_type=None, _device_type=_AcceleratorType.CPU, num_gpus=0, num_processes=1), + dict(_strategy_type="single_device", _device_type=_AcceleratorType.CPU, num_gpus=0), ), ( dict(strategy="dp", gpus=None), - dict(_strategy_type=None, _device_type=_AcceleratorType.CPU, num_gpus=0, num_processes=1), + dict(_strategy_type="ddp", _device_type=_AcceleratorType.CPU, num_gpus=0), ), ( dict(strategy="ddp", gpus=None), - dict(_strategy_type=None, _device_type=_AcceleratorType.CPU, num_gpus=0, num_processes=1), + dict(_strategy_type="ddp", _device_type=_AcceleratorType.CPU, num_gpus=0), ), ( dict(strategy="ddp", num_processes=2, gpus=None), - dict(_strategy_type=_StrategyType.DDP, _device_type=_AcceleratorType.CPU, num_gpus=0, num_processes=2), + dict(_strategy_type=_StrategyType.DDP, _device_type=_AcceleratorType.CPU, num_gpus=0), ), ( dict(strategy="ddp", num_nodes=2, gpus=None), - dict(_strategy_type=_StrategyType.DDP, _device_type=_AcceleratorType.CPU, num_gpus=0, num_processes=1), + dict(_strategy_type=_StrategyType.DDP, _device_type=_AcceleratorType.CPU, num_gpus=0), ), ( dict(strategy="ddp2", gpus=None), - dict(_strategy_type=None, _device_type=_AcceleratorType.CPU, num_gpus=0, num_processes=1), + dict(_strategy_type="ddp", _device_type=_AcceleratorType.CPU, num_gpus=0), ), ( dict(strategy=None, gpus=1), - dict(_strategy_type=None, _device_type=_AcceleratorType.GPU, num_gpus=1, num_processes=1), + dict(_strategy_type="single_device", _device_type=_AcceleratorType.GPU, num_gpus=1), ), ( dict(strategy="dp", gpus=1), - dict(_strategy_type=_StrategyType.DP, _device_type=_AcceleratorType.GPU, num_gpus=1, num_processes=1), + dict(_strategy_type=_StrategyType.DP, _device_type=_AcceleratorType.GPU, num_gpus=1), ), ( dict(strategy="ddp", gpus=1), - dict(_strategy_type=_StrategyType.DDP, _device_type=_AcceleratorType.GPU, num_gpus=1, num_processes=1), + dict(_strategy_type=_StrategyType.DDP, _device_type=_AcceleratorType.GPU, num_gpus=1), ), ( dict(strategy="ddp_spawn", gpus=1), - dict( - _strategy_type=_StrategyType.DDP_SPAWN, _device_type=_AcceleratorType.GPU, num_gpus=1, num_processes=1 - ), + dict(_strategy_type=_StrategyType.DDP_SPAWN, _device_type=_AcceleratorType.GPU, num_gpus=1), ), ( dict(strategy="ddp2", gpus=1), - dict(_strategy_type=_StrategyType.DDP2, _device_type=_AcceleratorType.GPU, num_gpus=1, num_processes=1), + dict(_strategy_type=_StrategyType.DDP2, _device_type=_AcceleratorType.GPU, num_gpus=1), ), ( dict(strategy=None, gpus=2), - dict( - _strategy_type=_StrategyType.DDP_SPAWN, _device_type=_AcceleratorType.GPU, num_gpus=2, num_processes=2 - ), + dict(_strategy_type=_StrategyType.DDP_SPAWN, _device_type=_AcceleratorType.GPU, num_gpus=2), ), ( dict(strategy="dp", gpus=2), - dict(_strategy_type=_StrategyType.DP, _device_type=_AcceleratorType.GPU, num_gpus=2, num_processes=1), + dict(_strategy_type=_StrategyType.DP, _device_type=_AcceleratorType.GPU, num_gpus=2), ), ( dict(strategy="ddp", gpus=2), - dict(_strategy_type=_StrategyType.DDP, _device_type=_AcceleratorType.GPU, num_gpus=2, num_processes=2), + dict(_strategy_type=_StrategyType.DDP, _device_type=_AcceleratorType.GPU, num_gpus=2), ), ( dict(strategy="ddp2", gpus=2), - dict(_strategy_type=_StrategyType.DDP2, _device_type=_AcceleratorType.GPU, num_gpus=2, num_processes=1), + dict(_strategy_type=_StrategyType.DDP2, _device_type=_AcceleratorType.GPU, num_gpus=2), ), ( dict(strategy="ddp2", num_processes=2, gpus=None), - dict(_strategy_type=_StrategyType.DDP, _device_type=_AcceleratorType.CPU, num_gpus=0, 
num_processes=2), + dict(_strategy_type=_StrategyType.DDP, _device_type=_AcceleratorType.CPU, num_gpus=0), ), ( dict(strategy="dp", num_processes=2, gpus=None), - dict(_strategy_type=_StrategyType.DDP, _device_type=_AcceleratorType.CPU, num_gpus=0, num_processes=2), + dict(_strategy_type=_StrategyType.DDP, _device_type=_AcceleratorType.CPU, num_gpus=0), ), ( dict(strategy="ddp_spawn", num_processes=2, gpus=None), - dict( - _strategy_type=_StrategyType.DDP_SPAWN, _device_type=_AcceleratorType.CPU, num_gpus=0, num_processes=2 - ), + dict(_strategy_type=_StrategyType.DDP_SPAWN, _device_type=_AcceleratorType.CPU, num_gpus=0), ), ( dict(strategy="ddp_spawn", num_processes=1, gpus=None), - dict(_strategy_type=None, _device_type=_AcceleratorType.CPU, num_gpus=0, num_processes=1), + dict(_strategy_type="ddp_spawn", _device_type=_AcceleratorType.CPU, num_gpus=0), ), ( dict(strategy="ddp_fully_sharded", gpus=1), - dict( - _strategy_type=_StrategyType.DDP_FULLY_SHARDED, - _device_type=_AcceleratorType.GPU, - num_gpus=1, - num_processes=1, - ), + dict(_strategy_type=_StrategyType.DDP_FULLY_SHARDED, _device_type=_AcceleratorType.GPU, num_gpus=1), ), ( dict(strategy=DDPSpawnStrategy(), num_processes=2, gpus=None), - dict( - _strategy_type=_StrategyType.DDP_SPAWN, _device_type=_AcceleratorType.CPU, num_gpus=0, num_processes=2 - ), + dict(_strategy_type=_StrategyType.DDP_SPAWN, _device_type=_AcceleratorType.CPU, num_gpus=0), ), ( dict(strategy=DDPSpawnStrategy(), gpus=2), - dict( - _strategy_type=_StrategyType.DDP_SPAWN, _device_type=_AcceleratorType.GPU, num_gpus=2, num_processes=1 - ), + dict(_strategy_type=_StrategyType.DDP_SPAWN, _device_type=_AcceleratorType.GPU, num_gpus=2), ), ( dict(strategy=DDPStrategy(), num_processes=2, gpus=None), - dict(_strategy_type=_StrategyType.DDP, _device_type=_AcceleratorType.CPU, num_gpus=0, num_processes=2), + dict(_strategy_type=_StrategyType.DDP, _device_type=_AcceleratorType.CPU, num_gpus=0), ), ( dict(strategy=DDPStrategy(), gpus=2), - dict(_strategy_type=_StrategyType.DDP, _device_type=_AcceleratorType.GPU, num_gpus=2, num_processes=1), + dict(_strategy_type=_StrategyType.DDP, _device_type=_AcceleratorType.GPU, num_gpus=2), ), ( dict(strategy=DDP2Strategy(), gpus=2), - dict(_strategy_type=_StrategyType.DDP2, _device_type=_AcceleratorType.GPU, num_gpus=2, num_processes=1), + dict(_strategy_type=_StrategyType.DDP2, _device_type=_AcceleratorType.GPU, num_gpus=2), ), ( dict(strategy=DataParallelStrategy(), gpus=2), - dict(_strategy_type=_StrategyType.DP, _device_type=_AcceleratorType.GPU, num_gpus=2, num_processes=1), + dict(_strategy_type=_StrategyType.DP, _device_type=_AcceleratorType.GPU, num_gpus=2), ), ( dict(strategy=DDPFullyShardedStrategy(), gpus=2), @@ -2226,7 +2205,6 @@ def training_step(self, batch, batch_idx): _strategy_type=_StrategyType.DDP_FULLY_SHARDED, _device_type=_AcceleratorType.GPU, num_gpus=2, - num_processes=1, ), ), ( @@ -2235,14 +2213,11 @@ def training_step(self, batch, batch_idx): _strategy_type=_StrategyType.DDP_SHARDED_SPAWN, _device_type=_AcceleratorType.GPU, num_gpus=2, - num_processes=1, ), ), ( dict(strategy=DDPShardedStrategy(), gpus=2), - dict( - _strategy_type=_StrategyType.DDP_SHARDED, _device_type=_AcceleratorType.GPU, num_gpus=2, num_processes=1 - ), + dict(_strategy_type=_StrategyType.DDP_SHARDED, _device_type=_AcceleratorType.GPU, num_gpus=2), ), ], ) @@ -2251,6 +2226,6 @@ def test_trainer_config_strategy(trainer_kwargs, expected, monkeypatch): monkeypatch.setattr(torch.cuda, "is_available", lambda: True) 
monkeypatch.setattr(torch.cuda, "device_count", lambda: trainer_kwargs["gpus"]) trainer = Trainer(**trainer_kwargs) - assert len(expected) == 4 + assert len(expected) == 3 for k, v in expected.items(): - assert getattr(trainer, k) == v, f"Failed {k}: {v}" + assert getattr(trainer, k) == v, f"Failed on {trainer_kwargs}, where {k}={ getattr(trainer, k)}, not {v}" From ec17b316d8034f161458fe45c8e282679b395762 Mon Sep 17 00:00:00 2001 From: Siyu Wang Date: Thu, 27 Jan 2022 12:13:05 -0800 Subject: [PATCH 08/69] change trainer.gpus --- .../trainer/connectors/accelerator_connector.py | 15 +++++++++------ pytorch_lightning/trainer/trainer.py | 2 +- tests/models/test_gpu.py | 1 + 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index e89cb5d2ebf48..9dd1ee343bc1b 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -133,6 +133,7 @@ def __init__( """ torch.backends.cudnn.benchmark = benchmark + self._gpus = gpus # --Parsing_flags------------------------------------------------------ # Get registered strategies, existing accelerators and precision plugins self._existing_strategies_str = StrategyRegistry.available_strategies() @@ -175,7 +176,7 @@ def __init__( self._lazy_init_strategy() # set properties not used in accelerator_connector. TODO move out of this file - # self.gpus = gpus or devices + self.replace_sampler_ddp = replace_sampler_ddp def _config_check_and_set_final_flags(self, strategy, accelerator, precision, plugins, amp_type, amp_level): @@ -343,12 +344,12 @@ def _device_config_check_and_set_final_flags(self, devices, num_nodes, num_proce self._device_flag = devices # Delete when remove num_processes, gpus, ipus and tpu_cores deprecated_devices_specific_flag = num_processes or gpus or ipus or tpu_cores - if deprecated_devices_specific_flag: + if deprecated_devices_specific_flag and deprecated_devices_specific_flag not in (0, "0"): self._mapping_deprecated_devices_specfic_info_to_accelerator_and_device_flag( devices, deprecated_devices_specific_flag, num_processes, gpus, ipus, tpu_cores ) # Delete end - if devices == "auto": + if self._device_flag == "auto": if self._accelerator_flag is None: raise MisconfigurationException( f"You passed `devices={devices}` but haven't specified" @@ -364,6 +365,7 @@ def _mapping_deprecated_devices_specfic_info_to_accelerator_and_device_flag( f"The flag `devices={devices}` will be ignored, " f"instand the device specific number {deprecated_devices_specific_flag} will be used" ) + gpus = int(gpus) if isinstance(gpus, str) and gpus.isnumeric() else gpus if [(num_processes is not None), (gpus is not None), (ipus is not None), (tpu_cores is not None)].count( True ) > 1: @@ -448,7 +450,7 @@ def _set_parallel_devices_and_init_accelerator(self): ) self._parallel_devices = [torch.device("cpu")] * self._device_flag - self._gpus = self._device_flag + self._gpus = self._device_flag if not self._gpus else self._gpus def _choose_and_init_cluster_environment(self): self.cluster_environment = LightningEnvironment() @@ -739,11 +741,12 @@ def num_gpus(self) -> int: # def parallel_device_ids(): @property def gpus(self): - return self._gpus if isinstance(self.accelerator, GPUAccelerator) else None + return self._gpus + # if isinstance(self.accelerator, GPUAccelerator) else 0 @property def parallel_device_ids(self): - return [i for i in 
range(len(self.parallel_devices))] + return [i for i in range(len(self.parallel_devices))] if isinstance(self.accelerator, GPUAccelerator) else None @property def is_distributed(self): diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index bd648dd99d332..70f72bd2488cc 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -2006,7 +2006,7 @@ def devices(self) -> Optional[Union[List[int], str, int]]: @property def data_parallel_device_ids(self) -> Optional[List[int]]: - return self._accelerator_connector.parallel_devices + return self._accelerator_connector.parallel_device_ids @property def lightning_module(self) -> "pl.LightningModule": diff --git a/tests/models/test_gpu.py b/tests/models/test_gpu.py index c494c0c1c18e6..190936096ddef 100644 --- a/tests/models/test_gpu.py +++ b/tests/models/test_gpu.py @@ -243,6 +243,7 @@ def test_torchelastic_gpu_parsing(mocked_device_count, mocked_is_available, gpus trainer = Trainer(gpus=gpus) assert isinstance(trainer._accelerator_connector.cluster_environment, TorchElasticEnvironment) assert trainer._accelerator_connector.parallel_device_ids == device_parser.parse_gpu_ids(gpus) + assert trainer.gpus == gpus From ffeea284e3acde0a9982879160d9262e699ebc31 Mon Sep 17 00:00:00 2001 From: Siyu Wang Date: Fri, 28 Jan 2022 12:58:53 -0800 Subject: [PATCH 09/69] fix tests --- .../connectors/accelerator_connector.py | 176 +++++++++--------- .../test_accelerator_connector.py | 8 +- tests/accelerators/test_ipu.py | 5 +- tests/accelerators/test_tpu.py | 13 +- 4 files changed, 102 insertions(+), 100 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 9dd1ee343bc1b..527b64b5625e1 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -133,7 +133,7 @@ def __init__( """ torch.backends.cudnn.benchmark = benchmark - self._gpus = gpus + # --Parsing_flags------------------------------------------------------ # Get registered strategies, existing accelerators and precision plugins self._existing_strategies_str = StrategyRegistry.available_strategies() @@ -154,9 +154,9 @@ def __init__( # handle `auto` and `None` if self._accelerator_flag == "auto" or self._accelerator_flag is None: self._choose_accelerator() - # else: - # # [RFC] move to XAccelerator class init? - # self._check_device_availibility() + else: + # # [RFC] move to XAccelerator class init? + self._check_device_availibility() self._set_parallel_devices_and_init_accelerator() # --Cluster_environment----------------------------------------------------- @@ -170,13 +170,11 @@ def __init__( self._init_strategy() # --Precision---------------------------------------------------------------- - self._check_capatibility_and_init_precision() + self.precision_plugin = self._check_capatibility_and_init_precision() # --Strategy Part 2 : init Strategy and set Strategy properties ------------- self._lazy_init_strategy() - # set properties not used in accelerator_connector. TODO move out of this file - self.replace_sampler_ddp = replace_sampler_ddp def _config_check_and_set_final_flags(self, strategy, accelerator, precision, plugins, amp_type, amp_level): @@ -239,23 +237,24 @@ def _config_check_and_set_final_flags(self, strategy, accelerator, precision, pl f" and you can only specify one strategy, but you have passed {plugin} as a plugin." 
) - if ( - accelerator in self._existing_accelerator_type - or accelerator == "auto" - or isinstance(accelerator, Accelerator) - ): - self._accelerator_flag = accelerator - elif accelerator in self._existing_strategies_str or isinstance(accelerator, Strategy): - rank_zero_deprecation( - f"Passing `Trainer(accelerator={accelerator!r})` has been deprecated" - f" in v1.5 and will be removed in v1.7. Use `Trainer(strategy={accelerator!r})` instead." - ) - self._strategy_flag = accelerator - elif accelerator == "ddp_cpu": - rank_zero_warn( - "You requested one or more GPUs, but set `accelerator='ddp_cpu'`. Training will not use GPUs." - ) - self._strategy_flag = accelerator + if accelerator: + if ( + accelerator in self._existing_accelerator_type + or accelerator == "auto" + or isinstance(accelerator, Accelerator) + ): + self._accelerator_flag = accelerator + elif accelerator in self._existing_strategies_str or isinstance(accelerator, Strategy): + rank_zero_deprecation( + f"Passing `Trainer(accelerator={accelerator!r})` has been deprecated" + f" in v1.5 and will be removed in v1.7. Use `Trainer(strategy={accelerator!r})` instead." + ) + self._strategy_flag = accelerator + elif accelerator == "ddp_cpu": + rank_zero_warn( + "You requested one or more GPUs, but set `accelerator='ddp_cpu'`. Training will not use GPUs." + ) + self._strategy_flag = accelerator if precision: if not PrecisionType.supported_type(precision): @@ -265,10 +264,12 @@ def _config_check_and_set_final_flags(self, strategy, accelerator, precision, pl ) self._precision_flag = precision # handle duplications and conflict - if plugins: - for plugin in plugins: - if isinstance(plugin, PrecisionPlugin): - raise MisconfigurationException("precision set in both precision flag and plugin flag") + # [RFC] current logic doesn't handle precision_plugin duplication + # if plugins: + # for plugin in plugins: + # if isinstance(plugin, PrecisionPlugin): + # self._precision_flag = precision + # raise MisconfigurationException("precision set in both precision flag and plugin flag") if plugins: for plugin in plugins: @@ -279,10 +280,8 @@ def _config_check_and_set_final_flags(self, strategy, accelerator, precision, pl f" in v1.5 and will be removed in v1.7. Use `Trainer(strategy={plugin})` instead." 
) - elif ( - isinstance(plugin, PrecisionPlugin) - or isinstance(plugin, str) - and plugin in self._supported_precision + elif isinstance(plugin, PrecisionPlugin) or ( + isinstance(plugin, str) and plugin in self._supported_precision ): self._precision_flag = plugin elif isinstance(plugin, CheckpointIO): @@ -343,6 +342,7 @@ def _device_config_check_and_set_final_flags(self, devices, num_nodes, num_proce self._device_flag = devices # Delete when remove num_processes, gpus, ipus and tpu_cores + self._gpus = gpus deprecated_devices_specific_flag = num_processes or gpus or ipus or tpu_cores if deprecated_devices_specific_flag and deprecated_devices_specific_flag not in (0, "0"): self._mapping_deprecated_devices_specfic_info_to_accelerator_and_device_flag( @@ -399,18 +399,20 @@ def _choose_accelerator(self): else: self._accelerator_flag = "cpu" - # def _check_device_availibility(self): - # for accelerator_flag, available in zip( - # self._existing_accelerator_type, [_TPU_AVAILABLE, _IPU_AVAILABLE, _GPU_AVAILABLE, True] - # ): - # if self._accelerator_flag == accelerator_flag: - # if not available: - # raise DeviceNotAvailibleException(f"{accelerator_flag} not avalible") + # TODO move this to xAccelerator + def _check_device_availibility(self): + for accelerator_flag, available in zip( + self._existing_accelerator_type, [_TPU_AVAILABLE, _IPU_AVAILABLE, torch.cuda.is_available(), True] + ): + # only apply to gpu to keep backward compatibility + if self._accelerator_flag == accelerator_flag == "gpu": + if not available: + raise MisconfigurationException( + f"You choice {accelerator_flag} accelerator, but {accelerator_flag} is not available" + ) - # TODO in progress for setting up devices def _set_parallel_devices_and_init_accelerator(self): self._parallel_devices = [] - if isinstance(self._accelerator_flag, Accelerator): self.accelerator = self._accelerator_flag elif self._accelerator_flag == "tpu": @@ -419,6 +421,8 @@ def _set_parallel_devices_and_init_accelerator(self): self._device_flag = TPUAccelerator.auto_device_count() if isinstance(self._device_flag, int): self._parallel_devices = list(range(self._device_flag)) + else: + self._parallel_devices = self._device_flag elif self._accelerator_flag == "ipu": self.accelerator = IPUAccelerator() @@ -436,7 +440,7 @@ def _set_parallel_devices_and_init_accelerator(self): self._parallel_devices = [ torch.device("cuda", i) for i in device_parser.parse_gpu_ids(self._device_flag) ] - elif isinstance(self._device_flag, list): + else: self._parallel_devices = [torch.device("cuda", i) for i in self._device_flag] elif self._accelerator_flag == "cpu": @@ -471,13 +475,7 @@ def _is_sharded_training_type(self) -> bool: def _is_slurm_managing_tasks(self): """used by choosing cluster enviroment.""" - if ( - # (not self._strategy_flag=="ddp" and not self._strategy_flag=="ddp2") - # the above logic moved to _select_strategy(), only check _is_slurm_managing_tasks() - # when strategy flag is ddp or ddp2 - not SLURMEnvironment.detect() - or SLURMEnvironment.job_name() == "bash" - ): + if not SLURMEnvironment.detect() or SLURMEnvironment.job_name() == "bash": return False total_requested_devices = len(self._parallel_devices) * self._num_nodes_flag @@ -498,10 +496,7 @@ def _choose_strategy(self): else: if self._num_nodes_flag > 1: self._strategy_flag = "ddp" - elif TorchElasticEnvironment.detect() or KubeflowEnvironment.detect() or self._is_slurm_managing_tasks(): - self._strategy_flag = "ddp" elif len(self._parallel_devices) <= 1: - # device = torch.device("cuda", 1) if 
self._accelerator_flag == "gpu" else "cpu" device = ( device_parser.determine_root_gpu_device(self._parallel_devices) if self._accelerator_flag == "gpu" @@ -538,11 +533,13 @@ def _strategy_check_and_fallbacks(self): if _strategy_flag in ("dp", "ddp2") and self._accelerator_flag == "cpu": rank_zero_warn(f"{_strategy_flag!r} is not supported on CPUs, hence setting `strategy='ddp'`.") _strategy_flag = "ddp" - if isinstance(self.accelerator, TPUAccelerator) and "tpu" not in _strategy_flag: - raise ValueError( - "The `TPUAccelerator` can only be used with a `SingleTPUStrategy` or `TPUSpawnStrategy`," - f" found {_strategy_flag}." - ) + # Current test check precision first. So move this test to the end for now. + # TODO update tests and uncomment this part + # if isinstance(self.accelerator, TPUAccelerator) and "tpu" not in _strategy_flag: + # raise ValueError( + # "The `TPUAccelerator` can only be used with a `SingleTPUStrategy` or `TPUSpawnStrategy`," + # f" found {_strategy_flag}." + # ) if _strategy_flag: self._strategy_flag = _strategy_flag @@ -552,39 +549,35 @@ def _init_strategy(self): self.strategy = StrategyRegistry.get(self._strategy_flag) else: self.strategy = self._strategy_flag + # print(self.strategy) def _check_capatibility_and_init_precision(self): self._precision_misconfig_check() if isinstance(self._precision_flag, PrecisionPlugin): - self.precision_plugin = self._precision_flag - return + return self._precision_flag + self.precision_plugin = None - if self._accelerator_flag == "ipu": - self.precision_plugin = IPUPrecisionPlugin(self._precision_flag) - if self._accelerator_flag == "tpu": + if isinstance(self.accelerator, IPUAccelerator): + return IPUPrecisionPlugin(self._precision_flag) + if isinstance(self.accelerator, TPUAccelerator): if self._precision_flag == 32: - self.precision_plugin = TPUPrecisionPlugin() + return TPUPrecisionPlugin() elif self._precision_flag in (16, "bf16"): if self._precision_flag == 16: - # this is not deprecated to ease transition between accelerator environments rank_zero_warn( f"You passed `Trainer(accelerator='tpu', precision=16)` but {self._amp_type_flag.value} AMP" f" is not supported with TPUs. Using `precision='bf16'` instead." ) - self.precision_plugin = TPUBf16PrecisionPlugin() + return TPUBf16PrecisionPlugin() if self._strategy_flag == "deepspeed" or isinstance(self._strategy_flag, DeepSpeedStrategy): - self.precision_plugin = DeepSpeedPrecisionPlugin( - self._precision_flag, self._amp_type_flag, self._amp_level_flag - ) + return DeepSpeedPrecisionPlugin(self._precision_flag, self._amp_type_flag, self._amp_level_flag) if self._precision_flag == 32: - self.precision_plugin = PrecisionPlugin() + return PrecisionPlugin() if self._precision_flag == 64: - self.precision_plugin = DoublePrecisionPlugin() + return DoublePrecisionPlugin() - # maybe convert the precision value if self._precision_flag == 16 and self._accelerator_flag == "cpu": - # this automatic switch is to ease transition between accelerator environments rank_zero_warn( "You passed `Trainer(accelerator='cpu', precision=16)` but native AMP is not supported on CPU." " Using `precision='bf16'` instead." 
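
The hunks above also show `_strategy_check_and_fallbacks` normalizing the strategy flag before anything is instantiated. The sketch below illustrates only the two fallbacks that are visible in the diff; the exact `ddp_cpu` replacement is not fully reproduced in this section, so mapping it to `ddp_spawn` here is an assumption made purely for illustration:

```python
def resolve_strategy_flag(flag: str, accelerator: str, tpu_available: bool = False) -> str:
    """Simplified sketch of the pre-instantiation fallbacks; string flags only.

    The real connector also handles Strategy instances and a registry lookup,
    and its actual `ddp_cpu` handling is richer than what is shown here.
    """
    if flag == "ddp_cpu":
        if tpu_available:
            raise ValueError("`accelerator='ddp_cpu'` is not supported on TPU machines.")
        # Assumption for illustration: force CPU and fall back to a spawn-based DDP.
        accelerator = "cpu"
        flag = "ddp_spawn"
    if flag in ("dp", "ddp2") and accelerator == "cpu":
        # DP/DDP2 require GPUs, so plain DDP is used on CPU instead.
        flag = "ddp"
    return flag


if __name__ == "__main__":
    assert resolve_strategy_flag("dp", accelerator="cpu") == "ddp"
    assert resolve_strategy_flag("ddp_cpu", accelerator="gpu") == "ddp_spawn"
```
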
@@ -602,40 +595,44 @@ def _check_capatibility_and_init_precision(self): device = "cpu" if self._accelerator_flag == "cpu" else "cuda" if isinstance(self.strategy, (DDPShardedStrategy, DDPSpawnShardedStrategy)): - self.precision_plugin = ShardedNativeMixedPrecisionPlugin(self._precision_flag, device) + return ShardedNativeMixedPrecisionPlugin(self._precision_flag, device) if isinstance(self.strategy, DDPFullyShardedStrategy): - self.precision_plugin = FullyShardedNativeMixedPrecisionPlugin(self._precision_flag, device) - self.precision_plugin = NativeMixedPrecisionPlugin(self._precision_flag, device) + return FullyShardedNativeMixedPrecisionPlugin(self._precision_flag, device) + return NativeMixedPrecisionPlugin(self._precision_flag, device) if self._amp_type_flag == AMPType.APEX: self._amp_level_flag = self._amp_level_flag or "O2" - self.precision_plugin = ApexMixedPrecisionPlugin(self._amp_level_flag) + return ApexMixedPrecisionPlugin(self._amp_level_flag) - if not self.precision_plugin: - self.precision_plugin = PrecisionPlugin() + raise RuntimeError("No precision set") def _precision_misconfig_check(self): # TODO change exception type to ImpactableConfigurationException - if self._accelerator_flag == "ipu": + if isinstance(self.accelerator, IPUAccelerator): if self._precision_flag not in (16, 32): raise MisconfigurationException( f"`Trainer(accelerator='ipu', precision={self._precision_flag!r})` is not supported." ) - if self._accelerator_flag == "tpu" and self._precision_flag == 64: + if isinstance(self.accelerator, TPUAccelerator) and self._precision_flag == 64: raise MisconfigurationException( "`Trainer(accelerator='tpu', precision=64)` is not implemented." " Please, open an issue in `https://github.com/PyTorchLightning/pytorch-lightning/issues`" " requesting this feature." ) - if self._accelerator_flag == "tpu" and isinstance( - self._precision_flag, (TPUPrecisionPlugin, TPUBf16PrecisionPlugin) + if ( + isinstance(self.accelerator, TPUAccelerator) + and isinstance(self._precision_flag, PrecisionPlugin) + and not isinstance(self._precision_flag, (TPUPrecisionPlugin, TPUBf16PrecisionPlugin)) ): raise ValueError( f"The `TPUAccelerator` can only be used with a `TPUPrecisionPlugin`," f" found: {self.strategy.precision_plugin}." ) - if self._precision_flag == 16 and self._accelerator_flag == "cpu" and self._amp_type_flag == AMPType.APEX: - # apex was explicitly passed, not a good idea to silently switch to native AMP + if ( + self._precision_flag == 16 + and isinstance(self.accelerator, CPUAccelerator) + and self._amp_type_flag == AMPType.APEX + ): raise MisconfigurationException( "You passed `Trainer(accelerator='cpu', precision=16, amp_type='apex')`" " but apex AMP not supported on CPU." @@ -661,6 +658,8 @@ def _lazy_init_strategy(self): self.strategy.cluster_environment = self.cluster_environment if hasattr(self.strategy, "parallel_devices"): self.strategy.parallel_devices = self._parallel_devices + if hasattr(self.strategy, "num_nodes"): + self.strategy._num_nodes = self._num_nodes_flag from pytorch_lightning.utilities import _IS_INTERACTIVE @@ -676,6 +675,13 @@ def _lazy_init_strategy(self): ) + if isinstance(self.accelerator, TPUAccelerator) and not isinstance( + self.strategy, (SingleTPUStrategy, TPUSpawnStrategy) + ): + raise ValueError( + "The `TPUAccelerator` can only be used with a `SingleTPUStrategy` or `TPUSpawnStrategy`," + f" found {self.strategy}." 
+ ) ############################################################################## # the following logic should be deprecated/removed, and these information should be @@ -700,7 +706,7 @@ def device_type(self): @property def num_nodes(self): - return self._num_nodes + return self._num_nodes_flag @property def num_processes(self): diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py index a6b65e9542f0c..e121d27bddd86 100644 --- a/tests/accelerators/test_accelerator_connector.py +++ b/tests/accelerators/test_accelerator_connector.py @@ -445,15 +445,17 @@ def test_accelerator_choice_multi_node_gpu( assert isinstance(trainer.strategy, plugin) -@pytest.mark.skipif(torch.cuda.is_available(), reason="test doesn't require GPU") -def test_accelerator_cpu(): +@mock.patch("torch.cuda.is_available", return_value=False) +def test_accelerator_cpu(mack_gpu_avalible): trainer = Trainer(accelerator="cpu") assert trainer._device_type == "cpu" assert isinstance(trainer.accelerator, CPUAccelerator) - with pytest.raises(MisconfigurationException, match="You passed `accelerator='gpu'`, but GPUs are not available"): + with pytest.raises(MisconfigurationException): + trainer = Trainer(gpus=1) + with pytest.raises(MisconfigurationException): trainer = Trainer(accelerator="gpu") with pytest.raises(MisconfigurationException, match="You requested GPUs:"): diff --git a/tests/accelerators/test_ipu.py b/tests/accelerators/test_ipu.py index 861b149733c0c..a691f4f62d983 100644 --- a/tests/accelerators/test_ipu.py +++ b/tests/accelerators/test_ipu.py @@ -505,10 +505,7 @@ def test_accelerator_ipu(): assert trainer._device_type == "ipu" assert isinstance(trainer.accelerator, IPUAccelerator) - with pytest.raises( - MisconfigurationException, match="You passed `accelerator='ipu'`, but you didn't pass `ipus` to `Trainer`" - ): - trainer = Trainer(accelerator="ipu") + trainer = Trainer(accelerator="ipu") trainer = Trainer(accelerator="auto", ipus=8) diff --git a/tests/accelerators/test_tpu.py b/tests/accelerators/test_tpu.py index 608d98304c757..bec80ec9ccbc1 100644 --- a/tests/accelerators/test_tpu.py +++ b/tests/accelerators/test_tpu.py @@ -13,7 +13,7 @@ # limitations under the License import collections from copy import deepcopy -from unittest.mock import Mock, patch +from unittest.mock import patch import pytest import torch @@ -23,7 +23,7 @@ from pytorch_lightning import Trainer from pytorch_lightning.accelerators.cpu import CPUAccelerator from pytorch_lightning.accelerators.tpu import TPUAccelerator -from pytorch_lightning.plugins import TPUPrecisionPlugin, XLACheckpointIO +from pytorch_lightning.plugins import PrecisionPlugin, TPUPrecisionPlugin, XLACheckpointIO from pytorch_lightning.strategies import DDPStrategy, TPUSpawnStrategy from pytorch_lightning.utilities import find_shared_parameters from pytorch_lightning.utilities.exceptions import MisconfigurationException @@ -90,10 +90,7 @@ def test_accelerator_tpu(): assert trainer._device_type == "tpu" assert isinstance(trainer.accelerator, TPUAccelerator) - with pytest.raises( - MisconfigurationException, match="You passed `accelerator='tpu'`, but you didn't pass `tpu_cores` to `Trainer`" - ): - trainer = Trainer(accelerator="tpu") + trainer = Trainer(accelerator="tpu") @RunIf(tpu=True) @@ -290,7 +287,7 @@ def forward(self, x): def test_tpu_invalid_raises(): - training_type_plugin = TPUSpawnStrategy(accelerator=TPUAccelerator(), precision_plugin=Mock()) + training_type_plugin = 
TPUSpawnStrategy(accelerator=TPUAccelerator(), precision_plugin=PrecisionPlugin()) with pytest.raises(ValueError, match="TPUAccelerator` can only be used with a `TPUPrecisionPlugin"): Trainer(strategy=training_type_plugin) @@ -301,7 +298,7 @@ def test_tpu_invalid_raises(): def test_tpu_invalid_raises_set_precision_with_strategy(): accelerator = TPUAccelerator() - training_type_plugin = TPUSpawnStrategy(accelerator=accelerator, precision_plugin=object()) + training_type_plugin = TPUSpawnStrategy(accelerator=accelerator, precision_plugin=PrecisionPlugin()) with pytest.raises(ValueError, match="`TPUAccelerator` can only be used with a `TPUPrecisionPlugin`"): Trainer(strategy=training_type_plugin) From 57b16423f6ee7c66c5c7537a0ccc79c9767c8572 Mon Sep 17 00:00:00 2001 From: Siyu Wang Date: Fri, 28 Jan 2022 13:48:44 -0800 Subject: [PATCH 10/69] remove gpu avalible check --- .../connectors/accelerator_connector.py | 26 +++++++++---------- .../test_accelerator_connector.py | 4 +-- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 527b64b5625e1..6efc6c336c46c 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -154,9 +154,9 @@ def __init__( # handle `auto` and `None` if self._accelerator_flag == "auto" or self._accelerator_flag is None: self._choose_accelerator() - else: - # # [RFC] move to XAccelerator class init? - self._check_device_availibility() + # else: + # # [RFC] move to XAccelerator class init? + # self._check_device_availibility() self._set_parallel_devices_and_init_accelerator() # --Cluster_environment----------------------------------------------------- @@ -400,16 +400,16 @@ def _choose_accelerator(self): self._accelerator_flag = "cpu" # TODO move this to xAccelerator - def _check_device_availibility(self): - for accelerator_flag, available in zip( - self._existing_accelerator_type, [_TPU_AVAILABLE, _IPU_AVAILABLE, torch.cuda.is_available(), True] - ): - # only apply to gpu to keep backward compatibility - if self._accelerator_flag == accelerator_flag == "gpu": - if not available: - raise MisconfigurationException( - f"You choice {accelerator_flag} accelerator, but {accelerator_flag} is not available" - ) + # def _check_device_availibility(self): + # for accelerator_flag, available in zip( + # self._existing_accelerator_type, [_TPU_AVAILABLE, _IPU_AVAILABLE, torch.cuda.is_available(), True] + # ): + # # only apply to gpu to keep backward compatibility + # if self._accelerator_flag == accelerator_flag == "gpu": + # if not available: + # raise MisconfigurationException( + # f"You choice {accelerator_flag} accelerator, but {accelerator_flag} is not available" + # ) def _set_parallel_devices_and_init_accelerator(self): self._parallel_devices = [] diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py index e121d27bddd86..0086992165143 100644 --- a/tests/accelerators/test_accelerator_connector.py +++ b/tests/accelerators/test_accelerator_connector.py @@ -455,8 +455,8 @@ def test_accelerator_cpu(mack_gpu_avalible): with pytest.raises(MisconfigurationException): trainer = Trainer(gpus=1) - with pytest.raises(MisconfigurationException): - trainer = Trainer(accelerator="gpu") + # with pytest.raises(MisconfigurationException): + # trainer = Trainer(accelerator="gpu") with 
pytest.raises(MisconfigurationException, match="You requested GPUs:"): trainer = Trainer(accelerator="cpu", gpus=1) From d374aa9f12b4acbe30540854a911e3c4122f4343 Mon Sep 17 00:00:00 2001 From: Siyu Wang Date: Fri, 28 Jan 2022 15:20:38 -0800 Subject: [PATCH 11/69] update --- .../connectors/accelerator_connector.py | 36 ++++++++++--------- .../test_accelerator_connector.py | 6 ++-- tests/strategies/test_ddp_strategy.py | 7 +++- 3 files changed, 30 insertions(+), 19 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 6efc6c336c46c..aa88bd1c20c9a 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -133,6 +133,8 @@ def __init__( """ torch.backends.cudnn.benchmark = benchmark + self.replace_sampler_ddp = replace_sampler_ddp + self.sync_batchnorm = sync_batchnorm # --Parsing_flags------------------------------------------------------ # Get registered strategies, existing accelerators and precision plugins @@ -175,8 +177,6 @@ def __init__( # --Strategy Part 2 : init Strategy and set Strategy properties ------------- self._lazy_init_strategy() - self.replace_sampler_ddp = replace_sampler_ddp - def _config_check_and_set_final_flags(self, strategy, accelerator, precision, plugins, amp_type, amp_level): """This method checks: @@ -295,29 +295,29 @@ def _config_check_and_set_final_flags(self, strategy, accelerator, precision, pl # if user pass in a strategy class which has accelerator, precision, checkpoint or cluster env set up if self._strategy_flag and isinstance(self._strategy_flag, Strategy): - if self._strategy_flag.accelerator: + if self._strategy_flag._accelerator: if self._accelerator_flag: raise MisconfigurationException( "accelerator set through both strategy class and accelerator flag, choose one" ) else: - self._accelerator_flag = self._strategy_flag.accelerator - if self._strategy_flag.precision_plugin: + self._accelerator_flag = self._strategy_flag._accelerator + if self._strategy_flag._precision_plugin: # precision has default value 32, we can not tell whether user set it or not # [RFC] remove default from trainer? 
# if self._precision_flag: # raise MisconfigurationException("precision set through both strategy class and flags, # choose one place to set") # else: - self._precision_flag = self._strategy_flag.precision_plugin - if self._strategy_flag.checkpoint_io: + self._precision_flag = self._strategy_flag._precision_plugin + if self._strategy_flag._checkpoint_io: if self.checkpoint_io: raise MisconfigurationException( "checkpoint_io set through both strategy class and plugins, choose one" ) else: - self.checkpoint_io = self._strategy_flag.checkpoint_io - if getattr(self._strategy_flag, "cluster_environment", None): + self.checkpoint_io = self._strategy_flag._checkpoint_io + if getattr(self._strategy_flag, "_cluster_environment", None): if self._cluster_environment: raise MisconfigurationException( "cluster_environment set through both strategy class and plugins, choose one" @@ -343,6 +343,9 @@ def _device_config_check_and_set_final_flags(self, devices, num_nodes, num_proce self._device_flag = devices # Delete when remove num_processes, gpus, ipus and tpu_cores self._gpus = gpus + self._tpu_cores = tpu_cores + gpus = device_parser.parse_gpu_ids(gpus) + tpu_cores = device_parser.parse_tpu_cores(tpu_cores) deprecated_devices_specific_flag = num_processes or gpus or ipus or tpu_cores if deprecated_devices_specific_flag and deprecated_devices_specific_flag not in (0, "0"): self._mapping_deprecated_devices_specfic_info_to_accelerator_and_device_flag( @@ -365,7 +368,7 @@ def _mapping_deprecated_devices_specfic_info_to_accelerator_and_device_flag( f"The flag `devices={devices}` will be ignored, " f"instand the device specific number {deprecated_devices_specific_flag} will be used" ) - gpus = int(gpus) if isinstance(gpus, str) and gpus.isnumeric() else gpus + if [(num_processes is not None), (gpus is not None), (ipus is not None), (tpu_cores is not None)].count( True ) > 1: @@ -447,12 +450,13 @@ def _set_parallel_devices_and_init_accelerator(self): self.accelerator = CPUAccelerator() if self._device_flag == "auto" or not self._device_flag: self._device_flag = CPUAccelerator.auto_device_count() - if not isinstance(self._device_flag, int): - raise MisconfigurationException( + if isinstance(self._device_flag, int): + self._parallel_devices = [torch.device("cpu")] * self._device_flag + else: + rank_zero_warn( "The flag `devices` must be an int with `accelerator='cpu'`," f" got `devices={self._device_flag}` instead." ) - self._parallel_devices = [torch.device("cpu")] * self._device_flag self._gpus = self._device_flag if not self._gpus else self._gpus @@ -549,7 +553,6 @@ def _init_strategy(self): self.strategy = StrategyRegistry.get(self._strategy_flag) else: self.strategy = self._strategy_flag - # print(self.strategy) def _check_capatibility_and_init_precision(self): self._precision_misconfig_check() @@ -625,8 +628,7 @@ def _precision_misconfig_check(self): and not isinstance(self._precision_flag, (TPUPrecisionPlugin, TPUBf16PrecisionPlugin)) ): raise ValueError( - f"The `TPUAccelerator` can only be used with a `TPUPrecisionPlugin`," - f" found: {self.strategy.precision_plugin}." + f"The `TPUAccelerator` can only be used with a `TPUPrecisionPlugin`," f" found: {self._precision_flag}." 
) if ( self._precision_flag == 16 @@ -660,6 +662,8 @@ def _lazy_init_strategy(self): self.strategy.parallel_devices = self._parallel_devices if hasattr(self.strategy, "num_nodes"): self.strategy._num_nodes = self._num_nodes_flag + if hasattr(self.strategy, "sync_batchnorm"): + self.strategy.sync_batchnorm = self.sync_batchnorm from pytorch_lightning.utilities import _IS_INTERACTIVE diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py index 0086992165143..338b6441944cb 100644 --- a/tests/accelerators/test_accelerator_connector.py +++ b/tests/accelerators/test_accelerator_connector.py @@ -575,8 +575,10 @@ def test_set_devices_if_none_gpu(): def test_devices_with_cpu_only_supports_integer(): - with pytest.raises(MisconfigurationException, match="The flag `devices` must be an int"): - Trainer(accelerator="cpu", devices="1,3") + with pytest.warns(UserWarning, match="The flag `devices` must be an int"): + trainer = Trainer(accelerator="cpu", devices="1,3") + assert isinstance(trainer.accelerator, CPUAccelerator) + assert trainer.devices == 1 @pytest.mark.parametrize("training_type", ["ddp2", "dp"]) diff --git a/tests/strategies/test_ddp_strategy.py b/tests/strategies/test_ddp_strategy.py index 157908309f0e6..dddeaed26d98f 100644 --- a/tests/strategies/test_ddp_strategy.py +++ b/tests/strategies/test_ddp_strategy.py @@ -97,7 +97,7 @@ def creates_processes_externally(self): @RunIf(skip_windows=True) -def test_ddp_configure_ddp(): +def test_ddp_configure_ddp_fitting(): """Tests with ddp strategy.""" model = BoringModel() ddp_strategy = DDPStrategy() @@ -115,6 +115,11 @@ def test_ddp_configure_ddp(): # in DDPStrategy configure_ddp(), model wrapped by DistributedDataParallel assert isinstance(trainer.model, DistributedDataParallel) + +@RunIf(skip_windows=True) +def test_ddp_configure_ddp_validating(): + model = BoringModel() + ddp_strategy = DDPStrategy() trainer = Trainer( max_epochs=1, strategy=ddp_strategy, From 0083b6985a59224c60fb028e523c9a5d53fac723 Mon Sep 17 00:00:00 2001 From: Siyu Wang Date: Fri, 28 Jan 2022 16:10:01 -0800 Subject: [PATCH 12/69] fix horovod --- .../connectors/accelerator_connector.py | 26 ++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index aa88bd1c20c9a..f3420eb536795 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -170,6 +170,8 @@ def __init__( # Reset strategy even user has specificed one self._strategy_check_and_fallbacks() self._init_strategy() + if _HOROVOD_AVAILABLE and isinstance(self.strategy, HorovodStrategy): + self.handle_horovod # --Precision---------------------------------------------------------------- self.precision_plugin = self._check_capatibility_and_init_precision() @@ -530,7 +532,7 @@ def _strategy_check_and_fallbacks(self): rank_zero_warn( "You requested one or more GPUs, but set `accelerator='ddp_cpu'`. Training will not use GPUs." 
) - if "ddp_spawn" in _strategy_flag and ( + if _strategy_flag in ("ddp_spawn", "ddp_spawn_find_unused_parameters_false") and ( TorchElasticEnvironment.detect() or KubeflowEnvironment.detect() or self._is_slurm_managing_tasks() ): _strategy_flag = "ddp" @@ -554,6 +556,28 @@ def _init_strategy(self): else: self.strategy = self._strategy_flag + def handle_horovod(self): + if self._num_nodes_flag > 1: + raise MisconfigurationException( + "Horovod does not support setting num_nodes / num_gpus explicitly. Use " + "horovodrun / mpirun to configure the number of processes." + ) + + if isinstance(self.strategy, HorovodStrategy) and not _HOROVOD_AVAILABLE: + raise MisconfigurationException( + 'Requested `accelerator="horovod"`, but Horovod is not installed.' + "Install with \n $HOROVOD_WITH_PYTORCH=1 pip install horovod[pytorch]" + ) + + import horovod.torch as hvd + + hvd.init() + if isinstance(self.accelerator, GPUAccelerator): + # Horovod assigns one local GPU per process + self._parallel_device = list(range(hvd.local_size())) + else: + self._parallel_device = hvd.local_size() + def _check_capatibility_and_init_precision(self): self._precision_misconfig_check() if isinstance(self._precision_flag, PrecisionPlugin): From 7a5c3ba9cddbb7e2c1f72e49818b703d2a47a737 Mon Sep 17 00:00:00 2001 From: Siyu Wang Date: Fri, 28 Jan 2022 16:32:17 -0800 Subject: [PATCH 13/69] fix horovod --- .../trainer/connectors/accelerator_connector.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index f3420eb536795..62da8bf9e3a0b 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -77,6 +77,9 @@ log = logging.getLogger(__name__) +if _HOROVOD_AVAILABLE: + import horovod.torch as hvd + class AcceleratorConnector: def __init__( @@ -569,8 +572,6 @@ def handle_horovod(self): "Install with \n $HOROVOD_WITH_PYTORCH=1 pip install horovod[pytorch]" ) - import horovod.torch as hvd - hvd.init() if isinstance(self.accelerator, GPUAccelerator): # Horovod assigns one local GPU per process From ca96f841682f3fa379c62fdd1a2648b467c852cc Mon Sep 17 00:00:00 2001 From: Siyu Wang Date: Fri, 28 Jan 2022 16:38:37 -0800 Subject: [PATCH 14/69] debug tpu --- .../trainer/connectors/accelerator_connector.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 62da8bf9e3a0b..1b25a0d3a2dac 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -492,9 +492,6 @@ def _is_slurm_managing_tasks(self): return num_slurm_tasks == total_requested_devices def _choose_strategy(self): - if _HOROVOD_AVAILABLE and ("OMPI_COMM_WORLD_RANK" in os.environ or "HOROVOD_RANK" in os.environ): - self._strategy_flag = HorovodStrategy() - if self._accelerator_flag == "ipu": self._strategy_flag = "ipu" elif self._accelerator_flag == "tpu": @@ -502,6 +499,8 @@ def _choose_strategy(self): self._strategy_flag = "tpu_spawn" else: self._srategy_flag = SingleTPUStrategy(device=self._parallel_devices[0]) + elif _HOROVOD_AVAILABLE and ("OMPI_COMM_WORLD_RANK" in os.environ or "HOROVOD_RANK" in os.environ): + self._strategy_flag = HorovodStrategy() else: if self._num_nodes_flag > 1: self._strategy_flag = 
"ddp" @@ -556,8 +555,10 @@ def _strategy_check_and_fallbacks(self): def _init_strategy(self): if isinstance(self._strategy_flag, str): self.strategy = StrategyRegistry.get(self._strategy_flag) - else: + elif isinstance(self._strategy_flag, Strategy): self.strategy = self._strategy_flag + else: + raise RuntimeError(f"{self.strategy} is not valid type: {self.strategy}") def handle_horovod(self): if self._num_nodes_flag > 1: From e55a5242b65e4c3b7eb992c8606c8fb352ee987b Mon Sep 17 00:00:00 2001 From: Siyu Wang Date: Mon, 31 Jan 2022 14:20:53 -0800 Subject: [PATCH 15/69] fix global rank --- pytorch_lightning/strategies/ddp.py | 2 -- pytorch_lightning/strategies/ddp_spawn.py | 2 -- pytorch_lightning/trainer/connectors/accelerator_connector.py | 2 ++ 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/pytorch_lightning/strategies/ddp.py b/pytorch_lightning/strategies/ddp.py index fac1cbe2dc288..12376358799fe 100644 --- a/pytorch_lightning/strategies/ddp.py +++ b/pytorch_lightning/strategies/ddp.py @@ -109,7 +109,6 @@ def __init__( self._pids: Optional[List[int]] = None self._sync_dir: Optional[str] = None self._rank_0_has_called_call_children_scripts: bool = False - self.set_world_ranks() @property def is_distributed(self) -> bool: @@ -127,7 +126,6 @@ def num_nodes(self) -> int: def num_nodes(self, num_nodes: int) -> None: # note that world ranks is related to num_nodes, when resetting it, need to reset world ranks self._num_nodes = num_nodes - self.set_world_ranks() @property def num_processes(self): diff --git a/pytorch_lightning/strategies/ddp_spawn.py b/pytorch_lightning/strategies/ddp_spawn.py index 2e73c64a1b207..70b14bceac845 100644 --- a/pytorch_lightning/strategies/ddp_spawn.py +++ b/pytorch_lightning/strategies/ddp_spawn.py @@ -81,7 +81,6 @@ def __init__( self._ddp_comm_hook = ddp_comm_hook self._ddp_comm_wrapper = ddp_comm_wrapper self._local_rank = 0 - self.set_world_ranks() @property def num_nodes(self) -> int: @@ -95,7 +94,6 @@ def num_processes(self): def num_nodes(self, num_nodes: int) -> None: # note that world ranks is related to num_nodes, when resetting it, need to reset world ranks self._num_nodes = num_nodes - self.set_world_ranks() @property def local_rank(self) -> int: diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 1b25a0d3a2dac..951f29f342254 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -690,6 +690,8 @@ def _lazy_init_strategy(self): self.strategy._num_nodes = self._num_nodes_flag if hasattr(self.strategy, "sync_batchnorm"): self.strategy.sync_batchnorm = self.sync_batchnorm + if hasattr(self.strategy, "set_world_ranks"): + self.strategy.set_world_ranks() from pytorch_lightning.utilities import _IS_INTERACTIVE From 9996fea3b2365e37205c13752a6db369c1f86788 Mon Sep 17 00:00:00 2001 From: Siyu Wang Date: Mon, 31 Jan 2022 16:01:39 -0800 Subject: [PATCH 16/69] fix horovod --- .../connectors/accelerator_connector.py | 46 +++++++++---------- 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 951f29f342254..9e04488e675e2 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -173,8 +173,6 @@ def __init__( # Reset strategy even user has 
specificed one self._strategy_check_and_fallbacks() self._init_strategy() - if _HOROVOD_AVAILABLE and isinstance(self.strategy, HorovodStrategy): - self.handle_horovod # --Precision---------------------------------------------------------------- self.precision_plugin = self._check_capatibility_and_init_precision() @@ -413,7 +411,7 @@ def _choose_accelerator(self): # self._existing_accelerator_type, [_TPU_AVAILABLE, _IPU_AVAILABLE, torch.cuda.is_available(), True] # ): # # only apply to gpu to keep backward compatibility - # if self._accelerator_flag == accelerator_flag == "gpu": + # if self._accelerator_flag == accelerator_flag: # if not available: # raise MisconfigurationException( # f"You choice {accelerator_flag} accelerator, but {accelerator_flag} is not available" @@ -498,9 +496,10 @@ def _choose_strategy(self): if self._parallel_devices and len(self._parallel_devices) > 1: self._strategy_flag = "tpu_spawn" else: - self._srategy_flag = SingleTPUStrategy(device=self._parallel_devices[0]) + # TODO lazy initialized device, then here could be self._strategy_flag = "single_tpu_device" + self._strategy_flag = SingleTPUStrategy(device=self._parallel_devices[0]) elif _HOROVOD_AVAILABLE and ("OMPI_COMM_WORLD_RANK" in os.environ or "HOROVOD_RANK" in os.environ): - self._strategy_flag = HorovodStrategy() + self._strategy_flag = "horovod" else: if self._num_nodes_flag > 1: self._strategy_flag = "ddp" @@ -510,6 +509,7 @@ def _choose_strategy(self): if self._accelerator_flag == "gpu" else "cpu" ) + # TODO lazy initialized device, then here could be self._strategy_flag = "single_device" self._strategy_flag = SingleDeviceStrategy(device=device) elif len(self._parallel_devices) > 1: self._strategy_flag = "ddp_spawn" @@ -517,7 +517,7 @@ def _choose_strategy(self): self._strategy_flag = "ddp" def _strategy_check_and_fallbacks(self): - # fallback apply to user pass in object as well, so get the _strategy_flag first + # current logic, fallback only apply to user pass in str config not object config _strategy_flag = "" if isinstance(self._strategy_flag, Strategy) else self._strategy_flag if _strategy_flag == "ddp_cpu": @@ -541,25 +541,10 @@ def _strategy_check_and_fallbacks(self): if _strategy_flag in ("dp", "ddp2") and self._accelerator_flag == "cpu": rank_zero_warn(f"{_strategy_flag!r} is not supported on CPUs, hence setting `strategy='ddp'`.") _strategy_flag = "ddp" - # Current test check precision first. So move this test to the end for now. - # TODO update tests and uncomment this part - # if isinstance(self.accelerator, TPUAccelerator) and "tpu" not in _strategy_flag: - # raise ValueError( - # "The `TPUAccelerator` can only be used with a `SingleTPUStrategy` or `TPUSpawnStrategy`," - # f" found {_strategy_flag}." - # ) if _strategy_flag: self._strategy_flag = _strategy_flag - def _init_strategy(self): - if isinstance(self._strategy_flag, str): - self.strategy = StrategyRegistry.get(self._strategy_flag) - elif isinstance(self._strategy_flag, Strategy): - self.strategy = self._strategy_flag - else: - raise RuntimeError(f"{self.strategy} is not valid type: {self.strategy}") - def handle_horovod(self): if self._num_nodes_flag > 1: raise MisconfigurationException( @@ -567,7 +552,7 @@ def handle_horovod(self): "horovodrun / mpirun to configure the number of processes." ) - if isinstance(self.strategy, HorovodStrategy) and not _HOROVOD_AVAILABLE: + if not _HOROVOD_AVAILABLE: raise MisconfigurationException( 'Requested `accelerator="horovod"`, but Horovod is not installed.' 
"Install with \n $HOROVOD_WITH_PYTORCH=1 pip install horovod[pytorch]" @@ -578,7 +563,19 @@ def handle_horovod(self): # Horovod assigns one local GPU per process self._parallel_device = list(range(hvd.local_size())) else: - self._parallel_device = hvd.local_size() + self._parallel_device = [torch.device("cpu")] * hvd.local_size() + + def _init_strategy(self): + if isinstance(self._strategy_flag, HorovodStrategy) or self._strategy_flag == "horovod": + # handle horovod has to happen before initialize strategy because HorovodStrategy needs hvd.init() first. + # TODO lazy initialized and setup horovod strategy `global_rank` + self.handle_horovod() + if isinstance(self._strategy_flag, str): + self.strategy = StrategyRegistry.get(self._strategy_flag) + elif isinstance(self._strategy_flag, Strategy): + self.strategy = self._strategy_flag + else: + raise RuntimeError(f"{self.strategy} is not valid type: {self.strategy}") def _check_capatibility_and_init_precision(self): self._precision_misconfig_check() @@ -706,7 +703,8 @@ def _lazy_init_strategy(self): " creation inside the worker function." ) - + # TODO should be moved to _strategy_check_and_fallbacks(). + # Current test check precision first, so keep this check here to meet error order if isinstance(self.accelerator, TPUAccelerator) and not isinstance( self.strategy, (SingleTPUStrategy, TPUSpawnStrategy) ): From a14879c4898d12bcf86396a47464b71eda767dc3 Mon Sep 17 00:00:00 2001 From: four4fish <88516121+four4fish@users.noreply.github.com> Date: Tue, 1 Feb 2022 17:53:30 -0800 Subject: [PATCH 17/69] Update pytorch_lightning/utilities/exceptions.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Carlos Mocholí --- pytorch_lightning/utilities/exceptions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/utilities/exceptions.py b/pytorch_lightning/utilities/exceptions.py index a0de06036792f..548e0cb655945 100644 --- a/pytorch_lightning/utilities/exceptions.py +++ b/pytorch_lightning/utilities/exceptions.py @@ -17,7 +17,7 @@ class MisconfigurationException(Exception): """Exception used to inform users of misuse with PyTorch Lightning.""" -class DeviceNotAvailibleException(Exception): +class DeviceNotAvailableException(Exception): """Exception used to inform users that requested devices are not availible.""" From 1626eee0b9701d618a5e09b984dbf92a3362bfae Mon Sep 17 00:00:00 2001 From: Siyu Wang Date: Tue, 1 Feb 2022 17:55:31 -0800 Subject: [PATCH 18/69] update horovod --- .../connectors/accelerator_connector.py | 22 ++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 9e04488e675e2..32307f63809d6 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -320,13 +320,21 @@ def _config_check_and_set_final_flags(self, strategy, accelerator, precision, pl ) else: self.checkpoint_io = self._strategy_flag._checkpoint_io - if getattr(self._strategy_flag, "_cluster_environment", None): + if getattr(self._strategy_flag, "cluster_environment", None): if self._cluster_environment: raise MisconfigurationException( "cluster_environment set through both strategy class and plugins, choose one" ) else: self._cluster_environment = getattr(self._strategy_flag, "cluster_environment") + # RFC existing accel_conn doesn't handle this, 
should we add conflict check? + # eg: parallel_device is torch.device(cpu) but accelerator=gpu + if hasattr(self._strategy_flag, "parallel_devices"): + if self._strategy_flag.parallel_devices: + if self._strategy_flag.parallel_devices[0].type == "cpu": + self._accelerator_flag = "cpu" + if self._strategy_flag.parallel_devices[0].type == "cuda": + self._accelerator_flag = "gpu" amp_type = amp_type.lower() if isinstance(amp_type, str) else None self._amp_type_flag = AMPType.from_str(amp_type) @@ -561,9 +569,9 @@ def handle_horovod(self): hvd.init() if isinstance(self.accelerator, GPUAccelerator): # Horovod assigns one local GPU per process - self._parallel_device = list(range(hvd.local_size())) + self._parallel_devices = list(range(hvd.local_size())) else: - self._parallel_device = [torch.device("cpu")] * hvd.local_size() + self._parallel_devices = [torch.device("cpu")] * hvd.local_size() def _init_strategy(self): if isinstance(self._strategy_flag, HorovodStrategy) or self._strategy_flag == "horovod": @@ -680,9 +688,13 @@ def _lazy_init_strategy(self): self.strategy.precision_plugin = self.precision_plugin if self.checkpoint_io: self.strategy.checkpoint_io = self.checkpoint_io - self.strategy.cluster_environment = self.cluster_environment + if hasattr(self.strategy, "cluster_environment"): + self.strategy.cluster_environment = self.cluster_environment if hasattr(self.strategy, "parallel_devices"): - self.strategy.parallel_devices = self._parallel_devices + if self.strategy.parallel_devices: + self._parallel_devices = self.strategy.parallel_devices + else: + self.strategy.parallel_devices = self._parallel_devices if hasattr(self.strategy, "num_nodes"): self.strategy._num_nodes = self._num_nodes_flag if hasattr(self.strategy, "sync_batchnorm"): From e13411974ec943f608a0e35780c58b14b3c5966e Mon Sep 17 00:00:00 2001 From: Siyu Wang Date: Wed, 2 Feb 2022 14:24:24 -0800 Subject: [PATCH 19/69] address some ananth's comments --- .../connectors/accelerator_connector.py | 60 +++++++++---------- 1 file changed, 27 insertions(+), 33 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 32307f63809d6..3a36a4f0e3ff7 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -194,11 +194,12 @@ def _config_check_and_set_final_flags(self, strategy, accelerator, precision, pl self._strategy_flag, self._accelerator_flag, self._precision_flag, - self._cluster_environment, + self._precision_plugin_flag, + self._cluster_environment_flag, self.checkpoint_io, self._amp_level_flag, self._amp_type_flag, - ) = (None, None, None, None, None, amp_type, amp_level) + ) = (None, None, None, None, None, None, amp_type, amp_level) if plugins: plugins = [plugins] if not isinstance(plugins, list) else plugins @@ -266,13 +267,6 @@ def _config_check_and_set_final_flags(self, strategy, accelerator, precision, pl f"Allowed precision values: {PrecisionType.supported_types()}" ) self._precision_flag = precision - # handle duplications and conflict - # [RFC] current logic doesn't handle precision_plugin duplication - # if plugins: - # for plugin in plugins: - # if isinstance(plugin, PrecisionPlugin): - # self._precision_flag = precision - # raise MisconfigurationException("precision set in both precision flag and plugin flag") if plugins: for plugin in plugins: @@ -283,14 +277,14 @@ def _config_check_and_set_final_flags(self, strategy, 
accelerator, precision, pl f" in v1.5 and will be removed in v1.7. Use `Trainer(strategy={plugin})` instead." ) - elif isinstance(plugin, PrecisionPlugin) or ( - isinstance(plugin, str) and plugin in self._supported_precision - ): + elif isinstance(plugin, PrecisionPlugin): + self._precision_plugin_flag = plugin + elif isinstance(plugin, str) and plugin in self._supported_precision: self._precision_flag = plugin elif isinstance(plugin, CheckpointIO): self.checkpoint_io = plugin elif isinstance(plugin, ClusterEnvironment): - self._cluster_environment = plugin + self._cluster_environment_flag = plugin else: raise MisconfigurationException( f"Found invalid type for plugin {plugin}. Expected a precision or training type plugin." @@ -306,13 +300,11 @@ def _config_check_and_set_final_flags(self, strategy, accelerator, precision, pl else: self._accelerator_flag = self._strategy_flag._accelerator if self._strategy_flag._precision_plugin: - # precision has default value 32, we can not tell whether user set it or not - # [RFC] remove default from trainer? - # if self._precision_flag: - # raise MisconfigurationException("precision set through both strategy class and flags, - # choose one place to set") - # else: - self._precision_flag = self._strategy_flag._precision_plugin + # [RFC] handle precision plugin set up conflict? + if self._precision_plugin_flag: + raise MisconfigurationException("precision set through both strategy class and plugins, choose one") + else: + self._precision_plugin_flag = self._strategy_flag._precision_plugin if self._strategy_flag._checkpoint_io: if self.checkpoint_io: raise MisconfigurationException( @@ -321,12 +313,12 @@ def _config_check_and_set_final_flags(self, strategy, accelerator, precision, pl else: self.checkpoint_io = self._strategy_flag._checkpoint_io if getattr(self._strategy_flag, "cluster_environment", None): - if self._cluster_environment: + if self._cluster_environment_flag: raise MisconfigurationException( "cluster_environment set through both strategy class and plugins, choose one" ) else: - self._cluster_environment = getattr(self._strategy_flag, "cluster_environment") + self._cluster_environment_flag = getattr(self._strategy_flag, "cluster_environment") # RFC existing accel_conn doesn't handle this, should we add conflict check? 
# eg: parallel_device is torch.device(cpu) but accelerator=gpu if hasattr(self._strategy_flag, "parallel_devices"): @@ -451,9 +443,11 @@ def _set_parallel_devices_and_init_accelerator(self): self._device_flag = GPUAccelerator.auto_device_count() if isinstance(self._device_flag, int) or isinstance(self._device_flag, str): self._device_flag = int(self._device_flag) - self._parallel_devices = [ - torch.device("cuda", i) for i in device_parser.parse_gpu_ids(self._device_flag) - ] + self._parallel_devices = ( + [torch.device("cuda", i) for i in device_parser.parse_gpu_ids(self._device_flag)] + if self._device_flag != 0 + else [] + ) else: self._parallel_devices = [torch.device("cuda", i) for i in self._device_flag] @@ -473,8 +467,8 @@ def _set_parallel_devices_and_init_accelerator(self): def _choose_and_init_cluster_environment(self): self.cluster_environment = LightningEnvironment() - if isinstance(self._cluster_environment, ClusterEnvironment): - self.cluster_environment = self._cluster_environment + if isinstance(self._cluster_environment_flag, ClusterEnvironment): + self.cluster_environment = self._cluster_environment_flag elif self._is_slurm_managing_tasks(): rank_zero_info("Multiprocessing is handled by SLURM.") self.cluster_environment = SLURMEnvironment() @@ -587,9 +581,8 @@ def _init_strategy(self): def _check_capatibility_and_init_precision(self): self._precision_misconfig_check() - if isinstance(self._precision_flag, PrecisionPlugin): - return self._precision_flag - self.precision_plugin = None + if isinstance(self._precision_plugin_flag, PrecisionPlugin): + return self._precision_plugin_flag if isinstance(self.accelerator, IPUAccelerator): return IPUPrecisionPlugin(self._precision_flag) @@ -655,11 +648,12 @@ def _precision_misconfig_check(self): ) if ( isinstance(self.accelerator, TPUAccelerator) - and isinstance(self._precision_flag, PrecisionPlugin) - and not isinstance(self._precision_flag, (TPUPrecisionPlugin, TPUBf16PrecisionPlugin)) + and self._precision_plugin_flag + and not isinstance(self._precision_plugin_flag, (TPUPrecisionPlugin, TPUBf16PrecisionPlugin)) ): raise ValueError( - f"The `TPUAccelerator` can only be used with a `TPUPrecisionPlugin`," f" found: {self._precision_flag}." + f"The `TPUAccelerator` can only be used with a `TPUPrecisionPlugin`," + f" found: {self._precision_plugin_flag}." 
) if ( self._precision_flag == 16 From 7c1eb85f801df79878c7591b6393acfe313d9fd1 Mon Sep 17 00:00:00 2001 From: Siyu Wang Date: Wed, 2 Feb 2022 15:59:28 -0800 Subject: [PATCH 20/69] draft --- .../trainer/connectors/accelerator_connector.py | 12 +++++++++--- pytorch_lightning/trainer/trainer.py | 2 +- tests/utilities/test_cli.py | 2 +- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 3a36a4f0e3ff7..fe705d0cdea6f 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -767,11 +767,17 @@ def devices(self): @property def tpu_cores(self) -> int: - return self.devices + if isinstance(self.accelerator, TPUAccelerator): + return self.devices + else: + return 0 @property - def ipus(self) -> int: - return self.devices + def num_ipus(self) -> int: + if isinstance(self.accelerator, IPUAccelerator): + return self.devices + else: + return 0 @property def num_gpus(self) -> int: diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 70f72bd2488cc..e9e89f01675ac 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -636,7 +636,7 @@ def _determine_data_use_amount(self, overfit_batches: float) -> None: self.limit_val_batches = 0 def _setup_on_init(self, num_sanity_val_steps: int) -> None: - # self._log_device_info() + self._log_device_info() self.should_stop = False self.state = TrainerState() diff --git a/tests/utilities/test_cli.py b/tests/utilities/test_cli.py index 5ef2cf98cf3e7..9f62e10cd0fae 100644 --- a/tests/utilities/test_cli.py +++ b/tests/utilities/test_cli.py @@ -324,7 +324,7 @@ def test_lightning_cli_args_cluster_environments(tmpdir): class TestModel(BoringModel): def on_fit_start(self): # Ensure SLURMEnvironment is set, instead of default LightningEnvironment - assert isinstance(self.trainer._accelerator_connector._cluster_environment, SLURMEnvironment) + assert isinstance(self.trainer._accelerator_connector.cluster_environment, SLURMEnvironment) self.trainer.ran_asserts = True with mock.patch("sys.argv", ["any.py", "fit", f"--trainer.plugins={json.dumps(plugins)}"]): From 92deb7e5800f0a0b3f19a560ddf3e77520b1be49 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 2 Feb 2022 22:38:52 +0000 Subject: [PATCH 21/69] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pytorch_lightning/strategies/ipu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/strategies/ipu.py b/pytorch_lightning/strategies/ipu.py index c13431d1ad8d8..6a0ef29be0384 100644 --- a/pytorch_lightning/strategies/ipu.py +++ b/pytorch_lightning/strategies/ipu.py @@ -13,7 +13,7 @@ # limitations under the License. 
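
The `tpu_cores` and `num_ipus` hunks in the patch above gate the legacy count accessors on the active accelerator type, returning 0 when it does not match. A toy version of that gating pattern, with locally defined stand-in classes instead of Lightning's accelerators and connector:

class ToyTPUAccelerator: ...
class ToyIPUAccelerator: ...

class ToyConnector:
    # Stand-in connector: holds an accelerator instance and a device count.
    def __init__(self, accelerator, devices: int):
        self.accelerator = accelerator
        self.devices = devices

    @property
    def tpu_cores(self) -> int:
        # only meaningful when a TPU accelerator is active
        return self.devices if isinstance(self.accelerator, ToyTPUAccelerator) else 0

    @property
    def num_ipus(self) -> int:
        # likewise, only meaningful for an IPU accelerator
        return self.devices if isinstance(self.accelerator, ToyIPUAccelerator) else 0

assert ToyConnector(ToyTPUAccelerator(), 8).tpu_cores == 8
assert ToyConnector(ToyTPUAccelerator(), 8).num_ipus == 0
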
import json import os -from typing import Any, Callable, List, Optional, Union, Dict +from typing import Any, Callable, Dict, List, Optional, Union import torch from torch.utils.data import DataLoader From 8aa1f686e719bfaab4b296a0616e30bd515bc203 Mon Sep 17 00:00:00 2001 From: Siyu Wang Date: Wed, 2 Feb 2022 17:37:14 -0800 Subject: [PATCH 22/69] fix ipus and cli tests --- pytorch_lightning/strategies/ipu.py | 2 +- .../connectors/accelerator_connector.py | 41 ++++++++++--------- tests/utilities/test_cli.py | 5 ++- 3 files changed, 26 insertions(+), 22 deletions(-) diff --git a/pytorch_lightning/strategies/ipu.py b/pytorch_lightning/strategies/ipu.py index 6a0ef29be0384..d6e1e12a36465 100644 --- a/pytorch_lightning/strategies/ipu.py +++ b/pytorch_lightning/strategies/ipu.py @@ -62,7 +62,7 @@ def _move_float_tensors_to_half(self, batch: Any) -> Any: class IPUStrategy(ParallelStrategy): """Plugin for training on IPU devices.""" - distributed_backend = "ipu" + distributed_backend = "ipu_strategy" def __init__( self, diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index fe705d0cdea6f..b626990271f06 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -142,7 +142,6 @@ def __init__( # --Parsing_flags------------------------------------------------------ # Get registered strategies, existing accelerators and precision plugins self._existing_strategies_str = StrategyRegistry.available_strategies() - # print(self._existing_strategies_str) self._existing_accelerator_type = ["tpu", "ipu", "gpu", "cpu"] self._supported_precision = PrecisionType.supported_types() @@ -158,7 +157,7 @@ def __init__( # --Accelerator------------------------------------------------------------- # handle `auto` and `None` if self._accelerator_flag == "auto" or self._accelerator_flag is None: - self._choose_accelerator() + self._accelerator_flag = self._choose_accelerator() # else: # # [RFC] move to XAccelerator class init? # self._check_device_availibility() @@ -390,20 +389,20 @@ def _mapping_deprecated_devices_specfic_info_to_accelerator_and_device_flag( self._accelerator_flag = "cpu" def _choose_accelerator(self): + if _TPU_AVAILABLE: + return "tpu" + if _IPU_AVAILABLE: + return "ipu" if self._accelerator_flag == "auto": - if _TPU_AVAILABLE: - self._accelerator_flag = "tpu" - elif _IPU_AVAILABLE: - self._accelerator_flag = "ipu" - elif torch.cuda.is_available() and torch.cuda.device_count() > 0: - self._accelerator_flag = "gpu" + if torch.cuda.is_available() and torch.cuda.device_count() > 0: + return "gpu" else: - self._accelerator_flag = "cpu" if self._device_flag == "auto": self._device_flag = 1 + return "cpu" # [RFC] this is current logic, if accelerator not set, default cpu? 
else: - self._accelerator_flag = "cpu" + return "cpu" # TODO move this to xAccelerator # def _check_device_availibility(self): @@ -492,8 +491,8 @@ def _is_slurm_managing_tasks(self): return num_slurm_tasks == total_requested_devices def _choose_strategy(self): - if self._accelerator_flag == "ipu": - self._strategy_flag = "ipu" + if self._accelerator_flag == "ipu_strategy": + self._strategy_flag = "ipu_strategy" elif self._accelerator_flag == "tpu": if self._parallel_devices and len(self._parallel_devices) > 1: self._strategy_flag = "tpu_spawn" @@ -762,29 +761,31 @@ def devices(self): return 1 elif isinstance(self.strategy, ParallelStrategy): return len(self.strategy.parallel_devices) - else: - return 0 + return 0 @property def tpu_cores(self) -> int: if isinstance(self.accelerator, TPUAccelerator): return self.devices - else: - return 0 + return 0 + + @property + def tpu_id(self) -> Optional[int]: + if isinstance(self.accelerator, TPUAccelerator): + return self.parallel_devices[0] + return None @property def num_ipus(self) -> int: if isinstance(self.accelerator, IPUAccelerator): return self.devices - else: - return 0 + return 0 @property def num_gpus(self) -> int: if isinstance(self.accelerator, GPUAccelerator): return self.devices - else: - return 0 + return 0 # def parallel_device_ids(): @property diff --git a/tests/utilities/test_cli.py b/tests/utilities/test_cli.py index 9f62e10cd0fae..5ba1006c234e9 100644 --- a/tests/utilities/test_cli.py +++ b/tests/utilities/test_cli.py @@ -580,7 +580,10 @@ def on_fit_start(self): @pytest.mark.parametrize( "trainer_kwargs", ( - dict(strategy="ddp_spawn"), + # dict(strategy="ddp_spawn") + # !! old accl_conn will choose singleDeviceStrategy for both strategy=ddp/ddp_spawn + # this test never worked with DDPSpawnStrategy + dict(strategy="single_device"), dict(strategy="ddp"), pytest.param({"tpu_cores": 1}, marks=RunIf(tpu=True)), ), From f4cca3c0dc39ec95bf86d47a0c9aaaec269ba125 Mon Sep 17 00:00:00 2001 From: Siyu Wang Date: Wed, 2 Feb 2022 18:28:51 -0800 Subject: [PATCH 23/69] fix typo --- .../trainer/connectors/accelerator_connector.py | 9 +++++---- tests/accelerators/test_ipu.py | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index b626990271f06..e55a3a09de991 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -463,6 +463,7 @@ def _set_parallel_devices_and_init_accelerator(self): ) self._gpus = self._device_flag if not self._gpus else self._gpus + self._tpu_cores = self._device_flag if not self._tpu_cores else self._tpu_cores def _choose_and_init_cluster_environment(self): self.cluster_environment = LightningEnvironment() @@ -491,7 +492,7 @@ def _is_slurm_managing_tasks(self): return num_slurm_tasks == total_requested_devices def _choose_strategy(self): - if self._accelerator_flag == "ipu_strategy": + if self._accelerator_flag == "ipu": self._strategy_flag = "ipu_strategy" elif self._accelerator_flag == "tpu": if self._parallel_devices and len(self._parallel_devices) > 1: @@ -764,15 +765,15 @@ def devices(self): return 0 @property - def tpu_cores(self) -> int: + def tpu_cores(self): if isinstance(self.accelerator, TPUAccelerator): - return self.devices + return self._tpu_cores return 0 @property def tpu_id(self) -> Optional[int]: if isinstance(self.accelerator, TPUAccelerator): - return self.parallel_devices[0] + 
return self.tpu_cores[0] return None @property diff --git a/tests/accelerators/test_ipu.py b/tests/accelerators/test_ipu.py index a691f4f62d983..40ceab7195219 100644 --- a/tests/accelerators/test_ipu.py +++ b/tests/accelerators/test_ipu.py @@ -116,7 +116,7 @@ def test_accelerator_selected(tmpdir): @RunIf(ipu=True) def test_warning_if_ipus_not_used(tmpdir): with pytest.warns(UserWarning, match="IPU available but not used. Set the `ipus` flag in your trainer"): - Trainer(default_root_dir=tmpdir) + Trainer(default_root_dir=tmpdir, accelerator="cpu") @RunIf(ipu=True) From 677c6f12b5e5db0bd0a0d6db2caddeef2fb28774 Mon Sep 17 00:00:00 2001 From: Siyu Wang Date: Thu, 3 Feb 2022 15:40:36 -0800 Subject: [PATCH 24/69] fix tests --- pytorch_lightning/callbacks/gpu_stats_monitor.py | 2 +- pytorch_lightning/strategies/ddp.py | 1 - pytorch_lightning/strategies/ddp2.py | 1 - pytorch_lightning/strategies/ddp_spawn.py | 5 ----- pytorch_lightning/strategies/deepspeed.py | 2 +- pytorch_lightning/strategies/dp.py | 1 - pytorch_lightning/strategies/fully_sharded.py | 2 +- pytorch_lightning/strategies/horovod.py | 1 - pytorch_lightning/strategies/sharded.py | 2 +- pytorch_lightning/strategies/sharded_spawn.py | 1 - .../trainer/connectors/accelerator_connector.py | 5 +++-- tests/accelerators/test_tpu.py | 11 ++++++++--- tests/utilities/test_cli.py | 2 +- 13 files changed, 16 insertions(+), 20 deletions(-) diff --git a/pytorch_lightning/callbacks/gpu_stats_monitor.py b/pytorch_lightning/callbacks/gpu_stats_monitor.py index 2d10b17acdc95..68d2ef3ba69eb 100644 --- a/pytorch_lightning/callbacks/gpu_stats_monitor.py +++ b/pytorch_lightning/callbacks/gpu_stats_monitor.py @@ -127,7 +127,7 @@ def setup(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule", stage: O if not trainer.logger: raise MisconfigurationException("Cannot use GPUStatsMonitor callback with Trainer that has no logger.") - if trainer._device_type != _AcceleratorType.GPU: + if trainer._device_type != _AcceleratorType.GPU.lower(): raise MisconfigurationException( "You are using GPUStatsMonitor but are not running on GPU" f" since gpus attribute in Trainer is set to {trainer.gpus}." 
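
The `tpu_id` fix above only returns a core index when `_tpu_cores` was given as an explicit list (for example `tpu_cores=[1]`); an integer count has no single id. A standalone illustration of that rule, not the Lightning implementation itself:

from typing import List, Optional, Union

def tpu_id(tpu_cores: Union[int, List[int], None]) -> Optional[int]:
    # A single core id only exists when specific cores were picked as a list;
    # an integer such as 8 means "use 8 cores" and has no meaningful id.
    if isinstance(tpu_cores, list):
        return tpu_cores[0]
    return None

assert tpu_id([1]) == 1
assert tpu_id(8) is None
assert tpu_id(None) is None
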
diff --git a/pytorch_lightning/strategies/ddp.py b/pytorch_lightning/strategies/ddp.py index 12376358799fe..7fd6132fa025a 100644 --- a/pytorch_lightning/strategies/ddp.py +++ b/pytorch_lightning/strategies/ddp.py @@ -50,7 +50,6 @@ from pytorch_lightning.utilities.distributed import _revert_sync_batchnorm, distributed_available from pytorch_lightning.utilities.distributed import group as _group from pytorch_lightning.utilities.distributed import init_dist_connection, ReduceOp, sync_ddp_if_available -from pytorch_lightning.utilities.enums import _StrategyType from pytorch_lightning.utilities.exceptions import DeadlockDetectedException from pytorch_lightning.utilities.rank_zero import rank_zero_only, rank_zero_warn from pytorch_lightning.utilities.seed import reset_seed diff --git a/pytorch_lightning/strategies/ddp2.py b/pytorch_lightning/strategies/ddp2.py index ba8e769c35772..ff84a50fa52ba 100644 --- a/pytorch_lightning/strategies/ddp2.py +++ b/pytorch_lightning/strategies/ddp2.py @@ -17,7 +17,6 @@ from pytorch_lightning.strategies.ddp import DDPStrategy from pytorch_lightning.utilities.apply_func import apply_to_collection -from pytorch_lightning.utilities.enums import _StrategyType from pytorch_lightning.utilities.types import _METRIC_COLLECTION diff --git a/pytorch_lightning/strategies/ddp_spawn.py b/pytorch_lightning/strategies/ddp_spawn.py index 70b14bceac845..1cc926eb2fef8 100644 --- a/pytorch_lightning/strategies/ddp_spawn.py +++ b/pytorch_lightning/strategies/ddp_spawn.py @@ -37,7 +37,6 @@ from pytorch_lightning.utilities.distributed import _revert_sync_batchnorm, distributed_available from pytorch_lightning.utilities.distributed import group as _group from pytorch_lightning.utilities.distributed import init_dist_connection, ReduceOp, sync_ddp_if_available -from pytorch_lightning.utilities.enums import _StrategyType from pytorch_lightning.utilities.model_helpers import is_overridden from pytorch_lightning.utilities.rank_zero import rank_zero_debug, rank_zero_only, rank_zero_warn from pytorch_lightning.utilities.seed import reset_seed @@ -86,10 +85,6 @@ def __init__( def num_nodes(self) -> int: return self._num_nodes - @property - def num_processes(self): - return len(self.parallel_devices) if self.parallel_devices is not None else 0 - @num_nodes.setter def num_nodes(self, num_nodes: int) -> None: # note that world ranks is related to num_nodes, when resetting it, need to reset world ranks diff --git a/pytorch_lightning/strategies/deepspeed.py b/pytorch_lightning/strategies/deepspeed.py index 530ede34ec899..bd6b131574e56 100644 --- a/pytorch_lightning/strategies/deepspeed.py +++ b/pytorch_lightning/strategies/deepspeed.py @@ -35,7 +35,7 @@ from pytorch_lightning.utilities import GradClipAlgorithmType from pytorch_lightning.utilities.apply_func import apply_to_collection from pytorch_lightning.utilities.distributed import log -from pytorch_lightning.utilities.enums import _StrategyType, AMPType, PrecisionType +from pytorch_lightning.utilities.enums import AMPType, PrecisionType from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.imports import _DEEPSPEED_AVAILABLE from pytorch_lightning.utilities.model_helpers import is_overridden diff --git a/pytorch_lightning/strategies/dp.py b/pytorch_lightning/strategies/dp.py index 01066a21c0e71..a886fcdcbee63 100644 --- a/pytorch_lightning/strategies/dp.py +++ b/pytorch_lightning/strategies/dp.py @@ -22,7 +22,6 @@ from pytorch_lightning.plugins.precision import PrecisionPlugin from 
pytorch_lightning.strategies.parallel import ParallelStrategy from pytorch_lightning.utilities.apply_func import apply_to_collection -from pytorch_lightning.utilities.enums import _StrategyType from pytorch_lightning.utilities.model_helpers import is_overridden from pytorch_lightning.utilities.types import _METRIC_COLLECTION, STEP_OUTPUT diff --git a/pytorch_lightning/strategies/fully_sharded.py b/pytorch_lightning/strategies/fully_sharded.py index 4a05abd0dd9d8..cd7155cc41170 100644 --- a/pytorch_lightning/strategies/fully_sharded.py +++ b/pytorch_lightning/strategies/fully_sharded.py @@ -23,7 +23,7 @@ from pytorch_lightning.plugins.precision import PrecisionPlugin from pytorch_lightning.strategies.ddp import DDPStrategy from pytorch_lightning.utilities import _FAIRSCALE_FULLY_SHARDED_AVAILABLE -from pytorch_lightning.utilities.enums import _StrategyType, PrecisionType +from pytorch_lightning.utilities.enums import PrecisionType from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.types import STEP_OUTPUT diff --git a/pytorch_lightning/strategies/horovod.py b/pytorch_lightning/strategies/horovod.py index 1e99dbc429ed8..79e58c164028c 100644 --- a/pytorch_lightning/strategies/horovod.py +++ b/pytorch_lightning/strategies/horovod.py @@ -26,7 +26,6 @@ from pytorch_lightning.utilities.distributed import distributed_available from pytorch_lightning.utilities.distributed import group as dist_group from pytorch_lightning.utilities.distributed import ReduceOp -from pytorch_lightning.utilities.enums import _StrategyType from pytorch_lightning.utilities.imports import _HOROVOD_AVAILABLE from pytorch_lightning.utilities.rank_zero import rank_zero_only diff --git a/pytorch_lightning/strategies/sharded.py b/pytorch_lightning/strategies/sharded.py index 1f402126b6efe..4efdfb685722f 100644 --- a/pytorch_lightning/strategies/sharded.py +++ b/pytorch_lightning/strategies/sharded.py @@ -22,7 +22,7 @@ from pytorch_lightning.core.optimizer import LightningOptimizer from pytorch_lightning.strategies.ddp import DDPStrategy from pytorch_lightning.trainer.states import TrainerFn -from pytorch_lightning.utilities.enums import _StrategyType, PrecisionType +from pytorch_lightning.utilities.enums import PrecisionType from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.imports import _FAIRSCALE_AVAILABLE, _FAIRSCALE_OSS_FP16_BROADCAST_AVAILABLE from pytorch_lightning.utilities.rank_zero import rank_zero_only diff --git a/pytorch_lightning/strategies/sharded_spawn.py b/pytorch_lightning/strategies/sharded_spawn.py index 1a7c6b6e00d1c..a6e007a4be31b 100644 --- a/pytorch_lightning/strategies/sharded_spawn.py +++ b/pytorch_lightning/strategies/sharded_spawn.py @@ -21,7 +21,6 @@ import pytorch_lightning as pl from pytorch_lightning.strategies.ddp_spawn import DDPSpawnStrategy from pytorch_lightning.trainer.states import TrainerFn -from pytorch_lightning.utilities.enums import _StrategyType from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.imports import _FAIRSCALE_AVAILABLE from pytorch_lightning.utilities.rank_zero import rank_zero_only diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index e55a3a09de991..93579130c670a 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ 
-596,7 +596,7 @@ def _check_capatibility_and_init_precision(self): f" is not supported with TPUs. Using `precision='bf16'` instead." ) return TPUBf16PrecisionPlugin() - if self._strategy_flag == "deepspeed" or isinstance(self._strategy_flag, DeepSpeedStrategy): + if isinstance(self._strategy_flag, DeepSpeedStrategy): return DeepSpeedPrecisionPlugin(self._precision_flag, self._amp_type_flag, self._amp_level_flag) if self._precision_flag == 32: @@ -773,7 +773,8 @@ def tpu_cores(self): @property def tpu_id(self) -> Optional[int]: if isinstance(self.accelerator, TPUAccelerator): - return self.tpu_cores[0] + if isinstance(self._tpu_cores, list): + return self._tpu_cores[0] return None @property diff --git a/tests/accelerators/test_tpu.py b/tests/accelerators/test_tpu.py index bec80ec9ccbc1..88b926ab382e1 100644 --- a/tests/accelerators/test_tpu.py +++ b/tests/accelerators/test_tpu.py @@ -228,9 +228,14 @@ def test_ddp_cpu_not_supported_on_tpus(): @RunIf(tpu=True) -@pytest.mark.parametrize("strategy", ["ddp_spawn", "tpu_spawn_debug"]) -def test_strategy_choice_tpu_str(tmpdir, strategy): - trainer = Trainer(strategy=strategy, accelerator="tpu", devices=8) +def test_strategy_choice_tpu_str_ddp_spawn(tmpdir, strategy): + with pytest.raises(ValueError, match="TPUAccelerator` can only be used with a `SingleTPUStrategy`"): + Trainer(strategy="ddp_spawn", accelerator="tpu", devices=8) + + +@RunIf(tpu=True) +def test_strategy_choice_tpu_str_tpu_spawn_debug(tmpdir, strategy): + trainer = Trainer(strategy="tpu_spawn_debug", accelerator="tpu", devices=8) assert isinstance(trainer.strategy, TPUSpawnStrategy) diff --git a/tests/utilities/test_cli.py b/tests/utilities/test_cli.py index 5ba1006c234e9..8992f0c1accd9 100644 --- a/tests/utilities/test_cli.py +++ b/tests/utilities/test_cli.py @@ -581,10 +581,10 @@ def on_fit_start(self): "trainer_kwargs", ( # dict(strategy="ddp_spawn") + # dict(strategy="ddp") # !! old accl_conn will choose singleDeviceStrategy for both strategy=ddp/ddp_spawn # this test never worked with DDPSpawnStrategy dict(strategy="single_device"), - dict(strategy="ddp"), pytest.param({"tpu_cores": 1}, marks=RunIf(tpu=True)), ), ) From 53516214417e8213ce8cbd4e8b0f7e1694917dd7 Mon Sep 17 00:00:00 2001 From: Siyu Wang Date: Thu, 3 Feb 2022 15:48:46 -0800 Subject: [PATCH 25/69] fix pre commit --- pytorch_lightning/strategies/single_device.py | 2 +- pytorch_lightning/strategies/strategy.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/strategies/single_device.py b/pytorch_lightning/strategies/single_device.py index adbd3d71371b5..cdbd8eaa2b7a6 100644 --- a/pytorch_lightning/strategies/single_device.py +++ b/pytorch_lightning/strategies/single_device.py @@ -13,7 +13,7 @@ # limitations under the License. 
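
The TPU tests split in the patch above assert that a TPU accelerator is rejected unless it is paired with a TPU-capable strategy. A small standalone sketch of that compatibility check, using toy classes rather than the Lightning accelerator and strategy types:

class ToyTPUAccel: ...
class ToySingleTPUStrategy: ...
class ToyTPUSpawnStrategy: ...
class ToyDDPSpawnStrategy: ...

def validate_tpu_strategy(accelerator, strategy):
    # Reject a TPU accelerator paired with a non-TPU strategy, mirroring the
    # ValueError exercised by the tests above.
    if isinstance(accelerator, ToyTPUAccel) and not isinstance(
        strategy, (ToySingleTPUStrategy, ToyTPUSpawnStrategy)
    ):
        raise ValueError(
            "A TPU accelerator can only be used with a single-TPU or TPU-spawn strategy, "
            f"found {strategy!r}."
        )

validate_tpu_strategy(ToyTPUAccel(), ToyTPUSpawnStrategy())  # accepted
try:
    validate_tpu_strategy(ToyTPUAccel(), ToyDDPSpawnStrategy())
except ValueError:
    pass  # expected, as in test_strategy_choice_tpu_str_ddp_spawn above
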
from __future__ import annotations -from typing import Any, Dict +from typing import Any import torch diff --git a/pytorch_lightning/strategies/strategy.py b/pytorch_lightning/strategies/strategy.py index 4b339e0b0efb4..33354dc539201 100644 --- a/pytorch_lightning/strategies/strategy.py +++ b/pytorch_lightning/strategies/strategy.py @@ -440,7 +440,7 @@ def teardown(self) -> None: self.precision_plugin.teardown() @classmethod - def register_strategies(cls, strategies_registry) -> None: + def register_strategies(cls, strategy_registry) -> None: if cls.distributed_backend: strategy_registry.register( cls.distributed_backend, From 836eb98f906776cf797d350597b7024844e82e4a Mon Sep 17 00:00:00 2001 From: Siyu Wang Date: Thu, 3 Feb 2022 16:21:56 -0800 Subject: [PATCH 26/69] address comments --- pytorch_lightning/strategies/ddp.py | 2 +- pytorch_lightning/strategies/ddp_spawn.py | 2 +- .../connectors/accelerator_connector.py | 24 +++++++++---------- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/pytorch_lightning/strategies/ddp.py b/pytorch_lightning/strategies/ddp.py index 7fd6132fa025a..3aaa36b01edf2 100644 --- a/pytorch_lightning/strategies/ddp.py +++ b/pytorch_lightning/strategies/ddp.py @@ -428,7 +428,7 @@ def register_strategies(cls, strategy_registry: Dict) -> None: strategy_registry.register( cls.distributed_backend, cls, - description="Strategy", + description=f"{cls.__class__.__name__} Strategy", ) def _should_run_deadlock_detection(self) -> bool: diff --git a/pytorch_lightning/strategies/ddp_spawn.py b/pytorch_lightning/strategies/ddp_spawn.py index 1cc926eb2fef8..75ee8fd4eadb7 100644 --- a/pytorch_lightning/strategies/ddp_spawn.py +++ b/pytorch_lightning/strategies/ddp_spawn.py @@ -367,7 +367,7 @@ def register_strategies(cls, strategy_registry: Dict) -> None: strategy_registry.register( cls.distributed_backend, cls, - description="Strategy", + description=f"{cls.__class__.__name__} Strategy", ) def teardown(self) -> None: diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 93579130c670a..8578ae05b8be8 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -520,32 +520,32 @@ def _choose_strategy(self): def _strategy_check_and_fallbacks(self): # current logic, fallback only apply to user pass in str config not object config - _strategy_flag = "" if isinstance(self._strategy_flag, Strategy) else self._strategy_flag + strategy_flag = "" if isinstance(self._strategy_flag, Strategy) else self._strategy_flag - if _strategy_flag == "ddp_cpu": + if strategy_flag == "ddp_cpu": if _TPU_AVAILABLE: raise MisconfigurationException( "`accelerator='ddp_cpu'` is not supported on TPU machines. " "Learn more: https://github.com/PyTorchLightning/pytorch-lightning/issues/7810" ) if self._device_flag == 1 and self._num_nodes_flag > 1: - _strategy_flag = "ddp" + strategy_flag = "ddp" else: - _strategy_flag = "ddp_spawn" + strategy_flag = "ddp_spawn" if self._accelerator_flag == "gpu": rank_zero_warn( "You requested one or more GPUs, but set `accelerator='ddp_cpu'`. Training will not use GPUs." 
) - if _strategy_flag in ("ddp_spawn", "ddp_spawn_find_unused_parameters_false") and ( + if strategy_flag in ("ddp_spawn", "ddp_spawn_find_unused_parameters_false") and ( TorchElasticEnvironment.detect() or KubeflowEnvironment.detect() or self._is_slurm_managing_tasks() ): - _strategy_flag = "ddp" - if _strategy_flag in ("dp", "ddp2") and self._accelerator_flag == "cpu": - rank_zero_warn(f"{_strategy_flag!r} is not supported on CPUs, hence setting `strategy='ddp'`.") - _strategy_flag = "ddp" + strategy_flag = "ddp" + if strategy_flag in ("dp", "ddp2") and self._accelerator_flag == "cpu": + rank_zero_warn(f"{strategy_flag!r} is not supported on CPUs, hence setting `strategy='ddp'`.") + strategy_flag = "ddp" - if _strategy_flag: - self._strategy_flag = _strategy_flag + if strategy_flag: + self._strategy_flag = strategy_flag def handle_horovod(self): if self._num_nodes_flag > 1: @@ -596,7 +596,7 @@ def _check_capatibility_and_init_precision(self): f" is not supported with TPUs. Using `precision='bf16'` instead." ) return TPUBf16PrecisionPlugin() - if isinstance(self._strategy_flag, DeepSpeedStrategy): + if isinstance(self.strategy, DeepSpeedStrategy): return DeepSpeedPrecisionPlugin(self._precision_flag, self._amp_type_flag, self._amp_level_flag) if self._precision_flag == 32: From 18c4d9ea4ca4a2c87ce96dafc3183a4654778758 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sat, 5 Feb 2022 17:37:46 +0100 Subject: [PATCH 27/69] rename ttp to strategy --- tests/accelerators/test_tpu.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/accelerators/test_tpu.py b/tests/accelerators/test_tpu.py index 88b926ab382e1..2c0b265b0fd16 100644 --- a/tests/accelerators/test_tpu.py +++ b/tests/accelerators/test_tpu.py @@ -292,27 +292,27 @@ def forward(self, x): def test_tpu_invalid_raises(): - training_type_plugin = TPUSpawnStrategy(accelerator=TPUAccelerator(), precision_plugin=PrecisionPlugin()) + strategy = TPUSpawnStrategy(accelerator=TPUAccelerator(), precision_plugin=PrecisionPlugin()) with pytest.raises(ValueError, match="TPUAccelerator` can only be used with a `TPUPrecisionPlugin"): - Trainer(strategy=training_type_plugin) + Trainer(strategy=strategy) - training_type_plugin = DDPStrategy(accelerator=TPUAccelerator(), precision_plugin=TPUPrecisionPlugin()) + strategy = DDPStrategy(accelerator=TPUAccelerator(), precision_plugin=TPUPrecisionPlugin()) with pytest.raises(ValueError, match="TPUAccelerator` can only be used with a `SingleTPUStrategy`"): - Trainer(strategy=training_type_plugin) + Trainer(strategy=strategy) def test_tpu_invalid_raises_set_precision_with_strategy(): accelerator = TPUAccelerator() - training_type_plugin = TPUSpawnStrategy(accelerator=accelerator, precision_plugin=PrecisionPlugin()) + strategy = TPUSpawnStrategy(accelerator=accelerator, precision_plugin=PrecisionPlugin()) with pytest.raises(ValueError, match="`TPUAccelerator` can only be used with a `TPUPrecisionPlugin`"): - Trainer(strategy=training_type_plugin) + Trainer(strategy=strategy) accelerator = TPUAccelerator() - training_type_plugin = DDPStrategy(accelerator=accelerator, precision_plugin=TPUPrecisionPlugin()) + strategy = DDPStrategy(accelerator=accelerator, precision_plugin=TPUPrecisionPlugin()) with pytest.raises( ValueError, match="The `TPUAccelerator` can only be used with a `SingleTPUStrategy` or `TPUSpawnStrategy" ): - Trainer(strategy=training_type_plugin) + Trainer(strategy=strategy) @RunIf(tpu=True) From 0bbc1c4c5a97938c2ca52b6a400f098fe441b8a8 Mon 
Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sat, 5 Feb 2022 17:37:54 +0100 Subject: [PATCH 28/69] fix typo --- tests/accelerators/test_accelerator_connector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py index 338b6441944cb..2df4a8e1b63da 100644 --- a/tests/accelerators/test_accelerator_connector.py +++ b/tests/accelerators/test_accelerator_connector.py @@ -903,7 +903,7 @@ def test_unsupported_tpu_choice(monkeypatch): with pytest.raises(MisconfigurationException, match=r"accelerator='tpu', precision=64\)` is not implemented"): Trainer(accelerator="tpu", precision=64) - # if user haven't set strategy, accelerator_connector will choose the TPUSingleStrategy or TPUSpawnStrategy + # if user didn't set strategy, AcceleratorConnector will choose the TPUSingleStrategy or TPUSpawnStrategy with pytest.raises(ValueError, match="TPUAccelerator` can only be used with a `SingleTPUStrategy`"): with pytest.warns(UserWarning, match=r"accelerator='tpu', precision=16\)` but native AMP is not supported"): Trainer(accelerator="tpu", precision=16, strategy="ddp") From 2d54316f312c56b2df62d62ce07f130511e55814 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sat, 5 Feb 2022 18:21:27 +0100 Subject: [PATCH 29/69] add typing to constructor --- .../connectors/accelerator_connector.py | 41 ++++++++++--------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 8578ae05b8be8..932c343347efd 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -34,7 +34,7 @@ PrecisionPlugin, ShardedNativeMixedPrecisionPlugin, TPUBf16PrecisionPlugin, - TPUPrecisionPlugin, + TPUPrecisionPlugin, PLUGIN_INPUT, ) from pytorch_lightning.plugins.environments import ( BaguaEnvironment, @@ -84,25 +84,28 @@ class AcceleratorConnector: def __init__( self, - devices, - num_nodes, - accelerator, # reduce typing - strategy: Optional[Union[str, Strategy]], - plugins, - precision, - amp_type, - amp_level, - sync_batchnorm, - benchmark, - replace_sampler_ddp, - deterministic: bool, - num_processes, # deprecated - tpu_cores, # deprecated - ipus, # deprecated - gpus, # deprecated - gpu_ids, + devices: Optional[Union[List[int], str, int]] = None, + num_nodes: int = 1, + accelerator: Optional[Union[str, Accelerator]] = None, + strategy: Optional[Union[str, Strategy]] = None, + plugins: Optional[Union[PLUGIN_INPUT, List[PLUGIN_INPUT]]] = None, + precision: Union[int, str] = 32, + amp_type: str = "native", + amp_level: Optional[str] = None, + sync_batchnorm: bool = False, + benchmark: bool = False, + replace_sampler_ddp: bool = True, + deterministic: bool = False, # TODO: why is it unused? + num_processes: int = None, # deprecated + tpu_cores: Optional[Union[List[int], int]] = None, # deprecated + ipus: Optional[int] = None, # deprecated + gpus: Optional[Union[List[int], str, int]] = None, # deprecated + gpu_ids: Optional[List[int]] = None, # TODO: why is it unused? ): - """ + """The AcceleratorConnector parses several Trainer arguments and instantiates the Strategy including other + components such as the Accelerator and Precision plugin. + + A. accelerator flag could be: 1. strategy class (deprecated in 1.5 will be removed in 1.7) 2. 
strategy str (deprecated in 1.5 will be removed in 1.7) From f7eee0579b28c8ff0c592983809d21ddd5be8289 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sat, 5 Feb 2022 18:38:23 +0100 Subject: [PATCH 30/69] update on comments --- .../connectors/accelerator_connector.py | 62 ++++++++++--------- 1 file changed, 32 insertions(+), 30 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 932c343347efd..dc9097af14a65 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -142,66 +142,67 @@ def __init__( self.replace_sampler_ddp = replace_sampler_ddp self.sync_batchnorm = sync_batchnorm - # --Parsing_flags------------------------------------------------------ - # Get registered strategies, existing accelerators and precision plugins + # 1. Parsing flags + # Get registered strategies, built-in accelerators and precision plugins self._existing_strategies_str = StrategyRegistry.available_strategies() self._existing_accelerator_type = ["tpu", "ipu", "gpu", "cpu"] self._supported_precision = PrecisionType.supported_types() - # raise misconfig exceptions if their is conflict between flags - # set the valid flag to self._x_flag after validation - # for example: if accelerator is strategy class, set self._strategy_flag = accelerator - # for devices: assign gpus ipus and etcs to accelerator_flag and devices_flag + # Raise an exception if there are conflicts between flags + # Set each valid flag to `self._x_flag` after validation + # Example: If accelerator is set to a strategy type, set `self._strategy_flag = accelerator`. + # For devices: Assign gpus, ipus, etc. to the accelerator flag and devices flag self._config_check_and_set_final_flags(strategy, accelerator, precision, plugins, amp_type, amp_level) self._device_config_check_and_set_final_flags( devices=devices, num_nodes=num_nodes, num_processes=num_processes, gpus=gpus, ipus=ipus, tpu_cores=tpu_cores ) - # --Accelerator------------------------------------------------------------- + # 2. Instantiate Accelerator # handle `auto` and `None` if self._accelerator_flag == "auto" or self._accelerator_flag is None: self._accelerator_flag = self._choose_accelerator() # else: - # # [RFC] move to XAccelerator class init? + # # TODO: [RFC] move to XAccelerator class init? # self._check_device_availibility() self._set_parallel_devices_and_init_accelerator() - # --Cluster_environment----------------------------------------------------- + # 3. Instantiate ClusterEnvironment self._choose_and_init_cluster_environment() - # --Strategy Part 1 : choose strategy and init strategy --------------------------------------- + # 4. Instantiate Strategy - Part 1 if self._strategy_flag is None: self._choose_strategy() - # Reset strategy even user has specificed one + # In specific cases, ignore user selection and fall back to a different strategy self._strategy_check_and_fallbacks() self._init_strategy() - # --Precision---------------------------------------------------------------- + # 5. Instantiate Precision Plugin self.precision_plugin = self._check_capatibility_and_init_precision() - # --Strategy Part 2 : init Strategy and set Strategy properties ------------- + # 6. Instantiate Strategy - Part 2 self._lazy_init_strategy() def _config_check_and_set_final_flags(self, strategy, accelerator, precision, plugins, amp_type, amp_level): """This method checks: - 1. 
strategy flag: strategy, accelerator and plugin can all set strategies - 2. accelerator: if accelerator flag is Accelerator related flag or class, set self._acceelrator_flag; - If accelerator is strategy related, logic handled in 1 above - 3. precision could be set by precision and plugins flag - 4. plugins could be duplicated in strategy (handled by 1), precision (handled by 3), - set checkpoint_io and cluster_environment + 1. strategy: strategy, accelerator and plugin can all be set to strategies + 2. accelerator: if the value of the accelerator argument is a type of accelerator (instance or string), + set self._acceelrator_flag accordingly. If the value is strategy related (instance or string), + it gets handled by 1. + 3. precision: The final value of the precision flag may be determined either by the precision argument or + by a plugin instance. + 4. plugins: a plugin could occur as a value of the strategy argument (handled by 1), or the precision + argument (handled by 3). We also extract the CheckpointIO and ClusterEnvironment plugins. """ - ( - self._strategy_flag, - self._accelerator_flag, - self._precision_flag, - self._precision_plugin_flag, - self._cluster_environment_flag, - self.checkpoint_io, - self._amp_level_flag, - self._amp_type_flag, - ) = (None, None, None, None, None, None, amp_type, amp_level) + self._strategy_flag = None + self._accelerator_flag = None + self._precision_flag = None + self._precision_plugin_flag = None + self._cluster_environment_flag = None + self.checkpoint_io = None + self._amp_level_flag = amp_type + self._amp_type_flag = amp_level + if plugins: plugins = [plugins] if not isinstance(plugins, list) else plugins @@ -292,7 +293,8 @@ def _config_check_and_set_final_flags(self, strategy, accelerator, precision, pl f"Found invalid type for plugin {plugin}. Expected a precision or training type plugin." ) - # if user pass in a strategy class which has accelerator, precision, checkpoint or cluster env set up + # handle the case when the user passes in a strategy instance which has an accelerator, precision, + # checkpoint io or cluster env set up if self._strategy_flag and isinstance(self._strategy_flag, Strategy): if self._strategy_flag._accelerator: if self._accelerator_flag: From 1022b250803e07b2f3baecc6f75e4b5f203e69c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sat, 5 Feb 2022 23:10:29 +0100 Subject: [PATCH 31/69] typing, documentation improvements, adding todo's --- .../connectors/accelerator_connector.py | 120 +++++++++--------- 1 file changed, 62 insertions(+), 58 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index dc9097af14a65..2db1caf97d044 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -101,7 +101,7 @@ def __init__( ipus: Optional[int] = None, # deprecated gpus: Optional[Union[List[int], str, int]] = None, # deprecated gpu_ids: Optional[List[int]] = None, # TODO: why is it unused? - ): + ) -> None: """The AcceleratorConnector parses several Trainer arguments and instantiates the Strategy including other components such as the Accelerator and Precision plugin. @@ -152,8 +152,8 @@ def __init__( # Set each valid flag to `self._x_flag` after validation # Example: If accelerator is set to a strategy type, set `self._strategy_flag = accelerator`. # For devices: Assign gpus, ipus, etc. 
to the accelerator flag and devices flag - self._config_check_and_set_final_flags(strategy, accelerator, precision, plugins, amp_type, amp_level) - self._device_config_check_and_set_final_flags( + self._check_config_and_set_final_flags(strategy, accelerator, precision, plugins, amp_type, amp_level) + self._check_device_config_and_set_final_flags( devices=devices, num_nodes=num_nodes, num_processes=num_processes, gpus=gpus, ipus=ipus, tpu_cores=tpu_cores ) @@ -173,16 +173,16 @@ def __init__( if self._strategy_flag is None: self._choose_strategy() # In specific cases, ignore user selection and fall back to a different strategy - self._strategy_check_and_fallbacks() + self._check_strategy_and_fallback() self._init_strategy() # 5. Instantiate Precision Plugin - self.precision_plugin = self._check_capatibility_and_init_precision() + self.precision_plugin = self._check_and_init_precision() # 6. Instantiate Strategy - Part 2 self._lazy_init_strategy() - def _config_check_and_set_final_flags(self, strategy, accelerator, precision, plugins, amp_type, amp_level): + def _check_config_and_set_final_flags(self, strategy, accelerator, precision, plugins, amp_type, amp_level) -> None: """This method checks: 1. strategy: strategy, accelerator and plugin can all be set to strategies @@ -295,6 +295,7 @@ def _config_check_and_set_final_flags(self, strategy, accelerator, precision, pl # handle the case when the user passes in a strategy instance which has an accelerator, precision, # checkpoint io or cluster env set up + # TODO: @awaelchli imporve the error messages below if self._strategy_flag and isinstance(self._strategy_flag, Strategy): if self._strategy_flag._accelerator: if self._accelerator_flag: @@ -323,8 +324,9 @@ def _config_check_and_set_final_flags(self, strategy, accelerator, precision, pl ) else: self._cluster_environment_flag = getattr(self._strategy_flag, "cluster_environment") - # RFC existing accel_conn doesn't handle this, should we add conflict check? - # eg: parallel_device is torch.device(cpu) but accelerator=gpu + + # TODO: RFC existing accel_conn doesn't handle this, should we add conflict check? 
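Illustratively, at the Trainer call site the flag-resolution rules in this docstring play out roughly as in the sketch below (an assumption-laden example, not part of the patch: it presumes a machine with two CUDA GPUs and uses only the class names that appear in these hunks).

    from pytorch_lightning import Trainer
    from pytorch_lightning.strategies import DDPStrategy

    # A Strategy instance combined with non-conflicting flags: the accelerator, devices and
    # precision settings are folded into the strategy later, during _lazy_init_strategy().
    trainer = Trainer(strategy=DDPStrategy(), accelerator="gpu", devices=2)

    # Two different strategies passed at once (one via `strategy`, one via the deprecated
    # `accelerator` path) are rejected by the checks above:
    # Trainer(strategy="ddp_spawn", accelerator=DDPStrategy())  # -> MisconfigurationException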
+ # eg: parallel_device is torch.device(cpu) but accelerator=gpu if hasattr(self._strategy_flag, "parallel_devices"): if self._strategy_flag.parallel_devices: if self._strategy_flag.parallel_devices[0].type == "cpu": @@ -341,36 +343,37 @@ def _config_check_and_set_final_flags(self, strategy, accelerator, precision, pl ) self._amp_level_flag = amp_level - def _device_config_check_and_set_final_flags(self, devices, num_nodes, num_processes, gpus, ipus, tpu_cores): + def _check_device_config_and_set_final_flags(self, devices, num_nodes, num_processes, gpus, ipus, tpu_cores) -> None: if num_nodes == "auto": self._num_nodes_flag = 1 else: self._num_nodes_flag = int(num_nodes) if num_nodes is not None else 1 self._device_flag = devices - # Delete when remove num_processes, gpus, ipus and tpu_cores + + # TODO: Delete this parsing section when num_processes, gpus, ipus and tpu_cores get removed self._gpus = gpus self._tpu_cores = tpu_cores gpus = device_parser.parse_gpu_ids(gpus) tpu_cores = device_parser.parse_tpu_cores(tpu_cores) deprecated_devices_specific_flag = num_processes or gpus or ipus or tpu_cores if deprecated_devices_specific_flag and deprecated_devices_specific_flag not in (0, "0"): - self._mapping_deprecated_devices_specfic_info_to_accelerator_and_device_flag( + self._map_deprecated_devices_specfic_info_to_accelerator_and_device_flag( devices, deprecated_devices_specific_flag, num_processes, gpus, ipus, tpu_cores ) - # Delete end - if self._device_flag == "auto": - if self._accelerator_flag is None: - raise MisconfigurationException( - f"You passed `devices={devices}` but haven't specified" - " `accelerator=('auto'|'tpu'|'gpu'|'ipu'|'cpu')` for the devices mapping" - ) - def _mapping_deprecated_devices_specfic_info_to_accelerator_and_device_flag( + if self._device_flag == "auto" and self._accelerator_flag is None: + raise MisconfigurationException( + f"You passed `devices={devices}` but haven't specified" + " `accelerator=('auto'|'tpu'|'gpu'|'ipu'|'cpu')` for the devices mapping" + ) + + def _map_deprecated_devices_specfic_info_to_accelerator_and_device_flag( self, devices, deprecated_devices_specific_flag, num_processes, gpus, ipus, tpu_cores - ): - # set devices base on num_processes, gpus, ipus, tpu_cores + ) -> None: + """Sets the `device_flag` based on num_processes, gpus, ipus, tpu_cores.""" if devices: + # TODO: @awaelchli improve error message rank_zero_warn( f"The flag `devices={devices}` will be ignored, " f"instand the device specific number {deprecated_devices_specific_flag} will be used" @@ -379,11 +382,12 @@ def _mapping_deprecated_devices_specfic_info_to_accelerator_and_device_flag( if [(num_processes is not None), (gpus is not None), (ipus is not None), (tpu_cores is not None)].count( True ) > 1: + # TODO: @awaelchli improve error message rank_zero_warn("more than one device specifc flag has been set") self._device_flag = deprecated_devices_specific_flag if not self._accelerator_flag: - # set accelerator type base on num_processes, gpus, ipus, tpu_cores + # set accelerator type based on num_processes, gpus, ipus, tpu_cores if ipus: self._accelerator_flag = "ipu" if tpu_cores: @@ -393,7 +397,8 @@ def _mapping_deprecated_devices_specfic_info_to_accelerator_and_device_flag( if num_processes: self._accelerator_flag = "cpu" - def _choose_accelerator(self): + def _choose_accelerator(self) -> str: + """Choose the accelerator type (str) based on availability when ``accelerator='auto'``.""" if _TPU_AVAILABLE: return "tpu" if _IPU_AVAILABLE: @@ -421,7 +426,7 @@ def 
_choose_accelerator(self): # f"You choice {accelerator_flag} accelerator, but {accelerator_flag} is not available" # ) - def _set_parallel_devices_and_init_accelerator(self): + def _set_parallel_devices_and_init_accelerator(self) -> None: self._parallel_devices = [] if isinstance(self._accelerator_flag, Accelerator): self.accelerator = self._accelerator_flag @@ -470,7 +475,7 @@ def _set_parallel_devices_and_init_accelerator(self): self._gpus = self._device_flag if not self._gpus else self._gpus self._tpu_cores = self._device_flag if not self._tpu_cores else self._tpu_cores - def _choose_and_init_cluster_environment(self): + def _choose_and_init_cluster_environment(self) -> None: self.cluster_environment = LightningEnvironment() if isinstance(self._cluster_environment_flag, ClusterEnvironment): self.cluster_environment = self._cluster_environment_flag @@ -482,7 +487,6 @@ def _choose_and_init_cluster_environment(self): if env_type.detect(): self.cluster_environment = env_type() - @property def _is_sharded_training_type(self) -> bool: return isinstance(self._strategy, (DDPShardedStrategy, DDPSpawnShardedStrategy)) @@ -496,14 +500,14 @@ def _is_slurm_managing_tasks(self): num_slurm_tasks = int(os.environ["SLURM_NTASKS"], 0) return num_slurm_tasks == total_requested_devices - def _choose_strategy(self): + def _choose_strategy(self) -> None: if self._accelerator_flag == "ipu": self._strategy_flag = "ipu_strategy" elif self._accelerator_flag == "tpu": if self._parallel_devices and len(self._parallel_devices) > 1: self._strategy_flag = "tpu_spawn" else: - # TODO lazy initialized device, then here could be self._strategy_flag = "single_tpu_device" + # TODO: lazy initialized device, then here could be self._strategy_flag = "single_tpu_device" self._strategy_flag = SingleTPUStrategy(device=self._parallel_devices[0]) elif _HOROVOD_AVAILABLE and ("OMPI_COMM_WORLD_RANK" in os.environ or "HOROVOD_RANK" in os.environ): self._strategy_flag = "horovod" @@ -516,14 +520,16 @@ def _choose_strategy(self): if self._accelerator_flag == "gpu" else "cpu" ) - # TODO lazy initialized device, then here could be self._strategy_flag = "single_device" + # TODO: lazy initialized device, then here could be self._strategy_flag = "single_device" self._strategy_flag = SingleDeviceStrategy(device=device) elif len(self._parallel_devices) > 1: self._strategy_flag = "ddp_spawn" else: self._strategy_flag = "ddp" - def _strategy_check_and_fallbacks(self): + def _check_strategy_and_fallback(self) -> None: + """Checks edge cases when the strategy selection was a string input, and we need to fall back to a different + choice depending on other parameters or the environment.""" # current logic, fallback only apply to user pass in str config not object config strategy_flag = "" if isinstance(self._strategy_flag, Strategy) else self._strategy_flag @@ -552,7 +558,7 @@ def _strategy_check_and_fallbacks(self): if strategy_flag: self._strategy_flag = strategy_flag - def handle_horovod(self): + def _handle_horovod(self) -> None: if self._num_nodes_flag > 1: raise MisconfigurationException( "Horovod does not support setting num_nodes / num_gpus explicitly. 
Use " @@ -572,11 +578,12 @@ def handle_horovod(self): else: self._parallel_devices = [torch.device("cpu")] * hvd.local_size() - def _init_strategy(self): + def _init_strategy(self) -> None: + """Instantiate the Strategy given depending on the setting of ``_strategy_flag``.""" if isinstance(self._strategy_flag, HorovodStrategy) or self._strategy_flag == "horovod": # handle horovod has to happen before initialize strategy because HorovodStrategy needs hvd.init() first. # TODO lazy initialized and setup horovod strategy `global_rank` - self.handle_horovod() + self._handle_horovod() if isinstance(self._strategy_flag, str): self.strategy = StrategyRegistry.get(self._strategy_flag) elif isinstance(self._strategy_flag, Strategy): @@ -584,8 +591,8 @@ def _init_strategy(self): else: raise RuntimeError(f"{self.strategy} is not valid type: {self.strategy}") - def _check_capatibility_and_init_precision(self): - self._precision_misconfig_check() + def _check_and_init_precision(self) -> PrecisionPlugin: + self._validate_precision_choice() if isinstance(self._precision_plugin_flag, PrecisionPlugin): return self._precision_plugin_flag @@ -638,8 +645,9 @@ def _check_capatibility_and_init_precision(self): raise RuntimeError("No precision set") - def _precision_misconfig_check(self): - # TODO change exception type to ImpactableConfigurationException + def _validate_precision_choice(self) -> None: + """Validate the combination of choices for precision, AMP type, and accelerator.""" + # TODO: change exception type to ImpactableConfigurationException if isinstance(self.accelerator, IPUAccelerator): if self._precision_flag not in (16, 32): raise MisconfigurationException( @@ -681,7 +689,7 @@ def _precision_misconfig_check(self): ) def _lazy_init_strategy(self): - # set strategy properties + """Lazily set missing attributes on the previously instantiated strategy.""" self.strategy.accelerator = self.accelerator if self.precision_plugin: self.strategy.precision_plugin = self.precision_plugin @@ -714,7 +722,7 @@ def _lazy_init_strategy(self): " creation inside the worker function." ) - # TODO should be moved to _strategy_check_and_fallbacks(). + # TODO: should be moved to _check_strategy_and_fallback(). # Current test check precision first, so keep this check here to meet error order if isinstance(self.accelerator, TPUAccelerator) and not isinstance( self.strategy, (SingleTPUStrategy, TPUSpawnStrategy) @@ -724,18 +732,16 @@ def _lazy_init_strategy(self): f" found {self.strategy}." 
) - ############################################################################## - # the following logic should be deprecated/removed, and these information should be - # retrive from strategies and accelerators - # Added here to keep backward compabilities + """The following properties are here for backward-compatibility and will be deprecated and removed in favor + of accessing this information through the strategy/accelerator directly.""" + # TODO: deprecate all properties below @property def parallel_devices(self) -> List[Union[torch.device, int]]: return self._parallel_devices - # def _distrib_type(): @property - def device_type(self): + def device_type(self) -> str: if isinstance(self.accelerator, CPUAccelerator): return "cpu" if isinstance(self.accelerator, GPUAccelerator): @@ -746,11 +752,11 @@ def device_type(self): return "ipu" @property - def num_nodes(self): + def num_nodes(self) -> int: return self._num_nodes_flag @property - def num_processes(self): + def num_processes(self) -> int: return self.devices if self.devices is not None else 1 @property @@ -762,7 +768,7 @@ def root_gpu(self) -> Optional[int]: ) @property - def devices(self): + def devices(self) -> int: if isinstance(self.strategy, SingleDeviceStrategy): return 1 elif isinstance(self.strategy, ParallelStrategy): @@ -770,7 +776,7 @@ def devices(self): return 0 @property - def tpu_cores(self): + def tpu_cores(self) -> int: if isinstance(self.accelerator, TPUAccelerator): return self._tpu_cores return 0 @@ -794,18 +800,16 @@ def num_gpus(self) -> int: return self.devices return 0 - # def parallel_device_ids(): @property - def gpus(self): + def gpus(self) -> Optional[Union[List[int], str, int]]: return self._gpus - # if isinstance(self.accelerator, GPUAccelerator) else 0 @property - def parallel_device_ids(self): + def parallel_device_ids(self) -> Optional[List[int]]: return [i for i in range(len(self.parallel_devices))] if isinstance(self.accelerator, GPUAccelerator) else None @property - def is_distributed(self): + def is_distributed(self) -> bool: # Used for custom plugins. # Custom plugins should implement is_distributed property. 
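For orientation, the precision-selection logic added earlier in this patch (`_check_and_init_precision`) is expected to resolve a few common configurations roughly as sketched below; this is an illustrative summary assuming the corresponding hardware and packages are available, not an exhaustive mapping.

    # Trainer configuration                        -> precision plugin chosen by the connector
    Trainer(accelerator="tpu", precision="bf16")   # -> TPUBf16PrecisionPlugin
    Trainer(strategy="deepspeed", precision=16)    # -> DeepSpeedPrecisionPlugin
    Trainer(accelerator="gpu", precision=16)       # -> NativeMixedPrecisionPlugin (amp_type="native")
    Trainer(accelerator="cpu", precision=32)       # -> PrecisionPlugin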
if hasattr(self.strategy, "is_distributed") and not isinstance(self.accelerator, TPUAccelerator): @@ -827,19 +831,19 @@ def is_distributed(self): return is_distributed @property - def has_ipu(self): + def has_ipu(self) -> bool: return isinstance(self.accelerator, IPUAccelerator) @property - def use_ipu(self): + def use_ipu(self) -> bool: return self.has_ipu @property - def has_tpu(self): + def has_tpu(self) -> bool: return isinstance(self.accelerator, TPUAccelerator) @property - def use_dp(self): + def use_dp(self) -> bool: return isinstance(self.strategy, DataParallelStrategy) @property From 932e28a31c1ceb90e6d2a9611c08d71a84123ca7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sun, 6 Feb 2022 01:31:28 +0100 Subject: [PATCH 32/69] fix amp_level, amp_type mixup --- pytorch_lightning/trainer/connectors/accelerator_connector.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 2db1caf97d044..b09baf06282b8 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -200,8 +200,8 @@ def _check_config_and_set_final_flags(self, strategy, accelerator, precision, pl self._precision_plugin_flag = None self._cluster_environment_flag = None self.checkpoint_io = None - self._amp_level_flag = amp_type - self._amp_type_flag = amp_level + self._amp_level_flag = amp_level + self._amp_type_flag = amp_type if plugins: plugins = [plugins] if not isinstance(plugins, list) else plugins From 3286de3bdd1ffb21d15116862e3d912a5ddaaa61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sun, 6 Feb 2022 02:52:30 +0100 Subject: [PATCH 33/69] more typing fixes --- pyproject.toml | 1 - .../connectors/accelerator_connector.py | 60 +++++++++++++------ 2 files changed, 42 insertions(+), 19 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 15b8391cdbfcf..91e2eaa8b70d1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -82,7 +82,6 @@ module = [ "pytorch_lightning.profiler.pytorch", "pytorch_lightning.profiler.simple", "pytorch_lightning.trainer.callback_hook", - "pytorch_lightning.trainer.connectors.accelerator_connector", "pytorch_lightning.trainer.connectors.callback_connector", "pytorch_lightning.trainer.connectors.data_connector", "pytorch_lightning.trainer.data_loading", diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index b09baf06282b8..d15f2ade22aec 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -34,7 +34,8 @@ PrecisionPlugin, ShardedNativeMixedPrecisionPlugin, TPUBf16PrecisionPlugin, - TPUPrecisionPlugin, PLUGIN_INPUT, + TPUPrecisionPlugin, + PLUGIN_INPUT, ) from pytorch_lightning.plugins.environments import ( BaguaEnvironment, @@ -96,7 +97,7 @@ def __init__( benchmark: bool = False, replace_sampler_ddp: bool = True, deterministic: bool = False, # TODO: why is it unused? - num_processes: int = None, # deprecated + num_processes: Optional[int] = None, # deprecated tpu_cores: Optional[Union[List[int], int]] = None, # deprecated ipus: Optional[int] = None, # deprecated gpus: Optional[Union[List[int], str, int]] = None, # deprecated @@ -182,17 +183,25 @@ def __init__( # 6. 
Instantiate Strategy - Part 2 self._lazy_init_strategy() - def _check_config_and_set_final_flags(self, strategy, accelerator, precision, plugins, amp_type, amp_level) -> None: + def _check_config_and_set_final_flags( + self, + strategy: Optional[Union[str, Strategy]], + accelerator: Optional[Union[str, Accelerator]], + precision: Union[int, str], + plugins: Optional[Union[PLUGIN_INPUT, List[PLUGIN_INPUT]]], + amp_type: str, + amp_level: Optional[str], + ) -> None: """This method checks: - 1. strategy: strategy, accelerator and plugin can all be set to strategies - 2. accelerator: if the value of the accelerator argument is a type of accelerator (instance or string), - set self._acceelrator_flag accordingly. If the value is strategy related (instance or string), - it gets handled by 1. - 3. precision: The final value of the precision flag may be determined either by the precision argument or - by a plugin instance. - 4. plugins: a plugin could occur as a value of the strategy argument (handled by 1), or the precision - argument (handled by 3). We also extract the CheckpointIO and ClusterEnvironment plugins. + 1. strategy: strategy, accelerator and plugin can all be set to strategies + 2. accelerator: if the value of the accelerator argument is a type of accelerator (instance or string), + set self._acceelrator_flag accordingly. If the value is strategy related (instance or string), + it gets handled by 1. + 3. precision: The final value of the precision flag may be determined either by the precision argument or + by a plugin instance. + 4. plugins: a plugin could occur as a value of the strategy argument (handled by 1), or the precision + argument (handled by 3). We also extract the CheckpointIO and ClusterEnvironment plugins. """ self._strategy_flag = None self._accelerator_flag = None @@ -203,7 +212,7 @@ def _check_config_and_set_final_flags(self, strategy, accelerator, precision, pl self._amp_level_flag = amp_level self._amp_type_flag = amp_type - if plugins: + if plugins is not None: plugins = [plugins] if not isinstance(plugins, list) else plugins if strategy: @@ -231,6 +240,7 @@ def _check_config_and_set_final_flags(self, strategy, accelerator, precision, pl raise MisconfigurationException( "strategy str already set through strategy flag, but have also passed in through accelerator" ) + if plugins: for plugin in plugins: if isinstance(plugin, Strategy): @@ -343,7 +353,15 @@ def _check_config_and_set_final_flags(self, strategy, accelerator, precision, pl ) self._amp_level_flag = amp_level - def _check_device_config_and_set_final_flags(self, devices, num_nodes, num_processes, gpus, ipus, tpu_cores) -> None: + def _check_device_config_and_set_final_flags( + self, + devices: Optional[Union[List[int], str, int]], + num_nodes: int, + num_processes: Optional[int], + gpus: Optional[Union[List[int], str, int]], + ipus: Optional[int], + tpu_cores: Optional[Union[List[int], int]], + ) -> None: if num_nodes == "auto": self._num_nodes_flag = 1 else: @@ -369,7 +387,13 @@ def _check_device_config_and_set_final_flags(self, devices, num_nodes, num_proce ) def _map_deprecated_devices_specfic_info_to_accelerator_and_device_flag( - self, devices, deprecated_devices_specific_flag, num_processes, gpus, ipus, tpu_cores + self, + devices: Optional[Union[List[int], str, int]], + deprecated_devices_specific_flag: Union[int, List[int]], + num_processes: Optional[int], + gpus: Optional[List[int]], + ipus: Optional[int], + tpu_cores: Optional[Union[int, List[int]]], ) -> None: """Sets the `device_flag` based on 
num_processes, gpus, ipus, tpu_cores.""" if devices: @@ -429,7 +453,7 @@ def _choose_accelerator(self) -> str: def _set_parallel_devices_and_init_accelerator(self) -> None: self._parallel_devices = [] if isinstance(self._accelerator_flag, Accelerator): - self.accelerator = self._accelerator_flag + self.accelerator: Accelerator = self._accelerator_flag elif self._accelerator_flag == "tpu": self.accelerator = TPUAccelerator() if self._device_flag == "auto" or not self._device_flag: @@ -476,7 +500,7 @@ def _set_parallel_devices_and_init_accelerator(self) -> None: self._tpu_cores = self._device_flag if not self._tpu_cores else self._tpu_cores def _choose_and_init_cluster_environment(self) -> None: - self.cluster_environment = LightningEnvironment() + self.cluster_environment: ClusterEnvironment = LightningEnvironment() if isinstance(self._cluster_environment_flag, ClusterEnvironment): self.cluster_environment = self._cluster_environment_flag elif self._is_slurm_managing_tasks(): @@ -688,7 +712,7 @@ def _validate_precision_choice(self) -> None: "Sharded plugins are not supported with apex, please switch to `amp_backend='native'`." ) - def _lazy_init_strategy(self): + def _lazy_init_strategy(self) -> None: """Lazily set missing attributes on the previously instantiated strategy.""" self.strategy.accelerator = self.accelerator if self.precision_plugin: @@ -776,7 +800,7 @@ def devices(self) -> int: return 0 @property - def tpu_cores(self) -> int: + def tpu_cores(self) -> Optional[Union[List[int], int]]: if isinstance(self.accelerator, TPUAccelerator): return self._tpu_cores return 0 From 774f35dfaf913a44b3233a05d369e7bcb1eb5b31 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 7 Feb 2022 18:01:18 +0000 Subject: [PATCH 34/69] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../trainer/connectors/accelerator_connector.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index d15f2ade22aec..63a99742f3e39 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -31,11 +31,11 @@ FullyShardedNativeMixedPrecisionPlugin, IPUPrecisionPlugin, NativeMixedPrecisionPlugin, + PLUGIN_INPUT, PrecisionPlugin, ShardedNativeMixedPrecisionPlugin, TPUBf16PrecisionPlugin, TPUPrecisionPlugin, - PLUGIN_INPUT, ) from pytorch_lightning.plugins.environments import ( BaguaEnvironment, @@ -106,7 +106,6 @@ def __init__( """The AcceleratorConnector parses several Trainer arguments and instantiates the Strategy including other components such as the Accelerator and Precision plugin. - A. accelerator flag could be: 1. strategy class (deprecated in 1.5 will be removed in 1.7) 2. strategy str (deprecated in 1.5 will be removed in 1.7) @@ -137,7 +136,6 @@ def __init__( A. Class > str B. Strategy > Accelerator/precision/plugins C. When multiple flag set to the same thing? (ignore? 
not handled for now) - """ torch.backends.cudnn.benchmark = benchmark self.replace_sampler_ddp = replace_sampler_ddp @@ -552,8 +550,8 @@ def _choose_strategy(self) -> None: self._strategy_flag = "ddp" def _check_strategy_and_fallback(self) -> None: - """Checks edge cases when the strategy selection was a string input, and we need to fall back to a different - choice depending on other parameters or the environment.""" + """Checks edge cases when the strategy selection was a string input, and we need to fall back to a + different choice depending on other parameters or the environment.""" # current logic, fallback only apply to user pass in str config not object config strategy_flag = "" if isinstance(self._strategy_flag, Strategy) else self._strategy_flag From 5be85d3599c6214cdfb5dad7bf81f636418fb105 Mon Sep 17 00:00:00 2001 From: four4fish <88516121+four4fish@users.noreply.github.com> Date: Mon, 7 Feb 2022 10:28:15 -0800 Subject: [PATCH 35/69] Update tests/models/test_gpu.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Adrian Wälchli --- tests/models/test_gpu.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/models/test_gpu.py b/tests/models/test_gpu.py index 190936096ddef..c494c0c1c18e6 100644 --- a/tests/models/test_gpu.py +++ b/tests/models/test_gpu.py @@ -243,7 +243,6 @@ def test_torchelastic_gpu_parsing(mocked_device_count, mocked_is_available, gpus trainer = Trainer(gpus=gpus) assert isinstance(trainer._accelerator_connector.cluster_environment, TorchElasticEnvironment) assert trainer._accelerator_connector.parallel_device_ids == device_parser.parse_gpu_ids(gpus) - assert trainer.gpus == gpus From f27d01ced6ec0236218ee13ea55b1d9a8f3c8b38 Mon Sep 17 00:00:00 2001 From: four4fish <88516121+four4fish@users.noreply.github.com> Date: Mon, 7 Feb 2022 10:39:25 -0800 Subject: [PATCH 36/69] Update pytorch_lightning/trainer/connectors/accelerator_connector.py Co-authored-by: Justus Schock <12886177+justusschock@users.noreply.github.com> --- pytorch_lightning/trainer/connectors/accelerator_connector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 63a99742f3e39..94df08d30fb82 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -120,7 +120,7 @@ def __init__( backend (registed these too, and _strategy_type could be deprecated) C. plugins flag could be: - 1. List of str, which could contains: + 1. List of str, which could contain: i. strategy str ii. precision str (Not supported in the old accelerator_connector version) iii. 
checkpoint_io str (Not supported in the old accelerator_connector version) From 74cbfed88f634e74be71a7ce24e2877d9af4801e Mon Sep 17 00:00:00 2001 From: four4fish <88516121+four4fish@users.noreply.github.com> Date: Mon, 7 Feb 2022 10:44:08 -0800 Subject: [PATCH 37/69] Update pytorch_lightning/trainer/connectors/accelerator_connector.py Co-authored-by: Justus Schock <12886177+justusschock@users.noreply.github.com> --- pytorch_lightning/trainer/connectors/accelerator_connector.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 94df08d30fb82..441d8bdead24f 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -137,6 +137,7 @@ def __init__( B. Strategy > Accelerator/precision/plugins C. When multiple flag set to the same thing? (ignore? not handled for now) """ + # TODO: move to gpu accelerator torch.backends.cudnn.benchmark = benchmark self.replace_sampler_ddp = replace_sampler_ddp self.sync_batchnorm = sync_batchnorm From d54ccfc6dd610a00644097296570e651277ed26b Mon Sep 17 00:00:00 2001 From: four4fish <88516121+four4fish@users.noreply.github.com> Date: Mon, 7 Feb 2022 11:24:14 -0800 Subject: [PATCH 38/69] Apply suggestions from code review Co-authored-by: Justus Schock <12886177+justusschock@users.noreply.github.com> Co-authored-by: ananthsub --- .../trainer/connectors/accelerator_connector.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 441d8bdead24f..09a49f2d1e652 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -145,7 +145,7 @@ def __init__( # 1. Parsing flags # Get registered strategies, built-in accelerators and precision plugins self._existing_strategies_str = StrategyRegistry.available_strategies() - self._existing_accelerator_type = ["tpu", "ipu", "gpu", "cpu"] + self._existing_accelerator_type = ("tpu", "ipu", "gpu", "cpu") self._supported_precision = PrecisionType.supported_types() # Raise an exception if there are conflicts between flags @@ -229,7 +229,7 @@ def _check_config_and_set_final_flags( # handle duplications and conflict if isinstance(accelerator, Strategy) and strategy != accelerator: raise MisconfigurationException( - "strategy already set through strategy flag, but have also passed in through accelerator" + f"Incompatible values set in `strategy` and `accelerator` arguments. Received both strategy={strategy} and accelerator={accelerator}" ) if ( isinstance(accelerator, str) @@ -253,7 +253,7 @@ def _check_config_and_set_final_flags( f" and you can only specify one strategy, but you have passed {plugin} as a plugin." 
) - if accelerator: + if accelerator is not None: if ( accelerator in self._existing_accelerator_type or accelerator == "auto" @@ -399,17 +399,17 @@ def _map_deprecated_devices_specfic_info_to_accelerator_and_device_flag( # TODO: @awaelchli improve error message rank_zero_warn( f"The flag `devices={devices}` will be ignored, " - f"instand the device specific number {deprecated_devices_specific_flag} will be used" + f"instead the device specific number {deprecated_devices_specific_flag} will be used" ) if [(num_processes is not None), (gpus is not None), (ipus is not None), (tpu_cores is not None)].count( True ) > 1: # TODO: @awaelchli improve error message - rank_zero_warn("more than one device specifc flag has been set") + rank_zero_warn("more than one device specific flag has been set") self._device_flag = deprecated_devices_specific_flag - if not self._accelerator_flag: + if self._accelerator_flag is None: # set accelerator type based on num_processes, gpus, ipus, tpu_cores if ipus: self._accelerator_flag = "ipu" From 653b5b8ba6f6c10cc020135c59d3d8c2d10d38d5 Mon Sep 17 00:00:00 2001 From: Siyu Wang Date: Mon, 7 Feb 2022 14:25:02 -0800 Subject: [PATCH 39/69] support bagua --- pytorch_lightning/strategies/bagua.py | 11 ++++++---- .../connectors/accelerator_connector.py | 21 +++---------------- 2 files changed, 10 insertions(+), 22 deletions(-) diff --git a/pytorch_lightning/strategies/bagua.py b/pytorch_lightning/strategies/bagua.py index 3c1520a712ea4..81596b9f058c6 100644 --- a/pytorch_lightning/strategies/bagua.py +++ b/pytorch_lightning/strategies/bagua.py @@ -13,7 +13,6 @@ from pytorch_lightning.strategies.ddp import DDPStrategy from pytorch_lightning.strategies.strategy import TBroadcast from pytorch_lightning.utilities.distributed import ReduceOp -from pytorch_lightning.utilities.enums import _StrategyType from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.imports import _BAGUA_AVAILABLE from pytorch_lightning.utilities.seed import reset_seed @@ -58,7 +57,7 @@ def __init__(self, pl_module: "pl.LightningModule") -> None: class BaguaStrategy(DDPStrategy): - distributed_backend = _StrategyType.BAGUA + distributed_backend = "bagua" def __init__( self, @@ -180,8 +179,12 @@ def _setup_model(self, model: Module) -> BaguaDistributedDataParallel: ) @classmethod - def register_plugins(cls, plugin_registry: Dict) -> None: - plugin_registry.register("bagua", cls, description="Default Bagua Plugin") + def register_strategies(cls, strategy_registry: Dict) -> None: + strategy_registry.register( + cls.distributed_backend, + cls, + description=f"{cls.__class__.__name__} Strategy", + ) def teardown(self) -> None: # abort the background communication for async algorithm diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 09a49f2d1e652..5d1111c851053 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -47,7 +47,6 @@ TorchElasticEnvironment, ) from pytorch_lightning.strategies import ( - BaguaStrategy, DataParallelStrategy, DDP2Strategy, DDPFullyShardedStrategy, @@ -161,9 +160,6 @@ def __init__( # handle `auto` and `None` if self._accelerator_flag == "auto" or self._accelerator_flag is None: self._accelerator_flag = self._choose_accelerator() - # else: - # # TODO: [RFC] move to XAccelerator class init? 
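When `accelerator` is "auto" (or left as None), `_choose_accelerator()` picks the first available backend, checking TPU, then IPU, then GPU availability, and otherwise falling back to CPU. A minimal usage sketch, with the detected type depending entirely on the host machine:

    trainer = Trainer(accelerator="auto", devices=1)
    # The resolved choice is reflected on the connector and on the strategy, e.g. on a CUDA machine:
    # trainer._accelerator_connector._accelerator_flag == "gpu"
    # isinstance(trainer.strategy.accelerator, GPUAccelerator)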
- # self._check_device_availibility() self._set_parallel_devices_and_init_accelerator() # 3. Instantiate ClusterEnvironment @@ -229,7 +225,8 @@ def _check_config_and_set_final_flags( # handle duplications and conflict if isinstance(accelerator, Strategy) and strategy != accelerator: raise MisconfigurationException( - f"Incompatible values set in `strategy` and `accelerator` arguments. Received both strategy={strategy} and accelerator={accelerator}" + f"Incompatible values set in `strategy` and `accelerator` arguments." + f"Received both strategy={strategy} and accelerator={accelerator}" ) if ( isinstance(accelerator, str) @@ -437,18 +434,6 @@ def _choose_accelerator(self) -> str: else: return "cpu" - # TODO move this to xAccelerator - # def _check_device_availibility(self): - # for accelerator_flag, available in zip( - # self._existing_accelerator_type, [_TPU_AVAILABLE, _IPU_AVAILABLE, torch.cuda.is_available(), True] - # ): - # # only apply to gpu to keep backward compatibility - # if self._accelerator_flag == accelerator_flag: - # if not available: - # raise MisconfigurationException( - # f"You choice {accelerator_flag} accelerator, but {accelerator_flag} is not available" - # ) - def _set_parallel_devices_and_init_accelerator(self) -> None: self._parallel_devices = [] if isinstance(self._accelerator_flag, Accelerator): @@ -506,7 +491,7 @@ def _choose_and_init_cluster_environment(self) -> None: rank_zero_info("Multiprocessing is handled by SLURM.") self.cluster_environment = SLURMEnvironment() else: - for env_type in (TorchElasticEnvironment, KubeflowEnvironment, LSFEnvironment): + for env_type in (BaguaEnvironment, TorchElasticEnvironment, KubeflowEnvironment, LSFEnvironment): if env_type.detect(): self.cluster_environment = env_type() From d000e0dd7ecdfec1ee2533a855bdbfe3723ed0dc Mon Sep 17 00:00:00 2001 From: Siyu Wang Date: Mon, 7 Feb 2022 16:13:44 -0800 Subject: [PATCH 40/69] rename distributed_backend to strategy_name --- pytorch_lightning/strategies/bagua.py | 4 +- pytorch_lightning/strategies/ddp.py | 4 +- pytorch_lightning/strategies/ddp2.py | 4 +- pytorch_lightning/strategies/ddp_spawn.py | 4 +- pytorch_lightning/strategies/deepspeed.py | 2 +- pytorch_lightning/strategies/dp.py | 4 +- pytorch_lightning/strategies/fully_sharded.py | 4 +- pytorch_lightning/strategies/horovod.py | 4 +- pytorch_lightning/strategies/ipu.py | 4 +- pytorch_lightning/strategies/sharded.py | 4 +- pytorch_lightning/strategies/sharded_spawn.py | 4 +- pytorch_lightning/strategies/single_device.py | 4 +- pytorch_lightning/strategies/single_tpu.py | 4 +- pytorch_lightning/strategies/strategy.py | 7 +- .../strategies/strategy_registry.py | 2 +- pytorch_lightning/strategies/tpu_spawn.py | 4 +- .../connectors/accelerator_connector.py | 11 +-- pytorch_lightning/trainer/trainer.py | 2 +- pytorch_lightning/utilities/imports.py | 2 - tests/accelerators/test_tpu.py | 4 +- tests/trainer/test_trainer.py | 74 +++++++++---------- 21 files changed, 73 insertions(+), 83 deletions(-) diff --git a/pytorch_lightning/strategies/bagua.py b/pytorch_lightning/strategies/bagua.py index 81596b9f058c6..672ea800661de 100644 --- a/pytorch_lightning/strategies/bagua.py +++ b/pytorch_lightning/strategies/bagua.py @@ -57,7 +57,7 @@ def __init__(self, pl_module: "pl.LightningModule") -> None: class BaguaStrategy(DDPStrategy): - distributed_backend = "bagua" + strategy_name = "bagua" def __init__( self, @@ -181,7 +181,7 @@ def _setup_model(self, model: Module) -> BaguaDistributedDataParallel: @classmethod def register_strategies(cls, 
strategy_registry: Dict) -> None: strategy_registry.register( - cls.distributed_backend, + cls.strategy_name, cls, description=f"{cls.__class__.__name__} Strategy", ) diff --git a/pytorch_lightning/strategies/ddp.py b/pytorch_lightning/strategies/ddp.py index 3aaa36b01edf2..ec9f417ff7799 100644 --- a/pytorch_lightning/strategies/ddp.py +++ b/pytorch_lightning/strategies/ddp.py @@ -74,7 +74,7 @@ class DDPStrategy(ParallelStrategy): devices (e.g. GPU) per node. It is very similar to how :mod:`torch.distributed.launch` launches processes. """ - distributed_backend = "ddp" + strategy_name = "ddp" def __init__( self, @@ -426,7 +426,7 @@ def register_strategies(cls, strategy_registry: Dict) -> None: find_unused_parameters=False, ) strategy_registry.register( - cls.distributed_backend, + cls.strategy_name, cls, description=f"{cls.__class__.__name__} Strategy", ) diff --git a/pytorch_lightning/strategies/ddp2.py b/pytorch_lightning/strategies/ddp2.py index ff84a50fa52ba..a6ce8f2f4230e 100644 --- a/pytorch_lightning/strategies/ddp2.py +++ b/pytorch_lightning/strategies/ddp2.py @@ -23,7 +23,7 @@ class DDP2Strategy(DDPStrategy): """DDP2 behaves like DP in one node, but synchronization across nodes behaves like in DDP.""" - distributed_backend = "ddp2" + strategy_name = "ddp2" @property def global_rank(self) -> int: @@ -78,7 +78,7 @@ def set_world_ranks(self) -> None: @classmethod def register_strategies(cls, strategy_registry: Dict) -> None: strategy_registry.register( - cls.distributed_backend, + cls.strategy_name, cls, description=f"{cls.__class__.__name__} Strategy", ) diff --git a/pytorch_lightning/strategies/ddp_spawn.py b/pytorch_lightning/strategies/ddp_spawn.py index 75ee8fd4eadb7..3c32d9e1872f5 100644 --- a/pytorch_lightning/strategies/ddp_spawn.py +++ b/pytorch_lightning/strategies/ddp_spawn.py @@ -52,7 +52,7 @@ class DDPSpawnStrategy(ParallelStrategy): """Spawns processes using the :func:`torch.multiprocessing.spawn` method and joins processes after training finishes.""" - distributed_backend = "ddp_spawn" + strategy_name = "ddp_spawn" def __init__( self, @@ -365,7 +365,7 @@ def register_strategies(cls, strategy_registry: Dict) -> None: find_unused_parameters=False, ) strategy_registry.register( - cls.distributed_backend, + cls.strategy_name, cls, description=f"{cls.__class__.__name__} Strategy", ) diff --git a/pytorch_lightning/strategies/deepspeed.py b/pytorch_lightning/strategies/deepspeed.py index bd6b131574e56..cbf66b7040d22 100644 --- a/pytorch_lightning/strategies/deepspeed.py +++ b/pytorch_lightning/strategies/deepspeed.py @@ -82,7 +82,7 @@ def _move_float_tensors_to_half(self, batch: Any): class DeepSpeedStrategy(DDPStrategy): - distributed_backend = "deepspeed" + strategy_name = "deepspeed" DEEPSPEED_ENV_VAR = "PL_DEEPSPEED_CONFIG_PATH" def __init__( diff --git a/pytorch_lightning/strategies/dp.py b/pytorch_lightning/strategies/dp.py index a886fcdcbee63..2aa25f8275dea 100644 --- a/pytorch_lightning/strategies/dp.py +++ b/pytorch_lightning/strategies/dp.py @@ -30,7 +30,7 @@ class DataParallelStrategy(ParallelStrategy): """Implements data-parallel training in a single process, i.e., the model gets replicated to each device and each gets a split of the data.""" - distributed_backend = "dp" + strategy_name = "dp" def __init__( self, @@ -151,7 +151,7 @@ def training_step_end(self, output): @classmethod def register_strategies(cls, strategy_registry: Dict) -> None: strategy_registry.register( - cls.distributed_backend, + cls.strategy_name, cls, description=f"{cls.__class__.__name__} 
Strategy", ) diff --git a/pytorch_lightning/strategies/fully_sharded.py b/pytorch_lightning/strategies/fully_sharded.py index cd7155cc41170..9d0999902a071 100644 --- a/pytorch_lightning/strategies/fully_sharded.py +++ b/pytorch_lightning/strategies/fully_sharded.py @@ -36,7 +36,7 @@ class DDPFullyShardedStrategy(DDPStrategy): - distributed_backend = "ddp_fully_sharded" + strategy_name = "ddp_fully_sharded" def __init__( self, @@ -214,7 +214,7 @@ def register_strategies(cls, strategy_registry: Dict) -> None: ) strategy_registry.register( - cls.distributed_backend, + cls.strategy_name, cls, description=f"{cls.__class__.__name__} Strategy", ) diff --git a/pytorch_lightning/strategies/horovod.py b/pytorch_lightning/strategies/horovod.py index 79e58c164028c..49848e0f0163e 100644 --- a/pytorch_lightning/strategies/horovod.py +++ b/pytorch_lightning/strategies/horovod.py @@ -36,7 +36,7 @@ class HorovodStrategy(ParallelStrategy): """Plugin for Horovod distributed training integration.""" - distributed_backend = "horovod" + strategy_name = "horovod" def __init__( self, @@ -198,7 +198,7 @@ def _filter_named_parameters(model: nn.Module, optimizer: Optimizer) -> List[Tup @classmethod def register_strategies(cls, strategy_registry: Dict) -> None: strategy_registry.register( - cls.distributed_backend, + cls.strategy_name, cls, description=f"{cls.__class__.__name__} Strategy", ) diff --git a/pytorch_lightning/strategies/ipu.py b/pytorch_lightning/strategies/ipu.py index d6e1e12a36465..7252e2bf3f583 100644 --- a/pytorch_lightning/strategies/ipu.py +++ b/pytorch_lightning/strategies/ipu.py @@ -62,7 +62,7 @@ def _move_float_tensors_to_half(self, batch: Any) -> Any: class IPUStrategy(ParallelStrategy): """Plugin for training on IPU devices.""" - distributed_backend = "ipu_strategy" + strategy_name = "ipu_strategy" def __init__( self, @@ -366,7 +366,7 @@ def broadcast(self, obj: object, src: int = 0) -> object: @classmethod def register_strategies(cls, strategy_registry: Dict) -> None: strategy_registry.register( - cls.distributed_backend, + cls.strategy_name, cls, description=f"{cls.__class__.__name__} Strategy", ) diff --git a/pytorch_lightning/strategies/sharded.py b/pytorch_lightning/strategies/sharded.py index 4efdfb685722f..b39bae1f02369 100644 --- a/pytorch_lightning/strategies/sharded.py +++ b/pytorch_lightning/strategies/sharded.py @@ -37,7 +37,7 @@ class DDPShardedStrategy(DDPStrategy): """Optimizer and gradient sharded training provided by FairScale.""" - distributed_backend = "ddp_sharded" + strategy_name = "ddp_sharded" _REDUCE_BUFFER_SIZE_DEFAULT: int = 2 ** 23 # 8M def configure_ddp(self) -> None: @@ -136,7 +136,7 @@ def register_strategies(cls, strategy_registry: Dict) -> None: find_unused_parameters=False, ) strategy_registry.register( - cls.distributed_backend, + cls.strategy_name, cls, description=f"{cls.__class__.__name__} Strategy", ) diff --git a/pytorch_lightning/strategies/sharded_spawn.py b/pytorch_lightning/strategies/sharded_spawn.py index a6e007a4be31b..d4a2629f0862b 100644 --- a/pytorch_lightning/strategies/sharded_spawn.py +++ b/pytorch_lightning/strategies/sharded_spawn.py @@ -35,7 +35,7 @@ class DDPSpawnShardedStrategy(DDPSpawnStrategy): """Optimizer sharded training provided by FairScale.""" - distributed_backend = "ddp_sharded_spawn" + strategy_name = "ddp_sharded_spawn" def configure_ddp(self) -> None: self.model, self.optimizers = self._setup_model_and_optimizers( @@ -118,7 +118,7 @@ def register_strategies(cls, strategy_registry: Dict) -> None: 
find_unused_parameters=False, ) strategy_registry.register( - cls.distributed_backend, + cls.strategy_name, cls, description=f"{cls.__class__.__name__} Strategy", ) diff --git a/pytorch_lightning/strategies/single_device.py b/pytorch_lightning/strategies/single_device.py index cdbd8eaa2b7a6..bc17dd08634fd 100644 --- a/pytorch_lightning/strategies/single_device.py +++ b/pytorch_lightning/strategies/single_device.py @@ -27,7 +27,7 @@ class SingleDeviceStrategy(Strategy): """Strategy that handles communication on a single device.""" - distributed_backend = "single_device" + strategy_name = "single_device" def __init__( self, @@ -84,7 +84,7 @@ def broadcast(self, obj: object, src: int = 0) -> object: @classmethod def register_strategies(cls, strategy_registry: dict) -> None: strategy_registry.register( - cls.distributed_backend, + cls.strategy_name, cls, description=f"{cls.__class__.__name__} Strategy", ) diff --git a/pytorch_lightning/strategies/single_tpu.py b/pytorch_lightning/strategies/single_tpu.py index 942f9ebfa9a41..66f90c2cd15f1 100644 --- a/pytorch_lightning/strategies/single_tpu.py +++ b/pytorch_lightning/strategies/single_tpu.py @@ -28,7 +28,7 @@ class SingleTPUStrategy(SingleDeviceStrategy): """Strategy for training on a single TPU device.""" - distributed_backend = "single_tpu" + strategy_name = "single_tpu" def __init__( self, @@ -76,7 +76,7 @@ def model_to_device(self) -> None: @classmethod def register_strategies(cls, strategy_registry: Dict) -> None: strategy_registry.register( - cls.distributed_backend, + cls.strategy_name, cls, description=f"{cls.__class__.__name__} Strategy", ) diff --git a/pytorch_lightning/strategies/strategy.py b/pytorch_lightning/strategies/strategy.py index 33354dc539201..2106c2bb1ede1 100644 --- a/pytorch_lightning/strategies/strategy.py +++ b/pytorch_lightning/strategies/strategy.py @@ -441,12 +441,7 @@ def teardown(self) -> None: @classmethod def register_strategies(cls, strategy_registry) -> None: - if cls.distributed_backend: - strategy_registry.register( - cls.distributed_backend, - cls, - description=f"{cls.__class__.__name__} Strategy", - ) + pass def on_train_start(self) -> None: """Called when train begins.""" diff --git a/pytorch_lightning/strategies/strategy_registry.py b/pytorch_lightning/strategies/strategy_registry.py index b0d7995053a30..17e08acb23bcc 100644 --- a/pytorch_lightning/strategies/strategy_registry.py +++ b/pytorch_lightning/strategies/strategy_registry.py @@ -75,7 +75,7 @@ def register( def do_register(strategy: Callable) -> Callable: data["strategy"] = strategy - data["distributed_backend"] = strategy.distributed_backend + data["strategy_name"] = strategy.strategy_name self[name] = data return strategy diff --git a/pytorch_lightning/strategies/tpu_spawn.py b/pytorch_lightning/strategies/tpu_spawn.py index 4bcf0d1ef31b6..71db3a64ec466 100644 --- a/pytorch_lightning/strategies/tpu_spawn.py +++ b/pytorch_lightning/strategies/tpu_spawn.py @@ -52,7 +52,7 @@ class TPUSpawnStrategy(DDPSpawnStrategy): """Strategy for training multiple TPU devices using the :func:`torch.multiprocessing.spawn` method.""" - distributed_backend = "tpu_spawn" + strategy_name = "tpu_spawn" def __init__( self, @@ -350,7 +350,7 @@ def register_strategies(cls, strategy_registry: Dict) -> None: ) strategy_registry.register( - cls.distributed_backend, + cls.strategy_name, cls, description=f"{cls.__class__.__name__} Strategy", ) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py 
b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 5d1111c851053..fc348233af732 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -264,9 +264,6 @@ def _check_config_and_set_final_flags( ) self._strategy_flag = accelerator elif accelerator == "ddp_cpu": - rank_zero_warn( - "You requested one or more GPUs, but set `accelerator='ddp_cpu'`. Training will not use GPUs." - ) self._strategy_flag = accelerator if precision: @@ -720,10 +717,10 @@ def _lazy_init_strategy(self) -> None: from pytorch_lightning.utilities import _IS_INTERACTIVE interactive_compatible_strategy = ("dp", "ddp_spawn", "ddp_sharded_spawn", "tpu_spawn") - if _IS_INTERACTIVE and self.strategy.distributed_backend not in interactive_compatible_strategy: + if _IS_INTERACTIVE and self.strategy.strategy_name not in interactive_compatible_strategy: raise MisconfigurationException( - f"`Trainer(strategy={self.strategy.distributed_backend!r})` or" - f" `Trainer(accelerator={self.strategy.distributed_backend!r})` is not compatible with an interactive" + f"`Trainer(strategy={self.strategy.strategy_name!r})` or" + f" `Trainer(accelerator={self.strategy.strategy_name!r})` is not compatible with an interactive" " environment. Run your code as a script, or choose one of the compatible backends:" f" {', '.join(interactive_compatible_strategy)}." " In case you are spawning processes yourself, make sure to include the Trainer" @@ -856,4 +853,4 @@ def use_dp(self) -> bool: @property def _strategy_type(self) -> _StrategyType: - return self.strategy.distributed_backend + return self.strategy.strategy_name diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index e9e89f01675ac..53ea205115de1 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -1970,7 +1970,7 @@ def should_rank_save_checkpoint(self) -> bool: @property def _strategy_type(self) -> _StrategyType: - return self.strategy.distributed_backend + return self.strategy.strategy_name @property def _device_type(self) -> _AcceleratorType: diff --git a/pytorch_lightning/utilities/imports.py b/pytorch_lightning/utilities/imports.py index 24355097ce34f..6c20d90e01646 100644 --- a/pytorch_lightning/utilities/imports.py +++ b/pytorch_lightning/utilities/imports.py @@ -133,8 +133,6 @@ def _compare_version(package: str, op: Callable, version: str, use_base_version: else: _IPU_AVAILABLE = False -_GPU_AVAILABLE = torch.cuda.is_available() and torch.cuda.device_count() > 0 - # experimental feature within PyTorch Lightning. 
def _fault_tolerant_training() -> bool: diff --git a/tests/accelerators/test_tpu.py b/tests/accelerators/test_tpu.py index 2c0b265b0fd16..dc004f957dac1 100644 --- a/tests/accelerators/test_tpu.py +++ b/tests/accelerators/test_tpu.py @@ -228,13 +228,13 @@ def test_ddp_cpu_not_supported_on_tpus(): @RunIf(tpu=True) -def test_strategy_choice_tpu_str_ddp_spawn(tmpdir, strategy): +def test_strategy_choice_tpu_str_ddp_spawn(tmpdir): with pytest.raises(ValueError, match="TPUAccelerator` can only be used with a `SingleTPUStrategy`"): Trainer(strategy="ddp_spawn", accelerator="tpu", devices=8) @RunIf(tpu=True) -def test_strategy_choice_tpu_str_tpu_spawn_debug(tmpdir, strategy): +def test_strategy_choice_tpu_str_tpu_spawn_debug(tmpdir): trainer = Trainer(strategy="tpu_spawn_debug", accelerator="tpu", devices=8) assert isinstance(trainer.strategy, TPUSpawnStrategy) diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 32aa94b8e0b2c..0d2d8bbdc55b6 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -47,7 +47,7 @@ DDPStrategy, ) from pytorch_lightning.trainer.states import TrainerFn -from pytorch_lightning.utilities import _AcceleratorType, _StrategyType +from pytorch_lightning.utilities import _AcceleratorType from pytorch_lightning.utilities.cloud_io import load as pl_load from pytorch_lightning.utilities.exceptions import DeadlockDetectedException, MisconfigurationException from pytorch_lightning.utilities.imports import _IS_WINDOWS, _OMEGACONF_AVAILABLE, _TORCH_GREATER_EQUAL_1_8 @@ -1189,15 +1189,15 @@ def val_dataloader(self): ), ( dict(accelerator="ddp", num_processes=2, gpus=None), - dict(_strategy_type=_StrategyType.DDP, _device_type=_AcceleratorType.CPU, num_gpus=0), + dict(_strategy_type="ddp", _device_type=_AcceleratorType.CPU, num_gpus=0), ), ( dict(accelerator="ddp", num_nodes=2, gpus=None), - dict(_strategy_type=_StrategyType.DDP, _device_type=_AcceleratorType.CPU, num_gpus=0), + dict(_strategy_type="ddp", _device_type=_AcceleratorType.CPU, num_gpus=0), ), ( dict(accelerator="ddp_cpu", num_processes=2, gpus=None), - dict(_strategy_type=_StrategyType.DDP_SPAWN, _device_type=_AcceleratorType.CPU, num_gpus=0), + dict(_strategy_type="ddp_spawn", _device_type=_AcceleratorType.CPU, num_gpus=0), ), ( dict(accelerator="ddp2", gpus=None), @@ -1209,43 +1209,43 @@ def val_dataloader(self): ), ( dict(accelerator="dp", gpus=1), - dict(_strategy_type=_StrategyType.DP, _device_type=_AcceleratorType.GPU, num_gpus=1), + dict(_strategy_type="dp", _device_type=_AcceleratorType.GPU, num_gpus=1), ), ( dict(accelerator="ddp", gpus=1), - dict(_strategy_type=_StrategyType.DDP, _device_type=_AcceleratorType.GPU, num_gpus=1), + dict(_strategy_type="ddp", _device_type=_AcceleratorType.GPU, num_gpus=1), ), ( dict(accelerator="ddp_cpu", num_processes=2, gpus=1), - dict(_strategy_type=_StrategyType.DDP_SPAWN, _device_type=_AcceleratorType.CPU, num_gpus=0), + dict(_strategy_type="ddp_spawn", _device_type=_AcceleratorType.CPU, num_gpus=0), ), ( dict(accelerator="ddp2", gpus=1), - dict(_strategy_type=_StrategyType.DDP2, _device_type=_AcceleratorType.GPU, num_gpus=1), + dict(_strategy_type="ddp2", _device_type=_AcceleratorType.GPU, num_gpus=1), ), ( dict(accelerator=None, gpus=2), - dict(_strategy_type=_StrategyType.DDP_SPAWN, _device_type=_AcceleratorType.GPU, num_gpus=2), + dict(_strategy_type="ddp_spawn", _device_type=_AcceleratorType.GPU, num_gpus=2), ), ( dict(accelerator="dp", gpus=2), - dict(_strategy_type=_StrategyType.DP, _device_type=_AcceleratorType.GPU, 
num_gpus=2), + dict(_strategy_type="dp", _device_type=_AcceleratorType.GPU, num_gpus=2), ), ( dict(accelerator="ddp", gpus=2), - dict(_strategy_type=_StrategyType.DDP, _device_type=_AcceleratorType.GPU, num_gpus=2), + dict(_strategy_type="ddp", _device_type=_AcceleratorType.GPU, num_gpus=2), ), ( dict(accelerator="ddp2", gpus=2), - dict(_strategy_type=_StrategyType.DDP2, _device_type=_AcceleratorType.GPU, num_gpus=2), + dict(_strategy_type="ddp2", _device_type=_AcceleratorType.GPU, num_gpus=2), ), ( dict(accelerator="ddp2", num_processes=2, gpus=None), - dict(_strategy_type=_StrategyType.DDP, _device_type=_AcceleratorType.CPU, num_gpus=0), + dict(_strategy_type="ddp", _device_type=_AcceleratorType.CPU, num_gpus=0), ), ( dict(accelerator="dp", num_processes=2, gpus=None), - dict(_strategy_type=_StrategyType.DDP, _device_type=_AcceleratorType.CPU, num_gpus=0), + dict(_strategy_type="ddp", _device_type=_AcceleratorType.CPU, num_gpus=0), ), ], ) @@ -2109,11 +2109,11 @@ def training_step(self, batch, batch_idx): ), ( dict(strategy="ddp", num_processes=2, gpus=None), - dict(_strategy_type=_StrategyType.DDP, _device_type=_AcceleratorType.CPU, num_gpus=0), + dict(_strategy_type="ddp", _device_type=_AcceleratorType.CPU, num_gpus=0), ), ( dict(strategy="ddp", num_nodes=2, gpus=None), - dict(_strategy_type=_StrategyType.DDP, _device_type=_AcceleratorType.CPU, num_gpus=0), + dict(_strategy_type="ddp", _device_type=_AcceleratorType.CPU, num_gpus=0), ), ( dict(strategy="ddp2", gpus=None), @@ -2125,47 +2125,47 @@ def training_step(self, batch, batch_idx): ), ( dict(strategy="dp", gpus=1), - dict(_strategy_type=_StrategyType.DP, _device_type=_AcceleratorType.GPU, num_gpus=1), + dict(_strategy_type="dp", _device_type=_AcceleratorType.GPU, num_gpus=1), ), ( dict(strategy="ddp", gpus=1), - dict(_strategy_type=_StrategyType.DDP, _device_type=_AcceleratorType.GPU, num_gpus=1), + dict(_strategy_type="ddp", _device_type=_AcceleratorType.GPU, num_gpus=1), ), ( dict(strategy="ddp_spawn", gpus=1), - dict(_strategy_type=_StrategyType.DDP_SPAWN, _device_type=_AcceleratorType.GPU, num_gpus=1), + dict(_strategy_type="ddp_spawn", _device_type=_AcceleratorType.GPU, num_gpus=1), ), ( dict(strategy="ddp2", gpus=1), - dict(_strategy_type=_StrategyType.DDP2, _device_type=_AcceleratorType.GPU, num_gpus=1), + dict(_strategy_type="ddp2", _device_type=_AcceleratorType.GPU, num_gpus=1), ), ( dict(strategy=None, gpus=2), - dict(_strategy_type=_StrategyType.DDP_SPAWN, _device_type=_AcceleratorType.GPU, num_gpus=2), + dict(_strategy_type="ddp_spawn", _device_type=_AcceleratorType.GPU, num_gpus=2), ), ( dict(strategy="dp", gpus=2), - dict(_strategy_type=_StrategyType.DP, _device_type=_AcceleratorType.GPU, num_gpus=2), + dict(_strategy_type="dp", _device_type=_AcceleratorType.GPU, num_gpus=2), ), ( dict(strategy="ddp", gpus=2), - dict(_strategy_type=_StrategyType.DDP, _device_type=_AcceleratorType.GPU, num_gpus=2), + dict(_strategy_type="ddp", _device_type=_AcceleratorType.GPU, num_gpus=2), ), ( dict(strategy="ddp2", gpus=2), - dict(_strategy_type=_StrategyType.DDP2, _device_type=_AcceleratorType.GPU, num_gpus=2), + dict(_strategy_type="ddp2", _device_type=_AcceleratorType.GPU, num_gpus=2), ), ( dict(strategy="ddp2", num_processes=2, gpus=None), - dict(_strategy_type=_StrategyType.DDP, _device_type=_AcceleratorType.CPU, num_gpus=0), + dict(_strategy_type="ddp", _device_type=_AcceleratorType.CPU, num_gpus=0), ), ( dict(strategy="dp", num_processes=2, gpus=None), - dict(_strategy_type=_StrategyType.DDP, 
_device_type=_AcceleratorType.CPU, num_gpus=0), + dict(_strategy_type="ddp", _device_type=_AcceleratorType.CPU, num_gpus=0), ), ( dict(strategy="ddp_spawn", num_processes=2, gpus=None), - dict(_strategy_type=_StrategyType.DDP_SPAWN, _device_type=_AcceleratorType.CPU, num_gpus=0), + dict(_strategy_type="ddp_spawn", _device_type=_AcceleratorType.CPU, num_gpus=0), ), ( dict(strategy="ddp_spawn", num_processes=1, gpus=None), @@ -2173,36 +2173,36 @@ def training_step(self, batch, batch_idx): ), ( dict(strategy="ddp_fully_sharded", gpus=1), - dict(_strategy_type=_StrategyType.DDP_FULLY_SHARDED, _device_type=_AcceleratorType.GPU, num_gpus=1), + dict(_strategy_type="ddp_fully_sharded", _device_type=_AcceleratorType.GPU, num_gpus=1), ), ( dict(strategy=DDPSpawnStrategy(), num_processes=2, gpus=None), - dict(_strategy_type=_StrategyType.DDP_SPAWN, _device_type=_AcceleratorType.CPU, num_gpus=0), + dict(_strategy_type="ddp_spawn", _device_type=_AcceleratorType.CPU, num_gpus=0), ), ( dict(strategy=DDPSpawnStrategy(), gpus=2), - dict(_strategy_type=_StrategyType.DDP_SPAWN, _device_type=_AcceleratorType.GPU, num_gpus=2), + dict(_strategy_type="ddp_spawn", _device_type=_AcceleratorType.GPU, num_gpus=2), ), ( dict(strategy=DDPStrategy(), num_processes=2, gpus=None), - dict(_strategy_type=_StrategyType.DDP, _device_type=_AcceleratorType.CPU, num_gpus=0), + dict(_strategy_type="ddp", _device_type=_AcceleratorType.CPU, num_gpus=0), ), ( dict(strategy=DDPStrategy(), gpus=2), - dict(_strategy_type=_StrategyType.DDP, _device_type=_AcceleratorType.GPU, num_gpus=2), + dict(_strategy_type="ddp", _device_type=_AcceleratorType.GPU, num_gpus=2), ), ( dict(strategy=DDP2Strategy(), gpus=2), - dict(_strategy_type=_StrategyType.DDP2, _device_type=_AcceleratorType.GPU, num_gpus=2), + dict(_strategy_type="ddp2", _device_type=_AcceleratorType.GPU, num_gpus=2), ), ( dict(strategy=DataParallelStrategy(), gpus=2), - dict(_strategy_type=_StrategyType.DP, _device_type=_AcceleratorType.GPU, num_gpus=2), + dict(_strategy_type="dp", _device_type=_AcceleratorType.GPU, num_gpus=2), ), ( dict(strategy=DDPFullyShardedStrategy(), gpus=2), dict( - _strategy_type=_StrategyType.DDP_FULLY_SHARDED, + _strategy_type="ddp_fully_sharded", _device_type=_AcceleratorType.GPU, num_gpus=2, ), @@ -2210,14 +2210,14 @@ def training_step(self, batch, batch_idx): ( dict(strategy=DDPSpawnShardedStrategy(), gpus=2), dict( - _strategy_type=_StrategyType.DDP_SHARDED_SPAWN, + _strategy_type="ddp_sharded_spawn", _device_type=_AcceleratorType.GPU, num_gpus=2, ), ), ( dict(strategy=DDPShardedStrategy(), gpus=2), - dict(_strategy_type=_StrategyType.DDP_SHARDED, _device_type=_AcceleratorType.GPU, num_gpus=2), + dict(_strategy_type="ddp_sharded", _device_type=_AcceleratorType.GPU, num_gpus=2), ), ], ) From 344a5e6e39c567f861425fee133f6cc9d97549d1 Mon Sep 17 00:00:00 2001 From: Siyu Wang Date: Mon, 7 Feb 2022 16:27:09 -0800 Subject: [PATCH 41/69] distributed_backend to strategy_name in tests/ --- tests/strategies/test_strategy_registry.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/strategies/test_strategy_registry.py b/tests/strategies/test_strategy_registry.py index ab0629b28b698..89422b3719a29 100644 --- a/tests/strategies/test_strategy_registry.py +++ b/tests/strategies/test_strategy_registry.py @@ -31,7 +31,7 @@ def test_strategy_registry_with_new_strategy(): class TestStrategy: - distributed_backend = "test_strategy" + strategy_name = "test_strategy" def __init__(self, param1, param2): self.param1 = param1 @@ -45,7 +45,7 @@ def 
__init__(self, param1, param2): assert strategy_name in StrategyRegistry assert StrategyRegistry[strategy_name]["description"] == strategy_description assert StrategyRegistry[strategy_name]["init_params"] == {"param1": "abc", "param2": 123} - assert StrategyRegistry[strategy_name]["distributed_backend"] == "test_strategy" + assert StrategyRegistry[strategy_name]["strategy_name"] == "test_strategy" assert isinstance(StrategyRegistry.get(strategy_name), TestStrategy) StrategyRegistry.remove(strategy_name) From 1707696a936f76d9f53fd7d3b00434e629736e0f Mon Sep 17 00:00:00 2001 From: Kaushik B Date: Thu, 10 Feb 2022 14:31:42 +0530 Subject: [PATCH 42/69] Fix tpu tests --- tests/callbacks/test_device_stats_monitor.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/callbacks/test_device_stats_monitor.py b/tests/callbacks/test_device_stats_monitor.py index c90ce4a4ba96b..51cbf21d1f609 100644 --- a/tests/callbacks/test_device_stats_monitor.py +++ b/tests/callbacks/test_device_stats_monitor.py @@ -123,6 +123,8 @@ def test_device_stats_monitor_no_logger(tmpdir): trainer = Trainer( default_root_dir=tmpdir, + accelerator="cpu", + devices=1, callbacks=[device_stats], max_epochs=1, logger=False, From 05a03d0e95bffb1296ef81a1ca498525ae4d9566 Mon Sep 17 00:00:00 2001 From: Siyu Wang Date: Wed, 9 Feb 2022 18:32:38 -0800 Subject: [PATCH 43/69] draft --- .../trainer/connectors/accelerator_connector.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index fc348233af732..7afde50f32967 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -365,13 +365,14 @@ def _check_device_config_and_set_final_flags( # TODO: Delete this parsing section when num_processes, gpus, ipus and tpu_cores get removed self._gpus = gpus self._tpu_cores = tpu_cores - gpus = device_parser.parse_gpu_ids(gpus) - tpu_cores = device_parser.parse_tpu_cores(tpu_cores) - deprecated_devices_specific_flag = num_processes or gpus or ipus or tpu_cores - if deprecated_devices_specific_flag and deprecated_devices_specific_flag not in (0, "0"): - self._map_deprecated_devices_specfic_info_to_accelerator_and_device_flag( - devices, deprecated_devices_specific_flag, num_processes, gpus, ipus, tpu_cores - ) + if not self._device_flag: + gpus = device_parser.parse_gpu_ids(gpus) + tpu_cores = device_parser.parse_tpu_cores(tpu_cores) + deprecated_devices_specific_flag = num_processes or gpus or ipus or tpu_cores + if deprecated_devices_specific_flag and deprecated_devices_specific_flag not in (0, "0"): + self._map_deprecated_devices_specfic_info_to_accelerator_and_device_flag( + devices, deprecated_devices_specific_flag, num_processes, gpus, ipus, tpu_cores + ) if self._device_flag == "auto" and self._accelerator_flag is None: raise MisconfigurationException( From d4c78f85dc78bd73b6f2cd98a88061f8595bc208 Mon Sep 17 00:00:00 2001 From: Siyu Wang Date: Wed, 9 Feb 2022 19:41:13 -0800 Subject: [PATCH 44/69] add device=0 error message and update tests --- .../connectors/accelerator_connector.py | 18 ++++++++++-------- tests/trainer/flags/test_env_vars.py | 2 +- tests/trainer/test_trainer_cli.py | 2 +- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 
7afde50f32967..6d7dd7cb13e04 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -359,20 +359,22 @@ def _check_device_config_and_set_final_flags( self._num_nodes_flag = 1 else: self._num_nodes_flag = int(num_nodes) if num_nodes is not None else 1 + if devices in (0, "0", "0,"): + print(devices) + raise MisconfigurationException(f"You passed `devices={devices}`, please set a number > 0") self._device_flag = devices # TODO: Delete this parsing section when num_processes, gpus, ipus and tpu_cores get removed self._gpus = gpus self._tpu_cores = tpu_cores - if not self._device_flag: - gpus = device_parser.parse_gpu_ids(gpus) - tpu_cores = device_parser.parse_tpu_cores(tpu_cores) - deprecated_devices_specific_flag = num_processes or gpus or ipus or tpu_cores - if deprecated_devices_specific_flag and deprecated_devices_specific_flag not in (0, "0"): - self._map_deprecated_devices_specfic_info_to_accelerator_and_device_flag( - devices, deprecated_devices_specific_flag, num_processes, gpus, ipus, tpu_cores - ) + gpus = device_parser.parse_gpu_ids(gpus) + tpu_cores = device_parser.parse_tpu_cores(tpu_cores) + deprecated_devices_specific_flag = num_processes or gpus or ipus or tpu_cores + if deprecated_devices_specific_flag and deprecated_devices_specific_flag not in (0, "0"): + self._map_deprecated_devices_specfic_info_to_accelerator_and_device_flag( + devices, deprecated_devices_specific_flag, num_processes, gpus, ipus, tpu_cores + ) if self._device_flag == "auto" and self._accelerator_flag is None: raise MisconfigurationException( diff --git a/tests/trainer/flags/test_env_vars.py b/tests/trainer/flags/test_env_vars.py index bbcc5447d03ce..0e9e6469d67a8 100644 --- a/tests/trainer/flags/test_env_vars.py +++ b/tests/trainer/flags/test_env_vars.py @@ -53,4 +53,4 @@ def test_passing_env_variables_devices(cuda_available_mock, device_count_mock): trainer = Trainer() assert trainer.devices == 2 trainer = Trainer(accelerator="gpu", devices=1) - assert trainer.devices == 1 + assert trainer.devices == 2 diff --git a/tests/trainer/test_trainer_cli.py b/tests/trainer/test_trainer_cli.py index 25221f8111f96..330b0f75ffb61 100644 --- a/tests/trainer/test_trainer_cli.py +++ b/tests/trainer/test_trainer_cli.py @@ -163,7 +163,7 @@ def test_argparse_args_parsing_fast_dev_run(cli_args, expected): @pytest.mark.parametrize( ["cli_args", "expected_parsed", "expected_device_ids"], - [("", None, None), ("--accelerator gpu --devices 1", "1", [0]), ("--accelerator gpu --devices 0,", "0,", [0])], + [("", None, None), ("--accelerator gpu --devices 1", "1", [0])], ) @RunIf(min_gpus=1) def test_argparse_args_parsing_devices(cli_args, expected_parsed, expected_device_ids): From 77d2cd1fe6ee73a704b14d0779b98606ffc21cb1 Mon Sep 17 00:00:00 2001 From: Siyu Wang Date: Wed, 9 Feb 2022 21:09:43 -0800 Subject: [PATCH 45/69] fix gpu tests --- pytorch_lightning/trainer/connectors/accelerator_connector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 6d7dd7cb13e04..142ed540416e2 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -263,7 +263,7 @@ def _check_config_and_set_final_flags( f" in v1.5 and will be removed in v1.7. Use `Trainer(strategy={accelerator!r})` instead."
) self._strategy_flag = accelerator - elif accelerator == "ddp_cpu": + elif accelerator == "ddp_cpu" and not self._strategy_flag: self._strategy_flag = accelerator if precision: From d8c5ccc65cb9786837e5005cfcccd35a99f50639 Mon Sep 17 00:00:00 2001 From: Siyu Wang Date: Thu, 10 Feb 2022 12:18:07 -0800 Subject: [PATCH 46/69] test revert accelerator auto logic --- .../trainer/connectors/accelerator_connector.py | 10 ++++------ tests/callbacks/test_device_stats_monitor.py | 2 -- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 142ed540416e2..c6a0914fe81f3 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -419,16 +419,14 @@ def _map_deprecated_devices_specfic_info_to_accelerator_and_device_flag( def _choose_accelerator(self) -> str: """Choose the accelerator type (str) based on availability when ``accelerator='auto'``.""" - if _TPU_AVAILABLE: - return "tpu" - if _IPU_AVAILABLE: - return "ipu" if self._accelerator_flag == "auto": + if _TPU_AVAILABLE: + return "tpu" + if _IPU_AVAILABLE: + return "ipu" if torch.cuda.is_available() and torch.cuda.device_count() > 0: return "gpu" else: - if self._device_flag == "auto": - self._device_flag = 1 return "cpu" # [RFC] this is current logic, if accelerator not set, default cpu? else: diff --git a/tests/callbacks/test_device_stats_monitor.py b/tests/callbacks/test_device_stats_monitor.py index 51cbf21d1f609..c90ce4a4ba96b 100644 --- a/tests/callbacks/test_device_stats_monitor.py +++ b/tests/callbacks/test_device_stats_monitor.py @@ -123,8 +123,6 @@ def test_device_stats_monitor_no_logger(tmpdir): trainer = Trainer( default_root_dir=tmpdir, - accelerator="cpu", - devices=1, callbacks=[device_stats], max_epochs=1, logger=False, From 917039f98ec33968d10e50f503ee76f56870e9b1 Mon Sep 17 00:00:00 2001 From: Kaushik B Date: Fri, 11 Feb 2022 08:32:55 +0530 Subject: [PATCH 47/69] Tiny update to choose accelerator --- .../trainer/connectors/accelerator_connector.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index c6a0914fe81f3..57b6a078a0277 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -360,7 +360,6 @@ def _check_device_config_and_set_final_flags( else: self._num_nodes_flag = int(num_nodes) if num_nodes is not None else 1 if devices in (0, "0", "0,"): - print(devices) raise MisconfigurationException(f"You passed `devices={devices}`, please set a number > 0") self._device_flag = devices @@ -426,11 +425,8 @@ def _choose_accelerator(self) -> str: return "ipu" if torch.cuda.is_available() and torch.cuda.device_count() > 0: return "gpu" - else: - return "cpu" # [RFC] this is current logic, if accelerator not set, default cpu? 
- else: - return "cpu" + return "cpu" def _set_parallel_devices_and_init_accelerator(self) -> None: self._parallel_devices = [] From f2d53fa7fda352ff447fc02bfa04abccd8fb3f89 Mon Sep 17 00:00:00 2001 From: Siyu Wang Date: Thu, 10 Feb 2022 21:18:05 -0800 Subject: [PATCH 48/69] fix ipu and gpu tests --- pytorch_lightning/lite/lite.py | 2 +- .../trainer/connectors/accelerator_connector.py | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/pytorch_lightning/lite/lite.py b/pytorch_lightning/lite/lite.py index 95beb85b1cdad..29b98ecb7e67e 100644 --- a/pytorch_lightning/lite/lite.py +++ b/pytorch_lightning/lite/lite.py @@ -82,7 +82,7 @@ def __init__( self._check_strategy_support(strategy) gpu_ids, tpu_cores = _parse_devices(gpus=gpus, auto_select_gpus=False, tpu_cores=tpu_cores) self._accelerator_connector = AcceleratorConnector( - num_processes=1, + num_processes=None, devices=devices, tpu_cores=tpu_cores, ipus=None, diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 57b6a078a0277..daa9b3616f55c 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -56,6 +56,7 @@ DDPStrategy, DeepSpeedStrategy, HorovodStrategy, + IPUStrategy, ParallelStrategy, SingleDeviceStrategy, SingleTPUStrategy, @@ -418,11 +419,11 @@ def _map_deprecated_devices_specfic_info_to_accelerator_and_device_flag( def _choose_accelerator(self) -> str: """Choose the accelerator type (str) based on availability when ``accelerator='auto'``.""" + if _IPU_AVAILABLE: + return "ipu" if self._accelerator_flag == "auto": if _TPU_AVAILABLE: return "tpu" - if _IPU_AVAILABLE: - return "ipu" if torch.cuda.is_available() and torch.cuda.device_count() > 0: return "gpu" # [RFC] this is current logic, if accelerator not set, default cpu? @@ -834,11 +835,11 @@ def is_distributed(self) -> bool: @property def has_ipu(self) -> bool: - return isinstance(self.accelerator, IPUAccelerator) + return isinstance(self.accelerator, IPUAccelerator) and isinstance(self.strategy, IPUStrategy) @property def use_ipu(self) -> bool: - return self.has_ipu + return isinstance(self.accelerator, IPUAccelerator) @property def has_tpu(self) -> bool: From 266d3f8f4e9177f82ee67db622d598b855025bbf Mon Sep 17 00:00:00 2001 From: Siyu Wang Date: Fri, 11 Feb 2022 11:21:52 -0800 Subject: [PATCH 49/69] add special handling for ipustrategy --- .../trainer/connectors/accelerator_connector.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index daa9b3616f55c..1aac1c3297ba6 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -159,6 +159,7 @@ def __init__( # 2. 
Instantiate Accelerator # handle `auto` and `None` + self._special_handle_for_ipu() if self._accelerator_flag == "auto" or self._accelerator_flag is None: self._accelerator_flag = self._choose_accelerator() self._set_parallel_devices_and_init_accelerator() @@ -417,16 +418,22 @@ def _map_deprecated_devices_specfic_info_to_accelerator_and_device_flag( if num_processes: self._accelerator_flag = "cpu" + def _special_handle_for_ipu(self) -> None: + # current logic only apply to object config + # TODO this logic should apply to both str and object config + if isinstance(self._strategy_flag, IPUStrategy): + self._accelerator_flag = "ipu" + def _choose_accelerator(self) -> str: """Choose the accelerator type (str) based on availability when ``accelerator='auto'``.""" - if _IPU_AVAILABLE: - return "ipu" if self._accelerator_flag == "auto": if _TPU_AVAILABLE: return "tpu" + if _IPU_AVAILABLE: + return "ipu" if torch.cuda.is_available() and torch.cuda.device_count() > 0: return "gpu" - # [RFC] this is current logic, if accelerator not set, default cpu? + # [RFC] this is current logic, if accelerator=None, default cpu? return "cpu" def _set_parallel_devices_and_init_accelerator(self) -> None: @@ -533,7 +540,8 @@ def _choose_strategy(self) -> None: def _check_strategy_and_fallback(self) -> None: """Checks edge cases when the strategy selection was a string input, and we need to fall back to a different choice depending on other parameters or the environment.""" - # current logic, fallback only apply to user pass in str config not object config + # current fallback and check logic only apply to user pass in str config and object config + # TODO this logic should apply to both str and object config strategy_flag = "" if isinstance(self._strategy_flag, Strategy) else self._strategy_flag if strategy_flag == "ddp_cpu": @@ -714,6 +722,7 @@ def _lazy_init_strategy(self) -> None: from pytorch_lightning.utilities import _IS_INTERACTIVE + # TODO move is_compatible logic to strategy API interactive_compatible_strategy = ("dp", "ddp_spawn", "ddp_sharded_spawn", "tpu_spawn") if _IS_INTERACTIVE and self.strategy.strategy_name not in interactive_compatible_strategy: raise MisconfigurationException( From 0f833f91224c9be011c7451f81f218f467bccbae Mon Sep 17 00:00:00 2001 From: Siyu Wang Date: Fri, 11 Feb 2022 11:51:06 -0800 Subject: [PATCH 50/69] Address comments --- pytorch_lightning/strategies/bagua.py | 2 +- pytorch_lightning/strategies/ddp.py | 2 +- pytorch_lightning/strategies/ddp2.py | 2 +- pytorch_lightning/strategies/ddp_spawn.py | 2 +- pytorch_lightning/strategies/dp.py | 2 +- pytorch_lightning/strategies/fully_sharded.py | 2 +- pytorch_lightning/strategies/horovod.py | 2 +- pytorch_lightning/strategies/ipu.py | 2 +- pytorch_lightning/strategies/sharded.py | 2 +- pytorch_lightning/strategies/sharded_spawn.py | 2 +- pytorch_lightning/strategies/single_device.py | 2 +- pytorch_lightning/strategies/single_tpu.py | 2 +- pytorch_lightning/strategies/tpu_spawn.py | 2 +- pytorch_lightning/trainer/trainer.py | 2 +- pytorch_lightning/utilities/exceptions.py | 8 -------- tests/strategies/test_ddp_strategy.py | 7 +------ 16 files changed, 15 insertions(+), 28 deletions(-) diff --git a/pytorch_lightning/strategies/bagua.py b/pytorch_lightning/strategies/bagua.py index 672ea800661de..17318331b840d 100644 --- a/pytorch_lightning/strategies/bagua.py +++ b/pytorch_lightning/strategies/bagua.py @@ -183,7 +183,7 @@ def register_strategies(cls, strategy_registry: Dict) -> None: strategy_registry.register( cls.strategy_name, cls, 
- description=f"{cls.__class__.__name__} Strategy", + description=f"{cls.__class__.__name__}", ) def teardown(self) -> None: diff --git a/pytorch_lightning/strategies/ddp.py b/pytorch_lightning/strategies/ddp.py index ec9f417ff7799..010bbf4baa573 100644 --- a/pytorch_lightning/strategies/ddp.py +++ b/pytorch_lightning/strategies/ddp.py @@ -428,7 +428,7 @@ def register_strategies(cls, strategy_registry: Dict) -> None: strategy_registry.register( cls.strategy_name, cls, - description=f"{cls.__class__.__name__} Strategy", + description=f"{cls.__class__.__name__}", ) def _should_run_deadlock_detection(self) -> bool: diff --git a/pytorch_lightning/strategies/ddp2.py b/pytorch_lightning/strategies/ddp2.py index a6ce8f2f4230e..2023316e0e118 100644 --- a/pytorch_lightning/strategies/ddp2.py +++ b/pytorch_lightning/strategies/ddp2.py @@ -80,5 +80,5 @@ def register_strategies(cls, strategy_registry: Dict) -> None: strategy_registry.register( cls.strategy_name, cls, - description=f"{cls.__class__.__name__} Strategy", + description=f"{cls.__class__.__name__}", ) diff --git a/pytorch_lightning/strategies/ddp_spawn.py b/pytorch_lightning/strategies/ddp_spawn.py index 3c32d9e1872f5..a2415c72d5c7c 100644 --- a/pytorch_lightning/strategies/ddp_spawn.py +++ b/pytorch_lightning/strategies/ddp_spawn.py @@ -367,7 +367,7 @@ def register_strategies(cls, strategy_registry: Dict) -> None: strategy_registry.register( cls.strategy_name, cls, - description=f"{cls.__class__.__name__} Strategy", + description=f"{cls.__class__.__name__}", ) def teardown(self) -> None: diff --git a/pytorch_lightning/strategies/dp.py b/pytorch_lightning/strategies/dp.py index 2aa25f8275dea..484f7b474b02f 100644 --- a/pytorch_lightning/strategies/dp.py +++ b/pytorch_lightning/strategies/dp.py @@ -153,7 +153,7 @@ def register_strategies(cls, strategy_registry: Dict) -> None: strategy_registry.register( cls.strategy_name, cls, - description=f"{cls.__class__.__name__} Strategy", + description=f"{cls.__class__.__name__}", ) def teardown(self) -> None: diff --git a/pytorch_lightning/strategies/fully_sharded.py b/pytorch_lightning/strategies/fully_sharded.py index 9d0999902a071..af2d6d74bfdd2 100644 --- a/pytorch_lightning/strategies/fully_sharded.py +++ b/pytorch_lightning/strategies/fully_sharded.py @@ -216,5 +216,5 @@ def register_strategies(cls, strategy_registry: Dict) -> None: strategy_registry.register( cls.strategy_name, cls, - description=f"{cls.__class__.__name__} Strategy", + description=f"{cls.__class__.__name__}", ) diff --git a/pytorch_lightning/strategies/horovod.py b/pytorch_lightning/strategies/horovod.py index 49848e0f0163e..f4a733909651e 100644 --- a/pytorch_lightning/strategies/horovod.py +++ b/pytorch_lightning/strategies/horovod.py @@ -200,7 +200,7 @@ def register_strategies(cls, strategy_registry: Dict) -> None: strategy_registry.register( cls.strategy_name, cls, - description=f"{cls.__class__.__name__} Strategy", + description=f"{cls.__class__.__name__}", ) def teardown(self) -> None: diff --git a/pytorch_lightning/strategies/ipu.py b/pytorch_lightning/strategies/ipu.py index 7252e2bf3f583..6f6f4dd92a1f9 100644 --- a/pytorch_lightning/strategies/ipu.py +++ b/pytorch_lightning/strategies/ipu.py @@ -368,5 +368,5 @@ def register_strategies(cls, strategy_registry: Dict) -> None: strategy_registry.register( cls.strategy_name, cls, - description=f"{cls.__class__.__name__} Strategy", + description=f"{cls.__class__.__name__}", ) diff --git a/pytorch_lightning/strategies/sharded.py b/pytorch_lightning/strategies/sharded.py index 
b39bae1f02369..6811721ecaab7 100644 --- a/pytorch_lightning/strategies/sharded.py +++ b/pytorch_lightning/strategies/sharded.py @@ -138,5 +138,5 @@ def register_strategies(cls, strategy_registry: Dict) -> None: strategy_registry.register( cls.strategy_name, cls, - description=f"{cls.__class__.__name__} Strategy", + description=f"{cls.__class__.__name__}", ) diff --git a/pytorch_lightning/strategies/sharded_spawn.py b/pytorch_lightning/strategies/sharded_spawn.py index d4a2629f0862b..8cb6ca8b62028 100644 --- a/pytorch_lightning/strategies/sharded_spawn.py +++ b/pytorch_lightning/strategies/sharded_spawn.py @@ -120,5 +120,5 @@ def register_strategies(cls, strategy_registry: Dict) -> None: strategy_registry.register( cls.strategy_name, cls, - description=f"{cls.__class__.__name__} Strategy", + description=f"{cls.__class__.__name__}", ) diff --git a/pytorch_lightning/strategies/single_device.py b/pytorch_lightning/strategies/single_device.py index bc17dd08634fd..da80bad32ad13 100644 --- a/pytorch_lightning/strategies/single_device.py +++ b/pytorch_lightning/strategies/single_device.py @@ -86,7 +86,7 @@ def register_strategies(cls, strategy_registry: dict) -> None: strategy_registry.register( cls.strategy_name, cls, - description=f"{cls.__class__.__name__} Strategy", + description=f"{cls.__class__.__name__}", ) def teardown(self) -> None: diff --git a/pytorch_lightning/strategies/single_tpu.py b/pytorch_lightning/strategies/single_tpu.py index 66f90c2cd15f1..757b335e5ae2c 100644 --- a/pytorch_lightning/strategies/single_tpu.py +++ b/pytorch_lightning/strategies/single_tpu.py @@ -78,7 +78,7 @@ def register_strategies(cls, strategy_registry: Dict) -> None: strategy_registry.register( cls.strategy_name, cls, - description=f"{cls.__class__.__name__} Strategy", + description=f"{cls.__class__.__name__}", ) def teardown(self) -> None: diff --git a/pytorch_lightning/strategies/tpu_spawn.py b/pytorch_lightning/strategies/tpu_spawn.py index 71db3a64ec466..867624fd2151e 100644 --- a/pytorch_lightning/strategies/tpu_spawn.py +++ b/pytorch_lightning/strategies/tpu_spawn.py @@ -352,5 +352,5 @@ def register_strategies(cls, strategy_registry: Dict) -> None: strategy_registry.register( cls.strategy_name, cls, - description=f"{cls.__class__.__name__} Strategy", + description=f"{cls.__class__.__name__}", ) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 53ea205115de1..2a4c01061c922 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -138,7 +138,7 @@ def __init__( gradient_clip_algorithm: Optional[str] = None, process_position: int = 0, num_nodes: int = 1, - num_processes: int = None, + num_processes: Optional[int] = None, devices: Optional[Union[List[int], str, int]] = None, gpus: Optional[Union[List[int], str, int]] = None, auto_select_gpus: bool = False, diff --git a/pytorch_lightning/utilities/exceptions.py b/pytorch_lightning/utilities/exceptions.py index 548e0cb655945..ece4629819b33 100644 --- a/pytorch_lightning/utilities/exceptions.py +++ b/pytorch_lightning/utilities/exceptions.py @@ -17,14 +17,6 @@ class MisconfigurationException(Exception): """Exception used to inform users of misuse with PyTorch Lightning.""" -class DeviceNotAvailableException(Exception): - """Exception used to inform users that requested devices are not availible.""" - - -class ImpactableConfigurationException(Exception): - """Exception used to inform users that configuration impactable with each other.""" - - class 
DeadlockDetectedException(Exception): """Exception used when a deadlock has been detected and processes are being killed.""" diff --git a/tests/strategies/test_ddp_strategy.py b/tests/strategies/test_ddp_strategy.py index dddeaed26d98f..157908309f0e6 100644 --- a/tests/strategies/test_ddp_strategy.py +++ b/tests/strategies/test_ddp_strategy.py @@ -97,7 +97,7 @@ def creates_processes_externally(self): @RunIf(skip_windows=True) -def test_ddp_configure_ddp_fitting(): +def test_ddp_configure_ddp(): """Tests with ddp strategy.""" model = BoringModel() ddp_strategy = DDPStrategy() @@ -115,11 +115,6 @@ def test_ddp_configure_ddp_fitting(): # in DDPStrategy configure_ddp(), model wrapped by DistributedDataParallel assert isinstance(trainer.model, DistributedDataParallel) - -@RunIf(skip_windows=True) -def test_ddp_configure_ddp_validating(): - model = BoringModel() - ddp_strategy = DDPStrategy() trainer = Trainer( max_epochs=1, strategy=ddp_strategy, From 6b434e2dc9c06fb4020c5584c9eb9c742c89c3ef Mon Sep 17 00:00:00 2001 From: Siyu Wang Date: Mon, 14 Feb 2022 14:48:49 -0800 Subject: [PATCH 51/69] address comments and add kaushik's suggestions --- pytorch_lightning/strategies/tpu_spawn.py | 2 +- .../connectors/accelerator_connector.py | 196 +++++++++--------- pytorch_lightning/trainer/trainer.py | 2 +- 3 files changed, 99 insertions(+), 101 deletions(-) diff --git a/pytorch_lightning/strategies/tpu_spawn.py b/pytorch_lightning/strategies/tpu_spawn.py index 867624fd2151e..b43267b5c91d6 100644 --- a/pytorch_lightning/strategies/tpu_spawn.py +++ b/pytorch_lightning/strategies/tpu_spawn.py @@ -52,7 +52,7 @@ class TPUSpawnStrategy(DDPSpawnStrategy): """Strategy for training multiple TPU devices using the :func:`torch.multiprocessing.spawn` method.""" - strategy_name = "tpu_spawn" + strategy_name = "tpu_spawn_strategy" def __init__( self, diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 1aac1c3297ba6..500274fd7a4d8 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -72,7 +72,6 @@ rank_zero_info, rank_zero_warn, ) -from pytorch_lightning.utilities.enums import PrecisionType from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.imports import _HOROVOD_AVAILABLE, _IPU_AVAILABLE, _TPU_AVAILABLE @@ -96,15 +95,15 @@ def __init__( sync_batchnorm: bool = False, benchmark: bool = False, replace_sampler_ddp: bool = True, - deterministic: bool = False, # TODO: why is it unused? + deterministic: bool = False, num_processes: Optional[int] = None, # deprecated tpu_cores: Optional[Union[List[int], int]] = None, # deprecated ipus: Optional[int] = None, # deprecated gpus: Optional[Union[List[int], str, int]] = None, # deprecated - gpu_ids: Optional[List[int]] = None, # TODO: why is it unused? + gpu_ids: Optional[List[int]] = None, # TODO can be removed ) -> None: """The AcceleratorConnector parses several Trainer arguments and instantiates the Strategy including other - components such as the Accelerator and Precision plugin. + components such as the Accelerator and Precision plugins. A. accelerator flag could be: 1. strategy class (deprecated in 1.5 will be removed in 1.7) @@ -115,7 +114,7 @@ def __init__( B. strategy flag could be : 1. strategy class - 2. strategy str registered with strategyRegister + 2. strategy str registered with StrategyRegistry 3. 
strategy str in _strategy_type enum which listed in each strategy as backend (registed these too, and _strategy_type could be deprecated) @@ -146,7 +145,7 @@ def __init__( # Get registered strategies, built-in accelerators and precision plugins self._existing_strategies_str = StrategyRegistry.available_strategies() self._existing_accelerator_type = ("tpu", "ipu", "gpu", "cpu") - self._supported_precision = PrecisionType.supported_types() + self._supported_precision_type = ("16", "32", "64", "bf16", "mixed") # Raise an exception if there are conflicts between flags # Set each valid flag to `self._x_flag` after validation @@ -165,11 +164,11 @@ def __init__( self._set_parallel_devices_and_init_accelerator() # 3. Instantiate ClusterEnvironment - self._choose_and_init_cluster_environment() + self.cluster_environment = self._choose_and_init_cluster_environment() # 4. Instantiate Strategy - Part 1 if self._strategy_flag is None: - self._choose_strategy() + self._strategy_flag = self._choose_strategy() # In specific cases, ignore user selection and fall back to a different strategy self._check_strategy_and_fallback() self._init_strategy() @@ -193,7 +192,7 @@ def _check_config_and_set_final_flags( 1. strategy: strategy, accelerator and plugin can all be set to strategies 2. accelerator: if the value of the accelerator argument is a type of accelerator (instance or string), - set self._acceelrator_flag accordingly. If the value is strategy related (instance or string), + set self.accelerator_flag accordingly. If the value is strategy related (instance or string), it gets handled by 1. 3. precision: The final value of the precision flag may be determined either by the precision argument or by a plugin instance. @@ -212,7 +211,7 @@ def _check_config_and_set_final_flags( if plugins is not None: plugins = [plugins] if not isinstance(plugins, list) else plugins - if strategy: + if strategy is not None: self._strategy_flag = strategy if strategy == "ddp_cpu": raise MisconfigurationException( @@ -238,7 +237,6 @@ def _check_config_and_set_final_flags( raise MisconfigurationException( "strategy str already set through strategy flag, but have also passed in through accelerator" ) - if plugins: for plugin in plugins: if isinstance(plugin, Strategy): @@ -268,11 +266,11 @@ def _check_config_and_set_final_flags( elif accelerator == "ddp_cpu" and not self._strategy_flag: self._strategy_flag = accelerator - if precision: - if not PrecisionType.supported_type(precision): + if precision is not None: + if str(precision) not in self._supported_precision_type: raise MisconfigurationException( f"Precision {repr(precision)} is invalid. 
" - f"Allowed precision values: {PrecisionType.supported_types()}" + f"Allowed precision values: {self._supported_precision_type}" ) self._precision_flag = precision @@ -287,7 +285,7 @@ def _check_config_and_set_final_flags( elif isinstance(plugin, PrecisionPlugin): self._precision_plugin_flag = plugin - elif isinstance(plugin, str) and plugin in self._supported_precision: + elif isinstance(plugin, str) and plugin in self._supported_precision_type: self._precision_flag = plugin elif isinstance(plugin, CheckpointIO): self.checkpoint_io = plugin @@ -339,7 +337,7 @@ def _check_config_and_set_final_flags( if self._strategy_flag.parallel_devices[0].type == "cuda": self._accelerator_flag = "gpu" - amp_type = amp_type.lower() if isinstance(amp_type, str) else None + amp_type = amp_type if isinstance(amp_type, str) else None self._amp_type_flag = AMPType.from_str(amp_type) if amp_level is not None and self._amp_type_flag != AMPType.APEX: @@ -366,16 +364,10 @@ def _check_device_config_and_set_final_flags( self._device_flag = devices - # TODO: Delete this parsing section when num_processes, gpus, ipus and tpu_cores get removed - self._gpus = gpus - self._tpu_cores = tpu_cores - gpus = device_parser.parse_gpu_ids(gpus) - tpu_cores = device_parser.parse_tpu_cores(tpu_cores) - deprecated_devices_specific_flag = num_processes or gpus or ipus or tpu_cores - if deprecated_devices_specific_flag and deprecated_devices_specific_flag not in (0, "0"): - self._map_deprecated_devices_specfic_info_to_accelerator_and_device_flag( - devices, deprecated_devices_specific_flag, num_processes, gpus, ipus, tpu_cores - ) + # TODO: Delete this method num_processes, gpus, ipus and tpu_cores get removed + self._map_deprecated_devices_specfic_info_to_accelerator_and_device_flag( + devices, num_processes, gpus, ipus, tpu_cores + ) if self._device_flag == "auto" and self._accelerator_flag is None: raise MisconfigurationException( @@ -386,37 +378,42 @@ def _check_device_config_and_set_final_flags( def _map_deprecated_devices_specfic_info_to_accelerator_and_device_flag( self, devices: Optional[Union[List[int], str, int]], - deprecated_devices_specific_flag: Union[int, List[int]], num_processes: Optional[int], gpus: Optional[List[int]], ipus: Optional[int], tpu_cores: Optional[Union[int, List[int]]], ) -> None: - """Sets the `device_flag` based on num_processes, gpus, ipus, tpu_cores.""" - if devices: - # TODO: @awaelchli improve error message - rank_zero_warn( - f"The flag `devices={devices}` will be ignored, " - f"instead the device specific number {deprecated_devices_specific_flag} will be used" - ) + """Sets the `device_flag` and `accelerator_flag `based on num_processes, gpus, ipus, tpu_cores.""" + self._gpus = gpus + self._tpu_cores = tpu_cores + gpus = device_parser.parse_gpu_ids(gpus) + tpu_cores = device_parser.parse_tpu_cores(tpu_cores) + deprecated_devices_specific_flag = num_processes or gpus or ipus or tpu_cores + if deprecated_devices_specific_flag and deprecated_devices_specific_flag not in (0, "0"): + if devices: + # TODO: @awaelchli improve error message + rank_zero_warn( + f"The flag `devices={devices}` will be ignored, " + f"instead the device specific number {deprecated_devices_specific_flag} will be used" + ) - if [(num_processes is not None), (gpus is not None), (ipus is not None), (tpu_cores is not None)].count( - True - ) > 1: - # TODO: @awaelchli improve error message - rank_zero_warn("more than one device specific flag has been set") - self._device_flag = deprecated_devices_specific_flag - - if 
self._accelerator_flag is None: - # set accelerator type based on num_processes, gpus, ipus, tpu_cores - if ipus: - self._accelerator_flag = "ipu" - if tpu_cores: - self._accelerator_flag = "tpu" - if gpus: - self._accelerator_flag = "gpu" - if num_processes: - self._accelerator_flag = "cpu" + if [(num_processes is not None), (gpus is not None), (ipus is not None), (tpu_cores is not None)].count( + True + ) > 1: + # TODO: @awaelchli improve error message + rank_zero_warn("more than one device specific flag has been set") + self._device_flag = deprecated_devices_specific_flag + + if self._accelerator_flag is None: + # set accelerator type based on num_processes, gpus, ipus, tpu_cores + if ipus: + self._accelerator_flag = "ipu" + if tpu_cores: + self._accelerator_flag = "tpu" + if gpus: + self._accelerator_flag = "gpu" + if num_processes: + self._accelerator_flag = "cpu" def _special_handle_for_ipu(self) -> None: # current logic only apply to object config @@ -486,16 +483,15 @@ def _set_parallel_devices_and_init_accelerator(self) -> None: self._tpu_cores = self._device_flag if not self._tpu_cores else self._tpu_cores def _choose_and_init_cluster_environment(self) -> None: - self.cluster_environment: ClusterEnvironment = LightningEnvironment() if isinstance(self._cluster_environment_flag, ClusterEnvironment): - self.cluster_environment = self._cluster_environment_flag - elif self._is_slurm_managing_tasks(): + return self._cluster_environment_flag + if self._is_slurm_managing_tasks(): rank_zero_info("Multiprocessing is handled by SLURM.") - self.cluster_environment = SLURMEnvironment() - else: - for env_type in (BaguaEnvironment, TorchElasticEnvironment, KubeflowEnvironment, LSFEnvironment): - if env_type.detect(): - self.cluster_environment = env_type() + return SLURMEnvironment() + for env_type in (BaguaEnvironment, TorchElasticEnvironment, KubeflowEnvironment, LSFEnvironment): + if env_type.detect(): + return env_type() + return LightningEnvironment() @property def _is_sharded_training_type(self) -> bool: @@ -510,32 +506,31 @@ def _is_slurm_managing_tasks(self): num_slurm_tasks = int(os.environ["SLURM_NTASKS"], 0) return num_slurm_tasks == total_requested_devices - def _choose_strategy(self) -> None: + def _choose_strategy(self) -> str: if self._accelerator_flag == "ipu": - self._strategy_flag = "ipu_strategy" - elif self._accelerator_flag == "tpu": + return IPUStrategy.strategy_name + if self._accelerator_flag == "tpu": if self._parallel_devices and len(self._parallel_devices) > 1: - self._strategy_flag = "tpu_spawn" + return TPUSpawnStrategy.strategy_name else: # TODO: lazy initialized device, then here could be self._strategy_flag = "single_tpu_device" - self._strategy_flag = SingleTPUStrategy(device=self._parallel_devices[0]) - elif _HOROVOD_AVAILABLE and ("OMPI_COMM_WORLD_RANK" in os.environ or "HOROVOD_RANK" in os.environ): - self._strategy_flag = "horovod" - else: - if self._num_nodes_flag > 1: - self._strategy_flag = "ddp" - elif len(self._parallel_devices) <= 1: - device = ( - device_parser.determine_root_gpu_device(self._parallel_devices) - if self._accelerator_flag == "gpu" - else "cpu" - ) - # TODO: lazy initialized device, then here could be self._strategy_flag = "single_device" - self._strategy_flag = SingleDeviceStrategy(device=device) - elif len(self._parallel_devices) > 1: - self._strategy_flag = "ddp_spawn" - else: - self._strategy_flag = "ddp" + return SingleTPUStrategy(device=self._parallel_devices[0]) + if _HOROVOD_AVAILABLE and ("OMPI_COMM_WORLD_RANK" in os.environ or 
"HOROVOD_RANK" in os.environ): + return HorovodStrategy.strategy_name + if self._num_nodes_flag > 1: + return DDPStrategy.strategy_name + if len(self._parallel_devices) <= 1: + device = ( + device_parser.determine_root_gpu_device(self._parallel_devices) + if self._accelerator_flag == "gpu" + else "cpu" + ) + # TODO: lazy initialized device, then here could be self._strategy_flag = "single_device" + return SingleDeviceStrategy(device=device) + if len(self._parallel_devices) > 1: + return DDPSpawnStrategy.strategy_name + + return DDPStrategy.strategy_name def _check_strategy_and_fallback(self) -> None: """Checks edge cases when the strategy selection was a string input, and we need to fall back to a @@ -551,7 +546,7 @@ def _check_strategy_and_fallback(self) -> None: "Learn more: https://github.com/PyTorchLightning/pytorch-lightning/issues/7810" ) if self._device_flag == 1 and self._num_nodes_flag > 1: - strategy_flag = "ddp" + strategy_flag = DDPStrategy.strategy_name else: strategy_flag = "ddp_spawn" if self._accelerator_flag == "gpu": @@ -651,7 +646,6 @@ def _check_and_init_precision(self) -> PrecisionPlugin: return NativeMixedPrecisionPlugin(self._precision_flag, device) if self._amp_type_flag == AMPType.APEX: - self._amp_level_flag = self._amp_level_flag or "O2" return ApexMixedPrecisionPlugin(self._amp_level_flag) raise RuntimeError("No precision set") @@ -664,21 +658,20 @@ def _validate_precision_choice(self) -> None: raise MisconfigurationException( f"`Trainer(accelerator='ipu', precision={self._precision_flag!r})` is not supported." ) - if isinstance(self.accelerator, TPUAccelerator) and self._precision_flag == 64: - raise MisconfigurationException( - "`Trainer(accelerator='tpu', precision=64)` is not implemented." - " Please, open an issue in `https://github.com/PyTorchLightning/pytorch-lightning/issues`" - " requesting this feature." - ) - if ( - isinstance(self.accelerator, TPUAccelerator) - and self._precision_plugin_flag - and not isinstance(self._precision_plugin_flag, (TPUPrecisionPlugin, TPUBf16PrecisionPlugin)) - ): - raise ValueError( - f"The `TPUAccelerator` can only be used with a `TPUPrecisionPlugin`," - f" found: {self._precision_plugin_flag}." - ) + if isinstance(self.accelerator, TPUAccelerator): + if self._precision_flag == 64: + raise MisconfigurationException( + "`Trainer(accelerator='tpu', precision=64)` is not implemented." + " Please, open an issue in `https://github.com/PyTorchLightning/pytorch-lightning/issues`" + " requesting this feature." + ) + if self._precision_plugin_flag and not isinstance( + self._precision_plugin_flag, (TPUPrecisionPlugin, TPUBf16PrecisionPlugin) + ): + raise ValueError( + f"The `TPUAccelerator` can only be used with a `TPUPrecisionPlugin`," + f" found: {self._precision_plugin_flag}." 
+ ) if ( self._precision_flag == 16 and isinstance(self.accelerator, CPUAccelerator) @@ -723,7 +716,12 @@ def _lazy_init_strategy(self) -> None: from pytorch_lightning.utilities import _IS_INTERACTIVE # TODO move is_compatible logic to strategy API - interactive_compatible_strategy = ("dp", "ddp_spawn", "ddp_sharded_spawn", "tpu_spawn") + interactive_compatible_strategy = ( + DataParallelStrategy.strategy_name, + DDPSpawnStrategy.strategy_name, + DDPSpawnShardedStrategy.strategy_name, + TPUSpawnStrategy.strategy_name, + ) if _IS_INTERACTIVE and self.strategy.strategy_name not in interactive_compatible_strategy: raise MisconfigurationException( f"`Trainer(strategy={self.strategy.strategy_name!r})` or" diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 2a4c01061c922..b4fec24b0f101 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -1969,7 +1969,7 @@ def should_rank_save_checkpoint(self) -> bool: ) @property - def _strategy_type(self) -> _StrategyType: + def _strategy_type(self) -> Optional[int]: return self.strategy.strategy_name @property From 3560c555c6e2580316a222d5248d03df5aaeb131 Mon Sep 17 00:00:00 2001 From: four4fish <88516121+four4fish@users.noreply.github.com> Date: Mon, 14 Feb 2022 14:52:35 -0800 Subject: [PATCH 52/69] Apply suggestions from code review Co-authored-by: ananthsub --- pytorch_lightning/callbacks/gpu_stats_monitor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/callbacks/gpu_stats_monitor.py b/pytorch_lightning/callbacks/gpu_stats_monitor.py index 68d2ef3ba69eb..f5348b779a803 100644 --- a/pytorch_lightning/callbacks/gpu_stats_monitor.py +++ b/pytorch_lightning/callbacks/gpu_stats_monitor.py @@ -127,7 +127,7 @@ def setup(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule", stage: O if not trainer.logger: raise MisconfigurationException("Cannot use GPUStatsMonitor callback with Trainer that has no logger.") - if trainer._device_type != _AcceleratorType.GPU.lower(): + if trainer.strategy.root_device.type != "cuda": raise MisconfigurationException( "You are using GPUStatsMonitor but are not running on GPU" f" since gpus attribute in Trainer is set to {trainer.gpus}." 
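The hunk above replaces the `_AcceleratorType`-based check in `GPUStatsMonitor` with a query on `trainer.strategy.root_device`, which is the pattern the rewritten accelerator connector expects callbacks to use for device checks. A minimal sketch of that pattern, assuming a hypothetical callback class (illustrative only, not part of this patch series):

    import pytorch_lightning as pl
    from pytorch_lightning.utilities.exceptions import MisconfigurationException


    class CudaOnlyCallback(pl.Callback):
        """Hypothetical callback that refuses to run unless the root device is CUDA."""

        def setup(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule", stage=None) -> None:
            # After the rewrite the strategy owns the root device, so callbacks
            # query trainer.strategy instead of the removed trainer._device_type checks.
            if trainer.strategy.root_device.type != "cuda":
                raise MisconfigurationException("CudaOnlyCallback requires a CUDA device.")

A run configured as `Trainer(accelerator="gpu", devices=1, callbacks=[CudaOnlyCallback()])` would pass this check, while a CPU run would raise.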
From 55547bcbfb85b70658473b3cf68c0cf65eaad051 Mon Sep 17 00:00:00 2001 From: Siyu Wang Date: Tue, 15 Feb 2022 18:34:23 -0800 Subject: [PATCH 53/69] fix mypy --- .../callbacks/gpu_stats_monitor.py | 1 - .../connectors/accelerator_connector.py | 81 ++++++++++--------- pytorch_lightning/utilities/device_parser.py | 2 +- 3 files changed, 46 insertions(+), 38 deletions(-) diff --git a/pytorch_lightning/callbacks/gpu_stats_monitor.py b/pytorch_lightning/callbacks/gpu_stats_monitor.py index f5348b779a803..a871bfa309c96 100644 --- a/pytorch_lightning/callbacks/gpu_stats_monitor.py +++ b/pytorch_lightning/callbacks/gpu_stats_monitor.py @@ -29,7 +29,6 @@ import pytorch_lightning as pl from pytorch_lightning.callbacks.base import Callback -from pytorch_lightning.utilities import _AcceleratorType from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.parsing import AttributeDict from pytorch_lightning.utilities.rank_zero import rank_zero_deprecation, rank_zero_only diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 500274fd7a4d8..dbfd885235e42 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -68,6 +68,7 @@ _StrategyType, AMPType, device_parser, + LightningEnum, rank_zero_deprecation, rank_zero_info, rank_zero_warn, @@ -134,7 +135,7 @@ def __init__( priorities which to take when: A. Class > str B. Strategy > Accelerator/precision/plugins - C. When multiple flag set to the same thing? (ignore? not handled for now) + C. TODO When multiple flag set to the same thing """ # TODO: move to gpu accelerator torch.backends.cudnn.benchmark = benchmark @@ -151,7 +152,23 @@ def __init__( # Set each valid flag to `self._x_flag` after validation # Example: If accelerator is set to a strategy type, set `self._strategy_flag = accelerator`. # For devices: Assign gpus, ipus, etc. to the accelerator flag and devices flag - self._check_config_and_set_final_flags(strategy, accelerator, precision, plugins, amp_type, amp_level) + self._strategy_flag: Optional[Union[Strategy, str]] = None + self._accelerator_flag: Optional[Union[Accelerator, str]] = None + self._precision_flag: Optional[Union[int, str]] = None + self._precision_plugin_flag: Optional[PrecisionPlugin] = None + self._cluster_environment_flag: Optional[Union[ClusterEnvironment, str]] = None + self.checkpoint_io: Optional[CheckpointIO] = None + self._amp_type_flag: Optional[LightningEnum] = None + self._amp_level_flag: Optional[str] = amp_level + + self._check_config_and_set_final_flags( + strategy=strategy, + accelerator=accelerator, + precision=precision, + plugins=plugins, + amp_type=amp_type, + amp_level=amp_level, + ) self._check_device_config_and_set_final_flags( devices=devices, num_nodes=num_nodes, num_processes=num_processes, gpus=gpus, ipus=ipus, tpu_cores=tpu_cores ) @@ -199,15 +216,6 @@ def _check_config_and_set_final_flags( 4. plugins: a plugin could occur as a value of the strategy argument (handled by 1), or the precision argument (handled by 3). We also extract the CheckpointIO and ClusterEnvironment plugins. 
""" - self._strategy_flag = None - self._accelerator_flag = None - self._precision_flag = None - self._precision_plugin_flag = None - self._cluster_environment_flag = None - self.checkpoint_io = None - self._amp_level_flag = amp_level - self._amp_type_flag = amp_type - if plugins is not None: plugins = [plugins] if not isinstance(plugins, list) else plugins @@ -344,7 +352,6 @@ def _check_config_and_set_final_flags( raise MisconfigurationException( f"You have asked for `amp_level={amp_level!r}` but it's only supported with `amp_backend='apex'`." ) - self._amp_level_flag = amp_level def _check_device_config_and_set_final_flags( self, @@ -355,10 +362,8 @@ def _check_device_config_and_set_final_flags( ipus: Optional[int], tpu_cores: Optional[Union[List[int], int]], ) -> None: - if num_nodes == "auto": - self._num_nodes_flag = 1 - else: - self._num_nodes_flag = int(num_nodes) if num_nodes is not None else 1 + self._num_nodes_flag = int(num_nodes) if num_nodes is not None else 1 + if devices in (0, "0", "0,"): raise MisconfigurationException(f"You passed `devices={devices}`, please set a number > 0") @@ -379,13 +384,13 @@ def _map_deprecated_devices_specfic_info_to_accelerator_and_device_flag( self, devices: Optional[Union[List[int], str, int]], num_processes: Optional[int], - gpus: Optional[List[int]], + gpus: Optional[Union[List[int], str, int]], ipus: Optional[int], - tpu_cores: Optional[Union[int, List[int]]], + tpu_cores: Optional[Union[List[int], str, int]], ) -> None: """Sets the `device_flag` and `accelerator_flag `based on num_processes, gpus, ipus, tpu_cores.""" - self._gpus = gpus - self._tpu_cores = tpu_cores + self._gpus: Optional[Union[List[int], str, int]] = gpus + self._tpu_cores: Optional[Union[List[int], str, int]] = tpu_cores gpus = device_parser.parse_gpu_ids(gpus) tpu_cores = device_parser.parse_tpu_cores(tpu_cores) deprecated_devices_specific_flag = num_processes or gpus or ipus or tpu_cores @@ -434,7 +439,7 @@ def _choose_accelerator(self) -> str: return "cpu" def _set_parallel_devices_and_init_accelerator(self) -> None: - self._parallel_devices = [] + self._parallel_devices: List[Union[int, torch.device]] = [] if isinstance(self._accelerator_flag, Accelerator): self.accelerator: Accelerator = self._accelerator_flag elif self._accelerator_flag == "tpu": @@ -444,7 +449,7 @@ def _set_parallel_devices_and_init_accelerator(self) -> None: if isinstance(self._device_flag, int): self._parallel_devices = list(range(self._device_flag)) else: - self._parallel_devices = self._device_flag + self._parallel_devices = self._device_flag # type: ignore[assignment] elif self._accelerator_flag == "ipu": self.accelerator = IPUAccelerator() @@ -460,7 +465,7 @@ def _set_parallel_devices_and_init_accelerator(self) -> None: if isinstance(self._device_flag, int) or isinstance(self._device_flag, str): self._device_flag = int(self._device_flag) self._parallel_devices = ( - [torch.device("cuda", i) for i in device_parser.parse_gpu_ids(self._device_flag)] + [torch.device("cuda", i) for i in device_parser.parse_gpu_ids(self._device_flag)] # type: ignore if self._device_flag != 0 else [] ) @@ -482,7 +487,7 @@ def _set_parallel_devices_and_init_accelerator(self) -> None: self._gpus = self._device_flag if not self._gpus else self._gpus self._tpu_cores = self._device_flag if not self._tpu_cores else self._tpu_cores - def _choose_and_init_cluster_environment(self) -> None: + def _choose_and_init_cluster_environment(self) -> ClusterEnvironment: if isinstance(self._cluster_environment_flag, 
ClusterEnvironment): return self._cluster_environment_flag if self._is_slurm_managing_tasks(): @@ -497,7 +502,7 @@ def _choose_and_init_cluster_environment(self) -> None: def _is_sharded_training_type(self) -> bool: return isinstance(self._strategy, (DDPShardedStrategy, DDPSpawnShardedStrategy)) - def _is_slurm_managing_tasks(self): + def _is_slurm_managing_tasks(self) -> bool: """used by choosing cluster enviroment.""" if not SLURMEnvironment.detect() or SLURMEnvironment.job_name() == "bash": return False @@ -506,7 +511,7 @@ def _is_slurm_managing_tasks(self): num_slurm_tasks = int(os.environ["SLURM_NTASKS"], 0) return num_slurm_tasks == total_requested_devices - def _choose_strategy(self) -> str: + def _choose_strategy(self) -> Union[Strategy, str]: if self._accelerator_flag == "ipu": return IPUStrategy.strategy_name if self._accelerator_flag == "tpu": @@ -514,7 +519,7 @@ def _choose_strategy(self) -> str: return TPUSpawnStrategy.strategy_name else: # TODO: lazy initialized device, then here could be self._strategy_flag = "single_tpu_device" - return SingleTPUStrategy(device=self._parallel_devices[0]) + return SingleTPUStrategy(device=self._parallel_devices[0]) # type: ignore if _HOROVOD_AVAILABLE and ("OMPI_COMM_WORLD_RANK" in os.environ or "HOROVOD_RANK" in os.environ): return HorovodStrategy.strategy_name if self._num_nodes_flag > 1: @@ -526,7 +531,7 @@ def _choose_strategy(self) -> str: else "cpu" ) # TODO: lazy initialized device, then here could be self._strategy_flag = "single_device" - return SingleDeviceStrategy(device=device) + return SingleDeviceStrategy(device=device) # type: ignore if len(self._parallel_devices) > 1: return DDPSpawnStrategy.strategy_name @@ -553,6 +558,8 @@ def _check_strategy_and_fallback(self) -> None: rank_zero_warn( "You requested one or more GPUs, but set `accelerator='ddp_cpu'`. Training will not use GPUs." ) + self._accelerator_flag = "cpu" + self.accelerator = CPUAccelerator() if strategy_flag in ("ddp_spawn", "ddp_spawn_find_unused_parameters_false") and ( TorchElasticEnvironment.detect() or KubeflowEnvironment.detect() or self._is_slurm_managing_tasks() ): @@ -603,19 +610,21 @@ def _check_and_init_precision(self) -> PrecisionPlugin: return self._precision_plugin_flag if isinstance(self.accelerator, IPUAccelerator): - return IPUPrecisionPlugin(self._precision_flag) + return IPUPrecisionPlugin(self._precision_flag) # type: ignore if isinstance(self.accelerator, TPUAccelerator): if self._precision_flag == 32: return TPUPrecisionPlugin() elif self._precision_flag in (16, "bf16"): if self._precision_flag == 16: rank_zero_warn( - f"You passed `Trainer(accelerator='tpu', precision=16)` but {self._amp_type_flag.value} AMP" - f" is not supported with TPUs. Using `precision='bf16'` instead." + "You passed `Trainer(accelerator='tpu', precision=16)` but AMP" + " is not supported with TPUs. Using `precision='bf16'` instead." 
) return TPUBf16PrecisionPlugin() if isinstance(self.strategy, DeepSpeedStrategy): - return DeepSpeedPrecisionPlugin(self._precision_flag, self._amp_type_flag, self._amp_level_flag) + return DeepSpeedPrecisionPlugin( + self._precision_flag, self._amp_type_flag, self._amp_level_flag + ) # type: ignore if self._precision_flag == 32: return PrecisionPlugin() @@ -631,7 +640,7 @@ def _check_and_init_precision(self) -> PrecisionPlugin: if self._precision_flag in (16, "bf16"): rank_zero_info( - f"Using 16bit {self._amp_type_flag.value} Automatic Mixed Precision (AMP)" + f"Using 16bit {self._amp_type_flag.value} Automatic Mixed Precision (AMP)" # type: ignore if self._precision_flag == 16 else "Using bfloat16 Automatic Mixed Precision (AMP)" ) @@ -646,7 +655,7 @@ def _check_and_init_precision(self) -> PrecisionPlugin: return NativeMixedPrecisionPlugin(self._precision_flag, device) if self._amp_type_flag == AMPType.APEX: - return ApexMixedPrecisionPlugin(self._amp_level_flag) + return ApexMixedPrecisionPlugin(self._amp_level_flag) # type: ignore raise RuntimeError("No precision set") @@ -683,7 +692,7 @@ def _validate_precision_choice(self) -> None: ) if self._precision_flag == "bf16" and self._amp_type_flag != AMPType.NATIVE: raise MisconfigurationException( - f"You passed `Trainer(amp_type={self._amp_type_flag.value!r}, precision='bf16')` but " + f"You passed `Trainer(amp_type={self._amp_type_flag.value!r}, precision='bf16')` but " # type: ignore "it's not supported. Try using `amp_type='native'` instead." ) if self._precision_flag in (16, "bf16") and self._amp_type_flag == AMPType.APEX: @@ -788,7 +797,7 @@ def devices(self) -> int: @property def tpu_cores(self) -> Optional[Union[List[int], int]]: if isinstance(self.accelerator, TPUAccelerator): - return self._tpu_cores + return self._tpu_cores # type: ignore return 0 @property diff --git a/pytorch_lightning/utilities/device_parser.py b/pytorch_lightning/utilities/device_parser.py index 6fa9ace7f20ec..1e51c5479bdc7 100644 --- a/pytorch_lightning/utilities/device_parser.py +++ b/pytorch_lightning/utilities/device_parser.py @@ -21,7 +21,7 @@ from pytorch_lightning.utilities.exceptions import MisconfigurationException -def determine_root_gpu_device(gpus: List[int]) -> Optional[int]: +def determine_root_gpu_device(gpus: List[Union[int, torch.device]]) -> Optional[Union[int, torch.device]]: """ Args: gpus: non-empty list of ints representing which gpus to use From cc684f14fcb118b291dfc6ccf0b1bce70e050545 Mon Sep 17 00:00:00 2001 From: Siyu Wang Date: Wed, 16 Feb 2022 09:54:19 -0800 Subject: [PATCH 54/69] address comments and fix mypy --- .../trainer/connectors/accelerator_connector.py | 10 +++++----- pytorch_lightning/trainer/trainer.py | 6 ++++-- tests/models/test_gpu.py | 2 +- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index dbfd885235e42..4a0793abfc068 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -181,7 +181,7 @@ def __init__( self._set_parallel_devices_and_init_accelerator() # 3. Instantiate ClusterEnvironment - self.cluster_environment = self._choose_and_init_cluster_environment() + self.cluster_environment: ClusterEnvironment = self._choose_and_init_cluster_environment() # 4. 
Instantiate Strategy - Part 1 if self._strategy_flag is None: @@ -623,8 +623,8 @@ def _check_and_init_precision(self) -> PrecisionPlugin: return TPUBf16PrecisionPlugin() if isinstance(self.strategy, DeepSpeedStrategy): return DeepSpeedPrecisionPlugin( - self._precision_flag, self._amp_type_flag, self._amp_level_flag - ) # type: ignore + self._precision_flag, self._amp_type_flag, self._amp_level_flag # type: ignore + ) if self._precision_flag == 32: return PrecisionPlugin() @@ -824,8 +824,8 @@ def gpus(self) -> Optional[Union[List[int], str, int]]: return self._gpus @property - def parallel_device_ids(self) -> Optional[List[int]]: - return [i for i in range(len(self.parallel_devices))] if isinstance(self.accelerator, GPUAccelerator) else None + def parallel_device_ids(self) -> List[int]: + return [i for i in range(len(self.parallel_devices))] if isinstance(self.accelerator, GPUAccelerator) else [] @property def is_distributed(self) -> bool: diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index b4fec24b0f101..0430e3d30558c 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -1969,7 +1969,7 @@ def should_rank_save_checkpoint(self) -> bool: ) @property - def _strategy_type(self) -> Optional[int]: + def _strategy_type(self) -> Optional[str]: return self.strategy.strategy_name @property @@ -2006,7 +2006,9 @@ def devices(self) -> Optional[Union[List[int], str, int]]: @property def data_parallel_device_ids(self) -> Optional[List[int]]: - return self._accelerator_connector.parallel_device_ids + return ( + self._accelerator_connector.parallel_device_ids if self._accelerator_connector.parallel_device_ids else None + ) @property def lightning_module(self) -> "pl.LightningModule": diff --git a/tests/models/test_gpu.py b/tests/models/test_gpu.py index c494c0c1c18e6..d17322e191ff1 100644 --- a/tests/models/test_gpu.py +++ b/tests/models/test_gpu.py @@ -242,7 +242,7 @@ def test_torchelastic_gpu_parsing(mocked_device_count, mocked_is_available, gpus sanitizing the gpus as only one of the GPUs is visible.""" trainer = Trainer(gpus=gpus) assert isinstance(trainer._accelerator_connector.cluster_environment, TorchElasticEnvironment) - assert trainer._accelerator_connector.parallel_device_ids == device_parser.parse_gpu_ids(gpus) + assert trainer.data_parallel_device_ids == device_parser.parse_gpu_ids(gpus) assert trainer.gpus == gpus From ce18f52b60c0d0582ed08fe88f661bbbaf34fcb2 Mon Sep 17 00:00:00 2001 From: Kaushik B Date: Thu, 17 Feb 2022 08:19:10 +0530 Subject: [PATCH 55/69] Updates to attributes --- .../connectors/accelerator_connector.py | 31 +++++++------------ 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 4a0793abfc068..7bb665a75864c 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -144,9 +144,9 @@ def __init__( # 1. 
Parsing flags # Get registered strategies, built-in accelerators and precision plugins - self._existing_strategies_str = StrategyRegistry.available_strategies() - self._existing_accelerator_type = ("tpu", "ipu", "gpu", "cpu") - self._supported_precision_type = ("16", "32", "64", "bf16", "mixed") + self._registered_strategies = StrategyRegistry.available_strategies() + self._accelerator_types = ("tpu", "ipu", "gpu", "cpu") + self._precision_types = ("16", "32", "64", "bf16", "mixed") # Raise an exception if there are conflicts between flags # Set each valid flag to `self._x_flag` after validation @@ -237,11 +237,7 @@ def _check_config_and_set_final_flags( f"Incompatible values set in `strategy` and `accelerator` arguments." f"Received both strategy={strategy} and accelerator={accelerator}" ) - if ( - isinstance(accelerator, str) - and accelerator in self._existing_strategies_str - and strategy != accelerator - ): + if isinstance(accelerator, str) and accelerator in self._registered_strategies and strategy != accelerator: raise MisconfigurationException( "strategy str already set through strategy flag, but have also passed in through accelerator" ) @@ -252,20 +248,16 @@ def _check_config_and_set_final_flags( f"You have passed `Trainer(strategy)`" f" and you can only specify one strategy, but you have passed {plugin} as a plugin." ) - if isinstance(plugin, str) and plugin in self._existing_strategies_str: + if isinstance(plugin, str) and plugin in self._registered_strategies: raise MisconfigurationException( f"You have passed `Trainer(strategy)`" f" and you can only specify one strategy, but you have passed {plugin} as a plugin." ) if accelerator is not None: - if ( - accelerator in self._existing_accelerator_type - or accelerator == "auto" - or isinstance(accelerator, Accelerator) - ): + if accelerator in self._accelerator_types or accelerator == "auto" or isinstance(accelerator, Accelerator): self._accelerator_flag = accelerator - elif accelerator in self._existing_strategies_str or isinstance(accelerator, Strategy): + elif accelerator in self._registered_strategies or isinstance(accelerator, Strategy): rank_zero_deprecation( f"Passing `Trainer(accelerator={accelerator!r})` has been deprecated" f" in v1.5 and will be removed in v1.7. Use `Trainer(strategy={accelerator!r})` instead." @@ -275,16 +267,15 @@ def _check_config_and_set_final_flags( self._strategy_flag = accelerator if precision is not None: - if str(precision) not in self._supported_precision_type: + if str(precision) not in self._precision_types: raise MisconfigurationException( - f"Precision {repr(precision)} is invalid. " - f"Allowed precision values: {self._supported_precision_type}" + f"Precision {repr(precision)} is invalid. 
" f"Allowed precision values: {self._precision_types}" ) self._precision_flag = precision if plugins: for plugin in plugins: - if isinstance(plugin, Strategy) or isinstance(plugin, str) and plugin in self._existing_strategies_str: + if isinstance(plugin, Strategy) or isinstance(plugin, str) and plugin in self._registered_strategies: self._strategy_flag = plugin rank_zero_deprecation( f"Passing {plugin} `strategy` to the `plugins` flag in Trainer has been deprecated" @@ -293,7 +284,7 @@ def _check_config_and_set_final_flags( elif isinstance(plugin, PrecisionPlugin): self._precision_plugin_flag = plugin - elif isinstance(plugin, str) and plugin in self._supported_precision_type: + elif isinstance(plugin, str) and plugin in self._precision_types: self._precision_flag = plugin elif isinstance(plugin, CheckpointIO): self.checkpoint_io = plugin From 88db8306346b89d62f7a839715ad8b783e40d6de Mon Sep 17 00:00:00 2001 From: Kaushik B Date: Thu, 17 Feb 2022 08:43:58 +0530 Subject: [PATCH 56/69] Improve exceptions --- .../trainer/connectors/accelerator_connector.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 7bb665a75864c..f6ccb903a57b1 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -209,7 +209,7 @@ def _check_config_and_set_final_flags( 1. strategy: strategy, accelerator and plugin can all be set to strategies 2. accelerator: if the value of the accelerator argument is a type of accelerator (instance or string), - set self.accelerator_flag accordingly. If the value is strategy related (instance or string), + set self._accelerator_flag accordingly. If the value is strategy related (instance or string), it gets handled by 1. 3. precision: The final value of the precision flag may be determined either by the precision argument or by a plugin instance. @@ -239,18 +239,19 @@ def _check_config_and_set_final_flags( ) if isinstance(accelerator, str) and accelerator in self._registered_strategies and strategy != accelerator: raise MisconfigurationException( - "strategy str already set through strategy flag, but have also passed in through accelerator" + f"strategy {strategy} already set through `strategy` flag," + f" but have also passed {accelerator} in through the accelerator flag." ) if plugins: for plugin in plugins: if isinstance(plugin, Strategy): raise MisconfigurationException( - f"You have passed `Trainer(strategy)`" + f"You have passed `Trainer(strategy={strategy})`" f" and you can only specify one strategy, but you have passed {plugin} as a plugin." ) if isinstance(plugin, str) and plugin in self._registered_strategies: raise MisconfigurationException( - f"You have passed `Trainer(strategy)`" + f"You have passed `Trainer(strategy={strategy})`" f" and you can only specify one strategy, but you have passed {plugin} as a plugin." ) @@ -269,7 +270,7 @@ def _check_config_and_set_final_flags( if precision is not None: if str(precision) not in self._precision_types: raise MisconfigurationException( - f"Precision {repr(precision)} is invalid. " f"Allowed precision values: {self._precision_types}" + f"Precision {repr(precision)} is invalid. 
Allowed precision values: {self._precision_types}" ) self._precision_flag = precision @@ -292,7 +293,7 @@ def _check_config_and_set_final_flags( self._cluster_environment_flag = plugin else: raise MisconfigurationException( - f"Found invalid type for plugin {plugin}. Expected a precision or training type plugin." + f"Found invalid type for plugin {plugin}. Expected a precision plugin or training strategy." ) # handle the case when the user passes in a strategy instance which has an accelerator, precision, From ee70db83c3b7229e7880ec49a47dc3449aece1d8 Mon Sep 17 00:00:00 2001 From: Kaushik B Date: Thu, 17 Feb 2022 08:54:35 +0530 Subject: [PATCH 57/69] Updates to attributes --- .../connectors/accelerator_connector.py | 56 +++++++++---------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index f6ccb903a57b1..8cb1f03ccdcc3 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -298,7 +298,7 @@ def _check_config_and_set_final_flags( # handle the case when the user passes in a strategy instance which has an accelerator, precision, # checkpoint io or cluster env set up - # TODO: @awaelchli imporve the error messages below + # TODO: @awaelchli improve the error messages below if self._strategy_flag and isinstance(self._strategy_flag, Strategy): if self._strategy_flag._accelerator: if self._accelerator_flag: @@ -359,14 +359,14 @@ def _check_device_config_and_set_final_flags( if devices in (0, "0", "0,"): raise MisconfigurationException(f"You passed `devices={devices}`, please set a number > 0") - self._device_flag = devices + self._devices_flag = devices # TODO: Delete this method num_processes, gpus, ipus and tpu_cores get removed self._map_deprecated_devices_specfic_info_to_accelerator_and_device_flag( devices, num_processes, gpus, ipus, tpu_cores ) - if self._device_flag == "auto" and self._accelerator_flag is None: + if self._devices_flag == "auto" and self._accelerator_flag is None: raise MisconfigurationException( f"You passed `devices={devices}` but haven't specified" " `accelerator=('auto'|'tpu'|'gpu'|'ipu'|'cpu')` for the devices mapping" @@ -399,7 +399,7 @@ def _map_deprecated_devices_specfic_info_to_accelerator_and_device_flag( ) > 1: # TODO: @awaelchli improve error message rank_zero_warn("more than one device specific flag has been set") - self._device_flag = deprecated_devices_specific_flag + self._devices_flag = deprecated_devices_specific_flag if self._accelerator_flag is None: # set accelerator type based on num_processes, gpus, ipus, tpu_cores @@ -436,48 +436,48 @@ def _set_parallel_devices_and_init_accelerator(self) -> None: self.accelerator: Accelerator = self._accelerator_flag elif self._accelerator_flag == "tpu": self.accelerator = TPUAccelerator() - if self._device_flag == "auto" or not self._device_flag: - self._device_flag = TPUAccelerator.auto_device_count() - if isinstance(self._device_flag, int): - self._parallel_devices = list(range(self._device_flag)) + if self._devices_flag == "auto" or not self._devices_flag: + self._devices_flag = TPUAccelerator.auto_device_count() + if isinstance(self._devices_flag, int): + self._parallel_devices = list(range(self._devices_flag)) else: - self._parallel_devices = self._device_flag # type: ignore[assignment] + self._parallel_devices = self._devices_flag # type: ignore[assignment] elif 
self._accelerator_flag == "ipu": self.accelerator = IPUAccelerator() - if self._device_flag == "auto" or not self._device_flag: - self._device_flag = IPUAccelerator.auto_device_count() - if isinstance(self._device_flag, int): - self._parallel_devices = list(range(self._device_flag)) + if self._devices_flag == "auto" or not self._devices_flag: + self._devices_flag = IPUAccelerator.auto_device_count() + if isinstance(self._devices_flag, int): + self._parallel_devices = list(range(self._devices_flag)) elif self._accelerator_flag == "gpu": self.accelerator = GPUAccelerator() - if self._device_flag == "auto" or not self._device_flag: - self._device_flag = GPUAccelerator.auto_device_count() - if isinstance(self._device_flag, int) or isinstance(self._device_flag, str): - self._device_flag = int(self._device_flag) + if self._devices_flag == "auto" or not self._devices_flag: + self._devices_flag = GPUAccelerator.auto_device_count() + if isinstance(self._devices_flag, int) or isinstance(self._devices_flag, str): + self._devices_flag = int(self._devices_flag) self._parallel_devices = ( - [torch.device("cuda", i) for i in device_parser.parse_gpu_ids(self._device_flag)] # type: ignore - if self._device_flag != 0 + [torch.device("cuda", i) for i in device_parser.parse_gpu_ids(self._devices_flag)] # type: ignore + if self._devices_flag != 0 else [] ) else: - self._parallel_devices = [torch.device("cuda", i) for i in self._device_flag] + self._parallel_devices = [torch.device("cuda", i) for i in self._devices_flag] elif self._accelerator_flag == "cpu": self.accelerator = CPUAccelerator() - if self._device_flag == "auto" or not self._device_flag: - self._device_flag = CPUAccelerator.auto_device_count() - if isinstance(self._device_flag, int): - self._parallel_devices = [torch.device("cpu")] * self._device_flag + if self._devices_flag == "auto" or not self._devices_flag: + self._devices_flag = CPUAccelerator.auto_device_count() + if isinstance(self._devices_flag, int): + self._parallel_devices = [torch.device("cpu")] * self._devices_flag else: rank_zero_warn( "The flag `devices` must be an int with `accelerator='cpu'`," - f" got `devices={self._device_flag}` instead." + f" got `devices={self._devices_flag}` instead." ) - self._gpus = self._device_flag if not self._gpus else self._gpus - self._tpu_cores = self._device_flag if not self._tpu_cores else self._tpu_cores + self._gpus = self._devices_flag if not self._gpus else self._gpus + self._tpu_cores = self._devices_flag if not self._tpu_cores else self._tpu_cores def _choose_and_init_cluster_environment(self) -> ClusterEnvironment: if isinstance(self._cluster_environment_flag, ClusterEnvironment): @@ -542,7 +542,7 @@ def _check_strategy_and_fallback(self) -> None: "`accelerator='ddp_cpu'` is not supported on TPU machines. 
" "Learn more: https://github.com/PyTorchLightning/pytorch-lightning/issues/7810" ) - if self._device_flag == 1 and self._num_nodes_flag > 1: + if self._devices_flag == 1 and self._num_nodes_flag > 1: strategy_flag = DDPStrategy.strategy_name else: strategy_flag = "ddp_spawn" From c516830008cacc131b0df6bbd38f86fa6b44fbe4 Mon Sep 17 00:00:00 2001 From: Kaushik B Date: Thu, 17 Feb 2022 09:30:05 +0530 Subject: [PATCH 58/69] Add utility methods --- .../connectors/accelerator_connector.py | 28 ++++++++----------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 8cb1f03ccdcc3..1224bbd07a87d 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -175,7 +175,7 @@ def __init__( # 2. Instantiate Accelerator # handle `auto` and `None` - self._special_handle_for_ipu() + self._set_accelerator_if_ipu_strategy_is_passed() if self._accelerator_flag == "auto" or self._accelerator_flag is None: self._accelerator_flag = self._choose_accelerator() self._set_parallel_devices_and_init_accelerator() @@ -380,7 +380,7 @@ def _map_deprecated_devices_specfic_info_to_accelerator_and_device_flag( ipus: Optional[int], tpu_cores: Optional[Union[List[int], str, int]], ) -> None: - """Sets the `device_flag` and `accelerator_flag `based on num_processes, gpus, ipus, tpu_cores.""" + """Sets the `devices_flag` and `accelerator_flag `based on num_processes, gpus, ipus, tpu_cores.""" self._gpus: Optional[Union[List[int], str, int]] = gpus self._tpu_cores: Optional[Union[List[int], str, int]] = tpu_cores gpus = device_parser.parse_gpu_ids(gpus) @@ -412,7 +412,7 @@ def _map_deprecated_devices_specfic_info_to_accelerator_and_device_flag( if num_processes: self._accelerator_flag = "cpu" - def _special_handle_for_ipu(self) -> None: + def _set_accelerator_if_ipu_strategy_is_passed(self) -> None: # current logic only apply to object config # TODO this logic should apply to both str and object config if isinstance(self._strategy_flag, IPUStrategy): @@ -427,17 +427,16 @@ def _choose_accelerator(self) -> str: return "ipu" if torch.cuda.is_available() and torch.cuda.device_count() > 0: return "gpu" - # [RFC] this is current logic, if accelerator=None, default cpu? 
return "cpu" def _set_parallel_devices_and_init_accelerator(self) -> None: self._parallel_devices: List[Union[int, torch.device]] = [] + if isinstance(self._accelerator_flag, Accelerator): self.accelerator: Accelerator = self._accelerator_flag elif self._accelerator_flag == "tpu": self.accelerator = TPUAccelerator() - if self._devices_flag == "auto" or not self._devices_flag: - self._devices_flag = TPUAccelerator.auto_device_count() + self._set_devices_flag_if_auto_passed() if isinstance(self._devices_flag, int): self._parallel_devices = list(range(self._devices_flag)) else: @@ -445,15 +444,13 @@ def _set_parallel_devices_and_init_accelerator(self) -> None: elif self._accelerator_flag == "ipu": self.accelerator = IPUAccelerator() - if self._devices_flag == "auto" or not self._devices_flag: - self._devices_flag = IPUAccelerator.auto_device_count() + self._set_devices_flag_if_auto_passed() if isinstance(self._devices_flag, int): self._parallel_devices = list(range(self._devices_flag)) elif self._accelerator_flag == "gpu": self.accelerator = GPUAccelerator() - if self._devices_flag == "auto" or not self._devices_flag: - self._devices_flag = GPUAccelerator.auto_device_count() + self._set_devices_flag_if_auto_passed() if isinstance(self._devices_flag, int) or isinstance(self._devices_flag, str): self._devices_flag = int(self._devices_flag) self._parallel_devices = ( @@ -466,8 +463,7 @@ def _set_parallel_devices_and_init_accelerator(self) -> None: elif self._accelerator_flag == "cpu": self.accelerator = CPUAccelerator() - if self._devices_flag == "auto" or not self._devices_flag: - self._devices_flag = CPUAccelerator.auto_device_count() + self._set_devices_flag_if_auto_passed() if isinstance(self._devices_flag, int): self._parallel_devices = [torch.device("cpu")] * self._devices_flag else: @@ -479,6 +475,10 @@ def _set_parallel_devices_and_init_accelerator(self) -> None: self._gpus = self._devices_flag if not self._gpus else self._gpus self._tpu_cores = self._devices_flag if not self._tpu_cores else self._tpu_cores + def _set_devices_flag_if_auto_passed(self) -> None: + if self._devices_flag == "auto" or not self._devices_flag: + self._devices_flag = self.accelerator.auto_device_count() + def _choose_and_init_cluster_environment(self) -> ClusterEnvironment: if isinstance(self._cluster_environment_flag, ClusterEnvironment): return self._cluster_environment_flag @@ -490,10 +490,6 @@ def _choose_and_init_cluster_environment(self) -> ClusterEnvironment: return env_type() return LightningEnvironment() - @property - def _is_sharded_training_type(self) -> bool: - return isinstance(self._strategy, (DDPShardedStrategy, DDPSpawnShardedStrategy)) - def _is_slurm_managing_tasks(self) -> bool: """used by choosing cluster enviroment.""" if not SLURMEnvironment.detect() or SLURMEnvironment.job_name() == "bash": From 8b0721825415efcaf5f342fc062b476628e925f2 Mon Sep 17 00:00:00 2001 From: Kaushik B Date: Thu, 17 Feb 2022 10:57:01 +0530 Subject: [PATCH 59/69] Handle zero/empty list values for devices flag --- .../trainer/connectors/accelerator_connector.py | 12 ++++++------ tests/accelerators/test_accelerator_connector.py | 8 ++++++++ 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 1224bbd07a87d..413009a25c373 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -355,16 +355,16 
@@ def _check_device_config_and_set_final_flags( tpu_cores: Optional[Union[List[int], int]], ) -> None: self._num_nodes_flag = int(num_nodes) if num_nodes is not None else 1 - - if devices in (0, "0", "0,"): - raise MisconfigurationException(f"You passed `devices={devices}`, please set a number > 0") - self._devices_flag = devices - # TODO: Delete this method num_processes, gpus, ipus and tpu_cores get removed + # TODO: Delete this method when num_processes, gpus, ipus and tpu_cores gets removed self._map_deprecated_devices_specfic_info_to_accelerator_and_device_flag( devices, num_processes, gpus, ipus, tpu_cores ) + + if self._devices_flag in ([], 0, "0", "0,"): + rank_zero_warn(f"You passed `devices={devices}`, switching to `cpu` accelerator") + self._accelerator_flag = "cpu" if self._devices_flag == "auto" and self._accelerator_flag is None: raise MisconfigurationException( @@ -380,7 +380,7 @@ def _map_deprecated_devices_specfic_info_to_accelerator_and_device_flag( ipus: Optional[int], tpu_cores: Optional[Union[List[int], str, int]], ) -> None: - """Sets the `devices_flag` and `accelerator_flag `based on num_processes, gpus, ipus, tpu_cores.""" + """Sets the `devices_flag` and `accelerator_flag` based on num_processes, gpus, ipus, tpu_cores.""" self._gpus: Optional[Union[List[int], str, int]] = gpus self._tpu_cores: Optional[Union[List[int], str, int]] = tpu_cores gpus = device_parser.parse_gpu_ids(gpus) diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py index 2df4a8e1b63da..946da00e4d1f0 100644 --- a/tests/accelerators/test_accelerator_connector.py +++ b/tests/accelerators/test_accelerator_connector.py @@ -942,3 +942,11 @@ def test_devices_auto_choice_gpu(is_gpu_available_mock, device_count_mock): trainer = Trainer(accelerator="auto", devices="auto") assert trainer.devices == 2 assert trainer.gpus == 2 + + +def test_passing_zero_and_empty_list_to_devices_flag(): + with pytest.warns(UserWarning, match=r"switching to `cpu` accelerator"): + Trainer(accelerator="gpu", devices=0) + + with pytest.warns(UserWarning, match=r"switching to `cpu` accelerator"): + Trainer(accelerator="gpu", devices=[]) From caaf390baec507a51aeae3762df7bac80586d6b8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 17 Feb 2022 05:28:23 +0000 Subject: [PATCH 60/69] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pytorch_lightning/trainer/connectors/accelerator_connector.py | 2 +- tests/accelerators/test_accelerator_connector.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 413009a25c373..7e3aa4ef32af7 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -361,7 +361,7 @@ def _check_device_config_and_set_final_flags( self._map_deprecated_devices_specfic_info_to_accelerator_and_device_flag( devices, num_processes, gpus, ipus, tpu_cores ) - + if self._devices_flag in ([], 0, "0", "0,"): rank_zero_warn(f"You passed `devices={devices}`, switching to `cpu` accelerator") self._accelerator_flag = "cpu" diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py index 946da00e4d1f0..7401fa477d982 100644 --- 
a/tests/accelerators/test_accelerator_connector.py +++ b/tests/accelerators/test_accelerator_connector.py @@ -942,11 +942,11 @@ def test_devices_auto_choice_gpu(is_gpu_available_mock, device_count_mock): trainer = Trainer(accelerator="auto", devices="auto") assert trainer.devices == 2 assert trainer.gpus == 2 - + def test_passing_zero_and_empty_list_to_devices_flag(): with pytest.warns(UserWarning, match=r"switching to `cpu` accelerator"): Trainer(accelerator="gpu", devices=0) - + with pytest.warns(UserWarning, match=r"switching to `cpu` accelerator"): Trainer(accelerator="gpu", devices=[]) From cd12345cd9d75b93e87d2427f1e84bcb44d8d5b5 Mon Sep 17 00:00:00 2001 From: four4fish <88516121+four4fish@users.noreply.github.com> Date: Thu, 17 Feb 2022 09:22:55 -0800 Subject: [PATCH 61/69] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Carlos Mocholí --- pytorch_lightning/strategies/parallel.py | 8 -------- pytorch_lightning/utilities/device_parser.py | 2 +- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/pytorch_lightning/strategies/parallel.py b/pytorch_lightning/strategies/parallel.py index d8a8ab50abe2d..11207065b7e21 100644 --- a/pytorch_lightning/strategies/parallel.py +++ b/pytorch_lightning/strategies/parallel.py @@ -85,14 +85,6 @@ def distributed_sampler_kwargs(self): distributed_sampler_kwargs = dict(num_replicas=len(self.parallel_devices), rank=self.global_rank) return distributed_sampler_kwargs - @property - def parallel_devices(self): - return self._parallel_devices - - @parallel_devices.setter - def parallel_devices(self, parallel_devices): - self._parallel_devices = parallel_devices - def reconciliate_processes(self, trace: str): """Function to re-conciliate processes on failure.""" diff --git a/pytorch_lightning/utilities/device_parser.py b/pytorch_lightning/utilities/device_parser.py index 1e51c5479bdc7..17e2f70aa626a 100644 --- a/pytorch_lightning/utilities/device_parser.py +++ b/pytorch_lightning/utilities/device_parser.py @@ -21,7 +21,7 @@ from pytorch_lightning.utilities.exceptions import MisconfigurationException -def determine_root_gpu_device(gpus: List[Union[int, torch.device]]) -> Optional[Union[int, torch.device]]: +def determine_root_gpu_device(gpus: List[_DEVICE]) -> Optional[_DEVICE]: """ Args: gpus: non-empty list of ints representing which gpus to use From a442852753961232915104f19f7d3271a5d24c34 Mon Sep 17 00:00:00 2001 From: Siyu Wang Date: Thu, 17 Feb 2022 10:47:24 -0800 Subject: [PATCH 62/69] address comments --- pytorch_lightning/accelerators/gpu.py | 3 ++- pytorch_lightning/strategies/tpu_spawn.py | 2 +- .../trainer/connectors/accelerator_connector.py | 12 ++++++++++-- pytorch_lightning/utilities/device_parser.py | 3 ++- tests/accelerators/test_accelerator_connector.py | 12 ++++-------- tests/utilities/test_cli.py | 4 ++-- 6 files changed, 21 insertions(+), 15 deletions(-) diff --git a/pytorch_lightning/accelerators/gpu.py b/pytorch_lightning/accelerators/gpu.py index aa8b0d56dbf63..6fa9fa94594af 100644 --- a/pytorch_lightning/accelerators/gpu.py +++ b/pytorch_lightning/accelerators/gpu.py @@ -82,7 +82,8 @@ def auto_device_count() -> int: @staticmethod def is_available() -> bool: - return torch.cuda.device_count() > 0 + print(torch.cuda.is_available() and torch.cuda.device_count() > 0) + return torch.cuda.is_available() and torch.cuda.device_count() > 0 def get_nvidia_gpu_stats(device: _DEVICE) -> dict[str, float]: diff --git 
a/pytorch_lightning/strategies/tpu_spawn.py b/pytorch_lightning/strategies/tpu_spawn.py index b43267b5c91d6..867624fd2151e 100644 --- a/pytorch_lightning/strategies/tpu_spawn.py +++ b/pytorch_lightning/strategies/tpu_spawn.py @@ -52,7 +52,7 @@ class TPUSpawnStrategy(DDPSpawnStrategy): """Strategy for training multiple TPU devices using the :func:`torch.multiprocessing.spawn` method.""" - strategy_name = "tpu_spawn_strategy" + strategy_name = "tpu_spawn" def __init__( self, diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 7e3aa4ef32af7..a3c626cecfe1c 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -451,6 +451,8 @@ def _set_parallel_devices_and_init_accelerator(self) -> None: elif self._accelerator_flag == "gpu": self.accelerator = GPUAccelerator() self._set_devices_flag_if_auto_passed() + # TODO add device availablity check for all devices, not only GPU + self._check_device_availability() if isinstance(self._devices_flag, int) or isinstance(self._devices_flag, str): self._devices_flag = int(self._devices_flag) self._parallel_devices = ( @@ -459,7 +461,7 @@ def _set_parallel_devices_and_init_accelerator(self) -> None: else [] ) else: - self._parallel_devices = [torch.device("cuda", i) for i in self._devices_flag] + self._parallel_devices = [torch.device("cuda", i) for i in self._devices_flag] # type: ignore elif self._accelerator_flag == "cpu": self.accelerator = CPUAccelerator() @@ -479,6 +481,12 @@ def _set_devices_flag_if_auto_passed(self) -> None: if self._devices_flag == "auto" or not self._devices_flag: self._devices_flag = self.accelerator.auto_device_count() + def _check_device_availability(self) -> None: + if not self.accelerator.is_available(): + raise MisconfigurationException( + f"You requested {self._accelerator_flag}, " f"but {self._accelerator_flag} is not available" + ) + def _choose_and_init_cluster_environment(self) -> ClusterEnvironment: if isinstance(self._cluster_environment_flag, ClusterEnvironment): return self._cluster_environment_flag @@ -514,7 +522,7 @@ def _choose_strategy(self) -> Union[Strategy, str]: return DDPStrategy.strategy_name if len(self._parallel_devices) <= 1: device = ( - device_parser.determine_root_gpu_device(self._parallel_devices) + device_parser.determine_root_gpu_device(self._parallel_devices) # type: ignore if self._accelerator_flag == "gpu" else "cpu" ) diff --git a/pytorch_lightning/utilities/device_parser.py b/pytorch_lightning/utilities/device_parser.py index 17e2f70aa626a..d7b8a319ea4d2 100644 --- a/pytorch_lightning/utilities/device_parser.py +++ b/pytorch_lightning/utilities/device_parser.py @@ -19,6 +19,7 @@ from pytorch_lightning.tuner.auto_gpu_select import pick_multiple_gpus from pytorch_lightning.utilities import _TPU_AVAILABLE from pytorch_lightning.utilities.exceptions import MisconfigurationException +from pytorch_lightning.utilities.types import _DEVICE def determine_root_gpu_device(gpus: List[_DEVICE]) -> Optional[_DEVICE]: @@ -164,7 +165,7 @@ def _sanitize_gpu_ids(gpus: List[int]) -> List[int]: for gpu in gpus: if gpu not in all_available_gpus: raise MisconfigurationException( - f"You requested GPUs: {gpus}\n But your machine only has: {all_available_gpus}" + f"You requested gpu: {gpus}\n But your machine only has: {all_available_gpus}" ) return gpus diff --git a/tests/accelerators/test_accelerator_connector.py 
b/tests/accelerators/test_accelerator_connector.py
index 7401fa477d982..69a40c2adc997 100644
--- a/tests/accelerators/test_accelerator_connector.py
+++ b/tests/accelerators/test_accelerator_connector.py
@@ -453,12 +453,11 @@ def test_accelerator_cpu(mack_gpu_avalible):
     assert trainer._device_type == "cpu"
     assert isinstance(trainer.accelerator, CPUAccelerator)
 
-    with pytest.raises(MisconfigurationException):
+    with pytest.raises(MisconfigurationException, match="You requested gpu"):
         trainer = Trainer(gpus=1)
-    # with pytest.raises(MisconfigurationException):
-    #     trainer = Trainer(accelerator="gpu")
-
-    with pytest.raises(MisconfigurationException, match="You requested GPUs:"):
+    with pytest.raises(MisconfigurationException, match="You requested gpu, but gpu is not available"):
+        trainer = Trainer(accelerator="gpu")
+    with pytest.raises(MisconfigurationException, match="You requested gpu:"):
         trainer = Trainer(accelerator="cpu", gpus=1)
 
 
@@ -470,9 +469,6 @@ def test_accelerator_gpu():
     assert trainer._device_type == "gpu"
     assert isinstance(trainer.accelerator, GPUAccelerator)
 
-    # with pytest.raises(
-    #     MisconfigurationException, match="You passed `accelerator='gpu'`, but you didn't pass `gpus` to `Trainer`"
-    # ):
     trainer = Trainer(accelerator="gpu")
 
     trainer = Trainer(accelerator="auto", gpus=1)
diff --git a/tests/utilities/test_cli.py b/tests/utilities/test_cli.py
index 8992f0c1accd9..2803c0c4601c1 100644
--- a/tests/utilities/test_cli.py
+++ b/tests/utilities/test_cli.py
@@ -582,8 +582,8 @@ def on_fit_start(self):
     (
         # dict(strategy="ddp_spawn")
         # dict(strategy="ddp")
-        # !! old accl_conn will choose singleDeviceStrategy for both strategy=ddp/ddp_spawn
-        # this test never worked with DDPSpawnStrategy
+        # the previous accl_conn will choose singleDeviceStrategy for both strategy=ddp/ddp_spawn
+        # TODO revisit this test as it never worked with DDP or DDPSpawn
         dict(strategy="single_device"),
         pytest.param({"tpu_cores": 1}, marks=RunIf(tpu=True)),
     ),

From 3152f81728bb29e77fab03abdc1dec6d50514fbb Mon Sep 17 00:00:00 2001
From: Siyu Wang
Date: Thu, 17 Feb 2022 10:50:25 -0800
Subject: [PATCH 63/69] minor comments change

---
 tests/accelerators/test_accelerator_connector.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py
index 69a40c2adc997..78fb7b0c30b48 100644
--- a/tests/accelerators/test_accelerator_connector.py
+++ b/tests/accelerators/test_accelerator_connector.py
@@ -550,7 +550,6 @@ def test_accelerator_gpu_with_gpus_priority():
 
 
 def test_validate_accelerator_and_devices():
-    # with pytest.raises(MisconfigurationException, match="You passed `devices=2` but haven't specified"):
     trainer = Trainer(accelerator="ddp_cpu", devices=2)
     assert isinstance(trainer.accelerator, CPUAccelerator)
     assert trainer.num_processes == 2

From 2c2e5ace657318607eb52f5d29d7e241dfde1d9a Mon Sep 17 00:00:00 2001
From: Siyu Wang
Date: Thu, 17 Feb 2022 11:24:11 -0800
Subject: [PATCH 64/69] fix tests

---
 .../trainer/connectors/accelerator_connector.py  | 12 +++---------
 tests/accelerators/test_accelerator_connector.py |  5 +++--
 tests/strategies/test_deepspeed_strategy.py      |  7 ++++++-
 3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py
index a3c626cecfe1c..e68806a024994 100644
--- a/pytorch_lightning/trainer/connectors/accelerator_connector.py
+++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py
@@ -430,6 +430,7 @@ def _choose_accelerator(self) -> str:
         return "cpu"
 
     def _set_parallel_devices_and_init_accelerator(self) -> None:
+        # TODO add device availability check
         self._parallel_devices: List[Union[int, torch.device]] = []
 
         if isinstance(self._accelerator_flag, Accelerator):
@@ -451,8 +452,6 @@ def _set_parallel_devices_and_init_accelerator(self) -> None:
         elif self._accelerator_flag == "gpu":
             self.accelerator = GPUAccelerator()
             self._set_devices_flag_if_auto_passed()
-            # TODO add device availablity check for all devices, not only GPU
-            self._check_device_availability()
             if isinstance(self._devices_flag, int) or isinstance(self._devices_flag, str):
                 self._devices_flag = int(self._devices_flag)
                 self._parallel_devices = (
@@ -481,12 +480,6 @@ def _set_devices_flag_if_auto_passed(self) -> None:
         if self._devices_flag == "auto" or not self._devices_flag:
             self._devices_flag = self.accelerator.auto_device_count()
 
-    def _check_device_availability(self) -> None:
-        if not self.accelerator.is_available():
-            raise MisconfigurationException(
-                f"You requested {self._accelerator_flag}, " f"but {self._accelerator_flag} is not available"
-            )
-
     def _choose_and_init_cluster_environment(self) -> ClusterEnvironment:
         if isinstance(self._cluster_environment_flag, ClusterEnvironment):
             return self._cluster_environment_flag
@@ -651,7 +644,8 @@ def _check_and_init_precision(self) -> PrecisionPlugin:
             return NativeMixedPrecisionPlugin(self._precision_flag, device)
 
         if self._amp_type_flag == AMPType.APEX:
-            return ApexMixedPrecisionPlugin(self._amp_level_flag)  # type: ignore
+            self._amp_level_flag = self._amp_level_flag or "O2"
+            return ApexMixedPrecisionPlugin(self._amp_level_flag)
 
         raise RuntimeError("No precision set")
 
diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py
index 78fb7b0c30b48..526d94fa3c829 100644
--- a/tests/accelerators/test_accelerator_connector.py
+++ b/tests/accelerators/test_accelerator_connector.py
@@ -455,8 +455,9 @@ def test_accelerator_cpu(mack_gpu_avalible):
     with pytest.raises(MisconfigurationException, match="You requested gpu"):
         trainer = Trainer(gpus=1)
-    with pytest.raises(MisconfigurationException, match="You requested gpu, but gpu is not available"):
-        trainer = Trainer(accelerator="gpu")
+    # TODO enable this test when add device availability check
+    # with pytest.raises(MisconfigurationException, match="You requested gpu, but gpu is not available"):
+    #     trainer = Trainer(accelerator="gpu")
     with pytest.raises(MisconfigurationException, match="You requested gpu:"):
         trainer = Trainer(accelerator="cpu", gpus=1)
 
diff --git a/tests/strategies/test_deepspeed_strategy.py b/tests/strategies/test_deepspeed_strategy.py
index 5eed2578546ba..e5306b0942131 100644
--- a/tests/strategies/test_deepspeed_strategy.py
+++ b/tests/strategies/test_deepspeed_strategy.py
@@ -167,7 +167,12 @@ def test_deepspeed_precision_choice(amp_backend, precision, tmpdir):
     """
     trainer = Trainer(
-        fast_dev_run=True, default_root_dir=tmpdir, strategy="deepspeed", amp_backend=amp_backend, precision=precision
+        fast_dev_run=True,
+        default_root_dir=tmpdir,
+        accelerator="gpu",
+        strategy="deepspeed",
+        amp_backend=amp_backend,
+        precision=precision,
     )
 
     assert isinstance(trainer.strategy, DeepSpeedStrategy)

From 5f32feb07eb62dc881def9bc8776c7382370dd1b Mon Sep 17 00:00:00 2001
From: Siyu Wang
Date: Thu, 17 Feb 2022 13:44:35 -0800
Subject: [PATCH 65/69] minor fix

---
 pytorch_lightning/accelerators/gpu.py | 3 +--
 tests/trainer/test_trainer_cli.py     | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/pytorch_lightning/accelerators/gpu.py b/pytorch_lightning/accelerators/gpu.py
index 6fa9fa94594af..aa8b0d56dbf63 100644
--- a/pytorch_lightning/accelerators/gpu.py
+++ b/pytorch_lightning/accelerators/gpu.py
@@ -82,8 +82,7 @@ def auto_device_count() -> int:
 
     @staticmethod
     def is_available() -> bool:
-        print(torch.cuda.is_available() and torch.cuda.device_count() > 0)
-        return torch.cuda.is_available() and torch.cuda.device_count() > 0
+        return torch.cuda.device_count() > 0
 
 
 def get_nvidia_gpu_stats(device: _DEVICE) -> dict[str, float]:
diff --git a/tests/trainer/test_trainer_cli.py b/tests/trainer/test_trainer_cli.py
index 330b0f75ffb61..b5713893f769b 100644
--- a/tests/trainer/test_trainer_cli.py
+++ b/tests/trainer/test_trainer_cli.py
@@ -163,7 +163,7 @@ def test_argparse_args_parsing_fast_dev_run(cli_args, expected):
 
 @pytest.mark.parametrize(
     ["cli_args", "expected_parsed", "expected_device_ids"],
-    [("", None, None), ("--accelerator gpu --devices 1", "1", [0])],
+    [("", None, None), ("--accelerator gpu --devices 1", "1", [0]), ("--accelerator gpu --devices 0,", "0,", None)],
 )
 @RunIf(min_gpus=1)
 def test_argparse_args_parsing_devices(cli_args, expected_parsed, expected_device_ids):

From f2ab1d6df2a9ae3686fa8b1996b18f0af5c6c23f Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 17 Feb 2022 21:46:18 +0000
Subject: [PATCH 66/69] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 pytorch_lightning/strategies/ddp.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pytorch_lightning/strategies/ddp.py b/pytorch_lightning/strategies/ddp.py
index e13f8a3536bc8..b5d83478101f1 100644
--- a/pytorch_lightning/strategies/ddp.py
+++ b/pytorch_lightning/strategies/ddp.py
@@ -96,7 +96,6 @@ def __init__(
         self._sync_dir: Optional[str] = None
         self._rank_0_will_call_children_scripts: bool = False
 
-
     @property
     def is_distributed(self) -> bool:
         return True

From a6ff2c34c03222dd79f08af087785924ae068af2 Mon Sep 17 00:00:00 2001
From: Siyu Wang
Date: Thu, 17 Feb 2022 14:02:21 -0800
Subject: [PATCH 67/69] add _configure_launcher call to accl_conn

---
 pytorch_lightning/trainer/connectors/accelerator_connector.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py
index e68806a024994..20c5f485b4e71 100644
--- a/pytorch_lightning/trainer/connectors/accelerator_connector.py
+++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py
@@ -711,6 +711,7 @@ def _lazy_init_strategy(self) -> None:
             self.strategy.sync_batchnorm = self.sync_batchnorm
         if hasattr(self.strategy, "set_world_ranks"):
             self.strategy.set_world_ranks()
+        self.strategy._configure_launcher()
 
         from pytorch_lightning.utilities import _IS_INTERACTIVE

From 869e5710163803a729581ec1ce2d3dc144db1fbd Mon Sep 17 00:00:00 2001
From: four4fish <88516121+four4fish@users.noreply.github.com>
Date: Thu, 17 Feb 2022 14:49:39 -0800
Subject: [PATCH 68/69] Apply suggestions from code review
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Carlos Mocholí
---
 tests/accelerators/test_accelerator_connector.py | 7 ++++---
 tests/accelerators/test_ipu.py                   | 1 +
 tests/accelerators/test_tpu.py                   | 1 +
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py
index 526d94fa3c829..aabc21b10d20b 100644
--- a/tests/accelerators/test_accelerator_connector.py
+++ b/tests/accelerators/test_accelerator_connector.py
@@ -446,7 +446,7 @@ def test_accelerator_choice_multi_node_gpu(
 
 
 @mock.patch("torch.cuda.is_available", return_value=False)
-def test_accelerator_cpu(mack_gpu_avalible):
+def test_accelerator_cpu(_):
 
     trainer = Trainer(accelerator="cpu")
 
@@ -471,6 +471,7 @@ def test_accelerator_gpu():
     assert isinstance(trainer.accelerator, GPUAccelerator)
 
     trainer = Trainer(accelerator="gpu")
+    assert isinstance(trainer.accelerator, GPUAccelerator)
 
     trainer = Trainer(accelerator="auto", gpus=1)
 
@@ -573,8 +574,8 @@ def test_devices_with_cpu_only_supports_integer():
 
     with pytest.warns(UserWarning, match="The flag `devices` must be an int"):
         trainer = Trainer(accelerator="cpu", devices="1,3")
-        assert isinstance(trainer.accelerator, CPUAccelerator)
-        assert trainer.devices == 1
+    assert isinstance(trainer.accelerator, CPUAccelerator)
+    assert trainer.devices == 1
 
 
 @pytest.mark.parametrize("training_type", ["ddp2", "dp"])
diff --git a/tests/accelerators/test_ipu.py b/tests/accelerators/test_ipu.py
index 40ceab7195219..b8f01815704f5 100644
--- a/tests/accelerators/test_ipu.py
+++ b/tests/accelerators/test_ipu.py
@@ -506,6 +506,7 @@ def test_accelerator_ipu():
     assert isinstance(trainer.accelerator, IPUAccelerator)
 
     trainer = Trainer(accelerator="ipu")
+    assert isinstance(trainer.accelerator, IPUAccelerator)
 
     trainer = Trainer(accelerator="auto", ipus=8)
 
diff --git a/tests/accelerators/test_tpu.py b/tests/accelerators/test_tpu.py
index dc004f957dac1..d8f99ec4dcedb 100644
--- a/tests/accelerators/test_tpu.py
+++ b/tests/accelerators/test_tpu.py
@@ -91,6 +91,7 @@ def test_accelerator_tpu():
     assert isinstance(trainer.accelerator, TPUAccelerator)
 
     trainer = Trainer(accelerator="tpu")
+    assert isinstance(trainer.accelerator, TPUAccelerator)
 
 
 @RunIf(tpu=True)

From 9568f3b728b10bfd1b737f0445acf664653a159d Mon Sep 17 00:00:00 2001
From: four4fish <88516121+four4fish@users.noreply.github.com>
Date: Thu, 17 Feb 2022 14:59:32 -0800
Subject: [PATCH 69/69] Apply suggestions from code review
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Carlos Mocholí
---
 pytorch_lightning/trainer/trainer.py             | 2 +-
 tests/accelerators/test_accelerator_connector.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py
index 4360015ba537b..6ed5d6c31f719 100644
--- a/pytorch_lightning/trainer/trainer.py
+++ b/pytorch_lightning/trainer/trainer.py
@@ -1964,7 +1964,7 @@ def should_rank_save_checkpoint(self) -> bool:
         )
 
     @property
-    def _strategy_type(self) -> Optional[str]:
+    def _strategy_type(self) -> str:
         return self.strategy.strategy_name
 
     @property
diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py
index aabc21b10d20b..76fa6d64f5a56 100644
--- a/tests/accelerators/test_accelerator_connector.py
+++ b/tests/accelerators/test_accelerator_connector.py
@@ -453,7 +453,7 @@ def test_accelerator_cpu(_):
     assert trainer._device_type == "cpu"
     assert isinstance(trainer.accelerator, CPUAccelerator)
 
-    with pytest.raises(MisconfigurationException, match="You requested gpu"):
+    with pytest.raises(MisconfigurationException, match="You requested gpu:"):
         trainer = Trainer(gpus=1)
     # TODO enable this test when add device availability check
     # with pytest.raises(MisconfigurationException, match="You requested gpu, but gpu is not available"):