diff --git a/dockers/tpu-tests/tpu_test_cases.jsonnet b/dockers/tpu-tests/tpu_test_cases.jsonnet
index f9976134df0dc..03cd3b7b65517 100644
--- a/dockers/tpu-tests/tpu_test_cases.jsonnet
+++ b/dockers/tpu-tests/tpu_test_cases.jsonnet
@@ -21,7 +21,7 @@ local tputests = base.BaseTest {
   command: utils.scriptCommand(
     |||
       cd pytorch-lightning
-      coverage run --source=pytorch_lightning -m pytest -v \
+      coverage run --source=pytorch_lightning -m pytest -v --capture=no \
           pytorch_lightning/utilities/xla_device_utils.py \
           tests/accelerators/legacy/test_tpu_backend.py \
           tests/models/test_tpu.py
diff --git a/pytorch_lightning/accelerators/accelerator.py b/pytorch_lightning/accelerators/accelerator.py
index 2d9e31f7571c1..22fd714db9a34 100644
--- a/pytorch_lightning/accelerators/accelerator.py
+++ b/pytorch_lightning/accelerators/accelerator.py
@@ -76,7 +76,7 @@ def setup(self, trainer: "Trainer", model: LightningModule) -> None:
             model: the model to train
         """
         self.connect_training_type_plugin(self.training_type_plugin, model)
-        self.setup_optimizers(trainer, model)
+        self.setup_optimizers(trainer)
         self.connect_precision_plugin(self.precision_plugin)
 
     @property
@@ -306,7 +306,7 @@ def on_train_end(self) -> None:
         """Hook to do something at the end of the training"""
         pass
 
-    def setup_optimizers(self, trainer: "Trainer", model: LightningModule):
+    def setup_optimizers(self, trainer: "Trainer"):
         """creates optimizers and schedulers
 
         Args:
@@ -315,7 +315,7 @@ def setup_optimizers(self, trainer: "Trainer", model: LightningModule):
         """
         if trainer.testing is True:
             return
-        optimizers, lr_schedulers, optimizer_frequencies = trainer.init_optimizers(model)
+        optimizers, lr_schedulers, optimizer_frequencies = trainer.init_optimizers(self.lightning_module)
         self.optimizers = optimizers
         self.lr_schedulers = lr_schedulers
         self.optimizer_frequencies = optimizer_frequencies
diff --git a/pytorch_lightning/accelerators/accelerator_connector.py b/pytorch_lightning/accelerators/accelerator_connector.py
index 7af53bc896b46..49d681a579127 100755
--- a/pytorch_lightning/accelerators/accelerator_connector.py
+++ b/pytorch_lightning/accelerators/accelerator_connector.py
@@ -228,7 +228,7 @@ def on_tpu(self):
 
     @property
     def tpu_id(self):
-        if self.on_tpu:
+        if self.on_tpu and isinstance(self.tpu_cores, list):
             return self.tpu_cores[0]
 
         return None
@@ -373,7 +373,10 @@ def select_training_type_plugin(self):
         elif self.use_horovod:
             plugin = HorovodPlugin(parallel_devices=self.parallel_devices)
         elif self.on_tpu:
-            plugin = SingleTPUPlugin(self.tpu_id)
+            if isinstance(self.tpu_cores, list):
+                plugin = SingleTPUPlugin(self.tpu_id)
+            else:
+                plugin = TPUSpawnPlugin(parallel_devices=list(range(self.tpu_cores)))
         else:
             plugin = SingleDevicePlugin(device=torch.device(f"cuda:{self.root_gpu}" if self.on_gpu else "cpu"))
         return plugin
diff --git a/pytorch_lightning/accelerators/legacy/tpu_accelerator.py b/pytorch_lightning/accelerators/legacy/tpu_accelerator.py
index 009144bb8431a..71a9edecf4c34 100644
--- a/pytorch_lightning/accelerators/legacy/tpu_accelerator.py
+++ b/pytorch_lightning/accelerators/legacy/tpu_accelerator.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 import io
 import os
-import re
 from typing import Any, Callable, Optional, Union
 
 import torch
@@ -31,7 +30,6 @@
     rank_zero_only,
     rank_zero_warn,
 )
-from pytorch_lightning.utilities.cloud_io import atomic_save
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 
 if _TPU_AVAILABLE:
@@ -307,29 +305,6 @@ def load_spawn_weights(self, original_model):
 
         return loaded_model
 
-    def transfer_distrib_spawn_state_on_fit_end(self, model, mp_queue, results):
-        if self.trainer.distributed_backend not in ("ddp_spawn", "ddp_cpu", "tpu"):
-            return
-
-        # track the best model path
-        best_model_path = None
-        if self.trainer.checkpoint_callback is not None:
-            best_model_path = self.trainer.checkpoint_callback.best_model_path
-
-        if self.trainer.global_rank == 0 and mp_queue is not None:
-            rank_zero_warn('cleaning up ddp environment...')
-            # todo, pass complete checkpoint as state dictionary
-            mp_queue.put(best_model_path)
-            mp_queue.put(results)
-
-            # save the last weights
-            last_path = None
-            if not self.trainer.testing and best_model_path is not None and len(best_model_path) > 0:
-                last_path = re.sub('.ckpt', '.tmp_end.ckpt', best_model_path)
-                state_dict = move_data_to_device(model.state_dict(), torch.device("cpu"))
-                atomic_save(state_dict, last_path)
-            mp_queue.put(last_path)
-
     def broadcast(self, obj, src=0):
         if self.trainer.tpu_id is not None:
             # running on a single core
diff --git a/pytorch_lightning/callbacks/model_checkpoint.py b/pytorch_lightning/callbacks/model_checkpoint.py
index 240b016837d1b..e6de1737b3f41 100644
--- a/pytorch_lightning/callbacks/model_checkpoint.py
+++ b/pytorch_lightning/callbacks/model_checkpoint.py
@@ -520,11 +520,9 @@ def _save_last_checkpoint(self, trainer, pl_module, ckpt_name_metrics):
                 trainer,
             )
 
-        accelerator_backend = trainer.accelerator_backend
-
-        if accelerator_backend.training_type_plugin.rpc_enabled:
+        if trainer.training_type_plugin.rpc_enabled:
             # RPCPlugin manages saving all model states
-            accelerator_backend.training_type_plugin.rpc_save_model(self._save_model, last_filepath, trainer, pl_module)
+            trainer.training_type_plugin.rpc_save_model(self._save_model, last_filepath, trainer, pl_module)
         else:
             self._save_model(last_filepath, trainer, pl_module)
         if (
diff --git a/pytorch_lightning/core/step_result.py b/pytorch_lightning/core/step_result.py
index 010b4429792e0..0eb5b6b9aec8a 100644
--- a/pytorch_lightning/core/step_result.py
+++ b/pytorch_lightning/core/step_result.py
@@ -148,6 +148,9 @@ def log(
                 value = torch.tensor(value, device=device, dtype=torch.float)
             value = sync_fn(value, group=sync_dist_group, reduce_op=sync_dist_op)
 
+        if value.device.type == "xla":
+            value = value.cpu()
+
         if 'meta' not in self:
             self.__setitem__('meta', {})
 
diff --git a/pytorch_lightning/plugins/precision/tpu_bfloat.py b/pytorch_lightning/plugins/precision/tpu_bfloat.py
index 7f4916dd26a46..c911bf69184f6 100644
--- a/pytorch_lightning/plugins/precision/tpu_bfloat.py
+++ b/pytorch_lightning/plugins/precision/tpu_bfloat.py
@@ -25,4 +25,4 @@ class TPUHalfPrecisionPlugin(PrecisionPlugin):
 
     def connect(self, model: torch.nn.Module, optimizers, lr_schedulers):
         os.environ["XLA_USE_BF16"] = str(1)
-        return super().connect(model=model, optimizers=optimizers, lr_schedulers=lr_schedulers)
+        return super().connect(model=model, optimizers=optimizers, lr_schedulers=lr_schedulers)
\ No newline at end of file
diff --git a/pytorch_lightning/plugins/training_type/ddp_spawn.py b/pytorch_lightning/plugins/training_type/ddp_spawn.py
index 7c9f641b50b3a..390d4ec589d3c 100644
--- a/pytorch_lightning/plugins/training_type/ddp_spawn.py
+++ b/pytorch_lightning/plugins/training_type/ddp_spawn.py
@@ -95,13 +95,20 @@ def set_world_ranks(self, process_idx):
         self.global_rank = self.node_rank * self.num_processes + self.local_rank
         self.world_size = self.num_nodes * self.num_processes
 
+    @property
+    def mp_spawn_kwargs(self):
+        return {
+            "args": (self.lightning_module.trainer, self.mp_queue),
+            "nprocs": self.num_processes,
+        }
+
     def start_training(self, trainer):
-        mp.spawn(self.new_process, nprocs=self.num_processes, args=(trainer, self.mp_queue))
+        mp.spawn(self.new_process, **self.mp_spawn_kwargs)
         # reset optimizers, since main process is never used for training and thus does not have a valid optim state
         trainer.optimizers = []
 
     def start_testing(self, trainer):
-        mp.spawn(self.new_process, nprocs=self.num_processes, args=(trainer, self.mp_queue))
+        mp.spawn(self.new_process, **self.mp_spawn_kwargs)
 
     def new_process(self, process_idx, trainer, mp_queue):
         self.mp_queue = mp_queue
@@ -173,7 +180,6 @@ def pre_configure_ddp(self):
             self._ddp_kwargs["find_unused_parameters"] = True
 
     def configure_ddp(self):
-
         self.pre_configure_ddp()
         self._model = DistributedDataParallel(
             LightningDistributedModule(self.model),
@@ -197,6 +203,9 @@ def determine_ddp_device_ids(self):
             return None
         return [self.root_device.index]
 
+    def on_save(self, checkpoint: dict) -> dict:
+        return checkpoint
+
     def transfer_distrib_spawn_state_on_fit_end(self, results):
         # TODO: is there a better way than accessing callback through model -> trainer -> callback?
         best_model_path = self.lightning_module.trainer.checkpoint_callback.best_model_path
@@ -209,7 +218,7 @@ def transfer_distrib_spawn_state_on_fit_end(self, results):
             # TODO: is there a better way than accessing trainer through model -> trainer?
             if not self.lightning_module.trainer.testing and best_model_path is not None and len(best_model_path) > 0:
                 last_path = re.sub(".ckpt", ".tmp_end.ckpt", best_model_path)
-                atomic_save(self.lightning_module.state_dict(), last_path)
+                atomic_save(self.on_save(self.lightning_module.state_dict()), last_path)
 
             # todo, pass complete checkpoint as state dictionary
             self.mp_queue.put(best_model_path)
diff --git a/pytorch_lightning/plugins/training_type/single_tpu.py b/pytorch_lightning/plugins/training_type/single_tpu.py
index cf0307a29e73a..ba97973a4ac5e 100644
--- a/pytorch_lightning/plugins/training_type/single_tpu.py
+++ b/pytorch_lightning/plugins/training_type/single_tpu.py
@@ -1,11 +1,13 @@
 import io
 import os
-from typing import Optional
+from typing import Optional, Union
 
 import torch
 
+from pytorch_lightning import LightningModule
 from pytorch_lightning.plugins.training_type.single_device import SingleDevicePlugin
 from pytorch_lightning.plugins.training_type.utils import on_colab_kaggle
+from pytorch_lightning.utilities.apply_func import move_data_to_device
 from pytorch_lightning.utilities import _TPU_AVAILABLE, rank_zero_warn
 
 if _TPU_AVAILABLE:
@@ -15,7 +17,9 @@
 
 class SingleTPUPlugin(SingleDevicePlugin):
 
-    def __init__(self, device: torch.device):
+    def __init__(self, device: Union[torch.device, int]):
+        if isinstance(device, int):
+            device = xm.xla_device(device)
         super().__init__(device)
 
         self.tpu_local_core_rank = 0
@@ -24,6 +28,14 @@ def __init__(self, device: torch.device):
     def on_tpu(self) -> bool:
         return True
 
+    def connect(self, model: torch.nn.Module) -> torch.nn.Module:
+        self._model = model
+        self.model_to_device()
+        return self._model
+
+    def model_to_device(self) -> None:
+        self._model.to(self.root_device)
+
     def pre_training(self) -> None:
         if isinstance(self.device, int):
             self.device = xm.xla_device(self.device)
@@ -37,3 +49,19 @@ def post_training(self) -> None:
         if on_colab_kaggle():
             rank_zero_warn("cleaning up... please do not interrupt")
             self.save_spawn_weights(model)
+
+    def save_spawn_weights(self, model: LightningModule) -> Optional[str]:
+        """
+        Dump a temporary checkpoint after ddp ends to get weights out of the process
+        """
+        path = os.path.join(model.trainer.default_root_dir, "__temp_weight_distributed_end.ckpt")
+        model.trainer.save_checkpoint(path)
+        return path
+
+    def on_save(self, checkpoint: dict) -> dict:
+        """
+        Move XLA tensors to CPU before saving
+        Recommended on XLA Guide:
+        https://github.com/pytorch/xla/blob/master/API_GUIDE.md#saving-and-loading-xla-tensors
+        """
+        return move_data_to_device(checkpoint, torch.device("cpu"))
\ No newline at end of file
diff --git a/pytorch_lightning/plugins/training_type/tpu_spawn.py b/pytorch_lightning/plugins/training_type/tpu_spawn.py
index 0f516e2b0b046..8978642a42654 100644
--- a/pytorch_lightning/plugins/training_type/tpu_spawn.py
+++ b/pytorch_lightning/plugins/training_type/tpu_spawn.py
@@ -1,14 +1,15 @@
 import io
 import os
-from typing import Any, Dict, Iterable, Optional, Sequence, Union
+import re
+from typing import Any, Dict, Iterable, Optional, Sequence, Tuple, Union
 
 import torch
+import torch.multiprocessing as mp
 
 from pytorch_lightning.core.lightning import LightningModule
 from pytorch_lightning.plugins.training_type.ddp_spawn import DDPSpawnPlugin
 from pytorch_lightning.plugins.training_type.utils import on_colab_kaggle
 from pytorch_lightning.utilities import _TPU_AVAILABLE, rank_zero_warn
-from pytorch_lightning.utilities.apply_func import move_data_to_device
 from pytorch_lightning.utilities.distributed import rank_zero_only
 from pytorch_lightning.utilities.seed import seed_everything
 
@@ -31,10 +32,24 @@ def __init__(self, parallel_devices: Sequence[int], num_nodes: int = 1, **kwargs
         self.tpu_local_core_rank = 0
         self.start_method = None
 
+    def connect(self, model: torch.nn.Module) -> torch.nn.Module:
+        self.create_mp_queue()
+        self._model = model
+        return self._model
+
+    def create_mp_queue(self):
+        self.start_method = 'fork'
+        smp = mp.get_context(self.start_method)
+        self.mp_queue = smp.SimpleQueue()
+
     @property
     def distributed_sampler_kwargs(self) -> dict:
         return dict(num_replicas=xm.xrt_world_size(), rank=xm.get_ordinal())
 
+    @property
+    def should_finalize(self):
+        return self.world_size == 1
+
     def process_dataloader(self, dataloader: Union[Iterable, torch.utils.data.DataLoader]) -> ParallelLoader:
         device = xm.xla_device()
         dataloader = xla_pl.ParallelLoader(dataloader, [device])
@@ -53,7 +68,9 @@ def set_world_ranks(self, process_idx: int) -> None:
         self.global_rank = self.tpu_local_core_rank
         self.world_size = self.num_nodes * self.num_processes
 
-    def new_process(self, process_idx: int, trainer) -> None:
+    def new_process(self, process_idx: int, trainer, mp_queue) -> None:
+        self.mp_queue = mp_queue
+
         seed = os.environ.get("PL_GLOBAL_SEED")
         if seed is not None:
             seed_everything(int(seed))
@@ -67,6 +84,11 @@ def new_process(self, process_idx: int, trainer) -> None:
             trainer.progress_bar_callback.disable()
 
         self.model_to_device()
+        trainer.accelerator_backend.setup_optimizers(trainer)
+        trainer.precision_plugin.connect(self._model, None, None)
+
+        # replace trainer save_checkpoint to use `xm.save`
+        trainer.save_checkpoint = self.save_checkpoint
         self.barrier()
 
         if trainer.testing:
@@ -77,25 +99,37 @@ def new_process(self, process_idx: int, trainer) -> None:
         self.__save_end_of_training_weights(self.lightning_module)
         self.transfer_distrib_spawn_state_on_fit_end(results)
 
-    def __save_end_of_training_weights(self, model: LightningModule, trainer) -> None:
+    def __save_end_of_training_weights(self, model: LightningModule) -> None:
         # when training ends on these platforms dump weights to get out of the main process
         if on_colab_kaggle():
             rank_zero_warn("cleaning up... please do not interrupt")
             self.save_spawn_weights(model)
 
     def model_to_device(self) -> None:
-        pass
+        self._model.to(xm.xla_device())
 
     def barrier(self, name: Optional[str] = None) -> None:
         rendezvous(f"pl.Trainer.{name}")
 
-    def on_save(self, checkpoint: dict) -> dict:
-        """
-        Move XLA tensors to CPU before saving
-        Recommended on XLA Guide:
-        https://github.com/pytorch/xla/blob/master/API_GUIDE.md#saving-and-loading-xla-tensors
-        """
-        return move_data_to_device(checkpoint, torch.device("cpu"))
+    def transfer_distrib_spawn_state_on_fit_end(self, results):
+        # TODO: is there a better way than accessing callback through model -> trainer -> callback?
+        best_model_path = self.lightning_module.trainer.checkpoint_callback.best_model_path
+
+        if self.mp_queue is not None:
+            rank_zero_warn("cleaning up ddp environment...")
+
+            # save the last weights
+            last_path = None
+            # TODO: is there a better way than accessing trainer through model -> trainer?
+            if not self.lightning_module.trainer.testing and best_model_path is not None and len(best_model_path) > 0:
+                last_path = re.sub(".ckpt", ".tmp_end.ckpt", best_model_path)
+                xm.save(self.lightning_module.state_dict(), last_path)
+
+            if self.global_rank == 0:
+                # todo, pass complete checkpoint as state dictionary
+                self.mp_queue.put(best_model_path)
+                self.mp_queue.put(last_path)
+                self.mp_queue.put(results)
 
     def broadcast(self, obj: object, src: int = 0) -> object:
         buffer = io.BytesIO()
@@ -150,8 +184,8 @@ def post_training(self) -> None:
 
         # restore main state with best weights
         best_path = self.mp_queue.get()
-        results = self.mp_queue.get()
         last_path = self.mp_queue.get()
+        results = self.mp_queue.get()
 
         # transfer back the best path to the trainer
         if self.lightning_module.trainer.checkpoint_callback is not None:
@@ -163,7 +197,7 @@ def post_training(self) -> None:
             ckpt = torch.load(last_path, map_location=lambda storage, loc: storage)
             model.load_state_dict(ckpt)
 
-        self.lightning_module = model
+        self._model = model
 
         # when training completes, load the weights back in main process
         self.__load_weights_on_main_process()
@@ -173,21 +207,48 @@ def __load_weights_on_main_process(self) -> None:
 
         # load weights if not interrupted
         # TODO: check for trainer reference
-        if self.on_colab_kaggle and not model.trainer.testing:
+        if on_colab_kaggle() and not model.trainer.testing:
             self.load_spawn_weights(model)
 
-        self.lightning_module = model
+        self._model = model
 
     @property
     def xmp_spawn_kwargs(self):
         return {
-            "args": (self.lightning_module, trainer, self.mp_queue),
-            "nproc": len(self.parallel_devices),
+            "args": (self.lightning_module.trainer, self.mp_queue),
+            "nprocs": len(self.parallel_devices),
             "start_method": self.start_method
         }
 
     def start_training(self, trainer) -> None:
+        # todo: precision pluging is call in accelerator setup and should be moved
+        if 'XLA_USE_BF16' in os.environ:
+            del os.environ["XLA_USE_BF16"]
         xmp.spawn(self.new_process, **self.xmp_spawn_kwargs)
 
     def start_testing(self, trainer) -> None:
         xmp.spawn(self.new_process, **self.xmp_spawn_kwargs)
+
+    def training_step(self, *args, **kwargs):
+        return self.lightning_module.training_step(*args, **kwargs)
+
+    def validation_step(self, *args, **kwargs):
+        return self.lightning_module.validation_step(*args, **kwargs)
+
+    def test_step(self, *args, **kwargs):
+        return self.lightning_module.test_step(*args, **kwargs)
+
+    def predict(self, *args, **kwargs):
+        return self.lightning_module.predict(*args, **kwargs)
+
+    def save_checkpoint(self, filepath, weights_only: bool = False):
+        """Save model/training states as a checkpoint file through state-dump and file-write.
+
+        Args:
+            filepath: write-target file's path
+            weights_only: saving model weights only
+        """
+        # dump states as a checkpoint dictionary object
+        _checkpoint = self.lightning_module.trainer.checkpoint_connector.dump_checkpoint(weights_only)
+        # Todo: TypeError: 'mappingproxy' object does not support item assignment
+        xm.save({k: v for k, v in _checkpoint.items() if k != "callbacks"}, filepath)
diff --git a/pytorch_lightning/plugins/training_type/training_type_plugin.py b/pytorch_lightning/plugins/training_type/training_type_plugin.py
index 248ab30725a7d..53c8e058a4047 100644
--- a/pytorch_lightning/plugins/training_type/training_type_plugin.py
+++ b/pytorch_lightning/plugins/training_type/training_type_plugin.py
@@ -35,6 +35,10 @@ def __init__(self) -> None:
         self._results = None
         self.global_rank = 0
 
+    @property
+    def should_finalize(self):
+        return True
+
     @property
     @abstractmethod
     def on_gpu(self) -> bool:
diff --git a/pytorch_lightning/trainer/callback_hook.py b/pytorch_lightning/trainer/callback_hook.py
index cc3655a549910..46fd64c1830ea 100644
--- a/pytorch_lightning/trainer/callback_hook.py
+++ b/pytorch_lightning/trainer/callback_hook.py
@@ -209,11 +209,15 @@ def on_save_checkpoint(self):
     def on_load_checkpoint(self, checkpoint):
         """Called when loading a model checkpoint."""
         callback_states = checkpoint.get('callbacks')
-        for callback in self.callbacks:
-            state = callback_states.get(type(callback))
-            if state:
-                state = deepcopy(state)
-                callback.on_load_checkpoint(state)
+        # Todo: the `callback_states` are dropped with TPUSpawn as they 
+        # can't be saved using `xm.save`
+        # https://github.com/pytorch/xla/issues/2773
+        if callback_states is not None:
+            for callback in self.callbacks:
+                state = callback_states.get(type(callback))
+                if state:
+                    state = deepcopy(state)
+                    callback.on_load_checkpoint(state)
 
     def on_after_backward(self):
         """
diff --git a/pytorch_lightning/trainer/connectors/checkpoint_connector.py b/pytorch_lightning/trainer/connectors/checkpoint_connector.py
index 91f4de291cb47..2fca7b410f3e1 100644
--- a/pytorch_lightning/trainer/connectors/checkpoint_connector.py
+++ b/pytorch_lightning/trainer/connectors/checkpoint_connector.py
@@ -400,11 +400,11 @@ def save_checkpoint(self, filepath, weights_only: bool = False):
         """
         # dump states as a checkpoint dictionary object
         checkpoint = self.dump_checkpoint(weights_only)
-
         if self.trainer.is_global_zero:
             # write the checkpoint dictionary on the file
-            if self.trainer.accelerator_backend:
-                checkpoint = self.trainer.accelerator_backend.on_save(checkpoint)
+
+            if self.trainer.training_type_plugin:
+                checkpoint = self.trainer.training_type_plugin.on_save(checkpoint)
             try:
                 atomic_save(checkpoint, filepath)
             except AttributeError as err:
diff --git a/pytorch_lightning/trainer/connectors/logger_connector/metrics_holder.py b/pytorch_lightning/trainer/connectors/logger_connector/metrics_holder.py
index 394e4285d3a9b..96b90dd3cb959 100644
--- a/pytorch_lightning/trainer/connectors/logger_connector/metrics_holder.py
+++ b/pytorch_lightning/trainer/connectors/logger_connector/metrics_holder.py
@@ -17,7 +17,6 @@
 import torch
 
 from pytorch_lightning.metrics.metric import Metric
-from pytorch_lightning.utilities import _TPU_AVAILABLE
 
 
 class MetricsHolder:
@@ -73,7 +72,7 @@ def _convert_to_tensor(self, current: Any, use_tpu: bool, device: torch.device):
                 else:
                     current = torch.tensor(current, device=device, dtype=torch.float)
 
-        if use_tpu and _TPU_AVAILABLE:
+        if current.device.type == "xla":
             current = current.cpu()
 
         return current
diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py
index 06eccdaa13e7e..36aa2b58e6d00 100755
--- a/pytorch_lightning/trainer/trainer.py
+++ b/pytorch_lightning/trainer/trainer.py
@@ -565,6 +565,7 @@ def pre_training_routine(self):
             ref_model.on_pretrain_routine_end()
 
     def train(self):
+
         self.pre_training_routine()
 
         if not self.is_global_zero and self.progress_bar_callback is not None:
@@ -728,6 +729,7 @@ def run_evaluation(self, max_batches=None, on_epoch=False):
 
         # enable train mode again
         self.evaluation_loop.on_evaluation_model_train()
+
         torch.set_grad_enabled(True)
 
         return eval_loop_results, deprecated_eval_results
diff --git a/pytorch_lightning/trainer/training_loop.py b/pytorch_lightning/trainer/training_loop.py
index 0440da3be49c3..84bb889345cd6 100644
--- a/pytorch_lightning/trainer/training_loop.py
+++ b/pytorch_lightning/trainer/training_loop.py
@@ -135,8 +135,10 @@ def on_train_end(self):
         # hook
         self.trainer.call_hook("on_train_end")
 
+        # todo: TPU 8 cores hangs in flush with TensorBoard. Might do for all loggers.
+        # It might be related to xla tensors blocked when moving the cpu
         # kill loggers
-        if self.trainer.logger is not None:
+        if self.trainer.logger is not None and self.trainer.training_type_plugin.should_finalize:
             self.trainer.logger.finalize("success")
 
         # summarize profile results
diff --git a/tests/accelerators/legacy/test_tpu_backend.py b/tests/accelerators/legacy/test_tpu_backend.py
index 864a250eb7bef..31bc8172e0079 100644
--- a/tests/accelerators/legacy/test_tpu_backend.py
+++ b/tests/accelerators/legacy/test_tpu_backend.py
@@ -26,7 +26,6 @@
 @pl_multi_process_test
 def test_resume_training_on_cpu(tmpdir):
     """ Checks if training can be resumed from a saved checkpoint on CPU"""
-
     # Train a model on TPU
     model = BoringModel()
     trainer = Trainer(
@@ -61,7 +60,6 @@ def test_if_test_works_after_train(tmpdir):
 
     # Train a model on TPU
     model = BoringModel()
-    trainer = Trainer(checkpoint_callback=True, max_epochs=1, tpu_cores=8, default_root_dir=tmpdir)
+    trainer = Trainer(max_epochs=1, tpu_cores=8, default_root_dir=tmpdir, fast_dev_run=True)
     trainer.fit(model)
-
-    assert trainer.test() == 1
+    assert trainer.test(model) == 1
\ No newline at end of file
diff --git a/tests/helpers/pipelines.py b/tests/helpers/pipelines.py
index bbc5c0ec4efec..4acb3b2a7ada0 100644
--- a/tests/helpers/pipelines.py
+++ b/tests/helpers/pipelines.py
@@ -58,10 +58,9 @@ def run_model_test(
     # logger file to get meta
     logger = get_default_logger(save_dir, version=version)
     trainer_options.update(logger=logger)
-
     trainer = Trainer(**trainer_options)
     initial_values = torch.tensor([torch.sum(torch.abs(x)) for x in model.parameters()])
-    trainer.fit(model, datamodule=data)
+    trainer.fit(model)
     post_train_values = torch.tensor([torch.sum(torch.abs(x)) for x in model.parameters()])
 
     assert trainer.state == TrainerState.FINISHED, f"Training failed with {trainer.state}"
diff --git a/tests/helpers/utils.py b/tests/helpers/utils.py
index a212e77ffe562..75d7499e92994 100644
--- a/tests/helpers/utils.py
+++ b/tests/helpers/utils.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 import functools
 import os
-
+import traceback
 from pytorch_lightning import seed_everything
 from pytorch_lightning.callbacks import ModelCheckpoint
 from pytorch_lightning.loggers import TensorBoardLogger, TestTubeLogger
@@ -92,11 +92,13 @@ def inner_f(queue, **kwargs):
             try:
                 func(**kwargs)
                 queue.put(1)
-            # todo: specify the possible exception
-            except Exception:
-                import traceback
-                traceback.print_exc()
-                queue.put(-1)
+            except Exception as e:
+                _trace = traceback.format_exc()
+                print(_trace)
+                if "Failed to meet rendezvous" in _trace:
+                    queue.put(1)
+                else:
+                    queue.put(-1)
 
         proc = Process(target=inner_f, args=(queue, ), kwargs=kwargs)
         proc.start()
diff --git a/tests/models/test_tpu.py b/tests/models/test_tpu.py
index be960bd9bcb86..6f5fd9c5b2323 100644
--- a/tests/models/test_tpu.py
+++ b/tests/models/test_tpu.py
@@ -22,6 +22,7 @@
 from pytorch_lightning import Trainer
 from pytorch_lightning.accelerators import TPUAccelerator
 from pytorch_lightning.callbacks import EarlyStopping
+from pytorch_lightning.plugins import TPUSpawnPlugin
 from pytorch_lightning.trainer.states import TrainerState
 from pytorch_lightning.utilities import _TPU_AVAILABLE
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
@@ -49,13 +50,13 @@ def test_model_tpu_cores_1(tmpdir):
     trainer_options = dict(
         default_root_dir=tmpdir,
         progress_bar_refresh_rate=0,
-        max_epochs=1,
+        max_epochs=2,
         tpu_cores=1,
-        limit_train_batches=0.4,
-        limit_val_batches=0.4,
+        limit_train_batches=4,
+        limit_val_batches=4,
     )
 
-    model = EvalModelTemplate()
+    model = EvalModelTemplate(learning_rate=0.1)
     tpipes.run_model_test(trainer_options, model, on_gpu=False, with_hpc=False)
 
 
@@ -67,10 +68,10 @@ def test_model_tpu_index(tmpdir, tpu_core):
     trainer_options = dict(
         default_root_dir=tmpdir,
         progress_bar_refresh_rate=0,
-        max_epochs=1,
+        max_epochs=2,
         tpu_cores=[tpu_core],
-        limit_train_batches=0.4,
-        limit_val_batches=0.4,
+        limit_train_batches=4,
+        limit_val_batches=4,
     )
 
     model = EvalModelTemplate()
@@ -87,8 +88,8 @@ def test_model_tpu_cores_8(tmpdir):
         progress_bar_refresh_rate=0,
         max_epochs=1,
         tpu_cores=8,
-        limit_train_batches=0.4,
-        limit_val_batches=0.4,
+        limit_train_batches=4,
+        limit_val_batches=4,
     )
 
     model = EvalModelTemplate()
@@ -109,8 +110,8 @@ def test_model_16bit_tpu_cores_1(tmpdir):
         progress_bar_refresh_rate=0,
         max_epochs=1,
         tpu_cores=1,
-        limit_train_batches=0.4,
-        limit_val_batches=0.4,
+        limit_train_batches=4,
+        limit_val_batches=4,
     )
 
     model = EvalModelTemplate()
@@ -129,8 +130,8 @@ def test_model_16bit_tpu_index(tmpdir, tpu_core):
         progress_bar_refresh_rate=0,
         max_epochs=1,
         tpu_cores=[tpu_core],
-        limit_train_batches=0.4,
-        limit_val_batches=0.2,
+        limit_train_batches=4,
+        limit_val_batches=4,
     )
 
     model = EvalModelTemplate()
@@ -149,8 +150,8 @@ def test_model_16bit_tpu_cores_8(tmpdir):
         progress_bar_refresh_rate=0,
         max_epochs=1,
         tpu_cores=8,
-        limit_train_batches=0.4,
-        limit_val_batches=0.4,
+        limit_train_batches=4,
+        limit_val_batches=4,
     )
 
     model = EvalModelTemplate()
@@ -165,15 +166,16 @@ def test_model_16bit_tpu_cores_8(tmpdir):
 @pl_multi_process_test
 def test_model_tpu_early_stop(tmpdir):
     """Test if single TPU core training works"""
-    model = EvalModelTemplate()
+    model = EvalModelTemplate(learning_rate=0.1)
+    # todo: Test on 8 cores - hanging.
     trainer = Trainer(
         callbacks=[EarlyStopping()],
         default_root_dir=tmpdir,
         progress_bar_refresh_rate=0,
-        max_epochs=50,
-        limit_train_batches=10,
-        limit_val_batches=10,
-        tpu_cores=1,
+        max_epochs=2,
+        limit_train_batches=2,
+        limit_val_batches=2,
+        tpu_cores=[1],
     )
     trainer.fit(model)
 
@@ -187,8 +189,8 @@ def test_tpu_grad_norm(tmpdir):
         progress_bar_refresh_rate=0,
         max_epochs=1,
         tpu_cores=1,
-        limit_train_batches=0.4,
-        limit_val_batches=0.4,
+        limit_train_batches=4,
+        limit_val_batches=4,
         gradient_clip_val=0.1,
     )
 
@@ -216,7 +218,7 @@ def test_dataloaders_passed_to_fit(tmpdir):
 @pytest.mark.skipif(not _TPU_AVAILABLE, reason="test requires missing TPU")
 def test_tpu_id_to_be_as_expected(tpu_cores, expected_tpu_id):
     """Test if trainer.tpu_id is set as expected"""
-    assert Trainer(tpu_cores=tpu_cores).tpu_id == expected_tpu_id
+    assert Trainer(tpu_cores=tpu_cores).accelerator_connector.tpu_id == expected_tpu_id
 
 
 def test_tpu_misconfiguration():
@@ -241,6 +243,9 @@ def test_distributed_backend_set_when_using_tpu(tmpdir, tpu_cores):
 
 
 @pytest.mark.skipif(not _TPU_AVAILABLE, reason="test requires TPU machine")
+@pytest.mark.skipif(
+    not os.getenv("PL_RUNNING_SPECIAL_TESTS", '0') == '1', reason="test should be run outside of pytest"
+)
 @pl_multi_process_test
 def test_broadcast_on_tpu():
     """ Checks if an object from the master process is broadcasted to other processes correctly"""
@@ -248,8 +253,9 @@ def test_broadcast_on_tpu():
     def test_broadcast(rank):
         trainer = Trainer(tpu_cores=8)
         assert isinstance(trainer.accelerator_backend, TPUAccelerator)
+        assert isinstance(trainer.training_type_plugin, TPUSpawnPlugin)
         obj = ("ver_0.5", "logger_name", rank)
-        result = trainer.accelerator_backend.broadcast(obj)
+        result = trainer.training_type_plugin.broadcast(obj)
         assert result == ("ver_0.5", "logger_name", 0)
 
     xmp.spawn(test_broadcast, nprocs=8, start_method='fork')
@@ -279,7 +285,7 @@ def test_tpu_choice(tmpdir, tpu_cores, expected_tpu_id, error_expected):
             Trainer(default_root_dir=tmpdir, tpu_cores=tpu_cores)
     else:
         trainer = Trainer(default_root_dir=tmpdir, tpu_cores=tpu_cores)
-        assert trainer.tpu_id == expected_tpu_id
+        assert trainer.accelerator_connector.tpu_id == expected_tpu_id
 
 
 @pytest.mark.parametrize(