From f26c9ab3f40f79f274bdec3c599bb45b953dadd1 Mon Sep 17 00:00:00 2001
From: Carlos Mocholi
Date: Wed, 26 May 2021 15:18:27 +0200
Subject: [PATCH 01/26] PoC

---
 pytorch_lightning/callbacks/early_stopping.py |  2 +-
 .../callbacks/model_checkpoint.py             | 44 ++++++++++++++++---
 pytorch_lightning/trainer/evaluation_loop.py  |  3 --
 pytorch_lightning/trainer/properties.py       |  5 ---
 pytorch_lightning/trainer/trainer.py          |  2 +-
 pytorch_lightning/trainer/training_loop.py    | 29 +-----------
 6 files changed, 42 insertions(+), 43 deletions(-)

diff --git a/pytorch_lightning/callbacks/early_stopping.py b/pytorch_lightning/callbacks/early_stopping.py
index 242eeed808f34..f0c1a3a95819e 100644
--- a/pytorch_lightning/callbacks/early_stopping.py
+++ b/pytorch_lightning/callbacks/early_stopping.py
@@ -97,7 +97,7 @@ def __init__(
         check_finite: bool = True,
         stopping_threshold: Optional[float] = None,
         divergence_threshold: Optional[float] = None,
-        check_on_train_epoch_end: bool = False,
+        check_on_train_epoch_end: bool = True,
     ):
         super().__init__()
         self.monitor = monitor
diff --git a/pytorch_lightning/callbacks/model_checkpoint.py b/pytorch_lightning/callbacks/model_checkpoint.py
index 7642ad95d08bf..1bf8046dcee5b 100644
--- a/pytorch_lightning/callbacks/model_checkpoint.py
+++ b/pytorch_lightning/callbacks/model_checkpoint.py
@@ -118,6 +118,7 @@ class ModelCheckpoint(Callback):
             will only save checkpoints at epochs 0 < E <= N
             where both values for ``every_n_val_epochs`` and ``check_val_every_n_epoch`` evenly divide E.
         period: Interval (number of epochs) between checkpoints.
+        save_on_train_epoch_end: TODO

             .. warning::
                This argument has been deprecated in v1.3 and will be removed in v1.5.
@@ -202,6 +203,7 @@ def __init__(
         train_time_interval: Optional[timedelta] = None,
         every_n_val_epochs: Optional[int] = None,
         period: Optional[int] = None,
+        save_on_train_epoch_end: bool = True,
     ):
         super().__init__()
         self.monitor = monitor
@@ -210,6 +212,7 @@ def __init__(
         self.save_top_k = save_top_k
         self.save_weights_only = save_weights_only
         self.auto_insert_metric_name = auto_insert_metric_name
+        self._save_on_train_epoch_end = save_on_train_epoch_end
         self._last_global_step_saved = -1
         self._last_time_checked: Optional[float] = None
         self.current_score = None
@@ -267,16 +270,47 @@ def on_train_batch_end(

         self.save_checkpoint(trainer)

+    def on_train_epoch_end(
+        self, trainer: 'pl.Trainer', pl_module: 'pl.LightningModule', unused: Optional = None
+    ) -> None:
+        """ Save a checkpoint at the end of the training epoch. """
+        if (
+            self._should_skip_saving_checkpoint(trainer) or not self._save_on_train_epoch_end
+            # TODO: should every_n_val_epochs be repurposed to work for this too?
+        ):
+            return
+        # as we advance one step at the end of the epoch, we use `global_step - 1` to avoid saving duplicates
+        trainer.train_loop.global_step -= 1
+        self.save_checkpoint(trainer)
+        trainer.train_loop.global_step += 1
+
     def on_validation_end(self, trainer: 'pl.Trainer', pl_module: 'pl.LightningModule') -> None:
         """ Save a checkpoint at the end of the validation stage. """
-        skip = (
-            self._should_skip_saving_checkpoint(trainer) or self._every_n_val_epochs < 1
-            or (trainer.current_epoch + 1) % self._every_n_val_epochs != 0
-        )
-        if skip:
+        if (
+            self._should_skip_saving_checkpoint(trainer) or self._save_on_train_epoch_end
+            or self._every_n_val_epochs < 1 or (trainer.current_epoch + 1) % self._every_n_val_epochs != 0
+        ):
             return
         self.save_checkpoint(trainer)

+    def on_train_end(self, trainer: 'pl.Trainer', pl_module: 'pl.LightningModule') -> None:
+        """
+        Save a checkpoint at the very end of training.
+
+        This will only save a checkpoint if `save_last` is also enabled
+        as the monitor metrics produced by training or validation steps or end of epochs
+        are not guaranteed to be available at this stage.
+        """
+        if self._should_skip_saving_checkpoint(trainer) or not trainer.checkpoint_connector.has_trained:
+            return
+        if self.save_last and self.verbose:
+            rank_zero_info("Saving last checkpoint...")
+        # as we advance one step at end of training, we use `global_step - 1` to avoid saving duplicates
+        trainer.train_loop.global_step -= 1
+        monitor_candidates = self._monitor_candidates(trainer)
+        self._save_last_checkpoint(trainer, monitor_candidates)
+        trainer.train_loop.global_step += 1
+
     def on_save_checkpoint(
         self,
         trainer: 'pl.Trainer',
diff --git a/pytorch_lightning/trainer/evaluation_loop.py b/pytorch_lightning/trainer/evaluation_loop.py
index f048297892533..810efef3fa52b 100644
--- a/pytorch_lightning/trainer/evaluation_loop.py
+++ b/pytorch_lightning/trainer/evaluation_loop.py
@@ -72,9 +72,6 @@ def get_evaluation_dataloaders(self) -> Tuple[Optional[List[DataLoader]], List[U
             dataloaders = self.trainer.val_dataloaders
         return dataloaders, max_batches

-    def should_skip_evaluation(self, max_batches: List[Union[int, float]]) -> bool:
-        return sum(max_batches) == 0
-
     def on_evaluation_start(self, *args: Any, **kwargs: Any) -> None:
         self.should_track_batch_outputs_for_epoch_end: bool = self._should_track_batch_outputs_for_epoch_end()
         if self.trainer.testing:
diff --git a/pytorch_lightning/trainer/properties.py b/pytorch_lightning/trainer/properties.py
index e469d1bc12394..440a6693aba43 100644
--- a/pytorch_lightning/trainer/properties.py
+++ b/pytorch_lightning/trainer/properties.py
@@ -258,11 +258,6 @@ def progress_bar_dict(self) -> dict:
             all_metrics.update(**logged_metrics)
         return all_metrics

-    @property
-    def disable_validation(self) -> bool:
-        """ Check if validation is disabled during training. """
-        return not self.enable_validation
-
     @property
     def enable_validation(self) -> bool:
         """ Check if we should run validation during training. """
diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py
index b01f4fa36bd33..b24d6d7b2da48 100644
--- a/pytorch_lightning/trainer/trainer.py
+++ b/pytorch_lightning/trainer/trainer.py
@@ -940,7 +940,7 @@ def _run_evaluation(self) -> _EVALUATE_OUTPUT:
         dataloaders, max_batches = self.evaluation_loop.get_evaluation_dataloaders()

         # check if we want to skip this evaluation
-        if self.evaluation_loop.should_skip_evaluation(max_batches):
+        if sum(max_batches) == 0:
             return [], []

         # enable eval mode + no grads
diff --git a/pytorch_lightning/trainer/training_loop.py b/pytorch_lightning/trainer/training_loop.py
index 09a32c3c96aad..ea33241b7a4af 100644
--- a/pytorch_lightning/trainer/training_loop.py
+++ b/pytorch_lightning/trainer/training_loop.py
@@ -27,7 +27,6 @@
 from pytorch_lightning.plugins import ParallelPlugin
 from pytorch_lightning.trainer.supporters import TensorRunningAccum
 from pytorch_lightning.utilities import _TPU_AVAILABLE, AMPType, DeviceType
-from pytorch_lightning.utilities.distributed import rank_zero_info
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from pytorch_lightning.utilities.finite_checks import detect_nan_parameters
 from pytorch_lightning.utilities.grads import grad_norm
@@ -107,12 +106,6 @@ def on_train_end(self):
             return
         self._teardown_already_run = True

-        # trigger checkpoint check. need to temporarily decrease the global step to avoid saving duplicates
-        # when a checkpoint was saved at the last step
-        self.global_step -= 1
-        self.check_checkpoint_callback(should_update=True, is_last=True)
-        self.global_step += 1
-
         # hook
         self.trainer.call_hook("on_train_end")

@@ -131,19 +124,6 @@ def on_train_end(self):
         # reset bookkeeping
         self.trainer.state.stage = None

-    def check_checkpoint_callback(self, should_update, is_last=False):
-        # TODO bake this logic into the ModelCheckpoint callback
-        if should_update and self.trainer.checkpoint_connector.has_trained:
-            callbacks = self.trainer.checkpoint_callbacks
-
-            if is_last and any(cb.save_last and cb.verbose for cb in callbacks):
-                rank_zero_info("Saving latest checkpoint...")
-
-            model = self.trainer.lightning_module
-
-            for cb in callbacks:
-                cb.on_validation_end(self.trainer, model)
-
     def on_train_epoch_start(self, epoch):

         # update training progress in trainer
@@ -540,6 +520,7 @@ def run_training_epoch(self):
             return

         # handle epoch_output on epoch end
+        # TODO: this can log, but ModelCheckpoint won't have access to those metrics since the logger connector is updated after
         self.on_train_epoch_end(epoch_output)

         # the global step is manually decreased here due to backwards compatibility with existing loggers
@@ -553,14 +534,6 @@
         self.update_lr_schedulers('epoch')

-        did_train_only = self.trainer.disable_validation or self.trainer.evaluation_loop.should_skip_evaluation(
-            self.trainer.num_val_batches
-        )
-        if did_train_only:
-            self.global_step -= 1
-            self.check_checkpoint_callback(True)
-            self.global_step += 1
-
     def on_train_epoch_end(self, epoch_output: List[List[List[Result]]]) -> None:
         # inform logger the batch loop has finished
         self.trainer.logger_connector.on_train_epoch_end()
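For orientation, a minimal usage sketch of the two flags this PoC wires up (`check_on_train_epoch_end` on `EarlyStopping` and `save_on_train_epoch_end` on `ModelCheckpoint`, both defaulting to `True` in the signatures above); the behaviour is still subject to change in the remaining patches of this series:

```python
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint

# Run both checks at `on_train_epoch_end` (the new default in this PoC).
# Setting the flags to False defers them to `on_validation_end` instead.
early_stopping = EarlyStopping(monitor="train_loss", check_on_train_epoch_end=True)
checkpointing = ModelCheckpoint(monitor="train_loss", save_on_train_epoch_end=True)

trainer = Trainer(callbacks=[early_stopping, checkpointing])
```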
From e2acb7871ec2808eec01edb1215079bfec457eb0 Mon Sep 17 00:00:00 2001
From: Carlos Mocholi
Date: Sun, 4 Jul 2021 14:28:29 +0200
Subject: [PATCH 02/26] Update code to new loops

---
 pytorch_lightning/loops/fit_loop.py | 28 ----------------------------
 1 file changed, 28 deletions(-)

diff --git a/pytorch_lightning/loops/fit_loop.py b/pytorch_lightning/loops/fit_loop.py
index a7699eaec812c..c7207f2cf833f 100644
--- a/pytorch_lightning/loops/fit_loop.py
+++ b/pytorch_lightning/loops/fit_loop.py
@@ -21,7 +21,6 @@
 from pytorch_lightning.loops.epoch import TrainingEpochLoop
 from pytorch_lightning.trainer.connectors.logger_connector.result import ResultCollection
 from pytorch_lightning.trainer.supporters import TensorRunningAccum
-from pytorch_lightning.utilities import rank_zero_info

 log = logging.getLogger(__name__)

@@ -229,12 +228,6 @@ def on_advance_end(self) -> None:

         self.epoch_loop.update_lr_schedulers('epoch', update_plateau_schedulers=True)

-        did_train_only = self.trainer.disable_validation or self.epoch_loop.val_loop.skip
-        if did_train_only:
-            self.global_step -= 1
-            self._check_checkpoint_callback(True)
-            self.global_step += 1
-
     def on_run_end(self) -> None:
         """Calls the ``on_train_end`` hook"""
         # NOTE: the iteration_count/current_epoch is already incremented
         # TODO: must be fixed by https://github.com/PyTorchLightning/pytorch-lightning/issues/5007
         self.current_epoch -= 1

-        # trigger checkpoint check. need to temporarily decrease the global step to avoid saving duplicates
-        # when a checkpoint was saved at the last step
-        self.epoch_loop.global_step -= 1
-        # TODO: see discussion/rework https://github.com/PyTorchLightning/pytorch-lightning/issues/7406
-        self._check_checkpoint_callback(should_update=True, is_last=True)
-        self.epoch_loop.global_step += 1
-
         # hook
         self.trainer.call_hook("on_train_end")

@@ -269,20 +255,6 @@ def should_accumulate(self) -> bool:
         """Whether the gradients should be accumulated"""
         return self.epoch_loop.batch_loop.should_accumulate()

-    def _check_checkpoint_callback(self, should_update: bool, is_last: bool = False):
-        """Checks if checkpointing needs to be done"""
-        # TODO: bake this logic into the ModelCheckpoint callback
-        if should_update and self.trainer.checkpoint_connector.has_trained:
-            callbacks = self.trainer.checkpoint_callbacks
-
-            if is_last and any(cb.save_last and cb.verbose for cb in callbacks):
-                rank_zero_info("Saving latest checkpoint...")
-
-            model = self.trainer.lightning_module
-
-            for cb in callbacks:
-                cb.on_validation_end(self.trainer, model)
-
     def state_dict(self) -> Dict:
         return {"epoch_loop": self.epoch_loop.state_dict()}

From 7b348dbd1d13cc40dcc4234e0fe25dafe6c92f00 Mon Sep 17 00:00:00 2001
From: Carlos Mocholi
Date: Sun, 4 Jul 2021 14:34:19 +0200
Subject: [PATCH 03/26] Pass through function

---
 pytorch_lightning/callbacks/model_checkpoint.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/pytorch_lightning/callbacks/model_checkpoint.py b/pytorch_lightning/callbacks/model_checkpoint.py
index 5152aa9924a7c..87dbcfe59b6c9 100644
--- a/pytorch_lightning/callbacks/model_checkpoint.py
+++ b/pytorch_lightning/callbacks/model_checkpoint.py
@@ -307,10 +307,8 @@ def on_train_end(self, trainer: 'pl.Trainer', pl_module: 'pl.LightningModule') -> None:
         if self.save_last and self.verbose:
             rank_zero_info("Saving last checkpoint...")
         # as we advance one step at end of training, we use `global_step - 1` to avoid saving duplicates
-        trainer.train_loop.global_step -= 1
-        monitor_candidates = self._monitor_candidates(trainer)
+        monitor_candidates = self._monitor_candidates(trainer, trainer.current_epoch, trainer.global_step - 1)
         self._save_last_checkpoint(trainer, monitor_candidates)
-        trainer.train_loop.global_step += 1

     def on_save_checkpoint(
         self,
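Patches 01-03 keep rewinding `global_step` by one before saving because the loop advances the step counter before the end-of-training hooks run, while the callback deduplicates saves by step. A sketch of that guard, assuming `_should_skip_saving_checkpoint` compares against the `_last_global_step_saved` attribute initialised in patch 01 (the first two conditions mirror the callback's existing behaviour and are assumptions here):

```python
def should_skip_saving_checkpoint(callback, trainer) -> bool:
    """Sketch of the duplicate-save guard the `global_step - 1` dance works around.

    `callback._last_global_step_saved` starts at -1 (see patch 01) and is updated
    on every save; comparing it to `trainer.global_step` makes a second save at
    the same step a no-op, which is why the end-of-training hooks rewind the
    counter by one before saving.
    """
    return (
        trainer.fast_dev_run  # checkpointing is disabled under `fast_dev_run`
        or trainer.sanity_checking  # never save during the validation sanity check
        or callback._last_global_step_saved == trainer.global_step  # already saved at this step
    )
```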
From db2a6e54c10a57d96f17bb304278e91dd2b57850 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Carlos=20Mochol=C3=AD?=
Date: Sun, 4 Jul 2021 17:13:54 +0200
Subject: [PATCH 04/26] Update after loop refactor

---
 .azure-pipelines/gpu-tests.yml | 17 +-
 .azure-pipelines/ipu-tests.yml | 16 +-
 .circleci/config.yml | 8 +-
 .deepsource.toml | 26 +
 .github/BECOMING_A_CORE_CONTRIBUTOR.md | 16 +-
 .github/CODEOWNERS | 5 +
 .github/CONTRIBUTING.md | 30 +-
 .github/ISSUE_TEMPLATE/bug_report.md | 9 +-
 .github/workflows/ci_test-conda.yml | 2 +-
 .github/workflows/ci_test-full.yml | 2 +-
 .github/workflows/code-formatting.yml | 20 +
 .github/workflows/docs-checks.yml | 39 +-
 .github/workflows/release-pypi.yml | 2 +-
 .gitignore | 9 +-
 .gitmodules | 4 +
 .pre-commit-config.yaml | 38 +-
 .readthedocs.yml | 4 +
 CHANGELOG.md | 302 +-
 MANIFEST.in | 6 +-
 Makefile | 1 +
 README.md | 15 +-
 _notebooks | 1 +
 benchmarks/test_basic_parity.py | 2 +-
 dockers/base-cuda/Dockerfile | 3 +-
 dockers/nvidia/Dockerfile | 8 +-
 dockers/tpu-tests/tpu_test_cases.jsonnet | 1 +
 .../images/accelerator/ipus/profiler.png | Bin 0 -> 129635 bytes
 docs/source/_templates/layout.html | 10 +
 docs/source/_templates/theme_variables.jinja | 2 +
 docs/source/advanced/advanced_gpu.rst | 100 +-
 docs/source/advanced/amp.rst | 94 -
 docs/source/advanced/ipu.rst | 234 +
 docs/source/advanced/multi_gpu.rst | 48 +-
 docs/source/advanced/multiple_loaders.rst | 17 -
 docs/source/api_references.rst | 11 +-
 docs/source/benchmarking/performance.rst | 183 -
 docs/source/clouds/cloud_training.rst | 33 +-
 docs/source/common/fast_training.rst | 82 -
 docs/source/common/lightning_cli.rst | 181 +-
 docs/source/common/lightning_module.rst | 218 +-
 docs/source/common/loggers.rst | 13 +-
 docs/source/common/optimizers.rst | 82 -
 docs/source/common/test_set.rst | 4 +-
 docs/source/common/trainer.rst | 85 +-
 docs/source/conf.py | 36 +-
 docs/source/ecosystem/asr_nlp_tts.rst | 4 +-
 docs/source/extensions/datamodules.rst | 2 +-
 docs/source/extensions/logging.rst | 4 +
 docs/source/extensions/plugins.rst | 2 -
 docs/source/guides/speed.rst | 482 ++
 docs/source/index.rst | 14 +-
 docs/source/starter/new-project.rst | 2 +-
 notebooks/01-mnist-hello-world.ipynb | 448 -
 notebooks/02-datamodules.ipynb | 588 --
 notebooks/03-basic-gan.ipynb | 472 --
 .../04-transformers-text-classification.ipynb | 599 --
 notebooks/05-trainer-flags-overview.ipynb | 2926 -------
 notebooks/06-mnist-tpu-training.ipynb | 368 -
 notebooks/07-cifar10-baseline.ipynb | 394 -
 notebooks/08-Domain-specific-demos.ipynb | 7415 -----------------
 notebooks/README.md | 15 -
 pl_examples/basic_examples/autoencoder.py | 13 +-
 .../backbone_image_classifier.py | 11 +-
 .../basic_examples/conv_sequential_example.py | 226 -
 .../basic_examples/profiler_example.py | 4 +
 .../basic_examples/simple_image_classifier.py | 2 +-
 pl_examples/bug_report_model.py | 4 +-
 .../domain_templates/reinforce_learn_Qnet.py | 4 +-
 .../domain_templates/reinforce_learn_ppo.py | 6 +-
 pl_examples/ipu_examples/__init__.py | 0
 pl_examples/ipu_examples/mnist.py | 89 +
 pyproject.toml | 25 +
 pytorch_lightning/accelerators/__init__.py | 1 +
 pytorch_lightning/accelerators/accelerator.py | 76 +-
 pytorch_lightning/accelerators/gpu.py | 5 +-
 pytorch_lightning/accelerators/ipu.py | 35 +
 pytorch_lightning/callbacks/early_stopping.py | 18 +-
 pytorch_lightning/callbacks/finetuning.py | 26 +-
 pytorch_lightning/callbacks/lr_monitor.py | 111 +-
 .../callbacks/model_checkpoint.py | 62 +-
 .../callbacks/prediction_writer.py | 2 +-
 pytorch_lightning/callbacks/progress.py | 2 +-
 pytorch_lightning/callbacks/pruning.py | 25 +-
 .../callbacks/stochastic_weight_avg.py | 13 +-
 pytorch_lightning/callbacks/timer.py | 3 +-
 pytorch_lightning/core/datamodule.py | 65 +-
 pytorch_lightning/core/grads.py | 2 +-
 pytorch_lightning/core/hooks.py | 27 +-
 pytorch_lightning/core/lightning.py | 320 +-
 pytorch_lightning/core/memory.py | 83 +-
 pytorch_lightning/core/optimizer.py | 2 +-
 pytorch_lightning/core/saving.py | 12 +-
 pytorch_lightning/core/step_result.py | 613 --
 pytorch_lightning/loggers/base.py | 23 +-
 pytorch_lightning/loggers/comet.py | 4 +-
 pytorch_lightning/loggers/csv_logs.py | 3 +-
 pytorch_lightning/loggers/neptune.py | 6 +-
 pytorch_lightning/loggers/tensorboard.py | 15 +-
 pytorch_lightning/loggers/test_tube.py | 8 +-
 pytorch_lightning/loggers/wandb.py | 93 +-
 pytorch_lightning/loops/__init__.py | 19 +
 pytorch_lightning/loops/base.py | 158 +
 pytorch_lightning/loops/batch/__init__.py | 15 +
 .../loops/batch/training_batch_loop.py | 677 ++
 .../loops/dataloader/__init__.py | 17 +
 .../loops/dataloader/dataloader_loop.py | 53 +
 .../loops/dataloader/evaluation_loop.py | 269 +
 .../loops/dataloader/prediction_loop.py | 151 +
 pytorch_lightning/loops/epoch/__init__.py | 17 +
 .../loops/epoch/evaluation_epoch_loop.py | 255 +
 .../loops/epoch/prediction_epoch_loop.py | 151 +
 .../loops/epoch/training_epoch_loop.py | 426 +
 pytorch_lightning/loops/fit_loop.py | 265 +
 pytorch_lightning/metrics/__init__.py | 6 -
 .../metrics/classification/accuracy.py | 4 +-
 .../metrics/classification/auc.py | 4 +-
 .../metrics/classification/auroc.py | 4 +-
 .../classification/average_precision.py | 4 +-
 .../classification/confusion_matrix.py | 4 +-
 .../metrics/classification/f_beta.py | 8 +-
 .../classification/hamming_distance.py | 4 +-
 .../metrics/classification/iou.py | 8 +-
 .../classification/precision_recall.py | 10 +-
 .../classification/precision_recall_curve.py | 4 +-
 .../metrics/classification/roc.py | 4 +-
 .../metrics/classification/stat_scores.py | 8 +-
 pytorch_lightning/metrics/compositional.py | 3 +-
 .../metrics/functional/accuracy.py | 3 +-
 pytorch_lightning/metrics/functional/auc.py | 3 +-
 pytorch_lightning/metrics/functional/auroc.py | 3 +-
 .../metrics/functional/average_precision.py | 3 +-
 .../metrics/functional/confusion_matrix.py | 3 +-
 .../metrics/functional/explained_variance.py | 3 +-
 .../metrics/functional/f_beta.py | 8 +-
 .../metrics/functional/hamming_distance.py | 3 +-
 .../metrics/functional/image_gradients.py | 3 +-
 pytorch_lightning/metrics/functional/iou.py | 3 +-
 .../metrics/functional/mean_absolute_error.py | 3 +-
 .../metrics/functional/mean_relative_error.py | 3 +-
 .../metrics/functional/mean_squared_error.py | 3 +-
 .../functional/mean_squared_log_error.py | 3 +-
 pytorch_lightning/metrics/functional/nlp.py | 3 +-
 .../metrics/functional/precision_recall.py | 11 +-
 .../functional/precision_recall_curve.py | 3 +-
 pytorch_lightning/metrics/functional/psnr.py | 3 +-
 .../metrics/functional/r2score.py | 3 +-
 pytorch_lightning/metrics/functional/roc.py | 3 +-
 .../metrics/functional/self_supervised.py | 3 +-
 pytorch_lightning/metrics/functional/ssim.py | 3 +-
 .../metrics/functional/stat_scores.py | 5 +-
 pytorch_lightning/metrics/metric.py | 4 +-
 .../metrics/regression/explained_variance.py | 4 +-
 .../metrics/regression/mean_absolute_error.py | 4 +-
 .../metrics/regression/mean_squared_error.py | 4 +-
 .../regression/mean_squared_log_error.py | 4 +-
 pytorch_lightning/metrics/regression/psnr.py | 4 +-
 .../metrics/regression/r2score.py | 4 +-
 pytorch_lightning/metrics/regression/ssim.py | 4 +-
 pytorch_lightning/metrics/utils.py | 16 +-
 pytorch_lightning/overrides/base.py | 58 +-
 pytorch_lightning/overrides/data_parallel.py | 4 +-
 pytorch_lightning/overrides/distributed.py | 10 +-
 pytorch_lightning/overrides/fairscale.py | 4 +-
 pytorch_lightning/plugins/__init__.py | 8 +-
 .../plugins/precision/apex_amp.py | 18 +-
 pytorch_lightning/plugins/precision/double.py | 100 +-
 .../plugins/precision/ipu_precision.py | 60 +
 .../plugins/precision/native_amp.py | 23 +-
 .../plugins/precision/precision_plugin.py | 3 +-
 .../plugins/training_type/__init__.py | 2 -
 .../plugins/training_type/ddp.py | 153 +-
 .../plugins/training_type/ddp2.py | 20 +-
 .../plugins/training_type/ddp_spawn.py | 59 +-
 .../plugins/training_type/deepspeed.py | 320 +-
 pytorch_lightning/plugins/training_type/dp.py | 37 +-
 .../plugins/training_type/horovod.py | 5 +-
 .../plugins/training_type/ipu.py | 393 +
 .../plugins/training_type/parallel.py | 15 +-
 .../plugins/training_type/rpc.py | 85 -
 .../plugins/training_type/rpc_sequential.py | 408 -
 .../plugins/training_type/sharded.py | 7 +-
 .../plugins/training_type/sharded_spawn.py | 4 +-
 .../plugins/training_type/single_device.py | 3 +-
 .../plugins/training_type/single_tpu.py | 29 +-
 .../plugins/training_type/tpu_spawn.py | 12 +-
 .../training_type/training_type_plugin.py | 105 +-
 pytorch_lightning/profiler/__init__.py | 12 +-
 pytorch_lightning/profiler/advanced.py | 92 +
 pytorch_lightning/profiler/base.py | 219 +
 pytorch_lightning/profiler/profilers.py | 409 +-
 pytorch_lightning/profiler/pytorch.py | 33 +-
 pytorch_lightning/profiler/simple.py | 123 +
 pytorch_lightning/profiler/xla.py | 110 +
 pytorch_lightning/trainer/callback_hook.py | 20 +-
 .../trainer/configuration_validator.py | 25 +-
 .../connectors/accelerator_connector.py | 64 +-
 .../trainer/connectors/callback_connector.py | 12 +-
 .../connectors/checkpoint_connector.py | 302 +-
 .../trainer/connectors/data_connector.py | 75 +-
 .../trainer/connectors/debugging_connector.py | 13 +-
 .../trainer/connectors/env_vars_connector.py | 2 +-
 .../logger_connector/epoch_result_store.py | 493 --
 .../logger_connector/fx_validator.py | 41 +-
 .../logger_connector/logger_connector.py | 499 +-
 .../logger_connector/metrics_holder.py | 82 -
 .../connectors/logger_connector/result.py | 700 ++
 .../trainer/connectors/optimizer_connector.py | 15 +-
 .../connectors/training_trick_connector.py | 3 +-
 pytorch_lightning/trainer/data_loading.py | 38 +-
 pytorch_lightning/trainer/deprecated_api.py | 10 +-
 pytorch_lightning/trainer/evaluation_loop.py | 252 -
 pytorch_lightning/trainer/logging.py | 2 +-
 pytorch_lightning/trainer/model_hooks.py | 8 +-
 pytorch_lightning/trainer/optimizers.py | 4 +-
 pytorch_lightning/trainer/predict_loop.py | 164 -
 pytorch_lightning/trainer/progress.py | 164 +-
 pytorch_lightning/trainer/properties.py | 355 +-
 pytorch_lightning/trainer/supporters.py | 13 +-
 pytorch_lightning/trainer/trainer.py | 539 +-
 pytorch_lightning/trainer/training_loop.py | 944 ---
 pytorch_lightning/trainer/training_tricks.py | 4 +-
 pytorch_lightning/tuner/batch_size_scaling.py | 16 +-
 pytorch_lightning/tuner/lr_finder.py | 20 +-
 pytorch_lightning/tuner/tuning.py | 37 +-
 pytorch_lightning/utilities/__init__.py | 13 +-
 pytorch_lightning/utilities/apply_func.py | 129 +-
 pytorch_lightning/utilities/argparse.py | 20 +-
 pytorch_lightning/utilities/cli.py | 253 +-
 pytorch_lightning/utilities/cloud_io.py | 5 +-
 pytorch_lightning/utilities/data.py | 10 +-
 pytorch_lightning/utilities/debugging.py | 13 +-
 pytorch_lightning/utilities/device_parser.py | 50 +-
 pytorch_lightning/utilities/distributed.py | 56 +-
 pytorch_lightning/utilities/enums.py | 2 +-
 pytorch_lightning/utilities/exceptions.py | 10 +-
 pytorch_lightning/utilities/finite_checks.py | 8 +-
 pytorch_lightning/utilities/imports.py | 9 +-
 pytorch_lightning/utilities/memory.py | 15 +-
 pytorch_lightning/utilities/metrics.py | 35 +-
 pytorch_lightning/utilities/model_helpers.py | 71 +-
 pytorch_lightning/utilities/parsing.py | 28 +-
 pytorch_lightning/utilities/seed.py | 8 +-
 pytorch_lightning/utilities/types.py | 19 +-
 pytorch_lightning/utilities/warnings.py | 42 +-
 requirements.txt | 9 +-
 requirements/adjust_versions.py | 5 +-
 requirements/docs.txt | 8 +-
 requirements/extra.txt | 2 +-
 setup.cfg | 8 +
 .../test_accelerator_connector.py | 76 +-
 tests/accelerators/test_cpu.py | 110 +
 tests/accelerators/test_ddp.py | 35 +-
 tests/accelerators/test_ipu.py | 547 ++
 tests/accelerators/test_multi_nodes_gpu.py | 13 +-
 tests/base/model_train_steps.py | 31 +-
 tests/callbacks/test_callback_hook_outputs.py | 2 +-
 tests/callbacks/test_callbacks.py | 158 +-
 tests/callbacks/test_early_stopping.py | 6 +-
 tests/callbacks/test_finetuning_callback.py | 43 +-
 tests/callbacks/test_lambda_function.py | 13 +-
 tests/callbacks/test_lr_monitor.py | 173 +
 tests/callbacks/test_progress_bar.py | 76 +-
 tests/callbacks/test_pruning.py | 43 +-
 tests/callbacks/test_stochastic_weight_avg.py | 26 +-
 tests/callbacks/test_timer.py | 7 +-
 .../test_checkpoint_callback_frequency.py | 15 +-
 .../checkpointing/test_legacy_checkpoints.py | 6 +
 tests/checkpointing/test_model_checkpoint.py | 74 +-
 tests/conftest.py | 9 +
 tests/core/test_datamodules.py | 72 +-
 tests/core/test_lightning_module.py | 23 -
 tests/core/test_lightning_optimizer.py | 3 +-
 tests/core/test_memory.py | 112 +-
 tests/core/test_metric_result_integration.py | 274 +-
 tests/core/test_results.py | 70 +-
 tests/deprecated_api/test_remove_1-4.py | 13 +
 tests/deprecated_api/test_remove_1-5.py | 22 +-
 tests/deprecated_api/test_remove_1-6.py | 196 +-
 tests/helpers/advanced_models.py | 19 +-
 tests/helpers/boring_model.py | 26 +-
 tests/helpers/datasets.py | 2 +-
 tests/helpers/pipelines.py | 4 +-
 tests/helpers/runif.py | 21 +-
 tests/loggers/test_all.py | 3 +
 tests/loggers/test_base.py | 5 +
 tests/loggers/test_tensorboard.py | 15 +-
 tests/loggers/test_wandb.py | 77 +-
 tests/loops/__init__.py | 0
 tests/loops/test_loop_state_dict.py | 54 +
 tests/loops/test_loops.py | 74 +
 tests/metrics/test_metric_lightning.py | 81 +-
 tests/metrics/test_remove_1-5_metrics.py | 2 +-
 tests/metrics/utils.py | 16 +-
 .../data/horovod/train_default_model.py | 2 +-
 tests/models/test_cpu.py | 22 +-
 tests/models/test_gpu.py | 26 +
 tests/models/test_grad_norm.py | 35 +-
 tests/models/test_hooks.py | 861 +-
 tests/models/test_horovod.py | 4 +-
 tests/models/test_hparams.py | 19 +
 tests/models/test_restore.py | 24 +-
 tests/models/test_tpu.py | 21 +-
 tests/overrides/test_base.py | 44 +
 tests/overrides/test_distributed.py | 15 +-
 tests/plugins/test_amp_plugins.py | 41 +
 tests/plugins/test_cluster_integration.py | 3 +-
 tests/plugins/test_ddp_plugin.py | 30 +
 tests/plugins/test_ddp_spawn_plugin.py | 41 +-
 tests/plugins/test_deepspeed_plugin.py | 102 +-
 tests/plugins/test_double_plugin.py | 53 +-
 tests/plugins/test_plugins_registry.py | 20 +-
 tests/plugins/test_rpc_plugin.py | 89 -
 tests/plugins/test_rpc_sequential_plugin.py | 185 -
 tests/plugins/test_sharded_plugin.py | 18 +-
 tests/plugins/test_single_device_plugin.py | 2 +-
 tests/plugins/test_tpu_spawn.py | 6 +-
 tests/profiler/__init__.py | 0
 tests/{ => profiler}/test_profiler.py | 4 +-
 tests/profiler/test_xla_profiler.py | 72 +
 tests/special_tests.sh | 12 +-
 .../connectors/test_callback_connector.py | 13 +
 .../connectors/test_checkpoint_connector.py | 155 +
 tests/trainer/flags/test_fast_dev_run.py | 1 -
 .../logging_/test_distributed_logging.py | 2 +-
 .../logging_/test_eval_loop_logging.py | 630 +-
 .../trainer/logging_/test_logger_connector.py | 534 +-
 .../logging_/test_train_loop_logging.py | 669 +-
 tests/trainer/loops/test_evaluation_loop.py | 63 +-
 .../loops/test_evaluation_loop_flow.py | 35 +-
 tests/trainer/loops/test_training_loop.py | 47 +-
 .../loops/test_training_loop_flow_scalar.py | 49 +-
 .../optimization/test_manual_optimization.py | 302 +-
 .../optimization/test_multiple_optimizers.py | 17 +-
 tests/trainer/optimization/test_optimizers.py | 85 +
 tests/trainer/test_config_validator.py | 26 +-
 tests/trainer/test_data_loading.py | 10 +-
 tests/trainer/test_dataloaders.py | 23 +-
 tests/trainer/test_progress.py | 198 +-
 tests/trainer/test_states.py | 15 +-
 tests/trainer/test_supporters.py | 4 +-
 tests/trainer/test_trainer.py | 134 +-
 tests/tuner/test_auto_gpu_select.py | 2 +-
 tests/utilities/distributed.py | 5 +-
 tests/utilities/test_apply_func.py | 147 +-
 tests/utilities/test_cli.py | 303 +-
 tests/utilities/test_model_helpers.py | 67 +
 tests/utilities/test_warnings.py | 52 +
 347 files changed, 14628 insertions(+), 22518 deletions(-)
 create mode 100644 .deepsource.toml
 create mode 100644 .gitmodules
 create mode 160000 _notebooks
 create mode 100644 docs/source/_static/images/accelerator/ipus/profiler.png
 create mode 100644 docs/source/_templates/layout.html
 delete mode 100644 docs/source/advanced/amp.rst
 create mode 100644 docs/source/advanced/ipu.rst
 delete mode 100644 docs/source/benchmarking/performance.rst
 delete mode 100644 docs/source/common/fast_training.rst
 create mode 100644 docs/source/guides/speed.rst
 delete mode 100644 notebooks/01-mnist-hello-world.ipynb
 delete mode 100644 notebooks/02-datamodules.ipynb
 delete mode 100644 notebooks/03-basic-gan.ipynb
 delete mode 100644 notebooks/04-transformers-text-classification.ipynb
 delete mode 100644 notebooks/05-trainer-flags-overview.ipynb
 delete mode 100644 notebooks/06-mnist-tpu-training.ipynb
 delete mode 100644 notebooks/07-cifar10-baseline.ipynb
 delete mode 100644 notebooks/08-Domain-specific-demos.ipynb
 delete mode 100644 notebooks/README.md
 delete mode 100644 pl_examples/basic_examples/conv_sequential_example.py
 create mode 100644 pl_examples/ipu_examples/__init__.py
 create mode 100644 pl_examples/ipu_examples/mnist.py
 create mode 100644 pytorch_lightning/accelerators/ipu.py
 delete mode 100644 pytorch_lightning/core/step_result.py
 create mode 100644 pytorch_lightning/loops/__init__.py
 create mode 100644 pytorch_lightning/loops/base.py
 create mode 100644 pytorch_lightning/loops/batch/__init__.py
 create mode 100644 pytorch_lightning/loops/batch/training_batch_loop.py
 create mode 100644 pytorch_lightning/loops/dataloader/__init__.py
 create mode 100644 pytorch_lightning/loops/dataloader/dataloader_loop.py
 create mode 100644 pytorch_lightning/loops/dataloader/evaluation_loop.py
 create mode 100644 pytorch_lightning/loops/dataloader/prediction_loop.py
 create mode 100644 pytorch_lightning/loops/epoch/__init__.py
 create mode 100644 pytorch_lightning/loops/epoch/evaluation_epoch_loop.py
 create mode 100644 pytorch_lightning/loops/epoch/prediction_epoch_loop.py
 create mode 100644 pytorch_lightning/loops/epoch/training_epoch_loop.py
 create mode 100644 pytorch_lightning/loops/fit_loop.py
 create mode 100644 pytorch_lightning/plugins/precision/ipu_precision.py
 create mode 100644 pytorch_lightning/plugins/training_type/ipu.py
 delete mode 100644 pytorch_lightning/plugins/training_type/rpc.py
 delete mode 100644 pytorch_lightning/plugins/training_type/rpc_sequential.py
 create mode 100644 pytorch_lightning/profiler/advanced.py
 create mode 100644 pytorch_lightning/profiler/base.py
 create mode 100644 pytorch_lightning/profiler/simple.py
 create mode 100644 pytorch_lightning/profiler/xla.py
 delete mode 100644 pytorch_lightning/trainer/connectors/logger_connector/epoch_result_store.py
 delete mode 100644 pytorch_lightning/trainer/connectors/logger_connector/metrics_holder.py
 create mode 100644 pytorch_lightning/trainer/connectors/logger_connector/result.py
 delete mode 100644 pytorch_lightning/trainer/evaluation_loop.py
 delete mode 100644 pytorch_lightning/trainer/predict_loop.py
 delete mode 100644 pytorch_lightning/trainer/training_loop.py
 create mode 100644 tests/accelerators/test_ipu.py
 create mode 100644 tests/loops/__init__.py
 create mode 100644 tests/loops/test_loop_state_dict.py
 create mode 100644 tests/loops/test_loops.py
 create mode 100644 tests/overrides/test_base.py
 delete mode 100644 tests/plugins/test_rpc_plugin.py
 delete mode 100644 tests/plugins/test_rpc_sequential_plugin.py
 create mode 100644 tests/profiler/__init__.py
 rename tests/{ => profiler}/test_profiler.py (99%)
 create mode 100644 tests/profiler/test_xla_profiler.py
 create mode 100644 tests/trainer/connectors/test_checkpoint_connector.py
 create mode 100644 tests/utilities/test_model_helpers.py
 create mode 100644 tests/utilities/test_warnings.py

diff --git a/.azure-pipelines/gpu-tests.yml b/.azure-pipelines/gpu-tests.yml
index 05e8624b72630..b1fedd578bc85 100644
--- a/.azure-pipelines/gpu-tests.yml
+++ b/.azure-pipelines/gpu-tests.yml
@@ -25,20 +25,14 @@ jobs:

   pool: gridai-spot-pool

-  #strategy:
-  #  matrix:
-  #    PT16:
-  #      torch.version: '1.6'
-  #      python.version: '3.7'
-
-  # ToDo: this need to have installed docker in the base image...
-  #container: pytorchlightning/pytorch_lightning:base-cuda-py3.7-torch1.6
-  #container: "pytorchlightning/pytorch_lightning:base-cuda-py$[ variables['python.version'] ]-torch1.6"
   container:
     # base ML image: mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.2-cudnn8-ubuntu18.04
-    image: "pytorchlightning/pytorch_lightning:base-cuda-py3.8-torch1.6"
-    #endpoint: azureContainerRegistryConnection
-    options: "--runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all"
+    # run on torch 1.8 as it's the LTS version
+    image: "pytorchlightning/pytorch_lightning:base-cuda-py3.7-torch1.8"
+    # default shm size is 64m. Increase it to avoid:
+    # 'Error while creating shared memory: unhandled system error, NCCL version 2.7.8'
+    options: "--runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all --shm-size=256m"

   workspace:
     clean: all
@@ -57,6 +51,7 @@ jobs:
   - bash: |
       python -c "fname = 'requirements/extra.txt' ; lines = [line for line in open(fname).readlines() if 'horovod' not in line] ; open(fname, 'w').writelines(lines)"
       pip install fairscale>=0.3.4
+      pip install deepspeed>=0.4.0 -U
       pip install . --requirement requirements/devel.txt
       pip list
     displayName: 'Install dependencies'
diff --git a/.azure-pipelines/ipu-tests.yml b/.azure-pipelines/ipu-tests.yml
index 763549e88200b..42cee6b040ba3 100644
--- a/.azure-pipelines/ipu-tests.yml
+++ b/.azure-pipelines/ipu-tests.yml
@@ -53,12 +53,9 @@ jobs:
       export GIT_TERMINAL_PROMPT=1
       python -c "fname = 'requirements/extra.txt' ; lines = [line for line in open(fname).readlines() if 'fairscale' not in line] ; open(fname, 'w').writelines(lines)"
       python -c "fname = 'requirements/extra.txt' ; lines = [line for line in open(fname).readlines() if 'horovod' not in line] ; open(fname, 'w').writelines(lines)"
-      python ./requirements/adjust_versions.py requirements/extra.txt
       python ./requirements/adjust_versions.py requirements/examples.txt
-
-      pip install --requirement ./requirements/devel.txt --upgrade-strategy only-if-needed
-
+      pip install . --requirement requirements/devel.txt
       pip list
     displayName: 'Install dependencies'
@@ -84,8 +81,17 @@ jobs:
   - bash: |
       source ${{ variables.poplar_sdk }}/poplar-ubuntu*/enable.sh
       source ${{ variables.poplar_sdk }}/popart-ubuntu*/enable.sh
-
+      export POPTORCH_WAIT_FOR_IPU=1
       python -m coverage run --source pytorch_lightning -m pytest pytorch_lightning tests -v --junitxml=$(Build.StagingDirectory)/test-results.xml --durations=50
     env:
       MKL_THREADING_LAYER: "GNU"
     displayName: 'Testing: standard'
+
+  - bash: |
+      source ${{ variables.poplar_sdk }}/poplar-ubuntu*/enable.sh
+      source ${{ variables.poplar_sdk }}/popart-ubuntu*/enable.sh
+      export POPTORCH_WAIT_FOR_IPU=1
+      bash tests/special_tests.sh
+    env:
+      MKL_THREADING_LAYER: "GNU"
+    displayName: 'Testing: special'
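The new IPU pipeline above exercises the IPU accelerator this PR introduces (see `pytorch_lightning/accelerators/ipu.py` and `pl_examples/ipu_examples/mnist.py` in the diffstat). A minimal sketch of what that enables, assuming the `ipus` Trainer argument that ships with the accelerator; the `LitClassifier` import is hypothetical, standing in for whatever model the new example module defines:

```python
from pytorch_lightning import Trainer
from pl_examples.ipu_examples.mnist import LitClassifier  # hypothetical name; module is new in this PR

model = LitClassifier()
# `ipus=8` requests 8 Graphcore IPUs; poptorch and the Poplar SDK must be installed
trainer = Trainer(ipus=8, max_epochs=2)
trainer.fit(model)
```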
diff --git a/.circleci/config.yml b/.circleci/config.yml
index 660645abcbbe4..fa9753e063a3f 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -91,7 +91,7 @@ jobs:
     docker:
       - image: circleci/python:3.7
     environment:
-      - XLA_VER: 1.7
+      - XLA_VER: 1.8
       - MAX_CHECKS: 240
       - CHECK_SPEEP: 5
     steps:
@@ -119,6 +119,10 @@ jobs:
       - image: readthedocs/build:latest
     steps:
       - checkout
+      - run:
+          command: |
+            git submodule update --init --recursive
+          name: Init git submodule
       - *make_docs
       - store_artifacts:
           # allows us to preview the generated html pages
@@ -127,7 +131,7 @@ jobs:

 workflows:
   version: 2
-  tpu-tests:
+  ci-tests:
     jobs:
       - build-Docs
       - TPU-tests
diff --git a/.deepsource.toml b/.deepsource.toml
new file mode 100644
index 0000000000000..49e710a55b3b2
--- /dev/null
+++ b/.deepsource.toml
@@ -0,0 +1,26 @@
+version = 1
+
+test_patterns = ["tests/**", "benchmarks/**"]
+
+[[analyzers]]
+name = "secrets"
+enabled = true
+
+[[analyzers]]
+name = "shell"
+enabled = true
+
+[[analyzers]]
+name = "docker"
+enabled = true
+
+[[analyzers]]
+name = "python"
+enabled = true
+
+  [analyzers.meta]
+    runtime_version = "3.x.x"
+
+[[analyzers]]
+name = "test-coverage"
+enabled = true
diff --git a/.github/BECOMING_A_CORE_CONTRIBUTOR.md b/.github/BECOMING_A_CORE_CONTRIBUTOR.md
index 2b3ba3ee93235..a5e7d3830bdd9 100644
--- a/.github/BECOMING_A_CORE_CONTRIBUTOR.md
+++ b/.github/BECOMING_A_CORE_CONTRIBUTOR.md
@@ -5,18 +5,18 @@ We're currently recruiting for a team of 5 core maintainers.
 As a core maintainer you will have a strong say in the direction of the project.
 Big changes will require a majority of maintainers to agree.

-### Code of conduct
+## Code of conduct
 First and foremost, you'll be evaluated against [these core values](https://github.com/PyTorchLightning/pytorch-lightning/blob/master/.github/CONTRIBUTING.md).
 Any code we commit or feature we add needs to align with those core values.

-### The bar for joining the team
+## The bar for joining the team
 Lightning is being used to solve really hard problems at the top AI labs in the world.
 As such, the bar for adding team members is extremely high. Candidates must have solid engineering skills, have a good eye for user experience, and must be a power user of Lightning and PyTorch.
 With that said, the Lightning team will be diverse and a reflection of an inclusive AI community.
 You don't have to be an engineer to contribute! Scientists with great usability intuition and PyTorch ninja skills are welcomed!

-### Responsibilities:
+## Responsibilities:
 The responsibilities mainly revolve around 3 things.

-#### Github issues
+### Github issues
 - Here we want to help users have an amazing experience.
 These range from questions from new people getting into DL to questions from researchers about doing something esoteric with Lightning.
 Often, these issues require some sort of bug fix, document clarification or new functionality to be scoped out.
@@ -27,7 +27,7 @@ Pleasant/helpful tone.
 - Don’t make users feel like they don’t know what they’re doing.
 We’re here to help and to make everyone’s experience delightful.

-#### Pull requests
+### Pull requests
 - Here we need to ensure the code that enters Lightning is high quality.
 For each PR we need to:
 - Make sure code coverage does not decrease
@@ -43,16 +43,16 @@ Guidance
 for a sanity check.
 At the end of 10 PRs if your PR reviews are inline with expectations described above, then you can merge PRs on your own going forward, otherwise we'll do a few more until we're both comfortable :)

-#### Project directions
+### Project directions
 There are some big decisions which the project must make.
 For these I expect core contributors to have something meaningful to add if it’s their area of expertise.

-#### Diversity
+### Diversity
 Lightning should reflect the broader community it serves.
 As such we should have scientists/researchers from different fields contributing!

 The first 5 core contributors will fit this profile.
 Thus if you overlap strongly with experiences and expertise as someone else on the team, you might have to wait until the next set of contributors are added.

-#### Summary: Requirements to apply
+### Summary: Requirements to apply
 The goal is to be inline with expectations for solving issues by the last one so you can do them on your own.
 If not, I might ask you to solve a few more specific ones.

 - Solve 10+ Github issues.
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 39f38bf266af0..d6fc6ce5fe64e 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -25,6 +25,7 @@
 /pytorch_lightning/core        @tchaton @SeanNaren @borda @carmocca @justusschock @kaushikb11
 /pytorch_lightning/distributed @williamfalcon @tchaton @awaelchli @kaushikb11
 /pytorch_lightning/loggers     @tchaton @awaelchli @borda
+/pytorch_lightning/loops       @tchaton @awaelchli @justusschock @carmocca
 /pytorch_lightning/overrides   @tchaton @SeanNaren @borda
 /pytorch_lightning/plugins     @tchaton @SeanNaren @awaelchli @justusschock
 /pytorch_lightning/profiler    @williamfalcon @tchaton @borda @carmocca
@@ -33,6 +34,10 @@
 /pytorch_lightning/tuner       @SkafteNicki @borda @awaelchli
 /pytorch_lightning/utilities   @borda @tchaton @SeanNaren @carmocca

+# Specifics
+/pytorch_lightning/trainer/connectors/logger_connector @tchaton @carmocca
+/pytorch_lightning/trainer/progress.py                 @tchaton @awaelchli @carmocca
+
 # Metrics
 /pytorch_lightning/metrics/ @SkafteNicki @ananyahjha93 @justusschock
 /tests/metrics/             @SkafteNicki @ananyahjha93 @justusschock
diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
index 8414bf43f68a2..ee9706172e2ac 100644
--- a/.github/CONTRIBUTING.md
+++ b/.github/CONTRIBUTING.md
@@ -2,6 +2,8 @@

 Welcome to the PyTorch Lightning community! We're building the most advanced research platform on the planet to implement the latest, best practices that the amazing PyTorch team rolls out!

+If you are new to open source, check out [this blog to get started with your first Open Source contribution](https://devblog.pytorchlightning.ai/quick-contribution-guide-86d977171b3a).
+
 ## Main Core Value: One less thing to remember

 Simplify the API as much as possible from the user perspective.
@@ -14,18 +16,18 @@ This helps users avoid all sorts of subtle errors.

 We encourage all sorts of contributions you're interested in adding! When coding for lightning, please follow these principles.

-#### No PyTorch Interference
+### No PyTorch Interference

 We don't want to add any abstractions on top of pure PyTorch. This gives researchers all the control they need without having to learn yet another framework.

-#### Simple Internal Code
+### Simple Internal Code

 It's useful for users to look at the code and understand very quickly what's happening. Many users won't be engineers. Thus we need to value clear, simple code over condensed ninja moves. While that's super cool, this isn't the project for that :)

-#### Force User Decisions To Best Practices
+### Force User Decisions To Best Practices

 There are 1,000 ways to do something. However, eventually one popular solution becomes standard practice, and everyone follows.
 We try to find the best way to solve a particular problem, and then force our users to use it for readability and simplicity.
@@ -35,22 +37,22 @@ A bad forced decision would be to make users use a specific library to do something.

 When something becomes a best practice, we add it to the framework. This is usually something like bits of code in utils or in the model file that everyone keeps adding over and over again across projects. When this happens, bring that code inside the trainer and add a flag for it.

-#### Simple External API
+### Simple External API

 What makes sense to you may not make sense to others. When creating an issue with an API change suggestion, please validate that it makes sense for others.
 Treat code changes the way you treat a startup: validate that it's a needed feature, then add if it makes sense for many people.

-#### Backward-compatible API
+### Backward-compatible API

 We all hate updating our deep learning packages because we don't want to refactor a bunch of stuff. In Lightning, we make sure every change we make which could break an API is backward compatible with good deprecation warnings.

 **You shouldn't be afraid to upgrade Lightning :)**

-#### Gain User Trust
+### Gain User Trust

 As a researcher, you can't have any part of your code going wrong. So, make thorough tests to ensure that every implementation of a new trick or subtle change is correct.

-#### Interoperability
+### Interoperability

 Have a favorite feature from other libraries like fast.ai or transformers? Those should just work with lightning as well. Grab your favorite model or learning rate scheduler from your favorite library and run it in Lightning.

@@ -58,13 +60,13 @@ Have a favorite feature from other libraries like fast.ai or transformers? Those

 ## Contribution Types

-We are always looking for help implementing new features or fixing bugs.
+We are always open to contributions of new features or bug fixes.

 A lot of good work has already been done in project mechanics (requirements.txt, setup.py, pep8, badges, ci, etc...) so we're in a good state there thanks to all the early contributors (even pre-beta release)!

 ### Bug Fixes:

-1. If you find a bug please submit a github issue.
+1. If you find a bug please submit a GitHub issue.

    - Make sure the title explains the issue.
    - Describe your setup, what you are trying to do, expected vs. actual behaviour. Please add configs and code samples.
@@ -79,12 +81,12 @@ A lot of good work has already been done in project mechanics (requirements.txt,

 3. Submit a PR!

-_**Note**, even if you do not find the solution, sending a PR with a test covering the issue is a valid contribution and we can help you or finish it with you :]_
+_**Note**, even if you do not find the solution, sending a PR with a test covering the issue is a valid contribution, and we can help you or finish it with you :]_

 ### New Features:

-1. Submit a github issue - describe what is the motivation of such feature (adding the use case or an example is helpful).
-2. Let's discuss to determine the feature scope.
+1. Submit a GitHub issue - describe what is the motivation of such feature (adding the use case, or an example is helpful).
+2. Determine the feature scope with us.
 3. Submit a PR!
    We recommend test driven approach to adding new features as well:

    - Write a test for the functionality you want to add.
@@ -199,7 +201,7 @@ Note: if your computer does not have multi-GPU nor TPU these tests are skipped.

 **GitHub Actions:** For convenience, you can also use your own GHActions building which will be triggered with each commit. This is useful if you do not test against all required dependency versions.

-**Docker:** Another option is utilize the [pytorch lightning cuda base docker image](https://hub.docker.com/repository/docker/pytorchlightning/pytorch_lightning/tags?page=1&name=cuda). You can then run:
+**Docker:** Another option is to utilize the [pytorch lightning cuda base docker image](https://hub.docker.com/repository/docker/pytorchlightning/pytorch_lightning/tags?page=1&name=cuda). You can then run:

 ```bash
 python -m pytest pytorch_lightning tests pl_examples -v
```

@@ -230,7 +232,7 @@ We welcome any useful contribution! For your convenience here's a recommended workflow:

    - Make sure all tests are passing.
    - Make sure you add a GitHub issue to your PR.
 5. Use tags in PR name for following cases:

-   - **[blocked by #]** if you work is depending on others changes.
+   - **[blocked by #]** if your work is dependent on other PRs.
    - **[wip]** when you start to re-edit your work, mark it so no one will accidentally merge it in meantime.

 ### Question & Answer
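As a companion to the test-driven workflow the contribution guide above describes, a minimal sketch of what such a test could look like, using the `BoringModel` helper that lives under `tests/helpers/boring_model.py` in this tree (the exact import path and assertion are assumptions based on the diffstat and the usual `fast_dev_run` behaviour):

```python
from pytorch_lightning import Trainer
from tests.helpers.boring_model import BoringModel  # helper path per the diffstat above


def test_trainer_runs_a_single_fast_dev_batch(tmpdir):
    """Write the failing test first, then fix the code until it passes."""
    model = BoringModel()
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    trainer.fit(model)
    assert trainer.global_step == 1  # fast_dev_run runs exactly one train batch
```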
### Question & Answer diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index cef062516b0eb..9faa2331a2f27 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -41,13 +41,14 @@ wget https://raw.githubusercontent.com/PyTorchLightning/pytorch-lightning/master python collect_env_details.py ``` - - PyTorch Version (e.g., 1.0): - - OS (e.g., Linux): - - How you installed PyTorch (`conda`, `pip`, source): - - Build command you used (if compiling from source): + - PyTorch Lightning Version (e.g., 1.3.0): + - PyTorch Version (e.g., 1.8) - Python version: + - OS (e.g., Linux): - CUDA/cuDNN version: - GPU models and configuration: + - How you installed PyTorch (`conda`, `pip`, source): + - If compiling from source, the output of `torch.__config__.show()`: - Any other relevant information: ### Additional context diff --git a/.github/workflows/ci_test-conda.yml b/.github/workflows/ci_test-conda.yml index 9ed2f30e0b062..0d7dae8fa8b41 100644 --- a/.github/workflows/ci_test-conda.yml +++ b/.github/workflows/ci_test-conda.yml @@ -34,9 +34,9 @@ jobs: pip list - name: Pull checkpoints from S3 + working-directory: ./legacy run: | # enter legacy and update checkpoints from S3 - cd legacy curl https://pl-public-data.s3.amazonaws.com/legacy/checkpoints.zip --output checkpoints.zip unzip -o checkpoints.zip ls -l checkpoints/ diff --git a/.github/workflows/ci_test-full.yml b/.github/workflows/ci_test-full.yml index bffd604c9d333..1064e603bee1f 100644 --- a/.github/workflows/ci_test-full.yml +++ b/.github/workflows/ci_test-full.yml @@ -96,8 +96,8 @@ jobs: ${{ runner.os }}-pip-td${{ steps.times.outputs.period }}-py${{ matrix.python-version }}-${{ matrix.release }}-${{ matrix.requires }}- - name: Pull checkpoints from S3 + working-directory: ./legacy run: | - cd legacy # wget is simpler but does not work on Windows python -c "from urllib.request import urlretrieve ; urlretrieve('https://pl-public-data.s3.amazonaws.com/legacy/checkpoints.zip', 'checkpoints.zip')" ls -l . 
diff --git a/.github/workflows/code-formatting.yml b/.github/workflows/code-formatting.yml index bc03905ab2bbd..1cb8633545995 100644 --- a/.github/workflows/code-formatting.yml +++ b/.github/workflows/code-formatting.yml @@ -74,3 +74,23 @@ jobs: - name: mypy check run: | mypy + + dead-code-vulture: + name: Python dead code checker + runs-on: ubuntu-20.04 + + # Timeout: https://stackoverflow.com/a/59076067/4521646 + timeout-minutes: 10 + steps: + - name: Checkout + uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + with: + python-version: 3.7 + + - name: Install dependencies + run: pip install vulture && pip list + + - name: Check for dead code with Vulture + run: | + vulture pytorch_lightning diff --git a/.github/workflows/docs-checks.yml b/.github/workflows/docs-checks.yml index 7613310df40de..8569bf4b0a24b 100644 --- a/.github/workflows/docs-checks.yml +++ b/.github/workflows/docs-checks.yml @@ -11,20 +11,25 @@ jobs: sphinx-check: runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v2 - - uses: ammaraskar/sphinx-action@master - with: - # git is required to clone the docs theme - # before custom requirement are resolved https://github.com/ammaraskar/sphinx-action/issues/16 - pre-build-command: "apt-get update -y && apt-get install -y git && pip install -r requirements/docs.txt" - docs-folder: "docs/" - repo-token: "${{ secrets.GITHUB_TOKEN }}" + - uses: actions/checkout@v2 + with: + submodules: true + # lfs: true + - uses: ammaraskar/sphinx-action@master + with: + # git is required to clone the docs theme + # before custom requirement are resolved https://github.com/ammaraskar/sphinx-action/issues/16 + pre-build-command: "apt-get update -y && apt-get install -y git pandoc && pip install -r requirements/docs.txt" + docs-folder: "docs/" + repo-token: "${{ secrets.GITHUB_TOKEN }}" test-docs: runs-on: ubuntu-20.04 - steps: - uses: actions/checkout@v2 + with: + submodules: true + # lfs: true - uses: actions/setup-python@v2 with: python-version: 3.7 @@ -45,7 +50,8 @@ jobs: - name: Install dependencies run: | - python --version + sudo apt-get update + sudo apt-get install -y cmake pandoc pip --version # remove Horovod from requirements python .github/prune-packages.py requirements/extra.txt "horovod" @@ -60,18 +66,19 @@ jobs: - name: Test Documentation env: SPHINX_MOCK_REQUIREMENTS: 0 + working-directory: ./docs run: | # First run the same pipeline as Read-The-Docs - apt-get update && sudo apt-get install -y cmake - cd docs make doctest make coverage make-docs: runs-on: ubuntu-20.04 - steps: - uses: actions/checkout@v2 + with: + submodules: true + # lfs: true - uses: actions/setup-python@v2 with: python-version: 3.7 @@ -88,7 +95,8 @@ jobs: - name: Install dependencies run: | - python --version + sudo apt-get update + sudo apt-get install -y cmake pandoc pip --version # pip install --requirement requirements.txt --upgrade-strategy only-if-needed --find-links https://download.pytorch.org/whl/cpu/torch_stable.html --quiet pip install --requirement requirements/docs.txt @@ -98,10 +106,9 @@ jobs: shell: bash - name: Make Documentation + working-directory: ./docs run: | # First run the same pipeline as Read-The-Docs - cd docs - make clean make html --debug --jobs $(nproc) SPHINXOPTS="-W --keep-going" - name: Upload built docs diff --git a/.github/workflows/release-pypi.yml b/.github/workflows/release-pypi.yml index 62bf1b1aa00ac..8e81ef40a0b3c 100644 --- a/.github/workflows/release-pypi.yml +++ b/.github/workflows/release-pypi.yml @@ -137,8 +137,8 @@ jobs: bash 
legacy/generate_checkpoints.sh $pl_ver - name: Push files to S3 + working-directory: ./legacy run: | aws s3 sync legacy/checkpoints/ s3://pl-public-data/legacy/checkpoints/ - cd legacy zip -r checkpoints.zip checkpoints aws s3 cp checkpoints.zip s3://pl-public-data/legacy/ --acl public-read diff --git a/.gitignore b/.gitignore index 99939ff7fce0c..59340744ce420 100644 --- a/.gitignore +++ b/.gitignore @@ -8,15 +8,14 @@ lightning_logs/ .vscode/ # Test-tube -test_tube_logs/ -test_tube_data/ -test_tube_exp/ +test_tube_*/ # Documentations docs/source/api docs/source/*.md docs/source/generated docs/source/*/generated +docs/source/notebooks # Byte-compiled / optimized / DLL files __pycache__/ @@ -154,7 +153,3 @@ cifar-10-batches-py *.pt # ctags tags -data -MNIST -runs -*trace* diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000000000..b311352c45f4c --- /dev/null +++ b/.gitmodules @@ -0,0 +1,4 @@ +[submodule "_notebooks"] + path = _notebooks + url = https://github.com/PyTorchLightning/lightning-tutorials.git + branch = publication diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5814ea965d179..fe1cbced9a9a9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -19,17 +19,47 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.0.1 hooks: - - id: trailing-whitespace - id: end-of-file-fixer + - id: trailing-whitespace + - id: check-yaml + - id: check-docstring-first + - id: check-executables-have-shebangs + - id: check-toml + - id: check-case-conflict + - id: check-added-large-files + args: ['--maxkb=350', '--enforce-all'] + exclude: | + (?x)^( + docs/source/_static/images/general/fast_2.gif| + docs/source/_static/images/mnist_imgs/pt_to_pl.jpg| + docs/source/_static/images/lightning_module/pt_to_pl.png| + docs/source/_static/images/general/pl_quick_start_full_compressed.gif| + docs/source/_static/images/general/pl_overview_flat.jpg| + docs/source/_static/images/general/pl_overview.gif + )$ + - id: detect-private-key - repo: https://github.com/PyCQA/isort - rev: 5.8.0 + rev: 5.9.1 hooks: - id: isort - args: [--settings-path, ./pyproject.toml] + name: Format imports - repo: https://github.com/pre-commit/mirrors-yapf rev: v0.31.0 hooks: - id: yapf - args: [--parallel, --in-place] + name: Format code + language: python + + - repo: https://github.com/jendrikseipp/vulture + rev: 'v2.3' + hooks: + - id: vulture + name: Check dead code + + - repo: https://github.com/PyCQA/flake8 + rev: 3.9.2 + hooks: + - id: flake8 + name: Check PEP8 diff --git a/.readthedocs.yml b/.readthedocs.yml index 32a5a16248b91..ef0c98ec96797 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -19,6 +19,10 @@ # Required version: 2 +submodules: + include: all + recursive: true + # Build documentation in the docs/ directory with Sphinx # reference: https://docs.readthedocs.io/en/stable/config-file/v2.html#sphinx sphinx: diff --git a/CHANGELOG.md b/CHANGELOG.md index 199aa70329e24..2256dcefeac31 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,12 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
### Added +- Add support for named parameter groups in `LearningRateMonitor` ([#7987](https://github.com/PyTorchLightning/pytorch-lightning/pull/7987)) + + +- Add `dataclass` support for `pytorch_lightning.utilities.apply_to_collection` ([#7935](https://github.com/PyTorchLightning/pytorch-lightning/pull/7935)) + + - Added support to `LightningModule.to_torchscript` for saving to custom filesystems with fsspec ([#7617](https://github.com/PyTorchLightning/pytorch-lightning/pull/7617)) @@ -24,9 +30,12 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Added support for checkpointing based on a provided time interval during training ([#7515](https://github.com/PyTorchLightning/pytorch-lightning/pull/7515)) -- Added dataclasses for progress tracking ( - [#6603](https://github.com/PyTorchLightning/pytorch-lightning/pull/6603), - [#7574](https://github.com/PyTorchLightning/pytorch-lightning/pull/7574)) +- Progress tracking + * Added dataclasses for progress tracking ([#6603](https://github.com/PyTorchLightning/pytorch-lightning/pull/6603), [#7574](https://github.com/PyTorchLightning/pytorch-lightning/pull/7574), [#8140](https://github.com/PyTorchLightning/pytorch-lightning/pull/8140)) + * Add `{,load_}state_dict` to the progress tracking dataclasses ([#8140](https://github.com/PyTorchLightning/pytorch-lightning/pull/8140)) + + +- Added support for passing a `LightningDataModule` positionally as the second argument to `trainer.{validate,test,predict}` ([#7431](https://github.com/PyTorchLightning/pytorch-lightning/pull/7431)) - Added argument `trainer.predict(ckpt_path)` ([#7430](https://github.com/PyTorchLightning/pytorch-lightning/pull/7430)) @@ -35,21 +44,94 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
- Added `clip_grad_by_value` support for TPUs ([#7025](https://github.com/PyTorchLightning/pytorch-lightning/pull/7025)) +- Added support for passing any class to `is_overridden` ([#7918](https://github.com/PyTorchLightning/pytorch-lightning/pull/7918)) + + - Added `sub_dir` parameter to `TensorBoardLogger` ([#6195](https://github.com/PyTorchLightning/pytorch-lightning/pull/6195)) - Added correct `dataloader_idx` to batch transfer hooks ([#6241](https://github.com/PyTorchLightning/pytorch-lightning/pull/6241)) +- Added `include_none=bool` argument to `apply_to_collection` ([#7769](https://github.com/PyTorchLightning/pytorch-lightning/pull/7769)) + + +- Added `apply_to_collections` to apply a function to two zipped collections ([#7769](https://github.com/PyTorchLightning/pytorch-lightning/pull/7769)) + + - Added `ddp_fully_sharded` support ([#7487](https://github.com/PyTorchLightning/pytorch-lightning/pull/7487)) - Added `should_rank_save_checkpoint` property to Training Plugins ([#7684](https://github.com/PyTorchLightning/pytorch-lightning/pull/7684)) +- Added `log_grad_norm` hook to `LightningModule` to customize the logging of gradient norms ([#7873](https://github.com/PyTorchLightning/pytorch-lightning/pull/7873)) + + +- Added `save_config_filename` init argument to `LightningCLI` to ease resolving name conflicts ([#7741](https://github.com/PyTorchLightning/pytorch-lightning/pull/7741)) + + +- Added `save_config_overwrite` init argument to `LightningCLI` to ease overwriting existing config files ([#8059](https://github.com/PyTorchLightning/pytorch-lightning/pull/8059)) + + +- Added reset dataloader hooks to Training Plugins and Accelerators ([#7861](https://github.com/PyTorchLightning/pytorch-lightning/pull/7861)) + + +- Added trainer stage hooks for Training Plugins and Accelerators ([#7864](https://github.com/PyTorchLightning/pytorch-lightning/pull/7864)) + + +- Added IPU Accelerator ([#7867](https://github.com/PyTorchLightning/pytorch-lightning/pull/7867)) + + +- Fault-tolerant training + * Added `{,load_}state_dict` to `ResultCollection` ([#7948](https://github.com/PyTorchLightning/pytorch-lightning/pull/7948)) + * Added `{,load_}state_dict` to `Loops` ([#8197](https://github.com/PyTorchLightning/pytorch-lightning/pull/8197)) + + +- Added `rank_zero_only` to `LightningModule.log` function ([#7966](https://github.com/PyTorchLightning/pytorch-lightning/pull/7966)) + + +- Added `metric_attribute` to `LightningModule.log` function ([#7966](https://github.com/PyTorchLightning/pytorch-lightning/pull/7966)) + + +- Added a warning if `Trainer(log_every_n_steps)` is a value too high for the training dataloader ([#7734](https://github.com/PyTorchLightning/pytorch-lightning/pull/7734)) + + +- Added LightningCLI support for argument links applied on instantiation ([#7895](https://github.com/PyTorchLightning/pytorch-lightning/pull/7895)) + + +- Added LightningCLI support for configurable callbacks that should always be present ([#7964](https://github.com/PyTorchLightning/pytorch-lightning/pull/7964)) + + +- Added DeepSpeed Infinity Support, and updated to DeepSpeed 0.4.0 ([#7234](https://github.com/PyTorchLightning/pytorch-lightning/pull/7234)) + + +- Added support for `torch.nn.UninitializedParameter` in `ModelSummary` ([#7642](https://github.com/PyTorchLightning/pytorch-lightning/pull/7642)) + + +- Added support `LightningModule.save_hyperparameters` when `LightningModule` is a dataclass ([#7992](https://github.com/PyTorchLightning/pytorch-lightning/pull/7992)) + + +- Add support for 
overriding `optimizer_zero_grad` and `optimizer_step` when using accumulate_grad_batches ([#7980](https://github.com/PyTorchLightning/pytorch-lightning/pull/7980)) + + +- Add support for calling scripts using the module syntax (`python -m package.script`) ([#8073](https://github.com/PyTorchLightning/pytorch-lightning/pull/8073)) + + +- Add support for optimizers and learning rate schedulers to `LightningCLI` ([#8093](https://github.com/PyTorchLightning/pytorch-lightning/pull/8093)) + + +- Added XLA Profiler ([#8014](https://github.com/PyTorchLightning/pytorch-lightning/pull/8014)) + + +- Added `max_depth` parameter in `ModelSummary` ([#8062](https://github.com/PyTorchLightning/pytorch-lightning/pull/8062)) + + +- Added `restore` function and `restarting` attribute to base `Loop` ([#8247](https://github.com/PyTorchLightning/pytorch-lightning/pull/8247)) + + ### Changed -- Changed calling of `untoggle_optimizer(opt_idx)` out of the closure function ([#7563](https://github.com/PyTorchLightning/pytorch-lightning/pull/7563) - Changed the `Trainer`'s `checkpoint_callback` argument to allow only boolean values ([#7539](https://github.com/PyTorchLightning/pytorch-lightning/pull/7539)) @@ -57,9 +139,15 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Log epoch metrics before the `on_evaluation_end` hook ([#7272](https://github.com/PyTorchLightning/pytorch-lightning/pull/7272)) +- Explicitly disallow calling `self.log(on_epoch=False)` during epoch-only or single-call hooks ([#7874](https://github.com/PyTorchLightning/pytorch-lightning/pull/7874)) + + - Changed these `Trainer` methods to be protected: `call_setup_hook`, `call_configure_sharded_model`, `pre_dispatch`, `dispatch`, `post_dispatch`, `call_teardown_hook`, `run_train`, `run_sanity_check`, `run_evaluate`, `run_evaluation`, `run_predict`, `track_output_for_epoch_end` +- Changed `metrics_to_scalars` to work with any collection or value ([#7888](https://github.com/PyTorchLightning/pytorch-lightning/pull/7888)) + + - Changed `clip_grad_norm` to use `torch.nn.utils.clip_grad_norm_` ([#7025](https://github.com/PyTorchLightning/pytorch-lightning/pull/7025)) @@ -67,10 +155,34 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
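For the `apply_to_collections` entry in the Added section above ([#7769]), a minimal sketch assuming the helper is importable from `pytorch_lightning.utilities.apply_func`; the dictionaries are invented for the example.

```python
import torch
from pytorch_lightning.utilities.apply_func import apply_to_collections

preds = {"a": torch.tensor([1.0, 2.0]), "b": torch.tensor([3.0])}
targets = {"a": torch.tensor([1.5, 2.5]), "b": torch.tensor([2.0])}

# Matching leaves from both collections are passed to the function and the
# surrounding dict structure is preserved in the result.
abs_err = apply_to_collections(preds, targets, torch.Tensor, lambda p, t: (p - t).abs())
```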
- Refactored Loops - * Moved attributes `global_step`, `current_epoch`, `max/min_steps`, `max/min_epochs`, `batch_idx`, and `total_batch_idx` to TrainLoop ([#7437](https://github.com/PyTorchLightning/pytorch-lightning/pull/7025)) + * Moved attributes `global_step`, `current_epoch`, `max/min_steps`, `max/min_epochs`, `batch_idx`, and `total_batch_idx` to TrainLoop ([#7437](https://github.com/PyTorchLightning/pytorch-lightning/pull/7437)) * Refactored result handling in training loop ([#7506](https://github.com/PyTorchLightning/pytorch-lightning/pull/7506)) * Moved attributes `hiddens` and `split_idx` to TrainLoop ([#7507](https://github.com/PyTorchLightning/pytorch-lightning/pull/7507)) * Refactored the logic around manual and automatic optimization inside the optimizer loop ([#7526](https://github.com/PyTorchLightning/pytorch-lightning/pull/7526)) + * Simplified "should run validation" logic ([#7682](https://github.com/PyTorchLightning/pytorch-lightning/pull/7682)) + * Simplified logic for updating the learning rate for schedulers ([#7682](https://github.com/PyTorchLightning/pytorch-lightning/pull/7682)) + * Removed the `on_epoch` guard from the "should stop" validation check ([#7701](https://github.com/PyTorchLightning/pytorch-lightning/pull/7701)) + * Refactored internal loop interface; added new classes `FitLoop`, `TrainingEpochLoop`, `TrainingBatchLoop` ([#7871](https://github.com/PyTorchLightning/pytorch-lightning/pull/7871), [#8077](https://github.com/PyTorchLightning/pytorch-lightning/pull/8077)) + * Removed `pytorch_lightning/trainer/training_loop.py` ([#7985](https://github.com/PyTorchLightning/pytorch-lightning/pull/7985)) + * Refactored evaluation loop interface; added new classes `DataLoaderLoop`, `EvaluationLoop`, `EvaluationEpochLoop` ([#7990](https://github.com/PyTorchLightning/pytorch-lightning/pull/7990), [#8077](https://github.com/PyTorchLightning/pytorch-lightning/pull/8077)) + * Removed `pytorch_lightning/trainer/evaluation_loop.py` ([#8056](https://github.com/PyTorchLightning/pytorch-lightning/pull/8056)) + * Restricted public access to several internal functions ([#8024](https://github.com/PyTorchLightning/pytorch-lightning/pull/8024)) + * Refactored trainer `_run_*` functions and separate evaluation loops ([#8065](https://github.com/PyTorchLightning/pytorch-lightning/pull/8065)) + * Refactored prediction loop interface; added new classes `PredictionLoop`, `PredictionEpochLoop` ([#7700](https://github.com/PyTorchLightning/pytorch-lightning/pull/7700), [#8077](https://github.com/PyTorchLightning/pytorch-lightning/pull/8077)) + * Removed `pytorch_lightning/trainer/predict_loop.py` ([#8094](https://github.com/PyTorchLightning/pytorch-lightning/pull/8094)) + * Moved result teardown to the loops ([#8245](https://github.com/PyTorchLightning/pytorch-lightning/pull/8245)) + + +- Refactored logging + * Renamed and moved `core/step_result.py` to `trainer/connectors/logger_connector/result.py` ([#7736](https://github.com/PyTorchLightning/pytorch-lightning/pull/7736)) + * Dramatically simplify the `LoggerConnector` ([#7882](https://github.com/PyTorchLightning/pytorch-lightning/pull/7882)) + * `trainer.{logged,progress_bar,callback}_metrics` are now updated on-demand ([#7882](https://github.com/PyTorchLightning/pytorch-lightning/pull/7882)) + * Completely overhaul the `Result` object in favor of `ResultMetric` ([#7882](https://github.com/PyTorchLightning/pytorch-lightning/pull/7882)) + * Improve epoch-level reduction time and overall memory usage 
([#7882](https://github.com/PyTorchLightning/pytorch-lightning/pull/7882))
+    * Allow passing `self.log(batch_size=...)` ([#7891](https://github.com/PyTorchLightning/pytorch-lightning/pull/7891))
+    * Each of the training loops now keeps its own results collection ([#7891](https://github.com/PyTorchLightning/pytorch-lightning/pull/7891))
+    * Remove `EpochResultStore` and `HookResultStore` in favor of `ResultCollection` ([#7909](https://github.com/PyTorchLightning/pytorch-lightning/pull/7909))
+    * Remove `MetricsHolder` ([#7909](https://github.com/PyTorchLightning/pytorch-lightning/pull/7909))


- Moved `ignore_scalar_return_in_dp` warning suppression to the DataParallelPlugin class ([#7421](https://github.com/PyTorchLightning/pytorch-lightning/pull/7421/))


@@ -79,6 +191,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Changed the behaviour when logging evaluation step metrics to no longer append `/epoch_*` to the metric name ([#7351](https://github.com/PyTorchLightning/pytorch-lightning/pull/7351))

+- Raise `ValueError` when a `None` value is `self.log`-ed ([#7771](https://github.com/PyTorchLightning/pytorch-lightning/pull/7771))
+
+
- Changed `resolve_training_type_plugins` to allow setting `num_nodes` and `sync_batchnorm` from `Trainer` setting ([#7026](https://github.com/PyTorchLightning/pytorch-lightning/pull/7026))

@@ -91,21 +206,78 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- MLflowLogger now uses the env variable `MLFLOW_TRACKING_URI` as default tracking uri ([#7457](https://github.com/PyTorchLightning/pytorch-lightning/pull/7457))

+- Changed `WandbLogger(log_model={True/'all'})` to log models as artifacts ([#6231](https://github.com/PyTorchLightning/pytorch-lightning/pull/6231))
+
+
- MLFlowLogger now accepts `run_name` as a constructor argument ([#7622](https://github.com/PyTorchLightning/pytorch-lightning/issues/7622))


- Changed `teardown()` in `Accelerator` to allow `training_type_plugin` to customize `teardown` logic ([#7579](https://github.com/PyTorchLightning/pytorch-lightning/pull/7579))

+- `Trainer.fit` now raises an error when using manual optimization with unsupported features such as `gradient_clip_val` or `accumulate_grad_batches` ([#7788](https://github.com/PyTorchLightning/pytorch-lightning/pull/7788))
+
+
+- Accelerator hooks are called regardless of whether the `LightningModule` overrides the same hooks ([#7826](https://github.com/PyTorchLightning/pytorch-lightning/pull/7826))
+
+
+- Moved profilers to their own file ([#7822](https://github.com/PyTorchLightning/pytorch-lightning/pull/7822))
+
+
+- Added `on_load_checkpoint` and `on_save_checkpoint` hooks to the `PrecisionPlugin` base class ([#7831](https://github.com/PyTorchLightning/pytorch-lightning/pull/7831))
+
+
+- `LightningCLI` now aborts with a clearer message if the config already exists, and disables saving the config during `fast_dev_run` ([#7963](https://github.com/PyTorchLightning/pytorch-lightning/pull/7963))
+
+
+- `Trainer(resume_from_checkpoint=...)` now restores the model directly after `LightningModule.setup()`, which is before `LightningModule.configure_sharded_model()` ([#7652](https://github.com/PyTorchLightning/pytorch-lightning/pull/7652))
+
+
### Deprecated

+- Deprecated `LightningModule.loaded_optimizer_states_dict` ([#8229](https://github.com/PyTorchLightning/pytorch-lightning/pull/8229))
+
+
+- Standardized the dataloaders arguments of `trainer.{fit,validate,test,tune}` ([#7431](https://github.com/PyTorchLightning/pytorch-lightning/pull/7431))
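To make the standardized signatures concrete, a hedged sketch of `trainer.validate` with the unified `dataloaders` keyword ([#7431]); `TinyModel` and the random data are invented for the example, and the exact keyword spelling should be checked against the released API.

```python
import torch
from torch.utils.data import DataLoader, TensorDataset
import pytorch_lightning as pl


class TinyModel(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.layer = torch.nn.Linear(4, 1)

    def validation_step(self, batch, batch_idx):
        x, y = batch
        self.log("val_loss", torch.nn.functional.mse_loss(self.layer(x), y))


val_loader = DataLoader(TensorDataset(torch.randn(8, 4), torch.randn(8, 1)), batch_size=4)
# The standardized keyword replaces the old per-stage names such as `val_dataloaders=`.
pl.Trainer(logger=False).validate(TinyModel(), dataloaders=val_loader)
```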
+
+
+- Deprecated `DataModule` properties: `has_prepared_data`, `has_setup_fit`, `has_setup_validate`, `has_setup_test`, `has_setup_predict`, `has_teardown_fit`, `has_teardown_validate`, `has_teardown_test`, `has_teardown_predict` ([#7657](https://github.com/PyTorchLightning/pytorch-lightning/pull/7657/))
+
+
- Deprecated `TrainerModelHooksMixin` in favor of `pytorch_lightning.utilities.signature_utils` ([#7422](https://github.com/PyTorchLightning/pytorch-lightning/pull/7422))


- Deprecated `num_nodes` and `sync_batchnorm` arguments in `DDPPlugin` and `DDPSpawnPlugin` ([#7026](https://github.com/PyTorchLightning/pytorch-lightning/pull/7026))

+- Deprecated `self.log(sync_dist_op)` in favor of `self.log(reduce_fx)` ([#7891](https://github.com/PyTorchLightning/pytorch-lightning/pull/7891))
+
+
+- Deprecated `is_overridden(model=...)` in favor of `is_overridden(instance=...)` ([#7918](https://github.com/PyTorchLightning/pytorch-lightning/pull/7918))
+
+
+- Deprecated automatically detaching returned extras with grads ([#7994](https://github.com/PyTorchLightning/pytorch-lightning/pull/7994))
+
+
+- Deprecated the default value of the `monitor` argument in the `EarlyStopping` callback to enforce `monitor` as a required argument ([#7907](https://github.com/PyTorchLightning/pytorch-lightning/pull/7907))
+
+
+- Deprecated importing `rank_zero_{warn,deprecation}` directly from `pytorch_lightning.utilities.distributed` ([#8085](https://github.com/PyTorchLightning/pytorch-lightning/pull/8085))
+
+
+- Deprecated the use of `CheckpointConnector.hpc_load()` in favor of `CheckpointConnector.restore()` ([#7652](https://github.com/PyTorchLightning/pytorch-lightning/pull/7652))
+
+
+- Deprecated `DDPPlugin.task_idx` in favor of `DDPPlugin.local_rank` ([#8203](https://github.com/PyTorchLightning/pytorch-lightning/pull/8203))
+
+
+- Deprecated the `Trainer.train_loop` property in favor of `Trainer.fit_loop` ([#8025](https://github.com/PyTorchLightning/pytorch-lightning/pull/8025))
+
+
+- Deprecated the `mode` parameter in `ModelSummary` in favor of `max_depth` ([#8062](https://github.com/PyTorchLightning/pytorch-lightning/pull/8062))
+
+
### Removed

- Removed `ProfilerConnector` ([#7654](https://github.com/PyTorchLightning/pytorch-lightning/pull/7654))

@@ -126,29 +298,139 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Removed deprecated utils modules `model_utils`, `warning_utils`, `xla_device_utils` and partially `argparse_utils` ([#7503](https://github.com/PyTorchLightning/pytorch-lightning/pull/7503))

+- Removed `RPCPlugin` and `RPCSequentialPlugin`.
If you were successfully using these plugins, please open a GitHub discussion about your use case ([#8101](https://github.com/PyTorchLightning/pytorch-lightning/pull/8101))
+
+
- Removed deprecated trainer attributes - `on_cpu`, `on_tpu`, `use_tpu`, `on_gpu`, `use_dp`, `use_ddp`, `use_ddp2`, `use_horovod`, `use_single_gpu` ([#7501](https://github.com/PyTorchLightning/pytorch-lightning/pull/7501))


### Fixed

+- Fixed `lr_scheduler` checkpointed state by calling `update_lr_schedulers` before saving checkpoints ([#7877](https://github.com/PyTorchLightning/pytorch-lightning/pull/7877))
+
+
- Fixed ambiguous warning when both overfit and train dataloader shuffling are enabled ([#7685](https://github.com/PyTorchLightning/pytorch-lightning/pull/7685))


-- Fixed dataloaders are not reset when tuning the model ([#7566](https://github.com/PyTorchLightning/pytorch-lightning/pull/7566))
+- Fixed dev debugger memory growing due to tracking events even when disabled ([#7875](https://github.com/PyTorchLightning/pytorch-lightning/pull/7875))


-- Fixed global step update when the epoch is skipped ([#7677](https://github.com/PyTorchLightning/pytorch-lightning/pull/7677))
+- Fixed `None` loss keys getting added in `training_epoch_end` when using manual optimization and not returning a loss ([#7772](https://github.com/PyTorchLightning/pytorch-lightning/pull/7772))


-- Fixed training loop total batch counter when accumulate grad batches was enabled ([#7692](https://github.com/PyTorchLightning/pytorch-lightning/pull/7692))
+- Fixed a bug where `precision=64` with `accelerator='ddp_spawn'` would throw a pickle error ([#6924](https://github.com/PyTorchLightning/pytorch-lightning/pull/6924))


-- Fixed broadcasting in multi-node, multi-gpu DDP using torch 1.7 ([#7592](https://github.com/PyTorchLightning/pytorch-lightning/pull/7592))
+- Do not override the existing `epoch` value in `logged_metrics` when already logged by the user ([#7982](https://github.com/PyTorchLightning/pytorch-lightning/issues/7982))

-- Fixed `ProgressBar` pickling after calling `trainer.predict` ([#7608](https://github.com/PyTorchLightning/pytorch-lightning/pull/7608))
+
+- Support manual optimization with DeepSpeed ([#7970](https://github.com/PyTorchLightning/pytorch-lightning/pull/7970))
+
+
+- Fixed `dataloader_idx` argument value when predicting with only one `DataLoader` ([#7941](https://github.com/PyTorchLightning/pytorch-lightning/pull/7941))
+
+
+- Pass the `stage` argument of `Callback.{setup,teardown}` as a keyword ([#7973](https://github.com/PyTorchLightning/pytorch-lightning/pull/7973))
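A small sketch of what the `stage` keyword change above means for user code ([#7973]): callbacks overriding `setup`/`teardown` should accept `stage` by name. The `StageReporter` callback is invented for illustration.

```python
import pytorch_lightning as pl


class StageReporter(pl.Callback):
    """Hypothetical callback that just reports the current stage."""

    def setup(self, trainer, pl_module, stage=None):
        print(f"setup: stage={stage}")

    def teardown(self, trainer, pl_module, stage=None):
        print(f"teardown: stage={stage}")
```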
+
+
+- Fixed metrics generated during `validation sanity checking` not being cleaned up at the end ([#8171](https://github.com/PyTorchLightning/pytorch-lightning/pull/8171))
+
+
+- Fixed `log_gpu_memory` metrics not being added to `logging` when nothing else is logged ([#8174](https://github.com/PyTorchLightning/pytorch-lightning/pull/8174))
+
+
+- Fixed a bug where calling `log` with a `Metric` instance would raise an error if it was a nested attribute of the model ([#8181](https://github.com/PyTorchLightning/pytorch-lightning/pull/8181))
+
+
+- Fixed a bug where using `precision=64` would cause buffers with complex dtype to be cast to real ([#8208](https://github.com/PyTorchLightning/pytorch-lightning/pull/8208))
+
+
+
+## [1.3.8] - 2021-07-01
+
+### Fixed
+
+- Fixed a sync deadlock when checkpointing a `LightningModule` that uses a torchmetrics 0.4 `Metric` ([#8218](https://github.com/PyTorchLightning/pytorch-lightning/pull/8218))
+- Fixed compatibility with TorchMetrics v0.4 ([#8206](https://github.com/PyTorchLightning/pytorch-lightning/pull/8206))
+- Added torchelastic check when sanitizing GPUs ([#8095](https://github.com/PyTorchLightning/pytorch-lightning/pull/8095))
+- Fixed a DDP info message that was never shown ([#8111](https://github.com/PyTorchLightning/pytorch-lightning/pull/8111))
+- Fixed metrics deprecation message at module import level ([#8163](https://github.com/PyTorchLightning/pytorch-lightning/pull/8163))
+- Fixed a bug where an infinite recursion would be triggered when using the `BaseFinetuning` callback on a model that contains a `ModuleDict` ([#8170](https://github.com/PyTorchLightning/pytorch-lightning/pull/8170))
+- Added a mechanism to detect a deadlock for `DDP` when only one process triggers an `Exception`. The mechanism will kill the processes when it happens ([#8167](https://github.com/PyTorchLightning/pytorch-lightning/pull/8167))
+- Fixed NCCL error when selecting non-consecutive device ids ([#8165](https://github.com/PyTorchLightning/pytorch-lightning/pull/8165))
+- Fixed SWA to also work with `IterableDataset` ([#8172](https://github.com/PyTorchLightning/pytorch-lightning/pull/8172))
+
+- Fixed a bug where `truncated_bptt_steps` would throw an AttributeError when the target RNN has multiple hidden states ([#8145](https://github.com/PyTorchLightning/pytorch-lightning/pull/8145))
+
+
+- Fixed passing a custom `DDPPlugin` when choosing `accelerator="ddp_cpu"` for the accelerator ([#6208](https://github.com/PyTorchLightning/pytorch-lightning/pull/6208))
+
+
+## [1.3.7] - 2021-06-22
+
+### Fixed
+
+- Fixed a bug where skipping an optimizer while using amp causes amp to trigger an assertion error ([#7975](https://github.com/PyTorchLightning/pytorch-lightning/pull/7975))
+- Fixed deprecation messages not showing due to incorrect stacklevel ([#8002](https://github.com/PyTorchLightning/pytorch-lightning/pull/8002), [#8005](https://github.com/PyTorchLightning/pytorch-lightning/pull/8005))
+- Fixed setting a `DistributedSampler` when using a distributed plugin in a custom accelerator ([#7814](https://github.com/PyTorchLightning/pytorch-lightning/pull/7814))
+- Improved `PyTorchProfiler` chrome trace names ([#8009](https://github.com/PyTorchLightning/pytorch-lightning/pull/8009))
+- Fixed moving the best score to device in `EarlyStopping` callback for TPU devices ([#7959](https://github.com/PyTorchLightning/pytorch-lightning/pull/7959))
+
+
+## [1.3.6] - 2021-06-15
+
+### Fixed
+
+- Fixed logs overwriting issue for remote filesystems ([#7889](https://github.com/PyTorchLightning/pytorch-lightning/pull/7889))
+- Fixed a bug where `DataModule.prepare_data` could only be called on the global rank 0 process ([#7945](https://github.com/PyTorchLightning/pytorch-lightning/pull/7945))
+- Fixed setting `worker_init_fn` to seed dataloaders correctly when using DDP ([#7942](https://github.com/PyTorchLightning/pytorch-lightning/pull/7942))
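The worker seeding fixed above ([#7942]) is driven by `seed_everything(workers=True)`; a one-line sketch, assuming the `workers` flag behaves as documented for this release line:

```python
import pytorch_lightning as pl

# With `workers=True`, Lightning installs a `worker_init_fn` that derives a
# distinct, reproducible seed for each DataLoader worker, including under DDP.
pl.seed_everything(42, workers=True)
```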
+- Fixed `BaseFinetuning` callback to properly handle parent modules with parameters ([#7931](https://github.com/PyTorchLightning/pytorch-lightning/pull/7931))
+- Fixed access to `callback_metrics` in ddp_spawn ([#7916](https://github.com/PyTorchLightning/pytorch-lightning/pull/7916))
+
+
+## [1.3.5] - 2021-06-08
+
+### Added
+
+- Added warning to Training Step output ([#7779](https://github.com/PyTorchLightning/pytorch-lightning/pull/7779))
+
+### Fixed
+
+- Fixed `LearningRateMonitor` and `BackboneFinetuning` ([#7835](https://github.com/PyTorchLightning/pytorch-lightning/pull/7835))
+- Minor improvements to `apply_to_collection` and type signature of `log_dict` ([#7851](https://github.com/PyTorchLightning/pytorch-lightning/pull/7851))
+- Fixed docker versions ([#7834](https://github.com/PyTorchLightning/pytorch-lightning/pull/7834))
+- Fixed sharded training check for fp16 precision ([#7825](https://github.com/PyTorchLightning/pytorch-lightning/pull/7825))
+- Fixed support for torch Module type hints in LightningCLI ([#7807](https://github.com/PyTorchLightning/pytorch-lightning/pull/7807))
+
+### Changed
+
+- Moved `training_output` validation to after `train_step_end` ([#7868](https://github.com/PyTorchLightning/pytorch-lightning/pull/7868))
+
+
+## [1.3.4] - 2021-06-01
+
+### Fixed
+
+- Fixed info message when max training time reached ([#7780](https://github.com/PyTorchLightning/pytorch-lightning/pull/7780))
+- Added missing `__len__` method to `IndexBatchSamplerWrapper` ([#7681](https://github.com/PyTorchLightning/pytorch-lightning/pull/7681))
+
+
+## [1.3.3] - 2021-05-27
+
+### Changed
+
+- Changed calling of `untoggle_optimizer(opt_idx)` out of the closure function ([#7563](https://github.com/PyTorchLightning/pytorch-lightning/pull/7563))
+
+### Fixed
+
+- Fixed `ProgressBar` pickling after calling `trainer.predict` ([#7608](https://github.com/PyTorchLightning/pytorch-lightning/pull/7608))
+- Fixed broadcasting in multi-node, multi-gpu DDP using torch 1.7 ([#7592](https://github.com/PyTorchLightning/pytorch-lightning/pull/7592))
+- Fixed dataloaders not being reset when tuning the model ([#7566](https://github.com/PyTorchLightning/pytorch-lightning/pull/7566))
- Fixed print errors in `ProgressBar` when `trainer.fit` is not called ([#7674](https://github.com/PyTorchLightning/pytorch-lightning/pull/7674))
+- Fixed global step update when the epoch is skipped ([#7677](https://github.com/PyTorchLightning/pytorch-lightning/pull/7677))
+- Fixed training loop total batch counter when accumulate grad batches was enabled ([#7692](https://github.com/PyTorchLightning/pytorch-lightning/pull/7692))

## [1.3.2] - 2021-05-18

diff --git a/MANIFEST.in b/MANIFEST.in
index b1e7613831fe8..1b97e27a98abe 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -53,19 +53,23 @@ include pyproject.toml
# Exclude build configs
exclude *.yml
exclude *.yaml
+exclude *.toml
exclude *.jsonnet
exclude .yapfignore

# Exclude pyright config
exclude .pyrightconfig.json

+# Exclude submodules
+exclude .gitmodules
+exclude _notebooks
+
# Exclude Makefile
exclude Makefile

prune .git
prune .github
prune .circleci
-prune notebook*
prune temp*
prune test*
prune benchmark*
diff --git a/Makefile b/Makefile
index 04b08fa2d27d1..34b67fc458131 100644
--- a/Makefile
+++ b/Makefile
@@ -14,6 +14,7 @@ clean:
	rm -rf .mypy_cache
	rm -rf .pytest_cache
	rm -rf ./docs/build
+	rm -rf ./docs/source/notebooks
	rm -rf ./docs/source/generated
	rm -rf ./docs/source/*/generated
	rm -rf ./docs/source/api
diff --git a/README.md b/README.md
index 8da7836fb689e..c0e5c87cbb2b7 100644
--- a/README.md
+++ b/README.md
@@ -74,10 +74,10 @@ Lightning is rigorously tested across multiple GPUs, TPUs CPUs and against major
- | System / PyTorch ver. | 1.4 (min. req.) | 1.5 | 1.6 | 1.7 | 1.8 (latest) | 1.9 (nightly) | + | System / PyTorch ver. | 1.4 (min. req.) | 1.5 | 1.6 | 1.7 | 1.8 (LTS) | 1.9 (latest) | | :---: | :---: | :---: | :---: | :---: | :---: | :---: | | Conda py3.7 [linux] | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) | - | Linux py3.7 [GPUs**] | - | - | [![Build Status](https://dev.azure.com/PytorchLightning/pytorch-lightning/_apis/build/status/PL.pytorch-lightning%20(GPUs)?branchName=master)](https://dev.azure.com/PytorchLightning/pytorch-lightning/_build/latest?definitionId=6&branchName=master) | - | - | - | + | Linux py3.7 [GPUs**] | - | - | - | - | [![Build Status](https://dev.azure.com/PytorchLightning/pytorch-lightning/_apis/build/status/PL.pytorch-lightning%20(GPUs)?branchName=master)](https://dev.azure.com/PytorchLightning/pytorch-lightning/_build/latest?definitionId=6&branchName=master) | - | | Linux py3.{6,7} [TPUs***] | - | - | [![TPU tests](https://github.com/PyTorchLightning/pytorch-lightning/workflows/TPU%20tests/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22TPU+tests%22+branch%3Amaster) | - | [![TPU tests](https://github.com/PyTorchLightning/pytorch-lightning/workflows/TPU%20tests/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22TPU+tests%22+branch%3Amaster) | - | | Linux py3.{6,7,8,9} | [![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - | - | - | [![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - | | OSX py3.{6,7,8,9} | - | [![CI complete 
testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - | - | [![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - | @@ -338,8 +338,7 @@ class LitAutoEncoder(pl.LightningModule): ## Examples ###### Hello world -- [MNIST hello world](https://colab.research.google.com/github/PytorchLightning/pytorch-lightning/blob/master/notebooks/01-mnist-hello-world.ipynb) -- [MNIST on TPUs](https://colab.research.google.com/github/PytorchLightning/pytorch-lightning/blob/master/notebooks/06-mnist-tpu-training.ipynb) +- [MNIST hello world](https://pytorch-lightning.readthedocs.io/en/latest/notebooks/lightning_examples/mnist-hello-world.html) ###### Contrastive Learning - [BYOL](https://lightning-bolts.readthedocs.io/en/latest/self_supervised_models.html#byol) @@ -348,8 +347,8 @@ class LitAutoEncoder(pl.LightningModule): - [SIMCLR](https://lightning-bolts.readthedocs.io/en/latest/self_supervised_models.html#simclr) ###### NLP -- [BERT](https://colab.research.google.com/github/PytorchLightning/pytorch-lightning/blob/master/notebooks/04-transformers-text-classification.ipynb) - [GPT-2](https://lightning-bolts.readthedocs.io/en/latest/convolutional.html#gpt-2) +- [BERT](https://pytorch-lightning.readthedocs.io/en/latest/notebooks/lightning_examples/text-transformers.html) ###### Reinforcement Learning @@ -358,7 +357,7 @@ class LitAutoEncoder(pl.LightningModule): - [Reinforce](https://lightning-bolts.readthedocs.io/en/latest/reinforce_learn.html#reinforce) ###### Vision -- [GAN](https://colab.research.google.com/github/PytorchLightning/pytorch-lightning/blob/master/notebooks/03-basic-gan.ipynb) +- [GAN](https://pytorch-lightning.readthedocs.io/en/latest/notebooks/lightning_examples/basic-gan.html) ###### Classic ML - [Logistic Regression](https://lightning-bolts.readthedocs.io/en/latest/classic_ml.html#logistic-regression) @@ -370,7 +369,9 @@ class LitAutoEncoder(pl.LightningModule): The lightning community is maintained by - [10+ core contributors](https://pytorch-lightning.readthedocs.io/en/latest/governance.html) who are all a mix of professional engineers, Research Scientists, and Ph.D. students from top AI labs. -- 400+ community contributors. +- 480+ active community contributors. + +Want to help us build Lightning and reduce boilerplate for thousands of researchers? [Learn how to make your first contribution here](https://devblog.pytorchlightning.ai/quick-contribution-guide-86d977171b3a) Lightning is also part of the [PyTorch ecosystem](https://pytorch.org/ecosystem/) which requires projects to have solid testing, documentation and support. 
diff --git a/_notebooks b/_notebooks
new file mode 160000
index 0000000000000..29aea106edefc
--- /dev/null
+++ b/_notebooks
@@ -0,0 +1 @@
+Subproject commit 29aea106edefc9d1904c0c17223a8ac2b15c48e7
diff --git a/benchmarks/test_basic_parity.py b/benchmarks/test_basic_parity.py
index 53f303693ffdb..bf2ddae2c0084 100644
--- a/benchmarks/test_basic_parity.py
+++ b/benchmarks/test_basic_parity.py
@@ -174,4 +174,4 @@ def lightning_loop(cls_model, idx, device_type: str = 'cuda', num_epochs=10):
    )
    trainer.fit(model)

-    return trainer.train_loop.running_loss.last().item(), _hook_memory()
+    return trainer.fit_loop.running_loss.last().item(), _hook_memory()
diff --git a/dockers/base-cuda/Dockerfile b/dockers/base-cuda/Dockerfile
index e16971bdc2a1a..5c15e096cfb4b 100644
--- a/dockers/base-cuda/Dockerfile
+++ b/dockers/base-cuda/Dockerfile
@@ -118,8 +118,7 @@ RUN \

RUN \
    # install DeepSpeed
-    # TODO(@SeanNaren): CI failing with `>=0.3.15` - skipping to unblock
-    pip install deepspeed==0.3.14
+    pip install deepspeed==0.4.0

RUN \
    # Show what we have
diff --git a/dockers/nvidia/Dockerfile b/dockers/nvidia/Dockerfile
index fbfd2224a66a9..c0fad8fba5124 100644
--- a/dockers/nvidia/Dockerfile
+++ b/dockers/nvidia/Dockerfile
@@ -13,7 +13,7 @@
# limitations under the License.

# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes
-FROM nvcr.io/nvidia/pytorch:21.04-py3
+FROM nvcr.io/nvidia/pytorch:21.06-py3

LABEL maintainer="PyTorchLightning "

@@ -39,14 +39,16 @@ RUN \
    # Installations
    python -c "fname = './pytorch-lightning/requirements/extra.txt' ; lines = [line for line in open(fname).readlines() if not line.startswith('horovod')] ; open(fname, 'w').writelines(lines)" && \
+    pip install "Pillow>=8.2, !=8.3.0" "cryptography>=3.4" "py>=1.10" --no-cache-dir --upgrade-strategy only-if-needed && \
    pip install -r ./pytorch-lightning/requirements/extra.txt --no-cache-dir --upgrade-strategy only-if-needed && \
    pip install -r ./pytorch-lightning/requirements/examples.txt --no-cache-dir --upgrade-strategy only-if-needed && \
    pip install ./pytorch-lightning --no-cache-dir && \
-    pip install "Pillow>=8.1" --no-cache-dir --upgrade-strategy only-if-needed && \
    rm -rf pytorch-lightning && \
+    pip install jupyterlab[all] -U && \
    pip list

-RUN pip install lightning-grid -U
+RUN pip install lightning-grid -U && \
+    pip install "py>=1.10" "protobuf>=3.15.6" --upgrade-strategy only-if-needed

ENV PYTHONPATH="/workspace"
diff --git a/dockers/tpu-tests/tpu_test_cases.jsonnet b/dockers/tpu-tests/tpu_test_cases.jsonnet
index 13f70deed43ca..e4b3db9cac53e 100644
--- a/dockers/tpu-tests/tpu_test_cases.jsonnet
+++ b/dockers/tpu-tests/tpu_test_cases.jsonnet
@@ -22,6 +22,7 @@ local tputests = base.BaseTest {
  |||
    cd pytorch-lightning
    coverage run --source=pytorch_lightning -m pytest -v --capture=no \
+        tests/profiler/test_xla_profiler.py \
        pytorch_lightning/utilities/xla_device.py \
        tests/accelerators/test_tpu_backend.py \
        tests/models/test_tpu.py
diff --git a/docs/source/_static/images/accelerator/ipus/profiler.png b/docs/source/_static/images/accelerator/ipus/profiler.png
new file mode 100644
index 0000000000000000000000000000000000000000..cbed276a4f515449111e6957e2526ee6a0ae14b0
GIT binary patch
literal 129635
[... base85-encoded binary image data omitted ...]
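The TPU test list above now includes `tests/profiler/test_xla_profiler.py`, matching the "Added XLA Profiler" changelog entry ([#8014]). A construction-only sketch, assuming the class is exposed as `pytorch_lightning.profiler.XLAProfiler` and that a TPU environment with `torch_xla` is available:

```python
from pytorch_lightning import Trainer
from pytorch_lightning.profiler import XLAProfiler

# Profile TPU execution during fit/validate; this configuration will not run
# on machines without TPU support.
trainer = Trainer(tpu_cores=8, profiler=XLAProfiler())
```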
zdXF~;bx895-YsjeGotKD$mv3a=Qbf{AE%&Fs#uY!p|;)ypL;^`eij;(!RT9u`gZuW zH*}0wn8#a2(=v?9EY4V_X|p%Q{S@b_G$&5EkI6@Es^{Z{4H;5409h~0#SLZsB`pp+@4QUnwX6?2!q`38)Kxp~ z7ap1ZmvxU$Vwj_2#Z|dA!eRh}-BIBvi#*-N;vOQ$M_HLB8H_;r+msDsG|8X~?ls_y z`}ja^vN%#I**c^F4uqR~M*(>mT~U?~i_H9r z4zVE{H~}i5w?+J5__|{q%)qZY6+;i12vg;uqGZVt4Ep`Kj7sV6OXNPCK_!o=C(rZW#ym26gN(AD${_wn0gkf(*!jn; zqgCU3F}&%zjN~l1(qcKX>PK~+?IrNkjzQ^Tc+fBF+G48|Rp)+%07ZNs@5 z<0M~i3QNS35|SU}JJQ4WltIIBOTUh0l}4+5tMecHUzWLz+*UKMe~m0(4_Wo)Z0um| zKDrZAWDh+~gA>S>u{!dO;>l7ZKj|kKPn7)NsEX07g)}B-GH5Z9YR?A5ELc|v<)4f; zbxr*h={oy&9eIBtHa_5Bxu>tpkQeHh1yp=b-IM>by25p^J~wUlmy9u=K!p4yX)^pS z>T;vjSyx5X=+B8;jqGN#ZsJ6bQFx-4FEJq~3dV$FJdDGZE?Gbak#OMTzNCrUcrkFG zx0V%yB?*ql$l&-Ogd~}D^sA=s6sKv!N{#4-mK6X!vk~R>aBDxO%-^@YS((OcZyH06 z-PkRwElvnk2krE4!xBd#3`CI|DBrUcG5N&d=q>k9s0W0VMl$@2NZLbOqK5i8eTQ)L zr?3ax>FsrjCykoiI!r9~zDjp5PBM3LpHmp_goQ0~%!5j(c7I`~oCOguO^8pPd zM@RbP{?zVCml47N9l=RH&P|Jkf2}cZGwlBfs*%8&vB_8H-7&;P)~QzMI1a6Liu%Or|zMucIss5em3)~ zj*SONjDo-MauC&rsW7I|$NgjtHF1oalw3dK6yKMLP*mjnga7ON1NA0;Pq%VAi#%Lz z=+?nLsQO-1?bJ8h#(T3(6(sPJe;~EKJI5WN6#+bK`UgY+zh8oh4PWt1U|fT4ejSg) zy56<}o@Pkq18x;TP&+tPpVQ_8UVO!H`YdX$-6-%n$htw%m7X_z;o{@QLpV3t#h+!9 zx0Jp2|I98f+y4;nocROz$iz3k@RQZGeaXz)& zTg@o@Pc3jiM_e&-1s(6Now`j42KR2Get8zic4A>~!e5MI`ldJ4B#b8wFw*?R+0JBd z^9|3+*3d=U=VZdY)h^69guFKA%&<9d?D^B6$R4&rP`2tQW*OZik!RUD&0!+Sk1SXj z^V*--Fnb^fE;J{?OahB3Z@zv6dvi%?ll$eeHqDiqFD1Ex$TQ-x zbAz3{P3T{v*G16(Ovh@>gq!YsLPqxANKPJ1$Jz89qTN?znc6^RfDsFC8i^a~`2UqY z)mO9e^{TH9Cj&?lUf1e)N;zz_AnoYn2lf?hlAZkkqvaZ%2sq$_u)emsrrNRPhh%yM zj&bz;7-g~1WE=0?Lm#i@(L!tI(*yPSArTlwx<`k7^soH#7H&UHUkdp#u_-M8pJ*))TJeZitSUwey*cU+{xJa^|O6m(IjVedtPS&sI9vs2FB&y#2$@8e1) zCz1x0%PgS_awsf#^}IPG2qgUf^5l-p{(GpCuF{o(Zo`of?cr zX~(O^jNw_ZZ<10Wy-rDpyl0?1w(7p}(O6!GLB(<}pso{f)C`-L42V|n`Ba>P3U3BHyK&&83E!g$?59T6QVa2GlAd5{T+$3MGpQ%%Ddr~I`)kYbKhS&Ko^SlRm%WohBg z%3R0KSXLmp^7$S3Bs6*aB272A`K>2(H9tPuz7t&v{erS03IJ z&dT>+ZUcs&ojOHlrS)!SZq5gLYZ!0f_6v?gAEF^gCB451mfylR@3!VQ9YCX$zt(jv zXHzfKN`HNEBE@^6wdt&&gp85G4rBAdZV4DFp4EeH9lEC7PXDh*S9&f(Pxv85jbzo! 
z^sCKd^j@keflGSO%YIX38@(4K%@U4gwXXFozs@(YJm_7=_OJCGTl;aoO+q{;9Hvk7 zmHP_Mj<8NZAh9Jm-;mD6tG{EUEL-A^)xhb?PZY5ynmQsMVrA=AQAG*29qO5i z%_=hS`*F^j;?YC|(_Z$sFZ#z9V)-bqvG$>Ps~o3Wbj8bg5vBboi_ec0#eemt18(Qw zP)nj^;Q0Oip=y_HGl#pK0DbS4w93%bDseV*UDdBmc`)s_7(c`kIP*T4&ZOO_FOJ5fLq@Yu{?Dk;f zCqGCa!{>is!xR!arr8L!gdF|IFglX=P->A zxwy}Km!P4bSwcf2RZ`_BbHY?E?sZ6VQ}bK_eCwAv3>lJ~e2$STZrJOd1||nE4tY>E zXGLQTxsS@9^RKXKUMt$Vo zTv*bO<_09df9d?@T0%s$kpjvMS%k;O0|LxNd*(+c0K*otyLydMtWWPX>&<|ro&d_(NtSnGTN%WU0Im^~-;p}lW@LqJyZ5whavXyZez)I6kp*npxTNycmkP<)06I@i9 z%@bfDGmS5(fSTWWqrKnmQDS{3{BL(OLG`LF*-=fjX5YrHUF#hW0P(px$l&(`k~0)1lA5m=zy=av}q@?JtplxG7#;}}`KC3f@e83^einl9k5r_ms- z>@NaLxDXB#{0PxXp!J_ae_9fpESyXAChU>*cqg*$UZT`mO1?%S{SIz`&*uO^KA1fW zdDqKMmSnb8jepY)g2ZW2z$r{sPyUJEDA&!4gJYgl2c~}X zPus>iyB!I?;U?74ih+VVO{!a=1mvGXdQji^#3`qDE;J$U4^Oc(S?tuE^t9&9Is*na z@1S!59C^qh=LlVx*Kof$&}v0+gRlFg zk5W<-0Ps||Q9~7od=k~X#1SE#IrIm3p5i)Q7J1U7!zR?lNd)*4Lit)p8pnC9mR|6e zq9wMocsM4V-yUalznMVp(7UA~wlJOMso&G!XOrn>si6ru?_*D$$qlp~o^xuSuam4d z2~WF3(5ZZ{oL$$xNrL?V8&=}i0r|mHa9ST~%lj0V?J$oh9sS9))P?6K;J{vz!`85q z`^0?yv1N9v42lgp)J%wr>#jVPqKUt1|@Xl{vkH7-hRL3eRB1a8!v>hze{ zfCYr0#Z8ePVozd}3EJQDz}Dj?F8fJt3-X1%{{9%DT{8<^D^%HWc%J<9Y_g*9=ZFPd z{Ep`JCbi7(F2y>?F}t2yK9txdhG$7B{y?HB_C=C6WpFxslt44XJW%z@T;)`zQg6b< zg;@9AO_VLH(4>-6VV4~FHW><6NBs07vNnN;0L@!nWmew9X{lp``}w~6 zneK_dZ7ryIeuQyU5?oaokknFDIGE*|<}~}bc5`6R4gY$8b3d`lnSf2X$c=oy`vf|lr%_3Ztu6!1fYg0%90c$TI0c%=zU&_9kR0F zGsb)7lo*uYLJ9N;`h%Q6dhMRkaOd+Pkvaxu1%d3$B=xL1ng?l2ZW=yU4wsYrg_J~{ zS)D*dI1~xuBE90sAQC_(Z1xpQoZXH=yNKgkI>0Q%ECx`?EjXbE~vHE%k`I}^Xxb&v^E?1ZDoh<>D4XWIbf;PX{E1BU2JHLjPYERZ1%xPR%&iV z#O@j))I)Hf({Npn{A)f&RbnDqD~V+VQ_~39{HDFld)x7VvaV~OMW^>_k;4qMv0{?pe<+pa2W#1n#%wP({jAE|{$Ad-&swX1~1lnlP6uQz-@L3CXL zeuNs}K`{fX3NEXJqK23hiS-fRC#q86sq%ZHyTC0O_DQBjtAMRwLLLC+{6Sak#W4;0 zi+L^mf~;M!x*|uBOJ3S;*oaILi<aaqPXfRo-cr*XN-*J3<3?;q zR`;;NF+%-4gDI2id@ub~$DZiUx97St_k}?At_#bRPC#pH0 zs}V?erUA`Jp?4l|p>ZF4F7axG8l;Kgu&2-7!J9>HRZE91d{u(tQmKbs6;GuM8E`d4 zYh@iBA}`Tact7ix!%_s^W^iN@1RV7%%rABH9-r7L&DpOJ0}D%L+KJ5S4P8neL}rf7 z8G}C#QME=kJEoJ+(}xqn>!yiU7{dKR^{%7`NNX6PzwUgVzT>rHT>g6h*0cAjMwE(0 z3{!2M|E1dOSnkQMko{Q57CpkiMR~GE>e+J++mWDwb9T6Vjmqa=hAa4NeU^=op4VY- zwc9~Lc(Kq-t8xbX2>*6@3|o)GE>s!-+ZwPz9IW~BK9i`BDErH-H8?mGKt2-+u(sLx zDWw#0of&~HELi&f7ywEJGbF&#A zV9%B-oRHsCgMlmNKbm*UQra@f#s6)3#QjE_oTnx8?*m`e(`I(kqgYG5Y& z1F7sUocDU5=w{=_TIav%fpw9JRd@u*(RT+WSQ2DS(5O+$gd?Z!1UbGndoYc%E@u@U z{N(hW03--gcTS-n9{jRa@H0r=9EDi*SO09LX9wYXj}X7Gw0h4PV0TWS*%mNo4Q;)V zHT5llUU%XYTegM8M2}sZJX7D(DE7Cad5E79RU8-3N-GNm`h^B$NgFkijMPdobNh0M zGwcP9iYTaYmH^;7y1|)kjQ;M_{sdXFN8K(#_|h}cm|40KCjW8@|1bGBWdRJ3UXoOf zRsWT6=tID6ZE9C(o{&U~0EiW+j+E9l&N!FcP_v&LtnmiWepbY1U;)3j?CFXQ5Z zI}X%&HCH8NBBvI|hv+aP6`M1(p2OAR7@1cxD}RPAJOm-!_xRsrL9ixy_e2WCU1+2R zlln&yz4NA1FYkG4(d3fXC;E<4SNYRC2sLN;BPe*wAkE)h^>ymth6Je?lMG~pa0F5MqZ=661?MD`#dEZaI2^ag zQ97!;(Vxg{&~jANm;#lT*>n{XE5)cc3_1EIp4S68Z_g?Z>9Wc#p~f*$We~E^%(+X4 z@e2cwfI5(H(oFsh+Zq>v(s+m^!U!P=d|h;)P7=U;WX3@KV^=QJm$iYR=oM)i`ERKE z;#4O@p!|^vwi@x1hAl>7W%J|lv9C!2rI6KGKch=?f&A1?YIw(N>q{U$+&F+A1p}!P z;PY=2pm%HUQH)Cn(v8TEo^3Itq)~a+JBN&lSk2CXVVuqxj}h{c6P58&0#a%je0Ej- zl6O|YG9O*RvZpT|(QP~#)b|GaU6WbcL5}4>S#?>p(b2TpI1>1G{zkNYh;iGK?0Vm% zZ~P@-Nz?!A8$5O;D;*kIpX zjq;9Uui{DkRE*}No%PL5Q7%=sZcu}QYU&0nV~2T-m7g0E4zD{Q^Ezd9j{~+Xsq&58t4FSkNoIm;)qn+w(WveykRo%$CxhR zZyE1LMZ(&Pu9qLZIj5j8f=~{2B+?s=kj(5O1*Zf>@K>cnhg@_=tfMeLNhAr|Vs_0qT1eO4M5`UUWB`46Bfi>9ugP3c>;>+V{k zQG3gPM;e>%2vU0K&$KBFjQ7+ZC8(%Nq!cO4C~PRq+LbTq`i;2vE$_Ui5@X&8vJ7ki zN5v1;hvy-+<|bBzx0k zF;-WjB%7=69?0>xz!9@!o?AwsX{pggx;T4t8k>DE_cZhn65Fvn0%z{!wK|0Rl^l_c z_39zb;Uvx!5H!`4m+yU2BCeVn)HmA?mi!jR(t+26x9X;q<*VK_3K{Ch%-pG-_HSfd 
zC3hb>9QzcC-c-O!UwL70>74z2<=+_uXdWml(xfip5@Z0SD_caUKiTPkP%shzk}Fl+ zW3+qmXvgsJa)=3Hdf^$)MG$wo1Ppc}RzGd-v`oQ%*kMt=fd;(Hs>t1kC9jyGHypU-5>Hx3^66GCR zcb)Wlu@CWB?&-v`psaft5sCq{2Tk5ke0Pbb`Eb>Fq2ot~J0|_1xV!T0ku<7#4J zGI%ldX$_$kMd=^mQOI~1?Fbogc@0hv5w?RpImcEzv#MNd47K6DZVPXuq zlcG~(E9`OA_;swMF2*2h|I<8vaByt8Jzz+1u5@{F26CHENRl6w3m>nj9C+ipK;2Pe zYsE@TO(QHbW$eAC<}vt81MwJiFc+Eau^Gfuc{Uw4X0-?HhPt>8LE*vJnGNJIf}=UJ zlZ}gBUJBY}8P@f1l(LwXm~o0aOg{o>Zm0CK{Sa)VjPU1r6_&w?k?)X69Y=C3DUxnd z94`OFVW-DbEQs|XTxaZ%^$sTP?t;}U+)6Ju1vBqcPH*w?N-a-skpl*JgAm!@0}s9o zYUSu_`PlzHaX#xRYdd+YaiU`Mrm3Y!dWz4;eTM~4`6>NyLqkO4(<%q722#w#dL?DV z9sGLY;<3rv-*_HT>Fix3baUVMY%`%&5to;m+2Q;}YGKp1pfbM7!s!QrOeY>2KT=;7|#I+5FYDnKJu% zL4G3QsQI^uyA1=UmwFq0+A;B|8^D{Dm*v^l@B4&ZIb)2$GK#)V5hrL(xC8bTzM*#q zPTGkJCmlIz80GzO0-3|6Ym(Nx$| z4Xy5JYx3%4i_MgV#LKi#yvKHge>l;>DN2X9ggQ2s1m9y&xV!C5rk&`1KmHd#<&Q-7^zrvmRa-k;5$RM~D7^`TL zTpcxv3a0ze=Y5`9$FjvzhzuszW0-^;BjgK`diME}4c5C-iMwWXRlrDrmW_<%+oOVVAt%f?*RglI0b?#`Sosl4I-KRM0 zh-GWE08>{5;J2Z|4o5X7WdAV%_%m z-UPiZZ2&^F9DgI|`PCeBLc+vtyZd;c4+oaa;*X9sEhARD6L6Jmj!gM?&qo^J)2ON< z^2y{OTbA2_+C>`7V}&qV|7Ys0H?KcO|KaDK?BFvF(WCqyDvv+Z{lhkoILlBBqy9m6 z>e{_^Aoha*CmEgVnyJA7gT@UFKujUN7RyZJCWB>~x`I_R?<3_~QKV9y*g}|5iPdu#ypp&~X$x5RIhdRiu0^%X{z3k@P|t^9X~(%zz_0_F-9%}^!&mz*f??wdek z=$yDFv~<6yYf}-1QreTKWms$vn2uu1y8A8rA|uwUdyX9x30Uu?0)GiyyCo-ERCQLz z3Mv%ewkZ*zZhp&()ZJ6u`l1L{@4J+*ePl&r^zY=h$>L7Ls+o>&7))h}Js@w3zart4 z!!_+O96lX&4>|0yFnKZ~|c{}e1vR?QmddmztY({F?^?q9< zIF+1~oe9ZOq06cTX5GD>gh8&o9ylbN*_(Y3Kq5wf|iTDz?MmgoSwOVF4|L2u?#n*E_!P~c}kvQIiFIAHv|3crI#}Sa_VC}*O9?%?2idjsg?m%aI zC5c47m}Yv{?NXE{E_BYE^((1@>%&nt8ZZ+7pjdE(%{Dv+yN38L;7qylRLfYlw8ePx zLKxvFJS==XlJ~J*XUnQ&vsS<)h(fm1GI9p}-8D&?M?oQ>^3F$tZ2L(+N{GzR4R-JmIta z%4)#vNIdrHm1lM1>GWw&uuakuZ9PaGHU3N(fQJRt$aXM>ZrzVUhrU>AUAh*Nd%veD zWU8yltni{gaPg@GmevAQ3jU|2X650%+{-A;PdT@w+s_&Nh7nP&u~_4LlDo>RH=v%~ zy8F`wkwprvAg5ntz4h~}R^n)wB9|xX+2Zqk%tu_v_C};*nQHggq5t%w2!02~VP>Mh zp0$_NmOMWv-Go#kXismeorEnmD<`T+`2Z$Xmfn`*w5G~OUMw8z5Eq)u$1D@@kKC`{ zS`RvOCU#?PJ^_CLnM2})=U{=7pkDzahgjAF3p}dgGx_UKCNe?RqZSB@bSO6k)_9Sn zEP15?;+=c1XHx`q1kKV04gufSrE#vVFMcs{#7ubX8M0Vs8(|=AnhY~nM?}540&6{c zpFH^AnkTD=Lr6Nfr51HNLh|TsRFzcMM)fSyZe>2*ucwe%MMY)KqtHjNbR^478Fyb+ zT5fHpc2~2IGF+xbL9$Jo%)&iqzZ^)M_cWFqdTCaXxtpi$ja>D4rjyA^pHK2ANDm#w zvSyrz*Kf$@>0`-9TMU!zC6>7@UIB-e774C?Cw~C;K9}#Mj{iprYHnR!&=JmmbCscq zaX!DSE0&>o$W6hK_PiN#OrRk34SRBV}<=Gw+6b{ktXY{z?Df6c%leTeWPfA3s{WAxJ2 zvjeRs-FbK=VAOKhS3wZDmK=0Q44YNOg{_6KD{5dmPW1oMaY)oX>9be$ngF4!>kGvZ zoNBIa`{x4Lg1x2B+2Mqt6~iJlR;Tgwaljyt@UF&kf?u|EI32dul#Et!vOMa`1O ziR%v?nCwJ}@FEOo^Qot~X|iaY_GE=0UfAbS3vAqC@;LksCZBq?{gSKvX;+cSa~zSM zHVNWUILfD~F&(nS1$~-w^HblxSs1VxlS4Z#nwaN8B@g~)cF~h{3Q;-K*=*pO>T93r zU_fg`bxes-vcah=HftfC8$i1#mYd_`_IO#Rc3RPM$l6sGC8_P8ka<8W<4$!I6w$hk zebwH4JG&R*mqhkB6ux@#`K<~X{K)Jy*YZ?CXQC^==job=ux%IBYndsOrG_aUwe@eO(bFMj7jg)wHvDW7 z2<&Pc`ueOmOq$3#ivYSUput)|0>=n3R&fm1#tbQ^3*0OlN+as?ShFZbE(PWMn!k$V zC$9bcqHS*0&)JjBZa==!dFq&}3c~t!){_8KuI^Fs)%nd4o@Jca?pD4kswlsQ;u=5) zWv`-?^7vw!OeV)Hih(a8g6H1xGRu!hha%NGtk@d9c~qlHKM z?7|%1{;=djc0Fx2t+SJ?ES{FSQFYps450#bk`{l?>(Sr~^pmpW*0lUeX-i!xb~#v^ zALt#m$i6E;1nxdKRS^ulVnm!Bv8M%Ui~UJ^#hVs*2|^o=2EP> zNAd57*L8q~{y9V+))=#j%)aOF^|(6^$os3!jj%yUw<4F!-Xw?2-V27iB;axT9OgsB zwPh~d7Hm{?4gY&owwodhw?ZZ~yM95+vKQd;L@!qb*_qX(#uGl_G zrcR_ORE!RW{aQqKv8*|AfkR8|{WADtGAn0mYxyV*je?Dh#r_lwkG+T#&rLDtGeg_) zn#&cionQTjjt7H6C95*?s?7Uxj|<`6mJjg5#fsw=GcCYow;Z}@(1dyZoW0dO<_jEb z&YPC~wSN%j%CfJx*Bug=4uli*)K3hSyJOQ)#1K^gp(PCX&8_+<8rkeIQH${jE`X{( z{yZs-s6ia1@1V~mp+J0CRTTbZE&Ze{Z>>p$00K{si0LoWSu_qD-EfEO3|mU?`1C9% zK)h6J`avI;Mw=y1rbt)BzY*KE_K6kr$7CgYk~kG#p&`3KM=&?Mj%cQVykx~Ky$bm2 z0Qit%PFd~bCJ$bN1k??->UyOsN`*O0> 
zBpQ$f`t2JmM-U+;VE}LMNq)~b4d28Dais0WAzX)4(?SW?mdH+c%xS4pDEfhj6dLu zAEBuCnRHtv0cs+3AP1cmw#a3;|19fyWKz4n>`&>MR}C9LlYgU66hOk!uD@8e;MZbE=^zfSQbKko^aI8dY(x zSo0n8s)e^vIVJEie8mDCwnRRFc{0EWXD&v&1i?1zWgI)ZPDthG-v^_OfXU7Z8LwQT zvEO5zVw{}J_#aB33sm)EiJ@&j?Lu5kXh{|Jy5q0$rX%l77ragHsgtpLA?cr`j_3lC zq+n1~GP)gHxo2+vTY=G*FmTo<@}GXB{6ExvbzBr}_cwx+ARQtpDc!w*G)PH@lyoD# zD-F^DBHbY=C0$D?-6fq%H!CR!yo28H{GR*yJ)ige^W6_SGwjaHbcdwdoU+Vln2QAE2yc&A zWPyahysDQ7JkwQYpr?4GXh|!}2?2)c5hp122U6q}89Jl3B}xvmy^z3D3Eia$W9aau zLEvyS_PkiSf6*n=-|5@cCCHKn{s1OU7=2=TIye=q&(&T9sm>g&%AbD@Gtl8;a_%1W z*v_D!g$qf9?J7ct=KI~QQVwZ`Vu8V{kOUPzmC5E^5dRmkqfyF?NPKmmbn44RUqeTQ z(nO{g&mpzp1-TnYEf_gRSZB(_JIF%yJ0Vl0ZrB0{IxjOsq-+WLvMo?I7@vfBL^1FC z-NjFNSo(VGV65Ar&@lJ{wQOTWl|&xO54{B?$`7mrqH78kHY^v)8x_KfS~PvTcYVMM zdc-g*oMl=kAU<=b05XE;Itu7RQY3zc*m=o7NdSqYA&L^2^HB%9FSpx8mk&3JVzB+$ z(11DxKoYw5I|;3g8=%42oep$%EX!yYZ+m)Qs9sqg*eWS^O|KjV+$f9Q7m~&+$B(NY z>ewA<)~_r7AqoTO0=BmNVRme`@NKARSxA&aQ%rVo?Zyy`_qGm_f$T{Vz0h;g?e!UP z8Z1r(3=u8p$+lqlkJ@Sr9|r6?e<=Ex;ibwdT)VLe-PENM%9^{au^igfwUU;L3`rC@ z0YJDD;`mMqbqOyU=Z`cUnR~Mp;jkA3U2xANRDqiE*OF9`@2P9#!mIBogylYvVXc{% z9I{9W#Wc+>;V`aS22F^zS-4sn5LxgyeOz1}JP&+Tef5Il=Smwxjq^dYZu4A`ntB0- zrg$8#ewWKLS=&oF55EwQNNo&xPLI`m^P@H#wBs!Xkns-l^@Vgt zWQmN9aBz^r7y=N5@l^(z)AnpzJkvtl1SU#yEsj4A%|cM7Sic55wHGNI^O^Pp@gVXV z8!{7ue*PT9PX|}F&iba+6-0NSWARG2XXl4P9=r!@>df9d7r@^AG`(?h;6MR9_lKSt z;x;+N-N;6-w+64xo*>?sFruG(+wgHiC1wsS2CTLrElHd4R1SoDOfa1cevmaZPjnB# z>x0UK&*@k>P3v%pkr1SITdc+%xr&WXU?T3zw%sPG8{Sjmkd%D4GeFH}cWMgBfe`B7 zg^s837F5M46!-2*RVjZbY?1>#ac5WZU3wuq`a}YjoFFX+x_8%%+QGV+(ec7MsO0Lf z1K($s+AIACqGP!aTHXB_cIf-+U`&}EG&pXVLe%8WIgkpp!ZkcKE3VZK(eQ`OK2d!pLYol>5#814 zj$%UY4c?T4&Pl?0Qw!PvS|GRuODZAMr%}zbZVwjA9==fm0?UeX(Yi>XJKrLh3|U}; zXUDa0`^SBhCvXBlQ;9oGbF<_fbI8+wvchH1+Xqj6DBUtnr6t^U%YkCwgC(AR=7#6a z_xHAJH1$JtLL>+Okc`3HAk~N4_2wj%f9nH+nkdwEIQ6wP*Z$5ZTr)sYsj5#>X*4WE zQt27&`b!^H{oS!0#eY!2WXgP$y%jRZiZ{FreNwd{>ug!QF@_W7;UWiu{0ZIbLUe?w zc z&Dcg(Qve{+$guPD<*XK)e;FqTv(%cjh7w&M4oJXsQ~w1sp!^*jZ|WVd@S6_23Q6oK zk~6i`H!9vFcF_7wV;_Dw4hZ-no%%dV?0JUBU7PT_1fn#sz{GvpMfQJ(i@|+C5Kd%T zuqwq)&{d??elN`93c{Tu(u{7sbP@v&j)bhQ%RznpI%1X8V2pl3U$f~E^rr+wDa~uo zCk%0Fw0(|kXVI>IxTr|Mz6GnSWh?;=@f7#Jii2TrV1$KqAbrNg!$v487JeZ{pq`^{ z0yaFb9&G$63G=o*jMAJ#`S>&{v;2*pu;(_C99nhgn-RMXek)cm4|+y8i=|>W_*(*< zLICGJPPSkB%V*wxq8jGHZt3r9t!cWazW)#pMWX*X>i&gp5nma4O&ZrVUl4p2hF@NT zuhQR~c3|5QIABVS&1#jz}hr- zVFl}k%ICaGb8G85Rm6{XZ#P=(xrz2 zZ~-U4kM4N{w5|+VD0ttvo6>)Lk07r{QEEPUg6U_|#n0!#fedc`%ymv^wB#kaF&XzM zQZ~#)8bGJ!Ow>r+$Ek1Jjm#NI6-W?U7;1EPlp%#hi3gy`8a@?|^zWbrH{pI4W`HX3 zXX1(-Usbvj3f&CF9X~9-9PpN4q+8R0O9QaBOx=4G`(+*fq}&bl9a?u(RZB&LlU5tK9C_`^Tc-Gd@6ozsFsbv##| zGf}J09380lI}+kjCA( z&FwOvoRsaZ&u5SGM)a4j9{CLy7`6$!^{`G?u62G0;eMHbTAl#X)5&=6`v@MY&Bx9{ zWwzq~fHv1oZe302M8ZZAUru5|V{n+!O9fA2FZL@bwZGxI`U@`twS8AmVG(s1SRX!w z`Gmr0O?{-RN4wi z#NBNb$>oJJOb>INWuuMs>yNn!eV>FIhjrKu?5h|swaA@&8rmE?EFeP@KICk*P=;X$g@W$BL&_~Da8)qxfofk zEK;J!B*qfu2Xe#(2~MYusM$Z-JT(jqM*m>YI8OGETHCkX-cGK+rtGU+YHU|lNYqo5VlJ)0 z;(q3$H8y$eMUZ`q+qCbl%w@|oc-HZ5y%ma}WR2qtorMc?ER z;M8eqWE}KD>f!tRCo!(uYkucz)ZM~dJ0+YQL3yU67CJf`7PYbry{NdjGYE<&uBiPt z;9wd{Dxb`cZDBu5m`~2Y?uOjYKM;&;+dZPKx3NYeCCmoh3sRfW^<3U&>4so!l(D4(0d(`*`7#0M#JFYkl`zfhOfhu6bIl56E+9zLX& zC#HQfV@*^UYXgTrBeJ=!7@z4Fz2pX)sL4+G9pgR38V$GOirU<}kpXJg6=;$q6wHBX z2H6aAvv`#D_nG#$B?=awkLkJ^!kr-jiC=6}4a2u)OhDPKy;J=WZx%M+vJuhEuGW(9 zZ8)nT6rsBwTKT<>mgU5=)Kav(O=OU+8D|8FbS>C{?lx5RU5$v!dGJG+7tgPj;-1!w zJ9VnZJSSB7Olh9YpF8zMNtp7!ypRCdtQGe$jcFu(+EfA(=8g$=Rq396F`;JjjD2vg z9UlpH;cApW3lqKlJ6FTuS7WvmZYalEf!pkORLVcy5vWoVBTa3#^3Y`3?aT)Iy%3W! 
zT0U9$a6Ao0a9`n&AOC76z%^yAk1(Tf^hBUyW8hd+un zB40+-I91-)eDLKvw3M~?w8)ZS<1J|FrM%s59*xG9&}2ys@HH!;3J2 zY12(DiAN$tVkQlMxbvdgP-f+kgd&PIb?_u$_rJjA#sdHq}rjJDj!* zw&HPSCmKPPm))%qZ#KG8k!&2I`UE)W&`o_WhY_Uvx}Z_Ph)o3=mzzF#wgRY#Dr`-@;gV|K_wO%hF$oSA;b3I%4p6N_Yn&U36=o4EfP z(YXHJcf65wJ84l(B>7<3ZoJ~%_RkZf#$-_qceHh>hgEnoOxtS#w?`xuI>S9s15-F- z;!X&C@}fDmEPp=F9864o?qHun>oPa&wk;n>3!!M{fY;(<_T3W#6+&Ds|3{>Xhr5en z+uV?SlYR0{xjQh0Nv#rA%0bxI5FLIVb|V(Oo0&QEFQ4!0li^4x{Wz)WgIurLNuji0{E{L!2DTay)}IF(U;Vi>=xXd=1- zEOL-$tPj5lLWN@~aFl_??4qN338Oi?4#d zEtD!sk78al+FMYFIMyITIl5hXLsN+T)X)&w5v?z*oeCvT2C60zN41!vE3X(Co^AaJ z!@3QFHC#g6jL7|!LQ$!JWJJ(G_&D}*$O{~P2mMgnSobuNnng#aji2Cg1!sD1dPx7# z2)>?-`6sXxMjvwZmk}zP**Naht+DJvl*roT$sozOdOxAX0#=Dftykl1f#e8G*GiaK z3UjRVCr!O!DYq`g_pL48rLe2`0hLONi0Zf!a@8YqWA)&VrB-j|(fxgba!~fiNxZQ_ zu+W9ML$?MRT{OezkZ!Up>`JW$AFcFdv(m9k*OwZN@}c6sZ*vTGmQUmOqZxoPIeyioB-<;lu~$KY94zZ&bxkbSAM z^M-ycBXognf8Wt&!Q?q9+ckQGQ4v@{Q1y5!tw@ZjcG6VX!nsW=o_3}5dHMh_G< zQa|{@)U95;eVD(mRk9t65+nSG<#82`#omWGT7_v%w@-Xw^8)U%+td!)yO-5XWU2U( za$;vOY1o-P#V>}kTI6l3@}$N1U!O-{k--qf#<%_4CCq3;B{r~@4(_*r3=HX^Q!JT`ng8KcyGA&9sbp* zRK{~Ka%GM*Qw>L5T)feobg6d<`2K?3BmPfg+)8J>Nh-9CBXC8(+{v@UnqstMg) zd~6vys-WE2g?KJrK(h`G&{33+);=VIJ}1QPW7G|2UdF;Z{iDx3+t(C6Sme z97cN)Jc|A@-^#MzhmEpzj)bO&B9jC?Z2p8FG-_;6CzJIh8Ph~sD;Ai&u3k zLrt1r2LhP%I6?0ht!gS%3_>v984D~lJbxLS210nkKf?L%xZ(X%xA1B?pTk*@ox6Ph(DoRW{06B4GlGA z8%xrJ!R(S&>R2J=jYMaJHM1q!Y~M?(YJ;z~8PanX2I|a`oy_NZ@9=V~cGi$1vr6E7 z-bXjBaI;2y((?K3=M=!=rWZ^B!z*AuuejRn6D6#i-(Hg%LYfJfD4Nz0bzM@a3ji#*)NYCdJ_vs zKH(^`iL<2w%0Y>&to`vAwCRubT7C+10&jx%300;TxN|B>zF#g+4l&Q#Ov<5mJ1e?jN z`T$p|G1YG(HsdCl)gnUqqKc3p_ zUzB{`gNJ+U#`YCc;y1fC=YUQk2oHlyxHi*Om?!~FagUxvMkm$e_lZMw7b=nkZH3a* zV@fa_LiK|7ZFV(6dzlZZWT$R_?18WRfEmXNRJ3rp|70J-6?y6pjJuIR^wW0ZO zqWEx0gFu#)M+;CG-A=M`G*4;sl2W(EwDz&@=a&kf-fM2F@#~XKLXAOiXo{?Dt%@0* zH6L$iV?-XG^V5<&T+NaP>mX{|%us}rE<)l;SbTc8^+O5;z>N#yyFR!!#+*=ULN^K4 z8T-=D-#Js-g9`O;gPrH#c;MSXjf)qM@7G3U;GmE=24>fH&Ww(hha6Qq@RUx$;kAJ9 z3XUWQXoHvFTr)nISg%LWw1uL}sjZ<$iDN-Jm}FVpj7&onnZ6WlTHb(MQbjfa?NLO- zIKoK8r?1uAf;2B8?@4`p=B7b~6kfW)^&CNaOQ(G^D_%y~an@a!ZJkNkqhL{&*X!GT zlsDab=A%|c82u$eK5-{GkY#CRY}T4YdL?3-oC7e7%`#TwwIpT$1Ul!jI@+`4{?syk z#=s&&cd#c-MME;Ab~UET5l%`U?Eywd4HefH_@@6P{n5@PN8}$BO^t$@$%@jx^2&bnN>dN>Q!n|{#;p$|mhccu8gl8O3F3n^k<@o}XFbwMcWPge zJOhR>AvH;a$jMq`OPth4>La3MjTe3M7t*`V!tN#ufAaoY%#$KXXEkt3gRJm~z z1;i?BDqoHT&1`2+0kVy&tss0~W70JVgx(kLFqNoq5p6~1u_lF45Uaq_{`%GTHJ(om z2j}!DA$(Ng$>Qa8hi-pn3zqnXP!oPy9}{Z*t}w(5y<>io_nvVaK}?!RD6T8#yPak? 
z8B9q^UA1VlPG)X_${2 z(JVQF@QY-KNd<%7eK}Y1#!dCf;_Y!BL`CIYD;dpqvkL0R&4h^63vWhjUt@3SHn}&) z?>y`O!u4QRo+9*{+3aB!ay3J&zo;S}rVLMC)uQ|MQJjR zjPRMWc@8pKNy0a$=8Pv&X`%G=$y6E4C}Y4*9xl*)xGGWoa@!xItpGKL=X zZ7qm-gNQ#>q<jFD$`!`6CRpKH;C(cp7An)yj#DMT?3AgrLTZ`pc>Z)gi!~dbsi06@ppzF4h_7UOj+IRbSQz{*?mN(5qmuLBorRY4KXYE)Vm>F3zmp_v)PI`0!iMsx;+YJco8?_`ow5;w=wz=FR&^tBb(h<&(lpZ#OB6!SVfZmK51a zox(?LKdRUDpuX=5xY)}EiXZg1Z|%9OG`R&lgei57u!Zc7u~{a`WJ#;N z+=3wNc1L=t7_!k_5_9Gy=f=eSs!@$e-}PdBOHl3`ezBR`o%>NmRBpX@HXiI#9X;5t zK4ZF}`OQtkp?Cmmlb#F?3?7;SYOENL$|SGeb^XQuN7p|dl%aTD5Pxk`2lN$y{YEK^ zz~^fZ56SvO@}8FCk9LQDzffUX43UjxH`0o8iPD5J$1@yK3H>BEOji+8i&LS<10sMn z80EI-9luHW*SVRWG>>l|n3@Z+4hk~)h;cwlJAr##Vg+t2i%id84Vv!yZu%1l`yBD( zD-^fyG~d1u+)ROvt}#0H0C9TK<*p|R^qpO)w}C(-ZS5I8xu~^BFLbWAE`BNB*<=Wb z#R6r4#Ny$nBJml}~}) zMY^r0x0ooUHf{zQmZ8_u+=J1H*r#Eol)evMMn+zo?3H_mzX{zRI0Ku*?jayNi+#m| z?>KlJTr+HG)+0hqXv#z}8mw0Yt2aC5g^qvre+x23HxE7L}bLC1dcW>1x{!?|ieIYDfLb_z|+(B{oajC_X zxtMBgE$YE=$$q{{mzb(c{+6d{I=` zR#MheoNxl*e5UIkP5TG352xR(hk7=s@e1GcwzoR!fQd7~wL0J5Y)q&_Q#!$K+sBi@ z2!N8>iqIG(P`@WUftRIzsWxHS6V8Wqpl=@~m{}7%_({9mT@iG(lM$f;O|Deof(M6& zp#p~~1>-6~uX7SNr!uD&TFr+Pp_1n%ysg(m9vhVn;fX%Fn;lhYS96mcSArrU%^=J%Mjlf&vZZ)OI% z7@&ETRs{@(`3EgTSBB69;;xP+H2OvJLj2+JGW_9>Y}259A^5NN&IkF?(6KU87~^w3 zxE(09d@7NT<-prv;ju*m5!d92bBoXW;i#7b?gtgT`Y(qDlMRM>y?e`2NPxLGzIno$ zFL40S5hS}iNKZP{+dHlf4T(!a_+R1zA9A=VLX8$8;{rMQoxDZ(SM8o`Ikn& zv^rG+ej#p(l2npbgc_=yjCFKi)F^{gnv_A&jmF2-yB)vYg$nf2G|!u&!V2W9VbYI4 zk-u!}+Sy?W%Vt*u0S#Jly%mI4Q?A%fITe!F{R#7bq!1_kjr^zrK0f=zpLPfsyfe*j z^CS~QKHV3h&j6-UBz|?oLr(#me1s4`IxM7x0}gCw_2O)FjQhMTH|Wl``0NHuc4qpm zw*4vQ*V~2w?oA@v2e&3i%es%Sb&*LT-!kBKKVIe3tupU5QCAeArN!COvSKT(CwOed zk?=fC<@=R>djv{*2o~P&#>?vdHcU;v&x0bMjeKhQMKif%=y^0z--V(xDo zQ?^Hbi{qN_a4BA(@n032a*DQDf zDy0yZ*i-w{T+M&mp^W!WkK=^9Z;upPw}vGWM?eHyUjao@{{(39Up!PXL?+Ii9A717 ztb^O@v&~Nb=@{yYK{+uBU^6nZJNqSkBF>zhjW{Z!<_A7oUQPlxr-NrRk7{<$`ipZ^ z?4clx)&+uByOtu5pB^ys@vnb@@?Y)pqbZ(+zv-GAE<>UGg+rfSnnEIp11@F#N96cl z&7}3t$hoSW#jcqR)b;sslW*jTS)2BZ{9CB?y!WV zzH)c>P&Xt6#DD!-5xV%7DI@J{A5E=GyB(a1o}y+Fc~}-J!BU0t&!#v<{zmD|zc{e% zPQm+GH!>S06WIP(wzhg8bQA>trrA&dPNm)KbiLf~bL5Uj$5;8_{ySd%D>BJge=*cj z05jTfEn=W=cMr;9G-#t@d64n=w{#$Siks-fneUgC)tkL`x?S@ur{W~UzkJ0D1(=o+ za83rmIb|Sj@pRUgUk5T&h~@41GE~Yw^`V}1;r?IB{SoT6jr4LxEB@WUUT@1NK$tI*HZxd8$WSF^?Rs1-q@T^d3fE znjX)(!J+O3Ae99{(#S&h^e}CW6mWzm#M^M!yxFr6>dS6>eR=MC525Wk8B9;Hbn5AS zdR_@|=|&M+fnR?8cB$;jA1byhS$+_US{$)ivkQalbZy+N_JKS0jj7*Mp3J#n@kS^@ zZTT*FYmO%(2W_W1|6BpBgWNXlcyeal=nYk^VXVNJ?~&g{{WbzrZU)0E^i~7nVRe4K z^1oNV4qqtoc`#GvQmAVNGpHIZFG??Purq98%lKn8RTi;XBy9N+_Zu)yHT*fgBM>&R zx7zL;GG8D`5I(DN@k_jl-%|cs{W|=>;#9rIJza8;8vigQ(no0@{~@RZ$&t<7c;Y3eU2f3JSoZD__y zssTjzK{%!egfbaW4At}=Ii}A!FcZyW~R(9r2iC^~g7>SJuA|M=zqvB&D zAm}(vr-Ap`fBs^{A4dIU|6hg|(Gm-?n-ZL0;erFjnydjm{@Jvr;a`AW1I~>t^b64N zuNxhs$GWtNy0FT^Kbl@R&iND1LmjkMq4 zsYS*I@C4&GPkx*9w?w~Mi2j>@%iX{igdb=>M?nFY3Wxx{Rl$_!=`S2v;8;geuMhkLrnknqP~cXeD{pTe>B~+DNXaqz!F9nPrnGT z?jkGju7^<~V5bZ{Blh{_%71wMFNuCR2*n>xbn_QD?|sJ-@8O->VE+c^r5Q{_@PA>Y z2i9ZNeIfkvLLRuo9n5TZk=eMj^S`mu@oznP#s4!an_WgiZGdN;r1Qc&06J2-zWU{x zZ2ygwzJCV;6|Db-mFaJvX!d^39FzGV=LnXh`BXC+nFf=6?6fEegZ4j1puF z{~uunVx_j>2@jAo{>d;A_P@~dxzmIEH=3_pU^T5rbAJVwKZy4$+@0(EWuB(*IR$q) z;~xw=fQ=Q9mRxQ=8Wp`Y$SPjvOmk7jK`v{ zGBW?;izHMaXPNxX{@+>qUz2y!-@((C?-xz~xzF6+$vYhg3rKpulXpb&U#aPzdG6p} za>&dtZ~f;!4pn~v;em*NfMxZ|t<+g8{tgfSNcc^E^S{aB<1c{!xzC`?UttI6c?x6& z-Gv>+in~t1A5rq3odSoy!$3;pF9-PNK74=opBRAf;zvZP1nsVixI23i_;%J7CgKIn z9NwHN=sxY9>MuVW-i(kO{J8dKH{j~kCYDytgH?`qzORCzQV{_}=+eymrC&7h%r}^b z6R2ni(2z`U_w}lKZMd_Tokj`ok2yGG*+#s|?V=ZWiV4~j9!ByVS-^ga#O{)21F<7d 
z>z-_f?68qvYs%FQ5%Y!(y2AFcVsFI7NbC|@$!VJnKVU8j!-c-6YqLg|1|Cmn_M7f5 zgKDvc)IBQncygwN7$_=qm7xt^PbSL~ixsXhHyIcfo76UHwch+#iAb@yduf%B;~y%0 zuXr~rpOKLX{>V3$>H?eU4Nc2f>UDE=$!_J@1oJtQm9R1OZ+qdu2LIS_q zH4wTies7n~ZiAP)((zI*s;@q8t#_iMDS1E`-q`-%q$5#=viJIsoIjcQ;Q?C^7z(?M zu>2)fryeJ~g(1~^e$~v{1m^wHU_ocGfA;HW`HykfLML;^Cgj(j+kSWr@U=2UT-Aq{ z`1pq}v5{={k(Mr2lQ>zm$)DT00a5 z+=a-W<%AzFWcJl-_YN0@X&ahN=WGi7vmI1QRl9WH5iQF|K?tsLH;-l zK%XYO)CDF8FDXh;gf?dE0vswR)1&xX5Ib@&sR}n`5JLv3?amFKkM`+x2xu z<1Z@yaheLZBiznOLEsUl$vFJ-hN$g-6X9ymhaCSN!q;b*tHX^rm6t7eh~*AboeV%X z0nS1}9N}?Qa@WKEx1f#xt*i=66dlpos{FdStiGhuGSLuX|ZshBX_{61Y zIfDfCk&r@Qnc%X3dse7k5LD?p)7suhY(}tk>BF?|pOaiR&w2>tY^$ zM_Te^Ss(FEt}OWpUNj$sbu}3xPsCfAq>fdnO`TMXBL9perbAF;Uvm^U?p(vFWqbP*5wm)aymMqSWp|nX4aQ|Db8rxsE0+UzGxbM z)Q;>#Jy_3r4fhu;)0c~{ZpmD5F){SoLuiL1bMwxFA6cE=yOf0)&#}FB@ATyL&%%FJ z&AO@5hG{fajra|o-9*jV`Q(Be`4gSHV5DD6M+u_p{dq!5sWMIfaRCYWB)NeFsXdY8 zu_%iv1N(4twzpXlKxtbNbyOkv9J!OEF*hRNJxXb_h_?AKFK&m#W zns+~17VUqYo%8s`I6vi-_!=ou|Wq-Od_vm-Bf-i?(>=sVpxyNDw3i_XMy1m zDxJ~38f*v;55H0Ssmo{r3zy4GwY@BkrGM_T?6#%&aPU;M|570^xI;f6V|tmuu)91n z|G@5If-XPjk?@nxToG-oTb;L9v`>!9hBKPpj;sEBtQRbp?2sYyFks=rjTFX1T6NrH z#W`R7RV}`Fgd4DmJh7XSiSm)GNeJh;P;dD8GrV^=6<$@_WSr365-AJ~2)y9VA}`Se z6Ox3k_hddBK{PLaT%1y5Cw>AEbJ49}iF2AZ&fYnU(xiDX25h#<5iD~AX~F&FFw-I) z8`7f2PdOb|4xnVj6ex{v6X}TXREaI;-2H8t7vEdSXQH^S&C3t&NIWyhOu4td-GTX> zYdxvWVxc8MdA-IE-ttWHLqV##tW@xo8j?$=XL?5*C?t{EP|B!vILNgVB{SIyW2LA^ zWICRfn3)aD>p6!#!tf?X4a|J$bVfb1^McEZW1!ddcu#j-iju%XjX49CH9u8L$8uSG zILDp>Q{=iy=~;Td|wM!4nM(EU_iXWD%wcRGvL3CtNYd1sdT*eCgXHqaqEfDTgu z31+^qM6x{sQes;II10#ea-RT-6X#v^{^DqVF?+W=>pc_}6dR5Gju z)mP;q@$fz)V7JAXH16kM7$2;%{84!WL7rC8f_^yfzr()0#!tyVgRz_EknNomx7ky^ zr!x9+ncC2t76%$BmZg#y8f(W&X86D;v*C7hSMW87w#xiP>>cuTY7q{fzLWLDkraGR z7Btuo4rf$hXAIhAS`Mhk6;fn6^7%t~r{PFQ1(NOMN&Lurmm6#(U%|%q8j(V zZqD3MKlQVoCkI;bdV(2$z;z#~x=)P|8bkNPghDVbb?C`sgUhpF4SJ zp%}2P3vCf=Y&48u2v%+%|JhJhUJOh4mqfOYD;#GMPbBQ$J)uGhprV$FsVz+qfA`2f z9&enAfWX2^-;LkQRF5CM@%?-0Rr;o9ueE*KmiV-qKXrT`8c0<#r1*Np_qFZWP@3mb z!=Y9IbNOlB{J60p=w4Irleec@Ll^8w2^gL@4VnhgrQ&sjhr};PFx^fs0i4nM>5 z@Ic`WC`4Oi^%OTDe2jkOV9~Q+%Yu*5J?-JBZTFTZL$@3rDcD-!u_C$979#Mf$<~I6 zs-jFkHXM8K7`=M7Cjy)BOpuwjyavxE=p}0V15TG*hfgcHBj=+f;rFn>6n?xEtOH8O zC0{)Wm?>hv4(?K`aK7}D{%F}Lz5Xp6wI0jwjGzKX)nLXoio-ECUEs}k1Q2msf~#F& zrW4x(3TlTnRB`W3p1#NZdn#?O~`{?#Z$9N{duiM>x%~hID_6IYC?&35AoS6`4Vua zOQrH`+mi^Qjg75dp4TcSouFEIAe_4Q;-3ngLR4 zhNujvtuNJX(LFNxuHZ|Oa|*w@(^Hth1^b;ze)!pz+i4y8MLPG6ff2W{_8`3J6;pf>MMRm?R^_X@D!?AGoj5O;vb`NJu z!HI2*=v-}lk)&4)L1g#nazuEH7HEdI9~3_9)(9hL(*{{yv?I@N({LHWEAz?;#t<#k zRVEaU!rcABR!9kaVz}J>9>TBRqlPy~YE$~NhT8wEaD5L_5=)rUL*K+cH=vp}+mKt( z_`3SFI}O9vKa>cY(X6kV{Fg}@#E-q9FiydFD1}ft+iQ}?f=0{wnA*_Ugm|xb>!_g6 zoxXgRw-B2Bj?pTb%2$h$+FX1S!}UrCDsXM$#|e8>g@+*a7HmCxff?6L>ko`8mgjfY zum`N+$~fEG_qC1(C$aY{^M0lm`tkwSF^^n)TuZg*Y9?{C=m><3Zv_G0aoy@QNfOYB z+c6ZLs4Ls9;I*oQee+{N-|&|l3&IkT;^UAH5IRaG@{`8Xd6oaw7FM}87Y_$(U8eY8fcswarGyoT#j+u;CNQRY&TXIVKf~VMvRlDmPRCmW>-oR&HO_}wpW)D zmEgmoLuyaK_5+$-bd@8R)~ji1>n`9rsA8@r4wP=#K!;di*#ic-Vd7@WuQ6 z+Rz~mdqjd$TtOZ9SYikYdPkkV&DY12;kV>;0|>=Ls&B;IRVSs52PMTM84=e2>dRt7 z?sy-x*RC+-nej`*6DJQGL>3A~ZovT%>1V^d;hvemR)Rj;FQQ2b*Y_>cgF<0_m1Ods zG>1}dKhbpnMSZ${$e;Qk?p6k7wH(j(Jbbh3*?8>Q8)qp%0?eSR`It#RgNm^P1vXnx z-hy~u`kUXg&G6v}Vyzuv?&A~mdrc&}P9*hK`z%_856HG4b`LL0!E4xhrl>t*Nk#0f zA4Ay?b6lw4g&|E7sjt`xy;Y*h!L8VfGEL}qyEJMgVEm=T)`u<6zYEDTkjI7Q>(665 zCgp}I5D#mqP0>Qi{ESl2K;K6QaYdS;X-Y$#pQW-a^~ z^^D3M-t7C`*`Y%pM(9l+l3a(i$LwV)hg^xnelt2|0m|&AVGJj*egz;6gnRc;?|DbeO;dd275L{fLFDI?UnZ zeuF(->q0A`9*REQaKdpeVhe1rY5EZ~dsTum#-ua#goD|n2cvXAC?;D%Gbl9tsEUAV z-8gdM^8psO1!bsdGaAFkkT!hOpx*YcgR5&c6;Db}dx{VX0>33C)3~{;c_Fx?^4mvf 
zjVezF5#4!uXvOuTHaLbN7WeRj5mHdiwfIL1_ou_h!V`%3v72)YEK&s83F~ObGGk#h z1a!&S3|whODXpS7z0orkTX3y;Vi{%cu&Ga3hqb(I4alDQ$DtD|ol+VM@aFsSmC$ zND9a5b%Tx{-BhtWu0@F};Rruiw!<4W!*8#CwtRo!>eHhtfB`UW&oXP-}fT>+eg^+IOllCb8TQohHqBZx;Vm_~Mrz zEP!!(FSevquzVYza=Dl}Fl9v=v)PEE)=8Cu2lf!@h=5FKj;4}CX0b@!f!S)6#VOv0 z<4f#_ZbHlc!>S6%lDY{%VvuKuN@LB!o3hjTMU#RH)E;GW*WA0Lz}m6&Y1%GFFlGlt z&Sie#SvI!)NkF3>nh7V>(+0V^;b@~ItakOc@~VVr$Zsn&6tqLFKBl9auZ<3H2(Gqb zj!>ipy%=WtEb*H$6s(9o5Q{Umy`+k&wKhg0u@l_PZR(N5q#sr2)gQ!goS$Mux1O97*CwvDVWHA$=!JB)xY;Ra%3n_w4wlvSsH>vgJn4yvDO{LAOP2 zI^UOXXe(!5Pg!71`E_~zGFPbHUF__Ax$N%Qb+#fJBh(QtOPAFO)^%Sl`~0sKv%@zm z(qu;!TSY_ob8q$xpc_zxr5d_u3~{yzwtO)g(ju+n`9@8MrYechqWtjlK&+ zo6CW#+f$pcD?EwiqfR42clVZBY~(jb32A{beDM}6v&$pAaA-jWzP|cbg(W^=Qy2Bf<)BJ3)%NG zh>od~uf=q1wwy@h)XEg^GDto%g_8Z|o1iQ8v4EmUoAQ^B-PgY;)g{g?0wsoHRA%Emuvm9+|*>eeLHpkX^PsX&o$a?0c5pU5nFWU^Z zq7PdNO$EJBofKC0h&dM)Ue^Xp=xycH;6mDm{O-xD-Ed2eN0=yq?Z$VjDj3OM+kx#R z;6FbPY(Mh-`ak|{{o~vBoKua}Tdk%Bb!=NpJ$vjFLos-Zb!FTq*b{aChqku>itE|> zMF$9wph1JXOK=G8t|7rC1Shx;?lQP*fP_FuAO!cp2MZ3teS$-P!JRvhobS~+_1^v8 z@7DLGsHt6>p6*`Sd;NO#+5t%qz1|57JPZ;ZV7YaHj-wUmc)ZjQaR;wk-2`akV_xAY zm%3|{f1zxCKZ8#AS+`bOlDA|%Fl?<)z6!!jx=vqlB+)}S|1joeM=9!fdVuQ=M2}8Y z9)FH1*@*rX#$#AY-dUHzqTMVT3cg8VZZQmYUd}NQboUAXFOtn*=2_;A_meCM?^;UioAq^B75yXb;^Xa z0PbpW8l^>q^EQNzMzi~MKDkHf{Aj@JTh8=JH{{4i^p(z zAIaRyyNcd8Jp}nYA8YC$bROk^*(@_HYbecKLbFN*-1V@ ze4Z?cF&a~wSfX;8GEh1SKKdNLn*nh>Hv!;pn(QVU`#*=XOzxg#JoJk+Give3B@q*I zBz+Veh8}OX;r6~JG*Gz`27k_LGyIY`OKnCCLV;NPDiA)Tb5qJkcM_Mxlr%rW&k zlImy6P6To(Y&R)D%lIZm)?mUNFOR{tnGD}geU}Au9fW=8M9Qn9($IEO_e;4!!lU|U zN??LYBpjSXuY3p|PT~yiaC3ZJ&LK*iYbxxM&hw{9#3!!=b;2FfeIqDPaUMPIEqOzq z+cTTs_2GxZ__cA!@QOh>BgpErAd#`xGc&zLPKVx3MQF`*@JlXGx+QH?o1_m#i0*4e`053fIs$}kk7|{)0o^234Uu63T2EVY;O9^o`xRmfLy7^$ zu2MM3F1-RESuQJcr2uVL5q$gKlj$>{AK1_CzD)w2z|I3#D+LcxT4X}HG@uQ;l5oC! 
zf8b6nB>K45@X(=Daf3M@Q^-+UB>hxAluZ|mQm$UhF}Z_T3s~**zhE*O6@wR?B`baX z*5b8(bshjG+HFO7{JG=aD!j~)Ot$au-}5i**i6`=-d z?4$PO=zH#KbcpGj!+@m6m;yZC93yf&pvwK6xX|+&x>7DGwcGJnxxaFgE;}6l%6_6Yvo&Lj4BN*!7mfoO>rtp-%8@hB1=BywJo>dCWj)mvXjm*Jr?>x(7BYPDBc(G@o-*+u8R%QZ&vuV!Pl`Iad8SQjRK%B{#X#)6_H zha4kv&fnGNx&~BwCg(z5Sve+T_ZX*+*;L2XimTb*kPBGO##HS2i9QA}MMIEG8q>DJ zh%Y);8^sRt7yyNdK!D2Bo|hkSl?m+IBD_ruyv+|REA!m*FaBGP0QhoH1WxvcTql)`c|&WJyZ}($E|-!tKR8>xB{+Ir#-L|Evz+ zhhdd$BG({YUCh_^5Oq0X-S@7Tdt}@c(omyJ2qIc3cOb2Qr|D3J7sfnMa)eK2q}H@= zdFI|BwlNUCc!gB5m&08+H{6k{;K_r|+GIjidP6>f>J%7kDpe;}j9VzacQC9I&u2Ow zj0M2!C%o*U2-zKh_|V03Jx#20+Rrp7pTQ(iD0-UaaYJVuB{@T=3-=nCaNqG6^a;Y_ ztdo7RSb?+){wYUKA-u_Y=|xS+89rf~5hiUFbIoWhxW`g4Ah&s3XK>+XM)GTU z(Gi9QNj`&BKL^fOKYub45=sV9b_iN#ccdyS9Xe7W<8hcD zMK*Yq-7L5*6sValQ37fC(T?BC2p!vfxwKBeVn*YZc%<`g6~FPd=xO2}Z;D<-9s=Z1 zKQO^uR;&X@7=UFiNQ6wi^zEe_)f_!}`+Dvr+fK^TSh&7sUy}M#3}Wx;s^AHEHJ8PJjxgWR;(h|5Lwc=OJ@{3>)WHWIq*&jp zaXq1$U7hboB}$JI!tMEleW0S>>`e3B!=y~v7=zM8b)5J2?@_qg&C(GEK-q;?>0iJ5;4m3!&um)w?mniCZ=Y^-6Lf-h;quXvtCN>nqB20N9HF6HB+#*Xn}?~+#)8X)cScBnrbtxBKg}m<$+P^; z%D#)@ce7mT+FedmszFH7%WY@!Tz>SjRN%@e@|vcVR(O)NKSNFytAt-eK8TP5RPr?zW@(>R8bU(eI)MmY3 zGLbwNp;7f>-Ok}`eHLXL5G-WR73T)wTL?Nf3%J9N`a75-6L>p2&GvvCSBF39d@#D?(E(2IJ1aq8@30TrA~^Fl;Kxil7ab{FZI0^aIPPzy*cX z0iCt!veg~=FBaJ4wpYK)SNY>xYU$Aiq<1vv%e9srW>t6kpl1|D8qZ(qgxklZR~mNs zjGDTVv{i_A3$HBC=tiwli|kLJ@>=|6QN5#2KRdi4e9M%qEMGtHmGT(CgfjiLgC?;MmKboG`l`Zg;V?C8ecJlJW68hOwO*h`uEi zKF{8y@l!{H^$@mm&G4q=0MN!o%I#{Wz#7b>5q!7gBK_T?f^DjWJz#kgw#9A?5Z$A3 zQa!4{GD^QKKqH5o{8 z(}7m;>_D|?VA6gGxob?691{s zz>|9~@WCw;^>AQ0G2Q%Kyk?u|Z=o`TS2w2!5`9IRlL{xhV*Uhi{i1n^l3Pg`CR8rF?_y;Y9A zi=axeJmUzX!O}@snGCjWE0@7&BN*JjO~u`*Ov%sYQ(}R&NOEN&d5i^{(_<_0kJRIF zhBkvMjd%@D|v~6xyqbuOy}Y# zZQ%oC8=vx!W!;uG!FcRasqJEdFTDa z#~L!JcSk6qtWn{SOC?@(dh*L@%uM`OfiEqNT->cJJJrErDRJ8knbJ*i>(^F`aevkG zd@#TGXCkVuUoHI-G!ludC{ikm@O`52Q=zsj_|l$T@bGxReF|Z%LV0r@?%U~BYKh^{ zYyaHL1cK=WdYFU|yGVz%#4rBHRpc~yOq-JmvE{EFSV!0;VZNRka)OkB$4k?ojXx$FwaM{dWM z4Or^cJ^d??rhHkvAkW~QgiCO(CZKqr1ih(`b%I{B^Nq?=F+pw1)ww79LAb9D=FdSJ z;-XL&(l!0g{ZID2sYcD(&@lODDbaTHE0EXl71mS+iWzIc1hZscsYgM~HVjE2P>XvL zX#e@u;a3X}*z2jKZ1+lRyOHpoV?1@X4p%kS@HKr(Y*VRT_pzvdNyy9NWxngZ0}(5& z7pk)K|B0Z6A(v)JGaa-Hu!YOWG5OEa*EAls>n{7(vO;MA^XXOSFHEn-9z+VeWY13y zy>)KN02&+qm=%b0h)4&Xma&vjVON1+Y!~(-yt0g&T*yA{;*sH^Db=jG{oc3N_(uFe zW+!t+O|w+6c_Y9mp9i3Gg*woz9T`yK7OSHK9@&&yJJjRE)O!B0nJ5wXvEmElRnjV> z!pc%gtu=_qM3<1XIK}bcr#u{_h&Ooh(t`3_>p6)*hq_2@T(cs7JyuzP2jKme`rAzV zzkafOSC>^H9P?KfQVj1L7r;5)^%K^Cm2$6{7wxyJ_Hm@I z+!HEo_5=>UbUA9EB(qDiBp97=)&!%(bD7ulp$MYC zY%8#J)F!_z9vVHI#&tY~KPJWSubI+_8n^le{!*R9%{Ge8+3ZE~g_e)*3v9lZ_}w;w zFv+VrzQ~jV%=)aF6hIAgN`jXp@`yOVB#tdpcj)b9U`zTcB*#b*M-uFIwf|HNeD=QQ zm9y0dErabrJ+Ic9out>4r{d^~j)aA4js_mGN;DJd6;bwv9Y$l?tgS?CQ#rp#G{vG9 z=v4YEd$#n()^HP?QEKP7H$Ww@3EK*!q$-v=`xiAiCk{{#ZSe>wMb|2`(Fz;!m3?27sL|ujWA=Ni0*frM&oC6U zS~aRRoSFP((yD;fmDyn_CJ4V&z@tCW?t;(vxcpa5Y<~XRR9aJkc;>k_7zR!`^dfbt zc%N!!f~aKWH4kXZMg_5U!XAdm zqIDkP-*{gzZ5^Nl2*GHWUv}Ivz`#h~ZrXFnp}bE~-V)_Ls!&9dq)JHLsUR5PRqfK| z{fPLe810ojd+L1PoD;uhf30ejNtO{9y5_;`-9Mc9dJsu!ndl z?^8(n=SC5jL)uYq9&p0al`0CK2&2z1)9UZtpY`Oii_Q=3?-+9{a}Ovn8NRm`zc7@AFnE)zCRc~ z1}~fs0NC6EF@d}f=nDzF#yo%u^Zw99!vA~ogZ_U9TiV0#`?F($<2Y^q^Oul+AI$#$ zr(ymzeVir_69m9x*_Un+r1XE0s(kvZ458~Ra+?hY|g*;8rb*m zH2?R|tbc^N{-eQvFc{(g5H$ObUj7fUw*MUL|E`yRhV&l#g)H7w^JDfnliiuV6Oe4i z;AaZ;BV^$w>NS6C^R@9KzH{e|bMOFnd^U!<>q!3>VMR|OYSfv+I*I9!X{2dM?T_hW z`b@`@sBg302nqlGj8`?dchs1|Z+Y4xD@<;H>m}w&3L}MD2r50v) zdN;EeY4y9PfzB{=6KZ)sukjja`yATdxU4{dRQB*5G&f%mD7OW_H}%uW_%rlI0~xu)f;M5tg^iYnF}IMmB1BPb^AM}tD;EZX5p-bstBn4 zd=yX%uNy)Z9whptKEn+9z(sP;M9?$)P0W`qX;>tKjuG3nnW{#Plka3xeHMe|s&Kz) 
zmDJ#I02i&v@IT>(3$$f04RbNG-(+UIA~Jd@dN9e`Q$67)Z~n7dgLAOw_U24pmN-+X zsoO;=S>iddCu1&FH114>6Z+OfT;*eSSarOmrtcG;$^F(t?>VPmePNVcRdu^Bwp3N= zh+I3If6Ziz-}3*{0Ax8Wjl++^XV)r)RUD$JqLJA}FgP zL`pbov42_b(?1YoDfq75VWn#0k0f}InN=N@0^H8fhE!2t9QOte&)Pg2#`67zqow9` zUc2b=<~i#-WLq)9UF)|Zz#bkOFG@QTwT=YQ#uHUs?3?jdkJo3C3Gud!Nb4C75qsS! zt;eO5{f~f9d)#CKt^3>aDx|YoC9pj{ina_cNFz9NsC@K2tf~4VD}=kCM?~6DBYpo0u|WO*is8!M}H?T&A%I)`0NPVgdL>N8tZ0y zGb2{cL`{UW7&+Ya`O@xp(9G+hz#L7Gz@Bc!K$a5tJ5tvQ*q<+r3E-Lo5c)_xR&`6&>|F-TP1&%NzIWJYFqXR|A`s>2XMFI6h9OEKYmsH+_~uYtC7j zr%gXUvC|jvFU!up|8Y#!NfwpffZWmY_)TC$0-CW5?c;_M+1-I76^4qqg&NK#<)b^Y zF|1_s^_LuPEZ4of$8+#FY_f_P=hZr7alVU2d)iDUAMg%N4nlj#c{)Z<7S2vw(>F=?n{+X`E` zdcXJ=E)`GA7VOI>(u|MEW5$AtRMb!fioa8v-en9@pI=fWR5e<$&YVi&h2OPjAyBhnXK3ej}^-skt7(6j{`074(PP4EkYpUNIs}bwZ_O?gGMLvs#gJ= z%EeEm9#e-t^*mUjr`TRE^tP9>%`BS2Eh82Q^EA!sBL(T`lquxq^&|nqJCWdLHkCr% zXN5J;sxec6tP*a}iHxArpw8`VF^Q5(Vp#a=*os%@F+}FmwZvlD^>_17ZKrTACOsGF zH-#aZH1dFJ=K}OSu}o>mZTdG7jaqkTGj%{n<>wY3(epO9w@A+CA=-$!7Ge|F1B3Lz z$JyApWs#gP8dP(2DZ%Q7c|~x#8zxg;2Ey`Y9V2=eTj;~wHx@mRIXU_~t8%z%j<19JI!jtG&-@!Lc{&*rG zj1m71^T+~tW%D6p=_P#?5mpJj^1V2XX=|tGqvO~ti&R6USPPSoYilYo;Z@cfX^szb zuZFfDt|N-zUWDddU%GKcFxryjoGEe}=eXG)mgcA5jKpTK%*P?YU*KR5HS!g-=jWis zRUhmhdzYI>%|Obs#@YGCefaff#8sYDASv9ZF5(h-%VDY8@TJB|7l`s5(}X>CX3HAU zp=CunD7?5#<9l@388MG_w#5GZ`>bL7be>k(?0caR+|sXsU9xQ%G&x1TF0LZ`Xw?8r zquUW_EHNCd#-&?s*}t50pt4xjkAu9lJ0Qx5dnxNl%vADYMl9g4KXYx~9s+rO{A?h| z*JNk}M}gd5fs{nvlV!J*J&O!mIUwgWJ)#!jZ`$1==5tg8JL_+Hv|-isGwEzrh{sAq z3&P6PzDrdI-U@)yUBAjQ7=zeo4kXE~!;Q271?%?S?~#xybam1(p9!K`*iC*Wq+1;S z4v8ikd5&mt=r0UY+U3VIp;cYl<^L;CRZsX4TI2q&Q*u25xQtild* z7P4ksBxrt@6?MxIZ3LFK9epOoO|yvn3tt1$V;^Zo4eDlaE z)Nw%*gYl~(LrwOGB3`xUjd=S%?IZ=`6~NtFHdnSSG=y?t@l~sIzWv|mJ`!DApOr!B zd!4QG^jtM9d?o4P?`$Gp+k z0=FeNgb;*t#2-yYEOrV=5aL90m$uW3jpo=nqeqdZ0uzynNiw#v&y8zwXU zkd@fvnH`YMFH=wGTaY%P*ssLdqpVcP9Abj!2u*H#s9KtGRNW9C+Y2k>5nECf{r3?` zw5SdY^Kw^c<`-!YgQ^U%P^+cdiJ&Ac3Q6X#(lIEp6Y|fg2$A^F3E(L?OSd(4+$?Z! zQgDatPg^Gp(xEE)I538cHJ9E0KJbqnh;HRE8XrQhtM>?u^Nl>%PcRlOH0*WOEpUal zhHh?(630uc2h3_Tyce#6f%ttSb0I<>{7e9#_k*;C>R{r@N=_lrl+N4VqJcT-Zj*ol zvc_xx7UOFHm082zs0R&3l#x8J7z`fxMw#F9)&%8{K-diQS7hi8Z90I+;FKK{5v*#v zXTm(VBIx%)L@*IC#zv=b$Ewl}aQ1AAD6)smb|t;(GN{{^of}Ta>U4ZEz=OR-juFNw zy+II|C|bPamAbV7SbSj^@h^HP8H^2X9t9<*)wvrAxQ`;JI##)VTk`ew^0naq1dnQe zfZzGqdpSxPzN4G^Krf!)A!MKIu$>L@k)73Q5?2vX(NSdlHCw&dsZ*SosuH&PT;!_Y zWcf^A1uw~bx}qozEiaU@gffRGO>ypIc&#^5slKN`n9E=#)Lj|+cCN6;SSbdDU9Z+! 
zr-*G#Q#|oyz_)m^Lju!EEml> zpQm?Oc3Xmh!kA1A8oTnLkYf)@scu~x)W80KjROW5b_j2ko})SXBYvstYC4#Ao0SU_ zLfdo=@X;TOgbXX=mOW!j(zbm5irV%tCF@UPaUMmmv5AQ5mg_0*zkUv(gWZ~L#K9|4 zyaj|aOAsL*gdAUN;y&|hgm|4@CBTzRa_-TO{|eIoTM_*~HV%OopNItjf&k&oRw$=$ z@M5LjdiN3XAp-*cw^+eGWG;^lj=l6#-B$#PlDtP!2tw}X$%4{E0LKg-(1qCl{luN7 z%aDlSZ3885C(q7*&;SDAc_{$Ew8lNE^FMtVI9(>q68M(sad0)(_S)5#JS9g(r{J@} z1X?Q-g#QLc@{b(-cUiq~9UQpX=Q(otzSETkXUDSHK9=@uWIh0ca->+C*T{|5ZAvyF z@F@+oiEOrlY%nuREb{1w{t9#*S!decc&nn9Db1NgeE6k`nG!2^gs$}ienFI2hBk8R zgh>sfHUzrLRcNI!R%{ynlGJFTg|Vef;C(?;%TJ0fS9VZ8vjHz~*la%N%!MHre!C!3 zmo51=4|NPKTHx$d%kp)`qOyEx&tdgG!sbsCdgQX6J|UF zfxk~8>2>{R?)sH2&X$<;o5v!e9eN9bq)gKT|>Dn;jPcHK6!jrbC30 zhb{%kmwCz4Ny_VtRF_IMnYjM+B0+k(PIK9IwuM*rGw{||#_eMZ8H_e|s&lOAF=oaC z{ZUiFsk~T$zqarkkta9gIW{eD>dJBYEYiqB|8gQf^@mgD`Zb~3XLT@|q3j-7F;3Bz zrw2u&74)7a7Gu(FbkEZ6i)zqWF-Fr)gXk9-yGg}UkID&+zkC1_^75mww87vl5_{g^ zIC~9hxKs0*{Q0^5*8$ob8$3s1-3upDE#$}PA9Lc4#(G4IYYiM&3j|z{6g~XQ?j^s#dapJFgEhk;_5|ohFVo!ShFE@#FFr;6Qw7%}KA1{(phd{z<^- z%2;fJbX5$Y6jyK|dkNgbog2V${&(}Brq9ORC33%Qa68veDq5$e_$+v0v-SME{Hvnm zue_fb&NPKg@&-IoFexdh$}Gdv}A8)=UA zc|_4wpW|1xJo;v~=tfbY_IXPd8RQwF;^o3{VF9RWCX;}eg3e#XFmV|dNo_KCVa58| zwaQ;_Q9(Lm==V$qw6&V6f}UxYOlW9sQQ1IaJ2;iHfD2KyVg4eOv2k#8AKnIVD{iX< zuFXU?;1GUi&!8eQCt6+r+8|em;ID)Icu0l>+~u9n=kgR=zh{HJZ&ek6zx5UVy8}vn zN2s2G`@={8e-2fUz{rFMEGo}Cx%(uXqimftyH6n7f~VgfoO!&Lo1N?$5B_ksWm?2F zU6Z+7K9V&rvT&0OA>}RV2W`}`5Oim^DuXv`xf;B(>F;*|00nbdc>BppRgh>F7l!n` zoNdbi(j68~ADz^?{StK{u8qwqu#=tIclt7yYdFo9u=>;mQLmgW4!nKXD=%S#2Y5U1 zwSgls8WgKmDHo_xHhl9h9G*!Q(c+7^Z z78q2(P}BC`%}Ky^8P#m!KZ4Y>R|TCJy&d<3)(~o~kruC&Y%6}VCiJ}u5>zJkC>rJ$ z5Sppg-GACT}4t_DCWt0EnultH~l-j78=q>EpYJb!Ds zB%7#jPxi!9a4;IHbL=uUbgnFSMJqxf!dmW*-#?G1LVVcQ?BW9c2yHei-rzzNYSGxp z-!wAvo-sss$*sHH585T(GpW}^!{H3AsVRW(O-Xm18o@Jl;Vek7#7+i!ln(vMhrS;) z&)<=g?e8fy+?q^_ZDhfeM{Qv26aRVrA^KBTX)0t+C!?YP>#qzbm|%wlC+hB2%9V3y zJkgMn5);?1U$g>CbDTv9FJRr<)5Wq-SFdUNt;R(K7keZB@~St!CAcs1iR{!Vx`V^nckF=_H` zs(Sngj9{uXjV~s=^j*~Cc1k%cjh|oCMk;SeMGW1}B~@EfyD6k6#P5(Sr5_#UNzMLLhSUGhF z7bjE)XC03(P#!1g6!GN&Prl}X#hR4wp!)YpT4Dz>0GC{T$~0u`+ekmj0KQ5eXX?ML zw|pM1C?_tAo@bDGV;`S%yzlt#Iqb6-_&5E+?~r?)Y*Wf+SmYL=!#*)TI+QJ1U)a4l zx!gT1xLq3ht@!Hhr|--h3tTJ&b>ARO!yn)hvX+M0pKw2iU(F@}dx-7v;aBzw;2U3# zKeq;lhP>eu8hO~+`k-__x%+2SfWG?uW%a{t>xXT^k%u+l10UQ8v`&|W&;Z_20Zhq( z%R9U)0oxxCYeCTf0P%eycEUqW_W$f9yi`M`*VL%M5eLAe$`iE|^vRj5C%~@yGGw-M zNODtFB=Qj&E^{yePbm|Pez7)cL6m4MH?TO>INTd=edNy-(Paq_Nv%U z;sI)jTG1UH&c157DG^3$iFF-y6N*acO}Djs>RsSG!{L}Ya*7|BaOVrnNisuf zp?p1YY+#{x?TNP^$F|Zr#OOBt3H_m~$1tC7Y?c+>1^W;q(kroccTSh^T{RIf|0%)p zw)(m0SVYhHWM!{qX?)&h(m-dbP>lkiQlBcGemItGi?s}Ob&i6_>59RwDO7d`+>YAi zO4pO86k)zj_$T1$}zdRb3$3?~tMH`u5PevqV)lz6u!OfIwy(06ENI{!d_k=*r|ERp68 zL)A_}#6(F<+WPJWvcoGF*#E`#0Dh8Z@v@{v<@PVSa#!i_5!06wmP9zG0sR!QF$k*5 zTX~X#LX4VF?u9*NoWm2!KHWTy;7R_?zp2{nBS;lgm%btxT8R$wT!JOrD)H>=jNiWV z&@IKle36v;qpRKI-rHrx(cEsuICT7}qiJ-&Y+w=nqC#r@<@NPTO}0>WlAj*vxa z83a9J^wH2>MS!RB;*L0ED4HxJSREjKSN$B!!!SMqfuBqoqzJ7=0zJEC5wF-_T%5@G zL01S3zrM_gGd-Ls=yce@_szG(52NiFb5Fhw=fBV^!t=sc5^3&p1O32_8m;=0u~q}I zO@7HluC}c3UX$5qJBQCJg<@l>q}=X>P5be$G~A_ zU``L?mW)57YX@S>HRZoI+)F`*RQdC{_#M&Qga*`Ix5r?qH^Kv2!%R_DsxQm+?j!>|3ig8_mk3cZy=h?fS)FCqvo{I4NQr6KBl0Xf{F9lf#tq zbx;p6^0u#o(t%Wa8I)9~d(?({jxfbowj7n%e^07~#gpB+ScuP0%K9-gEFl<1G9MC0 zzU=MRX@*~cdSFBAX64$VhweFs{35hXl{QQ9g|>IosF_DV7EqgnBPKsv!>`%Gkg$F> z2}nCswqkwc4&A~Wn1)2#D(58#>n7BUNOBxZzNde|FtGhABmD&AaEjnIZ+fGm_7|in zqtES%nW6J4o9b4pi&dP=2b}iZ##AflH#!O zMs5X;8*MnIRMp`HVNiY^x2c;lm0axo{96G^?0$LxP4MG@k=&ww@vtE-?%8W*WiQcq zM}|;8P>!k-;W424i8m$03~Qe48>zS|7%K%q^=u*lZ7MQa6;zZ9Bm6$L1Id=S2yr`u z@O~)Z{s<{Kdh%Un#vMQ>=BLzNvFe!vG-3+x16{V`#`y~%|DnU59cH=pIN{ya5sF^z 
z;gy7)oX7n5lO>)23gS`UXfP(daaHys8l-4Eq__~`R82Go(o?Bn#Jo}HlS#zq@;k(F zGC2dyIgic1#9AQS3#3yZt9YR&JRL|NUWdC{xLxrzT^`%`>%{98J)8i|!ByP|e&{@) zd|A{#3uwQ8@(zh9*{|MICjef*nc5!dd(RYT3U;>28pz-?qECuDTu>6!_Cqv~Ec;gd z_4;B}!e89865Z0n@1nbK=y>4&xM`W+NGd=+QXFd5TC zDQ#IPkrCKW#DlurVai^-w_$D6>i20~R0L#pii1|~mips-XjRqP^w$}bKsRK>g9fGn z#TfmiGNvw)ZR>4-Th>h?!anqxBz2h?hOt>5>Pfd4$iJ8jELCxJKMig9;5sVE!6zi>AK_8W2QxJncrieN$hd+RuCprk)V890V5#fO z$BxFA>`gB|8La1Jv^nYDrOpK!#*sT-%!cr!lH7Y|#(OLapxTEpeRg$^ z@i>03Y_-rT}cyV=!q^Mcpp4vvs%JEw>}M2hZgqX z8;<%f!RK3yh~cm1Z-%GuV+%!|T5q=w$K4-;J`O_I*;dG!5(2nGGe;T+WR|`78~!el zlSSGr=H@m@&9I>v}^lCo?);|DuCfjExUN$e63}Yr!qfBDh zbk(Xp0-G0>5qqIG08-d~VZJ&Ov9p zcG@;yt#(nEGZu1zJhK)YP~FZftWtEKDY6*gg$3b{US;qApCL9IqR-jco*9MBU)}0j zB(J8!?S>}+jegp~{;7=mmZOs*+(IHD@oihaO;RE?%QqFtcOzzsyI)ls*>>zdhV)F= zEteQtk-2)@nE{q^$Jk4b$UuA% zh&09h9HU+n7X)%ETOf{KS{u&ON0u0s+g@n&a}IBdhewZij8(ui%);2+lkk9t!7IWG zdP3$fyo9Km03@m`v&=@cU)hH!lP6iOtAZi|wJmFXG&t44s-n;92tTn}=4r2r?L^~o zICi~~dQP6~PKWPHqOKfnaVZG6n$M!pu>$pQClDb_uX*`;qW)PnO}|3==ONirt521% zE&HDdM5frRA=o95M$W&Q;(W;r=uIYKrw+Xh(wwm@a**Exv|^~eztdgHL?NuVOcF~RNZf^>Kx_w(&!BZ2Fe?MIOD-e>^^3Ng}x zeQ$nZ~;de{fA_naX=1twt;ra1*C8WVkceRhYRYF_PR zdtBHwrP^$(F2ZyrVpiz>Bp2tVZO8?BrAZh+Z9!J# zYMXTZXf)MwAk<%MiNH}uY%ImrFeY zJ<0pxZv?@X3N*_S4JEONE-+#>W%1icb_=(M{G1q}R^2!E#$Te?=H8W9cp;|}i>XF5 zg~k(qh2>R$M39JsdE(Mmb17ugWVT!biJmqt#l*Qf2h~NstK|VEs|ZItW6nd>mHXHn zz2Rcd9$PTj{3aWys@PA(zf7B}C?MdC_N1X%6Zl?1tHvi;Qp)+4_G0T_H91P^>~)M% zVDWWep<^!Pz3P1O8^iSL8y{x6!kT8@+eQndikLpaTiFGEYHi0PpzTJC8bXO?kuYUN zU62I?#Q4Yc-#N8QlI*XzknX#7u}5M%kmEV#y(#CmS~VgGBr#x<##MhZ`zf=5=bMw9u$BV^)&hSZ z26=7~6@0|=!9{X?1Pnw;{1|1K>#gbpG-mSXZorl0HQ~VoBEW8~4brsJJn7wXIyGMj z7KW#ZBHg1gf%izxV|X?%0ATu%j7s!?`aT5Ykq;@ULRaoo0Ex!{3AWGjz3cRv3W6bk z?}MiQvw`=22J|pX*lk1ig235w`F$Av{yq%fwT5EjVmF~uKkIv&5LC`hBSq6FhmV7o zFNc&rNrtBC!IGS{VB9$r9zV%o}keM`UZIwhR67zn>~X0gv;-L#`b7!si+*(wEjdE0|`Ky=tDuc=mk7I-$m;LGDz5VR0(U zz2}Ku0|fom>G<>HYcE0ASaKhoYGqQ0#-c2genn`bH3?`@Z_EXIdbih+ofKA&LgkjT zuCe%c57klHHdDk{4ZN9g$(8`b6sre4<9jh(SLq}{i?&3NIf$Sy+!=#sI<@Ne_%UCv zEhB7f?m1>|Z{X55=nlL{q;bYr`YMK4juu;}4`14^#|I}OjVTk*_I5C*-Z+B?-=@Kd}^1&3G(45MPEg`oA6r|5>44kRT5 ziNPOMKAT4EeplO?ffS+rcH<8IaDdj+e}wUuZ3Y=>6gUbZ zw8WBBFiX4J-Baxlz8CMV>r;>h_S>9gZphl=hG?gY&CUIwsbD8%*A}5d$06VILde{h z7us7NY>?;pGWEQWfTl6@_dt(Np{aTZy*68}tZ-GQIUi!EI$mLNgmS&+*;?n}AgX6G zfkEWL@Os8GQjO1DP41u~)#$VG1p>;S+32wyCZ|p~3gGN~{nsB&VbR`!@@8fnGah*R z(iNLxV@eWp$UvUo55rS?wyZ>9f}HB|_~Q?7KHFAx(`7r&$$M zaO&c*fy}T3D|9I@RM5sh+)0+1tMJdonp&BjXyQ6$zry0 zqn-Q*0-i1|nvL%C@Q603U2_F&HI}s3UTHs)meP&)U^+{Z^treyU%;kU4_LgB$e$m?~f`YUR0@BjbB|VgYGzcgm-AXq@D^f#u zhmz9WLkL56cMT=Y(Cr!Ecj~>a_c~wB|M@i6^UUno&yKZM-0NPPMfUG&G;&XLBNvG9 z+21dUPb1dwuhz_~zs2=?^m=mU`RR>TBxHVRc3h(x`sdni-hyiqLW@8Xx?uk@@#BKV z6pO#Wk<@h;t3=#m=`SIq4MO(**p%Z6T-K~@ToHRQ%2>&@lr=4H6$tt0^fdD9-6Kf} z_&w4=3>#id-h3a2XjKi}y~|UQ9SCJ0Z?rms_ejDfjZ3sv%H<%w!b7^U3y{{)Q>_AV zH_Aud5~2=M6!2ft{`{SXc~1mO({Iz}vaAo4LG#tRz~Zwu%B0Jm0Cqj4m}(Oo_Gw1P zfH+1kbP^CJ98l!{n(TlYzCZq*nU($*K$%-}a@SXAO`0L~2u<^^N{-T?)15WPaR(wa zwEB`X>=lb6j}8BTnL)4N6kdQuSp|>X6&5C6zkZ#qo~Kp@=rh>A?DOIf{?L4vuS<7! zB?Uu5**LKb1lNR=xkUDQk>(UlgR1Wg_=|;mPVD1mQYFGi^>}2KMuCX>t=P}wT?#9? 
zQip-QrBYW(JQ|`0cy89n?-@bE z4di#15%{*7=R@+|O9=5(g`urEZ9Jz4VH$Y#@oQ2txf0(#AFF&>+zgssN#!%?AD7k) z@Uu^0`8=b>QkV)`f`>SNx7J5J+uCvlnfcE^jUsUO@rkDPICJpkVueKe=hM3k)o93b z=zJ0h)n-{b{3gadoe7EPN1=9F(qwDIxP)d9Ps-!Vzz6`!LMwR4O%o*}szMll#>`35 z>D}_#nhp9oKf#jH9I-z)b)K;K)^Pxl)bpH#RU(Atfu=74?QS-~fzz9G2pT2(SYa-Z>>B$@a`fk(yWF0Ex3@8;^&Q`M>!cKP83^HkHAd*~uB_4K{_rxt^ zf>F1Ng+^@C4oE?SiNZzfUv_g24$Qb|yGGvuu;*1;c8n9uLz(Rzil%#PyZExm{}bpW;Mx>3(!Qp+pNj$OZEGkxtXx=A zsaJ${W(Dj7UuBaMGG?ng5d1@!y$hXC=IcsnWN>ZqWNqOP zGT|)4$B(enpE@zx-&}-O-iefBKRR*l++blz9S*j8+mOIji><5}=cDBrduFu_nnjIl zfToM&`!c4!-rm>}53`y{T8iW~5x;db8Q{&;%voU#75sg_bb6wH0to+XNH_``!Foca zgR&?p4|z#E#YA>13zMREBAQBUZ;1sV3dI*UaKdxn{`C9+&NJc17Ab0;uovuW0$G`N z-TutYh-Ni27-_DmAU;YVW1@Z7b%@U8bZ<20DCr#>pwP55Hmp7GeKHeJ*iHP6D&p5m zkT2ccvwq(K6Dw!A6m>&N#H7lJh--TLK{0R7+*WE|@Y1I9E{|~Tc*Ekf{A_h$^$V-| z$BT+U4KP>`Z8n2eFRGn|LFjonk){3xn9qqwXHaXSOX1yR9VfhC+;y2BgMJZZfDE{T zRr-W=H8(o}g#KnM=L*ZY`#(7AMIbnj7;D&W8&QIPaz+y2+qF;3O6KKIi zna`TpE4g}J+}NeD$UywP zWSL#E$-8$%V;^5Qo_+y%SRB_Zv;H0CJY&6Jnj)4S=>J;mk9C*vLp=0OeB;nlB}4jt zIa4Y_(z%Tz1-exJMy+74va<9$Q; z7GnLq>Y zdRWnu4>*)Bb-)02L^!l7GXR|@d9l`duooWm9USPfy}9^mPDcqi-m`?E_BF)$UrLUh z?ht;2bfxb^Oe=wWqg|Zaf2*8V31VWp{`YvDvJ?C37d!tm0jT5!Dn6V=jjHZQfbfQA z2Xyy&3JEKHz;(Q6(EOH`^w19rM_t<8NH7Ai=Y<*PdX=rEVJls@kSLH$6hh<%5)Q;8 zR|RwkKc!pbi}|s5GTxzJLr*ijkMd0+QUcr#^Xw8#LfrPFIxk&Zr96d_6W;2`nvZh{kdt}$AtVZS`q z*CJ3$W!CLpJZxEbIJELq$quIbmy8@qX7qKP;y@XW>bIIZyywgnn3sTospI|?U^-0u{u+Q;_JZRps zeCf&c#MgQ}7v-l-Q2xEPj|m)FA>W`@#FnpG1D_d9ka?NLU(s7m?;*gS?0N5*!-#-4%N7H&VlXQotw^!fMKNZ(RCy93ws zGbBf!y#b=i5;=*F`4MuE^(hdkRihD>+Md_xY6YIX4ZDwWi?LE@LToRko{7wHxhm?w z*%cAyda9m20w;hV5JQC01?~}=G7|Sz{OAKt^Xf)taqWL@5jM z|L+ja|0gswsOj2GmNs;i|ISdl5O-Qn@TqQg`-^w9+(8D>>px|(vu}}Qr@{&3^Sp=Y zYP8D&!~5Fw&gyQ3uhEK+fmuRvb>(H%9h=TCD8`dYyZDCqX{e9P?s@y}ufST&|&Z}2VEeX=0+5loK-KKaVQGVWL_v|PF32UVW5I^1?|tlKo00hEck zaZh-PxNiJwqd_?#AtBh&`}>scJKO=DBJ!{}hx%0c3NA1MV+GcI4i#P$(YY@;K`NtC z+i>5&69#8S_1%<(G}%?mqyZyhKp5uADF}+R`~Rf`jX=3`9d#(@w4b@)$;M54q$6rS zS3G%aMq_myA9qUU)tt6aKZ8NBWARNWI1sB5!5@ma+<5MOKnnv#Y5-3u4KM!`;~x~u zMqq0?+uSMvTdLK6*Wehipb3G9Otx7mTP+$t|1J>O{j%gjG zV;!~o&&7t;pvr9pK{g=wwpRuGp5xi-V2S))QkvO!%jY>ZU!7xua*u>Pj?^ChB`EYH z7f>ob7&sy6ivNos;rbl^fV@8FLjo%npj6?&)7znR$6=xK;}A37SfgnJs~%bLzH1M; zpWxbN@sEy9vP(uei2R`L!V@&M1JpRTAUtsGUU^uv1lNvoZp&I$(ws?o{n2TOM!~#1 z#3W#G<*sVUK`Ww-Tdja^^jd@Bo*EW$5pP-3E}r)E^yICxG|`;USy=r!`-pmuk4tbp zDVZMf18O;ZiYI7f(*b>HJF@w*yUpW7w}NnC!1NF2?I#mfQ~U^hGQZ#=nzgYs@+gpp zW0B$${?klk1rIA^{zH)e@U`WlJcRJL@a&rksaa1EzOxz0!|I^Uy7>oAxT=47%*}&H zzgQ#;W!{K6TiNKt#kWl(#Yd{SAXb41@VFS7D)^Xg(cuX)q&)YcZ68Z7kN4x$HR_|+ z!;4q;_PbO#yJIXAfANvqWU=;_y1jvp!$1-Dh~Qo)zU|yPP7$Gej7tHdUn-#X_2Z(P zwv++(vvu@`&v)#koqpQv?#*Z9h>%8d(UKEZfSmJ27VKgHY*Xd~t6N*4qR*Zx-PO4g@r-8T``?`sn%I@xb$`eux8cy8Vfof%dSxpz9EmFI zhtZ8v(G!QQw2ux4mjCOJpQ=Y>lGf%1BC{=v!=%uwVH?pupx2Um3B~D`pW~%oxtFP45aX{wKE981-afblxndw=&DhV2yQ!s=keqkJH3)Ao58Utu7AuS+_M- z7((-{3+)!b1Vw_zC`fe|jU-_!0sEd#g|RfyiGcbID=5BgpqnJU4sYy(;5iCu5;C)< zz8AFZ@~f2bs0zecAr=m4QJ{;HBvEc`H<7)}B$MkSKv`$L7mCSACDir2WR zs5Ny$sdU=Uxyk134(;X=0yqC5Vb(E%^XAX}43p8p=iQVr0}{~Ose&rZbcX`8=?XKw zWo}e*S4Ln(#(o)z_u&3h04hQ>&I_T?#swF~&;7%!9lY*MVuH@AU~1`fMM%MdT9IW& z%mYf5&|VFoe7VIja0xzNq+oXVa_t`tQ1()0u;%r7Kk=5kK0W zY8XP6a^SgsEdcL5ljH`w=B+kWWXHsZIJ&}}wtLlIFWh}R!Mu$FSfKJeQX`*!dfLT!%Nc~3^~yaqCKh+^N~`CLR!`yA9%h{P z#)%wk%d3mRFCn7f5t|#LnE(xWD>t_wrk27JQ7R9yk<|bw&)l^VQJlXOg=fRd4EdTR zZP6P+Ub+b&!7mpd6I|q^=JsyI%6~%sZCsSRbmvL|%_fFg;a;WuHy~!L0QR16q3`jL z9ItLVK1_cs(wQC}@76yJ4AT&}*9clOC;8xVci9Bje{vYq@WsrF$)*u6kI!5r_yGAblH`QG-b_OsQM;HP%*F5xKBh z`O`EyN#Fl2k^~{G8KhdqgX30!d??b6{{y`ATlGT%3sNhB$>{!}f%T0+3y#VCN7PI0rB&vT 
z2SU{ct%vu2;^VzGqc|6V*krjQ+n83kbH9c|K+gd$XjPHZ5@&w~eM$N(^ybrOR{V(C zCx}cFfy;FHuKvYc+|ou&B9!;~?$wTIGev@;?BZEu+vZH-g?v}_|^AQHSK9uLt)`TY%frWyyFuOoyWIe(>Soz;8p zj>Dznpyb>Zdax_utL{)pdFk_x-4C#d?~XGZ90~u-pU6K_7K~&lk+LEE?fmG8`Yywl zRm4p@aR6;*QfTr?9i=ajh5AG#)@lUVz-KOob$0yO&jfkE`3$N$fd9e<$u4b*GQZuJ zZE~+F(zX_W`$JzD!Zhec_`p(e=Twba_M2C`l_f0|G=pleKrGmw4*g=5_PZWYlRkgETelKg z^<`44{F)~nln}GM)u0*Gy5cy-u`R(<@HZ5i;E>vTR`+FdzXA%SK^%FR=c8g;$V>JK zu^^w~=AcqC{rK<^@PyvBHEjKxGQe&mAxwH%A=V-ml#m;Tjc*%fJi{Ltl?XC2e)Dqr z7c>UL4nezAe{dK!2K%~lNz!}SY{i(lPuVpQj2^L*_Cn{6D;2wq&KuXufueMrk49j= z>RDQP|bX zLQYJX1fTE)#czaLjh`4O)co+S64!2nR&peS@j9_G!o$BwQ<>HRhJ=pJjKK-K21+(K zOkU)=149*Ty-k!F+y)mhQ10D*sXH^%0({?`F6!N`F6o18Cn@PHjYjfUc4K!%Nef%b z+$u1|Zw_=ZJ~cNjVJ3nc0iqT^;v2L!3G8U(h)@0Or^Ko?m~~TUvFR!9^`52(o^608 z^7Y-GdN6CN-cmQAb+dD0oPdI&C(5!M>(9U`wx5*B9WkSvQQyAX0zN-!k8SYkv!_qB zEAj^?pya|!okeKr^zhPR?>E;$W!R1;?9?#r8Q&Is$(D*=#_aD6E&Riz@7T~~u_gR1 zilULoDe>)ekp@m&rY4+NXpg?EWLy4yliT4s z{Z6V4$_y<=>)3Omo|{9NziR*g9K6MFA&qAa>~n*;x$Pw?w{=JB5v!OBimx~)VR8! z6K}H^f{QoaFe##)XE?r8PspF}vA>+IhRW9``h)p=lV+O>l`(G*Pp8(PmO(?Ur|K|) z-_n(A6GatPbb6REhhgzh&286ZtiL@W^u6e-gmv*qEx%>&jm!g6oN?y*c|8KlWJ1NnR1DqBR-c;sp5R@;2wCo-HaEDhe#M0|EK&`JHvsAo@U3-x1$J z^8BEr3c@r0qCuFU>=HA6(8gzMnXIbpjoV+skEH8U*C~^DP7nPZS@1jKjC@Moa6gX2 zLYO_9y@0_Rd%ER!fRmq=fGM$0dQIigVk9g7c<{ZViv06@N5#uO@W1@JBFmdAi>vtj z-68ugBB4akXkgqazU^~)T6eO$IcUg@2aIP$hL#HcmQ)Qq!g7*j3BRE%-*R5mYvWgx zPMLz?-3~0?LUOTD4aj_^wexU)6XCY8!B(V@pQ-p7NUJf&zqa1Gl7CY?y~>o6XJeR9 zv4|lAT5Kt6aqx=KWa#Vs`}fs_C(cNB%2Y_(HBTa97`sJ{>B&&c_#sl6?-!8{TvKKS zY7f7o5V^PWs`fOvuEcu=eguWACvqZN@#emA`=$-CNh&z^N(U*}yni}(o-~Y1f%9q!t&1S=iB1ML*>OUibOT@`t zW`4M#?l}EuH<;x~##Skn3d3Atw0PszeJ{-tAQ9@1Y=X|W}75JrcHB=Gmobbc2Nrb_4o|=4l`ke`+1gr1Cuw%>c=HvgxYD$~# zG5yXe(Doaz)(=M>Wr#!T3dI3eo)AqbUg6(d^m`NR=-pWl4Zr`BlpuLlmbM-#`^t3` zk=LAAtg9+>)_gsjWZcFVB&{Q3SJkpDPk;U~z*kyOR&S_Asomqg2-f@o>Aw6&b+{<>Ia49XB*m<>7c)r@)Rf+ZVlBo4tDu%A0ACacA8C23ly_8&fo1hDMO2N-&Ps5 zceXCU_IP533h65jUuuic{bnt%pt?e#St`KX^fQ6 z7^?GWLo_O|TLHqFvGNrsw?@&!FBHmMYutw>V}Y1X5oO@K-&*!2uQiYz-cLd7zn@PW z^JlS23I}5btg#oB<`J>hzqb_i<4y_M{LiUaBU+7LRGBZhyKGZy(dz@E8Vq7ZVG8U= zrhQj_uN~xt^M&CYZ|i`@MP_#2Whxq*pVWk0%q9X05^0sQY9qmQ1DnZn5UMaa>5QK9 z=f6xWN{}WWU2;6;QFMVBF3=Qj#@u1YYhE=*u&n;*&>DbB7^j1}hp3}t?1_gIfL8mF zp5Mmq0FhWj!QFB|^S@frKW;%&sk;s5A`<55AbMK9;*DWo zHXk#U^YDqR%hjjs_rmAz${ew~oMyfm2jEAmtP`l#NBkmn6@6#X1c)r&>YB1MJHbkY zdGHM`)_yO7I`#&l$h%zcXqpl4U|rG382JMZ5{|N{@8n8czQqbqti!b}Pd^hoZY(!^ zQip=|ugd>K6jWb&H+PJ62!+qrE%i>E z>Nkr1LXAo-o z1p;M-xPipcy>vyPul4kQlsE5%$N6!z%qv9Ag7)xS%5I!Sw1qtC2IVZ;tv@E>U zgw!43DJ&knacsY84RFbg|M}lC*Se+Kk5AtAP}J-l3-Syc^Z0IZ*fKESMg0qS9308D z7B~XME%@Bw!b4Tu?%Dki;zjw)iKc-DlYfZDQ7OaEAyi9$UaU(XWfe(4Qv0d@XVJPh#D!A=`T8`5SmB?wYxb0yyt+oofA36GY9_ z+leC3oFX$a>m4M}(iCuj7I15Sx(kCAItF($O_AUp}uk;BLG5UUpuSsh-N zo$m|&;>(p3Ppx11pWWGHc|9>^=NCV7ac@Zk?VM)~xDPuT2&8G8{yE8+*=E>~3QbvV zO%-mSa4#L?HJDyqcyFutC?S<4*h1>2uB@m=Y4G6*Jc&?k;n8ms%PXY}df!u7JQu_f zbjF%-px;MNjf88Vb5ymxZ*Ab;0)!L(jvX!~`Sn$Lj!@eY0uDUIj1wi^=tiKnRVJ8> z2QXe!yvuo?CGb6JCQoIb=^+@fwiV>gs*D>&WJ2`5VSPgZR!r98qQ;R*eQXc=R8a^R zs{br)5>fIaiO`ngI3B2<1k!Rq=1ez+kzdvN&zpP^vgBff+i-C{)5!W~&wN@#xfctx zO!sm9=avq7mT_T<`j_QK@Um_sQ!JF)jD$oz*hCVM_dT}A;TDo zLu_D{LZME-$g7PW1Si8ET53ZvPq5XF7&p0u-b;6tu3ycLf?% zS@}#6giDi&4kq3P+&@0FN_5$bWBP^XKTLmAUi(?{o}gvQu2k)WTs|VH9Zz1?QFpJQ z$VR;fX2J;ftXQjG+sC1Z74WO}jAS|5Re|aHTpa##bm)#|D>EMAfX{La{$-_0A2Xqh z?{ejZ*OVoK&`v5j1UJY-$27!|emK0W;R6ff!t z?+d2LD%=eIH41MkczEV1@s+tQ+Ms<%M?Y*!odJ_}&=ZFo$vGZxZsS!HyHjNZfSs#b zoO*~89&S#W-w+GZJp$^HCPFQZ7~#nKLoFj6Iy3VxoIYF4xmXFd(iNg>>b-ImRwvhd zCi|ia4ZyY`733Ul8Kw0mSBPP6Z|}+R$P$*2 
zzlRVnkKv=-hb?;|M7x8qb`@!*$g{$45MjGKs!tyzifkL!`_@!t-W?0RP+gJYw8_Ub;wohvL@?U~;QnJhP7WUVaO~4jvbb98PpI9xo)wJ# z;j%cs<*9f9XT2wMpi*TRd&mJp&(?C;fqfrh6)X#jzwquuzEK*{r)=PR3>xaS9457Wut_s)H^`WS1t#7uMi>B3g+?ds8cXXHaUiHGEhD< zEF57tS-dgj;*0MHxd>l)x-$=5bR7FPh$M@QEcUlAj=x)Ne!qt`bJnHiX68}~z&JMh z7VvLaC0icjB8aTDZkhWfnDNodgDU*4eJ7+<87Tq!M&2lN_KPta(*Gxv1#9{_YOh&u zgigG%z{Hk52>-FYRTd5VKwo6<+0w)jBe^C;ePv5cp%X9eh^Z~Tf@s=|F~%6Xi@tj! zkGJt|wIK24aH3Vf2Qce=(RE3nM_Un3pIf1xA#I*Rwvlp>G-=DsqDvcXAVh%=`kTOO z<-4^GUKY*&1=ZM!WHSA}Oq z&xLDQRwO5qmX;SwYbz(x)|V0xBTMTGXQ+Nqu(UHFGV8??sfeZAh=s)%V~nvo#NP9h zjmK}syG??``+!_3`=m(WW*zQ|i0lDUHc1%(PiV_%LwWR+wCe^Wrxm8Hk8onN(MOiH zE`cF!HTC4fApr*)gE+AbPP|X--A_v!K^TDXrCXD{Fz&@e!HaZ2N+m8!~Zi8GL@Ptbb5XviRiPP4W}(F#e<38cDok zk(1%b;nBgw=w-B17k z;B}cW@+t|ocpi%r??`0Lw%2S!dZAflNVom&?ZVMYPoy2hHn(N#sJ;4gJ6?dCZONDY z)x*n%TrCB$9ccx*iFc4ryrnH^1%~Ysl>Yvto`9D7_>i&(fTU#CKw9qa%|Hh&ti=S_>07IQFJ}Jd*P{A&V`v(004mU6kf0y8TpOJ z8V~RmiC6v>uQF6zcw-iLgYz)MFbu;mOnEa}d8$7yjmH`f@C}Lg{Cn{n3NJF!ftOj| zvBU!a0A)%lFHH?i(0Huz0MC$k9+sc&zaNuaS6VZ1LO+&z@S7z~dTC008ik zYDwzlM&9_wV~q!Rg~WUAy?9dUVdPh6uJzqS8KInK}$Ae|5cp3Q-ufK?QhR=TIcqRbQDK2@TzmS(Xs=3+mnCJ!@ WoB-be#xzg>0000 + +{% block footer %} +{{ super() }} + + +{% endblock %} diff --git a/docs/source/_templates/theme_variables.jinja b/docs/source/_templates/theme_variables.jinja index d2f00702fb655..333edd766fc1d 100644 --- a/docs/source/_templates/theme_variables.jinja +++ b/docs/source/_templates/theme_variables.jinja @@ -14,5 +14,7 @@ 'blog': 'https://www.pytorchlightning.ai/blog', 'resources': 'https://pytorch-lightning.readthedocs.io/en/latest/#community-examples', 'support': 'https://pytorch-lightning.rtfd.io/en/latest/', + 'community': 'https://pytorch-lightning.slack.com', + 'forums': 'https://pytorch-lightning.slack.com', } -%} diff --git a/docs/source/advanced/advanced_gpu.rst b/docs/source/advanced/advanced_gpu.rst index 8146744b521db..0e43d4bff4626 100644 --- a/docs/source/advanced/advanced_gpu.rst +++ b/docs/source/advanced/advanced_gpu.rst @@ -23,7 +23,7 @@ This means we cannot sacrifice throughput as much as if we were fine-tuning, bec Overall: * When **fine-tuning** a model, use advanced memory efficient plugins such as :ref:`deepspeed-zero-stage-3` or :ref:`deepspeed-zero-stage-3-offload`, allowing you to fine-tune larger models if you are limited on compute -* When **pre-training** a model, use simpler optimizations such :ref:`sharded`, :ref:`deepspeed-zero-stage-2`, scaling the number of GPUs to reach larger parameter sizes +* When **pre-training** a model, use simpler optimizations such :ref:`sharded`, :ref:`deepspeed-zero-stage-2` or :ref:`fully-sharded`, scaling the number of GPUs to reach larger parameter sizes * For both fine-tuning and pre-training, use :ref:`deepspeed-activation-checkpointing` or :ref:`fairscale-activation-checkpointing` as the throughput degradation is not significant For example when using 128 GPUs, you can **pre-train** large 10 to 20 Billion parameter models using :ref:`deepspeed-zero-stage-2` without having to take a performance hit with more advanced optimized multi-gpu plugins. @@ -73,6 +73,104 @@ Sharded Training can work across all DDP variants by adding the additional ``--p Internally we re-initialize your optimizers and shard them across your machines and processes. We handle all communication using PyTorch distributed, so no code changes are required. +---------- + +.. _fully-sharded: + +Fully Sharded Training +^^^^^^^^^^^^^^^^^^^^^^ + +.. warning:: + Fully Sharded Training is in beta and the API is subject to change. 
+    Please create an `issue `_ if you run into any issues.
+
+`Fully Sharded `__ shards optimizer state, gradients and parameters across data parallel workers. This allows you to fit much larger models into memory across multiple GPUs.
+
+Fully Sharded Training alleviates the need to worry about balancing layers onto specific devices using some form of pipe parallelism, and optimizes for distributed communication with minimal effort.
+
+Shard Parameters to Reach 10+ Billion Parameters
+""""""""""""""""""""""""""""""""""""""""""""""""
+
+To reach larger parameter sizes and be memory efficient, we have to shard parameters. There are various ways to enable this.
+
+.. note::
+    Currently Fully Sharded Training relies on the user to wrap the model with Fully Sharded within the ``LightningModule``.
+    This means you must create a single model that is treated as a ``torch.nn.Module`` within the ``LightningModule``.
+    This is a limitation of Fully Sharded Training that will be resolved in the future.
+
+Wrap the Model
+""""""""""""""
+
+To activate parameter sharding, you must wrap your model using the provided ``wrap`` or ``auto_wrap`` functions as described below. Internally in Lightning, we enable a context manager around the ``configure_sharded_model`` function to make sure the ``wrap`` and ``auto_wrap`` parameters are passed correctly.
+
+When not using Fully Sharded these wrap functions are a no-op. This means once the changes have been made, there is no need to remove the changes for other plugins.
+
+This is a requirement for really large models and also saves on instantiation time as modules are sharded instantly, rather than after the entire model is created in memory.
+
+``auto_wrap`` will recursively wrap `torch.nn.Modules` within the ``LightningModule`` with nested Fully Sharded Wrappers,
+signalling that we'd like to partition these modules across data parallel devices, discarding the full weights when not required (information `here `__).
+
+``auto_wrap`` can have varying levels of success based on the complexity of your model. **Auto Wrap does not support models with shared parameters**.
+
+``wrap`` will simply wrap the module with a Fully Sharded Parallel class with the correct parameters from the Lightning context manager.
+
+Below is an example of using both ``wrap`` and ``auto_wrap`` to create your model.
+
+.. code-block:: python
+
+    import torch
+    import torch.nn as nn
+    import pytorch_lightning as pl
+    from pytorch_lightning import Trainer
+    from fairscale.nn import checkpoint_wrapper, auto_wrap, wrap
+
+    class MyModel(pl.LightningModule):
+        ...
+ def configure_sharded_model(self): + # Created within sharded model context, modules are instantly sharded across processes + # as soon as they are wrapped with ``wrap`` or ``auto_wrap`` + + # Wraps the layer in a Fully Sharded Wrapper automatically + linear_layer = wrap(nn.Linear(32, 32)) + + # Wraps the module recursively + # based on a minimum number of parameters (default 100M parameters) + block = auto_wrap( + nn.Sequential( + nn.Linear(32, 32), + nn.ReLU() + ) + ) + + # For best memory efficiency, + # add fairscale activation checkpointing + final_block = auto_wrap( + checkpoint_wrapper( + nn.Sequential( + nn.Linear(32, 32), + nn.ReLU() + ) + ) + ) + self.model = nn.Sequential( + linear_layer, + nn.ReLU(), + block, + final_block + ) + + def configure_optimizers(self): + return torch.optim.AdamW(self.model.parameters()) + + model = MyModel() + trainer = Trainer(gpus=4, plugins='fsdp', precision=16) + trainer.fit(model) + + trainer.test() + trainer.predict() + + +---------- + .. _fairscale-activation-checkpointing: FairScale Activation Checkpointing diff --git a/docs/source/advanced/amp.rst b/docs/source/advanced/amp.rst deleted file mode 100644 index 2c25f9e7f918f..0000000000000 --- a/docs/source/advanced/amp.rst +++ /dev/null @@ -1,94 +0,0 @@ -.. testsetup:: * - - from pytorch_lightning.trainer.trainer import Trainer - -.. _amp: - -16-bit training -================= -Lightning offers 16-bit training for CPUs, GPUs, and TPUs. - -.. raw:: html - - - -| - - ----------- - -GPU 16-bit ----------- -16-bit precision can cut your memory footprint by half. -If using volta architecture GPUs it can give a dramatic training speed-up as well. - -.. note:: PyTorch 1.6+ is recommended for 16-bit - -Native torch -^^^^^^^^^^^^ -When using PyTorch 1.6+ Lightning uses the native amp implementation to support 16-bit. - -.. testcode:: - :skipif: not _APEX_AVAILABLE and not _NATIVE_AMP_AVAILABLE or not torch.cuda.is_available() - - # turn on 16-bit - trainer = Trainer(precision=16, gpus=1) - -Apex 16-bit -^^^^^^^^^^^ -If you are using an earlier version of PyTorch Lightning uses Apex to support 16-bit. - -Follow these instructions to install Apex. -To use 16-bit precision, do two things: - -1. Install Apex -2. Set the "precision" trainer flag. - -.. code-block:: bash - - # ------------------------ - # OPTIONAL: on your cluster you might need to load CUDA 10 or 9 - # depending on how you installed PyTorch - - # see available modules - module avail - - # load correct CUDA before install - module load cuda-10.0 - # ------------------------ - - # make sure you've loaded a cuda version > 4.0 and < 7.0 - module load gcc-6.1.0 - - $ pip install --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" https://github.com/NVIDIA/apex - -.. warning:: NVIDIA Apex and DDP have instability problems. We recommend native 16-bit in PyTorch 1.6+ - -Enable 16-bit -^^^^^^^^^^^^^ - -.. testcode:: - :skipif: not _APEX_AVAILABLE and not _NATIVE_AMP_AVAILABLE or not torch.cuda.is_available() - - # turn on 16-bit - trainer = Trainer(amp_level='O2', precision=16) - -If you need to configure the apex init for your particular use case or want to use a different way of doing -16-bit training, override :meth:`pytorch_lightning.core.LightningModule.configure_apex`. - ----------- - -TPU 16-bit ----------- -16-bit on TPUs is much simpler. To use 16-bit with TPUs set precision to 16 when using the TPU flag - -.. 
testcode::
-    :skipif: not _TPU_AVAILABLE
-
-    # DEFAULT
-    trainer = Trainer(tpu_cores=8, precision=32)
-
-    # turn on 16-bit
-    trainer = Trainer(tpu_cores=8, precision=16)
diff --git a/docs/source/advanced/ipu.rst b/docs/source/advanced/ipu.rst
new file mode 100644
index 0000000000000..2b62df379e0cc
--- /dev/null
+++ b/docs/source/advanced/ipu.rst
@@ -0,0 +1,234 @@
+.. _ipu:
+
+IPU support
+===========
+
+.. note::
+    IPU Support is experimental and a work in progress (see :ref:`known-limitations`). If you run into any problems, please open an issue.
+
+Lightning supports `Graphcore Information Processing Units (IPUs) `_, processors built for Artificial Intelligence and Machine Learning.
+
+IPU Terminology
+---------------
+
+IPUs consist of many individual cores, allowing parallelization across computation. Due to the high bandwidth speed between cores,
+IPUs facilitate machine learning loads where parallelization is essential. Because computation is heavily parallelized,
+IPUs operate in a different way to conventional accelerators such as CPU/GPUs.
+IPUs do not require large batch sizes for maximum parallelization, can provide optimizations across the compiled graph and rely on model parallelism to fully utilize cores for larger models.
+
+IPUs are also found within IPU pods, a collection of IPU-enabled machines for larger workloads. See the `IPU Architecture `__ for more information.
+
+How to access IPUs
+------------------
+
+To use IPUs, you must have access to a server with IPU devices attached. To get access, see `getting started `_.
+
+You must ensure that the server with IPUs attached has the popart and poplar SDK packages enabled. Instructions should be given by Graphcore.
+
+Training with IPUs
+------------------
+
+Specify the number of IPUs to train with. Note that when training with IPUs, you must select 1 or a power of 2 number of IPUs (e.g. 2/4/8).
+
+.. code-block:: python
+
+    trainer = pl.Trainer(ipus=8)  # Train using data parallel on 8 IPUs
+
+IPUs only support specifying a single number to allocate devices, which is handled via the underlying libraries.
+
+Mixed Precision & 16-bit Precision
+----------------------------------
+
+Lightning also supports training in mixed precision with IPUs.
+By default, IPU training will use 32-bit precision. To enable mixed precision,
+set the precision flag.
+
+.. note::
+    Currently there is no dynamic scaling of the loss with mixed precision training.
+
+.. code-block:: python
+
+    import pytorch_lightning as pl
+
+    model = MyLightningModule()
+    trainer = pl.Trainer(ipus=8, precision=16)
+    trainer.fit(model)
+
+You can also use pure 16-bit training, where the weights are also in 16-bit precision.
+
+.. code-block:: python
+
+    import pytorch_lightning as pl
+    from pytorch_lightning.plugins import IPUPlugin
+
+    model = MyLightningModule()
+    model = model.half()
+    trainer = pl.Trainer(ipus=8, precision=16)
+    trainer.fit(model)
+
+Advanced IPU Options
+--------------------
+
+IPUs provide further optimizations to speed up training. By using the ``IPUPlugin`` we can set the ``device_iterations``, which controls the number of iterations run directly on the IPU devices before returning to the host. Increasing the number of on-device iterations will improve throughput as there is less device-to-host communication required.
+
+.. note::
+
+    When using model parallel, it is a hard requirement to increase the number of device iterations to ensure we fully saturate the devices via micro-batching. See :ref:`ipu-model-parallelism` for more information.
+
+.. code-block:: python
+
+    import pytorch_lightning as pl
+    from pytorch_lightning.plugins import IPUPlugin
+
+    model = MyLightningModule()
+    trainer = pl.Trainer(ipus=8, plugins=IPUPlugin(device_iterations=32))
+    trainer.fit(model)
+
+Note that by default we return the last device iteration loss. You can override this by passing in your own ``poptorch.Options`` and setting the AnchorMode as described in the `poptorch documentation `__.
+
+.. code-block:: python
+
+    import poptorch
+    import pytorch_lightning as pl
+    from pytorch_lightning.plugins import IPUPlugin
+
+    model = MyLightningModule()
+    inference_opts = poptorch.Options()
+    inference_opts.deviceIterations(32)
+
+    training_opts = poptorch.Options()
+    training_opts.anchorMode(poptorch.AnchorMode.All)
+    training_opts.deviceIterations(32)
+
+    trainer = pl.Trainer(
+        ipus=8,
+        plugins=IPUPlugin(inference_opts=inference_opts, training_opts=training_opts)
+    )
+    trainer.fit(model)
+
+You can also override all options by passing the ``poptorch.Options`` to the plugin. See `poptorch options documentation `_ for more information.
+
+PopVision Graph Analyser
+------------------------
+
+.. figure:: ../_static/images/accelerator/ipus/profiler.png
+    :alt: PopVision Graph Analyser
+    :width: 500
+
+Lightning supports integration with the `PopVision Graph Analyser Tool `__. This helps to look at utilization of IPU devices and provides helpful metrics during the lifecycle of your trainer. Once you have gained access, the PopVision Graph Analyser Tool can be downloaded via the `GraphCore download website `__.
+
+Lightning supports dumping all reports to a directory to open using the tool.
+
+.. code-block:: python
+
+    import pytorch_lightning as pl
+    from pytorch_lightning.plugins import IPUPlugin
+
+    model = MyLightningModule()
+    trainer = pl.Trainer(ipus=8, plugins=IPUPlugin(autoreport_dir='report_dir/'))
+    trainer.fit(model)
+
+This will dump all reports to ``report_dir/``, which can then be opened using the Graph Analyser Tool; see `Opening Reports `__.
+
+.. _ipu-model-parallelism:
+
+Model Parallelism
+-----------------
+
+Due to the IPU architecture, larger models should be parallelized across IPUs by design. Currently, poptorch provides the capabilities via annotations as described in `Parallel Execution `__.
+
+Below is an example using the block annotation in a LightningModule.
+
+.. note::
+
+    Currently, when using model parallelism, we do not infer the number of IPUs required for you. This is done via the annotations themselves. If you specify 4 different IDs when defining Blocks, this means your model will be split onto 4 different IPUs.
+
+    This is also mutually exclusive with the Trainer flag, i.e. if your model is split onto 2 IPUs and you set ``Trainer(ipus=4)``, this will require 8 IPUs in total: replicating the model 4 times in data parallel.
+
+    When pipelining the model you must also increase the `device_iterations` to ensure full data saturation of the devices, i.e. whilst one device in the model pipeline processes a batch of data, the other device can start on the next batch. For example, if the model is split onto 4 IPUs, we require `device_iterations` to be at least 4.
+
+
+.. code-block:: python
+
+    import pytorch_lightning as pl
+    import poptorch
+
+    class MyLightningModule(pl.LightningModule):
+
+        def __init__(self):
+            super().__init__()
+            # This will place layer1, layer2+layer3, layer4, softmax on different IPUs at runtime.
+ # BeginBlock will start a new id for all layers within this block + self.layer1 = poptorch.BeginBlock(torch.nn.Linear(5, 10), ipu_id=0) + + # This layer starts a new block, + # adding subsequent layers to this current block at runtime + # till the next block has been declared + self.layer2 = poptorch.BeginBlock(torch.nn.Linear(10, 5), ipu_id=1) + self.layer3 = torch.nn.Linear(5, 5) + + # Create new blocks + self.layer4 = poptorch.BeginBlock(torch.nn.Linear(5, 5), ipu_id=2) + self.softmax = poptorch.BeginBlock(torch.nn.Softmax(dim=1), ipu_id=3) + + ... + + model = MyLightningModule() + trainer = pl.Trainer(ipus=8, plugins=IPUPlugin(device_iterations=20)) + trainer.fit(model) + + +You can also use the block context manager within the forward function, or any of the step functions. + +.. code-block:: python + + import pytorch_lightning as pl + import poptorch + + class MyLightningModule(pl.LightningModule): + + def __init__(self): + super().__init__() + self.layer1 = torch.nn.Linear(5, 10) + self.layer2 = torch.nn.Linear(10, 5) + self.layer3 = torch.nn.Linear(5, 5) + self.layer4 = torch.nn.Linear(5, 5) + + self.act = torch.nn.ReLU() + self.softmax = torch.nn.Softmax(dim=1) + + def forward(self, x): + + with poptorch.Block(ipu_id=0): + x = self.act(self.layer1(x)) + + with poptorch.Block(ipu_id=1): + x = self.act(self.layer2(x)) + + with poptorch.Block(ipu_id=2): + x = self.act(self.layer3(x)) + x = self.act(self.layer4(x)) + + with poptorch.Block(ipu_id=3): + x = self.softmax(x) + return x + ... + + model = MyLightningModule() + trainer = pl.Trainer(ipus=8, plugins=IPUPlugin(device_iterations=20)) + trainer.fit(model) + + +.. _known-limitations: + +Known Limitations +----------------- + +Currently there are some known limitations that are being addressed in the near future to make the experience seamless when moving from different devices. + +Please see the `MNIST example `__ which displays most of the limitations and how to overcome them till they are resolved. + +* ``self.log`` is not supported in the ``training_step``, ``validation_step``, ``test_step`` or ``predict_step``. This is due to the step function being traced and sent to the IPU devices. We're actively working on fixing this +* Multiple optimizers are not supported. ``training_step`` only supports returning one loss from the ``training_step`` function as a result +* Since the step functions are traced, branching logic or any form of primitive values are traced into constants. Be mindful as this could lead to errors in your custom code +* Clipping gradients is not supported diff --git a/docs/source/advanced/multi_gpu.rst b/docs/source/advanced/multi_gpu.rst index 1c465ae314e4f..e645fa7d18404 100644 --- a/docs/source/advanced/multi_gpu.rst +++ b/docs/source/advanced/multi_gpu.rst @@ -106,6 +106,23 @@ Note if you use any built in metrics or custom metrics that use the :doc:`Metric # Add sync_dist=True to sync logging across all GPU workers self.log('test_loss', loss, on_step=True, on_epoch=True, sync_dist=True) +It is possible to perform some computation manually and log the reduced result on rank 0 as follows: + +.. testcode:: + + def test_step(self, batch, batch_idx): + x, y = batch + tensors = self(x) + return tensors + + def test_epoch_end(self, outputs): + mean = torch.mean(self.all_gather(outputs)) + + # When logging only on rank 0, don't forget to add + # ``rank_zero_only=True`` to avoid deadlocks on synchronization. 
+ if self.trainer.is_global_zero: + self.log("my_reduced_metric", mean, rank_zero_only=True) + Make models pickleable ^^^^^^^^^^^^^^^^^^^^^^ @@ -622,38 +639,39 @@ The reason is that the full batch is visible to all GPUs on the node when using ---------- -TorchElastic --------------- -Lightning supports the use of TorchElastic to enable fault-tolerant and elastic distributed job scheduling. To use it, specify the 'ddp' or 'ddp2' backend and the number of gpus you want to use in the trainer. +Torch Distributed Elastic +------------------------- +Lightning supports the use of Torch Distributed Elastic to enable fault-tolerant and elastic distributed job scheduling. To use it, specify the 'ddp' or 'ddp2' backend and the number of gpus you want to use in the trainer. .. code-block:: python Trainer(gpus=8, accelerator='ddp') - -Following the `TorchElastic Quickstart documentation `_, you then need to start a single-node etcd server on one of the hosts: +To launch a fault-tolerant job, run the following on all nodes. .. code-block:: bash - etcd --enable-v2 - --listen-client-urls http://0.0.0.0:2379,http://127.0.0.1:4001 - --advertise-client-urls PUBLIC_HOSTNAME:2379 - + python -m torch.distributed.run + --nnodes=NUM_NODES + --nproc_per_node=TRAINERS_PER_NODE + --rdzv_id=JOB_ID + --rdzv_backend=c10d + --rdzv_endpoint=HOST_NODE_ADDR + YOUR_LIGHTNING_TRAINING_SCRIPT.py (--arg1 ... train script args...) -And then launch the elastic job with: +To launch an elastic job, run the following on at least ``MIN_SIZE`` nodes and at most ``MAX_SIZE`` nodes. .. code-block:: bash - python -m torchelastic.distributed.launch + python -m torch.distributed.run --nnodes=MIN_SIZE:MAX_SIZE --nproc_per_node=TRAINERS_PER_NODE --rdzv_id=JOB_ID - --rdzv_backend=etcd - --rdzv_endpoint=ETCD_HOST:ETCD_PORT + --rdzv_backend=c10d + --rdzv_endpoint=HOST_NODE_ADDR YOUR_LIGHTNING_TRAINING_SCRIPT.py (--arg1 ... train script args...) - -See the official `TorchElastic documentation `_ for details +See the official `Torch Distributed Elastic documentation `_ for details on installation and more use cases. ---------- diff --git a/docs/source/advanced/multiple_loaders.rst b/docs/source/advanced/multiple_loaders.rst index 1a82641953c3c..02d5db143c95c 100644 --- a/docs/source/advanced/multiple_loaders.rst +++ b/docs/source/advanced/multiple_loaders.rst @@ -91,23 +91,6 @@ For more details please have a look at :paramref:`~pytorch_lightning.trainer.tra Furthermore, Lightning also supports that nested lists and dicts (or a combination) can be returned. -.. testcode:: - - class LitModel(LightningModule): - - def train_dataloader(self): - - loader_a = torch.utils.data.DataLoader(range(8), batch_size=4) - loader_b = torch.utils.data.DataLoader(range(16), batch_size=2) - - return {'a': loader_a, 'b': loader_b} - - def training_step(self, batch, batch_idx): - # access a dictionnary with a batch from each dataloader - batch_a = batch["a"] - batch_b = batch["b"] - - .. testcode:: class LitModel(LightningModule): diff --git a/docs/source/api_references.rst b/docs/source/api_references.rst index f73a8954f8764..3f9e2c2575cc2 100644 --- a/docs/source/api_references.rst +++ b/docs/source/api_references.rst @@ -89,8 +89,6 @@ Training Type Plugins DDPSpawnPlugin DeepSpeedPlugin HorovodPlugin - RPCPlugin - RPCSequentialPlugin SingleTPUPlugin TPUSpawnPlugin @@ -137,8 +135,15 @@ Profiler API .. 
autosummary:: :toctree: api :nosignatures: + :template: classtemplate.rst + + AbstractProfiler + AdvancedProfiler + BaseProfiler + PassThroughProfiler + PyTorchProfiler + SimpleProfiler - profilers Trainer API ----------- diff --git a/docs/source/benchmarking/performance.rst b/docs/source/benchmarking/performance.rst deleted file mode 100644 index 6e2b546fb275f..0000000000000 --- a/docs/source/benchmarking/performance.rst +++ /dev/null @@ -1,183 +0,0 @@ -.. _performance: - -Fast performance tips -===================== -Lightning builds in all the micro-optimizations we can find to increase your performance. -But we can only automate so much. - -Here are some additional things you can do to increase your performance. - ----------- - -Dataloaders ------------ -When building your DataLoader set ``num_workers > 0`` and ``pin_memory=True`` (only for GPUs). - -.. code-block:: python - - Dataloader(dataset, num_workers=8, pin_memory=True) - -num_workers -^^^^^^^^^^^ -The question of how many ``num_workers`` is tricky. Here's a summary of -some references, [`1 `_], and our suggestions. - -1. ``num_workers=0`` means ONLY the main process will load batches (that can be a bottleneck). -2. ``num_workers=1`` means ONLY one worker (just not the main process) will load data but it will still be slow. -3. The ``num_workers`` depends on the batch size and your machine. -4. A general place to start is to set ``num_workers`` equal to the number of CPUs on that machine. - -.. warning:: Increasing ``num_workers`` will ALSO increase your CPU memory consumption. - -The best thing to do is to increase the ``num_workers`` slowly and stop once you see no more improvement in your training speed. - -Spawn -^^^^^ -When using ``accelerator=ddp_spawn`` (the ddp default) or TPU training, the way multiple GPUs/TPU cores are used is by calling ``.spawn()`` under the hood. -The problem is that PyTorch has issues with ``num_workers > 0`` when using ``.spawn()``. For this reason we recommend you -use ``accelerator=ddp`` so you can increase the ``num_workers``, however your script has to be callable like so: - -.. code-block:: bash - - python my_program.py --gpus X - ----------- - -.item(), .numpy(), .cpu() -------------------------- -Don't call ``.item()`` anywhere in your code. Use ``.detach()`` instead to remove the connected graph calls. Lightning -takes a great deal of care to be optimized for this. - ----------- - -empty_cache() -------------- -Don't call this unnecessarily! Every time you call this ALL your GPUs have to wait to sync. - ----------- - -Construct tensors directly on the device ----------------------------------------- -LightningModules know what device they are on! Construct tensors on the device directly to avoid CPU->Device transfer. - -.. code-block:: python - - # bad - t = torch.rand(2, 2).cuda() - - # good (self is LightningModule) - t = torch.rand(2, 2, device=self.device) - - -For tensors that need to be model attributes, it is best practice to register them as buffers in the modules's -``__init__`` method: - -.. code-block:: python - - # bad - self.t = torch.rand(2, 2, device=self.device) - - # good - self.register_buffer("t", torch.rand(2, 2)) - ----------- - -Use DDP not DP --------------- -DP performs three GPU transfers for EVERY batch: - -1. Copy model to device. -2. Copy data to device. -3. Copy outputs of each device back to master. - -| - -Whereas DDP only performs 1 transfer to sync gradients. Because of this, DDP is MUCH faster than DP. 
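
As a minimal illustration of the switch (a sketch assuming a single machine with 4 GPUs and any LightningModule):

.. code-block:: python

    from pytorch_lightning import Trainer

    # DP: a single process drives all GPUs, copying the model and data every batch
    trainer = Trainer(gpus=4, accelerator='dp')

    # DDP: one process per GPU; only gradients are synchronized between processes
    trainer = Trainer(gpus=4, accelerator='ddp')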
- -When using DDP set find_unused_parameters=False ------------------------------------------------ - -By default we have enabled find unused parameters to True. This is for compatibility issues that have arisen in the past (see the `discussion `_ for more information). -This by default comes with a performance hit, and can be disabled in most cases. - -.. code-block:: python - - from pytorch_lightning.plugins import DDPPlugin - - trainer = pl.Trainer( - gpus=2, - plugins=DDPPlugin(find_unused_parameters=False), - ) - ----------- - -16-bit precision ----------------- -Use 16-bit to decrease the memory consumption (and thus increase your batch size). On certain GPUs (V100s, 2080tis), 16-bit calculations are also faster. -However, know that 16-bit and multi-processing (any DDP) can have issues. Here are some common problems. - -1. `CUDA error: an illegal memory access was encountered `_. - The solution is likely setting a specific CUDA, CUDNN, PyTorch version combination. -2. ``CUDA error: device-side assert triggered``. This is a general catch-all error. To see the actual error run your script like so: - -.. code-block:: bash - - # won't see what the error is - python main.py - - # will see what the error is - CUDA_LAUNCH_BLOCKING=1 python main.py - -.. tip:: We also recommend using 16-bit native found in PyTorch 1.6. Just install this version and Lightning will automatically use it. - ----------- - -Advanced GPU Optimizations --------------------------- - -When training on single or multiple GPU machines, Lightning offers a host of advanced optimizations to improve throughput, memory efficiency, and model scaling. -Refer to :doc:`Advanced GPU Optimized Training for more details <../advanced/advanced_gpu>`. - ----------- - -Preload Data Into RAM ---------------------- - -When your training or preprocessing requires many operations to be performed on entire dataset(s) it can -sometimes be beneficial to store all data in RAM given there is enough space. -However, loading all data at the beginning of the training script has the disadvantage that it can take a long -time and hence it slows down the development process. Another downside is that in multiprocessing (e.g. DDP) -the data would get copied in each process. -One can overcome these problems by copying the data into RAM in advance. -Most UNIX-based operating systems provide direct access to tmpfs through a mount point typically named ``/dev/shm``. - -0. Increase shared memory if necessary. Refer to the documentation of your OS how to do this. - -1. Copy training data to shared memory: - - .. code-block:: bash - - cp -r /path/to/data/on/disk /dev/shm/ - -2. Refer to the new data root in your script or command line arguments: - - .. code-block:: python - - datamodule = MyDataModule(data_root="/dev/shm/my_data") - ----------- - -Zero Grad ``set_to_none=True`` ------------------------------- - -In order to modestly improve performance, you can override :meth:`~pytorch_lightning.core.lightning.LightningModule.optimizer_zero_grad`. - -For a more detailed explanation of pros / cons of this technique, -read `this `_ documentation by the PyTorch team. - -.. 
testcode::
-
-    class Model(LightningModule):
-
-        def optimizer_zero_grad(self, epoch, batch_idx, optimizer, optimizer_idx):
-            optimizer.zero_grad(set_to_none=True)
diff --git a/docs/source/clouds/cloud_training.rst b/docs/source/clouds/cloud_training.rst
index a2171f102fa94..c608ad2c1063c 100644
--- a/docs/source/clouds/cloud_training.rst
+++ b/docs/source/clouds/cloud_training.rst
@@ -1,31 +1,42 @@
 .. _grid:

-################
-AWS/GCP training
-################
+##############
+Cloud Training
+##############
+
 Lightning has a native solution for training on AWS/GCP at scale.
 Go to `grid.ai `_ to create an account.

-We've designed Grid to work for Lightning users without needing to make ANY changes to their code.
+We've designed Grid to work seamlessly with Lightning, without needing to make ANY code changes.

-To use grid, take your regular command:
+To use Grid, replace ``python`` in your regular command:

 .. code-block:: bash

     python my_model.py --learning_rate 1e-6 --layers 2 --gpus 4

-And change it to use the grid train command:
+with the ``grid run`` command:

 .. code-block:: bash

-    grid train --grid_gpus 4 my_model.py --learning_rate 'uniform(1e-6, 1e-1, 20)' --layers '[2, 4, 8, 16]'
+    grid run --gpus 4 my_model.py --learning_rate 'uniform(1e-6, 1e-1, 20)' --layers '[2, 4, 8, 16]'

-The above command will launch (20 * 4) experiments each running on 4 GPUs (320 GPUs!) - by making ZERO changes to
+The above command will launch (20 * 4) experiments, each running on 4 GPUs (320 GPUs!) - by making ZERO changes to
 your code.

-The `uniform` command is part of our new expressive syntax which lets you construct hyperparameter combinations
+The ``uniform`` command is part of our new expressive syntax which lets you construct hyperparameter combinations
 using over 20+ distributions, lists, etc. Of course, you can also configure all of this using yamls which
 can be dynamically assembled at runtime.

-.. hint:: Grid supports the search strategy of your choice! (and much more than just sweeps)
+***************
+Grid Highlights
+***************
+
+* Run any public or private repository with Grid, or use an interactive session.
+* Grid allocates all the machines and GPUs you need on demand, so you only pay for what you need when you need it.
+* Grid handles all the other parts of developing and training at scale: artifacts, logs, metrics, etc.
+* Grid works with the experiment manager of your choice, no code changes needed.
+* Use Grid Datastores: high-performance, low-latency, versioned datasets.
+* Attach Datastores to a Run so you don't have to keep downloading datasets.
+* Use Grid Sessions for fast prototyping on a cloud machine of your choice.
+* For more information, check the `grid documentation `_.
diff --git a/docs/source/common/fast_training.rst b/docs/source/common/fast_training.rst
deleted file mode 100644
index 2216d234836f2..0000000000000
--- a/docs/source/common/fast_training.rst
+++ /dev/null
@@ -1,82 +0,0 @@
-.. testsetup:: *
-
-    from pytorch_lightning.trainer.trainer import Trainer
-
-.. _fast_training:
-
-Fast Training
-=============
-There are multiple options to speed up different parts of the training by choosing to train
-on a subset of data. This could be done for speed or debugging purposes.
-
-----------------
-
-Check validation every n epochs
--------------------------------
-If you have a small dataset you might want to check validation every n epochs
-
-.. 
testcode:: - - # DEFAULT - trainer = Trainer(check_val_every_n_epoch=1) - ----------------- - -Force training for min or max epochs ------------------------------------- -It can be useful to force training for a minimum number of epochs or limit to a max number. - -.. seealso:: - :class:`~pytorch_lightning.trainer.trainer.Trainer` - -.. testcode:: - - # DEFAULT - trainer = Trainer(min_epochs=1, max_epochs=1000) - ----------------- - -Set validation check frequency within 1 training epoch ------------------------------------------------------- -For large datasets it's often desirable to check validation multiple times within a training loop. -Pass in a float to check that often within 1 training epoch. Pass in an int `k` to check every `k` training batches. -Must use an `int` if using an `IterableDataset`. - -.. testcode:: - - # DEFAULT - trainer = Trainer(val_check_interval=0.95) - - # check every .25 of an epoch - trainer = Trainer(val_check_interval=0.25) - - # check every 100 train batches (ie: for `IterableDatasets` or fixed frequency) - trainer = Trainer(val_check_interval=100) - ----------------- - -Use data subset for training, validation, and test --------------------------------------------------- -If you don't want to check 100% of the training/validation/test set (for debugging or if it's huge), set these flags. - -.. testcode:: - - # DEFAULT - trainer = Trainer( - limit_train_batches=1.0, - limit_val_batches=1.0, - limit_test_batches=1.0 - ) - - # check 10%, 20%, 30% only, respectively for training, validation and test set - trainer = Trainer( - limit_train_batches=0.1, - limit_val_batches=0.2, - limit_test_batches=0.3 - ) - -If you also pass ``shuffle=True`` to the dataloader, a different random subset of your dataset will be used for each epoch; otherwise the same subset will be used for all epochs. - -.. note:: ``limit_train_batches``, ``limit_val_batches`` and ``limit_test_batches`` will be overwritten by ``overfit_batches`` if ``overfit_batches`` > 0. ``limit_val_batches`` will be ignored if ``fast_dev_run=True``. - -.. note:: If you set ``limit_val_batches=0``, validation will be disabled. diff --git a/docs/source/common/lightning_cli.rst b/docs/source/common/lightning_cli.rst index c16e2e3b733fe..2e4b3f356f7c4 100644 --- a/docs/source/common/lightning_cli.rst +++ b/docs/source/common/lightning_cli.rst @@ -1,6 +1,7 @@ .. testsetup:: * :skipif: not _JSONARGPARSE_AVAILABLE + import torch from unittest import mock from typing import List from pytorch_lightning.core.lightning import LightningModule @@ -19,9 +20,13 @@ ): pass + class MyClassModel(LightningModule): + def __init__(self, num_classes: int): + pass + class MyDataModule(LightningDataModule): def __init__(self, batch_size: int = 8): - pass + self.num_classes = 5 def send_email(address, message): pass @@ -88,6 +93,8 @@ practice to create a configuration file and provide this to the tool. A way to d nano config.yaml # Run training using created configuration python trainer.py --config config.yaml + # The config JSON can also be passed directly + python trainer.py --config '{trainer: {fast_dev_run: True}}' The instantiation of the :class:`~pytorch_lightning.utilities.cli.LightningCLI` class takes care of parsing command line and config file options, instantiating the classes, setting up a callback to save the config in the log directory and @@ -372,6 +379,47 @@ Note that the config object :code:`self.config` is a dictionary whose keys are g has the same structure as the yaml format described previously. 
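
As a rough sketch of how this can be used (assuming a ``before_fit`` hook is available to override and that ``max_epochs`` was set in the config):

.. code-block:: python

    from pytorch_lightning.utilities.cli import LightningCLI

    class MyLightningCLI(LightningCLI):

        def before_fit(self):
            # ``self.config`` mirrors the yaml structure,
            # so trainer settings live under the 'trainer' key
            print('max_epochs:', self.config['trainer']['max_epochs'])

    cli = MyLightningCLI(MyModel)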
This means for instance that the parameters used for instantiating the trainer class can be found in :code:`self.config['trainer']`. +.. tip:: + + Have a look at the :class:`~pytorch_lightning.utilities.cli.LightningCLI` class API reference to learn about other + methods that can be extended to customize a CLI. + + +Configurable callbacks +^^^^^^^^^^^^^^^^^^^^^^ + +As explained previously, any callback can be added by including it in the config via :code:`class_path` and +:code:`init_args` entries. However, there are other cases in which a callback should always be present and be +configurable. This can be implemented as follows: + +.. testcode:: + + from pytorch_lightning.callbacks import EarlyStopping + from pytorch_lightning.utilities.cli import LightningCLI + + class MyLightningCLI(LightningCLI): + + def add_arguments_to_parser(self, parser): + parser.add_lightning_class_args(EarlyStopping, 'my_early_stopping') + parser.set_defaults({'my_early_stopping.patience': 5}) + + cli = MyLightningCLI(MyModel) + +To change the configuration of the :code:`EarlyStopping` in the config it would be: + +.. code-block:: yaml + + model: + ... + trainer: + ... + my_early_stopping: + patience: 5 + + +Argument linking +^^^^^^^^^^^^^^^^ + Another case in which it might be desired to extend :class:`~pytorch_lightning.utilities.cli.LightningCLI` is that the model and data module depend on a common parameter. For example in some cases both classes require to know the :code:`batch_size`. It is a burden and error prone giving the same value twice in a config file. To avoid this the @@ -402,13 +450,138 @@ The linking of arguments is observed in the help of the tool, which for this exa model.batch_size <-- data.batch_size Number of samples in a batch (type: int) +Sometimes a parameter value is only available after class instantiation. An example could be that your model requires the number of classes to instantiate its fully connected layer (for a classification task) but the value is not available until the data module has been instantiated. +The code below illustrates how to address this. + +.. testcode:: + + from pytorch_lightning.utilities.cli import LightningCLI + + class MyLightningCLI(LightningCLI): + + def add_arguments_to_parser(self, parser): + parser.link_arguments('data.num_classes', 'model.num_classes', apply_on='instantiate') + + cli = MyLightningCLI(MyClassModel, MyDataModule) + +Instantiation links are used to automatically determine the order of instantiation, in this case data first. + .. tip:: The linking of arguments can be used for more complex cases. For example to derive a value via a function that takes multiple settings as input. For more details have a look at the API of `link_arguments `_. -.. tip:: - Have a look at the :class:`~pytorch_lightning.utilities.cli.LightningCLI` class API reference to learn about other - methods that can be extended to customize a CLI. +Optimizers and learning rate schedulers +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Optimizers and learning rate schedulers can also be made configurable. The most common case is when a model only has a +single optimizer and optionally a single learning rate scheduler. In this case the model's +:class:`~pytorch_lightning.core.lightning.LightningModule` could be left without implementing the +:code:`configure_optimizers` method since it is normally always the same and just adds boilerplate. The following code +snippet shows how to implement it: + +.. 
testcode:: + + import torch + from pytorch_lightning.utilities.cli import LightningCLI + + class MyLightningCLI(LightningCLI): + + def add_arguments_to_parser(self, parser): + parser.add_optimizer_args(torch.optim.Adam) + parser.add_lr_scheduler_args(torch.optim.lr_scheduler.ExponentialLR) + + cli = MyLightningCLI(MyModel) + +With this the :code:`configure_optimizers` method is automatically implemented and in the config the :code:`optimizer` +and :code:`lr_scheduler` groups would accept all of the options for the given classes, in this example :code:`Adam` and +:code:`ExponentialLR`. Therefore, the config file would be structured like: + +.. code-block:: yaml + + optimizer: + lr: 0.01 + lr_scheduler: + gamma: 0.2 + model: + ... + trainer: + ... + +And any of these arguments could be passed directly through command line. For example: + +.. code-block:: bash + + $ python train.py --optimizer.lr=0.01 --lr_scheduler.gamma=0.2 + +There is also the possibility of selecting among multiple classes by giving them as a tuple. For example: + +.. testcode:: + + class MyLightningCLI(LightningCLI): + + def add_arguments_to_parser(self, parser): + parser.add_optimizer_args((torch.optim.SGD, torch.optim.Adam)) + +In this case in the config the :code:`optimizer` group instead of having directly init settings, it should specify +:code:`class_path` and optionally :code:`init_args`. Sub-classes of the classes in the tuple would also be accepted. +A corresponding example of the config file would be: + +.. code-block:: yaml + + optimizer: + class_path: torch.optim.Adam + init_args: + lr: 0.01 + model: + ... + trainer: + ... + +And the same through command line: + +.. code-block:: bash + + $ python train.py --optimizer='{class_path: torch.optim.Adam, init_args: {lr: 0.01}}' + +The automatic implementation of :code:`configure_optimizers` can be disabled by linking the configuration group. An +example can be :code:`ReduceLROnPlateau` which requires to specify a monitor. This would be: + +.. testcode:: + + from pytorch_lightning.utilities.cli import instantiate_class, LightningCLI + + class MyModel(LightningModule): + + def __init__(self, optimizer_init: dict, lr_scheduler_init: dict): + super().__init__() + self.optimizer_init = optimizer_init + self.lr_scheduler_init = lr_scheduler_init + + def configure_optimizers(self): + optimizer = instantiate_class(self.parameters(), self.optimizer_init) + scheduler = instantiate_class(optimizer, self.lr_scheduler_init) + return {"optimizer": optimizer, "lr_scheduler": scheduler, "monitor": "metric_to_track"} + + class MyLightningCLI(LightningCLI): + + def add_arguments_to_parser(self, parser): + parser.add_optimizer_args( + torch.optim.Adam, + link_to='model.optimizer_init', + ) + parser.add_lr_scheduler_args( + torch.optim.lr_scheduler.ReduceLROnPlateau, + link_to='model.lr_scheduler_init', + ) + + cli = MyLightningCLI(MyModel) + +For both possibilities of using :meth:`pytorch_lightning.utilities.cli.LightningArgumentParser.add_optimizer_args` with +a single class or a tuple of classes, the value given to :code:`optimizer_init` will always be a dictionary including +:code:`class_path` and :code:`init_args` entries. The function +:func:`~pytorch_lightning.utilities.cli.instantiate_class` takes care of importing the class defined in +:code:`class_path` and instantiating it using some positional arguments, in this case :code:`self.parameters()`, and the +:code:`init_args`. Any number of optimizers and learning rate schedulers can be added when using :code:`link_to`. 
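
For intuition, the behavior of :func:`~pytorch_lightning.utilities.cli.instantiate_class` can be approximated by the following sketch (a simplified illustration, not the actual implementation):

.. code-block:: python

    import importlib

    def instantiate_class_sketch(args, init):
        # Resolve e.g. 'torch.optim.Adam' to the Adam class
        module_path, _, class_name = init['class_path'].rpartition('.')
        cls = getattr(importlib.import_module(module_path), class_name)
        # Positional args first (e.g. ``self.parameters()``), then the init args
        if not isinstance(args, tuple):
            args = (args,)
        return cls(*args, **init.get('init_args', {}))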
diff --git a/docs/source/common/lightning_module.rst b/docs/source/common/lightning_module.rst index 295d231ca5ac3..6043eab649ebf 100644 --- a/docs/source/common/lightning_module.rst +++ b/docs/source/common/lightning_module.rst @@ -279,11 +279,16 @@ In this case, implement the `training_step_end` method return {'loss': loss, 'pred': pred} def training_step_end(self, batch_parts): - gpu_0_prediction = batch_parts[0]['pred'] - gpu_1_prediction = batch_parts[1]['pred'] + # predictions from each GPU + predictions = batch_parts['pred'] + # losses from each GPU + losses = batch_parts['loss'] + + gpu_0_prediction = predictions[0] + gpu_1_prediction = predictions[1] # do something with both outputs - return (batch_parts[0]['loss'] + batch_parts[1]['loss']) / 2 + return (losses[0] + losses[1]) / 2 def training_epoch_end(self, training_step_outputs): for out in training_step_outputs: @@ -383,11 +388,16 @@ In this case, implement the `validation_step_end` method return {'loss': loss, 'pred': pred} def validation_step_end(self, batch_parts): - gpu_0_prediction = batch_parts.pred[0]['pred'] - gpu_1_prediction = batch_parts.pred[1]['pred'] + # predictions from each GPU + predictions = batch_parts['pred'] + # losses from each GPU + losses = batch_parts['loss'] + + gpu_0_prediction = predictions[0] + gpu_1_prediction = predictions[1] # do something with both outputs - return (batch_parts[0]['loss'] + batch_parts[1]['loss']) / 2 + return (losses[0] + losses[1]) / 2 def validation_epoch_end(self, validation_step_outputs): for out in validation_step_outputs: @@ -441,12 +451,12 @@ There are two ways to call `test()`: trainer.fit(model) # automatically auto-loads the best weights - trainer.test(test_dataloaders=test_dataloader) + trainer.test(dataloaders=test_dataloader) # or call with pretrained model model = MyLightningModule.load_from_checkpoint(PATH) trainer = Trainer() - trainer.test(model, test_dataloaders=test_dataloader) + trainer.test(model, dataloaders=test_dataloader) ---------- @@ -489,6 +499,14 @@ For research, LightningModules are best structured as systems. reconstruction_loss = nn.functional.mse_loss(recons, x) self.log('val_reconstruction', reconstruction_loss) + def predict_step(self, batch, batch_idx, dataloader_idx): + x, _ = batch + + # encode + # for predictions, we could return the embedding or the reconstruction or both based on our need. + x = x.view(x.size(0), -1) + return self.encoder(x) + def configure_optimizers(self): return torch.optim.Adam(self.parameters(), lr=0.0002) @@ -510,6 +528,7 @@ The methods above are part of the lightning interface: - training_step - validation_step - test_step +- predict_step - configure_optimizers Note that in this case, the train loop and val loop are exactly the same. We can of course reuse this code. @@ -554,12 +573,20 @@ Inference in research ^^^^^^^^^^^^^^^^^^^^^ In the case where we want to perform inference with the system we can add a `forward` method to the LightningModule. +.. note:: When using forward, you are responsible to call :func:`~torch.nn.Module.eval` and use the :func:`~torch.no_grad` context manager. + .. code-block:: python class Autoencoder(pl.LightningModule): + def forward(self, x): return self.decoder(x) + model = Autoencoder() + model.eval() + with torch.no_grad(): + reconstruction = model(embedding) + The advantage of adding a forward is that in complex systems, you can do a much more involved inference procedure, such as text generation: @@ -575,6 +602,25 @@ such as text generation: ... 
return decoded
 
+In the case where you want to scale your inference, you should use
+:meth:`~pytorch_lightning.core.lightning.LightningModule.predict_step`.
+
+.. code-block:: python
+
+    class Autoencoder(pl.LightningModule):
+
+        def forward(self, x):
+            return self.decoder(x)
+
+        def predict_step(self, batch, batch_idx, dataloader_idx=None):
+            # this calls forward
+            return self(batch)
+
+    data_module = ...
+    model = Autoencoder()
+    trainer = Trainer(gpus=2)
+    trainer.predict(model, data_module)
+
 Inference in production
 ^^^^^^^^^^^^^^^^^^^^^^^
 For cases like production, you might want to iterate different models inside a LightningModule.
@@ -586,33 +632,41 @@ For cases like production, you might want to iterate different models inside a L
 
     class ClassificationTask(pl.LightningModule):
 
-         def __init__(self, model):
-             super().__init__()
-             self.model = model
-
-         def training_step(self, batch, batch_idx):
-             x, y = batch
-             y_hat = self.model(x)
-             loss = F.cross_entropy(y_hat, y)
-             return loss
+        def __init__(self, model):
+            super().__init__()
+            self.model = model
 
-         def validation_step(self, batch, batch_idx):
+        def training_step(self, batch, batch_idx):
             x, y = batch
             y_hat = self.model(x)
             loss = F.cross_entropy(y_hat, y)
-             acc = FM.accuracy(y_hat, y)
+            return loss
 
+        def validation_step(self, batch, batch_idx):
+            loss, acc = self._shared_eval_step(batch, batch_idx)
             metrics = {'val_acc': acc, 'val_loss': loss}
             self.log_dict(metrics)
             return metrics
 
-         def test_step(self, batch, batch_idx):
-             metrics = self.validation_step(batch, batch_idx)
-             metrics = {'test_acc': metrics['val_acc'], 'test_loss': metrics['val_loss']}
+        def test_step(self, batch, batch_idx):
+            loss, acc = self._shared_eval_step(batch, batch_idx)
+            metrics = {'test_acc': acc, 'test_loss': loss}
             self.log_dict(metrics)
+            return metrics
 
-         def configure_optimizers(self):
-             return torch.optim.Adam(self.model.parameters(), lr=0.02)
+        def _shared_eval_step(self, batch, batch_idx):
+            x, y = batch
+            y_hat = self.model(x)
+            loss = F.cross_entropy(y_hat, y)
+            acc = FM.accuracy(y_hat, y)
+            return loss, acc
+
+        def predict_step(self, batch, batch_idx, dataloader_idx):
+            x, y = batch
+            y_hat = self.model(x)
+            return y_hat
+
+        def configure_optimizers(self):
+            return torch.optim.Adam(self.model.parameters(), lr=0.02)
 
 Then pass in any arbitrary model to be fit with this task
 
@@ -1009,7 +1063,11 @@ truncated_bptt_steps
 ^^^^^^^^^^^^^^^^^^^^
 Truncated backpropagation through time performs backprop every k steps of
-a much longer sequence.
+a much longer sequence. This is made possible by passing training batches
+split along the time dimension into splits of size k to the
+``training_step``. In order to keep the same forward propagation behavior, all
+hidden states should be kept in between each time-dimension split.
+
 If this is enabled, your batches will automatically get truncated
 and the trainer will apply Truncated Backprop to it.
 
@@ -1026,23 +1084,40 @@ recurrent network trajectories."
 
     class MyModel(LightningModule):
 
-        def __init__(self):
+        def __init__(self, input_size, hidden_size, num_layers):
             super().__init__()
+            # batch_first has to be set to True
+            self.lstm = nn.LSTM(
+                input_size=input_size,
+                hidden_size=hidden_size,
+                num_layers=num_layers,
+                batch_first=True,
+            )
+
+            ...
+ # Important: This property activates truncated backpropagation through time # Setting this value to 2 splits the batch into sequences of size 2 self.truncated_bptt_steps = 2 # Truncated back-propagation through time def training_step(self, batch, batch_idx, hiddens): + x, y = batch + # the training step must be updated to accept a ``hiddens`` argument # hiddens are the hiddens from the previous truncated backprop step - out, hiddens = self.lstm(data, hiddens) + out, hiddens = self.lstm(x, hiddens) + + ... + return { "loss": ..., "hiddens": hiddens } -Lightning takes care to split your batch along the time-dimension. +Lightning takes care of splitting your batch along the time-dimension. It is +assumed to be the second dimension of your batches. Therefore, in the +example above we have set ``batch_first=True``. .. code-block:: python @@ -1064,7 +1139,9 @@ override :meth:`pytorch_lightning.core.LightningModule.tbptt_split_batch`: Hooks ^^^^^ -This is the pseudocode to describe how all the hooks are called during a call to ``.fit()``. +This is the pseudocode to describe the structure of :meth:`~pytorch_lightning.trainer.Trainer.fit`. +The inputs and outputs of each function are not represented for simplicity. Please check each function's API reference +for more information. .. code-block:: python @@ -1075,36 +1152,41 @@ This is the pseudocode to describe how all the hooks are called during a call to configure_callbacks() - on_fit_start() - - for gpu/tpu in gpu/tpus: - train_on_device(model.copy()) - - on_fit_end() + with parallel(devices): + # devices can be GPUs, TPUs, ... + train_on_device(model) def train_on_device(model): - # setup is called PER DEVICE - setup() + # called PER DEVICE + on_fit_start() + setup('fit') configure_optimizers() + on_pretrain_routine_start() + on_pretrain_routine_end() + + # the sanity check runs here + on_train_start() for epoch in epochs: train_loop() + on_train_end() - teardown() + on_fit_end() + teardown('fit') def train_loop(): on_epoch_start() on_train_epoch_start() - train_outs = [] - for train_batch in train_dataloader(): + + for batch in train_dataloader(): on_train_batch_start() - # ----- train_step methods ------- - out = training_step(batch) - train_outs.append(out) + on_before_batch_transfer() + transfer_batch_to_device() + on_after_batch_transfer() - loss = out.loss + training_step() on_before_zero_grad() optimizer_zero_grad() @@ -1114,38 +1196,42 @@ This is the pseudocode to describe how all the hooks are called during a call to optimizer_step() - on_train_batch_end(out) + on_train_batch_end() if should_check_val: val_loop() - # end training epoch - training_epoch_end(outs) - on_train_epoch_end(outs) + training_epoch_end() + + on_train_epoch_end() on_epoch_end() def val_loop(): - model.eval() + on_validation_model_eval() # calls `model.eval()` torch.set_grad_enabled(False) + on_validation_start() on_epoch_start() on_validation_epoch_start() - val_outs = [] - for val_batch in val_dataloader(): + + for batch in val_dataloader(): on_validation_batch_start() - # -------- val step methods ------- - out = validation_step(val_batch) - val_outs.append(out) + on_before_batch_transfer() + transfer_batch_to_device() + on_after_batch_transfer() + + validation_step() - on_validation_batch_end(out) + on_validation_batch_end() + validation_epoch_end() - validation_epoch_end(val_outs) on_validation_epoch_end() on_epoch_end() + on_validation_end() # set up for train - model.train() + on_validation_model_train() # calls `model.train()` torch.set_grad_enabled(True) 
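+
+Any of these hooks can be overridden in your LightningModule to run custom logic at that point. A minimal sketch
+(``LitModel`` is a hypothetical module):
+
+.. code-block:: python
+
+    class LitModel(LightningModule):
+
+        def on_train_epoch_start(self):
+            # runs at the point shown in the pseudocode above, once per epoch
+            print(f"starting epoch {self.current_epoch}")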
backward @@ -1256,6 +1342,12 @@ on_test_epoch_end .. automethod:: pytorch_lightning.core.hooks.ModelHooks.on_test_epoch_end :noindex: +on_test_start +~~~~~~~~~~~~~ + +.. automethod:: pytorch_lightning.core.hooks.ModelHooks.on_test_start + :noindex: + on_test_end ~~~~~~~~~~~ @@ -1423,3 +1515,15 @@ on_after_batch_transfer .. automethod:: pytorch_lightning.core.hooks.DataHooks.on_after_batch_transfer :noindex: + +add_to_queue +~~~~~~~~~~~~ + +.. automethod:: pytorch_lightning.core.lightning.LightningModule.add_to_queue + :noindex: + +get_from_queue +~~~~~~~~~~~~~~ + +.. automethod:: pytorch_lightning.core.lightning.LightningModule.get_from_queue + :noindex: diff --git a/docs/source/common/loggers.rst b/docs/source/common/loggers.rst index c6c5f0d8653c7..5b1f13dbf4b8c 100644 --- a/docs/source/common/loggers.rst +++ b/docs/source/common/loggers.rst @@ -202,7 +202,7 @@ The :class:`~pytorch_lightning.loggers.TestTubeLogger` is available anywhere exc Weights and Biases ================== -`Weights and Biases `_ is a third-party logger. +`Weights and Biases `_ is a third-party logger. To use :class:`~pytorch_lightning.loggers.WandbLogger` as your logger do the following. First, install the package: @@ -215,9 +215,14 @@ Then configure the logger and pass it to the :class:`~pytorch_lightning.trainer. .. code-block:: python from pytorch_lightning.loggers import WandbLogger - wandb_logger = WandbLogger(offline=True) + + # instrument experiment with W&B + wandb_logger = WandbLogger(project='MNIST', log_model='all') trainer = Trainer(logger=wandb_logger) + # log gradients and model topology + wandb_logger.watch(model) + The :class:`~pytorch_lightning.loggers.WandbLogger` is available anywhere except ``__init__`` in your :class:`~pytorch_lightning.core.lightning.LightningModule`. @@ -226,8 +231,8 @@ The :class:`~pytorch_lightning.loggers.WandbLogger` is available anywhere except class MyModule(LightningModule): def any_lightning_module_function_or_hook(self): some_img = fake_image() - self.logger.experiment.log({ - "generated_images": [wandb.Image(some_img, caption="...")] + self.log({ + "generated_images": [wandb.Image(some_img, caption="...")] }) .. seealso:: diff --git a/docs/source/common/optimizers.rst b/docs/source/common/optimizers.rst index 12e9c6925e7fd..cde203fdd193e 100644 --- a/docs/source/common/optimizers.rst +++ b/docs/source/common/optimizers.rst @@ -232,88 +232,6 @@ If you want to call ``lr_scheduler.step()`` every ``n`` steps/epochs, do the fol ----- -Improve training speed with model toggling ------------------------------------------- -Toggling models can improve your training speed when performing gradient accumulation with multiple optimizers in a -distributed setting. - -Here is an explanation of what it does: - -* Considering the current optimizer as A and all other optimizers as B. -* Toggling means that all parameters from B exclusive to A will have their ``requires_grad`` attribute set to ``False``. -* Their original state will be restored when exiting the context manager. - -When performing gradient accumulation, there is no need to perform grad synchronization during the accumulation phase. -Setting ``sync_grad`` to ``False`` will block this synchronization and improve your training speed. - -:class:`~pytorch_lightning.core.optimizer.LightningOptimizer` provides a -:meth:`~pytorch_lightning.core.optimizer.LightningOptimizer.toggle_model` function as a -:func:`contextlib.contextmanager` for advanced users. - -Here is an example for advanced use-case. - -.. 
testcode:: python - - # Scenario for a GAN with gradient accumulation every 2 batches and optimized for multiple gpus. - class SimpleGAN(LightningModule): - - def __init__(self): - super().__init__() - self.automatic_optimization = False - - def training_step(self, batch, batch_idx): - # Implementation follows the PyTorch tutorial: - # https://pytorch.org/tutorials/beginner/dcgan_faces_tutorial.html - g_opt, d_opt = self.optimizers() - - X, _ = batch - X.requires_grad = True - batch_size = X.shape[0] - - real_label = torch.ones((batch_size, 1), device=self.device) - fake_label = torch.zeros((batch_size, 1), device=self.device) - - # Sync and clear gradients - # at the end of accumulation or - # at the end of an epoch. - is_last_batch_to_accumulate = \ - (batch_idx + 1) % 2 == 0 or self.trainer.is_last_batch - - g_X = self.sample_G(batch_size) - - ########################## - # Optimize Discriminator # - ########################## - with d_opt.toggle_model(sync_grad=is_last_batch_to_accumulate): - d_x = self.D(X) - errD_real = self.criterion(d_x, real_label) - - d_z = self.D(g_X.detach()) - errD_fake = self.criterion(d_z, fake_label) - - errD = (errD_real + errD_fake) - - self.manual_backward(errD) - if is_last_batch_to_accumulate: - d_opt.step() - d_opt.zero_grad() - - ###################### - # Optimize Generator # - ###################### - with g_opt.toggle_model(sync_grad=is_last_batch_to_accumulate): - d_z = self.D(g_X) - errG = self.criterion(d_z, real_label) - - self.manual_backward(errG) - if is_last_batch_to_accumulate: - g_opt.step() - g_opt.zero_grad() - - self.log_dict({'g_loss': errG, 'd_loss': errD}, prog_bar=True) - ------ - Use closure for LBFGS-like optimizers ------------------------------------- It is a good practice to provide the optimizer with a closure function that performs a ``forward``, ``zero_grad`` and diff --git a/docs/source/common/test_set.rst b/docs/source/common/test_set.rst index 4c9e9a6061977..5703d71d956de 100644 --- a/docs/source/common/test_set.rst +++ b/docs/source/common/test_set.rst @@ -80,10 +80,10 @@ is not available at the time your model was declared. .. code-block:: python # setup your data loader - test = DataLoader(...) + test_dataloader = DataLoader(...) # test (pass in the loader) - trainer.test(test_dataloaders=test) + trainer.test(dataloaders=test_dataloader) You can either pass in a single dataloader or a list of them. This optional named parameter can be used in conjunction with any of the above use cases. Additionally, diff --git a/docs/source/common/trainer.rst b/docs/source/common/trainer.rst index 86fd218e2d6b8..0983f0acb9eec 100644 --- a/docs/source/common/trainer.rst +++ b/docs/source/common/trainer.rst @@ -159,7 +159,7 @@ or after it has already been trained. .. code-block:: python - trainer.validate(val_dataloaders=val_dataloaders) + trainer.validate(dataloaders=val_dataloaders) ------------ @@ -196,6 +196,8 @@ unique seeds across all dataloader workers and processes for :mod:`torch`, :mod: ------- +.. _trainer_flags: + Trainer flags ------------- @@ -658,6 +660,8 @@ Writes logs to disk this often. See Also: - :doc:`logging <../extensions/logging>` +.. _gpus: + gpus ^^^^ @@ -1155,28 +1159,69 @@ precision | -Double precision (64), full precision (32) or half precision (16). -Can all be used on GPU or TPUs. Only double (64) and full precision (32) available on CPU. +Lightning supports either double precision (64), full precision (32), or half precision (16) training. 
-If used on TPU will use torch.bfloat16 but tensor printing
-will still show torch.float32.
+Half precision, or mixed precision, is the combined use of 32 and 16 bit floating points to reduce the memory footprint during model training. This can result in improved performance, achieving
+3X speedups on modern GPUs.
 
 .. testcode::
     :skipif: not _APEX_AVAILABLE and not _NATIVE_AMP_AVAILABLE or not torch.cuda.is_available()
 
     # default used by the Trainer
-    trainer = Trainer(precision=32)
+    trainer = Trainer(precision=32, gpus=1)
 
     # 16-bit precision
     trainer = Trainer(precision=16, gpus=1)
 
     # 64-bit precision
-    trainer = Trainer(precision=64)
+    trainer = Trainer(precision=64, gpus=1)
+
+
+.. note:: When running on TPUs, torch.bfloat16 will be used but tensor printing will still show torch.float32.
+
+.. note:: 16-bit precision is not supported on CPUs.
+
+
+.. admonition:: When using PyTorch 1.6+, Lightning uses the native AMP implementation to support 16-bit precision. 16-bit precision with PyTorch < 1.6 is supported by the NVIDIA Apex library.
+    :class: dropdown, warning
+
+    NVIDIA Apex and DDP have instability problems. We recommend upgrading to PyTorch 1.6+ in order to use the native AMP 16-bit precision with multiple GPUs.
+
+    If you are using an earlier version of PyTorch (before 1.6), Lightning uses `Apex <https://github.com/NVIDIA/apex>`_ to support 16-bit training.
+
+    To use Apex 16-bit training:
+
+    1. Install Apex
+
+       .. code-block:: bash
+
+           # ------------------------
+           # OPTIONAL: on your cluster you might need to load CUDA 10 or 9
+           # depending on how you installed PyTorch
+
+           # see available modules
+           module avail
+
+           # load the correct CUDA before install
+           module load cuda-10.0
+           # ------------------------
+
+           # make sure you've loaded a GCC version > 4.0 and < 7.0
+           module load gcc-6.1.0
+
+           pip install --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" git+https://github.com/NVIDIA/apex
+
+    2. Set the ``precision`` trainer flag to 16. You can customize the `Apex optimization level `_ by setting the ``amp_level`` flag.
+
+       .. testcode::
+           :skipif: not _APEX_AVAILABLE and not _NATIVE_AMP_AVAILABLE or not torch.cuda.is_available()
+
+           # turn on 16-bit
+           trainer = Trainer(amp_backend="apex", amp_level='O2', precision=16)
+
+    If you need to configure the apex init for your particular use case, or want to customize the
+    16-bit training behaviour, override :meth:`pytorch_lightning.core.LightningModule.configure_apex`.
 
-Example::
-
-    # one day
-    trainer = Trainer(precision=8|4|2)
 
 process_position
 ^^^^^^^^^^^^^^^^
 
@@ -1378,6 +1423,8 @@ track_grad_norm
 
     # track the 2-norm
     trainer = Trainer(track_grad_norm=2)
 
+.. _tpu_cores:
+
 tpu_cores
 ^^^^^^^^^
 
@@ -1527,6 +1574,24 @@ Can specify as float or int.
 
     trainer = Trainer(val_check_interval=1000)
 
+.. code-block:: python
+
+    # Here is the computation to estimate the total number of batches seen within an epoch.
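+    # (assumption: the dataset is sharded evenly across `world_size` processes
+    # and partial batches are dropped by the integer division)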
+ + # Find the total number of train batches + total_train_batches = total_train_samples // (train_batch_size * world_size) + + # Compute how many times we will call validation during the training loop + val_check_batch = max(1, int(total_train_batches * val_check_interval)) + val_checks_per_epoch = total_train_batches / val_check_batch + + # Find the total number of validation batches + total_val_batches = total_val_samples // (val_batch_size * world_size) + + # Total number of batches run + total_fit_batches = total_train_batches + total_val_batches + + weights_save_path ^^^^^^^^^^^^^^^^^ diff --git a/docs/source/conf.py b/docs/source/conf.py index 0d830d75ecf28..111cd6887fbdb 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -23,7 +23,12 @@ PATH_HERE = os.path.abspath(os.path.dirname(__file__)) PATH_ROOT = os.path.join(PATH_HERE, '..', '..') +PATH_RAW_NB = os.path.join(PATH_ROOT, '_notebooks') +PATH_IPYNB = os.path.join(PATH_HERE, 'notebooks') sys.path.insert(0, os.path.abspath(PATH_ROOT)) +sys.path.append(os.path.join(PATH_RAW_NB, '.actions')) + +from helpers import HelperCLI # noqa: E401 E402 FOLDER_GENERATED = 'generated' SPHINX_MOCK_REQUIREMENTS = int(os.environ.get('SPHINX_MOCK_REQUIREMENTS', True)) @@ -37,21 +42,7 @@ # -- Project documents ------------------------------------------------------- -# # export the documentation -# with open('intro.rst', 'w') as fp: -# intro = pytorch_lightning.__doc__.replace(os.linesep + ' ', '') -# fp.write(m2r.convert(intro)) -# # fp.write(pytorch_lightning.__doc__) - -# # export the READme -# with open(os.path.join(PATH_ROOT, 'README.md'), 'r') as fp: -# readme = fp.read() -# # replace all paths to relative -# for ndir in (os.path.basename(p) for p in glob.glob(os.path.join(PATH_ROOT, '*')) -# if os.path.isdir(p)): -# readme = readme.replace('](%s/' % ndir, '](%s/%s/' % (PATH_ROOT, ndir)) -# with open('readme.md', 'w') as fp: -# fp.write(readme) +HelperCLI.copy_notebooks(PATH_RAW_NB, PATH_IPYNB) def _transform_changelog(path_in: str, path_out: str) -> None: @@ -111,10 +102,9 @@ def _transform_changelog(path_in: str, path_out: str) -> None: 'sphinx.ext.autosummary', 'sphinx.ext.napoleon', 'sphinx.ext.imgmath', - 'recommonmark', 'sphinx.ext.autosectionlabel', - # 'm2r', - # 'nbsphinx', # it seems some sphinx issue + 'myst_parser', + 'nbsphinx', 'sphinx_autodoc_typehints', 'sphinx_copybutton', 'sphinx_paramlinks', @@ -132,12 +122,14 @@ def _transform_changelog(path_in: str, path_out: str) -> None: nbsphinx_allow_errors = True nbsphinx_requirejs_path = '' +# myst-parser, forcing to parse all html pages with mathjax +# https://github.com/executablebooks/MyST-Parser/issues/394 +myst_update_mathjax = False + # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # -# source_suffix = ['.rst', '.md'] -# source_suffix = ['.rst', '.md', '.ipynb'] -source_suffix = { +source_parsers = { '.rst': 'restructuredtext', '.txt': 'markdown', '.md': 'markdown', @@ -159,6 +151,8 @@ def _transform_changelog(path_in: str, path_out: str) -> None: # This pattern also affects html_static_path and html_extra_path. exclude_patterns = [ f'{FOLDER_GENERATED}/PULL_REQUEST_TEMPLATE.md', + 'notebooks/course_UvA-DL/*', + 'notebooks/template*', ] # The name of the Pygments (syntax highlighting) style to use. 
diff --git a/docs/source/ecosystem/asr_nlp_tts.rst b/docs/source/ecosystem/asr_nlp_tts.rst
index e1a94eda9e805..6b5840d32c84e 100644
--- a/docs/source/ecosystem/asr_nlp_tts.rst
+++ b/docs/source/ecosystem/asr_nlp_tts.rst
@@ -160,7 +160,7 @@ for the entire speech to text .yaml file.
     max_epochs: 5
     max_steps: null # computed at runtime if not set
     num_nodes: 1
-    distributed_backend: ddp
+    accelerator: ddp
     ...
     # configure the ASR model
     model:
@@ -598,7 +598,7 @@ Specify TTS Model Configurations with YAML File
     gpus: -1 # number of gpus
     max_epochs: 350
     num_nodes: 1
-    distributed_backend: ddp
+    accelerator: ddp
     ...
     # configure the TTS model
diff --git a/docs/source/extensions/datamodules.rst b/docs/source/extensions/datamodules.rst
index 27fdf176f5554..b710a43b2c580 100644
--- a/docs/source/extensions/datamodules.rst
+++ b/docs/source/extensions/datamodules.rst
@@ -53,7 +53,7 @@ Datamodules are for you if you ever asked the questions:
 
 What is a DataModule
 --------------------
-A DataModule is simply a collection of a train_dataloader, val_dataloader(s), test_dataloader(s) along with the
+A DataModule is simply a collection of train_dataloader(s), val_dataloader(s) and test_dataloader(s), along with the
 matching transforms and data processing/downloads steps required.
 
 Here's a simple PyTorch example:
diff --git a/docs/source/extensions/logging.rst b/docs/source/extensions/logging.rst
index 107eca2dd9d74..12760f0ee6898 100644
--- a/docs/source/extensions/logging.rst
+++ b/docs/source/extensions/logging.rst
@@ -68,6 +68,10 @@ except functions with `batch_start` in their names.
     def training_step(self, batch, batch_idx):
         self.log('my_metric', x)
 
+    # or a dict
+    def training_step(self, batch, batch_idx):
+        self.log('performance', {'acc': acc, 'recall': recall})
+
 Depending on where log is called from, Lightning auto-determines
 the correct logging mode for you. But of course you can override the
 default behavior by manually setting the
 :func:`~pytorch_lightning.core.lightning.LightningModule.log` parameters.
diff --git a/docs/source/extensions/plugins.rst b/docs/source/extensions/plugins.rst
index 35e563715e037..436d40f660e7a 100644
--- a/docs/source/extensions/plugins.rst
+++ b/docs/source/extensions/plugins.rst
@@ -115,8 +115,6 @@ Training Type Plugins
     DDPSpawnPlugin
     DeepSpeedPlugin
     HorovodPlugin
-    RPCPlugin
-    RPCSequentialPlugin
     SingleTPUPlugin
     TPUSpawnPlugin
 
diff --git a/docs/source/guides/speed.rst b/docs/source/guides/speed.rst
new file mode 100644
index 0000000000000..ece806558c76c
--- /dev/null
+++ b/docs/source/guides/speed.rst
@@ -0,0 +1,482 @@
+.. testsetup:: *
+
+    from pytorch_lightning.trainer.trainer import Trainer
+    from pytorch_lightning.callbacks.early_stopping import EarlyStopping
+    from pytorch_lightning.core.lightning import LightningModule
+
+.. _speed:
+
+#######################
+Speed up model training
+#######################
+
+There are multiple ways you can speed up your model's time to convergence:
+
+* `GPU/TPU training <#gpu-tpu-training>`_
+
+* `Mixed precision (16-bit) training <#mixed-precision-16-bit-training>`_
+
+* `Control Training Epochs <#control-training-epochs>`_
+
+* `Control Validation Frequency <#control-validation-frequency>`_
+
+* `Limit Dataset Size <#limit-dataset-size>`_
+
+* `Preload Data Into RAM <#preload-data-into-ram>`_
+
+* `Model Toggling <#model-toggling>`_
+
+* `Set Grads to None <#set-grads-to-none>`_
+
+* `Things to avoid <#things-to-avoid>`_
+
+****************
+GPU/TPU training
+****************
+
+**Use when:** Whenever possible!
+
+With Lightning, running on GPUs, TPUs or multiple nodes is a simple switch of a flag.
+
+GPU training
+============
+
+Lightning supports a variety of plugins to further speed up distributed GPU training. 
Most notably:
+
+* :class:`~pytorch_lightning.plugins.training_type.DDPPlugin`
+* :class:`~pytorch_lightning.plugins.training_type.DDPShardedPlugin`
+* :class:`~pytorch_lightning.plugins.training_type.DeepSpeedPlugin`
+
+.. code-block:: python
+
+    # run on 1 gpu
+    trainer = Trainer(gpus=1)
+
+    # train on 8 gpus, using the DDP plugin
+    trainer = Trainer(gpus=8, accelerator="ddp")
+
+    # train on multiple GPUs across nodes (uses 8 gpus in total)
+    trainer = Trainer(gpus=2, num_nodes=4)
+
+
+GPU Training Speedup Tips
+-------------------------
+
+When training on single or multiple GPU machines, Lightning offers a host of advanced optimizations to improve throughput, memory efficiency, and model scaling.
+Refer to :doc:`Advanced GPU Optimized Training for more details <../advanced/advanced_gpu>`.
+
+Prefer DDP over DP
+^^^^^^^^^^^^^^^^^^
+:class:`~pytorch_lightning.plugins.training_type.DataParallelPlugin` performs three GPU transfers for EVERY batch:
+
+1. Copy the model to the device.
+2. Copy the data to the device.
+3. Copy the outputs of each device back to the master device.
+
+Whereas :class:`~pytorch_lightning.plugins.training_type.DDPPlugin` only performs one transfer to sync gradients, making DDP MUCH faster than DP.
+
+
+When using DDP, set find_unused_parameters=False
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+By default, we have set ``find_unused_parameters`` to ``True`` due to compatibility issues that have arisen in the past (see the `discussion `_ for more information).
+This default comes with a performance hit and can be disabled in most cases.
+
+.. code-block:: python
+
+    from pytorch_lightning.plugins import DDPPlugin
+
+    trainer = pl.Trainer(
+        gpus=2,
+        plugins=DDPPlugin(find_unused_parameters=False),
+    )
+
+Dataloaders
+^^^^^^^^^^^
+When building your DataLoader, set ``num_workers > 0`` and ``pin_memory=True`` (only for GPUs).
+
+.. code-block:: python
+
+    DataLoader(dataset, num_workers=8, pin_memory=True)
+
+num_workers
+"""""""""""
+
+The question of how many workers to specify in ``num_workers`` is tricky. Here's a summary of
+some references, [`1 `_], and our suggestions:
+
+1. ``num_workers=0`` means ONLY the main process will load batches (that can be a bottleneck).
+2. ``num_workers=1`` means ONLY one worker (just not the main process) will load data, but it will still be slow.
+3. The optimal ``num_workers`` depends on the batch size and your machine.
+4. A general place to start is to set ``num_workers`` equal to the number of CPU cores on that machine. You can get the number of CPU cores in Python using ``os.cpu_count()``, but note that depending on your batch size, you may overflow RAM memory.
+
+.. warning:: Increasing ``num_workers`` will ALSO increase your CPU memory consumption.
+
+The best thing to do is to increase ``num_workers`` slowly and stop once you see no more improvement in your training speed.
+
+Spawn
+"""""
+When using ``accelerator=ddp_spawn`` or training on TPUs, multiple GPUs/TPU cores are used by calling ``.spawn()`` under the hood.
+The problem is that PyTorch has issues with ``num_workers > 0`` when using ``.spawn()``. For this reason, we recommend you
+use ``accelerator=ddp`` so you can increase ``num_workers``; however, your script has to be callable like so:
+
+.. code-block:: bash
+
+    python my_program.py
+
+
+TPU training
+============
+
+You can set the ``tpu_cores`` trainer flag to 1 or 8 cores.
+
+.. code-block:: python
+
+    # train on 1 TPU core
+    trainer = Trainer(tpu_cores=1)
+
+    # train on 8 TPU cores
+    trainer = Trainer(tpu_cores=8)
+
+To train on more than 8 cores (i.e. a POD),
+submit this script using the xla_dist script.
+
+Example::
+
+    python -m torch_xla.distributed.xla_dist
+    --tpu=$TPU_POD_NAME
+    --conda-env=torch-xla-nightly
+    --env=XLA_USE_BF16=1
+    -- python your_trainer_file.py
+
+
+Read more in our :ref:`accelerators` and :ref:`plugins` guides.
+
+
+-----------
+
+.. _amp:
+
+*********************************
+Mixed precision (16-bit) training
+*********************************
+
+**Use when:**
+
+* You want to optimize for memory usage on a GPU.
+* You have a GPU that supports 16 bit precision (NVIDIA Pascal architecture or newer).
+* Your optimization algorithm (training_step) is numerically stable.
+* You want to be the cool person in the lab :p
+
+Mixed precision combines the use of both 32 and 16 bit floating points to reduce the memory footprint during model training, resulting in improved performance, achieving
+3X speedups on modern GPUs.
+
+Lightning offers mixed precision, or 16-bit, training for GPUs and TPUs.
+
+.. testcode::
+    :skipif: not _APEX_AVAILABLE and not _NATIVE_AMP_AVAILABLE or not torch.cuda.is_available()
+
+    # 16-bit precision
+    trainer = Trainer(precision=16, gpus=4)
+
+
+----------------
+
+
+***********************
+Control Training Epochs
+***********************
+
+**Use when:** You run a hyperparameter search to find good initial parameters and want to save time, cost (money), or power (environment).
+It allows you to be more cost efficient and run more experiments at the same time.
+
+You can use Trainer flags to force training for a minimum number of epochs or limit it to a maximum number of epochs, via the ``min_epochs`` and ``max_epochs`` flags.
+
+.. testcode::
+
+    # DEFAULT
+    trainer = Trainer(min_epochs=1, max_epochs=1000)
+
+
+If running iteration-based training with an infinite or iterable dataloader, you can instead control the number of steps with the ``min_steps`` and ``max_steps`` flags:
+
+.. testcode::
+
+    trainer = Trainer(max_steps=1000)
+
+    trainer = Trainer(min_steps=100)
+
+You can also interrupt training based on training time:
+
+.. testcode::
+
+    # Stop after 12 hours of training or when reaching 10 epochs (string)
+    trainer = Trainer(max_time="00:12:00:00", max_epochs=10)
+
+    # Stop after 1 day and 5 hours (dict)
+    trainer = Trainer(max_time={"days": 1, "hours": 5})
+
+Learn more in our :ref:`trainer_flags` guide.
+
+
+----------------
+
+****************************
+Control Validation Frequency
+****************************
+
+Check validation every n epochs
+===============================
+
+**Use when:** You have a small dataset and want to run fewer validation checks.
+
+You can limit validation to run only every n epochs using the ``check_val_every_n_epoch`` Trainer flag.
+
+.. testcode::
+
+    # DEFAULT
+    trainer = Trainer(check_val_every_n_epoch=1)
+
+
+Set validation check frequency within 1 training epoch
+======================================================
+
+**Use when:** You have a large training dataset and want to run mid-epoch validation checks.
+
+For large datasets, it's often desirable to check validation multiple times within a training epoch.
+Pass in a float to check that often within 1 training epoch. Pass in an int ``k`` to check every ``k`` training batches.
+Must use an ``int`` if using an ``IterableDataset``.
+
+.. testcode::
+
+    # DEFAULT
+    trainer = Trainer(val_check_interval=1.0)
+
+    # check every .25 of an epoch
+    trainer = Trainer(val_check_interval=0.25)
+
+    # check every 100 train batches (i.e. for ``IterableDatasets`` or a fixed frequency)
+    trainer = Trainer(val_check_interval=100)
+
+Learn more in our :ref:`trainer_flags` guide.
+
+----------------
+
+******************
+Limit Dataset Size
+******************
+
+Use data subset for training, validation, and test
+==================================================
+
+**Use when:** Debugging or running huge datasets.
+
+If you don't want to check 100% of the training/validation/test set, set these flags:
+
+.. testcode::
+
+    # DEFAULT
+    trainer = Trainer(
+        limit_train_batches=1.0,
+        limit_val_batches=1.0,
+        limit_test_batches=1.0
+    )
+
+    # check 10%, 20%, 30% only, respectively for training, validation and test set
+    trainer = Trainer(
+        limit_train_batches=0.1,
+        limit_val_batches=0.2,
+        limit_test_batches=0.3
+    )
+
+If you also pass ``shuffle=True`` to the dataloader, a different random subset of your dataset will be used for each epoch; otherwise the same subset will be used for all epochs.
+
+.. note:: ``limit_train_batches``, ``limit_val_batches`` and ``limit_test_batches`` will be overwritten by ``overfit_batches`` if ``overfit_batches`` > 0. ``limit_val_batches`` will be ignored if ``fast_dev_run=True``.
+
+.. note:: If you set ``limit_val_batches=0``, validation will be disabled.
+
+Learn more in our :ref:`trainer_flags` guide.
+
+-----
+
+*********************
+Preload Data Into RAM
+*********************
+
+**Use when:** You need access to all samples in a dataset at once.
+
+When your training or preprocessing requires many operations to be performed on entire dataset(s), it can
+sometimes be beneficial to store all data in RAM, given there is enough space.
+However, loading all data at the beginning of the training script has the disadvantage that it can take a long
+time, and hence slows down the development process. Another downside is that in multiprocessing (e.g. DDP)
+the data would get copied into each process.
+One can overcome these problems by copying the data into RAM in advance.
+Most UNIX-based operating systems provide direct access to tmpfs through a mount point typically named ``/dev/shm``.
+
+0. Increase shared memory if necessary. Refer to the documentation of your OS on how to do this.
+
+1. Copy training data to shared memory:
+
+   .. code-block:: bash
+
+       cp -r /path/to/data/on/disk /dev/shm/
+
+2. Refer to the new data root in your script or command line arguments:
+
+   .. code-block:: python
+
+       datamodule = MyDataModule(data_root="/dev/shm/my_data")
+
+---------
+
+**************
+Model Toggling
+**************
+
+**Use when:** Performing gradient accumulation with multiple optimizers in a
+distributed setting.
+
+Here is an explanation of what it does:
+
+* Consider the current optimizer as A and all other optimizers as B.
+* Toggling means that all parameters from B exclusive to A will have their ``requires_grad`` attribute set to ``False``.
+* Their original state will be restored when exiting the context manager.
+
+When performing gradient accumulation, there is no need to perform grad synchronization during the accumulation phase.
+Setting ``sync_grad`` to ``False`` will block this synchronization and improve your training speed.
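+
+In practice this is done through the :meth:`~pytorch_lightning.core.optimizer.LightningOptimizer.toggle_model`
+context manager described below. A minimal sketch of the pattern (assuming manual optimization with two optimizers;
+``compute_loss_a`` is a hypothetical helper standing in for your own loss computation):
+
+.. code-block:: python
+
+    # inside training_step, with self.automatic_optimization = False
+    opt_a, opt_b = self.optimizers()
+
+    with opt_a.toggle_model(sync_grad=False):
+        # parameters that belong only to opt_b have requires_grad=False in here,
+        # and gradient synchronization across processes is skipped
+        loss_a = self.compute_loss_a(batch)
+        self.manual_backward(loss_a)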
+
+:class:`~pytorch_lightning.core.optimizer.LightningOptimizer` provides a
+:meth:`~pytorch_lightning.core.optimizer.LightningOptimizer.toggle_model` function as a
+:func:`contextlib.contextmanager` for advanced users.
+
+Here is an example of an advanced use case:
+
+.. testcode::
+
+    # Scenario for a GAN with gradient accumulation every 2 batches and optimized for multiple gpus.
+    class SimpleGAN(LightningModule):
+
+        def __init__(self):
+            super().__init__()
+            self.automatic_optimization = False
+
+        def training_step(self, batch, batch_idx):
+            # Implementation follows the PyTorch tutorial:
+            # https://pytorch.org/tutorials/beginner/dcgan_faces_tutorial.html
+            g_opt, d_opt = self.optimizers()
+
+            X, _ = batch
+            X.requires_grad = True
+            batch_size = X.shape[0]
+
+            real_label = torch.ones((batch_size, 1), device=self.device)
+            fake_label = torch.zeros((batch_size, 1), device=self.device)
+
+            # Sync and clear gradients
+            # at the end of accumulation or
+            # at the end of an epoch.
+            is_last_batch_to_accumulate = \
+                (batch_idx + 1) % 2 == 0 or self.trainer.is_last_batch
+
+            g_X = self.sample_G(batch_size)
+
+            ##########################
+            # Optimize Discriminator #
+            ##########################
+            with d_opt.toggle_model(sync_grad=is_last_batch_to_accumulate):
+                d_x = self.D(X)
+                errD_real = self.criterion(d_x, real_label)
+
+                d_z = self.D(g_X.detach())
+                errD_fake = self.criterion(d_z, fake_label)
+
+                errD = (errD_real + errD_fake)
+
+                self.manual_backward(errD)
+                if is_last_batch_to_accumulate:
+                    d_opt.step()
+                    d_opt.zero_grad()
+
+            ######################
+            # Optimize Generator #
+            ######################
+            with g_opt.toggle_model(sync_grad=is_last_batch_to_accumulate):
+                d_z = self.D(g_X)
+                errG = self.criterion(d_z, real_label)
+
+                self.manual_backward(errG)
+                if is_last_batch_to_accumulate:
+                    g_opt.step()
+                    g_opt.zero_grad()
+
+            self.log_dict({'g_loss': errG, 'd_loss': errD}, prog_bar=True)
+
+-----
+
+*****************
+Set Grads to None
+*****************
+
+In order to modestly improve performance, you can override :meth:`~pytorch_lightning.core.lightning.LightningModule.optimizer_zero_grad`.
+
+For a more detailed explanation of the pros / cons of this technique,
+read `this `_ documentation by the PyTorch team.
+
+.. testcode::
+
+    class Model(LightningModule):
+
+        def optimizer_zero_grad(self, epoch, batch_idx, optimizer, optimizer_idx):
+            optimizer.zero_grad(set_to_none=True)
+
+
+-----
+
+***************
+Things to avoid
+***************
+
+.item(), .numpy(), .cpu()
+=========================
+Don't call ``.item()`` anywhere in your code. Use ``.detach()`` instead to remove the connected graph calls. Lightning
+takes a great deal of care to be optimized for this.
+
+----------
+
+empty_cache()
+=============
+Don't call this unnecessarily! Every time you call this, ALL your GPUs have to wait to sync.
+
+----------
+
+Transferring tensors to device
+==============================
+LightningModules know what device they are on! Construct tensors on the device directly to avoid CPU->Device transfer.
+
+.. code-block:: python
+
+    # bad
+    t = torch.rand(2, 2).cuda()
+
+    # good (self is LightningModule)
+    t = torch.rand(2, 2, device=self.device)
+
+
+For tensors that need to be model attributes, it is best practice to register them as buffers in the module's
+``__init__`` method:
+
+.. 
code-block:: python + + # bad + self.t = torch.rand(2, 2, device=self.device) + + # good + self.register_buffer("t", torch.rand(2, 2)) diff --git a/docs/source/index.rst b/docs/source/index.rst index 71ad835e02d31..c2c02b19634d6 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -21,8 +21,8 @@ PyTorch Lightning Documentation :name: guides :caption: Best practices + guides/speed starter/style_guide - benchmarking/performance Lightning project template benchmarking/benchmarks @@ -57,6 +57,13 @@ PyTorch Lightning Documentation PyTorch Lightning 101 class From PyTorch to PyTorch Lightning [Blog] From PyTorch to PyTorch Lightning [Video] + notebooks/lightning_examples/mnist-hello-world.ipynb + notebooks/lightning_examples/datamodules.ipynb + notebooks/lightning_examples/cifar10-baseline.ipynb + notebooks/lightning_examples/basic-gan.ipynb + notebooks/lightning_examples/text-transformers.ipynb + notebooks/lightning_examples/reinforce-learning-DQN.ipynb + notebooks/lightning_examples/augmentation_kornia.ipynb .. toctree:: :maxdepth: 2 @@ -94,12 +101,10 @@ PyTorch Lightning Documentation clouds/cloud_training clouds/cluster - advanced/amp common/child_modules common/debugging common/loggers common/early_stopping - common/fast_training common/hyperparameters common/lightning_cli advanced/lr_finder @@ -114,6 +119,7 @@ PyTorch Lightning Documentation advanced/training_tricks advanced/pruning_quantization advanced/transfer_learning + advanced/ipu advanced/tpu common/test_set common/production_inference @@ -127,7 +133,7 @@ PyTorch Lightning Documentation .. toctree:: :maxdepth: 1 - :name: community + :name: Community :caption: Community diff --git a/docs/source/starter/new-project.rst b/docs/source/starter/new-project.rst index 74ad30102b4f8..07bf3624560a0 100644 --- a/docs/source/starter/new-project.rst +++ b/docs/source/starter/new-project.rst @@ -219,7 +219,7 @@ The :class:`~pytorch_lightning.trainer.Trainer` automates: * Tensorboard (see :doc:`loggers <../common/loggers>` options) * :doc:`Multi-GPU <../advanced/multi_gpu>` support * :doc:`TPU <../advanced/tpu>` -* :doc:`AMP <../advanced/amp>` support +* :ref:`16-bit precision AMP ` support .. tip:: If you prefer to manually manage optimizers you can use the :ref:`manual_opt` mode (ie: RL, GANs, etc...). diff --git a/notebooks/01-mnist-hello-world.ipynb b/notebooks/01-mnist-hello-world.ipynb deleted file mode 100644 index 88557fa8a80aa..0000000000000 --- a/notebooks/01-mnist-hello-world.ipynb +++ /dev/null @@ -1,448 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "view-in-github" - }, - "source": [ - "\"Open" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "i7XbLCXGkll9" - }, - "source": [ - "# Introduction to Pytorch Lightning ⚡\n", - "\n", - "In this notebook, we'll go over the basics of lightning by preparing models to train on the [MNIST Handwritten Digits dataset](https://en.wikipedia.org/wiki/MNIST_database).\n", - "\n", - "---\n", - " - Give us a ⭐ [on Github](https://www.github.com/PytorchLightning/pytorch-lightning/)\n", - " - Check out [the documentation](https://pytorch-lightning.readthedocs.io/en/latest/)\n", - " - Join us [on Slack](https://join.slack.com/t/pytorch-lightning/shared_invite/zt-pw5v393p-qRaDgEk24~EjiZNBpSQFgQ)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "2LODD6w9ixlT" - }, - "source": [ - "### Setup \n", - "Lightning is easy to install. 
Simply ```pip install pytorch-lightning```" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "zK7-Gg69kMnG" - }, - "outputs": [], - "source": [ - "! pip install pytorch-lightning --quiet" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "w4_TYnt_keJi" - }, - "outputs": [], - "source": [ - "import os\n", - "\n", - "import torch\n", - "from torch import nn\n", - "from torch.nn import functional as F\n", - "from torch.utils.data import DataLoader, random_split\n", - "from torchvision.datasets import MNIST\n", - "from torchvision import transforms\n", - "import pytorch_lightning as pl\n", - "from pytorch_lightning.metrics.functional import accuracy" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "EHpyMPKFkVbZ" - }, - "source": [ - "## Simplest example\n", - "\n", - "Here's the simplest most minimal example with just a training loop (no validation, no testing).\n", - "\n", - "**Keep in Mind** - A `LightningModule` *is* a PyTorch `nn.Module` - it just has a few more helpful features." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "V7ELesz1kVQo" - }, - "outputs": [], - "source": [ - "class MNISTModel(pl.LightningModule):\n", - "\n", - " def __init__(self):\n", - " super(MNISTModel, self).__init__()\n", - " self.l1 = torch.nn.Linear(28 * 28, 10)\n", - "\n", - " def forward(self, x):\n", - " return torch.relu(self.l1(x.view(x.size(0), -1)))\n", - "\n", - " def training_step(self, batch, batch_nb):\n", - " x, y = batch\n", - " loss = F.cross_entropy(self(x), y)\n", - " return loss\n", - "\n", - " def configure_optimizers(self):\n", - " return torch.optim.Adam(self.parameters(), lr=0.02)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "hIrtHg-Dv8TJ" - }, - "source": [ - "By using the `Trainer` you automatically get:\n", - "1. Tensorboard logging\n", - "2. Model checkpointing\n", - "3. Training and validation loop\n", - "4. early-stopping" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "4Dk6Ykv8lI7X" - }, - "outputs": [], - "source": [ - "# Init our model\n", - "mnist_model = MNISTModel()\n", - "\n", - "# Init DataLoader from MNIST Dataset\n", - "train_ds = MNIST(os.getcwd(), train=True, download=True, transform=transforms.ToTensor())\n", - "train_loader = DataLoader(train_ds, batch_size=32)\n", - "\n", - "# Initialize a trainer\n", - "trainer = pl.Trainer(gpus=1, max_epochs=3, progress_bar_refresh_rate=20)\n", - "\n", - "# Train the model ⚡\n", - "trainer.fit(mnist_model, train_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "KNpOoBeIjscS" - }, - "source": [ - "## A more complete MNIST Lightning Module Example\n", - "\n", - "That wasn't so hard was it?\n", - "\n", - "Now that we've got our feet wet, let's dive in a bit deeper and write a more complete `LightningModule` for MNIST...\n", - "\n", - "This time, we'll bake in all the dataset specific pieces directly in the `LightningModule`. This way, we can avoid writing extra code at the beginning of our script every time we want to run it.\n", - "\n", - "---\n", - "\n", - "### Note what the following built-in functions are doing:\n", - "\n", - "1. 
[prepare_data()](https://pytorch-lightning.readthedocs.io/en/latest/api/pytorch_lightning.core.lightning.html#pytorch_lightning.core.lightning.LightningModule.prepare_data) 💾\n", - " - This is where we can download the dataset. We point to our desired dataset and ask torchvision's `MNIST` dataset class to download if the dataset isn't found there.\n", - " - **Note we do not make any state assignments in this function** (i.e. `self.something = ...`)\n", - "\n", - "2. [setup(stage)](https://pytorch-lightning.readthedocs.io/en/latest/common/lightning-module.html#setup) ⚙️\n", - " - Loads in data from file and prepares PyTorch tensor datasets for each split (train, val, test). \n", - " - Setup expects a 'stage' arg which is used to separate logic for 'fit' and 'test'.\n", - " - If you don't mind loading all your datasets at once, you can set up a condition to allow for both 'fit' related setup and 'test' related setup to run whenever `None` is passed to `stage` (or ignore it altogether and exclude any conditionals).\n", - " - **Note this runs across all GPUs and it *is* safe to make state assignments here**\n", - "\n", - "3. [x_dataloader()](https://pytorch-lightning.readthedocs.io/en/latest/common/lightning-module.html#data-hooks) ♻️\n", - " - `train_dataloader()`, `val_dataloader()`, and `test_dataloader()` all return PyTorch `DataLoader` instances that are created by wrapping their respective datasets that we prepared in `setup()`" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "4DNItffri95Q" - }, - "outputs": [], - "source": [ - "class LitMNIST(pl.LightningModule):\n", - " \n", - " def __init__(self, data_dir='./', hidden_size=64, learning_rate=2e-4):\n", - "\n", - " super().__init__()\n", - "\n", - " # Set our init args as class attributes\n", - " self.data_dir = data_dir\n", - " self.hidden_size = hidden_size\n", - " self.learning_rate = learning_rate\n", - "\n", - " # Hardcode some dataset specific attributes\n", - " self.num_classes = 10\n", - " self.dims = (1, 28, 28)\n", - " channels, width, height = self.dims\n", - " self.transform = transforms.Compose([\n", - " transforms.ToTensor(),\n", - " transforms.Normalize((0.1307,), (0.3081,))\n", - " ])\n", - "\n", - " # Define PyTorch model\n", - " self.model = nn.Sequential(\n", - " nn.Flatten(),\n", - " nn.Linear(channels * width * height, hidden_size),\n", - " nn.ReLU(),\n", - " nn.Dropout(0.1),\n", - " nn.Linear(hidden_size, hidden_size),\n", - " nn.ReLU(),\n", - " nn.Dropout(0.1),\n", - " nn.Linear(hidden_size, self.num_classes)\n", - " )\n", - "\n", - " def forward(self, x):\n", - " x = self.model(x)\n", - " return F.log_softmax(x, dim=1)\n", - "\n", - " def training_step(self, batch, batch_idx):\n", - " x, y = batch\n", - " logits = self(x)\n", - " loss = F.nll_loss(logits, y)\n", - " return loss\n", - "\n", - " def validation_step(self, batch, batch_idx):\n", - " x, y = batch\n", - " logits = self(x)\n", - " loss = F.nll_loss(logits, y)\n", - " preds = torch.argmax(logits, dim=1)\n", - " acc = accuracy(preds, y)\n", - "\n", - " # Calling self.log will surface up scalars for you in TensorBoard\n", - " self.log('val_loss', loss, prog_bar=True)\n", - " self.log('val_acc', acc, prog_bar=True)\n", - " return loss\n", - "\n", - " def test_step(self, batch, batch_idx):\n", - " # Here we just reuse the validation_step for testing\n", - " return self.validation_step(batch, batch_idx)\n", - "\n", - " def configure_optimizers(self):\n", - " optimizer = 
torch.optim.Adam(self.parameters(), lr=self.learning_rate)\n", - " return optimizer\n", - "\n", - " ####################\n", - " # DATA RELATED HOOKS\n", - " ####################\n", - "\n", - " def prepare_data(self):\n", - " # download\n", - " MNIST(self.data_dir, train=True, download=True)\n", - " MNIST(self.data_dir, train=False, download=True)\n", - "\n", - " def setup(self, stage=None):\n", - "\n", - " # Assign train/val datasets for use in dataloaders\n", - " if stage == 'fit' or stage is None:\n", - " mnist_full = MNIST(self.data_dir, train=True, transform=self.transform)\n", - " self.mnist_train, self.mnist_val = random_split(mnist_full, [55000, 5000])\n", - "\n", - " # Assign test dataset for use in dataloader(s)\n", - " if stage == 'test' or stage is None:\n", - " self.mnist_test = MNIST(self.data_dir, train=False, transform=self.transform)\n", - "\n", - " def train_dataloader(self):\n", - " return DataLoader(self.mnist_train, batch_size=32)\n", - "\n", - " def val_dataloader(self):\n", - " return DataLoader(self.mnist_val, batch_size=32)\n", - "\n", - " def test_dataloader(self):\n", - " return DataLoader(self.mnist_test, batch_size=32)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "Mb0U5Rk2kLBy" - }, - "outputs": [], - "source": [ - "model = LitMNIST()\n", - "trainer = pl.Trainer(gpus=1, max_epochs=3, progress_bar_refresh_rate=20)\n", - "trainer.fit(model)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "nht8AvMptY6I" - }, - "source": [ - "### Testing\n", - "\n", - "To test a model, call `trainer.test(model)`.\n", - "\n", - "Or, if you've just trained a model, you can just call `trainer.test()` and Lightning will automatically test using the best saved checkpoint (conditioned on val_loss)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "PA151FkLtprO" - }, - "outputs": [], - "source": [ - "trainer.test()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "T3-3lbbNtr5T" - }, - "source": [ - "### Bonus Tip\n", - "\n", - "You can keep calling `trainer.fit(model)` as many times as you'd like to continue training" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "IFBwCbLet2r6" - }, - "outputs": [], - "source": [ - "trainer.fit(model)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "8TRyS5CCt3n9" - }, - "source": [ - "In Colab, you can use the TensorBoard magic function to view the logs that Lightning has created for you!" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "wizS-QiLuAYo" - }, - "outputs": [], - "source": [ - "# Start tensorboard.\n", - "%load_ext tensorboard\n", - "%tensorboard --logdir lightning_logs/" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "

Congratulations - Time to Join the Community!

\n", - "\n", - "\n", - "Congratulations on completing this notebook tutorial! If you enjoyed this and would like to join the Lightning movement, you can do so in the following ways!\n", - "\n", - "### Star [Lightning](https://github.com/PyTorchLightning/pytorch-lightning) on GitHub\n", - "The easiest way to help our community is just by starring the GitHub repos! This helps raise awareness of the cool tools we're building.\n", - "\n", - "* Please, star [Lightning](https://github.com/PyTorchLightning/pytorch-lightning)\n", - "\n", - "### Join our [Slack](https://join.slack.com/t/pytorch-lightning/shared_invite/zt-pw5v393p-qRaDgEk24~EjiZNBpSQFgQ)!\n", - "The best way to keep up to date on the latest advancements is to join our community! Make sure to introduce yourself and share your interests in `#general` channel\n", - "\n", - "### Interested by SOTA AI models ! Check out [Bolt](https://github.com/PyTorchLightning/lightning-bolts)\n", - "Bolts has a collection of state-of-the-art models, all implemented in [Lightning](https://github.com/PyTorchLightning/pytorch-lightning) and can be easily integrated within your own projects.\n", - "\n", - "* Please, star [Bolt](https://github.com/PyTorchLightning/lightning-bolts)\n", - "\n", - "### Contributions !\n", - "The best way to contribute to our community is to become a code contributor! At any time you can go to [Lightning](https://github.com/PyTorchLightning/pytorch-lightning) or [Bolt](https://github.com/PyTorchLightning/lightning-bolts) GitHub Issues page and filter for \"good first issue\". \n", - "\n", - "* [Lightning good first issue](https://github.com/PyTorchLightning/pytorch-lightning/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22)\n", - "* [Bolt good first issue](https://github.com/PyTorchLightning/lightning-bolts/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22)\n", - "* You can also contribute your own notebooks with useful examples !\n", - "\n", - "### Great thanks from the entire Pytorch Lightning Team for your interest !\n", - "\n", - "" - ] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "authorship_tag": "ABX9TyOtAKVa5POQ6Xg3UcTQqXDJ", - "collapsed_sections": [], - "include_colab_link": true, - "name": "01-mnist-hello-world.ipynb", - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.3" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/notebooks/02-datamodules.ipynb b/notebooks/02-datamodules.ipynb deleted file mode 100644 index f2bb7992ffabb..0000000000000 --- a/notebooks/02-datamodules.ipynb +++ /dev/null @@ -1,588 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "view-in-github" - }, - "source": [ - "\"Open" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "2O5r7QvP8-rt" - }, - "source": [ - "# PyTorch Lightning DataModules ⚡\n", - "\n", - "With the release of `pytorch-lightning` version 0.9.0, we have included a new class called `LightningDataModule` to help you decouple data related hooks from your `LightningModule`.\n", - "\n", - "This notebook will walk you through how to start using Datamodules.\n", - "\n", - "The most up to date documentation on datamodules can be 
found [here](https://pytorch-lightning.readthedocs.io/en/latest/extensions/datamodules.html).\n",
-    "\n",
-    "---\n",
-    "\n",
-    "  - Give us a ⭐ [on Github](https://www.github.com/PytorchLightning/pytorch-lightning/)\n",
-    "  - Check out [the documentation](https://pytorch-lightning.readthedocs.io/en/latest/)\n",
-    "  - Join us [on Slack](https://join.slack.com/t/pytorch-lightning/shared_invite/zt-pw5v393p-qRaDgEk24~EjiZNBpSQFgQ)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text",
-    "id": "6RYMhmfA9ATN"
-   },
-   "source": [
-    "### Setup\n",
-    "Lightning is easy to install. Simply ```pip install pytorch-lightning```"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "lj2zD-wsbvGr"
-   },
-   "outputs": [],
-   "source": [
-    "! pip install pytorch-lightning --quiet"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text",
-    "id": "8g2mbvy-9xDI"
-   },
-   "source": [
-    "# Introduction\n",
-    "\n",
-    "First, we'll go over a regular `LightningModule` implementation without the use of a `LightningDataModule`."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "eg-xDlmDdAwy"
-   },
-   "outputs": [],
-   "source": [
-    "import pytorch_lightning as pl\n",
-    "from pytorch_lightning.metrics.functional import accuracy\n",
-    "import torch\n",
-    "from torch import nn\n",
-    "import torch.nn.functional as F\n",
-    "from torch.utils.data import random_split, DataLoader\n",
-    "\n",
-    "# Note - you must have torchvision installed for this example\n",
-    "from torchvision.datasets import MNIST, CIFAR10\n",
-    "from torchvision import transforms"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text",
-    "id": "DzgY7wi88UuG"
-   },
-   "source": [
-    "## Defining the LitMNIST Model\n",
-    "\n",
-    "Below, we reuse a `LightningModule` from our hello world tutorial that classifies MNIST handwritten digits.\n",
-    "\n",
-    "Unfortunately, we have hardcoded dataset-specific items within the model, forever limiting it to working with MNIST data. 😢\n",
-    "\n",
-    "This is fine if you don't plan on training/evaluating your model on different datasets. However, in many cases, this can become bothersome when you want to try out your architecture with different datasets.\n",
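-    "\n",
-    "As a sketch of what that coupling costs (hypothetical code, not used in the rest of this notebook), reusing `LitMNIST` on another dataset would mean subclassing it and overriding `__init__` plus every data hook:\n",
-    "\n",
-    "```python\n",
-    "# Hypothetical: every dataset-specific attribute and hook must be redefined,\n",
-    "# and the nn.Sequential built in __init__ would also need rebuilding for the new input size.\n",
-    "class LitCIFAR10(LitMNIST):\n",
-    "\n",
-    "    def __init__(self, **kwargs):\n",
-    "        super().__init__(**kwargs)\n",
-    "        self.dims = (3, 32, 32)\n",
-    "        self.transform = transforms.ToTensor()\n",
-    "\n",
-    "    def prepare_data(self):\n",
-    "        CIFAR10(self.data_dir, train=True, download=True)\n",
-    "        CIFAR10(self.data_dir, train=False, download=True)\n",
-    "\n",
-    "    # ...and setup(), train_dataloader(), val_dataloader(), test_dataloader() as well\n",
-    "```"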
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "IQkW8_FF5nU2"
-   },
-   "outputs": [],
-   "source": [
-    "class LitMNIST(pl.LightningModule):\n",
-    "\n",
-    "    def __init__(self, data_dir='./', hidden_size=64, learning_rate=2e-4):\n",
-    "\n",
-    "        super().__init__()\n",
-    "\n",
-    "        # We hardcode dataset specific stuff here.\n",
-    "        self.data_dir = data_dir\n",
-    "        self.num_classes = 10\n",
-    "        self.dims = (1, 28, 28)\n",
-    "        channels, width, height = self.dims\n",
-    "        self.transform = transforms.Compose([\n",
-    "            transforms.ToTensor(),\n",
-    "            transforms.Normalize((0.1307,), (0.3081,))\n",
-    "        ])\n",
-    "\n",
-    "        self.hidden_size = hidden_size\n",
-    "        self.learning_rate = learning_rate\n",
-    "\n",
-    "        # Build model\n",
-    "        self.model = nn.Sequential(\n",
-    "            nn.Flatten(),\n",
-    "            nn.Linear(channels * width * height, hidden_size),\n",
-    "            nn.ReLU(),\n",
-    "            nn.Dropout(0.1),\n",
-    "            nn.Linear(hidden_size, hidden_size),\n",
-    "            nn.ReLU(),\n",
-    "            nn.Dropout(0.1),\n",
-    "            nn.Linear(hidden_size, self.num_classes)\n",
-    "        )\n",
-    "\n",
-    "    def forward(self, x):\n",
-    "        x = self.model(x)\n",
-    "        return F.log_softmax(x, dim=1)\n",
-    "\n",
-    "    def training_step(self, batch, batch_idx):\n",
-    "        x, y = batch\n",
-    "        logits = self(x)\n",
-    "        loss = F.nll_loss(logits, y)\n",
-    "        return loss\n",
-    "\n",
-    "    def validation_step(self, batch, batch_idx):\n",
-    "        x, y = batch\n",
-    "        logits = self(x)\n",
-    "        loss = F.nll_loss(logits, y)\n",
-    "        preds = torch.argmax(logits, dim=1)\n",
-    "        acc = accuracy(preds, y)\n",
-    "        self.log('val_loss', loss, prog_bar=True)\n",
-    "        self.log('val_acc', acc, prog_bar=True)\n",
-    "        return loss\n",
-    "\n",
-    "    def configure_optimizers(self):\n",
-    "        optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)\n",
-    "        return optimizer\n",
-    "\n",
-    "    ####################\n",
-    "    # DATA RELATED HOOKS\n",
-    "    ####################\n",
-    "\n",
-    "    def prepare_data(self):\n",
-    "        # download\n",
-    "        MNIST(self.data_dir, train=True, download=True)\n",
-    "        MNIST(self.data_dir, train=False, download=True)\n",
-    "\n",
-    "    def setup(self, stage=None):\n",
-    "\n",
-    "        # Assign train/val datasets for use in dataloaders\n",
-    "        if stage == 'fit' or stage is None:\n",
-    "            mnist_full = MNIST(self.data_dir, train=True, transform=self.transform)\n",
-    "            self.mnist_train, self.mnist_val = random_split(mnist_full, [55000, 5000])\n",
-    "\n",
-    "        # Assign test dataset for use in dataloader(s)\n",
-    "        if stage == 'test' or stage is None:\n",
-    "            self.mnist_test = MNIST(self.data_dir, train=False, transform=self.transform)\n",
-    "\n",
-    "    def train_dataloader(self):\n",
-    "        return DataLoader(self.mnist_train, batch_size=32)\n",
-    "\n",
-    "    def val_dataloader(self):\n",
-    "        return DataLoader(self.mnist_val, batch_size=32)\n",
-    "\n",
-    "    def test_dataloader(self):\n",
-    "        return DataLoader(self.mnist_test, batch_size=32)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text",
-    "id": "K7sg9KQd-QIO"
-   },
-   "source": [
-    "## Training the LitMNIST Model"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "QxDNDaus6byD"
-   },
-   "outputs": [],
-   "source": [
-    "model = LitMNIST()\n",
-    "trainer = pl.Trainer(max_epochs=2, gpus=1, progress_bar_refresh_rate=20)\n",
-    "trainer.fit(model)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text",
-    "id": "dY8d6GxmB0YU"
-   },
-   "source": [
-    "# Using 
DataModules\n", - "\n", - "DataModules are a way of decoupling data-related hooks from the `LightningModule` so you can develop dataset agnostic models." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "eJeT5bW081wn" - }, - "source": [ - "## Defining The MNISTDataModule\n", - "\n", - "Let's go over each function in the class below and talk about what they're doing:\n", - "\n", - "1. ```__init__```\n", - " - Takes in a `data_dir` arg that points to where you have downloaded/wish to download the MNIST dataset.\n", - " - Defines a transform that will be applied across train, val, and test dataset splits.\n", - " - Defines default `self.dims`, which is a tuple returned from `datamodule.size()` that can help you initialize models.\n", - "\n", - "\n", - "2. ```prepare_data```\n", - " - This is where we can download the dataset. We point to our desired dataset and ask torchvision's `MNIST` dataset class to download if the dataset isn't found there.\n", - " - **Note we do not make any state assignments in this function** (i.e. `self.something = ...`)\n", - "\n", - "3. ```setup```\n", - " - Loads in data from file and prepares PyTorch tensor datasets for each split (train, val, test). \n", - " - Setup expects a 'stage' arg which is used to separate logic for 'fit' and 'test'.\n", - " - If you don't mind loading all your datasets at once, you can set up a condition to allow for both 'fit' related setup and 'test' related setup to run whenever `None` is passed to `stage`.\n", - " - **Note this runs across all GPUs and it *is* safe to make state assignments here**\n", - "\n", - "\n", - "4. ```x_dataloader```\n", - " - `train_dataloader()`, `val_dataloader()`, and `test_dataloader()` all return PyTorch `DataLoader` instances that are created by wrapping their respective datasets that we prepared in `setup()`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "DfGKyGwG_X9v" - }, - "outputs": [], - "source": [ - "class MNISTDataModule(pl.LightningDataModule):\n", - "\n", - " def __init__(self, data_dir: str = './'):\n", - " super().__init__()\n", - " self.data_dir = data_dir\n", - " self.transform = transforms.Compose([\n", - " transforms.ToTensor(),\n", - " transforms.Normalize((0.1307,), (0.3081,))\n", - " ])\n", - "\n", - " # self.dims is returned when you call dm.size()\n", - " # Setting default dims here because we know them.\n", - " # Could optionally be assigned dynamically in dm.setup()\n", - " self.dims = (1, 28, 28)\n", - " self.num_classes = 10\n", - "\n", - " def prepare_data(self):\n", - " # download\n", - " MNIST(self.data_dir, train=True, download=True)\n", - " MNIST(self.data_dir, train=False, download=True)\n", - "\n", - " def setup(self, stage=None):\n", - "\n", - " # Assign train/val datasets for use in dataloaders\n", - " if stage == 'fit' or stage is None:\n", - " mnist_full = MNIST(self.data_dir, train=True, transform=self.transform)\n", - " self.mnist_train, self.mnist_val = random_split(mnist_full, [55000, 5000])\n", - "\n", - " # Assign test dataset for use in dataloader(s)\n", - " if stage == 'test' or stage is None:\n", - " self.mnist_test = MNIST(self.data_dir, train=False, transform=self.transform)\n", - "\n", - " def train_dataloader(self):\n", - " return DataLoader(self.mnist_train, batch_size=32)\n", - "\n", - " def val_dataloader(self):\n", - " return DataLoader(self.mnist_val, batch_size=32)\n", - "\n", - " def test_dataloader(self):\n", - " return 
DataLoader(self.mnist_test, batch_size=32)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "H2Yoj-9M9dS7" - }, - "source": [ - "## Defining the dataset agnostic `LitModel`\n", - "\n", - "Below, we define the same model as the `LitMNIST` model we made earlier. \n", - "\n", - "However, this time our model has the freedom to use any input data that we'd like 🔥." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "PM2IISuOBDIu" - }, - "outputs": [], - "source": [ - "class LitModel(pl.LightningModule):\n", - " \n", - " def __init__(self, channels, width, height, num_classes, hidden_size=64, learning_rate=2e-4):\n", - "\n", - " super().__init__()\n", - "\n", - " # We take in input dimensions as parameters and use those to dynamically build model.\n", - " self.channels = channels\n", - " self.width = width\n", - " self.height = height\n", - " self.num_classes = num_classes\n", - " self.hidden_size = hidden_size\n", - " self.learning_rate = learning_rate\n", - "\n", - " self.model = nn.Sequential(\n", - " nn.Flatten(),\n", - " nn.Linear(channels * width * height, hidden_size),\n", - " nn.ReLU(),\n", - " nn.Dropout(0.1),\n", - " nn.Linear(hidden_size, hidden_size),\n", - " nn.ReLU(),\n", - " nn.Dropout(0.1),\n", - " nn.Linear(hidden_size, num_classes)\n", - " )\n", - "\n", - " def forward(self, x):\n", - " x = self.model(x)\n", - " return F.log_softmax(x, dim=1)\n", - "\n", - " def training_step(self, batch, batch_idx):\n", - " x, y = batch\n", - " logits = self(x)\n", - " loss = F.nll_loss(logits, y)\n", - " return loss\n", - "\n", - " def validation_step(self, batch, batch_idx):\n", - "\n", - " x, y = batch\n", - " logits = self(x)\n", - " loss = F.nll_loss(logits, y)\n", - " preds = torch.argmax(logits, dim=1)\n", - " acc = accuracy(preds, y)\n", - " self.log('val_loss', loss, prog_bar=True)\n", - " self.log('val_acc', acc, prog_bar=True)\n", - " return loss\n", - "\n", - " def configure_optimizers(self):\n", - " optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)\n", - " return optimizer" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "G4Z5olPe-xEo" - }, - "source": [ - "## Training the `LitModel` using the `MNISTDataModule`\n", - "\n", - "Now, we initialize and train the `LitModel` using the `MNISTDataModule`'s configuration settings and dataloaders." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "kV48vP_9mEli" - }, - "outputs": [], - "source": [ - "# Init DataModule\n", - "dm = MNISTDataModule()\n", - "# Init model from datamodule's attributes\n", - "model = LitModel(*dm.size(), dm.num_classes)\n", - "# Init trainer\n", - "trainer = pl.Trainer(max_epochs=3, progress_bar_refresh_rate=20, gpus=1)\n", - "# Pass the datamodule as arg to trainer.fit to override model hooks :)\n", - "trainer.fit(model, dm)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "WNxrugIGRRv5" - }, - "source": [ - "## Defining the CIFAR10 DataModule\n", - "\n", - "Lets prove the `LitModel` we made earlier is dataset agnostic by defining a new datamodule for the CIFAR10 dataset." 
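-    "\n",
-    "Once the datamodule below is defined, swapping datasets reduces to swapping datamodules. Here is a sketch of the idea (using the two datamodules from this notebook):\n",
-    "\n",
-    "```python\n",
-    "# The same LitModel trains on either dataset; only the datamodule changes.\n",
-    "for dm_cls in (MNISTDataModule, CIFAR10DataModule):\n",
-    "    dm = dm_cls()\n",
-    "    model = LitModel(*dm.size(), dm.num_classes)\n",
-    "    pl.Trainer(max_epochs=1).fit(model, dm)\n",
-    "```"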
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "1tkaYLU7RT5P" - }, - "outputs": [], - "source": [ - "class CIFAR10DataModule(pl.LightningDataModule):\n", - "\n", - " def __init__(self, data_dir: str = './'):\n", - " super().__init__()\n", - " self.data_dir = data_dir\n", - " self.transform = transforms.Compose([\n", - " transforms.ToTensor(),\n", - " transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))\n", - " ])\n", - "\n", - " self.dims = (3, 32, 32)\n", - " self.num_classes = 10\n", - "\n", - " def prepare_data(self):\n", - " # download\n", - " CIFAR10(self.data_dir, train=True, download=True)\n", - " CIFAR10(self.data_dir, train=False, download=True)\n", - "\n", - " def setup(self, stage=None):\n", - "\n", - " # Assign train/val datasets for use in dataloaders\n", - " if stage == 'fit' or stage is None:\n", - " cifar_full = CIFAR10(self.data_dir, train=True, transform=self.transform)\n", - " self.cifar_train, self.cifar_val = random_split(cifar_full, [45000, 5000])\n", - "\n", - " # Assign test dataset for use in dataloader(s)\n", - " if stage == 'test' or stage is None:\n", - " self.cifar_test = CIFAR10(self.data_dir, train=False, transform=self.transform)\n", - "\n", - " def train_dataloader(self):\n", - " return DataLoader(self.cifar_train, batch_size=32)\n", - "\n", - " def val_dataloader(self):\n", - " return DataLoader(self.cifar_val, batch_size=32)\n", - "\n", - " def test_dataloader(self):\n", - " return DataLoader(self.cifar_test, batch_size=32)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "BrXxf3oX_gsZ" - }, - "source": [ - "## Training the `LitModel` using the `CIFAR10DataModule`\n", - "\n", - "Our model isn't very good, so it will perform pretty badly on the CIFAR10 dataset.\n", - "\n", - "The point here is that we can see that our `LitModel` has no problem using a different datamodule as its input data." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "sd-SbWi_krdj" - }, - "outputs": [], - "source": [ - "dm = CIFAR10DataModule()\n", - "model = LitModel(*dm.size(), dm.num_classes, hidden_size=256)\n", - "trainer = pl.Trainer(max_epochs=5, progress_bar_refresh_rate=20, gpus=1)\n", - "trainer.fit(model, dm)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "

Congratulations - Time to Join the Community!

\n", - "
\n", - "\n", - "Congratulations on completing this notebook tutorial! If you enjoyed this and would like to join the Lightning movement, you can do so in the following ways!\n", - "\n", - "### Star [Lightning](https://github.com/PyTorchLightning/pytorch-lightning) on GitHub\n", - "The easiest way to help our community is just by starring the GitHub repos! This helps raise awareness of the cool tools we're building.\n", - "\n", - "* Please, star [Lightning](https://github.com/PyTorchLightning/pytorch-lightning)\n", - "\n", - "### Join our [Slack](https://join.slack.com/t/pytorch-lightning/shared_invite/zt-pw5v393p-qRaDgEk24~EjiZNBpSQFgQ)!\n", - "The best way to keep up to date on the latest advancements is to join our community! Make sure to introduce yourself and share your interests in `#general` channel\n", - "\n", - "### Interested by SOTA AI models ! Check out [Bolt](https://github.com/PyTorchLightning/lightning-bolts)\n", - "Bolts has a collection of state-of-the-art models, all implemented in [Lightning](https://github.com/PyTorchLightning/pytorch-lightning) and can be easily integrated within your own projects.\n", - "\n", - "* Please, star [Bolt](https://github.com/PyTorchLightning/lightning-bolts)\n", - "\n", - "### Contributions !\n", - "The best way to contribute to our community is to become a code contributor! At any time you can go to [Lightning](https://github.com/PyTorchLightning/pytorch-lightning) or [Bolt](https://github.com/PyTorchLightning/lightning-bolts) GitHub Issues page and filter for \"good first issue\". \n", - "\n", - "* [Lightning good first issue](https://github.com/PyTorchLightning/pytorch-lightning/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22)\n", - "* [Bolt good first issue](https://github.com/PyTorchLightning/lightning-bolts/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22)\n", - "* You can also contribute your own notebooks with useful examples !\n", - "\n", - "### Great thanks from the entire Pytorch Lightning Team for your interest !\n", - "\n", - "" - ] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "collapsed_sections": [], - "include_colab_link": true, - "name": "02-datamodules.ipynb", - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.3" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/notebooks/03-basic-gan.ipynb b/notebooks/03-basic-gan.ipynb deleted file mode 100644 index 523702a8fcb62..0000000000000 --- a/notebooks/03-basic-gan.ipynb +++ /dev/null @@ -1,472 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "view-in-github" - }, - "source": [ - "\"Open" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "J37PBnE_x7IW" - }, - "source": [ - "# PyTorch Lightning Basic GAN Tutorial ⚡\n", - "\n", - "How to train a GAN!\n", - "\n", - "Main takeaways:\n", - "1. Generator and discriminator are arbitrary PyTorch modules.\n", - "2. 
training_step does both the generator and discriminator training.\n", - "\n", - "---\n", - "\n", - " - Give us a ⭐ [on Github](https://www.github.com/PytorchLightning/pytorch-lightning/)\n", - " - Check out [the documentation](https://pytorch-lightning.readthedocs.io/en/latest/)\n", - " - Join us [on Slack](https://join.slack.com/t/pytorch-lightning/shared_invite/zt-pw5v393p-qRaDgEk24~EjiZNBpSQFgQ)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "kg2MKpRmybht" - }, - "source": [ - "### Setup\n", - "Lightning is easy to install. Simply `pip install pytorch-lightning`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "LfrJLKPFyhsK" - }, - "outputs": [], - "source": [ - "! pip install pytorch-lightning --quiet" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "BjEPuiVLyanw" - }, - "outputs": [], - "source": [ - "import os\n", - "from argparse import ArgumentParser\n", - "from collections import OrderedDict\n", - "\n", - "import numpy as np\n", - "import torch\n", - "import torch.nn as nn\n", - "import torch.nn.functional as F\n", - "import torchvision\n", - "import torchvision.transforms as transforms\n", - "from torch.utils.data import DataLoader, random_split\n", - "from torchvision.datasets import MNIST\n", - "\n", - "import pytorch_lightning as pl" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "OuXJzr4G2uHV" - }, - "source": [ - "### MNIST DataModule\n", - "\n", - "Below, we define a DataModule for the MNIST Dataset. To learn more about DataModules, check out our tutorial on them or see the [latest docs](https://pytorch-lightning.readthedocs.io/en/latest/extensions/datamodules.html)." 
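-    "\n",
-    "One optional tweak, shown only as a sketch and not applied in the cell below: pass a seeded generator to `random_split` so the train/val partition is reproducible across runs.\n",
-    "\n",
-    "```python\n",
-    "# Hypothetical variant of the split in setup(), with a fixed seed.\n",
-    "mnist_train, mnist_val = random_split(\n",
-    "    mnist_full, [55000, 5000], generator=torch.Generator().manual_seed(42)\n",
-    ")\n",
-    "```"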
- ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "DOY_nHu328g7" - }, - "outputs": [], - "source": [ - "class MNISTDataModule(pl.LightningDataModule):\n", - "\n", - " def __init__(self, data_dir: str = './', batch_size: int = 64, num_workers: int = 8):\n", - " super().__init__()\n", - " self.data_dir = data_dir\n", - " self.batch_size = batch_size\n", - " self.num_workers = num_workers\n", - "\n", - " self.transform = transforms.Compose([\n", - " transforms.ToTensor(),\n", - " transforms.Normalize((0.1307,), (0.3081,))\n", - " ])\n", - "\n", - " # self.dims is returned when you call dm.size()\n", - " # Setting default dims here because we know them.\n", - " # Could optionally be assigned dynamically in dm.setup()\n", - " self.dims = (1, 28, 28)\n", - " self.num_classes = 10\n", - "\n", - " def prepare_data(self):\n", - " # download\n", - " MNIST(self.data_dir, train=True, download=True)\n", - " MNIST(self.data_dir, train=False, download=True)\n", - "\n", - " def setup(self, stage=None):\n", - "\n", - " # Assign train/val datasets for use in dataloaders\n", - " if stage == 'fit' or stage is None:\n", - " mnist_full = MNIST(self.data_dir, train=True, transform=self.transform)\n", - " self.mnist_train, self.mnist_val = random_split(mnist_full, [55000, 5000])\n", - "\n", - " # Assign test dataset for use in dataloader(s)\n", - " if stage == 'test' or stage is None:\n", - " self.mnist_test = MNIST(self.data_dir, train=False, transform=self.transform)\n", - "\n", - " def train_dataloader(self):\n", - " return DataLoader(self.mnist_train, batch_size=self.batch_size, num_workers=self.num_workers)\n", - "\n", - " def val_dataloader(self):\n", - " return DataLoader(self.mnist_val, batch_size=self.batch_size, num_workers=self.num_workers)\n", - "\n", - " def test_dataloader(self):\n", - " return DataLoader(self.mnist_test, batch_size=self.batch_size, num_workers=self.num_workers)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "tW3c0QrQyF9P" - }, - "source": [ - "### A. Generator" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "0E2QDjl5yWtz" - }, - "outputs": [], - "source": [ - "class Generator(nn.Module):\n", - " def __init__(self, latent_dim, img_shape):\n", - " super().__init__()\n", - " self.img_shape = img_shape\n", - "\n", - " def block(in_feat, out_feat, normalize=True):\n", - " layers = [nn.Linear(in_feat, out_feat)]\n", - " if normalize:\n", - " layers.append(nn.BatchNorm1d(out_feat, 0.8))\n", - " layers.append(nn.LeakyReLU(0.2, inplace=True))\n", - " return layers\n", - "\n", - " self.model = nn.Sequential(\n", - " *block(latent_dim, 128, normalize=False),\n", - " *block(128, 256),\n", - " *block(256, 512),\n", - " *block(512, 1024),\n", - " nn.Linear(1024, int(np.prod(img_shape))),\n", - " nn.Tanh()\n", - " )\n", - "\n", - " def forward(self, z):\n", - " img = self.model(z)\n", - " img = img.view(img.size(0), *self.img_shape)\n", - " return img" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "uyrltsGvyaI3" - }, - "source": [ - "### B. 
Discriminator" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "Ed3MR3vnyxyW" - }, - "outputs": [], - "source": [ - "class Discriminator(nn.Module):\n", - " def __init__(self, img_shape):\n", - " super().__init__()\n", - "\n", - " self.model = nn.Sequential(\n", - " nn.Linear(int(np.prod(img_shape)), 512),\n", - " nn.LeakyReLU(0.2, inplace=True),\n", - " nn.Linear(512, 256),\n", - " nn.LeakyReLU(0.2, inplace=True),\n", - " nn.Linear(256, 1),\n", - " nn.Sigmoid(),\n", - " )\n", - "\n", - " def forward(self, img):\n", - " img_flat = img.view(img.size(0), -1)\n", - " validity = self.model(img_flat)\n", - "\n", - " return validity" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "BwUMom3ryySK" - }, - "source": [ - "### C. GAN\n", - "\n", - "#### A couple of cool features to check out in this example...\n", - "\n", - " - We use `some_tensor.type_as(another_tensor)` to make sure we initialize new tensors on the right device (i.e. GPU, CPU).\n", - " - Lightning will put your dataloader data on the right device automatically\n", - " - In this example, we pull from latent dim on the fly, so we need to dynamically add tensors to the right device.\n", - " - `type_as` is the way we recommend to do this.\n", - " - This example shows how to use multiple dataloaders in your `LightningModule`." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "3vKszYf6y1Vv" - }, - "outputs": [], - "source": [ - " class GAN(pl.LightningModule):\n", - "\n", - " def __init__(\n", - " self,\n", - " channels,\n", - " width,\n", - " height,\n", - " latent_dim: int = 100,\n", - " lr: float = 0.0002,\n", - " b1: float = 0.5,\n", - " b2: float = 0.999,\n", - " batch_size: int = 64,\n", - " **kwargs\n", - " ):\n", - " super().__init__()\n", - " self.save_hyperparameters()\n", - "\n", - " # networks\n", - " data_shape = (channels, width, height)\n", - " self.generator = Generator(latent_dim=self.hparams.latent_dim, img_shape=data_shape)\n", - " self.discriminator = Discriminator(img_shape=data_shape)\n", - "\n", - " self.validation_z = torch.randn(8, self.hparams.latent_dim)\n", - "\n", - " self.example_input_array = torch.zeros(2, self.hparams.latent_dim)\n", - "\n", - " def forward(self, z):\n", - " return self.generator(z)\n", - "\n", - " def adversarial_loss(self, y_hat, y):\n", - " return F.binary_cross_entropy(y_hat, y)\n", - "\n", - " def training_step(self, batch, batch_idx, optimizer_idx):\n", - " imgs, _ = batch\n", - "\n", - " # sample noise\n", - " z = torch.randn(imgs.shape[0], self.hparams.latent_dim)\n", - " z = z.type_as(imgs)\n", - "\n", - " # train generator\n", - " if optimizer_idx == 0:\n", - "\n", - " # generate images\n", - " self.generated_imgs = self(z)\n", - "\n", - " # log sampled images\n", - " sample_imgs = self.generated_imgs[:6]\n", - " grid = torchvision.utils.make_grid(sample_imgs)\n", - " self.logger.experiment.add_image('generated_images', grid, 0)\n", - "\n", - " # ground truth result (ie: all fake)\n", - " # put on GPU because we created this tensor inside training_loop\n", - " valid = torch.ones(imgs.size(0), 1)\n", - " valid = valid.type_as(imgs)\n", - "\n", - " # adversarial loss is binary cross-entropy\n", - " g_loss = self.adversarial_loss(self.discriminator(self(z)), valid)\n", - " tqdm_dict = {'g_loss': g_loss}\n", - " output = OrderedDict({\n", - " 'loss': g_loss,\n", - " 'progress_bar': tqdm_dict,\n", - 
" 'log': tqdm_dict\n", - " })\n", - " return output\n", - "\n", - " # train discriminator\n", - " if optimizer_idx == 1:\n", - " # Measure discriminator's ability to classify real from generated samples\n", - "\n", - " # how well can it label as real?\n", - " valid = torch.ones(imgs.size(0), 1)\n", - " valid = valid.type_as(imgs)\n", - "\n", - " real_loss = self.adversarial_loss(self.discriminator(imgs), valid)\n", - "\n", - " # how well can it label as fake?\n", - " fake = torch.zeros(imgs.size(0), 1)\n", - " fake = fake.type_as(imgs)\n", - "\n", - " fake_loss = self.adversarial_loss(\n", - " self.discriminator(self(z).detach()), fake)\n", - "\n", - " # discriminator loss is the average of these\n", - " d_loss = (real_loss + fake_loss) / 2\n", - " tqdm_dict = {'d_loss': d_loss}\n", - " output = OrderedDict({\n", - " 'loss': d_loss,\n", - " 'progress_bar': tqdm_dict,\n", - " 'log': tqdm_dict\n", - " })\n", - " return output\n", - "\n", - " def configure_optimizers(self):\n", - " lr = self.hparams.lr\n", - " b1 = self.hparams.b1\n", - " b2 = self.hparams.b2\n", - "\n", - " opt_g = torch.optim.Adam(self.generator.parameters(), lr=lr, betas=(b1, b2))\n", - " opt_d = torch.optim.Adam(self.discriminator.parameters(), lr=lr, betas=(b1, b2))\n", - " return [opt_g, opt_d], []\n", - "\n", - " def on_epoch_end(self):\n", - " z = self.validation_z.type_as(self.generator.model[0].weight)\n", - "\n", - " # log sampled images\n", - " sample_imgs = self(z)\n", - " grid = torchvision.utils.make_grid(sample_imgs)\n", - " self.logger.experiment.add_image('generated_images', grid, self.current_epoch)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "Ey5FmJPnzm_E" - }, - "outputs": [], - "source": [ - "dm = MNISTDataModule()\n", - "model = GAN(*dm.size())\n", - "trainer = pl.Trainer(gpus=1, max_epochs=5, progress_bar_refresh_rate=20)\n", - "trainer.fit(model, dm)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "MlECc7cHzolp" - }, - "outputs": [], - "source": [ - "# Start tensorboard.\n", - "%load_ext tensorboard\n", - "%tensorboard --logdir lightning_logs/" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "

Congratulations - Time to Join the Community!

\n", - "
\n", - "\n", - "Congratulations on completing this notebook tutorial! If you enjoyed this and would like to join the Lightning movement, you can do so in the following ways!\n", - "\n", - "### Star [Lightning](https://github.com/PyTorchLightning/pytorch-lightning) on GitHub\n", - "The easiest way to help our community is just by starring the GitHub repos! This helps raise awareness of the cool tools we're building.\n", - "\n", - "* Please, star [Lightning](https://github.com/PyTorchLightning/pytorch-lightning)\n", - "\n", - "### Join our [Slack](https://join.slack.com/t/pytorch-lightning/shared_invite/zt-pw5v393p-qRaDgEk24~EjiZNBpSQFgQ)!\n", - "The best way to keep up to date on the latest advancements is to join our community! Make sure to introduce yourself and share your interests in `#general` channel\n", - "\n", - "### Interested by SOTA AI models ! Check out [Bolt](https://github.com/PyTorchLightning/lightning-bolts)\n", - "Bolts has a collection of state-of-the-art models, all implemented in [Lightning](https://github.com/PyTorchLightning/pytorch-lightning) and can be easily integrated within your own projects.\n", - "\n", - "* Please, star [Bolt](https://github.com/PyTorchLightning/lightning-bolts)\n", - "\n", - "### Contributions !\n", - "The best way to contribute to our community is to become a code contributor! At any time you can go to [Lightning](https://github.com/PyTorchLightning/pytorch-lightning) or [Bolt](https://github.com/PyTorchLightning/lightning-bolts) GitHub Issues page and filter for \"good first issue\". \n", - "\n", - "* [Lightning good first issue](https://github.com/PyTorchLightning/pytorch-lightning/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22)\n", - "* [Bolt good first issue](https://github.com/PyTorchLightning/lightning-bolts/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22)\n", - "* You can also contribute your own notebooks with useful examples !\n", - "\n", - "### Great thanks from the entire Pytorch Lightning Team for your interest !\n", - "\n", - "" - ] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "collapsed_sections": [], - "include_colab_link": true, - "name": "03-basic-gan.ipynb", - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.3" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/notebooks/04-transformers-text-classification.ipynb b/notebooks/04-transformers-text-classification.ipynb deleted file mode 100644 index fc80e9904a772..0000000000000 --- a/notebooks/04-transformers-text-classification.ipynb +++ /dev/null @@ -1,599 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\"Open" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "8ag5ANQPJ_j9" - }, - "source": [ - "# Finetune 🤗 Transformers Models with PyTorch Lightning ⚡\n", - "\n", - "This notebook will use HuggingFace's `datasets` library to get data, which will be wrapped in a `LightningDataModule`. Then, we write a class to perform text classification on any dataset from the[ GLUE Benchmark](https://gluebenchmark.com/). 
(We just show CoLA and MRPC due to constraint on compute/disk)\n", - "\n", - "[HuggingFace's NLP Viewer](https://huggingface.co/nlp/viewer/?dataset=glue&config=cola) can help you get a feel for the two datasets we will use and what tasks they are solving for.\n", - "\n", - "---\n", - " - Give us a ⭐ [on Github](https://www.github.com/PytorchLightning/pytorch-lightning/)\n", - " - Check out [the documentation](https://pytorch-lightning.readthedocs.io/en/latest/)\n", - " - Ask a question on [GitHub Discussions](https://github.com/PyTorchLightning/pytorch-lightning/discussions/)\n", - " - Join us [on Slack](https://join.slack.com/t/pytorch-lightning/shared_invite/zt-pw5v393p-qRaDgEk24~EjiZNBpSQFgQ)\n", - "\n", - " - [HuggingFace datasets](https://github.com/huggingface/datasets)\n", - " - [HuggingFace transformers](https://github.com/huggingface/transformers)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "fqlsVTj7McZ3" - }, - "source": [ - "### Setup" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "OIhHrRL-MnKK" - }, - "outputs": [], - "source": [ - "!pip install pytorch-lightning datasets transformers" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "6yuQT_ZQMpCg" - }, - "outputs": [], - "source": [ - "from argparse import ArgumentParser\n", - "from datetime import datetime\n", - "from typing import Optional\n", - "\n", - "import datasets\n", - "import numpy as np\n", - "import pytorch_lightning as pl\n", - "import torch\n", - "from torch.utils.data import DataLoader\n", - "from transformers import (\n", - " AdamW,\n", - " AutoModelForSequenceClassification,\n", - " AutoConfig,\n", - " AutoTokenizer,\n", - " get_linear_schedule_with_warmup,\n", - " glue_compute_metrics\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "9ORJfiuiNZ_N" - }, - "source": [ - "## GLUE DataModule" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "jW9xQhZxMz1G" - }, - "outputs": [], - "source": [ - "class GLUEDataModule(pl.LightningDataModule):\n", - "\n", - " task_text_field_map = {\n", - " 'cola': ['sentence'],\n", - " 'sst2': ['sentence'],\n", - " 'mrpc': ['sentence1', 'sentence2'],\n", - " 'qqp': ['question1', 'question2'],\n", - " 'stsb': ['sentence1', 'sentence2'],\n", - " 'mnli': ['premise', 'hypothesis'],\n", - " 'qnli': ['question', 'sentence'],\n", - " 'rte': ['sentence1', 'sentence2'],\n", - " 'wnli': ['sentence1', 'sentence2'],\n", - " 'ax': ['premise', 'hypothesis']\n", - " }\n", - "\n", - " glue_task_num_labels = {\n", - " 'cola': 2,\n", - " 'sst2': 2,\n", - " 'mrpc': 2,\n", - " 'qqp': 2,\n", - " 'stsb': 1,\n", - " 'mnli': 3,\n", - " 'qnli': 2,\n", - " 'rte': 2,\n", - " 'wnli': 2,\n", - " 'ax': 3\n", - " }\n", - "\n", - " loader_columns = [\n", - " 'datasets_idx',\n", - " 'input_ids',\n", - " 'token_type_ids',\n", - " 'attention_mask',\n", - " 'start_positions',\n", - " 'end_positions',\n", - " 'labels'\n", - " ]\n", - "\n", - " def __init__(\n", - " self,\n", - " model_name_or_path: str,\n", - " task_name: str ='mrpc',\n", - " max_seq_length: int = 128,\n", - " train_batch_size: int = 32,\n", - " eval_batch_size: int = 32,\n", - " **kwargs\n", - " ):\n", - " super().__init__()\n", - " self.model_name_or_path = model_name_or_path\n", - " self.task_name = task_name\n", - " 
self.max_seq_length = max_seq_length\n", - " self.train_batch_size = train_batch_size\n", - " self.eval_batch_size = eval_batch_size\n", - "\n", - " self.text_fields = self.task_text_field_map[task_name]\n", - " self.num_labels = self.glue_task_num_labels[task_name]\n", - " self.tokenizer = AutoTokenizer.from_pretrained(self.model_name_or_path, use_fast=True)\n", - "\n", - " def setup(self, stage):\n", - " self.dataset = datasets.load_dataset('glue', self.task_name)\n", - "\n", - " for split in self.dataset.keys():\n", - " self.dataset[split] = self.dataset[split].map(\n", - " self.convert_to_features,\n", - " batched=True,\n", - " remove_columns=['label'],\n", - " )\n", - " self.columns = [c for c in self.dataset[split].column_names if c in self.loader_columns]\n", - " self.dataset[split].set_format(type=\"torch\", columns=self.columns)\n", - "\n", - " self.eval_splits = [x for x in self.dataset.keys() if 'validation' in x]\n", - "\n", - " def prepare_data(self):\n", - " datasets.load_dataset('glue', self.task_name)\n", - " AutoTokenizer.from_pretrained(self.model_name_or_path, use_fast=True)\n", - " \n", - " def train_dataloader(self):\n", - " return DataLoader(self.dataset['train'], batch_size=self.train_batch_size)\n", - " \n", - " def val_dataloader(self):\n", - " if len(self.eval_splits) == 1:\n", - " return DataLoader(self.dataset['validation'], batch_size=self.eval_batch_size)\n", - " elif len(self.eval_splits) > 1:\n", - " return [DataLoader(self.dataset[x], batch_size=self.eval_batch_size) for x in self.eval_splits]\n", - "\n", - " def test_dataloader(self):\n", - " if len(self.eval_splits) == 1:\n", - " return DataLoader(self.dataset['test'], batch_size=self.eval_batch_size)\n", - " elif len(self.eval_splits) > 1:\n", - " return [DataLoader(self.dataset[x], batch_size=self.eval_batch_size) for x in self.eval_splits]\n", - "\n", - " def convert_to_features(self, example_batch, indices=None):\n", - "\n", - " # Either encode single sentence or sentence pairs\n", - " if len(self.text_fields) > 1:\n", - " texts_or_text_pairs = list(zip(example_batch[self.text_fields[0]], example_batch[self.text_fields[1]]))\n", - " else:\n", - " texts_or_text_pairs = example_batch[self.text_fields[0]]\n", - "\n", - " # Tokenize the text/text pairs\n", - " features = self.tokenizer.batch_encode_plus(\n", - " texts_or_text_pairs,\n", - " max_length=self.max_seq_length,\n", - " pad_to_max_length=True,\n", - " truncation=True\n", - " )\n", - "\n", - " # Rename label to labels to make it easier to pass to model forward\n", - " features['labels'] = example_batch['label']\n", - "\n", - " return features" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "jQC3a6KuOpX3" - }, - "source": [ - "#### You could use this datamodule with standalone PyTorch if you wanted..." 
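-    "\n",
-    "For instance, here is a minimal sketch of a plain PyTorch training loop over its dataloader (illustrative only; it assumes the default 'mrpc' task, and the model/optimizer here are stand-ins):\n",
-    "\n",
-    "```python\n",
-    "dm = GLUEDataModule('distilbert-base-uncased')\n",
-    "dm.prepare_data()\n",
-    "dm.setup('fit')\n",
-    "\n",
-    "model = AutoModelForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=dm.num_labels)\n",
-    "optimizer = AdamW(model.parameters(), lr=2e-5)\n",
-    "\n",
-    "model.train()\n",
-    "for batch in dm.train_dataloader():\n",
-    "    loss = model(**batch).loss  # batches already hold input_ids/attention_mask/labels tensors\n",
-    "    loss.backward()\n",
-    "    optimizer.step()\n",
-    "    optimizer.zero_grad()\n",
-    "```"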
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "JCMH3IAsNffF"
-   },
-   "outputs": [],
-   "source": [
-    "dm = GLUEDataModule('distilbert-base-uncased')\n",
-    "dm.prepare_data()\n",
-    "dm.setup('fit')\n",
-    "next(iter(dm.train_dataloader()))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text",
-    "id": "l9fQ_67BO2Lj"
-   },
-   "source": [
-    "## GLUE Model"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "gtn5YGKYO65B"
-   },
-   "outputs": [],
-   "source": [
-    "class GLUETransformer(pl.LightningModule):\n",
-    "    def __init__(\n",
-    "        self,\n",
-    "        model_name_or_path: str,\n",
-    "        num_labels: int,\n",
-    "        learning_rate: float = 2e-5,\n",
-    "        adam_epsilon: float = 1e-8,\n",
-    "        warmup_steps: int = 0,\n",
-    "        weight_decay: float = 0.0,\n",
-    "        train_batch_size: int = 32,\n",
-    "        eval_batch_size: int = 32,\n",
-    "        eval_splits: Optional[list] = None,\n",
-    "        **kwargs\n",
-    "    ):\n",
-    "        super().__init__()\n",
-    "\n",
-    "        self.save_hyperparameters()\n",
-    "\n",
-    "        self.config = AutoConfig.from_pretrained(model_name_or_path, num_labels=num_labels)\n",
-    "        self.model = AutoModelForSequenceClassification.from_pretrained(model_name_or_path, config=self.config)\n",
-    "        self.metric = datasets.load_metric(\n",
-    "            'glue',\n",
-    "            self.hparams.task_name,\n",
-    "            experiment_id=datetime.now().strftime(\"%d-%m-%Y_%H-%M-%S\")\n",
-    "        )\n",
-    "\n",
-    "    def forward(self, **inputs):\n",
-    "        return self.model(**inputs)\n",
-    "\n",
-    "    def training_step(self, batch, batch_idx):\n",
-    "        outputs = self(**batch)\n",
-    "        loss = outputs[0]\n",
-    "        return loss\n",
-    "\n",
-    "    def validation_step(self, batch, batch_idx, dataloader_idx=0):\n",
-    "        outputs = self(**batch)\n",
-    "        val_loss, logits = outputs[:2]\n",
-    "\n",
-    "        # stsb is a regression task (num_labels == 1); every other task is classification\n",
-    "        if self.hparams.num_labels > 1:\n",
-    "            preds = torch.argmax(logits, axis=1)\n",
-    "        elif self.hparams.num_labels == 1:\n",
-    "            preds = logits.squeeze()\n",
-    "\n",
-    "        labels = batch[\"labels\"]\n",
-    "\n",
-    "        return {'loss': val_loss, \"preds\": preds, \"labels\": labels}\n",
-    "\n",
-    "    def validation_epoch_end(self, outputs):\n",
-    "        if self.hparams.task_name == 'mnli':\n",
-    "            for i, output in enumerate(outputs):\n",
-    "                # matched or mismatched\n",
-    "                split = self.hparams.eval_splits[i].split('_')[-1]\n",
-    "                preds = torch.cat([x['preds'] for x in output]).detach().cpu().numpy()\n",
-    "                labels = torch.cat([x['labels'] for x in output]).detach().cpu().numpy()\n",
-    "                loss = torch.stack([x['loss'] for x in output]).mean()\n",
-    "                self.log(f'val_loss_{split}', loss, prog_bar=True)\n",
-    "                split_metrics = {f\"{k}_{split}\": v for k, v in self.metric.compute(predictions=preds, references=labels).items()}\n",
-    "                self.log_dict(split_metrics, prog_bar=True)\n",
-    "            return loss\n",
-    "\n",
-    "        preds = torch.cat([x['preds'] for x in outputs]).detach().cpu().numpy()\n",
-    "        labels = torch.cat([x['labels'] for x in outputs]).detach().cpu().numpy()\n",
-    "        loss = torch.stack([x['loss'] for x in outputs]).mean()\n",
-    "        self.log('val_loss', loss, prog_bar=True)\n",
-    "        self.log_dict(self.metric.compute(predictions=preds, references=labels), prog_bar=True)\n",
-    "        return loss\n",
-    "\n",
-    "    def setup(self, stage):\n",
-    "        if stage == 'fit':\n",
-    "            # Get dataloader by calling it - train_dataloader() is called after setup() by default\n",
-    "            train_loader = self.train_dataloader()\n",
-    "\n",
-    "            # Calculate total steps\n",
-    "            self.total_steps = (\n",
-    "                (len(train_loader.dataset) // (self.hparams.train_batch_size * max(1, self.hparams.gpus)))\n",
-    "                // self.hparams.accumulate_grad_batches\n",
-    "                * float(self.hparams.max_epochs)\n",
-    "            )\n",
-    "\n",
-    "    def configure_optimizers(self):\n",
-    "        \"Prepare optimizer and schedule (linear warmup and decay)\"\n",
-    "        model = self.model\n",
-    "        no_decay = [\"bias\", \"LayerNorm.weight\"]\n",
-    "        optimizer_grouped_parameters = [\n",
-    "            {\n",
-    "                \"params\": [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],\n",
-    "                \"weight_decay\": self.hparams.weight_decay,\n",
-    "            },\n",
-    "            {\n",
-    "                \"params\": [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)],\n",
-    "                \"weight_decay\": 0.0,\n",
-    "            },\n",
-    "        ]\n",
-    "        optimizer = AdamW(optimizer_grouped_parameters, lr=self.hparams.learning_rate, eps=self.hparams.adam_epsilon)\n",
-    "\n",
-    "        scheduler = get_linear_schedule_with_warmup(\n",
-    "            optimizer, num_warmup_steps=self.hparams.warmup_steps, num_training_steps=self.total_steps\n",
-    "        )\n",
-    "        scheduler = {\n",
-    "            'scheduler': scheduler,\n",
-    "            'interval': 'step',\n",
-    "            'frequency': 1\n",
-    "        }\n",
-    "        return [optimizer], [scheduler]\n",
-    "\n",
-    "    @staticmethod\n",
-    "    def add_model_specific_args(parent_parser):\n",
-    "        parser = parent_parser.add_argument_group(\"GLUETransformer\")\n",
-    "        parser.add_argument(\"--learning_rate\", default=2e-5, type=float)\n",
-    "        parser.add_argument(\"--adam_epsilon\", default=1e-8, type=float)\n",
-    "        parser.add_argument(\"--warmup_steps\", default=0, type=int)\n",
-    "        parser.add_argument(\"--weight_decay\", default=0.0, type=float)\n",
-    "        return parent_parser"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text",
-    "id": "ha-NdIP_xbd3"
-   },
-   "source": [
-    "### ⚡ Quick Tip\n",
-    "  - Combine arguments from your DataModule, Model, and Trainer into one for easy and robust configuration"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "3dEHnl3RPlAR"
-   },
-   "outputs": [],
-   "source": [
-    "def parse_args(args=None):\n",
-    "    parser = ArgumentParser()\n",
-    "    parser = pl.Trainer.add_argparse_args(parser)\n",
-    "    parser = GLUEDataModule.add_argparse_args(parser)\n",
-    "    parser = GLUETransformer.add_model_specific_args(parser)\n",
-    "    parser.add_argument('--seed', type=int, default=42)\n",
-    "    return parser.parse_args(args)\n",
-    "\n",
-    "\n",
-    "def main(args):\n",
-    "    pl.seed_everything(args.seed)\n",
-    "    dm = GLUEDataModule.from_argparse_args(args)\n",
-    "    dm.prepare_data()\n",
-    "    dm.setup('fit')\n",
-    "    model = GLUETransformer(num_labels=dm.num_labels, eval_splits=dm.eval_splits, **vars(args))\n",
-    "    trainer = pl.Trainer.from_argparse_args(args)\n",
-    "    return dm, model, trainer"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text",
-    "id": "PkuLaeec3sJ-"
-   },
-   "source": [
-    "# Training"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text",
-    "id": "QSpueK5UPsN7"
-   },
-   "source": [
-    "## CoLA\n",
-    "\n",
-    "See an interactive view of the CoLA dataset in [NLP Viewer](https://huggingface.co/nlp/viewer/?dataset=glue&config=cola)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "NJnFmtpnPu0Y"
-   },
-   "outputs": [],
-   "source": [
-    "mocked_args = \"\"\"\n",
-    "    --model_name_or_path 
albert-base-v2\n", - " --task_name cola\n", - " --max_epochs 3\n", - " --gpus 1\"\"\".split()\n", - "\n", - "args = parse_args(mocked_args)\n", - "dm, model, trainer = main(args)\n", - "trainer.fit(model, dm)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "_MrNsTnqdz4z" - }, - "source": [ - "## MRPC\n", - "\n", - "See an interactive view of the MRPC dataset in [NLP Viewer](https://huggingface.co/nlp/viewer/?dataset=glue&config=mrpc)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "LBwRxg9Cb3d-" - }, - "outputs": [], - "source": [ - "mocked_args = \"\"\"\n", - " --model_name_or_path distilbert-base-cased\n", - " --task_name mrpc\n", - " --max_epochs 3\n", - " --gpus 1\"\"\".split()\n", - "\n", - "args = parse_args(mocked_args)\n", - "dm, model, trainer = main(args)\n", - "trainer.fit(model, dm)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "iZhbn0HzfdCu" - }, - "source": [ - "## MNLI\n", - "\n", - " - The MNLI dataset is huge, so we aren't going to bother trying to train it here.\n", - "\n", - " - Let's just make sure our multi-dataloader logic is right by skipping over training and going straight to validation.\n", - "\n", - "See an interactive view of the MRPC dataset in [NLP Viewer](https://huggingface.co/nlp/viewer/?dataset=glue&config=mnli)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "AvsZMOggfcWW" - }, - "outputs": [], - "source": [ - "mocked_args = \"\"\"\n", - " --model_name_or_path distilbert-base-uncased\n", - " --task_name mnli\n", - " --max_epochs 1\n", - " --gpus 1\n", - " --limit_train_batches 10\n", - " --progress_bar_refresh_rate 20\"\"\".split()\n", - "\n", - "args = parse_args(mocked_args)\n", - "dm, model, trainer = main(args)\n", - "trainer.fit(model, dm)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "

Congratulations - Time to Join the Community!

\n", - "
\n", - "\n", - "Congratulations on completing this notebook tutorial! If you enjoyed this and would like to join the Lightning movement, you can do so in the following ways!\n", - "\n", - "### Star [Lightning](https://github.com/PyTorchLightning/pytorch-lightning) on GitHub\n", - "The easiest way to help our community is just by starring the GitHub repos! This helps raise awareness of the cool tools we're building.\n", - "\n", - "* Please, star [Lightning](https://github.com/PyTorchLightning/pytorch-lightning)\n", - "\n", - "### Join our [Slack](https://join.slack.com/t/pytorch-lightning/shared_invite/zt-pw5v393p-qRaDgEk24~EjiZNBpSQFgQ)!\n", - "The best way to keep up to date on the latest advancements is to join our community! Make sure to introduce yourself and share your interests in `#general` channel\n", - "\n", - "### Interested by SOTA AI models ! Check out [Bolt](https://github.com/PyTorchLightning/lightning-bolts)\n", - "Bolts has a collection of state-of-the-art models, all implemented in [Lightning](https://github.com/PyTorchLightning/pytorch-lightning) and can be easily integrated within your own projects.\n", - "\n", - "* Please, star [Bolt](https://github.com/PyTorchLightning/lightning-bolts)\n", - "\n", - "### Contributions !\n", - "The best way to contribute to our community is to become a code contributor! At any time you can go to [Lightning](https://github.com/PyTorchLightning/pytorch-lightning) or [Bolt](https://github.com/PyTorchLightning/lightning-bolts) GitHub Issues page and filter for \"good first issue\". \n", - "\n", - "* [Lightning good first issue](https://github.com/PyTorchLightning/pytorch-lightning/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22)\n", - "* [Bolt good first issue](https://github.com/PyTorchLightning/lightning-bolts/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22)\n", - "* You can also contribute your own notebooks with useful examples !\n", - "\n", - "### Great thanks from the entire Pytorch Lightning Team for your interest !\n", - "\n", - "" - ] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "collapsed_sections": [], - "name": "04-transformers-text-classification.ipynb", - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.3" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/notebooks/05-trainer-flags-overview.ipynb b/notebooks/05-trainer-flags-overview.ipynb deleted file mode 100644 index d6996a925c228..0000000000000 --- a/notebooks/05-trainer-flags-overview.ipynb +++ /dev/null @@ -1,2926 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\"Open" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "goRmGIRI5cfC" - }, - "source": [ - "# Introduction to Lightning Flags ⚡🚩\n", - "\n", - "In this notebook, we'll go over the flags available in the `Trainer` object. Note that not everything will work in the Colab environment (multi-gpu, etc). 
This notebook accompanies the Trainer videos we'll be putting out.\n", - "\n", - "---\n", - " - Give us a ⭐ [on Github](https://www.github.com/PytorchLightning/pytorch-lightning/)\n", - " - Check out [the documentation](https://pytorch-lightning.readthedocs.io/en/latest/)\n", - " - Join us [on Slack](https://join.slack.com/t/pytorch-lightning/shared_invite/zt-pw5v393p-qRaDgEk24~EjiZNBpSQFgQ)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "jKj5lgdr5j48" - }, - "source": [ - "--- \n", - "### Setup \n", - "First thing first, we need to install Lightning. Simply ```pip install pytorch-lightning```" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "UGjilEHk4vb7" - }, - "outputs": [], - "source": [ - "! pip install pytorch-lightning" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "zaVUShmQ5n8Y" - }, - "outputs": [], - "source": [ - "import os\n", - "\n", - "from argparse import ArgumentParser\n", - "import torch\n", - "from torch import nn\n", - "from torch.nn import functional as F\n", - "from torch.utils.data import DataLoader\n", - "from torch.utils.data import random_split\n", - "from torchvision.datasets import MNIST\n", - "from torchvision import transforms\n", - "import pytorch_lightning as pl\n", - "from pytorch_lightning.metrics.functional import accuracy\n", - "\n", - "from torchvision.datasets.mnist import MNIST\n", - "from torchvision import transforms" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "6tgkS8IYZwY_" - }, - "outputs": [], - "source": [ - "# ------------\n", - "# data\n", - "# ------------\n", - "pl.seed_everything(1234)\n", - "batch_size = 32\n", - "\n", - "# Init DataLoader from MNIST Dataset\n", - "\n", - "dataset = MNIST(os.getcwd(), train=True, download=True, transform=transforms.ToTensor())\n", - "mnist_test = MNIST(os.getcwd(), train=False, download=True, transform=transforms.ToTensor())\n", - "mnist_train, mnist_val = random_split(dataset, [55000, 5000])\n", - "\n", - "train_loader = DataLoader(mnist_train, batch_size=batch_size)\n", - "val_loader = DataLoader(mnist_val, batch_size=batch_size)\n", - "test_loader = DataLoader(mnist_test, batch_size=batch_size)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "gEulmrbxwaYL" - }, - "source": [ - "### Simple AutoEncoder Model\n", - "\n", - "Were gonna define a simple Lightning model so we can play with all the settings of the Lightning Trainer.\n", - "\n", - "LightningModule is simply pure Pytorch reorganized into hooks, that represents all the steps in the training process.\n", - "\n", - "You can use LightningModule hooks to control every part of your model, but for the purpose of this video we will use a very simple MNIST classifier, a model that takes 28*28 grayscale images of hand written images, and can predict the digit between 0-9.\n", - "\n", - "The LightningModule can encompass a single model, like an image classifier, or a deep learning system composed of multiple models, like this auto encoder that contains an encoder and a decoder.\n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "x-34xKCI40yW" - }, - "outputs": [], - "source": [ - "class LitAutoEncoder(pl.LightningModule):\n", - "\n", - " def __init__(self, batch_size=32, lr=1e-3):\n", - " super().__init__()\n", - " self.encoder = nn.Sequential(\n", - " nn.Linear(28 * 28, 64),\n", - " nn.ReLU(),\n", - " nn.Linear(64, 3)\n", - " )\n", - " 
self.decoder = nn.Sequential(\n", - " nn.Linear(3, 64),\n", - " nn.ReLU(),\n", - " nn.Linear(64, 28 * 28)\n", - " )\n", - " self.batch_size=batch_size\n", - " self.learning_rate=lr\n", - "\n", - " def forward(self, x):\n", - " # in lightning, forward defines the prediction/inference actions\n", - " embedding = self.encoder(x)\n", - " return embedding\n", - "\n", - " def training_step(self, batch, batch_idx):\n", - " x, y = batch\n", - " x = x.view(x.size(0), -1)\n", - " z = self.encoder(x)\n", - " x_hat = self.decoder(z)\n", - " loss = F.mse_loss(x_hat, x)\n", - " self.log('train_loss', loss)\n", - " return loss\n", - "\n", - " def validation_step(self, batch, batch_idx):\n", - " x, y = batch\n", - " x = x.view(x.size(0), -1)\n", - " z = self.encoder(x)\n", - " x_hat = self.decoder(z)\n", - " loss = F.mse_loss(x_hat, x)\n", - " self.log('val_loss', loss)\n", - " \n", - " def test_step(self, batch, batch_idx):\n", - " x, y = batch\n", - " x = x.view(x.size(0), -1)\n", - " z = self.encoder(x)\n", - " x_hat = self.decoder(z)\n", - " loss = F.mse_loss(x_hat, x)\n", - " self.log('test_loss', loss)\n", - "\n", - " def configure_optimizers(self):\n", - " optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)\n", - " return optimizer" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "VbxcRCrxiYly" - }, - "source": [ - "You'll notice the LightningModule doesn't have epoch and batch loops, we're not calling model.train() and model.eval(), and no mentions of CUDA or hardware. That's because it is all automated by the Lightning Trainer. All the engineering boilerplate is automated by the trainer: \n", - "\n", - "* Training loops\n", - "* Evaluation and test loops\n", - "* Calling model.train(), model.eval(), no_grad at the right time\n", - "* CUDA or to_device calls\n", - "\n", - "It also allows you to train your models on different hardware like GPUs and TPUs without changing your code!\n", - "\n", - "\n", - "### To use the lightning trainer simply:\n", - "\n", - "1. init your LightningModule and datasets\n", - "\n", - "2. init lightning trainer\n", - "\n", - "3. call trainer.fit\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "HOk9c4_35FKg" - }, - "outputs": [], - "source": [ - "#####################\n", - "# 1. Init Model\n", - "#####################\n", - "\n", - "model = LitAutoEncoder()\n", - "\n", - "#####################\n", - "# 2. Init Trainer\n", - "#####################\n", - "\n", - "# these 2 flags are explained in the later sections...but for short explanation:\n", - "# - progress_bar_refresh_rate: limits refresh rate of tqdm progress bar so Colab doesn't freak out\n", - "# - max_epochs: only run 2 epochs instead of default of 1000\n", - "trainer = pl.Trainer(progress_bar_refresh_rate=20, max_epochs=2)\n", - "\n", - "#####################\n", - "# 3. Train\n", - "#####################\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "3meDako-Qa_6" - }, - "source": [ - "Our model is training just like that, using the Lightning defaults. 
The beauty of Lightning is that everything is easily configurable.\n", - "In our next videos were going to show you all the ways you can control your Trainer to do things like controlling your training, validation and test loops, running on GPUs and TPUs, checkpointing, early stopping, and a lot more.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "z_Wry2MckQkI" - }, - "source": [ - "# Training loop and eval loop Flags" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "0MkI1xB2vsLj" - }, - "source": [ - "\n", - "To really scale up your networks, you can use accelerators like GPUs. GPUs or Graphical Processing Units, parallelize matrix multiplications which enable speed ups of at least 100x over training on CPUs.\n", - "\n", - "Let's say you have a machine with 8 GPUs on it. You can set this flag to 1, 4, or 8 GPUs and lightning will automatically distribute your training for you.\n", - "\n", - "```\n", - "trainer = pl.Trainer(gpus=1)\n", - "```\n", - "\n", - "---------\n", - "\n", - "Lightning makes your code hardware agnostic... This means, you can switch between CPUs, GPUs without code changes.\n", - "\n", - "However, it requires forming good PyTorch habits:\n", - "\n", - "1. First, remove the .cuda() or .to() calls in your code.\n", - "2. Second, when you initialize a new tensor, set the device=self.device in the call since every lightningModule knows what gpu index or TPU core it is on.\n", - "\n", - "You can also use type_as and or you can register the tensor as a buffer in your module’s __init__ method with register_buffer().\n", - "\n", - "```\n", - "# before lightning\n", - "def forward(self, x):\n", - " z = torch.Tensor(2, 3)\n", - " z = z.cuda(0)\n", - "\n", - "# with lightning\n", - "def forward(self, x):\n", - " z = torch.Tensor(2, 3)\n", - " z = z.type_as(x, device=self.device)\n", - "```\n", - "\n", - "\n", - "```\n", - "class LitModel(LightningModule):\n", - "\n", - " def __init__(self):\n", - " ...\n", - " self.register_buffer(\"sigma\", torch.eye(3))\n", - " # you can now access self.sigma anywhere in your module\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "hw6jJhhjvlSL" - }, - "source": [ - "Lightning Trainer automates all the engineering boilerplate like iterating over epochs and batches, training eval and test loops, CUDA and to(device) calls, calling model.train and model.eval.\n", - "\n", - "You still have full control over the loops, by using the following trainer flags:" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "pT5-ETH9eUg6" - }, - "source": [ - "## Calling validation steps\n", - "Sometimes, training an epoch may be pretty fast, like minutes per epoch. In this case, you might not need to validate on every epoch. 
Instead, you can actually validate after a few epochs.\n", - "\n", - "Use `check_val_every_n_epoch` flag to control the frequency of validation step:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Z-EMVvKheu3D" - }, - "outputs": [], - "source": [ - "# run val loop every 10 training epochs\n", - "trainer = pl.Trainer(check_val_every_n_epoch=10)\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "UOzZr9S2UcSO" - }, - "source": [ - "## val_check_interval\n", - "\n", - "In some cases where your epoch is very long, you might want to check validation within an epoch.\n", - "\n", - "You can also run validation step within your training epochs, by setting `val_check_interval` flag.\n", - "\n", - "Set `val_check_interval` to a float between [0.0 to 1.0] to check your validation set within a training epoch. For example, setting it to 0.25 will check your validation set 4 times during a training epoch.\n", - "\n", - "Default is set to 1.0" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "9kbUbvrUVLrT" - }, - "outputs": [], - "source": [ - "# check validation set 4 times during a training epoch\n", - "trainer = pl.Trainer(val_check_interval=0.25)\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Onm1gBsKVaw4" - }, - "source": [ - "When you have iterable data sets, or when streaming data for production use cases, it is useful to check the validation set every number of steps. \n", - "Set val_check_interval to an int:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "psn6DVb5Vi85" - }, - "outputs": [], - "source": [ - "# check validation set every 1000 training batches\n", - "# use this when using iterableDataset and your dataset has no length\n", - "# (ie: production cases with streaming data)\n", - "trainer = pl.Trainer(val_check_interval=1000)\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "QkoYonrWkb7-" - }, - "source": [ - "## num_sanity_val_steps \n", - "\n", - "You may have run into an issue, where you have a bug in your validation loop, but won't catch it until your training loop ends.\n", - "\n", - "and if your training loop takes hours or days, you will waste valuable compute.\n", - "\n", - "Instead, lightning automatically runs through 2 steps of validation in the beginning to catch these kinds of bugs up front.\n", - "\n", - "\n", - "The `num_sanity_val_steps` flag can help you run n batches of validation before starting the training routine.\n", - "\n", - "You can set it to 0 to turn it off" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "zOcT-ugSkiKW" - }, - "outputs": [], - "source": [ - "# turn it off\n", - "trainer = pl.Trainer(num_sanity_val_steps=0)\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "zS0ob1ZmTw56" - }, - "source": [ - "Set it to -1 to check all validation data before training" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "rzqvjA4UT263" - }, - "outputs": [], - "source": [ - "# check all validation data\n", - "trainer = pl.Trainer(num_sanity_val_steps=-1)\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - 
"id": "uMB41wq4T3Z2" - }, - "source": [ - "Or use any arbitrary number of validation steps" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "lGP78aQzT7VS" - }, - "outputs": [], - "source": [ - "trainer = pl.Trainer(num_sanity_val_steps=10)\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "H-xaYRtd1rb-" - }, - "source": [ - "## Limit train, validation, and test batches\n", - "\n", - "You can set limits on how much of training, validation and test dataset you want your model to check. This is useful if you have really large validation or tests sets, for debugging or testing something that happens at the end of an epoch.\n", - "\n", - "Set the flag to int to specify the number of batches to run\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "XiK5cFKL1rcA" - }, - "outputs": [], - "source": [ - "# run for only 10 batches\n", - "trainer = pl.Trainer(limit_test_batches=10)\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Y4LK0g65RrBm" - }, - "source": [ - "For example, some metrics need to be computed on the entire validation results, such as AUC ROC. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "8MmeRs2DR3dD" - }, - "outputs": [], - "source": [ - "trainer = pl.Trainer(limit_val_batches=10)\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "xmigcNa1A2Vy" - }, - "source": [ - "You can use a float to limit the batches be percentage of the set on every epoch" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "W7uGJt8nA4tv" - }, - "outputs": [], - "source": [ - "# run through only 25% of the test set each epoch\n", - "trainer = pl.Trainer(limit_test_batches=0.25)\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "YRI8THtUN7_e" - }, - "source": [ - "# Training on GPUs\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "R8FFkX_FwlfE" - }, - "source": [ - "To run on 1 GPU set the flag to 1" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Nnzkf3KaOE27" - }, - "outputs": [], - "source": [ - "trainer = pl.Trainer(gpus=1)\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "cxBg47s5PB1P" - }, - "source": [ - "to run on 2 or 4 GPUs, set the flag to 2 or 4." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "cSEM4ihLrohT" - }, - "outputs": [], - "source": [ - "trainer = pl.Trainer(gpus=2)\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ZE6ZgwtNudro" - }, - "source": [ - "You can also select which GPU devices to run on, using a list of indices like [1, 4] \n", - "\n", - "or a string containing a comma separated list of GPU ids like '1,2'\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "gQkJtq0urrjq" - }, - "outputs": [], - "source": [ - "# list: train on GPUs 1, 4 (by bus ordering)\n", - "# trainer = Trainer(gpus='1, 4') # equivalent\n", - "trainer = pl.Trainer(gpus=[1, 4])\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "XghDPad4us74" - }, - "outputs": [], - "source": [ - "trainer = pl.Trainer(gpus=list(range(4)))\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "6FVkKHpSPMTW" - }, - "source": [ - "You can use all the GPUs you have available by setting `gpus=-1`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "r6cKQijYrtPe" - }, - "outputs": [], - "source": [ - "# trainer = Trainer(gpus='-1') - equivalent\n", - "trainer = pl.Trainer(gpus=-1)\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "2C-fNLm3UGCV" - }, - "source": [ - "Lightning uses the PCI bus_id as the index for ordering GPUs." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "_V75s7EhOFhE" - }, - "source": [ - "### `auto_select_gpus`\n", - "\n", - "You can save on GPUs by running in “exclusive mode”, meaning only one process at a time can access them. If your not sure which GPUs you should use when running exclusive mode, Lightning can automatically find unoccupied GPUs for you. \n", - "\n", - "Simply specify the number of gpus as an integer `gpus=k`, and set the trainer flag `auto_select_gpus=True`. Lightning will automatically help you find k gpus that are not occupied by other processes." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "_Sd3XFsAOIwd" - }, - "outputs": [], - "source": [ - "# enable auto selection (will find two available gpus on system)\n", - "trainer = pl.Trainer(gpus=2, auto_select_gpus=True)\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "a5JGSBMQhJNp" - }, - "source": [ - "## analyzing GPU usage\n", - "\n", - "### log_gpu_memory\n", - "\n", - "This is useful to analyze the memory usage of your GPUs.\n", - "\n", - "To get the GPU memory usage for every GPU on the master node, set the flag to log_gpu_memory=all.\n", - "\n", - "Under the hood, lightning uses the nvidia-smi command which may slow your training down.\n", - "\n", - "Your logs can become overwhelmed if you log the usage from many GPUs at once. In this case, you can also set the flag to min_max which will log only the min and max usage across all the GPUs of the master node.\n", - "\n", - "Note that lightning is not logging the usage across all nodes for performance reasons." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "idus3ZGahOki" - }, - "outputs": [], - "source": [ - "# log all the GPUs (on master node only)\n", - "trainer = Trainer(log_gpu_memory='all')\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-mevgiy_hkip" - }, - "source": [ - "To avoid the performance decrease you can also set `log_gpu_memory=min_max` to only log the min and max memory on the master node.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "SlvLJnWyhs7J" - }, - "outputs": [], - "source": [ - "# log only the min and max memory on the master node\n", - "trainer = Trainer(log_gpu_memory='min_max')\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "K82FLLIJVQG3" - }, - "source": [ - "\n", - "But what if you want to train on multiple machines and not just one?" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "YViQ6PXesAue" - }, - "source": [ - "# Training on multiple GPUs" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "WacbBQUivxQq" - }, - "source": [ - "Lightning makes your models hardware agnostic, and you can run on GPUs with a flip of a flag. Lightning also supports training on multiple GPUs across many machines.\n", - "\n", - "You can do this by setting the num_nodes flag.\n", - "\n", - "The world size, or the total number of GPUs you are using, will be gpus*num_nodes.\n", - "\n", - "If i set gpus=8 and num_nodes=32 then I will be training on 256 GPUs." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "5iKckmDvr8zZ" - }, - "outputs": [], - "source": [ - "trainer = pl.Trainer(gpus=8, num_nodes=32)\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "GgcSbDjjlSTh" - }, - "source": [ - "## Accelerators\n", - "\n", - "Under the hood, Lightning uses distributed data parallel (or DDP) by default to distribute training across GPUs.\n", - "\n", - "This Lightning implementation of DDP calls your script under the hood multiple times with the correct environment variables.\n", - "\n", - "Under the hood it's as if you had called your script like this:\n", - "\n", - "1. Each GPU across each node gets its own process.\n", - "2. Each GPU gets visibility into a subset of the overall dataset. It will only ever see that subset.\n", - "3. Each process inits the model. (Make sure to set the random seed so that each model initializes with the same weights.)\n", - "4. Each process performs a full forward and backward pass in parallel.\n", - "5. The gradients are synced and averaged across all processes.\n", - "6. 
Each process updates its optimizer.\n", - "If you request multiple GPUs or nodes without setting a mode, DDP will be automatically used.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "n_Brr7F5wdtj" - }, - "outputs": [], - "source": [ - "# ddp = DistributedDataParallel\n", - "# trainer = pl.Trainer(gpus=2, num_nodes=2) equivalent\n", - "trainer = pl.Trainer(gpus=2, num_nodes=2, accelerator='ddp')\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "edxHyttC5J3e" - }, - "source": [ - "DDP is the fastest and recommended way to distribute your training, but you can pass in other backends to `accelerator` trainer flag, when DDP is not supported.\n", - "\n", - "DDP isn't available in\n", - "* Jupyter Notebook, Google COLAB, Kaggle, etc.\n", - "* If You have a nested script without a root package\n", - "* or if Your script needs to invoke .fit or .test multiple times" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ZDh96mavxHxf" - }, - "source": [ - "### DDP_SPAWN\n", - "\n", - "In these cases, you can use `ddp_spawn` instead. `ddp_spawn` is exactly like DDP except that it uses `.spawn()` to start the training processes." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "JM5TKtgLxo37" - }, - "outputs": [], - "source": [ - "trainer = pl.Trainer(gpus=2, num_nodes=2, accelerator='ddp_spawn')\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "sebhVE3qrhKK" - }, - "source": [ - "We STRONGLY discourage this use because it has limitations (due to Python and PyTorch):\n", - "\n", - "* Since .spawn() trains the model in subprocesses, the model on the main process does not get updated.\n", - "\n", - "* Dataloader(num_workers=N), where N is large, bottlenecks training with DDP… ie: it will be VERY slow or won’t work at all. This is a PyTorch limitation.\n", - "\n", - "* Forces everything to be picklable.\n", - "\n", - "DDP is MUCH faster than DDP_spawn. To be able to use DDP we recommend you: \n", - "\n", - "1. Install a top-level module for your project using setup.py\n", - "\n", - "```\n", - "# setup.py\n", - "#!/usr/bin/env python\n", - "\n", - "from setuptools import setup, find_packages\n", - "\n", - "setup(name='src',\n", - " version='0.0.1',\n", - " description='Describe Your Cool Project',\n", - " author='',\n", - " author_email='',\n", - " url='https://github.com/YourSeed', # REPLACE WITH YOUR OWN GITHUB PROJECT LINK\n", - " install_requires=[\n", - " 'pytorch-lightning'\n", - " ],\n", - " packages=find_packages()\n", - " )\n", - "\n", - "```\n", - "\n", - "2. Setup your project like so:\n", - "\n", - "```\n", - "/project\n", - " /src\n", - " some_file.py\n", - " /or_a_folder\n", - " setup.py\n", - "```\n", - "3. Install as a root-level package\n", - "```\n", - "cd /project\n", - "pip install -e .\n", - "```\n", - "4. You can then call your scripts anywhere\n", - "```\n", - "cd /project/src\n", - "\n", - "python some_file.py --accelerator 'ddp' --gpus 8\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "cmB3I_oyw7a8" - }, - "source": [ - "### DP\n", - "\n", - "If you're using windows, DDP is not supported. You can use `dp` for DataParallel instead: DataParallel uses multithreading, instead of multiprocessing. It splits a batch across k GPUs. 
That is, if you have a batch of 32 and use DP with 2 gpus, each GPU will process 16 samples, after which the root node will aggregate the results.\n", - "\n", - "DP use is discouraged by PyTorch and Lightning. Use DDP which is more stable and at least 3x faster.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "OO-J0ISvlVCg" - }, - "outputs": [], - "source": [ - "# dp = DataParallel\n", - "trainer = pl.Trainer(gpus=2, accelerator='dp')\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Y7E2eHZKwUn9" - }, - "source": [ - "### DDP2\n", - "\n", - "In certain cases, it’s advantageous to use ***all*** batches on the same machine, instead of a subset. For instance, in self-supervised learning, a common performance boost comes from increasing the number of negative samples.\n", - "\n", - "In this case, we can use DDP2 which behaves like DP in a machine and DDP across nodes. DDP2 does the following:\n", - "\n", - "* Copies a subset of the data to each node.\n", - "* Inits a model on each node.\n", - "* Runs a forward and backward pass using DP.\n", - "* Syncs gradients across nodes.\n", - "* Applies the optimizer updates.\n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Y4xweqL3xHER" - }, - "outputs": [], - "source": [ - "# ddp2 = DistributedDataParallel + dp\n", - "trainer = pl.Trainer(gpus=2, num_nodes=2, accelerator='ddp2')\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "lhKNCnveeeq5" - }, - "source": [ - "- The second mode is ddp_spawn. This works like ddp, but instead of calling your script multiple times, lightning will use multiprocessing spawn to start a subprocess per GPU. \n", - "\n", - "However, you should be careful of mixing this mode with num_workers > 0 in your dataloaders because it will bottleneck your training. This is a current known limitation of PyTorch which is why we recommend using our ddp implementation instead.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "HUf9ANyQkFFO" - }, - "source": [ - "\n", - "### mocking ddp\n", - "\n", - "Testing or debugging DDP can be hard, so we have a distributed backend that simulates ddp on cpus to make it easier. Set `num_processes` to a number greater than 1 when using accelerator=\"ddp_cpu\" to mimic distributed training on a machine without GPUs. Note that while this is useful for debugging, it will not provide any speedup, since single-process Torch already makes efficient use of multiple CPUs." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ZSal5Da9kHOf" - }, - "outputs": [], - "source": [ - "# Simulate DDP for debugging on your GPU-less laptop\n", - "trainer = Trainer(accelerator=\"ddp_cpu\", num_processes=2)\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Br_btCy5lgES" - }, - "source": [ - "# Training on TPUS\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "DXkBNITdv44d" - }, - "source": [ - "Another option for accelerating your training is using TPUs.\n", - "A TPU is a Tensor processing unit, designed specifically for deep learning. Each TPU has 8 cores where each core is optimized for 128x128 matrix multiplies. Google estimates that 8 TPU cores are about as fast as 4 V100 GPUs!\n", - "\n", - "A TPU pod hosts many TPUs on it. 
Currently, TPU pod v2 has 2048 cores! You can request a full pod from Google cloud or a “slice” which gives you some subset of those 2048 cores.\n", - "\n", - "At this moment, TPUs are available on Google Cloud (GCP), Google Colab and Kaggle Environments.\n", - "\n", - "Lightning supports training on TPUs without any code adjustments to your model. Just like when using GPUs, Lightning automatically inserts the correct samplers - no need to do this yourself!\n", - "\n", - "Under the hood, lightning uses the XLA framework developed jointly by the facebook and google XLA teams. And we want to recognize their efforts in advancing TPU adoption of PyTorch.\n", - "\n", - "## tpu_cores\n", - "To train on TPUs, set the tpu_cores flag.\n", - "\n", - "When using colab or kaggle, the allowed values are 1 or 8 cores. When using google cloud, any value above 8 is allowed.\n", - "\n", - "Your effective batch size is the batch size passed into a dataloader times the total number of tpu cores." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "itP9y70gmD9M" - }, - "outputs": [], - "source": [ - "# int: train on a single core\n", - "trainer = pl.Trainer(tpu_cores=1)\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "NJKnzPb3mKEg" - }, - "outputs": [], - "source": [ - "# int: train on all cores few cores\n", - "trainer = pl.Trainer(tpu_cores=8)\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "8a4exfWUmOHq" - }, - "source": [ - "You can also choose which TPU core to train on, by passing a list [1-8]. This is not an officially supported use case but we are working with the XLA team to improve this user experience.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "S6OrjE_bmT-_" - }, - "outputs": [], - "source": [ - "# list: train on a single selected core\n", - "trainer = pl.Trainer(tpu_cores=[2])\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Afqx3sFUmfWD" - }, - "source": [ - "To train on more than 8 cores (ie: a POD), submit this script using the xla_dist script.\n", - "\n", - "\n", - "\n", - "```\n", - "python -m torch_xla.distributed.xla_dist\n", - "--tpu=$TPU_POD_NAME\n", - "--conda-env=torch-xla-nightly\n", - "--env=XLA_USE_BF16=1\n", - "-- python your_trainer_file.py\n", - "```\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ncPvbUVQqKOh" - }, - "source": [ - "# Advanced distributed training\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "4MP7bEgnv7qK" - }, - "source": [ - "\n", - "Lightning supports distributed training across multiple GPUs and TPUs out of the box by setting trainer flags, but it also allows you to control the way sampling is done if you need to." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "wdHiTfAMepKH" - }, - "source": [ - "## replace_sampler_ddp\n", - "In PyTorch, you must use torch.nn.DistributedSampler for multi-node or GPU training. 
The sampler makes sure each GPU sees the appropriate part of your data.\n", - "\n", - "```\n", - "# without lightning\n", - "def train_dataloader(self):\n", - " dataset = MNIST(...)\n", - " sampler = None\n", - "\n", - " if self.on_tpu:\n", - " sampler = DistributedSampler(dataset)\n", - "\n", - " return DataLoader(dataset, sampler=sampler)\n", - "```\n", - "Lightning adds the correct samplers when needed, so no need to explicitly add samplers. By default it will add `shuffle=True` for train sampler and `shuffle=False` for val/test sampler.\n", - "\n", - "If you want to customize this behaviour, you can set `replace_sampler_ddp=False` and add your own distributed sampler.\n", - "\n", - "(note: For iterable datasets, we don’t do this automatically.)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ZfmcB_e_7HbE" - }, - "outputs": [], - "source": [ - "sampler = torch.utils.data.distributed.DistributedSampler(dataset, shuffle=False)\n", - "dataloader = DataLoader(dataset, batch_size=32, sampler=sampler)\n", - "\n", - "trainer = pl.Trainer(gpus=2, num_nodes=2, replace_sampler_ddp=False)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-IOhk1n0lL3_" - }, - "source": [ - "## prepare_data_per_node\n", - "\n", - "When doing multi NODE training, if your nodes share the same file system, then you don't want to download data more than once to avoid possible collisions. \n", - "\n", - "Lightning automatically calls the prepare_data hook on the root GPU of the master node (ie: only a single GPU).\n", - "\n", - "In some cases where your nodes don't share the same file system, you need to download the data on each node. In this case you can set this flag to true and lightning will download the data on the root GPU of each node.\n", - "\n", - "This flag is defaulted to True." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "WFBMUR48lM04" - }, - "outputs": [], - "source": [ - "trainer = pl.Trainer(gpus=2, num_nodes=2, prepare_data_per_node=False)\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "FKBwXqo4q-Vp" - }, - "source": [ - "## sync_batchnorm\n", - "\n", - "Batch norm is computed per GPU/TPU. This flag enables synchronization between batchnorm layers across all GPUs.\n", - "It is recommended if you have small batch sizes.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "GhaCLTEZrAQi" - }, - "outputs": [], - "source": [ - "trainer = Trainer(gpus=4, sync_batchnorm=True)\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "XuFA7VTFMY9-" - }, - "source": [ - "# Debugging flags\n", - "\n", - "Lightning offers a couple of flags to make debugging your models easier:\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "AKoS3fdml4Jx" - }, - "source": [ - "## Fast Dev Run\n", - "\n", - "To help you save time debugging, your first run should use the fast_dev_run flag.\n", - "\n", - "This won't generate logs or save checkpoints but will touch every line of your code to make sure that it is working as intended.\n", - "\n", - "Think about this flag like a compiler. 
You make changes to your code, and run Trainer with this flag to verify that your changes are bug free.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "L5vuG7GSmhzK" - }, - "outputs": [], - "source": [ - "trainer = pl.Trainer(fast_dev_run=True)\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "HRP1qQR5nT4p" - }, - "source": [ - "## overfit_batches\n", - "\n", - "Uses this much data of the training set. If nonzero, will use the same training set for validation and testing. If the training dataloaders have shuffle=True, Lightning will automatically disable it.\n", - "\n", - "Useful for quickly debugging or trying to overfit on purpose." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "NTM-dqGMnXms" - }, - "outputs": [], - "source": [ - "# use only 1% of the train set (and use the train set for val and test)\n", - "trainer = pl.Trainer(overfit_batches=0.01)\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "c0LV0gC3nl1X" - }, - "outputs": [], - "source": [ - "# overfit on 10 of the same batches\n", - "trainer = pl.Trainer(overfit_batches=10)\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "lt3UHU6WgtS_" - }, - "source": [ - "Or a float to represent percentage of data to run" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "K3yUqADhgnkf" - }, - "outputs": [], - "source": [ - "# run through only 25% of the test set each epoch\n", - "trainer = pl.Trainer(limit_test_batches=0.25)\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ODN66NeVg_2o" - }, - "source": [ - "In the case of multiple test dataloaders, the limit applies to each dataloader individually.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "8aQx5SLeMz1R" - }, - "source": [ - "# accumulate_grad_batches\n", - "\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "g8GczZXFwKC7" - }, - "source": [ - "The batch size controls the accuracy of the estimate of the gradients. Small batch size use less memory, but decrease accuracy. When training large models, such as NLP transformers, it is useful to accumulate gradients before calling backwards(). It allows for bigger batch sizes than what can actually fit on a GPU/TPU in a single step.\n", - "\n", - "Use accumulate_grad_batches to accumulate gradients every k batches or as set up in the dict. Trainer also calls optimizer.step() for the last indivisible step number.\n", - "\n", - "For example, set accumulate_grad_batches to 4 to accumulate every 4 batches. In this case the effective batch size is batch_size*4, so if your batch size is 32, effectively it will be 128." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "2jB6-Z_yPhhf" - }, - "outputs": [], - "source": [ - "# accumulate every 4 batches (effective batch size is batch*4)\n", - "trainer = pl.Trainer(accumulate_grad_batches=4)\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "_Yi-bdTOgINC" - }, - "source": [ - "You can also pass a dictionary to specify different accumulation per epoch. 
We can set it to `{5: 3, 10: 20}` to have no accumulation for epochs 1 to 4, accumulate 3 batches for epoch 5 to 10, and 20 batches after that." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "X3xsoZ3YPgBv" - }, - "outputs": [], - "source": [ - "# no accumulation for epochs 1-4. accumulate 3 for epochs 5-10. accumulate 20 after that\n", - "trainer = pl.Trainer(accumulate_grad_batches={5: 3, 10: 20})\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "myzH8mV4M1_9" - }, - "source": [ - "# 16 bit precision\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "v9EaFAonwOk6" - }, - "source": [ - "Most deep learning frameworks like PyTorch, train with 32-bit floating point arithmetic. \n", - "\n", - "But many models can still achieve full accuracy using half the precision.\n", - "\n", - "In 2017, NVIDIA researchers successfully used a combination of 32 and 16 bit precision (also known as mixed precision) and achieved the same accuracy as 32 bit precision training.\n", - "\n", - "The main two advantages are:\n", - "\n", - "- a reduction in memory requirements which enables larger batch sizes and models.\n", - "- and a speed up in compute. On ampere, turing and volta architectures 16 bit precision models can train at least 3 times faster.\n", - "\n", - "As of PyTorch 1.6, NVIDIA and Facebook moved mixed precision functionality into PyTorch core as the AMP package, torch.cuda.amp. \n", - "\n", - "This package supersedes the apex package developed by NVIDIA." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "TjNypZPHnxvJ" - }, - "source": [ - "## precision\n", - "\n", - "Use precision flag to switch between full precision (32) to half precision (16). Can be used on CPU, GPU or TPUs.\n", - "\n", - "When using PyTorch 1.6+ Lightning uses the native amp implementation to support 16-bit.\n", - "\n", - "If used on TPU will use torch.bfloat16 but tensor printing will still show torch.float32" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "kBZKMVx1nw-D" - }, - "outputs": [], - "source": [ - "# 16-bit precision\n", - "trainer = pl.Trainer(gpus=1, precision=16)\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "VJGj3Jh7oQXU" - }, - "source": [ - "In earlier version of Lightning, we use NVIDIA Apex for 16-bit precision. Apex was the first library to attempt 16-bit and the automatic mixed precision library (amp), has since been merged into core PyTorch as of 1.6.\n", - "\n", - "If you insist in using Apex, you can set the amp_backend flag to 'apex' and install Apex on your own." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "BDV1trAUPc9h" - }, - "outputs": [], - "source": [ - "trainer = pl.Trainer(gpus=1, precision=16, amp_backend='apex')\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "HK5c_aVfNV4e" - }, - "source": [ - "## amp_level\n", - "Apex includes 4 optimization levels:\n", - "O0 (FP32 training)\n", - "O1 (Conservative Mixed Precision): only some whitelist ops are done in FP16.\n", - "O2 (Fast Mixed Precision): this is the standard mixed precision training. It maintains FP32 master weights and optimizer.step acts directly on the FP32 master weights.\n", - "O3 (FP16 training): full FP16. 
Passing keep_batchnorm_fp32=True can speed things up as cudnn batchnorm is faster anyway.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "FshMFPowNbWt" - }, - "outputs": [], - "source": [ - "# default used by the Trainer\n", - "trainer = pl.Trainer(gpus=1, precision=16, amp_backend='apex', amp_level='O2')\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "y8KEr1YvNgkC" - }, - "source": [ - "# `auto_scale_batch_size`\n", - "\n", - " \n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "7F1pKFIuwSFl" - }, - "source": [ - "Lightning can help you improve your model by using auto_scale_batch_size flag, which tries to find the largest batch size that fits into memory, before you start your training.\n", - "Larger batch size often yields better estimates of gradients, but may also result in longer training time. \n", - "\n", - "Set it to True to initially run a batch size finder trying to find the largest batch size that fits into memory. The result will be stored in self.batch_size in the LightningModule.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "9_jE-iyyheIv" - }, - "outputs": [], - "source": [ - "trainer = pl.Trainer(auto_scale_batch_size=True)\n", - "\n", - "trainer.tune(model, train_dataloader=train_loader, val_dataloaders=val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "yaHsJvwFhNJt" - }, - "source": [ - "You can set the value to `power`. `power` scaling starts from a batch size of 1 and keeps doubling the batch size until an out-of-memory (OOM) error is encountered.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Qx0FbQrphgw1" - }, - "outputs": [], - "source": [ - "trainer = pl.Trainer(auto_scale_batch_size='power')\n", - "\n", - "trainer.tune(model, train_dataloader=train_loader, val_dataloaders=val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "8bwgVF9zhZ75" - }, - "source": [ - "You can also set it to `binsearch`, that continues to finetune the batch size by performing a binary search.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "QObXNs3yNrg9" - }, - "outputs": [], - "source": [ - "# run batch size scaling, result overrides hparams.batch_size\n", - "trainer = pl.Trainer(auto_scale_batch_size='binsearch')\n", - "\n", - "trainer.tune(model, train_dataloader=train_loader, val_dataloaders=val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "5OWdhSsZjqW7" - }, - "source": [ - "This feature expects that a batch_size field in the hparams of your model, i.e., model.hparams.batch_size should exist and will be overridden by the results of this algorithm. \n", - "\n", - "Additionally, your train_dataloader() method should depend on this field for this feature to work.\n", - "\n", - "The algorithm in short works by:\n", - "1. Dumping the current state of the model and trainer\n", - "\n", - "2. Iteratively until convergence or maximum number of tries max_trials (default 25) has been reached:\n", - "* Call fit() method of trainer. This evaluates steps_per_trial (default 3) number of training steps. Each training step can trigger an OOM error if the tensors (training batch, weights, gradients etc.) 
allocated during the steps have a too large memory footprint.\n", - " * If an OOM error is encountered, decrease the batch size\n", - " * Else increase it.\n", - "* How much the batch size is increased/decreased is determined by the chosen strategy.\n", - "\n", - "3. The found batch size is saved to model.hparams.batch_size\n", - "\n", - "4. Restore the initial state of model and trainer\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "q4CvxfZmOWBd" - }, - "source": [ - "# `auto_lr_find`\n", - "\n", - "\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "j85e8usNwdBV" - }, - "source": [ - "Selecting a good learning rate for your deep learning training is essential for both better performance and faster convergence.\n", - "\n", - "Even optimizers such as Adam that are self-adjusting the learning rate can benefit from more optimal choices.\n", - "\n", - "To reduce the amount of guesswork concerning choosing a good initial learning rate, you can use Lightning auto learning rate finder.\n", - "\n", - "The learning rate finder does a small run where the learning rate is increased after each processed batch and the corresponding loss is logged. The result of this is a lr vs. loss plot that can be used as guidance for choosing an optimal initial lr.\n", - "\n", - "\n", - "warning: For the moment, this feature only works with models having a single optimizer. LR support for DDP is not implemented yet, it is coming soon.\n", - "\n", - "\n", - "***auto_lr_find=***\n", - "\n", - "In the most basic use case, this feature can be enabled during trainer construction with Trainer(auto_lr_find=True).\n", - "When .fit(model) is called, the LR finder will automatically run before any training is done. The lr that is found and used will be written to the console and logged together with all other hyperparameters of the model." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "iuhve9RBOfFh" - }, - "outputs": [], - "source": [ - "# default used by the Trainer (no learning rate finder)\n", - "trainer = pl.Trainer(mnist_model, auto_lr_find=False)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "BL-gjXNCPDXk" - }, - "source": [ - "This flag sets your learning rate which can be accessed via self.lr or self.learning_rate.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "wEb-vIMmPJQf" - }, - "outputs": [], - "source": [ - "class LitModel(LightningModule):\n", - "\n", - " def __init__(self, learning_rate):\n", - " self.learning_rate = learning_rate\n", - "\n", - " def configure_optimizers(self):\n", - " return Adam(self.parameters(), lr=(self.lr or self.learning_rate))\n", - "\n", - "# finds learning rate automatically\n", - "# sets hparams.lr or hparams.learning_rate to that learning rate\n", - "trainer = pl.Trainer(mnist_model, auto_lr_find=True)\n", - "\n", - "trainer.tune(model, train_dataloader=train_loader, val_dataloaders=val_loader)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "RweqvpnVPPSh" - }, - "source": [ - "To use an arbitrary value set it as auto_lr_find\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "4LKI39IfPLJv" - }, - "outputs": [], - "source": [ - "trainer = pl.Trainer(mnist_model, auto_lr_find='my_value')\n", - "\n", - "trainer.tune(model, train_dataloader=train_loader, val_dataloaders=val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "9VAhPRKbPX-m" - }, - "source": [ - "Under the hood, when you call tune it runs the learning rate finder.\n", - "\n", - "If you want to inspect the results of the learning rate finder before doing any actual training or just play around with the parameters of the algorithm, this can be done by invoking the lr_find method of the trainer. A typical example of this would look like\n", - "\n", - "\n", - "```\n", - "trainer = pl.Trainer(auto_lr_find=True)\n", - "\n", - "# Run learning rate finder\n", - "lr_finder = trainer.lr_find(model)\n", - "\n", - "# Results can be found in\n", - "lr_finder.results\n", - "\n", - "# Plot with\n", - "fig = lr_finder.plot(suggest=True)\n", - "fig.show()\n", - "\n", - "# Pick point based on plot, or get suggestion\n", - "new_lr = lr_finder.suggestion()\n", - "\n", - "# update hparams of the model\n", - "model.hparams.lr = new_lr\n", - "\n", - "# Fit model\n", - "trainer.fit(model)\n", - "```\n", - "\n", - "The figure produced by lr_finder.plot() should look something like the figure below. It is recommended to not pick the learning rate that achieves the lowest loss, but instead something in the middle of the sharpest downward slope (red point). 
This is the point returned py lr_finder.suggestion().\n", - "\n", - "![image.png](data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAoAAAAHgCAYAAAA10dzkAAAgAElEQVR4Ae3dB3hUZb7H8bheey94r+URdMWOuq66a1mVtazX3nVX17p617bqursGUCMo9gIWUFBRBBGwoEAgtEDoLbQgIQklCS0kkJAQkpDyv8//xRlnkkkyM++UM3O+53nykDlz3jPnfM5/8v44NUUYEEAAAQQQQAABBFwlkOKqtWVlEUAAAQQQQAABBIQASBEggAACCCCAAAIuEyAAumyDs7oIIIAAAggggAABkBpAAAEEEEAAAQRcJkAAdNkGZ3URQAABBBBAAAECIDWAAAIIIIAAAgi4TIAA6LINzuoigAACCCCAAAIEQGoAAQQQQAABBBBwmQAB0GUbnNVFAAEEEEAAAQQIgNQAAggggAACCCDgMgECoMs2OKuLAAIIIIAAAggQAKkBBBBAAAEEEEDAZQIEQJdtcFYXAQQQQAABBBAgAFIDCCCAAAIIIICAywQIgC7b4KwuAggggAACCCBAAKQGEEAAAQQQQAABlwkQAF22wVldBBBAAAEEEECAAEgNIIAAAggggAACLhMgALpsg7O6CCCAAAIIIIAAAZAaQAABBBBAAAEEXCZAAHTZBmd1EUAAAQQQQAABAiA1gAACCCCAAAIIuEyAAOiyDc7qIoAAAggggAACBEBqAAEEEEAAAQQQcJkAAdBlG5zVRQABBBBAAAEECIDUAAIIIIAAAggg4DIBAqDLNjiriwACCCCAAAIIEACpAQQQQAABBBBAwGUCBECXbXBWFwEEEEAAAQQQIABSAwgggAACCCCAgMsECIAu2+CsLgIIIIAAAgggQACkBhBAAAEEEEAAAZcJEABdtsFZXQQQQAABBBBAgABIDSCAAAIIIIAAAi4TIAC6bIOzuggggAACCCCAAAGQGkAAAQQQQAABBFwmQAB02QZndRFAAAEEEEAAAQIgNYAAAggggAACCLhMgADosg3O6iKAAAIIIIAAAgRAagABBBBAAAEEEHCZAAHQZRuc1UUAAQQQQAABBAiA1AACCCCAAAIIIOAyAQKgyzY4q4sAAggggAACCBAAqQEEEEAAAQQQQMBlAgRAl21wVhcBBBBAAAEEECAAUgMIIIAAAggggIDLBAiALtvgrC4CCCCAAAIIIEAApAYQQAABBBBAAAGXCRAAXbbBWV0EEEAAAQQQQIAASA0ggAACCCCAAAIuEyAAumyDs7oIIIAAAggggAABkBpAAAEEEEAAAQRcJkAAdNkGZ3URQAABBBBAAAECIDWAAAIIIIAAAgi4TIAA6LINzuoigAACCCCAAAIEQGoAAQQQQAABBBBwmQAB0GKDNzY2SnFxsVRUVMi2bdv4wYAaoAaoAWqAGkiAGtB+W/tv7cfdOhAALba8Fk9KSgo/GFAD1AA1QA1QAwlYA9qPu3UgAFpsef0fhAZALSD2ALIHlBqgBqgBaoAaSIwa8OzA0X7crQMB0GLL6xddA6D+y4AAAggggAACiSFA/y1CALSoVQrIAo+mCCCAAAIIxEmA/psAaFV6FJAVH40RQAABBBCIiwD9NwHQqvAoICs+GiOAAAIIIBAXAfpvAqBV4VFAVnw0RgABBBBAIC4C9N8EQKvCo4Cs+GiMAAIIIIBAXATovwmAVoVHAVnx0RgBBBBAAIG4CNB/EwCtCo8CsuKjMQIIIIAAAnERoP8mAFoVHgVkxUdjBBBAAAEE4iJA/00AtCo8CsiKj8YIIIAAAgjERYD+mwBoVXgUkBUfjRFAAAEEEIiLAP03AdCq8CggKz4aI4AAAgggEBcB+m8CoFXhUUBWfDRGAAEEEEAgLgL03wRAq8KjgKz4aIwAAggggECrAiPmF8k/hmXLuGUbWp0m3DfovwmA4daOaUcBWfHRGAEEEEAAgVYFUr9dIh2fHSN9J+W1Ok24b9B/EwDDrR3TjgKy4qMxAggggAACrQo8PHi+CYCDZ69tdZpw36D/JgCGWzumHQVkxUdjBBBAAAEEWhW4tf9MEwDHLuUQcKtIFm+kWLR1fVMCoOtLAAAEEEAAgSgJ/PGtTBMAZxWURfwT6L/ZA2hVVBSQFR+NEUAAAQQQaFXgrJ4ZJgCu3FTZ6jThvkH/TQAMt3ZMOwrIio/GCCCAAAIIBBRoaGySTqljTAAsraoNOI3NSPpvAqBN/QgFZMVHYwQQQAABBAIKlFXVmvCnVwHXNzQGnMZmJP03AdCmfgiAVno0RgABBBBAILBA3qZKEwDP7JkReALLsQRAAqBVCVFAVnw0RgABBBBAIKDAnFVlJgB2fTMz4Pu2I+m/CYBWNUQBWfHRGAEEEEAAgYAC6Us3mAB4S7+ZAd+3HUn/TQC0qiEKyIqPxggggAACCAQUGDJnrQmAf/tifsD3bUfSfxMArWqIArLiozECCCCAAAIBBd6blGcC4LPfLAn4vu1I+m8CoFUNUUBWfDRGAAEEEEAgoMCLP+aYAPjauBUB37cdSf9NALSqIQrIio/GCCCAAAIIBBT4x7BsEwAHZq0K+L7tSPpvAqBVDVFAVnw0RgABBBBAIKDA3Z/MMQHwmwXFAd+3HUn/TQC0qiEKyIqPxggggAACCAQUuLpvlgmAU3JLAr5vO5L+mwBoVUMUkBUfjRFAAAEEEAgocP4rk0wAXFxUHvB925H03wRAqxqigKz4aIwAAggggEALgaamJjmxR7oJgEVbqlu8H4kR9N8EQKs6ooCs+GiMAAIIIIBAC4HttfUm/OlzgPX3aAz03y4PgOvWrZO77rpLDj30UNl7773l9NNPl/nzg7/pJAUUja8l80QAAQQQcLOA7vXT8Kd7AXVvYDQG+m8XB8CtW7dKx44d5b777pO5c+fK6tWrJSMjQwoKCoKuNQooaComRAABBBBAICgBPe9PA+DvX5kU1PThTET/7eIA+Oyzz8pFF10UTt1421BAXgp+QQABBBBAICICeuWvBkC9EjhaA/23iwPgKaecIk899ZTceuut0qFDBznrrLNkwIABbdZabW2taNF4foqLiyUlJcW8brMhbyKAAAIIIIBAUAJ67z8NgHovwGgNBEAXB8C99tpL9Kdbt26SnZ0tH3/8sTkP8PPPP2+13tLS0kzg09Dn+6OFxIAAAggggAAC9gL69A8NgPo0kGgNBEAXB8A99thDzj//fL/aeuKJJ+T3v/+93zjfF+wB9NXgdwQQQAABBCIvoM//1QCY9kNO5Gf+8xwJgC4OgMcee6w8+OCDfsXVr18/Oeqoo/zGtfWCAmpLh/cQQAABBBAIXeDZb5aYAPjepLzQGwfZgv7bxQHwz3/+c4uLQPScwOZ7BduqJQqoLR3eQwABBBBAIHSBv30x3wTAIXPWht44yBb03y4OgPPmzZP/+q//kt69e0t+fr4MHTpU9t13XxkyZEiQ5SPm4g8uAgmaiwkRQAABBBBoV+CWfjNNAExfuqHdacO
dgADo4gCoRTN69Ghz82e9GOTkk09u9yrg5oVGATUX4TUCCCCAAAJ2Al3fzDQBcM6qMrsZtdGa/tvlAbCN2gjqLQooKCYmQgABBBBAIGiBM3tmmACYt6ky6DahTkj/TQAMtWb8pqeA/Dh4gQACCCCAgJVAfUOjCX96FXBZVa3VvNpqTP9NAGyrPtp9jwJql4gJEEAAAQQQCFpgc2WtCYCdUsdIQ2N0ngOsC0P/TQAMuigDTUgBBVJhHAIIIIAAAuEJrNxUaQLgWT0zwptBkK3ovwmAQZZK4MkooMAujEUAAQQQQCAcgVkFZSYAdn0rM5zmQbeh/yYABl0sgSakgAKpMA4BBBBAAIHwBMYu3WAC4K39Z4Y3gyBb0X8TAIMslcCTUUCBXRiLAAIIIIBAOAKDZ681AfChL+aH0zzoNvTfBMCgiyXQhBRQIBXGIYAAAgggEJ5A30l5JgCmfrskvBkE2Yr+mwAYZKkEnowCCuzCWAQQQAABBMIRSPshxwTA18etCKd50G3ovwmAQRdLoAkpoEAqjEMAAQQQQCA8gSe+yjYBcGDWqvBmEGQr+m8CYJClEngyCiiwC2MRQAABBBAIR+CugXNMAPx2YXE4zYNuQ/9NAAy6WAJNSAEFUmEcAggggAAC4Qn8b58sEwAzc0vCm0GQrei/CYBBlkrgySigwC6MRQABBBBAIByB3/WeZALgkuLycJoH3Yb+mwAYdLEEmpACCqTCOAQQQAABBEIXaGpqks490k0ALN5aHfoMQmhB/00ADKFcWk5KAbU0YQwCCCCAAALhCFTV1pvw1/HZMVJdVx/OLIJuQ/9NAAy6WAJNSAEFUmEcAggggAACoQsUllWbAHjSc+mhNw6xBf03ATDEkvGfnALy9+AVAggggAAC4QosKio3AfCCVyeHO4ug29F/EwCDLpZAE1JAgVQYhwACCCCAQOgCk1dsMgHwmveyQm8cYgv6bwJgiCXjPzkF5O/BKwQQQAABBMIVGLmg2ATAv346N9xZBN2O/psAGHSxBJqQAgqkwjgEEEAAAQRCF/h4WoEJgE8Oyw69cYgt6L8JgCGWjP/kFJC/B68QQAABBBAIV+DV9BUmAPb8cXm4swi6Hf03ATDoYgk0IQUUSIVxCCCAAAIIhC7w75GLTQB8f3Je6I1DbEH/TQAMsWT8J6eA/D14hQACCCCAQLgCD34+3wTAoXMKw51F0O3ovwmAQRdLoAkpoEAqjEMAAQQQQCB0gZs+nGEC4LhlG0JvHGIL+m8CYIgl4z85BeTvwSsEEEAAAQTCFbj0zUwTAOeu3hLuLIJuR/9NAAy6WAJNSAEFUmEcAggggAACoQt0SRtvAmB+SWXojUNsQf9NAAyxZPwnp4D8PXiFAAIIIIBAOAI7GxpN+NPnAG/ZXhfOLEJqQ/9NAAypYJpPTAE1F+E1AggggAACoQuUVNaYANgpdYw0NDaFPoMQW9B/EwBDLBn/ySkgfw9eIYAAAgggEI7Aio3bTAD8Ta8J4TQPuQ39NwEw5KLxbUAB+WrwOwIIIIAAAuEJzCwoNQHwj29lhjeDEFvRfxMAQywZ/8kpIH8PXiGAAAIIIBCOwOgl600AvK3/rHCah9yG/psAGHLR+DaggHw1+B0BBBBAAIHwBAbPWmMC4MOD54c3gxBb0X8TAEMsGf/JKSB/D14hgAACCCAQjsC7E1eaAJj67dJwmofchv6bABhy0fg2oIB8NfgdAQQQQACB8AReGLXMBMA3xq8IbwYhtqL/JgCGWDL+k1NA/h68QgABBBBAIByBx4YuNAHwk+mrw2kechv6bwJgyEXj24AC8tXgdwQQQAABBMITuP2jWSYAjlq0LrwZhNiK/psAGGLJ+E9OAfl78AoBBBBAAIFwBC54dbIJgAvWRv85wLp89N8EwHDq1NuGAvJS8AsCCCCAAAJhCdQ3NMrx3caaALhpW01Y8wi1Ef03ATDUmvGbngLy4+AFAggggAACIQsUbak24a9z93RpjMFj4HQB6b8JgCEXqm8DCshXg98RQAABBBAIXWBWQZkJgJe+GZungOgS0n8TAEOvVJ8WFJAPBr8igAACCCAQhsCI+UUmAN79yZwwWofXhP6bABhe5fzcigKy4qMxAggggAAC8s6E2N4EWsnpvwmAVl89CsiKj8YIIIAAAgjIP4cvNnsAP5iSHzMN+m8CoFWxUUBWfDRGAAEEEEBAYn0PQCWn/yYAWn31KCArPhojgAACCCAgv9wDcGvMNOi/CYBWxUYBWfHRGAEEEEDA5QK+9wAsidE9AJWc/psAaPXVo4Cs+GiMAAIIIOByAe89AHvE7h6ASk7/TQC0+upRQFZ8NEYAAQQQcLnAzIJScwFI1xjeA1DJ6b8JgFZfPQrIio/GCCCAAAIuFxgeh3sAKjn9NwHQ6qtHAVnx0RgBBBBAwOUCb8fhHoBKTv9NALT66lFAVnw0RgABBBBwucDTwxfF/B6ASk7/TQC0+upRQFZ8NEYAAQQQcLnAbR/NMgFw1KJ1MZWg/yYAWhUcBWTFR2MEEEAAAZcLeO4BuLAwdvcAVHL6bwKg1VePArLiozECCCCAgIsFdjY0ynGpY8wewJLKmphK0H8TAK0KjgKy4qMxAggggICLBTz3ADyxR7o0NTXFVIL+mwBoVXAUkBUfjRFAAAEEXCzgvQfgW5kxV6D/JgBaFR0FZMVHYwQQQAABFwt47gH410/nxlyB/psAaFV0FJAVH40RQAABBFws4LkHYLfvlsZcgf6bAGhVdBSQFR+NEUAAAQRcLOC5B+CHmfkxV6D/dnkATEtLk5SUFL+fk046KehCpICCpmJCBBBAAAEE/AQ89wD8YfF6v/GxeEH/TQCU0047TTZu3Oj9KS0tDbr2KKCgqZgQAQQQQAABP4F43QNQF4L+mwAoZ555pl9BhvKCAgpFi2kRQAABBBDYJRDPewDqEtB/EwBl3333lSOPPFKOO+44+ctf/iKFhYVBfz8poKCpmBABBBBAAAGvQDzvAagLQf/t8gCYnp4uI0aMkCVLlsj48ePl/PPPl2OPPVYqKyu9Rer7S21trSkaLRz9KS4uNucP6u8MCCCAAAIIIBCcwMz8UvMEkD/G4R6AuoQEQJcHwOZlWl5eLgceeKB88sknzd8yrwNdNKIXkRAAA3IxEgEEEEAAgYACw+cVmQB4TxzuAagLRAAkALYozHPOOUdSU1NbjNcR7AEMyMJIBBBAAAEEQhJ4OyPXBMDucbgHoC4oAZAA6FewVVVVcsghh0jfvn39xrf2ggJqTYbxCCCAAAIItC7w9NeLTADsl1nQ+kRRfIf+2+UB8JlnnpGpU6fKmjVrZObMmXL55ZfL4YcfLps3bw6q7CigoJiYCAEEEEAAAT+B2/rPMgHwxzjcA1AXhP7b5QHwjjvuMFcA77nnnnL00UeLvi4oCP5/IxSQ3/eZFwgggAACCAQlcP4rk0wAzC7cGtT0kZ6I/tvlAdC2oCggW0HaI4AAAgi4TaCuvlGOSx1jAuDmytq4rD79NwHQqvAoICs+GiOAAAIIuFCgsKzahL8Te6RLU1NTXA
TovwmAVoVHAVnx0RgBBBBAwIUC8b4HoJLTfxMArb56FJAVH40RQAABBFwoEO97ACo5/TcB0OqrRwFZ8dEYAQQQQMCFAvG+B6CS038TAK2+ehSQFR+NEUAAAQRcKOC5B2D/qcHfdSPSTPTfBECrmqKArPhojAACCCDgQgHPPQBHL1kft7Wn/yYAWhUfBWTFR2MEEEAAAZcJVNfVy0nPpZurgH/asC1ua0//TQC0Kj4KyIqPxggggAACLhMYs2SDCX9/eH1K3G4Bo+T03wRAq68eBWTFR2MEEEAAAZcJPDpkoQmAr6T/FNc1p/8mAFoVIAVkxUdjBBBAAAEXCeyoa5CTnxtnAuCS4vK4rjn9NwHQqgApICs+GiOAAAIIuEhg7NJdh38vfG1yXA//Kjn9NwHQ6qtHAVnx0RgBBBBAwEUCjw79+fDv2Pge/lVy+m8CoNVXjwKy4qMxAggggIBLBPTw7ynP7zr8u7govod/lZz+mwBo9dWjgKz4aIwAAggg4BKBccucc/hXyem/CYBWXz0KyIqPxggggAACLhF4/Ktsc/FHbwcc/lVy+m8CoNVXjwKy4qMxAggggIALBGp2/nL4d5EDDv8qOf03AdDqq0cBWfHRGAEEEEDABQLjlm00e/8ueDX+V/96uOm/CYCeWgjrXwooLDYaIYAAAgi4SOCJnw//vjR6uWPWmv6bAGhVjBSQFR+NEUAAAQSSXEAP/57689W/Cwu3OmZt6b8JgFbFSAFZ8dEYAQQQQCDJBcbn7Dr8e/4rk+J+82dfavpvAqBvPYT8OwUUMhkNEEAAAQRcJPCPYbuu/u3loMO/yk//TQC0+hpSQFZ8NEYAAQQQSGKBTdtqvM/+XbDWOYd/lZz+mwBo9dWjgKz4aIwAAgggkMQCT/689+/GD2c46vCvktN/EwCtvnoUkBUfjRFAAAEEklRg3pot5tYvnVLHyNLiCsetJf03AdCqKCkgKz4aI4AAAggkoUBDY5P8b58sEwBTv13iyDWk/yYAWhUmBWTFR2MEEEAAgSQU+HL2WhP+uqSNl7KqWkeuIf03AdCqMCkgKz4aI4AAAggkmcDW7XVyZs8MEwAHzVjt2LWj/yYAWhUnBWTFR2MEEEAAgSQTeO77ZSb8XfnONKlvaHTs2tF/EwCtipMCsuKjMQIIIIBAEgksX79NjksdYwLgrIIyR68Z/TcB0KpAKSArPhojgAACCCSJwPbaernpwxkm/D06dKHj14r+mwBoVaQUkBUfjRFAAAEEkkBgUVG5XPLGFBP+Tn5unKwv3+H4taL/JgBaFSkFZMVHYwQQQACBBBbQ2718MCVfft1trAl/+rxfvf9fIgz03wRAqzqlgKz4aIwAAgggkKAC68p3yG0fzTLBr+OzY0QP+1ZU70yYtaH/JgBaFSsFZMVHYwQQQACBBBGo2dkgMwtK5e0JK+X2j2ZJ5+7pJvyd+vw4Gbmg2HGPemuPlf6bANhejbT5PgXUJg9vIoAAAggkuIDeyPmugXO8gU/39nl+9Bm/a0q3J+Qa0n8TAK0KlwKy4qMxAggggIDDBXSPnyfwndd7ojzxVbYMnVMoBZurEm6vny81/TcB0LceQv6dAgqZjAYIIIAAAgkioBd56IUdGgC/nleY0IGvOTn9NwGweU2E9JoCComLiRFAAAEEEkggM7fEhL8zXswQPQcwmQb6bwKgVT1TQFZ8NEYAAQQQcLDAI0MWmACY9kOOg5cyvEWj/yYAhlc5P7eigKz4aIwAAggg4FABvfjjhO677u+nj3hLtoH+mwBoVdMUkBUfjRFAAAEEHCowMGuV2ft33fvTHbqEdotF/00AtKogCsiKj8YIIIAAAg4UaGpqksvfnmoC4Jez1zpwCe0Xif6bAGhVRRSQFR+NEUAAAQQcKLCwcKsJfyc9ly7bahLn6R6hUNJ/EwBDqZcW01JALUgYgQACCCCQ4AL/GbnEBMCnhy9K8DVpffHpvwmArVdHEO9QQEEgMQkCCCCAQMIIVNXWyynPjzMBcO7qLQmz3KEuKP03ATDUmvGbngLy4+AFAggggECCC+gNn/XGz13fzEyqGz833yz03wTA5jUR0msKKCQuJkYAAQQQcLjATR/OMAGw/9QChy+p3eLRfxMArSqIArLiozECCCCAgIME8jZVmvB3fLexUlJZ46Ali/yi0H8TAK2qigKy4qMxAggggICDBN6duNIEwAc/n++gpYrOotB/EwCtKosCsuKjMQIIIICAgwTu/Hi2CYBD5iTnvf98qem/CYC+9RDy7xRQyGQ0QAABBBBwoEBdfaOc2CPdBMD8kkoHLmFkF4n+mwBoVVEUkBUfjRFAAAEEHCKwYO0WE/5+02tCUl/96+Gm/yYAemohrH8poLDYaIQAAggg4DCBDzPzTQD8v8ELHLZk0Vkc+m8CoFVlUUBWfDRGAAEEEHCIwL2fzTUB8NPpqx2yRNFdDPpvAqBVhVFAVnw0RgABBBBwgEBDY5Oc9sJ4EwCXratwwBJFfxHovwmAVlVGAVnx0RgBBBBAwAECS4srTPg7PW28aBh0w0D/TQC0qnMKyIqPxggggAACDhAYmLXKBMD7B81zwNLEZhHovwmAVpVGAVnx0RgBBBBAwAECD30x3wTAZH/8my81/XeCBsCioiIpLi72bsu5c+fKk08+KR9//LF3XKi/vPrqq5KSkmLmE2xbCihYKaZDAAEEEHCiQGNjk5zVM8MEwIWFW524iFFZJvrvBA2AF110kQwePNgUxcaNG+XAAw+U888/Xw4//HDp2bNnyMUyb9486dSpk5xxxhkEwJD1aIAAAgggkKgCK39+/u/Jz40TvRm0WwYCYIIGwIMPPlhyc3NNnfbt21cuuOAC83tGRoYcd9xxIdVvVVWVdO7cWSZOnCiXXHIJATAkPSZGAAEEEEhkgcGz15q9f38ZODuRVyPkZScAJmgA3G+//WTNmjVmg1933XXy2muvmd8LCwtl7733DqkQ7rnnHnnqqadMGwJgSHRMjAACCCCQ4AKPf5VtAmCfiXkJviahLT4BMEED4HnnnSfPPvusZGVlmcC3ePFis+Vnz54tRx99dNBVMGzYMDn99NOlpqbGtGkvANbW1ooWjedHz0PU8wb1NQMCCCCAAAKJJNDU1CTnvjzRBMBZBWWJtOjWy0oATNAAmJmZKXoY+Fe/+pXcf//93kLo1q2b3HTTTd7Xbf2iF5IcccQRsmTJEu9k7QXAtLQ0E/g09Pn+EAC9hPyCAAIIIJAgAmtKt5vwd0L3sVKzsyFBljoyi0kATNAAqJu/oaFBtm71v2JJDwuXlJQEVR3ff/+9CXG77767eH401O22227mtc6/+cAewOYivEYAAQQQSFSB4fOKTAC8pd/MRF2FsJebAJigAXDHjh1SXV3t3fBr166Vd999V8aPH+8d194vlZWVsmzZMr+fc845R+6++24zrr32+j4FFIwS0yCAAAIIOFHgn8MXmwD4xvgVTly8qC4T/XeCBsArrrhC+vfvb4qjvLxc/vu//1uOOeYYcz5gv379wi6a9g4BN58xBdRchNcIIIAAA
okicNHrk00AnLpyc6IscsSWk/47QQPgYYcdJjk5OaYQBg4caO7f19jYKCNGjJCTTz457AIhAIZNR0MEEEAAgQQSWF++w4S/41LHSFVtfQIteWQWlQCYoAFwn332Eb3liw633XabvPjii+Z3vbBD34vVQAHFSprPQQABBBCIpMCoRetMALzu/emRnG3CzIv+O0EDYJcuXURvAK2BT58CMmvWLFN0CxYsMIeDY1WBFFCspPkcBBBAAIFICvT4fqkJgL1GL4/kbBNmXvTfCRoAR44cKXvssYe5Dczll1/uLbhXXnlFrrrqKu/raP9CAUVbmPkjgAACCERD4Nr3ppsAOHrJ+mjM3vHzpP9O0AColaXPAM7OzhY9988zzJ07V1asiN3VTBSQR55/EUAAAQQSRaC2vkH03n8dn204e50AACAASURBVB0jRVt+uaNGoix/JJaT/juBA6CnAPRpHPoTj4ECioc6n4kAAgggYCOwqKjchL+zemaIPg3EjQP9d4IGQN3r17NnT3P+nz4NRH8OOugg6dWrl98ewWgXNQUUbWHmjwACCCAQaYEvZq0xAfCeT+dGetYJMz/67wQNgKmpqdKhQwfRe/7po9z058MPPzTjunfvHrMCpIBiRs0HIYAAAghESOCZEbtuAP1WRm6E5ph4s6H/TtAAeOSRR8oPP/zQouJGjRolRx11VIvx0RpBAUVLlvkigAACCERL4Ip3ppo9gBOWb4rWRzh+vvTfCRoA99prL1m5cmWLAsvNzTVPA2nxRpRGUEBRgmW2CCCAAAJREdheWy9682e9AKRkW01UPiMRZkr/naAB8LzzzpMnnniiRY09/vjjou/FaqCAYiXN5yCAAAIIREJgzqoyE/5+13tSJGaXsPOg/07QADh16lTZb7/95JRTTpEHHnjA/Ojv+++/v2RlZcWsICmgmFHzQQgggAACERAYmLXKBMCHvpgfgbkl7izovxM0AGrJrV+/XvSCj5tvvtn89OjRwzwe7qGHHopZRVJAMaPmgxBAAAEEIiDw+FfZJgC+PzkvAnNL3FnQfydwAAxUdosXLza3hAn0XjTGUUDRUGWeCCCAAALRErj4jSkmAE5buTlaH5EQ86X/JgBaFSoFZMVHYwQQQACBGAqUV9eZ8KcXgOjvbh7ovwmAVvVPAVnx0RgBBBBAIIYCutdPw5/uBXT7QP9NALT6DlBAVnw0RgABBBCIocAHU/JNANTzAN0+0H8nWAC86aabpK2frl27cg6g27/VrD8CCCCAQEABvfJX9wAOmLYq4PtuGkkATLAAeN9990kwP7EqYgooVtJ8DgIIIICArYDe+08DoN4L0O0D/XeCBUCnFSwF5LQtwvIggAACCAQS0Kd+aPjTp4Do00DcPtB/EwCtvgMUkBUfjRFAAAEEYiSgz/3VAKjPAWYQof8mAFp9DyggKz4aI4AAAgjESODtjFwTAJ8ZsThGn+jsj6H/JgBaVSgFZMVHYwQQQACBGAnc8+lcEwC/mLUmRp/o7I+h/yYAWlUoBWTFR2MEEEAAgRgINDU1yVk9M0wAXFRUHoNPdP5H0H8TAK2qlAKy4qMxAggggEAMBIq2VJvwd0L3sVJb3xCDT3T+R9B/EwCtqpQCsuKjMQIIIIBADATGLNlgAuC1702PwaclxkfQfxMArSqVArLiozECCCCAQAwEXhn7kwmA3b9bGoNPS4yPoP8mAFpVKgVkxUdjBBBAAIEYCFzdN8sEwOHzi2LwaYnxEfTfBECrSqWArPhojAACCCAQZYH8kkoT/n7dbaxs2V4X5U9LnNnTfxMAraqVArLiozECCCCAQJQF3hy/6/5/DwyaF+VPSqzZ038TAK0qlgKy4qMxAggggEAUBfT2Lxe+NtnsAfxx8fooflLizZr+mwBoVbUUkBUfjRFAAAEEoigwf80WE/5OfX6c7Kjj9i++1PTfBEDfegj5dwooZDIaIIAAAgjESECv+tXn//5zOI9/a05O/00AbF4TIb2mgELiYmIEEEAAgRgJ1NU3ypk/P/1jel5pjD41cT6G/psAaFWtFJAVH40RQAABBKIkMGH5JrP379yXJ0pDY1OUPiVxZ0v/TQC0ql4KyIqPxggggAACURJ4dMhCEwBfGr08Sp+Q2LOl/yYAWlUwBWTFR2MEEEAAgSgIbKvZKSf2SDcBcNm6iih8QuLPkv6bAGhVxRSQFR+NEUAAAQSiIKBP/NCLPy57e6rorWAYWgrQfxMAW1ZFCGMooBCwmBQBBBBAICYCfx4w2wTAD6bkx+TzEvFD6L8JgFZ1SwFZ8dEYAQQQQCDCAhsraqRT6hgTAIu2VEd47skzO/pvAqBVNVNAVnw0RgABBBCIsMDH0wpM+Lu1/8wIzzm5Zkf/TQC0qmgKyIqPxggggAACERa4/v3pJgAOmbM2wnNOrtnRfxMArSqaArLiozECCCCAQAQFanY2yPHdxpoAuK58RwTnnHyzov8mAFpVNQVkxUdjBBBAAIEICixYu+vZv+e8PJGrf9txpf8mALZTIm2/TQG17cO7CCCAAAKxExiYtcrs/Xvw8/mx+9AE/ST6bwKgVelSQFZ8NEYAAQQQiKDA419lmwDI7V/aR6X/JgC2XyVtTEEBtYHDWwgggAACMRX4w+tTTACcnlca089NxA+j/yYAWtUtBWTFR2MEEEAAgQgJlFXVmvCnTwCp2LEzQnNN3tnQfxMAraqbArLiozECCCCAQIQEpqwoMQHwj29lRmiOyT0b+m8CoFWFU0BWfDRGAAEEEIiQwNsTVpoA+PTwRRGaY3LPhv6bAGhV4RSQFR+NEUAAAQQiJHDPp3NNABw8a02E5pjcs6H/JgBaVTgFZMVHYwQQQACBCAg0NTXJmT0zTABcUlwegTkm/yzovwmAVlVOAVnx0RgBBBBAIAICa0q3m/DXuUe61NU3RmCOyT8L+m8CoFWVU0BWfDRGAAEEEIiAwKhF60wAvPHDGRGYmztmQf9NALSqdArIio/GCCCAAAIREHjxxxwTANN+yInA3NwxC/pvAqBVpVNAVnw0RgABBBCIgIDu+dP7/32fvS4Cc3PHLOi/CYBWlU4BWfHRGAEEEEDAUkDP+dNz/zQA6rmADMEJ0H8TAIOrlFamooBagWE0AggggEBMBJYWV5jwp1cB69XADMEJ0H8TAIOrlFamooBagWE0AggggEBMBPS+f7r3T+8DyBC8AP03ATD4agkwJQUUAIVRCCCAAAIxE/jn8MUmAOqTQBiCF6D/dnkA7Nevn3Tp0kUOOOAA8/P73/9e0tPTg64gCihoKiZEAAEEEIiCgD77V/cATl6xKQpzT95Z0n+7PAD++OOPMnbsWMnLy5OVK1dK9+7dZY899pCcnOAupaeAkvePA2uGAAIIOF1gW81OE/40AJZV1Tp9cR21fPTfLg+AgarxkEMOkU8++STQWy3GUUAtSBiBAAIIIBAjgRn5pSYAXvT65Bh9YvJ8DP03AdBbzQ0NDTJs2DDZc889Zfny5d7xbf1CAbWlw3sIIIAAAtEU+GBKvgmAj3+VHc2PScp5038TAGXp0qWy3377
ye677y4HHXSQOSTcWrXX1taKFo3np7i4WFJSUszr1towHgEEEEAAgWgI/O2L+SYADsxaFY3ZJ/U8CYAEQKmrq5P8/HxZsGCBpKamyuGHH97qHsC0tDQT+DT0+f5oITEggAACCCAQK4EddQ3ym14TTACcv2ZLrD42aT6HAEgAbFHMl112mTz88MMtxusI9gAGZGEkAggggECMBd7KyDXh74JXJ4s+DYQhNAECIAGwRcV07dpV7r333hbjA42ggAKpMA4BBBBAIJoCq0u3S+fuux7/Nm7Zxmh+VNLOm/7b5QFQD/lOmzZN1qxZY84F1Ne77babTJgwIaiip4CCYmIiBBBAAIEICejj3u79bK7Z+/fXT+fy+LcwXem/XR4AH3jgAenYsaO58rdDhw6ih3+DDX9acxRQmN88miGAAAIIhCUwPmejCX+6B1D3BDKEJ0D/7fIAGF7Z/NKKAvrFgt8QQAABBKIroBd+6Dl/euPnN8aviO6HJfnc6b8JgFYlTgFZ8dEYAQQQQCAEAd8LP6rr6kNoyaTNBei/CYDNayKk1xRQSFxMjAACCCAQpsAavws/NoQ5F5p5BOi/CYCeWgjrXwooLDYaIYAAAgiEKHAfF36EKNb25PTfBMC2K6SddymgdoB4GwEEEEDAWqCkssac93dc6hhZtbnKen7MgIs4tQZSKITwBQiA4dvREgEEEEAgOIHZq8pMALz4jSnBNWCqdgXovwmA7RZJWxNQQG3p8B4CCCCAQCQEhs0tNAHwnk/nRmJ2zIPbuJkaYA+gxVeBAGiBR1MEEEAAgaAEXkn/yQTAF0YtC2p6JmpfgP6bPYDtV0kbU1BAbeDwFgIIIIBARAQeHjzfBMDPZqyOyPyYCecAag2wB9Dim0AAtMCjKQIIIIBAUAJXvjPNBMApuSVBTc9E7QvQfxMA26+SNqaggNrA4S0EEEAAAWuBxsYmObFHugmAei9AhsgI0H8TAK0qiQKy4qMxAggggEA7AuvLd5jw9+tuY6W+obGdqXk7WAH6bwJgsLUScDoKKCALIxFAAAEEIiQwM7/UBMBL38yM0ByZjQrQfxMArb4JFJAVH40RQAABBNoRGDJnrQmA+iQQhsgJ0H8TAK2qiQKy4qMxAggggEA7Ai+PWW4C4Is/5rQzJW+HIkD/TQAMpV5aTEsBtSBhBAIIIIBABAUe/HzXLWC+mLUmgnNlVvTfBECrbwEFZMVHYwQQQACBdgQue3uq2QM4beXmdqbk7VAE6L8JgKHUS4tpKaAWJIxAAAEEEIiQQENjk3TuvusWMEVbqiM0V2ajAvTfBECrbwIFZMVHYwQQQACBNgSKt1abvX8aAjUMMkROgP6bAGhVTRSQFR+NEUAAAQTaEJiet+sWMH98i1vAtMEU1lv03wTAsArH04gC8kjwLwIIIIBApAUGz951C5gHP58X6Vm7fn703wRAqy+BEwtozqoyue2jWfL3LxfIxooaq/WjMQIIIIBA/AR6jd51C5iXRi+P30Ik6Sc7sf+ONXVKrD8wmT7PSQWk54o8OmShOV+k47NjzL9nvJghPyxe3yp5U1OTbNleJ3py8fL122Temi2iDxufsHyTLFi7VQrLqqW6rr7V9pF8Q593qc+5zFlfIZu21UhdfeiPPNL10XZVtfVmvXQ+68p3xGwdIunBvBBAAIEHBs0zf8u/nL0WjAgLOKn/jvCqBT07AmDQVC0njFYBfbOgWG7uN1P0/k//GblEXk1fIQOmrRIdP2VFiSwuKjehbXttvQk3b2fkeh8WflzqGHn2myVy7XvTvWHw0aELZev2OrMCeiKx7iVM+yFHftd7kncaT2gM9O8pz4+TC1+bLFe8M1Wue3+62cP410/nysOD58s/hy8283orI1c+mlogQ+cUyugl60VvWbCoqFxWba6SzZW1UlJZYwLlyk2VsqS43CzD8HlF8sKoZXJr/5ly2gvjWyzL6WnjRR9/9L99sqTrW5lywauT5Te9Jogujz4X8/huY0XXV386pe4KvYGWX8dpm4vfmGJcdbm7f7dU1O3zmWvM8urjltR2xPwisx69x/5k1u2Jr7LlyWHZ8vTXi+SZEYvl3yMXS7fvlkrPH5fLa+NWSJ+JeWb6fpkFom3+NWKx6OEa3X5X982Suz+ZY9rqex9PK5BvFxbL5BW7AnbB5iopraqVnTzfs+WXizEIIGD+7unfrxn5pWhEWCBa/XeEFzOqsyMAWvBGq4DeGL+iRRhqLdj4Bp87Pp5l9uTpKmmoeHfiShOStO05L080YfK3L01sMe+TnxsnOv6SN6bINe9lyfXvTzeB76Tndt1+oLXPjvT4zj3SzXJosIvEvHU+J3SPzLwisTxtzUMDrYbg3740wQRdvffXXQPnmKCqwXF8zkZZsXGbbKvZKbqnkwEBBJJboL6h0fv3S4/wMERWIFr9d2SXMrpzIwBa+EargHSvWfrSDaLPgHxvUp7oI4D+MSzb7E3SvUrnvzLJu8dPQ4XunRu3bEPAYKB72/QKMt/w0SVtvNm7NXH5JqnZ2dCqgAYNPZyqh2YXFm4V3Uume6/GLt1g9kbqYYn+UwtEA+vzo5bJU18vMnu/9BzEP707zQQZ3Yunn61BVYOm7sHT5e/6Zqbc+fFs0XNbvssultyNlaJ/8HTQw8G6xzK/pMrsKczMLZHZq8rMnk+dTg9N6/mNeoi3RH8qd/3o3rSKHTvNOnlumeBZh9Wl280hbnXVO+q/M2Gl9Ph+qTlX8rb+s4yR2uqeTV0PXa4PM/Pl0+mrZWDWKrP3TvfyfTAl3wRr3fun20X3Bj49fJHx1Db6/ldzC8328OxR1HZ6Lo9uQw11GrIven2y6Hbw3S7B/q57M3XP6O0fzRLdQ6l7I7VOdHvotplVUGb2EBMUWy1t3kDA8QL6d07/Juh/jPVvIkNkBaLVf0d2KaM7NwKghW88C0g7dz0/b335Dm9wam1VNORpMNGQpmEqnPPrWpt3MOP1jxdhJLCUBtWK6p0m0GrQ1r18euh8ZkGpORz95vhcefyrbHPoPdTAqHsT9TQC3fY6Pw3zDAggkBgCU1duNgHw8renJsYCJ9hSxrP/dgoVAdBiS1BAFng0DUtAQ78GRT2PUy/w0b2TujdSz/t86Iv55nxK3eMb6NC37oXVQ8t6PuOgGavNhT476lrfAxzWAtIIAQQiIqDnJ+sewL99MT8i82Mm/gL039wGxr8iQnxFAYUIxuQxE9C9vnoltwZEvQhID7u3dohZD8tf1SdL7vtsrqR+u9QcTtbTAzZU7GDPbcy2GB+EgL+AXqin31m9gIwh8gL03wRAq6qigKz4aBxjAT1XUs/h1IuD9PYSgS4Iah4Sz+41wZx7qnsZta1ehMKAAALRF7j3s7kmAOqdFRgiL0D/TQC0qioKyIqPxnEW0PMy9WKbnzZsM/d/HDa30Fwco4eI9SKeQFdj6y139BZDL49ZLpN+2iR6KyIGBBCIvIDelUH/Q6bn7zJEXoD+mwBoVVUUkBUfjR0uoIeR9Z6TejW63o/S0yH57iU8sUe6OUdJ71GpF7MwIICAvYDexsv
zHzA9FYMh8gL03wRAq6qigKz4aJyAAnr7nVGL1knqt0vMrWx8w6Dey1BvfP3J9NXmfpTcuiIBNzCL7AgBvW2Vfrf0Xqx8j6KzSei/CYBWlUUBWfHROMEF9BCyHj7Weype+c4002H5BsIze2aYp8XoFcf5JZVcUJLg25vFj52A3kNUv0t6KgZDdATovwmAVpVFAVnx0TjJBPQG5vo4wHs+nWsevecbBvX3P7w+xTw2MCtvc8zvRZlk1KxOkgt8NmO1CYD/N3hBkq9p/FaP/psAaFV9FJAVH42TWEDPYdKnx+jTVPSwcOfu/o8VPPX5ceYG1/qEF24SnsSFwKqFJaDPSNf/NOlz4BmiI0D/TQC0qiwKyIqPxi4S0KuF9XnGejFJ89vP6JMO9Ka33GLGRQXBqrYpoI+k1AD49TxuAdMmlMWb9N8EQIvyEaGArPho7FIBPak9u3Cruem0Ph/ac6hYn3GsF5csW1fhUhlWG4FdAnq6hH4v9Ik/DNERoP8mAFpVFgVkxUdjBMxeP937p3sBPUFQ/73hgxkyckGx6K1oGBBwk4A+q13vt6nfg5JtNW5a9ZiuK/03AdCq4CggKz4aI+AV0PMAdW/H419l+z3HWK8k1ptOF22p9k7LLwgks0DepkoT/nSPOOfHRm9L038TAK2qiwKy4qMxAgEFNlfWygdT8uWCVyd79wrqHpFHhiwwzzcO2IiRCCSJgF44pXv//jxgdpKskTNXg/6bAGhVmRSQFR+NEWhToKGxyTx/WK8i9j08fOOHM2TMkg2i7zMgkGwCeu8/rXd9NCND9ATovwmAVtVFAVnx0RiBoAVWbNwm/x652O92Mlf1yZJpKzcHPQ8mRMDpAp7Dvyd0Hyvl1XVOX9yEXj76bwKgVQFTQFZ8NEYgZAE9PPx2Rq6cnjbeu1dQ9xDmrOfK4ZAxaeA4gbcyck1dPzBonuOWLdkWiP6bAGhV0xSQFR+NEQhbYOv2Ouk1ern3gpFOqWPk6a8XyfryHWHPk4YIxFNAL/i4+I1dt3/R520zRFeA/psAaFVhFJAVH40RsBYoLKs2Vw57zhHU+wr2nZTH7WOsZZlBrAWWFJebvX8nPZcueuN0hugK0H8TAK0qjAKy4qMxAhETWFxULrf2n+k9LKxXEI9duoHbaERMmBlFW+Cl0ctN/T42dGG0P4r5Cw9y0CJIoRLCFyAAhm9HSwQiLaCH0H5YvF5+/8okbxC84+NZsnz9tkh/FPNDIKIC+nSc3/XeVbcZORsjOm9mFliA/psAGLgyghxLAQUJxWQIxFCguq5e3p6wUk7skW6CoJ4fqFcQb+KpCjHcCnxUKAKzV5WZWu2SNl5q63n6TSh24U5L/00ADLd2TDsKyIqPxghEVUCfHvLo0IXevYF6fuC7E1eKBkQGBJwk0O27paZO9T8qDLERoP8mAFpVGgVkxUdjBGIisGDtVrnpwxneIHhe74kyYn6R6GE3BgTiLbCzoVHO6plh6nN6Xmm8F8c1n0//TQC0KnYKyIqPxgjETEDPD9Snh1z0+i+Pl7v+/ek8Wi5mW4APak1gyooSE/5++9JEnm7TGlIUxtN/EwCtyooCsuKjMQIxF9Dzqz6aWiCnvfDLjaSf+nqRbKyoifmy8IEIqIDWn97GKO2HHEBiKED/TQC0KjcKyIqPxgjETaCkssZcGKIXiGjnq+cHvj+5nfsHNjWJlJaKrFmz6199zYCAhcCOugY59flxpgb1VAWG2AnQfxMAraqNArLiozECcRfQm+/e3O+X+wde+NpkSW9+/8DycpE+fUR+/WuRlJRffvS1jtf3GRAIQ0BrTf8Dovet1NMUGGInQP9NALSqNgrIio/GCDhCQDteffRW8/sH/rRhm8j48SL77Sey2267fnwDoGecvq/TMSAQooDn8O/LY5aH2JLJbQXovwmAVjVEAVnx0RgBRwk0v3/gPbf3lMZf/UqafvWrX/b6+QZAz+/6/u67EwIdtTWdvzB69a/e90/3AM5bs8X5C5xkS0j/TQC0KmkKyIqPxgg4UqB4a7U8M2CqbN9jb2lI2a3t8OcbAnVPIIeDHblNnbhQM/JLTfg7u9cErv6Nwwai/yYAWpUdBWTFR2MEnCvQp4806SFeT8AL5l+dvm9f564TS+YogRdGLTMB8D8jlzhqudyyMPTfLg+Ar7zyipxzzjmy//77S4cOHeSGG26Q3NzcoOufAgqaigkRSBwBPRlfL/AIJwBqO07mT5xtHacl1fNOPeecTl6xKU5L4e6Ppf92eQD805/+JIMGDZKcnBxZvHixXH311XLsscfK9u3bg/pmUEBBMTERAokloLd6CWaPX2vTlJUl1vqytDEXWFpcYfb+nfL8OKnZybN/Y74BRIT+2+UBsHnRbd68WVJSUmTatGnN3wr4mgIKyMJIBBJbQO/z11q4C2a8tmdAoA2BtzJyTQB8ZMiCNqbirWgK0H8TAP3qKz8/3wTAZcuW+Y1v7QUF1JoM4xFIYAH2ACbwxkuMRb/ynWkmAH6fvS4xFjgJl5L+mwDoLevGxka55ppr5MILL/SOa/5LbW2t2W2shaM/xcXFJjDq7wwIIJAkAmGeA6gXjTRxDmCSFEH0VmNN6XYT/n7dbaxUVO+M3gcx5zYFCIAEQG+B/P3vf5eOHTuaUOcd2eyXtLQ0E/j0MLHvDwGwGRQvEUh0AX3CR4gXgTSm7CYDbn1SctZXJPras/xRFBgwbZUJgHcNnBPFT2HW7QkQAAmApkYee+wxOeaYY2T16tVt1gx7ANvk4U0EkkdA7+en9/Vr7ybQP58T2Ljbr6R6j72ly5Nfy3GpY6TX6OWyvbY+eTxYk4gJ3PLzowe/mMW5ohFDDWNGBECXB0C9FF/D31FHHSV5eXkhlxAFFDIZDRBIHAF9vJs+4aO9EPjzk0C2fjdaHh2y0Ozd0ac76G0+vllQzE1+E2eLR31JN1fWSqfUMaZG1pfviPrn8QGtC9B/uzwAPvLII3LQQQfJ1KlTZePGjd6fHTuC+2JSQK1/uXgHgaQQCPZZwBkZ3tWdklsiF70+2RsE9YT/ics3if6Hk8HdAsPmFpq6uO796e6GcMDa03+7PAD6nsfn+7veGzCYgQIKRolpEEhwAT0crE/40As8fG8Do691fEXLc/521DVIv8wC77NedY+gHvrjma8JXguWi3//oHkmAL4/OfQjTpYfTfNmAvTfLg+Azeoh5JcUUMhkNEAgcQV0D57e5Fnv86f/BrFHT6/yfDV9hZzYI927R1Dv/bahIrijDImLxZI3F6iqrZfOP9fByk2Vzd/mdYwF6L8JgFYlRwFZ8dEYAdcIbKyokdRvl5oLRHRvoD4B4qOpBVJX3+gaA7ev6KhF68x/Ai55YwqnAzigGOi/CYBWZUgBWfHRGAHXCSxfv80cCtYQqD+XvT1VZhXw6LhkL4SdDY3S9a1Ms83fzgj+efPJ7hLP9aP/JgBa1R8FZMVHYwRcKdDY2CQjFxTL2b0meA8LP/X1IimrqnWlhxtW+rMZq8221m1eWcPNn52wzem/CYBWdUgBWfHRGAFXC+j5gc+PWu
a9LciZPTNk+PwiDg8mWVWUV9fJGS9mmAA4dE5hkq1d4q4O/TcB0Kp6KSArPhojgICILC4ql6v6ZHn3Bt7x8SxZtbkKmyQRSPshx2zbP707jXtCOmib0n8TAK3KkQKy4qMxAgj8LFDf0CgfTyuQk57bdbVw5+7p8sb4FVKxg8OFiVwkBZurRJ/5q+d7Ts8rTeRVSbplp/8mAFoVNQVkxUdjBBBoJlC0pVru+XSud29gl7Tx8sGUfB4r18wpUV4+8PN9/x78fF6iLLJrlpP+mwBoVewUkBUfjRFAIICAPjFkfM5G0SeIeK4W/u1LE+ST6aulZmdDgBaMcqJAVt5ms/10DyCH9J23hei/CYBWVUkBWfHRGAEE2hBoaGwSvXec3jfOEwTP6z1R9IpSgmAbcA54Sw/pewL8iz/mOGCJWITmAvTfBMDmNRHSawooJC4mRgCBMAT0HnL6DNnzX5nkDYLnvjyRPYJhWMaqycCsVWZb6ZXdehUwg/ME6L8JgFZVSQFZ8dEYAQRCEKitb5AvZ6+VC16d7A2Cv31pomjY0GcPMzhDYNJPm7xPfBk8e60zFoqlaCFA/00AbFEUoYyggELRYloEEIiEgD4+Tu8n5x8EJ8iAaaukuq4+Eh/BPMIUWFpcISc/N84E9P+MXMI9HcN0jEUz+m8CoFWdUUBWfDRGAAELAQ2CzY3rMAAAFkdJREFUemj4wtd+2SOoT5rQZwwTBC1gw2y6rnyHnPPyRBP+7v5kjuihewbnCtB/EwCtqpMCsuKjMQIIREBAg8bweUXyh9d/uVjkN70mSL/MAm4fEwHfYGaxrWanXPHOVBP+9IbP+prB2QL03wRAqwqlgKz4aIwAAhEU0CA4Yn6RXOxz1fBZPTPMfQSrajk0HEFqv1npnti/DJxtwp9enLO+fIff+7xwpgD9NwHQqjIpICs+GiOAQBQE9BYk3ywolkvfzDShRG8ho1ej9p2UxxWpEfbW0P3Y0IXG+dTnx8mydRUR/gRmFy0B+m8CoFVtUUBWfDRGAIEoCmgQ/D57nXR965cgqBcovDBqmRSWVUfxk90xa70Xoz7hQwP2Cd3HypTcEneseJKsJf03AdCqlCkgKz4aI4BADAQ8N5S+qk+Wd4/gcalj5JEhC2Rh4dYYLEHyfYQeUr/z412HfU/skU74S8BNTP9NALQqWwrIio/GCCAQQwF9xNyM/FK/Zw3r3qvr3p8uw+cX8XSRILeF3tj5hg9mmDB92gvjZfaqsiBbMpmTBOi/CYBW9UgBWfHRGAEE4iSwYuM2eWbEYuncPd27V/CMFzPkpdHLZXXp9jgtlfM/dnNlrehVvp7zKhcXlTt/oVnCgAL03wTAgIUR7EgKKFgppkMAAScKlFXVmtvF+N5UWsPN376YL4sIN36bLLtwq/fm23q/v9yNlX7v8yKxBOi/CYBWFUsBWfHRGAEEHCKg5wlOXrFJ7vtsrnRKHePdK6i3N5mZX+rqJ1roofPPZqw2F3poOL7kjSmyhr2kDqnc8BeD/psAGH71iAgFZMVHYwQQcKBAfkml/HP4Yjm+21hvELz+gxny7sSV8uPi9bJ8/TbXPHtYb+isF8to8NOfv3+5gJs8O7Bmw1kk+m8CYDh1421DAXkp+AUBBJJMoHhrtblljF7l6glAvv/qYePb+s+SR4culBd/zDGHkvX+g3roWG+OnOiDPtdX9/bpOuttXnQvoO4NZEgOAfpvAqBVJVNAVnw0RgCBBBDQCx8GZq2Sf49cLDf3m2luKu0bBAP93rlHuplWLyoZs2SDlFbVJsCa7lpEvbBDz4H0rJcGXT3/jyG5BOi/CYBWFU0BWfHRGAEEElRgy/Y6mb9mi4xesl4+nb5aXk1fIU8PX2QeiaaPn/OEJ8+/ejj5gUHzZNyyDY7cO6h79mYVlMndn8zxLrueC/n4V9mydXtdgm4lFrstAfpvAmBb9dHuexRQu0RMgAACLhPQMKW3kvl2YbH0+H6p97YpnjCoATHthxzRQ6yxOqSqn6OHtPUcxl6jl8sTX2XL/YPmyW0fzZKr+2Z5r+7VZdSwqmFWz4VkSF4B+m8CoFV1U0BWfDRGAAGXCOSXVJm9hOe+PNG7h03Dlh5e1TCoe9/00XWRGvTiDb3p9YeZ+fLQF/NFb9viCaCt/auHrTWwFm3hMXmR2g5Ong/9NwHQqj4pICs+GiOAgMsENOTpM3P1whF9LrFvGDuzZ4Y8OSzbXGyhh5er6+rb1dFHsulNrScu32TOU9T2Xd/85dnHvvP/dbexcv37082FLZ9MXy1fzys0h7Azc0vM4Ww9rM3gHgH6bwKgVbVTQFZ8NEYAARcL7KhrkAnLN5knkgQ6b1CfV3z521PNrVf+b/ACc49CvS/hrf1nmsO2gdr4Br4LX5tsbuEyYNoqE/Bqdja4WJtVby5A/00AbF4TIb2mgELiYmIEEEAgoIDuGdTDwO9MWGkuFjmvd/uHbD1hT/ccXvNelgmKfSbmmT2M+oQTBgTaEqD/JgC2VR/tvkcBtUvEBAgggEBYAiWVNebpJHr/vcGz18rweUXyffY6Gbt0gxn/04ZtUlmzM6x50wgB+m8CoNW3gAKy4qMxAggggAACcRGg/yYAWhUeBWTFR2MEEEAAAQTiIkD/TQC0KjwKyIqPxggggAACCMRFgP6bAGhVeBSQFR+NEUAAAQQQiIsA/TcB0KrwKCArPhojgAACCCAQFwH6bwKgVeFRQFZ8NEYAAQQQQCAuAvTfBECrwqOArPhojAACCCCAQFwE6L8JgFaFRwFZ8dEYAQQQQACBuAjQfxMArQqPArLiozECCCCAAAJxEaD/JgBaFR4FZMVHYwQQQAABBOIiQP9NALQqPArIio/GCCCAAAIIxEWA/psAaFV4FJAVH40RQAABBBCIiwD9NwHQqvAoICs+GiOAAAIIIBAXAfpvAqBV4VFAVnw0RgABBBBAIC4C9N8EQKvCq6iokJSUFCkuLhYtJn4woAaoAWqAGqAGnF8D2m9r/639uFuHFLeueCTW21NAWkT8YEANUAPUADVADSRWDWg/7taBAGix5RsbG83eP/0fhP6P78QTT2yxF7C9cc3f97z2hEv9NxL/m/TMN5h5tTdta+8HGt/euObve16z/rv+d8r2T+z6D/R3wVPjnu+i72vP74lY/4HWNdA4zzqy/rv2kvl6eH5n+0f/75/22+qs/bhbBwJgBLf8Kaec0mJu7Y1r/r7ntf5x1P9J6r+RGDzzDWZe7U3b2vuBxrc3rvn7ntesP9s/Gepfv2+emvZ899p67XkvEes/0LoGGudZx0AenvdY/8T7/gfa1oHGebaxU7e/Z7nc8C8BMIJb+YMPPmgxt/bGNX/f8zrSfwA9822xgAFGtDdta+8HGt/euObve16z/pHtADyuATZ3i1HtTdva+4HGtzeu+fue18my/RXXs04e6LZee95LxPUPtK6BxnnWMZCH5z3WP/G+/4G2daBxnm3s1O3vWS43/EsAdOhWjvQfQIeuZquLxfpHtgNoFdqhb7D92f6R3APs0DJvdbGof3fXf6uFEeE3CIARB
o3U7GprayUtLU30XzcOrD/bn/rn+8/fP/7+u7H/i9U6EwBjJc3nIIAAAggggAACDhEgADpkQ7AYCCCAAAIIIIBArAQIgLGS5nMQQAABBBBAAAGHCBAAHbIhWAwEEEAAAQQQQCBWAgTAWEnzOQgggAACCCCAgEMECIAO2RAsBgIIIIAAAgggECsBAmCspKP4Oe+8846ceuqp5okDTzzxhDQ1NUXx05w169zcXDnzzDO9P3vvvbd8//33zlrIKC/N6tWr5dJLLzXb//TTT5ft27dH+ROdNfuOHTtKly5dTA2ogxuH6upqOfbYY+WZZ55x1eqXl5fLb3/7W7PtTzvtNBkwYICr1r+oqEguueQS893X78CIESNctf66sjfeeKMcfPDBcsstt7hu3W1XmABoKxjn9ps3b5bjjz9eampqpKGhQS644AKZNWtWnJcqPh9fVVUlhx12mOsC0MUXXyxZWVkGfcuWLVJfXx+fDRCnT9UAqNvezUP37t3l9ttvd10A1L95Gn510P/4dOrUScrKylxTChs2bJBFixaZ9d24caMcddRRrvv7l5mZKT/++CMBMIyqJwCGgeakJhoA9X/++j9hDYHnnnuuFBQUOGkRY7YsQ4cONZ1gzD7QAR+Uk5Mjl112mQOWJH6L4PYAmJeXJzfffLMMGjTIdQHQt+r0Pz9aC6Wlpb6jXfX7GWecIbpX0G2DhkD2AIa+1QmAoZuF1GLatGly7bXXypFHHin6aKNAhyf12Yj6h2uvvfaS8847T+bOnRvSZ7z33ntywAEHyCGHHCLdunULqW20J47F+nvW4YYbbpBvv/3W89IR/0Z7/bWedL21xn7zm99I7969HbHenoWI9vrr5+hen7PPPlvOOeccGTJkiOejHfFvLNb/+uuvl5UrVzoyAMZi/fU/vxp89tlnnxbPXY53EcRi/T3ruGDBAtHD4E4aYrX+BMDwtjoBMDy3oFulp6dLjx495LvvvgsYAL/++mvZc8895bPPPpPly5fLQw89ZM5nKCkp8X6GnuOmX+zmP+vXr5etW7fKlVdeKfq/3x07dpjzQfRL55Qh2uvvWU99dmaHDh3MXlDPOCf8G+31HzlypBx66KHmf/362Cw9B27ChAlOWHWzDNFef/2QdevWmc/Sw2F6LuySJUtcs/6jRo2Sf/3rX2Z9nbgHMBbb37OxN23aZE6B0X+dMsRq/fXvv9b+zJkznbLqZjlitf4EwPA2OwEwPLewWgXaA6h7/B577DHv/BobG815HK+++qp3XFu/6Em/jz76qHeSN954Q15//XXvayf9Eo3196zf4MGD5a677vK8dOS/0Vh/Pd9T/wPgGXT7648Th2isf/P11DCkQciJQzTWPzU1VY455hhzBEHPfz3wwAOlZ8+eTlz9gP8Btv3713xFH3nkEdH/FDlxiMb21/XU//j94Q9/EP0b6OQhWuuv60wADG/LEwDDcwurVfMvQF1dney+++4tDgvfc889ood1ghlmz54tZ511lvcikKuvvlp0r4ATh2isv2c99RCongjs5CEa668XfOj21z3B+p8HdRg9erQjGaKx/nrif2VlpVlfvRBEDwXPmzfPNevvu6JO3APou3zR2P66t8+z/SsqKsxRkqVLl/p+rGN+j8b66x0f7rzzTklLS3PMera2INFYf89nEQA9EqH9SwAMzctq6uZfAD2Eq+OaX7X773//25wLGOyH6RWAJ598sjkE4OTbwERr/fUP/xFHHCEaqJ08RGv99TCL3v5FTxF4+umnHUsQjfVftWqVOf9LzwHT9e/Tp4+r1t93ZRMtAEbi75+eL62nyOj219ugfPTRR74kjvo9GvU/ffp02W233by3wVKLRAnAkdj+uoH1IrjDDz/cnAN69NFHt+hPHVUEDlsYAmAMN0g0/gDEcPGtP4r1978IKFJ/AK03TIxmwPZn+/teBEf9R2YHQIy+vtYf4/bvvzVgFGZAAIwCamuzbP4FiMQh4NY+y4njWX//AMD2tz8Fwol13toyUf/Uv28A5vvvru9/a38X4jmeABhD/eYdgH60ngT9+OOPe5dCz+PS3djBXgTibZgAv7D+/h0g25/65/vP3z/+/ruj/3NiF00AjPJW0RPT9U7t+qMBSB/bpr8XFhaaT9bbwOj9/z7//HP56aef5OGHHza3gXHSrQxsiFh/tj/1z/efv3/8/Xdj/2fTd8aiLQEwysp6dZIWfvOfe++91/vJ77//vnmah94PUPcIzJkzx/teov/C+rP9m9e+vqb++f57/rbx94+///o0q2Ts/zw17tR/CYBO3TIsFwIIIIAAAgggECUBAmCUYJktAggggAACCCDgVAECoFO3DMuFAAIIIIAAAghESYAAGCVYZosAAggggAACCDhVgADo1C3DciGAAAIIIIAAAlESIABGCZbZIoAAAggggAACThUgADp1y7BcCCCAAAIIIIBAlAQIgFGCZbYIIIAAAggggIBTBQiATt0yLBcCCCCAAAIIIBAlAQJglGCZLQIIJIZAx44d5d13302MhWUpEUAAgQgJEAAjBMlsEECgdQF99NsNN9zQ+gRxfGfz5s1SXV0dxyVo+6OdbNf2kvMuAgg4WYAA6OStw7IhkCQC8QgxO3fudLResMsXDztHw7FwCCAQEQECYEQYmQkCCLQl0F6IWbZsmVx11VWy3377yRFHHCF33323lJaWemc5btw4ufDCC+Wggw6SQw89VK655hopKCjwvr9mzRpJSUmRr7/+Wi6++GLZa6+9ZNCgQeL53DfffFP+53/+x7R99NFHxTd8NT8ErPMZOHCg3HjjjbLPPvvICSecID/88IP3s/QXfa3j9XMuvfRS+fzzz83nl5eX+03n+0Ln269fP7nuuutk3333lbS0NGloaJAHHnhAOnXqJHvvvbeceOKJ0qdPH28znUbb+f5kZmaa94uKiuS2224zJocccohcf/31og4MCCCAQDACBMBglJgGAQSsBDxBLNBMNDR16NBBunXrJitWrJDs7Gy54oorpGvXrt7Jv/nmG/n2228lPz9fFi1aZEJUly5dpLGx0UzjCYAapHS61atXy4YNG0wAPPDAA+Xvf/+7mffo0aNN+BowYIB33oEC4DHHHCNfffWV+bx//OMfsv/++8uWLVtMG533HnvsIf/6178kNzdXhg0bJkcffXRQAVDD7WeffSarVq2SwsJCE0RfeOEFmT9/vlnmIUOGmOUbPny4+ayqqiq5/fbbTTjeuHGj6E9dXZ1pd8opp5jwuHTpUvnpp5/kL3/5i5x00knmfe/K8QsCCCDQigABsBUYRiOAQOQE2gqAL730klx55ZV+H1ZcXGwC1cqVK/3Ge17o3kHdK6Z7DnXwBEDfvWc6Xj9XA57uafMMutfsjjvu8Lw07/teBKLzfe6557zvb9++3XyW7oXU4dlnn5XTTz/d+77+0qNHj6AC4FNPPeXXLtCLxx57TG655RbvW4HsvvzySxP2mpqavNNpMNQ9lhkZGd5x/IIAAgi0JkAAbE2G8QggEDGBQCHGM/Nbb73V7FHTw7++PxrE0tPT
zWR5eXly5513ynHHHScHHHCAmU7fHzt2rHnfEwBnzJjhma35Vz/36quv9hune/R89y4G2gM4YsQIvza6F/GLL74w4/TQ8P333+/3vh4S1uVp7xCw7uFrPnzwwQdy9tlny+GHH27WS/cunnvuud7JAtnp3sfdd9/dz0vtdtttN3OY2duYXxBAAIFWBAiArcAwGgEEIicQKMR45q7n/t18883mcKse4vX90b1vOuihTd1LOGnSJHO4MycnxwSu77//3rzvCYB6eNh3CPS5Tz75pFxyySXeyQIFQM98PRPpuYd6TqEONgGw+Xz18LGe+/fhhx+aQ9+67g8//LCceeaZno/2nsfoHSFiDmmfd955flYet4qKCt9J+R0BBBAIKEAADMjCSAQQiKRAoCDmmX/37t1NwKuvr/eM8vu3rKzMhL2srCzv+OnTp8ctAOohYD3/0HfQQ8bB7AFsHgAff/xx+eMf/+g7K7nsssv8AuBDDz0k1157rd80eg6jXvixbds2v/G8QAABBIIVIAAGK8V0CCAQtoAGQL1aVvfQ+f7olazr1683F4HooeB58+aZq3vHjx8v9913nzl3Ty/0OOyww8yVwbqXa/LkyeYQqQYuT6CK5R5Az0Ug//nPf0TPUdQLNvSiEV2etva++S6vB7Jv376ih5d1fXVeGiT1te8ewN69e8uxxx5rLjjRcx/1Cma9b2Hnzp2NqQZjXSa9OviJJ54QPX+SAQEEEGhPgADYnhDvI4CAtYAGQA1AzX8efPBBM289x++mm26Sgw8+2FzIcPLJJ4teMOG5yGHixImiV73qbVfOOOMMmTp1qplXPAKgLnDz28D079/fLE9NTU2rVoECYG1trQm6eohZ1/2RRx6R1NRUvwCoN6rWq6L1SmSdh+c2MHpF8D333GPOHVSX448/XnRvIXsFW90EvIEAAj4CBEAfDH5FAAEEwhF4+eWXzV7AcNrSBgEEEIiHAAEwHup8JgIIJLSAXrShh6v1fn6DBw82N2PWW8EwIIAAAokiQABMlC3FciKAgGME9PD0kUceaQ5J67l4vXr1ktYuYnHMQrMgCCCAgI8AAdAHg18RQAABBBBAAAE3CBAA3bCVWUcEEEAAAQQQQMBHgADog8GvCCCAAAIIIICAGwQIgG7YyqwjAggggAACCCDgI0AA9MHgVwQQQAABBBBAwA0C/w+ELQeExqjNywAAAABJRU5ErkJggg==)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "tn1RV-jfOjt1" - }, - "source": [ - "# `benchmark`\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "rsmTl5zfwjM3" - }, - "source": [ - "You can try to speed your system by setting `benchmark=True`, which enables cudnn.benchmark. This flag is likely to increase the speed of your system if your input sizes don’t change. This flag makes cudnn auto-tuner look for the optimal set of algorithms for the given hardware configuration. This usually leads to faster runtime.\n", - "But if your input sizes changes at each iteration, then cudnn will benchmark every time a new size appears, possibly leading to worse runtime performances." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "dWr-OCBgQCeb" - }, - "outputs": [], - "source": [ - "trainer = pl.Trainer(gpus=1, benchmark=True)\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "qwAvSKYGa24K" - }, - "source": [ - "# `deterministic`\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "tl5mfmafwmat" - }, - "source": [ - "PyTorch does not guarantee reproducible results, even when using identical seeds. To guarentee reproducible results, you can remove most of the randomness from your process by setting the `deterministic` flag to True.\n", - "\n", - "Note that it might make your system slower." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Mhv5LZ3HbNCK" - }, - "outputs": [], - "source": [ - "trainer = pl.Trainer(gpus=1, deterministic=True)\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "u_5eJSvTf60f" - }, - "source": [ - "# Exploding and vanishing gradients" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "B6drjh4pq6Jv" - }, - "source": [ - "## track_grad_norm\n", - "\n", - "You can debug your grad norm to identify exploding or vanishing gradients using the `track_grad_norm` flag.\n", - "\n", - "Set value to 2 to track the 2-norm. or p to any p-norm." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "2taHUir8rflR" - }, - "outputs": [], - "source": [ - "# track the 2-norm\n", - "trainer = pl.Trainer(track_grad_norm=2)\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "3vHKxmruk62f" - }, - "source": [ - "May be set to ‘inf’ infinity-norm." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "g7TbD6SxlAjP" - }, - "outputs": [], - "source": [ - "trainer = pl.Trainer(track_grad_norm='inf')\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "TcMlRe7ywpe6" - }, - "source": [ - "## Gradient clipping\n", - "\n", - "\n", - "Exploding gradients refer to the problem that the gradients get too large and overflow in training, making the model unstable. Gradient clipping will ‘clip’ the gradients or cap them to a Threshold value to prevent the gradients from getting too large. To avoid this, we can set `gradient_clip_val` (default is set to 0.0).\n", - "\n", - "[when to use it, what are relevant values]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "jF9JwmbOgOWF" - }, - "outputs": [], - "source": [ - "trainer = pl.Trainer(gradient_clip_val=0.1)\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ggb4MkkQrr1h" - }, - "source": [ - "# truncated_bptt_steps\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "s1Iu6PyAw9_r" - }, - "source": [ - "If you have a large recurrent model, you can use truncated_bptt_steps flag to split up the backprop over portions of the sequence. 
This flag will automatically truncate your batches and the trainer will apply Truncated Backprop to it.\n", - "\n", - "Make sure your batches have a sequence dimension.\n", - "\n", - "Lightning takes care of splitting your batch along the time-dimension.\n", - "```\n", - "# we use the second as the time dimension\n", - "# (batch, time, ...)\n", - "sub_batch = batch[0, 0:t, ...]\n", - "Using this feature requires updating your LightningModule’s pytorch_lightning.core.LightningModule.training_step() to include a hiddens arg with the hidden\n", - "\n", - "# Truncated back-propagation through time\n", - "def training_step(self, batch, batch_idx, hiddens):\n", - " # hiddens are the hiddens from the previous truncated backprop step\n", - " out, hiddens = self.lstm(data, hiddens)\n", - "\n", - " return {\n", - " \"loss\": ...,\n", - " \"hiddens\": hiddens # remember to detach() this\n", - " }\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "WiTF1VMtruMU" - }, - "outputs": [], - "source": [ - "# backprop every 5 steps in a batch\n", - "trainer = pl.Trainer(truncated_bptt_steps=5)\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "8XI_kEWkS-nT" - }, - "source": [ - "To modify how the batch is split, override pytorch_lightning.core.LightningModule.tbptt_split_batch():\n", - "\n", - "```\n", - "class LitMNIST(LightningModule):\n", - " def tbptt_split_batch(self, batch, split_size):\n", - " # do your own splitting on the batch\n", - " return splits\n", - "```\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "oLbEmbmupwQ8" - }, - "source": [ - "# reload_dataloaders_every_epoch\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "CLdNGVv9xD_L" - }, - "source": [ - "Set to True to reload dataloaders every epoch (instead of loading just once in the beginning of training).\n", - "\n", - "```\n", - "# if False (default)\n", - "train_loader = model.train_dataloader()\n", - "for epoch in epochs:\n", - " for batch in train_loader:\n", - " ...\n", - "\n", - "# if True\n", - "for epoch in epochs:\n", - " train_loader = model.train_dataloader()\n", - " for batch in train_loader:\n", - "\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "10AXthXxp311" - }, - "outputs": [], - "source": [ - "trainer = pl.Trainer(reload_dataloaders_every_epoch=True)\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "f513EYl0bmmL" - }, - "source": [ - "# Callbacks\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "2pt7iGh4xNs5" - }, - "source": [ - "\n", - "Lightning Callbacks are self-contained programs that can be reused across projects.\n", - "Callbacks should capture NON-ESSENTIAL logic that is NOT required for your LightningModule to run. Lightning includes some a few built-in callbacks that can be used with flags like early stopping and Model Checkpointing, but you can also create your own callbacks to add any functionality to your models.\n", - "\n", - "The callback API includes hooks that allow you to add logic at every point of your training:\n", - "setup, teardown, on_epoch_start, on_epoch_end, on_batch_start, on_batch_end, on_init_start, on_keyboard_interrupt etc. 
\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "1t84gvDNsUuh" - }, - "source": [ - "## callbacks\n", - "\n", - "Use **callbacks=** to pass a list of user defined callbacks. These callbacks DO NOT replace the built-in callbacks (loggers or EarlyStopping). \n", - "\n", - "In this example, we create a dummy callback that prints a message when training starts and ends, using on_train_start and on_train_end hooks." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "oIXZYabub3f0" - }, - "outputs": [], - "source": [ - "from pytorch_lightning.callbacks import Callback\n", - "\n", - "class PrintCallback(Callback):\n", - " def on_train_start(self, trainer, pl_module):\n", - " print(\"Training is started!\")\n", - " def on_train_end(self, trainer, pl_module):\n", - " print(\"Training is done.\")\n", - "\n", - "# a list of callbacks\n", - "callbacks = [PrintCallback()]\n", - "trainer = pl.Trainer(callbacks=callbacks)\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "cNF74CLYfJJu" - }, - "source": [ - "# Model checkpointing\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "2blgquBrxLtS" - }, - "source": [ - "Checkpoints capture the exact value of all parameters used by a model.\n", - "\n", - "Checkpointing your training allows you to resume a training process in case it was interrupted, fine-tune a model or use a pre-trained model for inference without having to retrain the model.\n", - "\n", - "Lightning automates saving and loading checkpoints so you restore a training session, saving all the required parameters including: \n", - "* 16-bit scaling factor (apex)\n", - "* Current epoch\n", - "* Global step\n", - "* Model state_dict\n", - "* State of all optimizers\n", - "* State of all learningRate schedulers\n", - "* State of all callbacks\n", - "* The hyperparameters used for that model if passed in as hparams (Argparse.Namespace)\n", - "\n", - "By default Lightning will save a checkpoint in the working directory, which will be updated every epoch.\n", - "\n", - "### Automatic saving\n", - "By default Lightning will save a checkpoint in the end of the first epoch in the working directory, which will be updated every epoch." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "XGu0JULrg9l7" - }, - "outputs": [], - "source": [ - "# default used by the Trainer\n", - "trainer = pl.Trainer(default_root_dir=os.getcwd())\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "3s9OjkGuhq1W" - }, - "source": [ - "To change the checkpoint path pass in **default_root_dir=**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "DgdxkrIQhvfw" - }, - "outputs": [], - "source": [ - "trainer = pl.Trainer(default_root_dir='/your/path/to/save/checkpoints')\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Qyvj_bkWrJiE" - }, - "source": [ - "\n", - "You can also have Lightning update your checkpoint based on a specific metric that you are logging (using self.log), by passing the key to `monitor=`. 
For example, if we want to save checkpoint based on the validation loss, logged as `val_loss`, you can pass:\n", - "\n", - "\n", - "```\n", - "checkpoint_callback = ModelCheckpoint(\n", - " filepath=os.getcwd(),\n", - " save_top_k=1,\n", - " verbose=True,\n", - " monitor='val_loss',\n", - " mode='min',\n", - " prefix=''\n", - ")\n", - "```\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "YzYMivw1rO1O" - }, - "outputs": [], - "source": [ - "from pytorch_lightning.callbacks import ModelCheckpoint\n", - "\n", - "trainer = pl.Trainer(callbacks=[ModelCheckpoint(monitor='val_loss')])\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "5hYs_FV8iDMn" - }, - "source": [ - "You can modify the behavior of checkpointing by creating your own callback, and passing it to the trainer. \n", - "You can control\n", - "* filepath- where logs are saved\n", - "* save_top_k- save k top models\n", - "* verbose\n", - "* monitor- the metric to monitor\n", - "* mode\n", - "* prefix\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Tb1K2VYDiNTu" - }, - "outputs": [], - "source": [ - "from pytorch_lightning.callbacks import ModelCheckpoint\n", - "\n", - "# DEFAULTS used by the Trainer\n", - "checkpoint_callback = ModelCheckpoint(\n", - " filepath=os.getcwd(),\n", - " save_top_k=3,\n", - " verbose=True,\n", - " monitor='val_loss',\n", - " mode='min',\n", - " prefix='',\n", - ")\n", - "\n", - "trainer = Trainer(callbacks=[checkpoint_callback])\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "YKhZ6xRojJcl" - }, - "source": [ - "You can disable checkpointing it by passing\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Yt8zd2ZFjOXX" - }, - "outputs": [], - "source": [ - "trainer = Trainer(checkpoint_callback=False)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "HcLy8asCjrj9" - }, - "source": [ - "### Manual saving\n", - "\n", - "You can manually save checkpoints and restore your model from the checkpointed state.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "kZSkMJf0jR4x" - }, - "outputs": [], - "source": [ - "trainer.fit(model)\n", - "trainer.save_checkpoint(\"example.ckpt\")\n", - "new_model = LitAutoEncoder.load_from_checkpoint(checkpoint_path=\"example.ckpt\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "X2d9cjVPj7CP" - }, - "source": [ - "### Checkpoint Loading\n", - "To load a model along with its weights, biases and module_arguments use following method:\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "BpAFfg5zkFmH" - }, - "outputs": [], - "source": [ - "model = LitAutoEncoder.load_from_checkpoint(PATH)\n", - "\n", - "print(model.learning_rate)\n", - "# prints the learning_rate you used in this checkpoint\n", - "\n", - "model.eval()\n", - "y_hat = model(x)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "jTQ3mxSJkhFN" - }, - "source": [ - "But if you don’t want to use the values saved in the checkpoint, pass in your own here" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "IoMcOh9-kfUP" - }, - "outputs": [], - "source": [ - "class LitAutoEncoder(LightningModule):\n", - "\n", - " def __init__(self, in_dim, out_dim):\n", - " 
super().__init__()\n", - " self.save_hyperparameters()\n", - " self.l1 = nn.Linear(self.hparams.in_dim, self.hparams.out_dim)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ITPVY8mNknut" - }, - "source": [ - "you can restore the model like this\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "H7XeRJzVkuY8" - }, - "outputs": [], - "source": [ - "# if you train and save the model like this it will use these values when loading\n", - "# the weights. But you can overwrite this\n", - "LitAutoEncoder(in_dim=32, out_dim=10)\n", - "\n", - "# uses in_dim=32, out_dim=10\n", - "model = LitAutoEncoder.load_from_checkpoint(PATH)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "14WwGpnVk0a4" - }, - "outputs": [], - "source": [ - "# uses in_dim=128, out_dim=10\n", - "model = LitAutoEncoder.load_from_checkpoint(PATH, in_dim=128, out_dim=10)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "bY5s6wP_k1CU" - }, - "source": [ - "\n", - "\n", - "## Restoring Training State (resume_from_checkpoint)\n", - "If your training was cut short for some reason, you can resume exactly from where you left off using the `resume_from_checkpoint` flag, which will automatically restore model, epoch, step, LR schedulers, apex, etc..." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "9zfhHtyrk3rO" - }, - "outputs": [], - "source": [ - "model = LitAutoEncoder()\n", - "trainer = pl.Trainer(resume_from_checkpoint='some/path/to/my_checkpoint.ckpt')\n", - "\n", - "# automatically restores model, epoch, step, LR schedulers, apex, etc...\n", - "trainer.fit(model)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "xkKdvALFsmT2" - }, - "source": [ - "## weights_save_path\n", - "You can specify a directory for saving weights file using `weights_save_path`.\n", - "\n", - "(If you are using a custom checkpoint callback, the checkpoint callback will override this flag)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "9OwHHFcCsrgT" - }, - "outputs": [], - "source": [ - "# save to your custom path\n", - "trainer = pl.Trainer(weights_save_path='my/path')\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "PbNtlJ9Wsscf" - }, - "outputs": [], - "source": [ - "# if checkpoint callback used, then overrides the weights path\n", - "# **NOTE: this saves weights to some/path NOT my/path\n", - "checkpoint = ModelCheckpoint(filepath='some/path')\n", - "trainer = pl.Trainer(\n", - " callbacks=[checkpoint],\n", - " weights_save_path='my/path'\n", - ")\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "uDdxCuyHdWQt" - }, - "source": [ - "# Early stopping\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "fqAy3ihRxTfR" - }, - "source": [ - "The EarlyStopping callback can be used to monitor a validation metric and stop the training when no improvement is observed, to help you avoid overfitting.\n", - "\n", - "To enable Early Stopping you can init the EarlyStopping callback, and pass it to `callbacks=` trainer flag. 
The callback will look for a logged metric to early stop on.\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "lFx976CheH93" - }, - "outputs": [], - "source": [ - "from pytorch_lightning.callbacks.early_stopping import EarlyStopping\n", - "\n", - "trainer = pl.Trainer(callbacks=[EarlyStopping('val_loss')])\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "MwpJfTvjeOwF" - }, - "source": [ - "You can customize the callback using the following params:\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "V6I9h6HteK2U" - }, - "outputs": [], - "source": [ - "from pytorch_lightning.callbacks.early_stopping import EarlyStopping\n", - "\n", - "early_stop_callback = EarlyStopping(\n", - " monitor='val_accuracy',\n", - " min_delta=0.00,\n", - " patience=3,\n", - " verbose=False,\n", - " mode='max'\n", - ")\n", - "trainer = pl.Trainer(callbacks=[early_stop_callback])\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "7TAIerPYe_Q1" - }, - "source": [ - "The EarlyStopping callback runs at the end of every validation check, which, under the default configuration, happens after every training epoch. However, the frequency of validation can be modified by setting various parameters on the Trainer, for example check_val_every_n_epoch and val_check_interval. It must be noted that the patience parameter counts the number of validation checks with no improvement, and not the number of training epochs. Therefore, with parameters check_val_every_n_epoch=10 and patience=3, the trainer will perform at least 40 training epochs before being stopped." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "VoKrX2ENh9Fg" - }, - "source": [ - "# Logging" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-CQTPKd7iKLm" - }, - "source": [ - "Lightning has built in integration with various loggers such as TensorBoard, wandb, commet, etc.\n", - "\n", - "\n", - "You can pass any metrics you want to log during training to `self.log`, such as loss or accuracy. Similarly, pass in to self.log any metric you want to log during validation step.\n", - "\n", - "These values will be passed in to the logger of your choise. 
simply pass in any supported logger to logger trainer flag.\n", - "\n", - "\n", - "\n", - "Use the as`logger=` trainer flag to pass in a Logger, or iterable collection of Loggers, for experiment tracking.\n", - "\n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ty5VPS3AiS8L" - }, - "outputs": [], - "source": [ - "from pytorch_lightning.loggers import TensorBoardLogger\n", - "\n", - "# default logger used by trainer\n", - "logger = TensorBoardLogger(\n", - " save_dir=os.getcwd(),\n", - " version=1,\n", - " name='lightning_logs'\n", - ")\n", - "trainer = pl.Trainer(logger=logger)\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "jc5oWNpoiuuc" - }, - "source": [ - "Lightning supports the use of multiple loggers, just pass a list to the Trainer.\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "BlYwMRRyivp_" - }, - "outputs": [], - "source": [ - "from pytorch_lightning.loggers import TensorBoardLogger, TestTubeLogger\n", - "logger1 = TensorBoardLogger('tb_logs', name='my_model')\n", - "logger2 = TestTubeLogger('tb_logs', name='my_model')\n", - "trainer = pl.Trainer(logger=[logger1, logger2])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "a7EyspQPh7iQ" - }, - "source": [ - "## flush_logs_every_n_steps\n", - "\n", - "Use this flag to determine when logging to disc should happen." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Em_XvsmyiBbk" - }, - "outputs": [], - "source": [ - "trainer = pl.Trainer(flush_logs_every_n_steps=100)\n", - "\n", - "trainer.fit(model, train_loader, val_loader)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "_vDeKE98qsl1" - }, - "source": [ - "## log_every_n_steps\n", - "How often to add logging rows (does not write to disk)\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "HkqD7D_0w1Tt" - }, - "outputs": [], - "source": [ - "trainer = pl.Trainer(log_every_n_steps=1000)\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "9uw0gfe422CT" - }, - "source": [ - "# info logging" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "dQXpt0aatDGo" - }, - "source": [ - "### default_root_dir\n", - "\n", - "---\n", - "\n", - "\n", - "\n", - "Default path for logs and weights when no logger or pytorch_lightning.callbacks.ModelCheckpoint callback passed. On certain clusters you might want to separate where logs and checkpoints are stored. If you don’t then use this argument for convenience. Paths can be local paths or remote paths such as s3://bucket/path or ‘hdfs://path/’. Credentials will need to be set up to use remote filepaths." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "CMmID2Bts5W3" - }, - "source": [ - "## weights_summary\n", - "Prints a summary of the weights when training begins. Default is set to `top`- print summary of top level modules.\n", - "\n", - "Options: ‘full’, ‘top’, None." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "KTl6EdwDs6j2" - }, - "outputs": [], - "source": [ - "\n", - "# print full summary of all modules and submodules\n", - "trainer = pl.Trainer(weights_summary='full')\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "R57cSLl9w9ma" - }, - "outputs": [], - "source": [ - "# don't print a summary\n", - "trainer = Trainer(weights_summary=None)\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "bSc2hU5AotAP" - }, - "source": [ - "# progress bar" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "GgvbyDsBxcH6" - }, - "source": [ - "## process_position\n", - "\n", - "Orders the progress bar. Useful when running multiple trainers on the same node.\n", - "\n", - "(This argument is ignored if a custom callback is passed to callbacks)\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "6ekz8Es8owDn" - }, - "outputs": [], - "source": [ - "# default used by the Trainer\n", - "trainer = pl.Trainer(process_position=0)\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "itivQFgEphBU" - }, - "source": [ - "## progress_bar_refresh_rate\n", - "\n", - "How often to refresh the progress bar (in steps). In notebooks, faster refresh rates (lower number) is known to crash them because of their screen refresh rates, so raise it to 50 or more." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "GKe6eVxmplL5" - }, - "outputs": [], - "source": [ - "# default used by the Trainer\n", - "trainer = pl.Trainer(progress_bar_refresh_rate=1)\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "8rDHJOJbxNtf" - }, - "outputs": [], - "source": [ - "# disable progress bar\n", - "trainer = Trainer(progress_bar_refresh_rate=0)\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "NCNvYLwjpWne" - }, - "source": [ - "# profiler" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "pRknrG_zpY6M" - }, - "outputs": [], - "source": [ - "# to profile standard training events\n", - "trainer = pl.Trainer(profiler=True)\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Ji6aWpU73kMM" - }, - "source": [ - "You can also use Lightning AdvancedProfiler if you want more detailed information about time spent in each function call recorded during a given action. The output is quite verbose and you should only use this if you want very detailed reports.\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "layG55pt316C" - }, - "outputs": [], - "source": [ - "from pytorch_lightning.profiler import AdvancedProfiler\n", - "\n", - "trainer = Trainer(profiler=AdvancedProfiler())\n", - "\n", - "trainer.fit(model, train_loader, val_loader)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "

Congratulations - Time to Join the Community!

\n", - "
\n", - "\n", - "Congratulations on completing this notebook tutorial! If you enjoyed this and would like to join the Lightning movement, you can do so in the following ways!\n", - "\n", - "### Star [Lightning](https://github.com/PyTorchLightning/pytorch-lightning) on GitHub\n", - "The easiest way to help our community is just by starring the GitHub repos! This helps raise awareness of the cool tools we're building.\n", - "\n", - "* Please, star [Lightning](https://github.com/PyTorchLightning/pytorch-lightning)\n", - "\n", - "### Join our [Slack](https://join.slack.com/t/pytorch-lightning/shared_invite/zt-pw5v393p-qRaDgEk24~EjiZNBpSQFgQ)!\n", - "The best way to keep up to date on the latest advancements is to join our community! Make sure to introduce yourself and share your interests in `#general` channel\n", - "\n", - "### Interested by SOTA AI models ! Check out [Bolt](https://github.com/PyTorchLightning/lightning-bolts)\n", - "Bolts has a collection of state-of-the-art models, all implemented in [Lightning](https://github.com/PyTorchLightning/pytorch-lightning) and can be easily integrated within your own projects.\n", - "\n", - "* Please, star [Bolt](https://github.com/PyTorchLightning/lightning-bolts)\n", - "\n", - "### Contributions !\n", - "The best way to contribute to our community is to become a code contributor! At any time you can go to [Lightning](https://github.com/PyTorchLightning/pytorch-lightning) or [Bolt](https://github.com/PyTorchLightning/lightning-bolts) GitHub Issues page and filter for \"good first issue\". \n", - "\n", - "* [Lightning good first issue](https://github.com/PyTorchLightning/pytorch-lightning/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22)\n", - "* [Bolt good first issue](https://github.com/PyTorchLightning/lightning-bolts/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22)\n", - "* You can also contribute your own notebooks with useful examples !\n", - "\n", - "### Great thanks from the entire Pytorch Lightning Team for your interest !\n", - "\n", - "" - ] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "collapsed_sections": [], - "name": "05-trainer-flags-overview.ipynb", - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.3" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/notebooks/06-mnist-tpu-training.ipynb b/notebooks/06-mnist-tpu-training.ipynb deleted file mode 100644 index ba5ebc98134cc..0000000000000 --- a/notebooks/06-mnist-tpu-training.ipynb +++ /dev/null @@ -1,368 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "06-mnist-tpu-training.ipynb", - "provenance": [], - "collapsed_sections": [] - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "accelerator": "TPU" - }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "WsWdLFMVKqbi" - }, - "source": [ - "\"Open" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "qXO1QLkbRXl0" - }, - "source": [ - "# TPU training with PyTorch Lightning ⚡\n", - "\n", - "In this notebook, we'll train a model on TPUs. 
Changing one line of code is all you need to that.\n", - "\n", - "The most up to documentation related to TPU training can be found [here](https://pytorch-lightning.readthedocs.io/en/latest/advanced/tpu.html).\n", - "\n", - "---\n", - "\n", - " - Give us a ⭐ [on Github](https://www.github.com/PytorchLightning/pytorch-lightning/)\n", - " - Check out [the documentation](https://pytorch-lightning.readthedocs.io/en/latest/)\n", - " - Join us [on Slack](https://join.slack.com/t/pytorch-lightning/shared_invite/zt-pw5v393p-qRaDgEk24~EjiZNBpSQFgQ)\n", - " - Ask a question on our [GitHub Discussions](https://github.com/PyTorchLightning/pytorch-lightning/discussions/)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "UmKX0Qa1RaLL" - }, - "source": [ - "### Setup\n", - "\n", - "Lightning is easy to install. Simply ```pip install pytorch-lightning```" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "vAWOr0FZRaIj" - }, - "source": [ - "! pip install pytorch-lightning -qU" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "zepCr1upT4Z3" - }, - "source": [ - "### Install Colab TPU compatible PyTorch/TPU wheels and dependencies" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "AYGWh10lRaF1" - }, - "source": [ - "! pip install cloud-tpu-client==0.10 https://storage.googleapis.com/tpu-pytorch/wheels/torch_xla-1.8-cp37-cp37m-linux_x86_64.whl" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "SNHa7DpmRZ-C" - }, - "source": [ - "import torch\n", - "from torch import nn\n", - "import torch.nn.functional as F\n", - "from torch.utils.data import random_split, DataLoader\n", - "\n", - "# Note - you must have torchvision installed for this example\n", - "from torchvision.datasets import MNIST\n", - "from torchvision import transforms\n", - "\n", - "import pytorch_lightning as pl\n", - "from pytorch_lightning.metrics.functional import accuracy" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "rjo1dqzGUxt6" - }, - "source": [ - "### Defining The `MNISTDataModule`\n", - "\n", - "Below we define `MNISTDataModule`. You can learn more about datamodules in [docs](https://pytorch-lightning.readthedocs.io/en/latest/extensions/datamodules.html) and [datamodule notebook](https://github.com/PyTorchLightning/pytorch-lightning/blob/master/notebooks/02-datamodules.ipynb)." 
- ] - }, - { - "cell_type": "code", - "metadata": { - "id": "pkbrm3YgUxlE" - }, - "source": [ - "class MNISTDataModule(pl.LightningDataModule):\n", - "\n", - " def __init__(self, data_dir: str = './'):\n", - " super().__init__()\n", - " self.data_dir = data_dir\n", - " self.transform = transforms.Compose([\n", - " transforms.ToTensor(),\n", - " transforms.Normalize((0.1307,), (0.3081,))\n", - " ])\n", - "\n", - " # self.dims is returned when you call dm.size()\n", - " # Setting default dims here because we know them.\n", - " # Could optionally be assigned dynamically in dm.setup()\n", - " self.dims = (1, 28, 28)\n", - " self.num_classes = 10\n", - "\n", - " def prepare_data(self):\n", - " # download\n", - " MNIST(self.data_dir, train=True, download=True)\n", - " MNIST(self.data_dir, train=False, download=True)\n", - "\n", - " def setup(self, stage=None):\n", - "\n", - " # Assign train/val datasets for use in dataloaders\n", - " if stage == 'fit' or stage is None:\n", - " mnist_full = MNIST(self.data_dir, train=True, transform=self.transform)\n", - " self.mnist_train, self.mnist_val = random_split(mnist_full, [55000, 5000])\n", - "\n", - " # Assign test dataset for use in dataloader(s)\n", - " if stage == 'test' or stage is None:\n", - " self.mnist_test = MNIST(self.data_dir, train=False, transform=self.transform)\n", - "\n", - " def train_dataloader(self):\n", - " return DataLoader(self.mnist_train, batch_size=32)\n", - "\n", - " def val_dataloader(self):\n", - " return DataLoader(self.mnist_val, batch_size=32)\n", - "\n", - " def test_dataloader(self):\n", - " return DataLoader(self.mnist_test, batch_size=32)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "nr9AqDWxUxdK" - }, - "source": [ - "### Defining the `LitModel`\n", - "\n", - "Below, we define the model `LitMNIST`." 
- ] - }, - { - "cell_type": "code", - "metadata": { - "id": "YKt0KZkOUxVY" - }, - "source": [ - "class LitModel(pl.LightningModule):\n", - " \n", - " def __init__(self, channels, width, height, num_classes, hidden_size=64, learning_rate=2e-4):\n", - "\n", - " super().__init__()\n", - "\n", - " self.save_hyperparameters()\n", - "\n", - " self.model = nn.Sequential(\n", - " nn.Flatten(),\n", - " nn.Linear(channels * width * height, hidden_size),\n", - " nn.ReLU(),\n", - " nn.Dropout(0.1),\n", - " nn.Linear(hidden_size, hidden_size),\n", - " nn.ReLU(),\n", - " nn.Dropout(0.1),\n", - " nn.Linear(hidden_size, num_classes)\n", - " )\n", - "\n", - " def forward(self, x):\n", - " x = self.model(x)\n", - " return F.log_softmax(x, dim=1)\n", - "\n", - " def training_step(self, batch, batch_idx):\n", - " x, y = batch\n", - " logits = self(x)\n", - " loss = F.nll_loss(logits, y)\n", - " self.log('train_loss', loss, prog_bar=False)\n", - " return loss\n", - "\n", - " def validation_step(self, batch, batch_idx):\n", - " x, y = batch\n", - " logits = self(x)\n", - " loss = F.nll_loss(logits, y)\n", - " preds = torch.argmax(logits, dim=1)\n", - " acc = accuracy(preds, y)\n", - " self.log('val_loss', loss, prog_bar=True)\n", - " self.log('val_acc', acc, prog_bar=True)\n", - " return loss\n", - "\n", - " def configure_optimizers(self):\n", - " optimizer = torch.optim.Adam(self.parameters(), lr=self.hparams.learning_rate)\n", - " return optimizer" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Uxl88z06cHyV" - }, - "source": [ - "### TPU Training\n", - "\n", - "Lightning supports training on a single TPU core or 8 TPU cores.\n", - "\n", - "The Trainer parameters `tpu_cores` defines how many TPU cores to train on (1 or 8) / Single TPU core to train on [1].\n", - "\n", - "For Single TPU training, Just pass the TPU core ID [1-8] in a list. Setting `tpu_cores=[5]` will train on TPU core ID 5." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "UZ647Xg2gYng" - }, - "source": [ - "Train on TPU core ID 5 with `tpu_cores=[5]`." - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "bzhJ8g_vUxN2" - }, - "source": [ - "# Init DataModule\n", - "dm = MNISTDataModule()\n", - "# Init model from datamodule's attributes\n", - "model = LitModel(*dm.size(), dm.num_classes)\n", - "# Init trainer\n", - "trainer = pl.Trainer(max_epochs=3, progress_bar_refresh_rate=20, tpu_cores=[5])\n", - "# Train\n", - "trainer.fit(model, dm)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "slMq_0XBglzC" - }, - "source": [ - "Train on single TPU core with `tpu_cores=1`." - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "31N5Scf2RZ61" - }, - "source": [ - "# Init DataModule\n", - "dm = MNISTDataModule()\n", - "# Init model from datamodule's attributes\n", - "model = LitModel(*dm.size(), dm.num_classes)\n", - "# Init trainer\n", - "trainer = pl.Trainer(max_epochs=3, progress_bar_refresh_rate=20, tpu_cores=1)\n", - "# Train\n", - "trainer.fit(model, dm)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "_v8xcU5Sf_Cv" - }, - "source": [ - "Train on 8 TPU cores with `tpu_cores=8`. You might have to restart the notebook to run it on 8 TPU cores after training on single TPU core." 
- ] - }, - { - "cell_type": "code", - "metadata": { - "id": "EFEw7YpLf-gE" - }, - "source": [ - "# Init DataModule\n", - "dm = MNISTDataModule()\n", - "# Init model from datamodule's attributes\n", - "model = LitModel(*dm.size(), dm.num_classes)\n", - "# Init trainer\n", - "trainer = pl.Trainer(max_epochs=3, progress_bar_refresh_rate=20, tpu_cores=8)\n", - "# Train\n", - "trainer.fit(model, dm)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "m2mhgEgpRZ1g" - }, - "source": [ - "\n", - "

Congratulations - Time to Join the Community!

\n", - "
\n", - "\n", - "Congratulations on completing this notebook tutorial! If you enjoyed this and would like to join the Lightning movement, you can do so in the following ways!\n", - "\n", - "### Star [Lightning](https://github.com/PyTorchLightning/pytorch-lightning) on GitHub\n", - "The easiest way to help our community is just by starring the GitHub repos! This helps raise awareness of the cool tools we're building.\n", - "\n", - "* Please, star [Lightning](https://github.com/PyTorchLightning/pytorch-lightning)\n", - "\n", - "### Join our [Slack](https://join.slack.com/t/pytorch-lightning/shared_invite/zt-pw5v393p-qRaDgEk24~EjiZNBpSQFgQ)!\n", - "The best way to keep up to date on the latest advancements is to join our community! Make sure to introduce yourself and share your interests in `#general` channel\n", - "\n", - "### Interested by SOTA AI models ! Check out [Bolt](https://github.com/PyTorchLightning/lightning-bolts)\n", - "Bolts has a collection of state-of-the-art models, all implemented in [Lightning](https://github.com/PyTorchLightning/pytorch-lightning) and can be easily integrated within your own projects.\n", - "\n", - "* Please, star [Bolt](https://github.com/PyTorchLightning/lightning-bolts)\n", - "\n", - "### Contributions !\n", - "The best way to contribute to our community is to become a code contributor! At any time you can go to [Lightning](https://github.com/PyTorchLightning/pytorch-lightning) or [Bolt](https://github.com/PyTorchLightning/lightning-bolts) GitHub Issues page and filter for \"good first issue\". \n", - "\n", - "* [Lightning good first issue](https://github.com/PyTorchLightning/pytorch-lightning/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22)\n", - "* [Bolt good first issue](https://github.com/PyTorchLightning/lightning-bolts/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22)\n", - "* You can also contribute your own notebooks with useful examples !\n", - "\n", - "### Great thanks from the entire Pytorch Lightning Team for your interest !\n", - "\n", - "" - ] - } - ] -} diff --git a/notebooks/07-cifar10-baseline.ipynb b/notebooks/07-cifar10-baseline.ipynb deleted file mode 100644 index c96f473c4bacf..0000000000000 --- a/notebooks/07-cifar10-baseline.ipynb +++ /dev/null @@ -1,394 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "accelerator": "GPU", - "colab": { - "name": "07-cifar10-baseline.ipynb", - "provenance": [], - "collapsed_sections": [] - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - } - }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "qMDj0BYNECU8" - }, - "source": [ - "\"Open" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ECu0zDh8UXU8" - }, - "source": [ - "# PyTorch Lightning CIFAR10 ~94% Baseline Tutorial ⚡\n", - "\n", - "Train a Resnet to 94% accuracy on Cifar10!\n", - "\n", - "Main takeaways:\n", - "1. Experiment with different Learning Rate schedules and frequencies in the configure_optimizers method in pl.LightningModule\n", - "2. 
Use an existing Resnet architecture with modifications directly with Lightning\n", - "\n", - "---\n", - "\n", - " - Give us a ⭐ [on Github](https://www.github.com/PytorchLightning/pytorch-lightning/)\n", - " - Check out [the documentation](https://pytorch-lightning.readthedocs.io/en/latest/)\n", - " - Join us [on Slack](https://join.slack.com/t/pytorch-lightning/shared_invite/zt-pw5v393p-qRaDgEk24~EjiZNBpSQFgQ)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "HYpMlx7apuHq" - }, - "source": [ - "### Setup\n", - "Lightning is easy to install. Simply `pip install pytorch-lightning`.\n", - "Also check out [bolts](https://github.com/PyTorchLightning/lightning-bolts/) for pre-existing data modules and models." - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "ziAQCrE-TYWG" - }, - "source": [ - "! pip install pytorch-lightning lightning-bolts -qU" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "L-W_Gq2FORoU" - }, - "source": [ - "# Run this if you intend to use TPUs\n", - "# !curl https://raw.githubusercontent.com/pytorch/xla/master/contrib/scripts/env-setup.py -o pytorch-xla-env-setup.py\n", - "# !python pytorch-xla-env-setup.py --version nightly --apt-packages libomp5 libopenblas-dev" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "wjov-2N_TgeS" - }, - "source": [ - "import torch\n", - "import torch.nn as nn\n", - "import torch.nn.functional as F\n", - "from torch.optim.lr_scheduler import OneCycleLR\n", - "from torch.optim.swa_utils import AveragedModel, update_bn\n", - "import torchvision\n", - "\n", - "import pytorch_lightning as pl\n", - "from pytorch_lightning.callbacks import LearningRateMonitor\n", - "from pytorch_lightning.metrics.functional import accuracy\n", - "from pl_bolts.datamodules import CIFAR10DataModule\n", - "from pl_bolts.transforms.dataset_normalizations import cifar10_normalization" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "54JMU1N-0y0g" - }, - "source": [ - "pl.seed_everything(7);" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "FA90qwFcqIXR" - }, - "source": [ - "### CIFAR10 Data Module\n", - "\n", - "Import the existing data module from `bolts` and modify the train and test transforms." - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "S9e-W8CSa8nH" - }, - "source": [ - "batch_size = 32\n", - "\n", - "train_transforms = torchvision.transforms.Compose([\n", - " torchvision.transforms.RandomCrop(32, padding=4),\n", - " torchvision.transforms.RandomHorizontalFlip(),\n", - " torchvision.transforms.ToTensor(),\n", - " cifar10_normalization(),\n", - "])\n", - "\n", - "test_transforms = torchvision.transforms.Compose([\n", - " torchvision.transforms.ToTensor(),\n", - " cifar10_normalization(),\n", - "])\n", - "\n", - "cifar10_dm = CIFAR10DataModule(\n", - " batch_size=batch_size,\n", - " train_transforms=train_transforms,\n", - " test_transforms=test_transforms,\n", - " val_transforms=test_transforms,\n", - ")" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "SfCsutp3qUMc" - }, - "source": [ - "### Resnet\n", - "Modify the pre-existing Resnet architecture from TorchVision. The pre-existing architecture is based on ImageNet images (224x224) as input. So we need to modify it for CIFAR10 images (32x32)." 
- ] - }, - { - "cell_type": "code", - "metadata": { - "id": "GNSeJgwvhHp-" - }, - "source": [ - "def create_model():\n", - " model = torchvision.models.resnet18(pretrained=False, num_classes=10)\n", - " model.conv1 = nn.Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " model.maxpool = nn.Identity()\n", - " return model" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "HUCj5TKsqty1" - }, - "source": [ - "### Lightning Module\n", - "Check out the [`configure_optimizers`](https://pytorch-lightning.readthedocs.io/en/stable/common/lightning_module.html#configure-optimizers) method to use custom Learning Rate schedulers. The OneCycleLR with SGD will get you to around 92-93% accuracy in 20-30 epochs and 93-94% accuracy in 40-50 epochs. Feel free to experiment with different LR schedules from https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "03OMrBa5iGtT" - }, - "source": [ - "class LitResnet(pl.LightningModule):\n", - " def __init__(self, lr=0.05):\n", - " super().__init__()\n", - "\n", - " self.save_hyperparameters()\n", - " self.model = create_model()\n", - "\n", - " def forward(self, x):\n", - " out = self.model(x)\n", - " return F.log_softmax(out, dim=1)\n", - "\n", - " def training_step(self, batch, batch_idx):\n", - " x, y = batch\n", - " logits = F.log_softmax(self.model(x), dim=1)\n", - " loss = F.nll_loss(logits, y)\n", - " self.log('train_loss', loss)\n", - " return loss\n", - "\n", - " def evaluate(self, batch, stage=None):\n", - " x, y = batch\n", - " logits = self(x)\n", - " loss = F.nll_loss(logits, y)\n", - " preds = torch.argmax(logits, dim=1)\n", - " acc = accuracy(preds, y)\n", - "\n", - " if stage:\n", - " self.log(f'{stage}_loss', loss, prog_bar=True)\n", - " self.log(f'{stage}_acc', acc, prog_bar=True)\n", - "\n", - " def validation_step(self, batch, batch_idx):\n", - " self.evaluate(batch, 'val')\n", - "\n", - " def test_step(self, batch, batch_idx):\n", - " self.evaluate(batch, 'test')\n", - "\n", - " def configure_optimizers(self):\n", - " optimizer = torch.optim.SGD(self.parameters(), lr=self.hparams.lr, momentum=0.9, weight_decay=5e-4)\n", - " steps_per_epoch = 45000 // batch_size\n", - " scheduler_dict = {\n", - " 'scheduler': OneCycleLR(optimizer, 0.1, epochs=self.trainer.max_epochs, steps_per_epoch=steps_per_epoch),\n", - " 'interval': 'step',\n", - " }\n", - " return {'optimizer': optimizer, 'lr_scheduler': scheduler_dict}" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "3FFPgpAFi9KU" - }, - "source": [ - "model = LitResnet(lr=0.05)\n", - "model.datamodule = cifar10_dm\n", - "\n", - "trainer = pl.Trainer(\n", - " progress_bar_refresh_rate=20,\n", - " max_epochs=40,\n", - " gpus=1,\n", - " logger=pl.loggers.TensorBoardLogger('lightning_logs/', name='resnet'),\n", - " callbacks=[LearningRateMonitor(logging_interval='step')],\n", - ")\n", - "\n", - "trainer.fit(model, cifar10_dm)\n", - "trainer.test(model, datamodule=cifar10_dm);" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "lWL_WpeVIXWQ" - }, - "source": [ - "### Bonus: Use [Stochastic Weight Averaging](https://arxiv.org/abs/1803.05407) to get a boost on performance\n", - "\n", - "Use SWA from torch.optim to get a quick performance boost. 
Also shows a couple of cool features from Lightning:\n", - "- Use `training_epoch_end` to run code after the end of every epoch\n", - "- Use a pretrained model directly with this wrapper for SWA" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "bsSwqKv0t9uY" - }, - "source": [ - "class SWAResnet(LitResnet):\n", - " def __init__(self, trained_model, lr=0.01):\n", - " super().__init__()\n", - "\n", - " self.save_hyperparameters('lr')\n", - " self.model = trained_model\n", - " self.swa_model = AveragedModel(self.model)\n", - "\n", - " def forward(self, x):\n", - " out = self.swa_model(x)\n", - " return F.log_softmax(out, dim=1)\n", - "\n", - " def training_epoch_end(self, training_step_outputs):\n", - " self.swa_model.update_parameters(self.model)\n", - "\n", - " def validation_step(self, batch, batch_idx, stage=None):\n", - " x, y = batch\n", - " logits = F.log_softmax(self.model(x), dim=1)\n", - " loss = F.nll_loss(logits, y)\n", - " preds = torch.argmax(logits, dim=1)\n", - " acc = accuracy(preds, y)\n", - "\n", - " self.log(f'val_loss', loss, prog_bar=True)\n", - " self.log(f'val_acc', acc, prog_bar=True)\n", - "\n", - " def configure_optimizers(self):\n", - " optimizer = torch.optim.SGD(self.model.parameters(), lr=self.hparams.lr, momentum=0.9, weight_decay=5e-4)\n", - " return optimizer\n", - "\n", - " def on_train_end(self):\n", - " update_bn(self.datamodule.train_dataloader(), self.swa_model, device=self.device)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "cA6ZG7C74rjL" - }, - "source": [ - "swa_model = SWAResnet(model.model, lr=0.01)\n", - "swa_model.datamodule = cifar10_dm\n", - "\n", - "swa_trainer = pl.Trainer(\n", - " progress_bar_refresh_rate=20,\n", - " max_epochs=20,\n", - " gpus=1,\n", - " logger=pl.loggers.TensorBoardLogger('lightning_logs/', name='swa_resnet'),\n", - ")\n", - "\n", - "swa_trainer.fit(swa_model, cifar10_dm)\n", - "swa_trainer.test(swa_model, datamodule=cifar10_dm);" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "RRHMfGiDpZ2M" - }, - "source": [ - "# Start tensorboard.\n", - "%reload_ext tensorboard\n", - "%tensorboard --logdir lightning_logs/" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "RltpFGS-s0M1" - }, - "source": [ - "\n", - "

Congratulations - Time to Join the Community!

\n", - "
\n", - "\n", - "Congratulations on completing this notebook tutorial! If you enjoyed this and would like to join the Lightning movement, you can do so in the following ways!\n", - "\n", - "### Star [Lightning](https://github.com/PyTorchLightning/pytorch-lightning) on GitHub\n", - "The easiest way to help our community is just by starring the GitHub repos! This helps raise awareness of the cool tools we're building.\n", - "\n", - "* Please, star [Lightning](https://github.com/PyTorchLightning/pytorch-lightning)\n", - "\n", - "### Join our [Slack](https://join.slack.com/t/pytorch-lightning/shared_invite/zt-pw5v393p-qRaDgEk24~EjiZNBpSQFgQ)!\n", - "The best way to keep up to date on the latest advancements is to join our community! Make sure to introduce yourself and share your interests in `#general` channel\n", - "\n", - "### Interested by SOTA AI models ! Check out [Bolt](https://github.com/PyTorchLightning/lightning-bolts)\n", - "Bolts has a collection of state-of-the-art models, all implemented in [Lightning](https://github.com/PyTorchLightning/pytorch-lightning) and can be easily integrated within your own projects.\n", - "\n", - "* Please, star [Bolt](https://github.com/PyTorchLightning/lightning-bolts)\n", - "\n", - "### Contributions !\n", - "The best way to contribute to our community is to become a code contributor! At any time you can go to [Lightning](https://github.com/PyTorchLightning/pytorch-lightning) or [Bolt](https://github.com/PyTorchLightning/lightning-bolts) GitHub Issues page and filter for \"good first issue\". \n", - "\n", - "* [Lightning good first issue](https://github.com/PyTorchLightning/pytorch-lightning/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22)\n", - "* [Bolt good first issue](https://github.com/PyTorchLightning/lightning-bolts/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22)\n", - "* You can also contribute your own notebooks with useful examples !\n", - "\n", - "### Great thanks from the entire Pytorch Lightning Team for your interest !\n", - "\n", - "" - ] - } - ] -} diff --git a/notebooks/08-Domain-specific-demos.ipynb b/notebooks/08-Domain-specific-demos.ipynb deleted file mode 100644 index a7b6a0dcc903a..0000000000000 --- a/notebooks/08-Domain-specific-demos.ipynb +++ /dev/null @@ -1,7415 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "GatZ6ZiXFzVh" - }, - "source": [ - 
"![image.png](data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAgAAAADwCAYAAAB2ddzKAAAgAElEQVR4Ae2dB3hbRdaGj5p7jUt6J4UQSAgQaoBQQ0LosHQIhN6XXXb52aX33svCwhZYOoSls5AQSAi9JZSQ3nvc4m7J//ONfG1Z1i2SJVu695s8jqR75055Z6Q5M3PmHFdzc3OzMJAACZAACZAACTiKgNtRtWVlSYAESIAESIAEFAEKAOwIJEACJEACJOBAAhQAHNjorDIJkAAJkAAJUABgHyABEiABEiABBxKgAODARmeVSYAESIAESIACAPsACZAACZAACTiQAAUABzY6q0wCJEACJEACFADYB0iABEiABEjAgQQoADiw0VllEiABEiABEqAAwD5AAiRAAiRAAg4kQAHAgY3OKpMACZAACZAABQD2ARIgARIgARJwIAEKAA5sdFaZBEiABEiABCgAsA+QAAmQAAmQgAMJUABwYKOzyiRAAiRAAiRAAYB9gARIgARIgAQcSIACgAMbnVUmARIgARIgAS8RkAAJRE9g2riHpLHeL7XbGiXgD4jL1awSaQ6+RJ+gQ59wuYIVb252idvjlswcn/jSPfLMt5c4lAirTQJdR4ACQNexZk42IoDBv6kxIBIy4oe8tVFNE1sVMNOEALBUTBObJVMnARJoIUABgF2BBGIggJk/BqxAAFP+5lA5IIbUnP1IUHACS5HGuiZprG9ZFnA2FtaeBBJOgAJAwhEzAzsSwLJ/MHDNP37tGxQCIFAxkAAJJJ4ABYDEM2YONiTAPf/ENKrGNTGpM1USIIFQAjwFEEqD70kgCgLc848ClsWoZGoRFKORQBwIUACIA0Qm4TwCqT5QuVo075pbKtKqiJcETZnqbJMAIYtAApYIUACwhImRSMA+BDDoNzX5Ba9ud/AnIKjMaJ86siYkQALmBKgDYM6IMUjANgQw6Hu8bklP80hGVpqkZ/hkW0Wd1G6rt00dWRESIAFrBCgAWOPEWCSQ0gSw5O8PBKSxsUl69SmUIaN6Sr+hRVLcJ0++nbVUfv1mjdTXNEhTU0Dcbh7DS+nGZuFJwCIBCgAWQTEaCaQygUAgIF6fW7LysmTgiBIZO2GQDBhRIkW982TdsjJZOn+DNNQ1qm0BEZdAYND0A1K53iw7CZCAPgEKAPpseIcEUppAULHPJRj8Gxv9klecJcPH9pYx+w6WXQ4YKlm5GdIcaBZvmkcZM0rpyrLwJEACUROgEmDUyPgACSQ/AU2rH4O/x+eWoj65MmR0TxkzYbCM2LmvlPYrUAM/9v9rttVLfW2jsmqoLf9z9p/8bcwSkkBnCXAFoLME+TwJJCWB4BI+9vwz87Jk+Jg+MmbCINnlwKHSozRH3B6XVG6pkdWLt8jmdZVSVVkrXq9XnQrg4J+UDcpCkUDcCVAAiDtSJkgC3UsAs3+/PyBur0t6DiiQgSNL1LL/iJ37SGFJjqRn+rDNLxWbq2XVb5vVK5T/PJ5mcbnc8GxAa7zd24TMnQS6hAAFgC7BzExIoGsIaPv+Tf4mSU/3yaDte8q4/bDnP0RK+uarQmCGj/P/5ZtrZOXCTVJVVhvUAaDyf9c0EnMhgSQhQAEgSRqCxSCBzhDQ9vwx84crnd4DC6X/8GIZt/8QGblLX8ntkSVur1vN7KH41+T3q5n/umXlAs+GHo8HiwKc/XemEfgsCaQYAQoAKdZgLC4JhBPQBn+s6ys/em5pHfzH7jtY+m7XQ2n746bL7RJ/U0Aa6pqkfFONbFhZLnXVQQFA7QvQEV84Xn4mAdsSoABg26ZlxZxAQFvy9/v9yrxv70GY+ZfILhOHyA57DJD84iyBbX38aYJCTVW9bFlbKVs3VMm2qjqRQLN4PMH1fyoAOqHXsI4kECRAAYA9gQRSlIA2oKs9fWXe1y39hwWX/XfYfYAMGtVTGfMJHdQxzFdX1snaZWWyZUOV1FbXS1qaV3w+T0vcFIXBYpMACURNgAJA1Mj4AAkkC4E2Iz89B+SrAX/cfkMEy/6Y+WNmDyFBWfVT6/+iVvmh9Lfi101SvrE6zNpfUAsgWWrHcpAACSSWAAWAxPJl6iSQMALw4OfxeSQzL10GjiwVDP4jd+2r9vyVMgCW/aHaF1zdV9sAAX+zVG2tldWLtkjl1holHATvc/BPWEMxYRJIUgIUAJK0YVgsEohEQNvzbzXvW5Irw8f2lbH7DpJdJg6V3B6ZSuEPs378aYM/3uMZf2NAKrfWyrrlZVJdUS9er0fcLXb/Q7cKIuXNayRAAvYiQAHAXu3J2tiYgLbnr5n3zSvJkiGjeynHPsN37iMl/fLE7Qke9VMYWmb+GpKmBr8681+2cZvgD+Z/vV53UFDQIvGVBEjAMQQoADimqVnRVCagzfwxS2817zu2j4ydEHTsU1CarY74oY6aPX/M/nHmHwErAHXVDbJxdYVsXl8plRU1EmiE9b+gIyDO/lO5d7DsJBAbAQoAsXHjUyTQZQS0mb8/EFA2/DXzvhj8h4/rI4Wl2cq8L4Z6La629I9CwrQvdAFqtjXI2iVbZfPaKmms96soXi+OCHL/v8sakxmRQBIRoACQRI3BopCAPgGXNDU1SXqar0Xbv8W8b7/8lj38oJEf9XzY0r+WJs7/w/nPlnVVEvBj9u8OSgwwEsBAAiTgOAIUABzX5KxwKhHAjN7IvK8axEMq1LoCgJm/WhIQkYBLmf6F619Y/oP2P7YJsC3Apf8QeHxLAg4jQAHAYQ3O6qYOgeBgHm7ed6ja99fM+2KQD93z12qnTeqV9n9zQC35byurk42rKtQxQDgDUs8pK4FcAdC48ZUEnESAAoCTWpt1TRkCGLgx8/f7m6RXi2MfHPMLNe+LOK17/R2W/YMufbH731jfpJb9N62tkIqyGmmobwou/6tVAg7+KdMpWFASiDMBCgBxBsrkSCAeBLA07/a4xJPmlf7Di5RXvx326C+DRpW2Ldurs/46ubXM7CEjwPHPpjUV6m9bZZ00NvglLd2n9gi4BaDDj5dJwAEEKAA4oJFZxdQhoJbsAwFpagqoc/0DhhXLzvsNUX/KsQ+O9bWY9w3d7zeqIc77r19RLhtXV6rB3ygu75EACTiHAAUA57Q1a5oSBIIDPGb/pX3zZcw+g2XEuL7Sb1iRwPQvNPuUeV+zurRsCTQHRGqrG2X98nLZtKZSuQIObhtw6d8MIe+TgN0JUACwewuzfilBADN/LMf7/c3KOl9WdpoMGFYi4yYOkaI+ueqemvEH/2s77x+hdkEFwKCYgJWEmso6Wb+iTOkBKNe/7hBrgRGe5yUSIAFnEHA7o5qsJQmkBgGY+XV73ZKdm64M/JQOyJfsvPR2hbey9B+qEwjzwJk5aZKR5VPn/tv2/bkK0A4sP5CAwwhQAHBYg7O6yUsAM/dAQJTyX2ZumqRn+cQDW/1u2PTF1n+Lgx+TKkBAgPU/BGwl5BRkKOXBPkN7qM+wKKgFK8KEFpevJEAC9iLALQB7tSdrk8IEMBjjbD5c9tbVNKo/aPB7fR41cKshHYJA6PR
ep74QFppdzUp4gAAAZ0HYDsARwHVLtypnQIjTakNAJx1eJgESsC8BCgD2bVvWLAUJYED2NwVd9laV1SohIC3DK+len3LsE5zXYyXApHLKRAAiNUtOQabyGYBtACz5fet2yca1FeIWt6Sl4SeAWwEmNHmbBGxJgAKALZuVlUo1Atq+PAQAePCrr22QVYs2y+fvLJSRu/aVoWN6K+X94LiP0wAhRoAMKqtUAV3NyvBPj565ssOeA9RwjxMFa5eVyeY1lWqbwYNtBs18sEF6vEUCJGAfAhQA7NOWrIkNCGh2AODyFwJAoKlZsAKAY4C+NK94fW5l41/z8NdqCdCo7s3BEwa5hZmSX5SlthSQz9czF8vGNeXiCREtsLKgmRE2SpL3SIAEUp8ABYDUb0PWwFYEmpWTHq/Xq9z3rlqyWb6bvUwJAtuP7ycDRpao5X91bBBufs1WArSFguBugDpOWNQ7V8ZMGKTeQ0kQHgKxEuD1epROAIUAW3UoVoYEdAlQANBFwxsk0PUEMPvG4O7xuKShtlG2VdbKr9+slpqKerUSgBl8Vl6Geo/SKUU/s2WAFn0AdTKgWdQqQEFxtjQ1+pUwAaVD6Bs0+5sFhoMgAFAI6Pq2Z44k0NUEKAB0NXHmRwImBKAPEBQC3JLu9sm28jpZ+ssGyfwwTWq3NchO+w6SftsVqUEahwNVfAgBwW183dS1kwFQAoCeAZwMYXsBIS3dI0sWbJCtG7aJr2UlQDch3iABErAFAQoAtmhGVsJuBDQhwIuVgPomqalpkEXfr5PGOr9k5aWLL80jBaU5kpEZ/AqrlQAr2wHQB2jR+s/rkSnZ+elSV92o8DXU+5WvgIaaJgn4A0oIsRtX1ocESKCNAAWANhZ8RwJJRUCdDAiI0uD3eNxSvrlaFs1fKx6fW6or6mTXg7eTngMK1ECttvg1PwFGKwEt2wGoqFIkdLmUgmFGtk/VPS3Dp7YcsCWQ5vO2rDJQMTCpOgYLQwJxIkABIE4gmQwJJIKAphOAPXm48a3cWitLF6xXWWXnZ6hrPfsXSFqGR11Tg7qFlYCWyEp4yMnPUCcDRu7aT7CP0FDXqE4gVJfXKaNENBaUiJZlmiTQ/QQoAHR/G7AEJGBIACsBMNXj9brV8f/N66ukoaFJ/I1w9FMve08dKb70bKXBj5iWdAKUop9LXMrBYLPSARi8Q0/JzAn6HfCle2X+vBXSWN/UYizIsIi8SQIkkIIEKACkYKOxyA4kAAnAhQG7WWnrV1fUy/JfN4ovw6Mc/QzdqZdayofvAAQVXf1nwqrleKDb7ZaMLLcU98mV7XfDSgBWHJpk7dIyKduwTSXIlQATlrxNAilGgAJAijUYi+tcAkonwCXi83kElvzWry5XCoK1lQ0CnwE9BxYIPP8FnQcFVw2ClgCNmeHEAQLShxfCYTv3VkIFLn/rWSqb11YoR0Qej1fFMU6Nd0mABFKFAAWAVGkplpMEQABL9i0kvB631FU3yMrFmyR7brp4fB7Zbkwv5flPjektXgGVEGBEL0RpUB0/FJcUlubIqD0GKAdC9TWNsmFluToiCGVErBYoYcQoTd4jARJIegIUAJK+iVhAEmhPQO3xt6wEQDFw/apy5UCoamutkhB6Dy4Q7OHDiyDiBuOHjPLtkwt+ajkdAKXD5uaAaGaDlV+C6kb5zr9MNqyuaLFP0LZiECkpXiMBEkgNAhQAUqOdWEoSaEdA2esPiLhdLqWkV1vdICsWbZLvZi9VwgAcCCmzwe6g58DgaYJ2SXT80GpLqGWAl2Yp6pUjo/ceIJvWVcqS+euV0mAgEOj4LK+QAAmkHAEKACnXZCwwCQQJaDN76ARAB2BbZZ388vVqgYIgVgDyi7MlOy89aO2vxclPy3a/PkJtoQD7DDAbXJwlsBHw0+erJCPDp44dwoQwFAKxXYAyMJAACaQmAQoAqdluLDUJKAJqAA64xON2S3q6T6rK62TJz+sluyBDmhr8suPeA6Xv0B7Bgdp09NegQgoIDuwut1ttJWA7AX8Y/IODviYpaM/wlQRIINUIUABItRZjeUkgjEDrSgCUAuFAqKpOVv62SfIKMmXI6J4hsTGoRzdw+xv9ylQw7AFg6T+YV0iSfEsCJJCyBCgApGzTseAk0J6A2ucXrAYo9X91VDA4W28fz+gT0oDiHwKOE9ZU1cv6FeWyZX2VVFfVqeOAOAWAEG3a6iH+RwIkkDQEKAAkTVOwICQQHwLYm4d538LSbEnTnAVhQDdZAFDb+S17+v6mQNAQ0LIy+enzlbJueZlSLvR4PEGLgy3x4lNipkICJNAdBCgAdAd15kkCCSQABb3S/vnKHgAEARWCk/rg1r7OLoCa0Tdj5h+0Ali+qVoW/7BO5v73V7UCgJm/ZjQogcVn0iRAAl1EgAJAF4FmNiSQSAIYvPEHU8BpGV4p7p0nvQYVKot+yLdV/y/C4N9u5u8PSMO2RlmzdKv88uVqWTBvpWxcXaFOGQQ1/xNZC6ZNAiTQlQQoAHQlbeZFAgkggFk5TANDSS89wyc5uRlS1CtXSvvlS0aWr0XvL8LI31KW4My/We35B5oCUrm1Rpb8sE5mvTxfLf3XbKtXpwwgXGiCRgKqwSRJgAS6mAAFgC4GzuxIIBEEMDA3+QNSkJumfALg/H56plcN6mqAh/pemAwQHMyDpYEAUdei8PfjnBXy8xerZPPaSjXzh/lf7cw/t/4T0XpMkwS6hwAFgO7hzlxJIC4EMDBrg7Pf71fn/3sPLpS8HlnKGBA0+jHQw2JgeMBgjvt4Hkp/2PNftmCDzHt7oSz7eaPU1zUqAcLnbTMpHJ4GP5MACaQuAQoAqdt2LDkJdCBQUJwlA0YUS25hRlDhT1P+Cxn/1cDfMpXH4F9TVSeb1lTKNzOXyk8te/7+Jr8EZ/7B435B34IdsuMFEiCBFCZAASCFG49FJwEQwAwfAzkGbHjx6z+8SHIKMlvhdPAGGJQAlN4AZv4Y/Jf8sF6++3iZLPxmjTr+B+nB4/WoNDS7AK0J8g0JkIAtCFAAsEUzshJOJKAt/UMAUNr/6RlS1DNXeg/qoXwAKCYtRoFgALDdzN/tUnv+WPb/ZuYSNfivWbxFmhqb1Dl/pTmoXA9rSwhOJMw6k4C9CVAAsHf7snY2J4BB3e8PSHqmT/IKs6RHaY5aBfClB2fvqvotg39QAgjGh7Y/LPxhz/+neavUzF/Z+YdOgBdKf20WAW2OkNUjAccSoADg2KZnxe1AAHvz0P7PyEmTXoMKJL8kW9kB0FYHNNV/rBIgwMgPzvnjqB+0/T97e6Fswjn/Bsz83eL2wsxvszTT468dugfrQAKGBCgAGOLhTRJIXgIY5OG0D4M73P723a5ICkqy1XZAUPsfC/ltS/jKvG99kzLyg3P+OOq3/OeN0tTkV5WEnmBw5p+8dWbJSIAE4keAAkD8WDIlEugWAhi4cwszZeDIYsEpgLYxH0cAUSRX0LxvfZNUbKpWFv5g5Cd4zr9R3B63eNVRPy77d0sDMlMS6CYCFAC6CTyzJYHOEoDlP5jnhfU/zPz7DClS5/9b08
XgjzP+4eZ9P1uhLPw11DWpc/5tJgLaVgta0+AbEiAB2xKgAGDbpmXF7EpA29+H9T5o/2dkpEmPklzpM7iH5PbIDCrwB4ILATgE0Gbed73MenlBm3nf1pk/9vw5+Nu1v7BeJKBHgAKAHhleJ4EkJ4CZfUa2T4r75klhz2zl+Ae2AAL+4GAO3YDqynql7d/evG9ji5EfV9C2f9ueQZLXmMUjARKIJwEKAPGkybRIoIsIqON/gYD4Mr1S2j9PevTMUbb/sZ8PwQBbAzTv20WNwWxIIEUJUABI0YZjsUkABLJz06Xv0CLp0Ss3qMLvEjX411TVKyU/mvdlPyEBEtAjQAFAjwyvk0CSE/C4XZKTlyF9h/SQwp45rcZ7YNAHGv4075vkDcjikUA3E6AA0M0NwOxJIFoCUP6D5n5mVprkF2VLaf98yS/KUsf5airrpWzTNuXYB7b91yzeTPO+0QJmfBJwCAEKAA5paFYz9QkEtf9FAgG/eH0eyc7LkMKSbGX6F6aA62sblYb/sp82yIJ5K1vM+zYFXf7SvG/qdwDWgATiTIACQJyBMjkSSDQBaPm7M1xSWJKjlP9gBRDue8s31sqPny6Xee8ulI2t5n1dNO+b6AZh+iSQogQoAKRow7HYDiYA03+ClYCA1FQ1yIZVFdJY3yRrl5bJL1+tbjHvC2P+zbABSPO+Du4qrDoJGBGgAGBEh/dIIOkINCstf6wCVGypkZW/bZLsT9OlbGO1LP5hvWxZVyn1dTjn76F536RrOxaIBJKLAAWA5GoPloYEdAloHv1wxh/va7Y1qD3/xsYmZfBn8+oqaahrFOUkqGWVIMQxgG66vEECJOBMAhQAnNnurHUKEwgKAKIG+w2r62Xtyq3idrmUdT+3S3PsQ/O+KdzELDoJdAkBCgBdgpmZ2I2AcpvbTebzgx7+gkQx8MPVH2b9+IegrRSkKvM250SpWgOWmwRSgwAFgNRoJ5YyCQl0pxAAHFgJwODvVQUJDvwc/JOwo7BIJJCkBCgAJGnDsFjJTaC5Wdtk76ZlADXTh+O/ZtFWBFJ98EeLt3FN7vZn6UjADgQoANihFVmHLicApzsYeQM4bdeN3vSCg3/3CSHxBe9qWdXQhKv4ps7USIAE2hOgANCeBz+RgCUCmTk+aWoMSGNdkxICXK62mbilBBiplUBwKyU4+PsyvOL1uVvv8Q0JkEDiCPCblji2TNnGBHzpnuBAFaKxFvLWxjWPb9XaMXO5FFOwZSABEkg8AVezHTYOE8+JOZAACZAACZCArQhwBcBWzcnKkAAJkAAJkIA1AhQArHFiLBIgARIgARKwFQEKALZqTlaGBEiABEiABKwRoABgjRNjkQAJkAAJkICtCFAAsFVzsjIkQAIkQAIkYI0ABQBrnBiLBEiABEiABGxFgAKArZqTlSEBEiABEiABawQoAFjjxFgkQAIkQAIkYCsCFABs1ZysDAmQAAmQAAlYI0ABwBonxiIBEiABEiABWxGgAGCr5mRlSIAESIAESMAaAQoA1jgxFgmQAAmQAAnYigAFAFs1JytDAiRAAiRAAtYIUACwxomxSIAESIAESMBWBCgA2Ko5WRkSIAESIAESsEaAAoA1ToxFAiRAAiRAArYiQAHAVs3JypAACZAACZCANQIUAKxxYiwSIAESIAESsBUBCgC2ak5WhgRIgARIgASsEaAAYI0TY5EACZAACZCArQhQALBVc7IyJEACJEACJGCNAAUAa5wYiwRIgARIgARsRYACgK2ak5UhARIgARIgAWsEKABY48RYJEACJEACJGArAhQAbNWcrAwJkAAJkAAJWCNAAcAaJ8YiARIgARIgAVsRoABgq+ZkZUiABEiABEjAGgEKANY4MRYJkAAJkAAJ2IoABQBbNScrQwIkQAIkQALWCFAAsMaJsUiABEiABEjAVgQoANiqOVkZEiABEiABErBGgAKANU6MRQIkQAIkQAK2IkABwFbNycqQAAmQAAmQgDUCFACscWIsEiABEiABErAVAQoAtmpOVoYESIAESIAErBGgAGCNE2ORAAmQAAmQgK0IUACwVXOyMiRAAiRAAiRgjQAFAGucGIsESIAESIAEbEWAAoCtmpOVIQESIAESIAFrBCgAWOPEWCRAAiRAAiRgKwIUAGzVnKwMCZAACZAACVgjQAHAGifGIgESIAESIAFbEaAAYKvmZGVIgARIgARIwBoBCgDWODEWCZAACZAACdiKAAUAWzUnK0MCJEACJEAC1ghQALDGibFIgARIgARIwFYEKADYqjlZGRIgARIgARKwRoACgDVOjEUCJEACJEACtiJAAcBWzcnKkAAJkAAJkIA1AhQArHFiLBIgARIgARKwFQEKALZqTlaGBEiABEiABKwRoABgjRNjkQAJkAAJkICtCFAAsFVzsjIkQAIkQAIkYI0ABQBrnBiLBEiABEiABGxFgAKArZqTlSEBEiABEiABawQoAFjjxFgkQAIkQAIkYCsCFABs1ZysDAmQAAmQAAlYI0ABwBonxiIBEiABEiABWxGgAGCr5mRlSIAESIAESMAaAQoA1jgxFgmQAAmQAAnYigAFAFs1JytDAiRAAiRAAtYIUACwxomxSIAESIAESMBWBCgA2Ko5WRkSIAESIAESsEaAAoA1ToxFAiRAAiRAArYi4LVVbVgZEiCBLiOwZOlS+e77H6WyolIGDx4ke+wxXjIzMrosf2aUnATYL5KzXSKVKmECQHNzsyxdtkyamyNla+1aVlamFBYUSGZmprUHuiHW2nXrpKamthtyFvH5vDJwwIBuydsumZaVlcuWrVsjVqeoRw8pLCyIeK+7L9bW1cmaNWt1izF40EDxeDy69zt746FHHpOHHnm8XTL9+/eTxx66X4YPH9buerJ9MGKH35q+fXonW5FTpjyp3C9SBnIcC+pqxkidgPDKazPk//5yXdxSnrDPXrLP3nvJxP33k0EDk2PQW7FypRw8aWrc6hhtQvjB/ej9t6N9jPFbCKDr7zXhAF0B4MorLpPzzjkrKXld9edrZMZ/39It27w5swQCTCLCu+9/IJdd8ceISQ8aNFDemvGKpKWlRbyfDBevvOpqefOtdyIWZe+99pRnnmov2ESMyIsdCKR6v+hQIQdcSJgOwJYtkWdVsTL9dM5nctsdd8shh00V/PgtW7Y81qTi9lxVZVXc0mJCXU8gEGjWHfy7vjTR5bhp85boHohj7DcMBI/ly1fIz7/8Gsfc4p/Upk2b458oU5RU7xdObMKECQCJhImZz6FTjpRXX5+RyGyYNgmQQAQCixYviXC17dKKlavaPvCdYwiwX6ReUydMB6ArUFx9zXWycOEiufpPfxCXy9UVWTIPEnA8gQH9+8uqVat1OfQsLdW9xxvtCfw4f4EsWry4/cWWTznZOXLoIQdFvJeMF9kvkrFVjMuU0gIAqvaPfz0rpaUlMv2sM41ryrskQAJxIXDQAfvL3M/mRUwLegc7jh4V8R4vdiTw6ON/k5mzZne8ISLQp0glAYD9ImIzJvXFlNwCCCd65933yWfzPg+/zM8kQAIJIHDi7
06Qgw86IGLK99x1m2RnZ0e8x4sdCTQ1+TteTNEr7Bep13DdtgJwztnTZOyYnToQa2hokFWrV8vyFStl0aLFMn/BTx3iRLpw8613yJszXhWPp+tkmpKSErn4wvMiFSfitZraWnn6mX9FvIeLvXr1lOOOOUr3fviN3Nzc8Ev8TAIJJ4Dv2MMP3Cv/+3CmfP3Nt+L1eiU/L08On3KY9O3bJ+H5M4PkJMB+kZztYlSqbhMAMPjrzSJCC/zDj/PlxptvMxUEFi9ZKu+9/4FMmTwp9PGEvu/Zs1QuvfhCy3lUVFYaCgDDthsaVXqWM2ZEEogzAejcHHLwgeovzkkzuRQmwH6RWo3XdS8aivQAABxgSURBVNPlGLmM2WlHefmFZy0NjK/NeCPGXPgYCZAACZAACTiLQNILAGgOt9stF5x3jowbN9awdWArAFa+GNoTgMGbpqam9hdT+JPf7xe/P5DCNWDRSYAESKD7CXTbFkC0Vcf+0m033aDO/xs9O3/+Ahm/265SW1sr7773gVFUOfDAiWrv0jBS2E0cf/rq62/CrrZ9nLDP3lJSUtx2oYvfbdq8Wb744iv55NM5gnO569dvaGfsBtYDcUxrj913E1g922nH0eLz+aIqJdIF50gBCmChmstffPm1vPPue8osNM6HNzY0Sp8+veX6a6+RHUfvECmJ1msQXKAHMvezz2XuvM9l5apVAiMu1dXVKg7yAuvtR4yQ8bvtomzRDx0ypPX5RLz59rvv5e133hPUBeWB4Rtovo8Zs6MMHzZMhg/bTnbccQdbm2he8NPP8ttvi3TxHjF1itIL0I0QdgPfVbTv7E/myOrVa2TN2rUCI1vFJcUyZNAgQZ9FnwrvLxs3bpI5cz8LS63t40EHHSB5cdCTQTvPeONNWfjbIlm5arUqI3IZvcP2sv3IkYKtuxEjhqvvUlvuHd+hP78+47/tbqxdq2/OGQxee739qiYmQ4dNOkTS09PbpRPNdxIPwoT5a6//Vxb+9puqD3SuELYfOUJGjhwh2w0dIiOHDzeddIUWItZ+8cabb4tfZ4Ky2667qPbX8oHwP2v2JzJr1mxZtXqNLF+xQv3GoY+MHjVKhg3bTrbbbojsuss4KS4q0h6L+TXZ+2bMFWt5MGUEAJQXDkfQMbHfrxfwo6CCyyV/vuZavWjq+l3eW+XIqVMM44TffPHlV+VvTz0dfrn1M3QColEMbH2wk29glvjBhx/TNXGqJQ8BBn9Q3nr40SfU4HXJxRco5UOr5lu//PIrueHm27Qk271qR5c2b9kif7r6L4JVmfAA2/sbNm6UHUVfAIAAc9e9D8jChb+FP976GYIA/jAIwwwpwr4T9pELLawWtSZi8Y1ReVAfHOUKPc515eWXyvSzz0yoPX6LRY97tFkfz+7gByA0k0MOPkhycsx/WrBa99Tf/yGwHx8pgKvW/vjOQW8I36999t5TRYevEaPv+Ptjx3RKAIC10Tvuvrddu4aWE8It/rQw9fDJcu01f5b8/HztUrvXQCBgWN52kUWU4B6pfuB20u+ObxfdyncSD6xZu07uuvteeUdncoTfBfxp4aADJsqNN/zV0mAaa7/445/+T8uuw+uD99/dKgC8/Mpr8tCjj6sBPzyi9rum/Q5gcnDzDdfGrBOW7H0zvP6xfk6JLYDQym2//cjQjx3el5WXq2vwSnb0UUd0uB964ePZn4R+tPRe62B6kQ8+cKLerYRcx1L4vfc/pHwS6Nk3N8oYP7LX33iLHDTpcFNFS6N0Qu/hy3PeBZdEHPxD40V6v3VrmUybfr5MP++i1h//SPH0rmGgPvHUM+Tuex8QzBbiEZ75x7+jLs899z8oJ516pjrNEo8y2C0NDOyHTj5Cd/CPVN/vf/hRzjrnfHn8b08JZtOJDJ9/8ZUcc8LJuoN/pLzx/Zs05SiZMzeyjYRIz8RyLRBjv/7p51/kuBNO1h38I5Xlw5mz1G/LB//7KNLtLrmG7/GNt9wu11x7Q8TBP1IhMDG44g9/kst//0eBw69oQrL3zWjqYhY35QQAM0dAFRWVrXWeOmVy6/tIbz6e/ak0NjZGuhXxGmbZkDT1Ama/WArsqoDlqUuvuFL9IHY2T2wVHHvCyfL+Bx92Nim5+577YxImsAx5/Emn6hqZiaZgmDFecPFlgllXZ8Ijj/1Nbrvz7piSwIB19HEnSkVFRUzP2/Uh+Ao48dQzLf+Yh3OAwAuhNVHh8y++lNOnTW/daoomHwjUEFKMtgmjSS9ecX9duFD1RZQv2oDB9OLLft9ttlauvuZaefa556MttoqPlY4LLrnMssCY7H0zJggGD6WcAFBWbvxj6na3mQTGPreRURJ07O++/8EAT/tbcyIsZ4fGOPaoI0M/JvQ9pOLzLrxUncWOZ0aXXH5lp9LEHv2/Y/iybtiwUf1AGQlY0dYTAt5TT/8j2sda4y/46Sd54KFHWj/H8gZ9LNxtbizp2OUZ9I9TTj8rpsE1lMHzL74sb73zXuiluLyvrKyUq67+S6fTwow1XitQnS0MynHNX2/obDICWytdXad33/3A0OullUp9++338t4H/zONmux907QCMURIOQFg0SJ95SPUvzTEDjkMlJxw3DGGWD75dK7h/dCbMz+ObLJTizNp0sHa24S/YlDBTCURAT+AWO2IJWDAizbgRwXLdbE8a5YXtgKguBdLiMdqCPL917P/kd8WRbb3Hku5UvmZ62+6JW7t/NLLr8YdBQyPYTWsswHLyK++lhzOyiBUWzWoZlRv6F69/OrrRlHifs9sy9VqhvAki61Jo5DsfdOo7LHeM9fUiTXlBDyHo2wLfvrFMOXSMA38yYcdKs/889+6z3zw4Ufyh99fpntfu7FtW7XhnjY0lAcOGKBFT+grJFrYEDcL0IyFEg9sKfToUahOBWha7EbPYiD+/R/+LK+8+FzCnCxlZWa2FuGpp//ZTvGo9UbYG9Rnj/G7qfpkZGbIihWrZOasjwV7m0bhokuukPfenqGrnGX0bPg9WGuEtnFamk8N6kYKqaHP3nv/g/L4Iw+GXnLce6zIwHqglYCVu912HacUf1esWKmU7RIhIFotCxQQi4t6yNJlyy0PprfdeY9MnTpFoI+EACM50848rV2WOKmkJ3CAwQnHd5zADB8+rF0asX6Apny/vn1k5erVyqmaFb533HWvUhjG5Kq7An4HRm0/Um3v/brwN8NtWa2MYPzqq6/LqaecpF1q95qqfbNdJWL40H2tGENhn/z7M6azh/BjYDjmhg6jt7QMDXLMds0G78+/NJ5tH3Xk1BhqFNsjjz7xpOmDhx16iNx+202tPz54YI/dx8tpp5yk/s4+90JDlpgx4PidpnFtmqFOhLOmnS6TJx0qgwcNktzcHKVzsWz5CnVcDo/U1NRY0mGYdsZpctUfruigVX/h+efICy+9IjfcdKtOCYLa1JjNn3D8sbpxzG7sPn5Xue3mG6Vfv77tomLZ8Jprrxf8gBiFUG1xo3h2vvfsf16wVL277rhVpk45TNn/0B6A0t9Hsz6WCy++XLuU8FcIe3ffcas6VhyaWX19vdrW
MToNhPgYUFeuWNmqF4QjfFdf9YfQpGTJkmW6AgCOuIbHb/dwjB9uufE6OfaYo9rxxSoc6nPfAw8bpoo64Zim2e+lYSIx3sTv1+233ih9evdulwJW16686mpTpeH5P/3c7rnQD6nWN0PL3pn3KbMFgEY265w4IhhuixxS99FHGp8GMNvbB2CzH/hDDjqwM+1g+VkoqUDT3SicO/0sue+eO9oN/qHxx+08Vt6a8Urr8ZrQe6Hvn3jyqdCPUb3H2fgXnvun/PmPV6rz0Rj8EWBzAGfltYBz0WYzjxuu+4ty+ezxeLTHWl9x7ZSTfif333Nn67VIbzrjLOrC88+Vf/z9yQ6DP/LBj/QTjz4kZ55+aqRsW6+hjjga6dSA8/1m/RYz3pee/7c6movBMjTge4zVrDdff1n5zAi9l4j3E/bZS95649UOgz/ywhl8rBrCH4JZgL2IZAn4Tr7/9hty/HHHtBv8UT58j2Bs7aknzHVeVq1a0+VVOu/cs+WZp57oMPijIPg9efXF59TZf6OCLV68JOLtVOubESsR48X237IYE0n0YziCcsa0c0yzmXxYZD8AUw471PBZs719aJJ/9NEs3TQwO4RfgK4IMEJjFLDaccVlF3f4goc/A0Hp6j+1n42Ex8GsFeeGYwkw9ANBwyy8YrJPipWM8DPPkdLEVs8xR+srYc7+dG5M1gNxquOSi843dDKFwenii843VDhFmVcm0WAQiWEir83+xHiFBHk/cO+dER2EhZYL7dEVWyk333idqQ0B+EKAoGAUYKgmWcK1f71abakYlQd2NPBnFGLVDzJK0+geJnaXX3KR4XcQNkywQmgUsH0TKaRa34xUh1ivJaUAUFdXL0uWLpVZH38ip087Rx1BMTu+gtkDZoKRAgwI7TBq+0i31DUYqzGahWLWbZT/EVMP10073jfMZrLnnzu9wzK5XhkO2H8/5XNc7z6ufx/FKQktHSzVhVoD1K6Hv1ZWVZnu3597zlnhj+l+NhIU0L44ChVtgGGXSCsP4enA4ty506eFX273GVbknBrg1Mso4PsJK5pWAvZ/sRqQqHD5pRdJ7169LCWPWbNRwBZjMgTs9086xJqS8tnTTjcsMnQyujL81eJ3EHoaRubi8RsQ6Xc8lfpmvLl3mw7ARZdeEXHwqaur090TM6r8BedNl8LCAt0oxx59pOFg8/mXX8mBE/eP+LzZ0mUif4xCC2Q2YGKJ74jDjW0fhKaHZVZYLfzDVfqWuL786uuorWlZ8fKIcuCcvFHATMRIcAt/FoqYMG9c3mIMKvQ+th4gJEYTEB+mSK0GM+WsjRs3Wk3KdvG+MTmJcdEF50alcArdDxipSUSI5vuM2alRgMXLZAioE1aqrATYMzEKXd2Px++2m1Fx2t0bMWyYQElaL8DQGH4nQ0Mq9c3QcsfjfbcJACh8vKRjLMOdPe0MQx4wT4qzuXoBe/x6AsBHMz/We0wOmLifoeCh+2AMN8wkb2xFhNsIN8vGbIBbaHLsMlL6Awf0j3S5w7Uli/VNOiPyxP337fCM0QUINM889bhRlKjujRgRnbZ1vz59okrfKZGhYKanhKsx2H+/6Np69OgdDJV7tXRjee1vsf8i7YKCAiVYGq0gxlKGeD9jNqiH5ldaUhL6sVvfY8sHfmCshnAlXbPnUq1vmtUn2vvWyUabchfFhwR+3z13mi7TlpaWGO7XffjhzIhW4+BbwOgMrZm1wXhiCLVyGCndvn3ba6hHihN+zezLDok52tC/Xz9Lj2hmm/Uih2v76sVL1PWhgwdHlXRRceedj0SVYYpE3mZiGwLa9rEcKzOzChoLHpRFO7Zn9fnudP5ltYwDohBqsOUVPku2mk+845mtsITnh+PO0YRU6pvR1Mtq3JQWADD7fuE//zJV1tFgGO3VY28Ie/3hwWzPff/9JoQ/krDPZiZl+/S2tm8ZWkB82fGjpxdwzC3aYNWpkFl9evfWL1e0ZYolfk5O8OSC1WetLrFaTc8u8bZVbTOsyqCBxkvOeg9bFTT1no90PSfKbSKk4Y1wOiVS2t15LVqhpjvLGpp3qL2Q0Ot6711ibZtDez6V+qZW5ni+pqQAgL1ZaLA/+tD9lgd/QIPSm1H4dE5Hq4BQRNQLUyZPinpfWS8tK9erthn/kJbEuHQH97x6AUubiXK8YiYAFBd3n1tlPR68Hj2B6poaw4d6xyC4IsGuOnljWHjeTGkCTu+bKSUAYDkI528/nfU/gWGY8LPCZj0RZ9ExaOuFD8OO+jU0NLS6mY30zOFTDot0OWHXfD5jlY26emNTl3oFq6+r17ulridqZmu2UmBWLsNC82bSEDBr520mgq1eRaqrjQULved4nQQ0Ak7vm8YjikYpAa9Q3BtosPSHhZyCgnzpUVgomAnuPHaMYB+/swF79npn6bHXjyVvbU/PyIY8ViH23jPol7yzZbL6fH5enmHUdetis2EOL3x6IZF7gYWFxvt16zds6GDYSa+cvJ68BDQjUHoljPV45OrVzj1WqceS16Mj4PS+2W0CwIknHC9Wj4tF16TGsWHaFoO3ntbu3Hmfy1FHBM/1f2xgvGTypEMkIyPdOLM4383PzzdMcd266I32gIMeC2RWHOZbwbAAUd7MMxFo9GykR5kNo3czAbN9dTjOiSUkk5W9WMrPZ7qfgNP7ZkptAcSju2DJ58ipU3STmjWrzeOfkeMSo60E3cQ7ecNMwzWWH0QzS389Q7wrdrL4HR4vMdGaN1qZ6JBYywV4/ILjpkh/jY2Neo/xegIJ4GgqhG6jEG1bV1VtM7TrYZQX75GARsDpfdNxAgAa3mjvHu4nsfe/bNly3bPL+DHbfbx14xRaZ+vs66CBgwx/SOd+Nk+3zHp5v/Hft/RuqevYeklUwFluo/Dsc8+rtjCKE3oP3iL3mnCAjBu/V8S/GW+8GRqd77uQAGxUGIWnnv6H0e0O95573ppjoQ4P8gIJhBFwct90pACw89ixhudcsfc/57N5Yd2k7ePxxx5tanegLXb83sEgxp67jzdM8Jl/PWt4P/QmLObBw6JR2GXczka3O3VvhIlbUxzNfPPtdyzn8fkXXxluZ4wcOcJyWowYXwIw02oUXnr5VYFTFisBFjGfePJpK1FTOg6M1DAknoCT+6YjBQAMpPCIpRc++XSuod/ySYdas6mtl35nru+91x6Gj2PWDONFVsKz/3nRNNqYnUabxok1AmwQmPkMePTxJwW+IcwCfiyNvBdi1WbU9vr+IMzS5/3OEdhzj91NE7j+pluVe2ijiNjGueW2Ow0FPaPnU+kerCdSCEh8izm5bzpSAECXMvIQ+PyLL8vnX3wZsefBaI6ZxBjxwThdnGqgv6BlceqZZ4uZx65//vs5efDhR7VHIr5OO/M0yczMjHgvXhdPPfkkw6TwI3jK6dPU6QyjiHfec5/Ae6FegNJmNCZF9dLh9dgIjNlpR4FZV6MAnxu/O+UMWbNmbcRomzZvltOmTRe4kLZLyM83Ptkz+xNj19924dCd9XBy3+y2UwDd2eDIGz9GsCuweElHe/RGWvFHHTE1avsD8awrvM6
dd85Zhkug8LFw1LEnKveqe+25RzszqzDt+8BDjwiEHLNw+qmnmEXp9H3sv8GJj5G5Zdw74ujj5Y7bb5ZxY8dKTk6bQhmsN95+5z26AptWwGj9CmjPperruedfLEVF7Z2exFKXI6cebmg7I5o0zzjtFPm/v1xn+AhOBBx+1HFqZQg/zFlZWVJbWyvz5y+QmbNmR/TmZphgkt80M2d8zV+vF7jxhZMrfPfr6+uj9veR5AiSonhO7ZuOFQDQ64495ii54657o+qAkw87JKr4iYg87YzT5ZVXZxj+GEKImX7eRSp7uAINNAfULNrMKYtW3nOnnyV9DSwEavHi8QqrjiefZuxKF/oA08+9UGUHwS0vP08WLlxkaSkYwt7eexv7bY9HPZIpDSOBKppyxnO16+gjp8rzL7xkKOyhbOi7r73+hvqLpqypGNfMnDH6/WVX/LFd1Z549KGoHWW1S4AfOhBwat907BYAesCkQ6MbzOFRa+SI7lckw3HARx68r0Mn1rvw9TffKheZVgd/+NSGT/SuChBQrrz8UsvZYdUGLj+NVmq0xLD3/9hD90ft4EV7nq/xIwCdjztuuzluCfbvb83pVNwyTEBC0Tjp0bJfuzbyFol2n6/RE3Bq33S0AIAZLgYfq+GYo46wGjXh8TBI33T9X+OeD35UH7z37nbbBnHPJEKC08+eJlMPnxzhTucuPfbw/RKti9DO5cinjQhg9ebxRx40imLp3r4T9pFzzzZeNbKUUDdHwjFbs2No3VxEx2TvxL7paAEAPfvoKAb1aFcMEv3N+d0JxwmWA+MV8EP02kv/iYvJ5WjLBAW9u26/Rc4/d3q0j+rGv/2WG2UPk2OTug/zRsIIwIvnU38zVkA1yvyYo4+Uhx+4t1t1cYzKF809+Nm48br4C/LRlIFx2wg4rW86XgA4+MCJba1v8G6HUduLmcKOweMJuwXltnfefF0645YYy+RwsvT0k4+LmbnhhFVERP2g//7yS+SxRx4QbLfEGk763fEyd/ZHgoGCITkJ7LvP3jLn4w8NrXKGlxwncG656XqBYNfVZrjDyxLPz4MHD5JHH7rP0DZJPPNjWsYEnNQ3Ha0EiG5QUFAgcEz06ZzPDHvFUUdONbwfj5vQeI4lYOnqb489LDBgBCWrD2d+bGl/HMpxhx16sJxy8oli5mgotFwZmRmhHzu8z8nN6XAtmgsHTtxf9t93grz51jvyxptvCywcWgkHHTBRLr/sYhk+bDsr0bs8TnFRUZfn2dkMI5XZ6GgohMn09DRL2cK511133CoXX3S+0vCH/Y2Vq1a1WrNEWkMGD1J/kw+bJPtO2LudAa6y8nLDfFzuyPMbM/vvholGedOqS+uDDjxAxo/fTTkqmzP3M/ls3het32EIPqF+MSLxT/R3MrTapTrmwSOVS3vOqF/A4RiUHSOFWF2cR0oL1wpM/Kloz3VX39Ty76pXV3OinL13VQ3ikM9pZ55teIYcWXwy8wPBFzEVgt8fkF9++UVgXx0/klu3blUmhGEqF94VexT1kDE77tjq9TDZ61RTUyPf//Cj+hHcWlYm5eUVAtfIAwcMEChRQW+hpLg42avB8kVBAAZwoJhlFK686molJOrF+ezTmRJJgNGLn2zXwaCyskp8Pp8ykATBKicnx5RLstXDbuWxU990/AoAjIsYGZBB58UZ3FQZ/FFe7KfDzr6Zrf1U+WJiZQT2DBhSmwBOo5SVRZ61Dx++nRLotBqaDf6BQEAJhVr8SK9WZ3uRnk2Ga2BQWFigihJq+yIZyma3Mji1bzpeAHj1tRmmffmE4/XNBps+zAgkQALKO6ORrQdsR8145QXLs9vnnn+xdasgEl54/PR6Hf/zFgkNr4URgOdQp/bNyJtkYYDs+hF75vfeb6xFj72rA/bfz64IWC8S6BICmMFCw1ovwALgLbffZcn2/YIFP8lNt9yul5S6Dp0BBhKwQsDJfdMRAgD2bGAbH4omeMV+8m133i0nnnKGaf+47JILaXrTlBIjkIA5gaOPNLajAUdW06afL199/Y1EUk2qqKyU2+64W4454WTDzCC077WnufMhw0R401EEnNo3HaEECO94++x/UNQdGtqpMz98l1bkoibHB0igIwF4ddxzwsRW7faOMdquQLFz9KhRUlxcJFD8hPVHrBJYCbfceJ2ht08raTCOswg4tW9SADDo5zBWgjOhDCRAAvEh8MOP8+X4E0+NT2IRUoEPC9i0YCCBaAk4sW86Ygsg2o6A+PC4x8E/FnJ8hgT0CcDD30vP/1s/QifuHHboIQJDUgwkEAsBJ/ZNCgAReso5Z0+T30fhnCZCErxEAiSgQwAeBl947p9xtXx36cUXyt133moL88A62Hi5Cwg4rW9yCyCkUx16yEFy3jlny+gdRoVc5VsSIIFEEGhoaJB33n1fHn3iSVm+fEVMWUyedIhccN45gmOEDCQQLwJO6ZuOFQA0M6P9+/WV/gP6y9Qpk5PWhGy8OjXTIYFkJACjPrM/mSPvffA/dUpnxfLgiZ1IZYVi7vjddhV4w8SSP0y2MpBAogjYvW86QgBA54B53IaGetVPYCAE5jUZSIAEkpMAtLLXb9gg5eXl6hROTm6u5ObmSF5ubnIWmKVyDAE79U3HCACO6Z2sKAmQAAmQAAlYIEAlQAuQGIUESIAESIAE7EaAAoDdWpT1IQESIAESIAELBCgAWIDEKCRAAiRAAiRgNwIUAOzWoqwPCZAACZAACVggQAHAAiRGIQESIAESIAG7EaAAYLcWZX1IgARIgARIwAIBCgAWIDEKCZAACZAACdiNAAUAu7Uo60MCJEACJEACFghQALAAiVFIgARIgARIwG4EKADYrUVZHxIgARIgARKwQIACgAVIjEICJEACJEACdiNAAcBuLcr6kAAJkAAJkIAFAhQALEBiFBIgARIgARKwG4H/BwQe9SgvOUC+AAAAAElFTkSuQmCC)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "goRmGIRI5cfC" - }, - "source": [ - "# Live DEMO\n", - "\n", - "Here are four examples using Lightning.\n", - "\n", - "1. MNIST\n", - "2. GAN\n", - "3. Finetuning a Transformer from Huggingface\n", - "4. DQN" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "jKj5lgdr5j48" - }, - "source": [ - "--- \n", - "### Setup \n", - "Lightning is easy to use. 
Simply ```pip install pytorch-lightning```" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 938 - }, - "id": "UGjilEHk4vb7", - "outputId": "229670cf-ec26-446f-afe5-2432c4571030" - }, - "outputs": [], - "source": [ - "! pip install pytorch-lightning==0.8.3 --upgrade --silent" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "zaVUShmQ5n8Y" - }, - "outputs": [], - "source": [ - "import os\n", - "\n", - "import torch\n", - "from torch.nn import functional as F\n", - "from torch.utils.data import DataLoader\n", - "from torchvision.datasets import MNIST\n", - "from torchvision import transforms\n", - "import pytorch_lightning as pl" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "gEulmrbxwaYL" - }, - "source": [ - "--- \n", - "## MNIST hello world" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "nbQAcRna5e_q" - }, - "source": [ - "## Simplest example\n", - "\n", - "Here's the simplest most minimal example with just a training loop (no validation, no testing).\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "zM15oxCH5lo6" - }, - "outputs": [], - "source": [ - "class MNISTModel(pl.LightningModule):\n", - "\n", - " def __init__(self):\n", - " super(MNISTModel, self).__init__()\n", - " self.l1 = torch.nn.Linear(28 * 28, 10)\n", - "\n", - " def forward(self, x):\n", - " return torch.relu(self.l1(x.view(x.size(0), -1)))\n", - "\n", - " def training_step(self, batch, batch_nb):\n", - " x, y = batch\n", - " loss = F.cross_entropy(self(x), y)\n", - " tensorboard_logs = {'train_loss': loss}\n", - " return {'loss': loss, 'log': tensorboard_logs}\n", - "\n", - " def configure_optimizers(self):\n", - " return torch.optim.Adam(self.parameters(), lr=0.02)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 639, - "referenced_widgets": [ - "bb305bb378774c1586a3196eb3babd29", - "ff6eead2826e4113abf7ab3a8cb31b0f", - "992f545bb6f1489493d89a59d046f63f", - "44e2c572ab1641a29156ad4ee8884e12", - "254ddfa7c23d4b6f828d515dbab38978", - "a700c003887b4d2ba134fbfcf4823cb5", - "32e47e93509043439580cd5f58dc7726", - "845415af79634da5a64b9f368280c0e3", - "a7a94847786244dd9a5cb9718957143a", - "8734c1b798ff4ba0bf77dca4f3de9cbf", - "ee0a78c01b63443f9e51470a1b1e79a4", - "632b9d0d9ffa4d479deb70f6fafb92ab", - "a9413692ae5040e6ae3c2a446dbe297c", - "ca4cd1659d73446e964f9ab36d92e3a0", - "42e787b78000472eab434fb795197a86", - "1a7680c6279d4985bd69188dd72b11d5", - "3397549a0695432990f1d3d5390941e7", - "515ef7d03ef2447e9643210b029b930e", - "ae52e3d810aa4bc5965559ed2ba2b78a", - "08b6d9269e514d228e7e94fe0299a2c5", - "7ee81979301c447bb13ff9fff5153e0f", - "ea162090fc954f0198a1d63507dfff9b", - "0cf9a61c88af45b6a6ef72640f93cbfd", - "67728556b4c9432b877d54a081657663", - "de325f4002a945b4a2a15086c2a77816", - "5012438370764b4db215d545e9414c94", - "6aafaca3c8824e2fa267f4a68d5d2ca3", - "c4200c1f957a4179af51245a797c8921", - "53b2a85381b1460d9f446390c79bfc08", - "59f02fe7f9f2433bb25f5b292c213f50", - "1dabf5740f4d44d68d06629f77b001e3", - "0f688614251d49589f320f2b2cb55344", - "c93f037dc6044d858ae1862d5b29f6f0", - "00ae53beaa9341f4826b1bdc0a6f88e0", - "4b7021f73f6b4e5193454128ccf323d7", - "6f55aa11acb14afdb2ac0a1052be1bb6", - "b5f184fbcba740999b205e34e23455d6", - "d9540ab5d2394b77a65f48b501acdc18", - "23fd97d95fae4f42bd21906f67115f8b", - 
"420e8d65e9584973a8004e8398cf430c" - ] - }, - "id": "5VEbFQp55wqo", - "outputId": "c2321d5d-bbad-4896-b41b-dbc9ed19340d" - }, - "outputs": [], - "source": [ - "train_loader = DataLoader(MNIST(os.getcwd(), train=True, download=True, transform=transforms.ToTensor()), batch_size=32)\n", - "\n", - "mnist_model = MNISTModel()\n", - "trainer = pl.Trainer(gpus=1, progress_bar_refresh_rate=20) \n", - "trainer.fit(mnist_model, train_loader) " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "gjo55nA549pU" - }, - "source": [ - "### 1. LightningModule\n", - "Each project goes into a LightningModule.\n", - "This module houses:\n", - "1. Model definition (__init__)\n", - "2. Computations (forward)\n", - "3. What happens inside the training loop (training_step)\n", - "4. What happens inside the validation loop (validation_step)\n", - "5. What optimizer(s) to use (configure_optimizers)\n", - "6. What data to use (train_dataloader, val_dataloader, test_dataloader)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "x-34xKCI40yW" - }, - "outputs": [], - "source": [ - "import os\n", - "\n", - "import torch\n", - "from torch.nn import functional as F\n", - "from torch.utils.data import DataLoader\n", - "from torchvision.datasets import MNIST\n", - "from torchvision import transforms\n", - "import pytorch_lightning as pl\n", - "\n", - "class MNISTModel(pl.LightningModule):\n", - "\n", - " def __init__(self):\n", - " super(MNISTModel, self).__init__()\n", - " # not the best model...\n", - " self.l1 = torch.nn.Linear(28 * 28, 10)\n", - "\n", - " def forward(self, x):\n", - " # called with self(x)\n", - " return torch.relu(self.l1(x.view(x.size(0), -1)))\n", - "\n", - " def training_step(self, batch, batch_nb):\n", - " # REQUIRED\n", - " x, y = batch\n", - " y_hat = self(x)\n", - " loss = F.cross_entropy(y_hat, y)\n", - " tensorboard_logs = {'train_loss': loss}\n", - " return {'loss': loss, 'log': tensorboard_logs}\n", - "\n", - " def validation_step(self, batch, batch_nb):\n", - " # OPTIONAL\n", - " x, y = batch\n", - " y_hat = self(x)\n", - " return {'val_loss': F.cross_entropy(y_hat, y)}\n", - "\n", - " def validation_epoch_end(self, outputs):\n", - " # OPTIONAL\n", - " avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()\n", - " tensorboard_logs = {'val_loss': avg_loss}\n", - " return {'val_loss': avg_loss, 'log': tensorboard_logs}\n", - "\n", - " def test_step(self, batch, batch_nb):\n", - " # OPTIONAL\n", - " x, y = batch\n", - " y_hat = self(x)\n", - " return {'test_loss': F.cross_entropy(y_hat, y)}\n", - "\n", - " def test_epoch_end(self, outputs):\n", - " # OPTIONAL\n", - " avg_loss = torch.stack([x['test_loss'] for x in outputs]).mean()\n", - " logs = {'test_loss': avg_loss}\n", - " return {'test_loss': avg_loss, 'log': logs, 'progress_bar': logs}\n", - "\n", - " def configure_optimizers(self):\n", - " # REQUIRED\n", - " # can return multiple optimizers and learning_rate schedulers\n", - " # (LBFGS it is automatically supported, no need for closure function)\n", - " return torch.optim.Adam(self.parameters(), lr=0.02)\n", - "\n", - " def train_dataloader(self):\n", - " # REQUIRED\n", - " return DataLoader(MNIST(os.getcwd(), train=True, download=True, transform=transforms.ToTensor()), batch_size=32)\n", - "\n", - " def val_dataloader(self):\n", - " # OPTIONAL\n", - " return DataLoader(MNIST(os.getcwd(), train=True, download=True, transform=transforms.ToTensor()), batch_size=32)\n", - "\n", - " def test_dataloader(self):\n", - " # OPTIONAL\n", 
- " return DataLoader(MNIST(os.getcwd(), train=False, download=True, transform=transforms.ToTensor()), batch_size=32)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "GROo8IDX-QCx" - }, - "source": [ - "### 2. Trainer\n", - "The trainer is where the magic happens!\n", - "Feed ANY LightningModule to a trainer to train the model." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 510, - "referenced_widgets": [ - "6b3a598e7d01407aa5850b5a6620e7f0", - "e23c0d6c117246b2a0a6681008748917", - "e7a06a13ea11427ea3866cec3a55b644", - "620f45256d504f0188f40c61e23e1355", - "05f01b8813374534a4c58ac65fe2b390", - "ee3d759a4e4442288599eacfd1347c8d", - "c3587b40d9f942e98b708ff0b5fc5301", - "e64a723bcf474d6699d78ec05462f995", - "4ffe9fb35ca44358b0177723f73a35d8", - "ba3fe1aba1b04a1fbaab268dfd3d0166", - "fc3f14c4e83048aa9d6fe9963f95bf7a", - "171db7c8fa1e4f11aaff71b9f5879d58", - "4eaea330bc8e414fbf2f0e2b21af8b08", - "118c0b8da0df4ff68a90a3d500f1d1b8", - "135883097f0e428c963ae0ad320dfabd", - "85741a7765a147c4a8d69872850cb072", - "3d2e43ae9f924fbd8463ce72b44200f3", - "7c03e0a2247442ad9c11569e443de4bb", - "3a521cb700f247fd8cd345b158697f2f", - "8a40fcad728841c7ab2fd15d2c40ee5f", - "bc4dfb14c9d14499ae72d2a30fc6bc2d", - "866f880d850a4e689a1c99723f0366db", - "e7a864f4dc0f485eb045b778e981fc01", - "56ef38eff92143bcaf68b22c8dae7f98", - "a252ac32033b4e39b87a6c91bd21b5ae", - "40ce71ff339849748486ebbc73474cbe", - "f1db9d62eee44e61bb8ac26c16b3b601", - "69fa0a853cf84b9482043e12881c849f", - "d8db4d3709f34c869dedbc066e60501e", - "9b6445338a69425889a8901c192d5144", - "51b1111f5fe24042b38af809285e1b16", - "294d8142a4aa48aa8261b0b8155ef97f", - "a2990a67f92c4047b95876aae91e3de0", - "44ca9ee5c356458680a5d20c6a891c91", - "7e8cf26303ed4975b239fd43184a1dc6", - "d7406a8b15f9439fba19ec4dab086c61", - "fdf5c4a49602423184f6d94cd814177e", - "5d3c506d3f4444d8a6b7024cd11de2cd", - "42ede89dbd194eb6a603ccd7d4b96aae", - "ea13174e5b894e93b3c59d7e599de5a9", - "ee908316d227495381e8cf7dcf5526f1", - "a4b49709f7464ce491324e8aa636c152", - "8eb2086a01cf41429a5f4adff5f2359b", - "bbef89e4fd9d4cf8ae4c8fcab9bc665d", - "d01088cc378044cba4879032d74a852e", - "352d7dae131b407cb6e0238315c1b1a0", - "91a6de2063cc48b28021ef29feab7f69", - "39422514a4a04a9ba290285dc586ea9f", - "b73a326ada4d4a859e3c2c39abf5530d", - "cd942318db094680821f0d9902941977", - "29650c4a829b44ed9e1526b1dc5d2b83", - "df6521155d05459882601ba8c84f3dce", - "384a36423d154f2abcddb5094afeced3", - "a99b7813bf88496c875a818afe3b170a", - "f4a052d2223a4d4fa95ed52f94ad465d", - "7ea8ad4e10bb465aa2b6708655a2793f", - "15bb223836764207a5ac15616a41ddb7", - "46f7df7035d44bd099f60ad23f836f8a", - "296453e43f7344de8a9b5c6bc970ab1e", - "cd86997da08649d7999ade2d0e7cea96", - "7d15fc81537a449cb6b6afd7ccc65dac", - "bef041a9f5a942f68b4a8488a371d3da", - "e10c94b1fdf84a9186ab7d87fd83f87f", - "19c7460c565d494abbb8b9731a34294d" - ] - }, - "id": "HOk9c4_35FKg", - "outputId": "a07e65a7-7452-478d-f80e-179272b26b8a" - }, - "outputs": [], - "source": [ - "mnist_model = MNISTModel()\n", - "\n", - "# most basic trainer, uses good defaults (1 gpu)\n", - "trainer = pl.Trainer(gpus=1) \n", - "trainer.fit(mnist_model) " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "IV77NP_Ywkzb" - }, - "source": [ - "### Testing" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 134, - "referenced_widgets": [ - 
"bcf69c2a0b694e0498beadb6f4509395", - "2e20c741cf8a401cb90e8e230a23026a", - "a7bcd18049d8493b9d3d9f17d86f0429", - "3c99401bde8641c19978c11c9abb906a", - "cd84335fb7234f3aa54dafe045614e56", - "f261b8aab86b4d6e94984bf658c1b74d", - "fd8ec919352046dd84057e9763bb235a", - "f778d9ef70ca4f5898c423109cf82ed2" - ] - }, - "id": "-Bnkq97qhe2x", - "outputId": "9db00280-ef5b-4ae4-8a6d-174590ae6d0c" - }, - "outputs": [], - "source": [ - "trainer.test()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Q-qxNrXvKAlN" - }, - "source": [ - "### Plotting\n", - "\n", - "Plot the results" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Start tensorboard.\n", - "%load_ext tensorboard\n", - "%tensorboard --logdir lightning_logs/" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "xF9-ouAEGFlZ" - }, - "source": [ - "By using the trainer you automatically get:\n", - "1. Tensorboard logging\n", - "2. Model checkpointing\n", - "3. Training and validation loop\n", - "4. early-stopping" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "18STRwHg-kW8" - }, - "source": [ - "### Bonus\n", - "In fact, if you keep calling fit, it'll keep training the model where it left off!" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 83, - "referenced_widgets": [ - "6aa5e292e2094c239e7418994a31ff51", - "555443a6fa564d10a3a7901cf15a79a3", - "1f9c48164702427fb3aca2a26b2651e5", - "d8bd5c9b233b41008109d14cffc89aaa", - "61c71d4f1c2848b1813aebc0b2db5e25", - "a1e2c38bb40642168cc9d44abf645a54", - "d0590d65433c4478af6a0762421f9f7a", - "76c916d634c644a4a0d8f12e183822fd", - "a38242d3231442e2a259067d6a1355c2", - "9a9ebf052d914a8881882da8d2fa9cd8", - "a56deb884719491090a4146e72be3868", - "7633820adf9a4757ae73b472e43031d6", - "b7a073dfdeaf48fc9f3e6352b0ea2ba7", - "8aab627e715a44ada2af81b74bece257", - "fc262db2a53948488092a77209081319", - "11db4a94a4534fc2b503aad28be631be" - ] - }, - "id": "U2d1gc4N5IJX", - "outputId": "f68aaf1f-dfa9-4f30-de7e-d4fdab9eb089" - }, - "outputs": [], - "source": [ - "trainer.fit(mnist_model)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "P0bSmCw57aV5" - }, - "source": [ - "---\n", - "## GAN Example\n", - "\n", - "How to train a GAN!\n", - "\n", - "Main takeaways:\n", - "1. Generator and discriminator are arbitraty PyTorch modules.\n", - "2. training_step does both the generator and discriminator training." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "pBhBR3QJ7mhx" - }, - "source": [ - "#### A. 
Generator" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "mesU_huG-rr6" - }, - "outputs": [], - "source": [ - "\"\"\"\n", - "To run this template just do:\n", - "python gan.py\n", - "After a few epochs, launch tensorboard to see the images being generated at every batch.\n", - "tensorboard --logdir default\n", - "\"\"\"\n", - "import os\n", - "from argparse import ArgumentParser\n", - "from collections import OrderedDict\n", - "\n", - "import numpy as np\n", - "import torch\n", - "import torch.nn as nn\n", - "import torch.nn.functional as F\n", - "import torchvision\n", - "import torchvision.transforms as transforms\n", - "from torch.utils.data import DataLoader\n", - "from torchvision.datasets import MNIST\n", - "\n", - "import pytorch_lightning as pl\n", - "\n", - "\n", - "class Generator(nn.Module):\n", - " def __init__(self, latent_dim, img_shape):\n", - " super(Generator, self).__init__()\n", - " self.img_shape = img_shape\n", - "\n", - " def block(in_feat, out_feat, normalize=True):\n", - " layers = [nn.Linear(in_feat, out_feat)]\n", - " if normalize:\n", - " layers.append(nn.BatchNorm1d(out_feat, 0.8))\n", - " layers.append(nn.LeakyReLU(0.2, inplace=True))\n", - " return layers\n", - "\n", - " self.model = nn.Sequential(\n", - " *block(latent_dim, 128, normalize=False),\n", - " *block(128, 256),\n", - " *block(256, 512),\n", - " *block(512, 1024),\n", - " nn.Linear(1024, int(np.prod(img_shape))),\n", - " nn.Tanh()\n", - " )\n", - "\n", - " def forward(self, z):\n", - " img = self.model(z)\n", - " img = img.view(img.size(0), *self.img_shape)\n", - " return img" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "bt37ycLx7uO3" - }, - "source": [ - "### B. Discriminator" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "pcPCt8JG7tI-" - }, - "outputs": [], - "source": [ - "class Discriminator(nn.Module):\n", - " def __init__(self, img_shape):\n", - " super(Discriminator, self).__init__()\n", - "\n", - " self.model = nn.Sequential(\n", - " nn.Linear(int(np.prod(img_shape)), 512),\n", - " nn.LeakyReLU(0.2, inplace=True),\n", - " nn.Linear(512, 256),\n", - " nn.LeakyReLU(0.2, inplace=True),\n", - " nn.Linear(256, 1),\n", - " nn.Sigmoid(),\n", - " )\n", - "\n", - " def forward(self, img):\n", - " img_flat = img.view(img.size(0), -1)\n", - " validity = self.model(img_flat)\n", - "\n", - " return validity" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "TyYOdg8g77P0" - }, - "source": [ - "### C. 
GAN" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ArrPXFM371jR" - }, - "outputs": [], - "source": [ - "class GAN(pl.LightningModule):\n", - "\n", - " def __init__(self, hparams):\n", - " super(GAN, self).__init__()\n", - " self.hparams = hparams\n", - "\n", - " # networks\n", - " mnist_shape = (1, 28, 28)\n", - " self.generator = Generator(latent_dim=hparams.latent_dim, img_shape=mnist_shape)\n", - " self.discriminator = Discriminator(img_shape=mnist_shape)\n", - "\n", - " # cache for generated images\n", - " self.generated_imgs = None\n", - " self.last_imgs = None\n", - "\n", - " def forward(self, z):\n", - " return self.generator(z)\n", - "\n", - " def adversarial_loss(self, y_hat, y):\n", - " return F.binary_cross_entropy(y_hat, y)\n", - "\n", - " def training_step(self, batch, batch_nb, optimizer_idx):\n", - " imgs, _ = batch\n", - " self.last_imgs = imgs\n", - "\n", - " # train generator\n", - " if optimizer_idx == 0:\n", - " # sample noise\n", - " z = torch.randn(imgs.shape[0], self.hparams.latent_dim)\n", - "\n", - " # match gpu device (or keep as cpu)\n", - " if self.on_gpu:\n", - " z = z.cuda(imgs.device.index)\n", - "\n", - " # generate images\n", - " self.generated_imgs = self(z)\n", - "\n", - " # log sampled images\n", - " # sample_imgs = self.generated_imgs[:6]\n", - " # grid = torchvision.utils.make_grid(sample_imgs)\n", - " # self.logger.experiment.add_image('generated_images', grid, 0)\n", - "\n", - " # ground truth result (ie: all fake)\n", - " # put on GPU because we created this tensor inside training_loop\n", - " valid = torch.ones(imgs.size(0), 1)\n", - " if self.on_gpu:\n", - " valid = valid.cuda(imgs.device.index)\n", - "\n", - " # adversarial loss is binary cross-entropy\n", - " g_loss = self.adversarial_loss(self.discriminator(self.generated_imgs), valid)\n", - " tqdm_dict = {'g_loss': g_loss}\n", - " output = OrderedDict({\n", - " 'loss': g_loss,\n", - " 'progress_bar': tqdm_dict,\n", - " 'log': tqdm_dict\n", - " })\n", - " return output\n", - "\n", - " # train discriminator\n", - " if optimizer_idx == 1:\n", - " # Measure discriminator's ability to classify real from generated samples\n", - "\n", - " # how well can it label as real?\n", - " valid = torch.ones(imgs.size(0), 1)\n", - " if self.on_gpu:\n", - " valid = valid.cuda(imgs.device.index)\n", - "\n", - " real_loss = self.adversarial_loss(self.discriminator(imgs), valid)\n", - "\n", - " # how well can it label as fake?\n", - " fake = torch.zeros(imgs.size(0), 1)\n", - " if self.on_gpu:\n", - " fake = fake.cuda(imgs.device.index)\n", - "\n", - " fake_loss = self.adversarial_loss(\n", - " self.discriminator(self.generated_imgs.detach()), fake)\n", - "\n", - " # discriminator loss is the average of these\n", - " d_loss = (real_loss + fake_loss) / 2\n", - " tqdm_dict = {'d_loss': d_loss}\n", - " output = OrderedDict({\n", - " 'loss': d_loss,\n", - " 'progress_bar': tqdm_dict,\n", - " 'log': tqdm_dict\n", - " })\n", - " return output\n", - "\n", - " def configure_optimizers(self):\n", - " lr = self.hparams.lr\n", - " b1 = self.hparams.b1\n", - " b2 = self.hparams.b2\n", - "\n", - " opt_g = torch.optim.Adam(self.generator.parameters(), lr=lr, betas=(b1, b2))\n", - " opt_d = torch.optim.Adam(self.discriminator.parameters(), lr=lr, betas=(b1, b2))\n", - " return [opt_g, opt_d], []\n", - "\n", - " def train_dataloader(self):\n", - " transform = transforms.Compose([transforms.ToTensor(),\n", - " transforms.Normalize([0.5], [0.5])])\n", - " dataset = MNIST(os.getcwd(), 
train=True, download=True, transform=transform)\n", - " return DataLoader(dataset, batch_size=self.hparams.batch_size)\n", - "\n", - " def on_epoch_end(self):\n", - " z = torch.randn(8, self.hparams.latent_dim)\n", - " # match gpu device (or keep as cpu)\n", - " if self.on_gpu:\n", - " z = z.cuda(self.last_imgs.device.index)\n", - "\n", - " # log sampled images\n", - " sample_imgs = self(z)\n", - " grid = torchvision.utils.make_grid(sample_imgs)\n", - " self.logger.experiment.add_image(f'generated_images', grid, self.current_epoch)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-WRY6dfn8ScZ" - }, - "source": [ - "### D. Trainer" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "xsmHHcpP8ryX" - }, - "source": [ - "Here we fake using argparse" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "fIJl3phH8uEI" - }, - "outputs": [], - "source": [ - "from argparse import Namespace\n", - "\n", - "args = {\n", - " 'batch_size': 32,\n", - " 'lr': 0.0002,\n", - " 'b1': 0.5,\n", - " 'b2': 0.999,\n", - " 'latent_dim': 100\n", - "}\n", - "hparams = Namespace(**args)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 83, - "referenced_widgets": [ - "5f119b90386c499ea9caf987fecf6c06", - "6d391353197443a694f6c75147ca96df", - "47df0bc1b3d14bb7b673e0591daa4e5f", - "87e118f890dc42319e723331e1306787", - "e068e2b1c68c48a784c19fc716c043a3", - "7662324b3b924f8f9649dc409fb0d349", - "afc85a52a5d04653ae9e7168b180ff98", - "dbb9fd5429f5416ab6a4f78f0c72867c" - ] - }, - "id": "h788dCGu7_Iu", - "outputId": "bcebc504-f0fc-496b-c8d5-a0c2f3349155" - }, - "outputs": [], - "source": [ - "gan_model = GAN(hparams)\n", - "\n", - "# most basic trainer, uses good defaults (1 gpu)\n", - "trainer = pl.Trainer(gpus=1) \n", - "trainer.fit(gan_model) " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Start tensorboard.\n", - "%load_ext tensorboard\n", - "%tensorboard --logdir lightning_logs/" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "7uQVI-xv9Ddj" - }, - "source": [ - "--- \n", - "## BERT example\n", - "BERT + Lightning" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "e2npX-Gi9uwa" - }, - "outputs": [], - "source": [ - "! 
pip install transformers" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "DeLyZQ_E9o1T" - }, - "source": [ - "#### Data download + processing\n", - "\n", - "Let's grab the correct data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 164, - "referenced_widgets": [ - "5484eef7b6f247d68a89f86965b0940f", - "0c3473a16a5e4c46a6c7515e610bca7f", - "ad849800b2124195b92f3bf9dfc7681b", - "6ae5b2f9195847b5a0aa9991e14aa397", - "240764252e7c4f5ca39db14fd1c724ed", - "386ff59e3694480394253f1c24ff8e84", - "70e48d7d8e8a411a90642926db4aada8", - "1f3364ab59b541268fabcb3f9fb5c64c", - "0fad6468e3c849b380e34f674e074219", - "10a88a05740b45d4a6ea5873d4a7151a", - "d3b107acd1b1401cabe3090724e12e86", - "b3563100dd1b4a4abe14ab7193649064", - "17f0e360e85f48d9a17b84c9b7f6c9f0", - "29f35103a6e94af09c8ac9cdb2cca89c", - "e6e15d5c14134be0b4cf86fdecfef687", - "f23f02d00d424574afa29311b8d0906e", - "e918a6de59b64bd590e4f1233bbc078a", - "abeb0a773f3542c39ff724ae0674b74e", - "892246fdf6bb476abb35ec321ddf86e8", - "88c181cd21a94ec9a43df9754c1986c9", - "e4098b0091124fef8ba342783a82cc6e", - "498a50387a0742a88356a7ee9920bf7a", - "86482894cddd4956ae2fc3d9edd8ef9a", - "438d19fb8e8243ebbc658f4b1d27df99" - ] - }, - "id": "eBP6FeY18_Ck", - "outputId": "b2a5c5fd-88cf-4428-d196-9e1c1ddc7e30" - }, - "outputs": [], - "source": [ - "from transformers.data.processors.glue import MnliProcessor\n", - "import torch\n", - "from transformers import (\n", - " BertModel,\n", - " BertTokenizer\n", - ")\n", - "\n", - "tokenizer = BertTokenizer.from_pretrained('bert-base-cased')\n", - "bert = BertModel.from_pretrained('bert-base-cased', output_attentions=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "vMbozzxs9xq_" - }, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import os\n", - "import sys\n", - "import shutil\n", - "import argparse\n", - "import tempfile\n", - "import urllib.request\n", - "import zipfile\n", - "\n", - "TASKS = [\"CoLA\", \"SST\", \"MRPC\", \"QQP\", \"STS\", \"MNLI\", \"SNLI\", \"QNLI\", \"RTE\", \"WNLI\", \"diagnostic\"]\n", - "TASK2PATH = {\n", - " \"CoLA\": \"https://firebasestorage.googleapis.com/v0/b/mtl-sentence-representations.appspot.com/o/data%2FCoLA.zip?alt=media&token=46d5e637-3411-4188-bc44-5809b5bfb5f4\", # noqa\n", - " \"SST\": \"https://firebasestorage.googleapis.com/v0/b/mtl-sentence-representations.appspot.com/o/data%2FSST-2.zip?alt=media&token=aabc5f6b-e466-44a2-b9b4-cf6337f84ac8\", # noqa\n", - " \"MRPC\": \"https://firebasestorage.googleapis.com/v0/b/mtl-sentence-representations.appspot.com/o/data%2Fmrpc_dev_ids.tsv?alt=media&token=ec5c0836-31d5-48f4-b431-7480817f1adc\", # noqa\n", - " \"QQP\": \"https://firebasestorage.googleapis.com/v0/b/mtl-sentence-representations.appspot.com/o/data%2FQQP-clean.zip?alt=media&token=11a647cb-ecd3-49c9-9d31-79f8ca8fe277\", # noqa\n", - " \"STS\": \"https://firebasestorage.googleapis.com/v0/b/mtl-sentence-representations.appspot.com/o/data%2FSTS-B.zip?alt=media&token=bddb94a7-8706-4e0d-a694-1109e12273b5\", # noqa\n", - " \"MNLI\": \"https://firebasestorage.googleapis.com/v0/b/mtl-sentence-representations.appspot.com/o/data%2FMNLI.zip?alt=media&token=50329ea1-e339-40e2-809c-10c40afff3ce\", # noqa\n", - " \"SNLI\": \"https://firebasestorage.googleapis.com/v0/b/mtl-sentence-representations.appspot.com/o/data%2FSNLI.zip?alt=media&token=4afcfbb2-ff0c-4b2d-a09a-dbf07926f4df\", # noqa\n", - " \"QNLI\": 
\"https://firebasestorage.googleapis.com/v0/b/mtl-sentence-representations.appspot.com/o/data%2FQNLIv2.zip?alt=media&token=6fdcf570-0fc5-4631-8456-9505272d1601\", # noqa\n", - " \"RTE\": \"https://firebasestorage.googleapis.com/v0/b/mtl-sentence-representations.appspot.com/o/data%2FRTE.zip?alt=media&token=5efa7e85-a0bb-4f19-8ea2-9e1840f077fb\", # noqa\n", - " \"WNLI\": \"https://firebasestorage.googleapis.com/v0/b/mtl-sentence-representations.appspot.com/o/data%2FWNLI.zip?alt=media&token=068ad0a0-ded7-4bd7-99a5-5e00222e0faf\", # noqa\n", - " \"diagnostic\": [\n", - " \"https://storage.googleapis.com/mtl-sentence-representations.appspot.com/tsvsWithoutLabels%2FAX.tsv?GoogleAccessId=firebase-adminsdk-0khhl@mtl-sentence-representations.iam.gserviceaccount.com&Expires=2498860800&Signature=DuQ2CSPt2Yfre0C%2BiISrVYrIFaZH1Lc7hBVZDD4ZyR7fZYOMNOUGpi8QxBmTNOrNPjR3z1cggo7WXFfrgECP6FBJSsURv8Ybrue8Ypt%2FTPxbuJ0Xc2FhDi%2BarnecCBFO77RSbfuz%2Bs95hRrYhTnByqu3U%2FYZPaj3tZt5QdfpH2IUROY8LiBXoXS46LE%2FgOQc%2FKN%2BA9SoscRDYsnxHfG0IjXGwHN%2Bf88q6hOmAxeNPx6moDulUF6XMUAaXCSFU%2BnRO2RDL9CapWxj%2BDl7syNyHhB7987hZ80B%2FwFkQ3MEs8auvt5XW1%2Bd4aCU7ytgM69r8JDCwibfhZxpaa4gd50QXQ%3D%3D\", # noqa\n", - " \"https://www.dropbox.com/s/ju7d95ifb072q9f/diagnostic-full.tsv?dl=1\",\n", - " ],\n", - "}\n", - "\n", - "MRPC_TRAIN = \"https://dl.fbaipublicfiles.com/senteval/senteval_data/msr_paraphrase_train.txt\"\n", - "MRPC_TEST = \"https://dl.fbaipublicfiles.com/senteval/senteval_data/msr_paraphrase_test.txt\"\n", - "\n", - "\n", - "def download_and_extract(task, data_dir):\n", - " print(\"Downloading and extracting %s...\" % task)\n", - " data_file = \"%s.zip\" % task\n", - " urllib.request.urlretrieve(TASK2PATH[task], data_file)\n", - " with zipfile.ZipFile(data_file) as zip_ref:\n", - " zip_ref.extractall(data_dir)\n", - " os.remove(data_file)\n", - " print(\"\\tCompleted!\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 51 - }, - "id": "3CVHOXQY9yVm", - "outputId": "f06b886b-cc32-4972-918e-f4ca5828fb2c" - }, - "outputs": [], - "source": [ - "download_and_extract('MNLI', '.')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "vOR0Q1Yg-HmN" - }, - "outputs": [], - "source": [ - "from transformers import glue_convert_examples_to_features as convert_examples_to_features\n", - "from transformers import BertTokenizer\n", - "from torch.utils.data import TensorDataset, RandomSampler, DataLoader, random_split\n", - "\n", - "processor = MnliProcessor()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "yuUwBKpn-TIK" - }, - "source": [ - "#### Data loaders\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "kMdQZUjO-MI7" - }, - "outputs": [], - "source": [ - "def generate_mnli_bert_dataloaders():\n", - " # ----------------------\n", - " # TRAIN/VAL DATALOADERS\n", - " # ----------------------\n", - " train = processor.get_train_examples('MNLI')\n", - " features = convert_examples_to_features(train,\n", - " tokenizer,\n", - " label_list=['contradiction','neutral','entailment'],\n", - " max_length=128,\n", - " output_mode='classification',\n", - " pad_on_left=False,\n", - " pad_token=tokenizer.pad_token_id,\n", - " pad_token_segment_id=0)\n", - " train_dataset = TensorDataset(torch.tensor([f.input_ids for f in features], dtype=torch.long), \n", - " torch.tensor([f.attention_mask for f in features], dtype=torch.long), \n", - " 
torch.tensor([f.token_type_ids for f in features], dtype=torch.long), \n", - "                                torch.tensor([f.label for f in features], dtype=torch.long))\n", - "\n", - "    nb_train_samples = int(0.95 * len(train_dataset))\n", - "    nb_val_samples = len(train_dataset) - nb_train_samples\n", - "\n", - "    bert_mnli_train_dataset, bert_mnli_val_dataset = random_split(train_dataset, [nb_train_samples, nb_val_samples])\n", - "\n", - "    # train loader\n", - "    train_sampler = RandomSampler(bert_mnli_train_dataset)\n", - "    bert_mnli_train_dataloader = DataLoader(bert_mnli_train_dataset, sampler=train_sampler, batch_size=32)\n", - "\n", - "    # val loader\n", - "    val_sampler = RandomSampler(bert_mnli_val_dataset)\n", - "    bert_mnli_val_dataloader = DataLoader(bert_mnli_val_dataset, sampler=val_sampler, batch_size=32)\n", - "\n", - "    # ----------------------\n", - "    # TEST DATALOADERS\n", - "    # ----------------------\n", - "    dev = processor.get_dev_examples('MNLI')\n", - "    features = convert_examples_to_features(dev,\n", - "                                            tokenizer,\n", - "                                            label_list=['contradiction','neutral','entailment'],\n", - "                                            max_length=128,\n", - "                                            output_mode='classification',\n", - "                                            pad_on_left=False,\n", - "                                            pad_token=tokenizer.pad_token_id,\n", - "                                            pad_token_segment_id=0)\n", - "\n", - "    bert_mnli_test_dataset = TensorDataset(torch.tensor([f.input_ids for f in features], dtype=torch.long), \n", - "                                torch.tensor([f.attention_mask for f in features], dtype=torch.long), \n", - "                                torch.tensor([f.token_type_ids for f in features], dtype=torch.long), \n", - "                                torch.tensor([f.label for f in features], dtype=torch.long))\n", - "\n", - "    # test loader\n", - "    test_sampler = RandomSampler(bert_mnli_test_dataset)\n", - "    bert_mnli_test_dataloader = DataLoader(bert_mnli_test_dataset, sampler=test_sampler, batch_size=32)\n", - "\n", - "    return bert_mnli_train_dataloader, bert_mnli_val_dataloader, bert_mnli_test_dataloader" - ] - },
- { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "iV-baDhN-U6B" - }, - "outputs": [], - "source": [ - "bert_mnli_train_dataloader, bert_mnli_val_dataloader, bert_mnli_test_dataloader = generate_mnli_bert_dataloaders()\n" - ] - },
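- { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Quick sanity check (an illustrative addition, not part of the original notebook): pull one batch from the train loader and confirm the tensor shapes. With the settings above, the ids, masks and segment ids should be `[32, 128]` and the labels `[32]`." - ] - },
- { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Illustrative addition: inspect one batch before building the model.\n", - "input_ids, attention_mask, token_type_ids, label = next(iter(bert_mnli_train_dataloader))\n", - "print(input_ids.shape, attention_mask.shape, token_type_ids.shape, label.shape)" - ] - },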
- { - "cell_type": "markdown", - "metadata": { - "id": "yr7eaxkF-djf" - }, - "source": [ - "### BERT Lightning module!\n", - "\n", - "Finally, we can create the LightningModule" - ] - },
- { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "UIXLW8CO-W8w" - }, - "outputs": [], - "source": [ - "from sklearn.metrics import accuracy_score\n", - "\n", - "from torch import nn\n", - "import torch.nn.functional as F\n", - "\n", - "\n", - "class BertMNLIFinetuner(pl.LightningModule):\n", - "\n", - "    def __init__(self):\n", - "        super(BertMNLIFinetuner, self).__init__()\n", - "\n", - "        self.bert = bert\n", - "        self.W = nn.Linear(bert.config.hidden_size, 3)\n", - "        self.num_classes = 3\n", - "\n", - "    def forward(self, input_ids, attention_mask, token_type_ids):\n", - "        h, _, attn = self.bert(input_ids=input_ids,\n", - "                               attention_mask=attention_mask,\n", - "                               token_type_ids=token_type_ids)\n", - "        h_cls = h[:, 0]\n", - "        logits = self.W(h_cls)\n", - "        return logits, attn\n", - "\n", - "    def training_step(self, batch, batch_nb):\n", - "        # batch\n", - "        input_ids, attention_mask, token_type_ids, label = batch\n", - "\n", - "        # fwd\n", - "        y_hat, attn = self(input_ids, attention_mask, token_type_ids)\n", - "\n", - "        # loss\n", - "        loss = F.cross_entropy(y_hat, label)\n", - "\n", - "        # logs\n", - "        tensorboard_logs = {'train_loss': loss}\n", - "        return {'loss': loss, 'log': tensorboard_logs}\n", - "\n", - "    def validation_step(self, batch, batch_nb):\n", - "        # batch\n", - "        input_ids, attention_mask, token_type_ids, label = batch\n", - "\n", - "        # fwd\n", - "        y_hat, attn = self(input_ids, attention_mask, token_type_ids)\n", - "\n", - "        # loss\n", - "        loss = F.cross_entropy(y_hat, label)\n", - "\n", - "        # acc\n", - "        _, y_hat = torch.max(y_hat, dim=1)\n", - "        val_acc = accuracy_score(y_hat.cpu(), label.cpu())\n", - "        val_acc = torch.tensor(val_acc)\n", - "\n", - "        return {'val_loss': loss, 'val_acc': val_acc}\n", - "\n", - "    def validation_epoch_end(self, outputs):\n", - "        avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()\n", - "        avg_val_acc = torch.stack([x['val_acc'] for x in outputs]).mean()\n", - "\n", - "        tensorboard_logs = {'val_loss': avg_loss, 'avg_val_acc': avg_val_acc}\n", - "        return {'val_loss': avg_loss, 'progress_bar': tensorboard_logs}\n", - "\n", - "    def test_step(self, batch, batch_nb):\n", - "        input_ids, attention_mask, token_type_ids, label = batch\n", - "\n", - "        y_hat, attn = self(input_ids, attention_mask, token_type_ids)\n", - "\n", - "        _, y_hat = torch.max(y_hat, dim=1)\n", - "        test_acc = accuracy_score(y_hat.cpu(), label.cpu())\n", - "\n", - "        return {'test_acc': torch.tensor(test_acc)}\n", - "\n", - "    def test_epoch_end(self, outputs):\n", - "        avg_test_acc = torch.stack([x['test_acc'] for x in outputs]).mean()\n", - "\n", - "        tensorboard_logs = {'avg_test_acc': avg_test_acc}\n", - "        return {'avg_test_acc': avg_test_acc, 'log': tensorboard_logs, 'progress_bar': tensorboard_logs}\n", - "\n", - "    def configure_optimizers(self):\n", - "        return torch.optim.Adam([p for p in self.parameters() if p.requires_grad], lr=2e-05, eps=1e-08)\n", - "\n", - "    def train_dataloader(self):\n", - "        return bert_mnli_train_dataloader\n", - "\n", - "    def val_dataloader(self):\n", - "        return bert_mnli_val_dataloader\n", - "\n", - "    def test_dataloader(self):\n", - "        return bert_mnli_test_dataloader" - ] - },
- { - "cell_type": "markdown", - "metadata": { - "id": "FHt8tgwa_DcM" - }, - "source": [ - "### Trainer" - ] - },
- { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 83, - "referenced_widgets": [ - "86bedd1fc6da4b8fa0deac637628729e", - "f444ab7646444b9885cfec41b5a2236e", - "fad0b06dc57e4b4599cf43daad7106b8", - "c190999c2761453380f816372fcca608", - "a5cc9e60aff641dca27f1adf6807e5b3", - "0a96cc26343e4bb2ac2f5145be2fbacf", - "cce9ed8de0a048679453e53b71523eea", - "773fd1b84c364903bc7350630e76a825", - "0e149cc766d147aba2c05f8b0f2c69d5", - "191f483b5b0346a8a28cac37f29ac2dc", - "24b28a7423a541c0b84ba93d70416c1a", - "4820f0005e60493793e506e9f0caf5d4", - "fce1fc72006f4e84a6497a493cbbfca2", - "f220485e332d4c3cbfc3c45ce3b5fdf1", - "bf257b8a04b44a389da2e6f4c64379d4", - "7efa007fdb2d4e06b5f34c4286fe9a2f" - ] - }, - "id": "gMRMJ-Kd-oup", - "outputId": "790ab73c-b37d-4bcb-af5f-46b464e46f9b" - }, - "outputs": [], - "source": [ - "bert_finetuner = BertMNLIFinetuner()\n", - "\n", - "# most basic trainer, uses good defaults (1 gpu)\n", - "trainer = pl.Trainer(gpus=1)\n", - "trainer.fit(bert_finetuner)" - ] - },
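- { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "(Illustrative addition, not part of the original notebook.) Once `fit` finishes, the dev-set accuracy defined in `test_step`/`test_epoch_end` above can be computed by handing the same module to `trainer.test`:" - ] - },
- { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Illustrative addition: run the test hooks of the fine-tuned module.\n", - "trainer.test(bert_finetuner)" - ] - },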
- { - "cell_type": "markdown", - "metadata": { - "id": "NWvMLBDySQI5" - }, - "source": [ - "## DQN example\n", - "\n", - "How to train a Deep Q Network\n", - "\n", - "Main takeaways:\n", - "1. RL has the same flow as previous models we have seen, with a few additions\n", - "2. Handle unsupervised learning by using an IterableDataset where the dataset itself is constantly updated during training\n", - "3. Each training step has the agent taking an action in the environment and storing the experience in the IterableDataset" - ] - },
- { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 146 - }, - "id": "4ARIT37rDdIZ", - "outputId": "37ea5092-0db7-4e73-b507-f4be9bb0ae7e" - }, - "outputs": [], - "source": [ - "!pip install gym" - ] - },
- { - "cell_type": "markdown", - "metadata": { - "id": "nm9BKoF0Sv_O" - }, - "source": [ - "### DQN Network" - ] - },
- { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "FXkKtnEhSaIV" - }, - "outputs": [], - "source": [ - "from torch import nn\n", - "\n", - "class DQN(nn.Module):\n", - "    \"\"\"\n", - "    Simple MLP network\n", - "\n", - "    Args:\n", - "        obs_size: observation/state size of the environment\n", - "        n_actions: number of discrete actions available in the environment\n", - "        hidden_size: size of hidden layers\n", - "    \"\"\"\n", - "\n", - "    def __init__(self, obs_size: int, n_actions: int, hidden_size: int = 128):\n", - "        super(DQN, self).__init__()\n", - "        self.net = nn.Sequential(\n", - "            nn.Linear(obs_size, hidden_size),\n", - "            nn.ReLU(),\n", - "            nn.Linear(hidden_size, n_actions)\n", - "        )\n", - "\n", - "    def forward(self, x):\n", - "        return self.net(x.float())" - ] - },
- { - "cell_type": "markdown", - "metadata": { - "id": "c9clSz7xTFZf" - }, - "source": [ - "### Memory" - ] - },
- { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "zUmawp0ITE3I" - }, - "outputs": [], - "source": [ - "from collections import namedtuple\n", - "\n", - "# Named tuple for storing experience steps gathered in training\n", - "Experience = namedtuple(\n", - "    'Experience', field_names=['state', 'action', 'reward',\n", - "                               'done', 'new_state'])" - ] - },
- { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Zs7h_Z0LTVoy" - }, - "outputs": [], - "source": [ - "from collections import deque\n", - "from typing import Tuple\n", - "\n", - "import numpy as np\n", - "\n", - "class ReplayBuffer:\n", - "    \"\"\"\n", - "    Replay Buffer for storing past experiences allowing the agent to learn from them\n", - "\n", - "    Args:\n", - "        capacity: size of the buffer\n", - "    \"\"\"\n", - "\n", - "    def __init__(self, capacity: int) -> None:\n", - "        self.buffer = deque(maxlen=capacity)\n", - "\n", - "    def __len__(self) -> int:\n", - "        return len(self.buffer)\n", - "\n", - "    def append(self, experience: Experience) -> None:\n", - "        \"\"\"\n", - "        Add experience to the buffer\n", - "\n", - "        Args:\n", - "            experience: tuple (state, action, reward, done, new_state)\n", - "        \"\"\"\n", - "        self.buffer.append(experience)\n", - "\n", - "    def sample(self, batch_size: int) -> Tuple:\n", - "        indices = np.random.choice(len(self.buffer), batch_size, replace=False)\n", - "        states, actions, rewards, dones, next_states = zip(*[self.buffer[idx] for idx in indices])\n", - "\n", - "        return (np.array(states), np.array(actions), np.array(rewards, dtype=np.float32),\n", - "                np.array(dones, dtype=bool), np.array(next_states))" - ] - },
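- { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "(Illustrative addition, not part of the original notebook.) A quick smoke test of the buffer: fill it with a few dummy `Experience` tuples and sample a batch. `sample` returns stacked numpy arrays, so the states come back with shape `(batch_size, state_dim)`." - ] - },
- { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Illustrative addition: smoke-test the replay buffer with dummy experiences.\n", - "buf = ReplayBuffer(capacity=10)\n", - "for i in range(10):\n", - "    buf.append(Experience([float(i)], 0, 1.0, False, [float(i + 1)]))\n", - "states, actions, rewards, dones, next_states = buf.sample(4)\n", - "print(states.shape, rewards.dtype, dones.dtype)" - ] - },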
- { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "R5UK2VRvTgS1" - }, - "outputs": [], - "source": [ - "from torch.utils.data.dataset import IterableDataset\n", - "\n", - "class RLDataset(IterableDataset):\n", - "    \"\"\"\n", - "    Iterable Dataset containing the ExperienceBuffer\n", - "    which will be updated with new experiences during training\n", - "\n", - "    Args:\n", - "        buffer: replay buffer\n", - "        sample_size: number of experiences to sample at a time\n", - "    \"\"\"\n", - "\n", - "    def __init__(self, buffer: ReplayBuffer, sample_size: int = 200) -> None:\n", - "        self.buffer = buffer\n", - "        self.sample_size = sample_size\n", - "\n", - "    def __iter__(self) -> Tuple:\n", - "        states, actions, rewards, dones, new_states = self.buffer.sample(self.sample_size)\n", - "        for i in range(len(dones)):\n", - "            yield states[i], actions[i], rewards[i], dones[i], new_states[i]" - ] - },
- { - "cell_type": "markdown", - "metadata": { - "id": "d7sCGSURTuQK" - }, - "source": [ - "### Agent" - ] - },
- { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "dS2RpSHHTvpO" - }, - "outputs": [], - "source": [ - "import gym\n", - "import torch\n", - "\n", - "class Agent:\n", - "    \"\"\"\n", - "    Base Agent class handling the interaction with the environment\n", - "\n", - "    Args:\n", - "        env: training environment\n", - "        replay_buffer: replay buffer storing experiences\n", - "    \"\"\"\n", - "\n", - "    def __init__(self, env: gym.Env, replay_buffer: ReplayBuffer) -> None:\n", - "        self.env = env\n", - "        self.replay_buffer = replay_buffer\n", - "        self.reset()\n", - "\n", - "    def reset(self) -> None:\n", - "        \"\"\" Resets the environment and updates the state\"\"\"\n", - "        self.state = self.env.reset()\n", - "\n", - "    def get_action(self, net: nn.Module, epsilon: float, device: str) -> int:\n", - "        \"\"\"\n", - "        Using the given network, decide what action to carry out\n", - "        using an epsilon-greedy policy\n", - "\n", - "        Args:\n", - "            net: DQN network\n", - "            epsilon: value to determine likelihood of taking a random action\n", - "            device: current device\n", - "\n", - "        Returns:\n", - "            action\n", - "        \"\"\"\n", - "        if np.random.random() < epsilon:\n", - "            action = self.env.action_space.sample()\n", - "        else:\n", - "            state = torch.tensor([self.state])\n", - "\n", - "            if device not in ['cpu']:\n", - "                state = state.cuda(device)\n", - "\n", - "            q_values = net(state)\n", - "            _, action = torch.max(q_values, dim=1)\n", - "            action = int(action.item())\n", - "\n", - "        return action\n", - "\n", - "    @torch.no_grad()\n", - "    def play_step(self, net: nn.Module, epsilon: float = 0.0, device: str = 'cpu') -> Tuple[float, bool]:\n", - "        \"\"\"\n", - "        Carries out a single interaction step between the agent and the environment\n", - "\n", - "        Args:\n", - "            net: DQN network\n", - "            epsilon: value to determine likelihood of taking a random action\n", - "            device: current device\n", - "\n", - "        Returns:\n", - "            reward, done\n", - "        \"\"\"\n", - "        action = self.get_action(net, epsilon, device)\n", - "\n", - "        # do step in the environment\n", - "        new_state, reward, done, _ = self.env.step(action)\n", - "\n", - "        exp = Experience(self.state, action, reward, done, new_state)\n", - "\n", - "        self.replay_buffer.append(exp)\n", - "\n", - "        self.state = new_state\n", - "        if done:\n", - "            self.reset()\n", - "        return reward, done" - ] - },
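- { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "(Illustrative addition, not part of the original notebook.) Before wiring the agent into a LightningModule, we can exercise it directly: with `epsilon=1.0` every action is random, so a few `play_step` calls should populate the replay buffer." - ] - },
- { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Illustrative addition: random steps in CartPole-v0 fill the buffer.\n", - "env = gym.make('CartPole-v0')\n", - "buffer = ReplayBuffer(capacity=100)\n", - "agent = Agent(env, buffer)\n", - "net = DQN(obs_size=env.observation_space.shape[0], n_actions=env.action_space.n)\n", - "for _ in range(5):\n", - "    reward, done = agent.play_step(net, epsilon=1.0)\n", - "print(len(buffer))" - ] - },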
- { - "cell_type": "markdown", - "metadata": { - "id": "IAlT0-75T_Kv" - }, - "source": [ - "### DQN Lightning Module" - ] - },
- { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "BS5D7s83T13H" - }, - "outputs": [], - "source": [ - "import pytorch_lightning as pl\n", - "import argparse\n", - "from collections import OrderedDict, deque\n", - "from typing import Tuple, List\n", - "import torch.optim as optim\n", - "from torch.optim import Optimizer\n", - "from torch.utils.data import DataLoader\n", - "\n", - "class DQNLightning(pl.LightningModule):\n", - "    \"\"\" Basic DQN Model \"\"\"\n", - "\n", - "    def __init__(self, hparams: argparse.Namespace) -> None:\n", - "        super().__init__()\n", - "        self.hparams = hparams\n", - "\n", - "        self.env = gym.make(self.hparams.env)\n", - "        obs_size = self.env.observation_space.shape[0]\n", - "        n_actions = self.env.action_space.n\n", - "\n", - "        self.net = DQN(obs_size, n_actions)\n", - "        self.target_net = DQN(obs_size, n_actions)\n", - "\n", - "        self.buffer = ReplayBuffer(self.hparams.replay_size)\n", - "        self.agent = Agent(self.env, self.buffer)\n", - "        self.total_reward = 0\n", - "        self.episode_reward = 0\n", - "        self.populate(self.hparams.warm_start_steps)\n", - "\n", - "    def populate(self, steps: int = 1000) -> None:\n", - "        \"\"\"\n", - "        Carries out several random steps through the environment to initially fill\n", - "        up the replay buffer with experiences\n", - "\n", - "        Args:\n", - "            steps: number of random steps to populate the buffer with\n", - "        \"\"\"\n", - "        for i in range(steps):\n", - "            self.agent.play_step(self.net, epsilon=1.0)\n", - "\n", - "    def forward(self, x: torch.Tensor) -> torch.Tensor:\n", - "        \"\"\"\n", - "        Passes in a state x through the network and gets the q_values of each action as an output\n", - "\n", - "        Args:\n", - "            x: environment state\n", - "\n", - "        Returns:\n", - "            q values\n", - "        \"\"\"\n", - "        output = self.net(x)\n", - "        return output\n", - "\n", - "    def dqn_mse_loss(self, batch: Tuple[torch.Tensor, torch.Tensor]) -> torch.Tensor:\n", - "        \"\"\"\n", - "        Calculates the mse loss using a mini batch from the replay buffer\n", - "\n", - "        Args:\n", - "            batch: current mini batch of replay data\n", - "\n", - "        Returns:\n", - "            loss\n", - "        \"\"\"\n", - "        states, actions, rewards, dones, next_states = batch\n", - "\n", - "        state_action_values = self.net(states).gather(1, actions.unsqueeze(-1)).squeeze(-1)\n", - "\n", - "        with torch.no_grad():\n", - "            next_state_values = self.target_net(next_states).max(1)[0]\n", - "            next_state_values[dones] = 0.0\n", - "            next_state_values = next_state_values.detach()\n", - "\n", - "        expected_state_action_values = next_state_values * self.hparams.gamma + rewards\n", - "\n", - "        return nn.MSELoss()(state_action_values, expected_state_action_values)\n", - "\n", - "    def training_step(self, batch: Tuple[torch.Tensor, torch.Tensor], nb_batch) -> OrderedDict:\n", - "        \"\"\"\n", - "        Carries out a single step through the environment to update the replay buffer.\n", - "        Then calculates loss based on the minibatch received\n", - "\n", - "        Args:\n", - "            batch: current mini batch of replay data\n", - "            nb_batch: batch number\n", - "\n", - "        Returns:\n", - "            Training loss and log metrics\n", - "        \"\"\"\n", - "        device = self.get_device(batch)\n", - "        epsilon = max(self.hparams.eps_end, self.hparams.eps_start -\n", - "                      (self.global_step + 1) / self.hparams.eps_last_frame)\n", - "\n", - "        # step through environment with agent\n", - "        reward, done = self.agent.play_step(self.net, epsilon, device)\n", - "        self.episode_reward += reward\n", - "\n", - "        # calculates training loss\n", - "        loss = self.dqn_mse_loss(batch)\n", - "\n", - "        if self.trainer.use_dp or self.trainer.use_ddp2:\n", - "            loss = loss.unsqueeze(0)\n", - "\n", - "        if done:\n", - "            self.total_reward = self.episode_reward\n", - "            self.episode_reward = 0\n", - "\n", - "        # Sync the target network with the main network (hard update every `sync_rate` steps)\n", - "        if self.global_step % self.hparams.sync_rate == 0:\n",
- "            self.target_net.load_state_dict(self.net.state_dict())\n", - "\n", - "        log = {'total_reward': torch.tensor(self.total_reward).to(device),\n", - "               'reward': torch.tensor(reward).to(device),\n", - "               'train_loss': loss\n", - "               }\n", - "        status = {'steps': torch.tensor(self.global_step).to(device),\n", - "                  'total_reward': torch.tensor(self.total_reward).to(device)\n", - "                  }\n", - "\n", - "        return OrderedDict({'loss': loss, 'log': log, 'progress_bar': status})\n", - "\n", - "    def configure_optimizers(self) -> List[Optimizer]:\n", - "        \"\"\" Initialize Adam optimizer\"\"\"\n", - "        optimizer = optim.Adam(self.net.parameters(), lr=self.hparams.lr)\n", - "        return [optimizer]\n", - "\n", - "    def __dataloader(self) -> DataLoader:\n", - "        \"\"\"Initialize the Replay Buffer dataset used for retrieving experiences\"\"\"\n", - "        dataset = RLDataset(self.buffer, self.hparams.episode_length)\n", - "        dataloader = DataLoader(dataset=dataset, batch_size=self.hparams.batch_size)\n", - "        return dataloader\n", - "\n", - "    def train_dataloader(self) -> DataLoader:\n", - "        \"\"\"Get train loader\"\"\"\n", - "        return self.__dataloader()\n", - "\n", - "    def get_device(self, batch) -> str:\n", - "        \"\"\"Retrieve device currently being used by minibatch\"\"\"\n", - "        return batch[0].device.index if self.on_gpu else 'cpu'" - ] - },
- { - "cell_type": "markdown", - "metadata": { - "id": "JST5AN-8VFLY" - }, - "source": [ - "### Trainer" - ] - },
- { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "bQEvD7gFUSaN" - }, - "outputs": [], - "source": [ - "def main(hparams) -> None:\n", - "    model = DQNLightning(hparams)\n", - "\n", - "    trainer = pl.Trainer(\n", - "        gpus=1,\n", - "        distributed_backend='dp',\n", - "        max_epochs=500,\n", - "        early_stop_callback=False,\n", - "        val_check_interval=100\n", - "    )\n", - "\n", - "    trainer.fit(model)" - ] - },
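- { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "(Illustrative addition, not part of the original notebook.) The exploration schedule used in `training_step` decays epsilon linearly from `eps_start` to `eps_end` over `eps_last_frame` steps and then stays clamped. A quick standalone check with the defaults used below:" - ] - },
- { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Illustrative addition: trace the epsilon-greedy schedule.\n", - "eps_start, eps_end, eps_last_frame = 1.0, 0.01, 1000\n", - "for step in [0, 250, 500, 999, 2000]:\n", - "    epsilon = max(eps_end, eps_start - (step + 1) / eps_last_frame)\n", - "    print(step, round(epsilon, 3))" - ] - },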
- { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 380, - "referenced_widgets": [ - "e9a6bf4eda3244c6bb17216715f36525", - "0922c5b2de554b4fa28dd531603f2709", - "c293fc4171b0438595bc9a49fbb250cf", - "819c83bf0bbd472ba417c31e957718c7", - "c24384195a074989a86217b2edc411cb", - "b3817e0ba30f449585f7641b4d3061bb", - "8591bd2136ab4bb7831579609b43ee9c", - "5a761ed145474ec7a30006bc584b26be" - ] - }, - "id": "-iV9PQC9VOHK", - "outputId": "2fd70097-c913-4d68-e80a-d240532edd19" - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "import argparse\n", - "\n", - "torch.manual_seed(0)\n", - "np.random.seed(0)\n", - "\n", - "parser = argparse.ArgumentParser()\n", - "parser.add_argument(\"--batch_size\", type=int, default=16, help=\"size of the batches\")\n", - "parser.add_argument(\"--lr\", type=float, default=1e-2, help=\"learning rate\")\n", - "parser.add_argument(\"--env\", type=str, default=\"CartPole-v0\", help=\"gym environment tag\")\n", - "parser.add_argument(\"--gamma\", type=float, default=0.99, help=\"discount factor\")\n", - "parser.add_argument(\"--sync_rate\", type=int, default=10,\n", - "                    help=\"number of frames between updates of the target network\")\n", - "parser.add_argument(\"--replay_size\", type=int, default=1000,\n", - "                    help=\"capacity of the replay buffer\")\n", - "parser.add_argument(\"--warm_start_size\", type=int, default=1000,\n", - "                    help=\"how many samples do we use to fill our buffer at the start of training\")\n", - "parser.add_argument(\"--eps_last_frame\", type=int, default=1000,\n", - "                    help=\"frame at which epsilon stops decaying\")\n", - "parser.add_argument(\"--eps_start\", type=float, default=1.0, help=\"starting value of epsilon\")\n", - "parser.add_argument(\"--eps_end\", type=float, default=0.01, help=\"final value of epsilon\")\n", - "parser.add_argument(\"--episode_length\", type=int, default=200, help=\"max length of an episode\")\n", - "parser.add_argument(\"--max_episode_reward\", type=int, default=200,\n", - "                    help=\"max episode reward in the environment\")\n", - "parser.add_argument(\"--warm_start_steps\", type=int, default=1000,\n", - "                    help=\"number of random steps used to prefill the replay buffer at the start of training\")\n", - "\n", - "args, _ = parser.parse_known_args()\n", - "\n", - "main(args)" - ] - },
- { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Start tensorboard.\n", - "%load_ext tensorboard\n", - "%tensorboard --logdir lightning_logs/" - ] - } - ],
- "metadata": { - "accelerator": "GPU", - "colab": { - "collapsed_sections": [], - "name": "Lightning-demo.ipynb", - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.3" - },
"HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_67728556b4c9432b877d54a081657663", - "placeholder": "​", - "style": "IPY_MODEL_0cf9a61c88af45b6a6ef72640f93cbfd", - "value": " 1654784/? [00:01<00:00, 1304326.05it/s]" - } - }, - "0922c5b2de554b4fa28dd531603f2709": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": "inline-flex", - "flex": null, - "flex_flow": "row wrap", - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": "100%" - } - }, - "0a96cc26343e4bb2ac2f5145be2fbacf": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": "2", - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "0c3473a16a5e4c46a6c7515e610bca7f": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": 
null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "0cf9a61c88af45b6a6ef72640f93cbfd": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "0e149cc766d147aba2c05f8b0f2c69d5": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_24b28a7423a541c0b84ba93d70416c1a", - "IPY_MODEL_4820f0005e60493793e506e9f0caf5d4" - ], - "layout": "IPY_MODEL_191f483b5b0346a8a28cac37f29ac2dc" - } - }, - "0f688614251d49589f320f2b2cb55344": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "0fad6468e3c849b380e34f674e074219": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_d3b107acd1b1401cabe3090724e12e86", - "IPY_MODEL_b3563100dd1b4a4abe14ab7193649064" - ], - "layout": "IPY_MODEL_10a88a05740b45d4a6ea5873d4a7151a" - } - }, - "10a88a05740b45d4a6ea5873d4a7151a": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": 
"LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "118c0b8da0df4ff68a90a3d500f1d1b8": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "11db4a94a4534fc2b503aad28be631be": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "135883097f0e428c963ae0ad320dfabd": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": 
null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "15bb223836764207a5ac15616a41ddb7": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_296453e43f7344de8a9b5c6bc970ab1e", - "IPY_MODEL_cd86997da08649d7999ade2d0e7cea96" - ], - "layout": "IPY_MODEL_46f7df7035d44bd099f60ad23f836f8a" - } - }, - "171db7c8fa1e4f11aaff71b9f5879d58": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_85741a7765a147c4a8d69872850cb072", - "placeholder": "​", - "style": "IPY_MODEL_135883097f0e428c963ae0ad320dfabd", - "value": " 32768/? [00:01<00:00, 24569.28it/s]" - } - }, - "17f0e360e85f48d9a17b84c9b7f6c9f0": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "initial" - } - }, - "191f483b5b0346a8a28cac37f29ac2dc": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": "inline-flex", - "flex": null, - "flex_flow": "row wrap", - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": "100%" - } - }, - "19c7460c565d494abbb8b9731a34294d": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - 
"display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "1a7680c6279d4985bd69188dd72b11d5": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "1dabf5740f4d44d68d06629f77b001e3": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "1f3364ab59b541268fabcb3f9fb5c64c": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": 
null, - "width": null - } - }, - "1f9c48164702427fb3aca2a26b2651e5": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "", - "description": "Validation sanity check: ", - "description_tooltip": null, - "layout": "IPY_MODEL_a1e2c38bb40642168cc9d44abf645a54", - "max": 5, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_61c71d4f1c2848b1813aebc0b2db5e25", - "value": 5 - } - }, - "23fd97d95fae4f42bd21906f67115f8b": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "240764252e7c4f5ca39db14fd1c724ed": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "initial" - } - }, - "24b28a7423a541c0b84ba93d70416c1a": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "danger", - "description": "Epoch 1: 0%", - "description_tooltip": null, - "layout": "IPY_MODEL_f220485e332d4c3cbfc3c45ce3b5fdf1", - "max": 12273, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_fce1fc72006f4e84a6497a493cbbfca2", - "value": 50 - } - }, - "254ddfa7c23d4b6f828d515dbab38978": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "initial" - } - }, - "294d8142a4aa48aa8261b0b8155ef97f": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - 
"grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "296453e43f7344de8a9b5c6bc970ab1e": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "", - "description": "Validating: 96%", - "description_tooltip": null, - "layout": "IPY_MODEL_bef041a9f5a942f68b4a8488a371d3da", - "max": 1875, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_7d15fc81537a449cb6b6afd7ccc65dac", - "value": 1800 - } - }, - "29650c4a829b44ed9e1526b1dc5d2b83": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "", - "description": "Validating: ", - "description_tooltip": null, - "layout": "IPY_MODEL_a99b7813bf88496c875a818afe3b170a", - "max": 1875, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_384a36423d154f2abcddb5094afeced3", - "value": 1875 - } - }, - "29f35103a6e94af09c8ac9cdb2cca89c": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "2e20c741cf8a401cb90e8e230a23026a": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": 
"inline-flex", - "flex": null, - "flex_flow": "row wrap", - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": "100%" - } - }, - "32e47e93509043439580cd5f58dc7726": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "3397549a0695432990f1d3d5390941e7": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_ae52e3d810aa4bc5965559ed2ba2b78a", - "IPY_MODEL_08b6d9269e514d228e7e94fe0299a2c5" - ], - "layout": "IPY_MODEL_515ef7d03ef2447e9643210b029b930e" - } - }, - "352d7dae131b407cb6e0238315c1b1a0": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": "2", - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "384a36423d154f2abcddb5094afeced3": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "initial" - } - }, - "386ff59e3694480394253f1c24ff8e84": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": 
{ - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "39422514a4a04a9ba290285dc586ea9f": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "3a521cb700f247fd8cd345b158697f2f": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_866f880d850a4e689a1c99723f0366db", - "max": 1, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_bc4dfb14c9d14499ae72d2a30fc6bc2d", - "value": 1 - } - }, - "3c99401bde8641c19978c11c9abb906a": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_f778d9ef70ca4f5898c423109cf82ed2", - "placeholder": "​", - "style": 
"IPY_MODEL_fd8ec919352046dd84057e9763bb235a", - "value": " 350/? [00:01<00:00, 348.68it/s]" - } - }, - "3d2e43ae9f924fbd8463ce72b44200f3": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_3a521cb700f247fd8cd345b158697f2f", - "IPY_MODEL_8a40fcad728841c7ab2fd15d2c40ee5f" - ], - "layout": "IPY_MODEL_7c03e0a2247442ad9c11569e443de4bb" - } - }, - "40ce71ff339849748486ebbc73474cbe": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "420e8d65e9584973a8004e8398cf430c": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "42e787b78000472eab434fb795197a86": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - 
"42ede89dbd194eb6a603ccd7d4b96aae": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "438d19fb8e8243ebbc658f4b1d27df99": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "44ca9ee5c356458680a5d20c6a891c91": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": "inline-flex", - "flex": null, - "flex_flow": "row wrap", - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": "100%" - } - }, - "44e2c572ab1641a29156ad4ee8884e12": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_845415af79634da5a64b9f368280c0e3", - "placeholder": "​", - "style": "IPY_MODEL_32e47e93509043439580cd5f58dc7726", - "value": " 9920512/? 
[00:20<00:00, 998025.61it/s]" - } - }, - "46f7df7035d44bd099f60ad23f836f8a": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": "inline-flex", - "flex": null, - "flex_flow": "row wrap", - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": "100%" - } - }, - "47df0bc1b3d14bb7b673e0591daa4e5f": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "danger", - "description": "Epoch 1: 93%", - "description_tooltip": null, - "layout": "IPY_MODEL_7662324b3b924f8f9649dc409fb0d349", - "max": 1875, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_e068e2b1c68c48a784c19fc716c043a3", - "value": 1750 - } - }, - "4820f0005e60493793e506e9f0caf5d4": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_7efa007fdb2d4e06b5f34c4286fe9a2f", - "placeholder": "​", - "style": "IPY_MODEL_bf257b8a04b44a389da2e6f4c64379d4", - "value": " 50/12273 [00:11<48:21, 4.21it/s, loss=1.083, v_num=6]" - } - }, - "498a50387a0742a88356a7ee9920bf7a": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - 
"max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "4b7021f73f6b4e5193454128ccf323d7": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "danger", - "description": "Epoch 2: 81%", - "description_tooltip": null, - "layout": "IPY_MODEL_d9540ab5d2394b77a65f48b501acdc18", - "max": 1875, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_b5f184fbcba740999b205e34e23455d6", - "value": 1520 - } - }, - "4eaea330bc8e414fbf2f0e2b21af8b08": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "initial" - } - }, - "4ffe9fb35ca44358b0177723f73a35d8": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_fc3f14c4e83048aa9d6fe9963f95bf7a", - "IPY_MODEL_171db7c8fa1e4f11aaff71b9f5879d58" - ], - "layout": "IPY_MODEL_ba3fe1aba1b04a1fbaab268dfd3d0166" - } - }, - "5012438370764b4db215d545e9414c94": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "515ef7d03ef2447e9643210b029b930e": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": 
"@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "51b1111f5fe24042b38af809285e1b16": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "53b2a85381b1460d9f446390c79bfc08": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "initial" - } - }, - "5484eef7b6f247d68a89f86965b0940f": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_ad849800b2124195b92f3bf9dfc7681b", - "IPY_MODEL_6ae5b2f9195847b5a0aa9991e14aa397" - ], - "layout": "IPY_MODEL_0c3473a16a5e4c46a6c7515e610bca7f" - } - }, - "555443a6fa564d10a3a7901cf15a79a3": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": "inline-flex", - "flex": null, - "flex_flow": "row wrap", - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": 
null, - "right": null, - "top": null, - "visibility": null, - "width": "100%" - } - }, - "56ef38eff92143bcaf68b22c8dae7f98": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "59f02fe7f9f2433bb25f5b292c213f50": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "5a761ed145474ec7a30006bc584b26be": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - 
"overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "5d3c506d3f4444d8a6b7024cd11de2cd": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": "2", - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "5f119b90386c499ea9caf987fecf6c06": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_47df0bc1b3d14bb7b673e0591daa4e5f", - "IPY_MODEL_87e118f890dc42319e723331e1306787" - ], - "layout": "IPY_MODEL_6d391353197443a694f6c75147ca96df" - } - }, - "61c71d4f1c2848b1813aebc0b2db5e25": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "initial" - } - }, - "620f45256d504f0188f40c61e23e1355": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_e64a723bcf474d6699d78ec05462f995", - "placeholder": "​", - "style": "IPY_MODEL_c3587b40d9f942e98b708ff0b5fc5301", - "value": " 9920512/? 
[00:20<00:00, 2030694.65it/s]" - } - }, - "632b9d0d9ffa4d479deb70f6fafb92ab": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_1a7680c6279d4985bd69188dd72b11d5", - "placeholder": "​", - "style": "IPY_MODEL_42e787b78000472eab434fb795197a86", - "value": " 0/28881 [00:00<?, ?it/s]" - } - }, - "67728556b4c9432b877d54a081657663": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "69fa0a853cf84b9482043e12881c849f": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_294d8142a4aa48aa8261b0b8155ef97f", - "placeholder": "​", - "style": "IPY_MODEL_51b1111f5fe24042b38af809285e1b16", - "value": " 8192/? 
[00:00<00:00, 33325.29it/s]" - } - }, - "6aa5e292e2094c239e7418994a31ff51": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_1f9c48164702427fb3aca2a26b2651e5", - "IPY_MODEL_d8bd5c9b233b41008109d14cffc89aaa" - ], - "layout": "IPY_MODEL_555443a6fa564d10a3a7901cf15a79a3" - } - }, - "6aafaca3c8824e2fa267f4a68d5d2ca3": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_59f02fe7f9f2433bb25f5b292c213f50", - "max": 1, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_53b2a85381b1460d9f446390c79bfc08", - "value": 1 - } - }, - "6ae5b2f9195847b5a0aa9991e14aa397": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_1f3364ab59b541268fabcb3f9fb5c64c", - "placeholder": "​", - "style": "IPY_MODEL_70e48d7d8e8a411a90642926db4aada8", - "value": " 213k/213k [00:00<00:00, 746kB/s]" - } - }, - "6b3a598e7d01407aa5850b5a6620e7f0": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_e7a06a13ea11427ea3866cec3a55b644", - "IPY_MODEL_620f45256d504f0188f40c61e23e1355" - ], - "layout": "IPY_MODEL_e23c0d6c117246b2a0a6681008748917" - } - }, - "6d391353197443a694f6c75147ca96df": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": "inline-flex", - "flex": null, - "flex_flow": "row wrap", - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - 
"min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": "100%" - } - }, - "6f55aa11acb14afdb2ac0a1052be1bb6": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_420e8d65e9584973a8004e8398cf430c", - "placeholder": "​", - "style": "IPY_MODEL_23fd97d95fae4f42bd21906f67115f8b", - "value": " 1520/1875 [00:05<00:01, 254.43it/s, loss=1.504, v_num=0]" - } - }, - "70e48d7d8e8a411a90642926db4aada8": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "7633820adf9a4757ae73b472e43031d6": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_11db4a94a4534fc2b503aad28be631be", - "placeholder": "​", - "style": "IPY_MODEL_fc262db2a53948488092a77209081319", - "value": " 1400/3750 [00:08<00:13, 171.01it/s, loss=1.188, test_loss=1.17, v_num=2]" - } - }, - "7662324b3b924f8f9649dc409fb0d349": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": "2", - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "76c916d634c644a4a0d8f12e183822fd": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - 
"_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "773fd1b84c364903bc7350630e76a825": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "7c03e0a2247442ad9c11569e443de4bb": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "7d15fc81537a449cb6b6afd7ccc65dac": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - 
"_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "initial" - } - }, - "7e8cf26303ed4975b239fd43184a1dc6": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "", - "description": "Validation sanity check: ", - "description_tooltip": null, - "layout": "IPY_MODEL_5d3c506d3f4444d8a6b7024cd11de2cd", - "max": 5, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_fdf5c4a49602423184f6d94cd814177e", - "value": 5 - } - }, - "7ea8ad4e10bb465aa2b6708655a2793f": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "7ee81979301c447bb13ff9fff5153e0f": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "initial" - } - }, - "7efa007fdb2d4e06b5f34c4286fe9a2f": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": 
null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "819c83bf0bbd472ba417c31e957718c7": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_5a761ed145474ec7a30006bc584b26be", - "placeholder": "​", - "style": "IPY_MODEL_8591bd2136ab4bb7831579609b43ee9c", - "value": " 13/? [00:00<00:00, 115.19it/s, loss=69.045, steps=6499, total_reward=200, v_num=0]" - } - }, - "845415af79634da5a64b9f368280c0e3": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "85741a7765a147c4a8d69872850cb072": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "8591bd2136ab4bb7831579609b43ee9c": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", 
- "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "86482894cddd4956ae2fc3d9edd8ef9a": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "866f880d850a4e689a1c99723f0366db": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "86bedd1fc6da4b8fa0deac637628729e": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_fad0b06dc57e4b4599cf43daad7106b8", - "IPY_MODEL_c190999c2761453380f816372fcca608" - ], - "layout": "IPY_MODEL_f444ab7646444b9885cfec41b5a2236e" - } - }, - "8734c1b798ff4ba0bf77dca4f3de9cbf": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - 
"overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "87e118f890dc42319e723331e1306787": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_dbb9fd5429f5416ab6a4f78f0c72867c", - "placeholder": "​", - "style": "IPY_MODEL_afc85a52a5d04653ae9e7168b180ff98", - "value": " 1750/1875 [00:31<00:02, 55.10it/s, d_loss=0.36, g_loss=1.65, loss=0.947, v_num=5]" - } - }, - "88c181cd21a94ec9a43df9754c1986c9": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_438d19fb8e8243ebbc658f4b1d27df99", - "placeholder": "​", - "style": "IPY_MODEL_86482894cddd4956ae2fc3d9edd8ef9a", - "value": " 436M/436M [01:31<00:00, 4.77MB/s]" - } - }, - "892246fdf6bb476abb35ec321ddf86e8": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "Downloading: 100%", - "description_tooltip": null, - "layout": "IPY_MODEL_498a50387a0742a88356a7ee9920bf7a", - "max": 435779157, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_e4098b0091124fef8ba342783a82cc6e", - "value": 435779157 - } - }, - "8a40fcad728841c7ab2fd15d2c40ee5f": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_56ef38eff92143bcaf68b22c8dae7f98", - "placeholder": "​", - "style": "IPY_MODEL_e7a864f4dc0f485eb045b778e981fc01", - "value": " 1654784/? 
[00:01<00:00, 1615285.92it/s]" - } - }, - "8aab627e715a44ada2af81b74bece257": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": "2", - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "8eb2086a01cf41429a5f4adff5f2359b": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "danger", - "description": "Epoch 2: 99%", - "description_tooltip": null, - "layout": "IPY_MODEL_352d7dae131b407cb6e0238315c1b1a0", - "max": 3750, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_d01088cc378044cba4879032d74a852e", - "value": 3700 - } - }, - "91a6de2063cc48b28021ef29feab7f69": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "992f545bb6f1489493d89a59d046f63f": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "info", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_a700c003887b4d2ba134fbfcf4823cb5", - "max": 1, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_254ddfa7c23d4b6f828d515dbab38978", - "value": 1 - } - }, - "9a9ebf052d914a8881882da8d2fa9cd8": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - 
"display": "inline-flex", - "flex": null, - "flex_flow": "row wrap", - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": "100%" - } - }, - "9b6445338a69425889a8901c192d5144": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "a1e2c38bb40642168cc9d44abf645a54": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": "2", - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "a252ac32033b4e39b87a6c91bd21b5ae": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": 
"HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_f1db9d62eee44e61bb8ac26c16b3b601", - "IPY_MODEL_69fa0a853cf84b9482043e12881c849f" - ], - "layout": "IPY_MODEL_40ce71ff339849748486ebbc73474cbe" - } - }, - "a2990a67f92c4047b95876aae91e3de0": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_7e8cf26303ed4975b239fd43184a1dc6", - "IPY_MODEL_d7406a8b15f9439fba19ec4dab086c61" - ], - "layout": "IPY_MODEL_44ca9ee5c356458680a5d20c6a891c91" - } - }, - "a38242d3231442e2a259067d6a1355c2": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_a56deb884719491090a4146e72be3868", - "IPY_MODEL_7633820adf9a4757ae73b472e43031d6" - ], - "layout": "IPY_MODEL_9a9ebf052d914a8881882da8d2fa9cd8" - } - }, - "a4b49709f7464ce491324e8aa636c152": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": "inline-flex", - "flex": null, - "flex_flow": "row wrap", - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": "100%" - } - }, - "a56deb884719491090a4146e72be3868": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "danger", - "description": "Epoch 1: 37%", - "description_tooltip": null, - "layout": "IPY_MODEL_8aab627e715a44ada2af81b74bece257", - "max": 3750, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_b7a073dfdeaf48fc9f3e6352b0ea2ba7", - "value": 1400 - } - }, - "a5cc9e60aff641dca27f1adf6807e5b3": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - 
"_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "initial" - } - }, - "a700c003887b4d2ba134fbfcf4823cb5": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "a7a94847786244dd9a5cb9718957143a": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_ee0a78c01b63443f9e51470a1b1e79a4", - "IPY_MODEL_632b9d0d9ffa4d479deb70f6fafb92ab" - ], - "layout": "IPY_MODEL_8734c1b798ff4ba0bf77dca4f3de9cbf" - } - }, - "a7bcd18049d8493b9d3d9f17d86f0429": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "Testing: ", - "description_tooltip": null, - "layout": "IPY_MODEL_f261b8aab86b4d6e94984bf658c1b74d", - "max": 313, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_cd84335fb7234f3aa54dafe045614e56", - "value": 313 - } - }, - "a9413692ae5040e6ae3c2a446dbe297c": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "initial" - } - }, - "a99b7813bf88496c875a818afe3b170a": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - 
"_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": "2", - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "abeb0a773f3542c39ff724ae0674b74e": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "ad849800b2124195b92f3bf9dfc7681b": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "Downloading: 100%", - "description_tooltip": null, - "layout": "IPY_MODEL_386ff59e3694480394253f1c24ff8e84", - "max": 213450, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_240764252e7c4f5ca39db14fd1c724ed", - "value": 213450 - } - }, - "ae52e3d810aa4bc5965559ed2ba2b78a": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_ea162090fc954f0198a1d63507dfff9b", - "max": 1, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_7ee81979301c447bb13ff9fff5153e0f", - "value": 1 - } - }, - "afc85a52a5d04653ae9e7168b180ff98": 
{ - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "b3563100dd1b4a4abe14ab7193649064": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_f23f02d00d424574afa29311b8d0906e", - "placeholder": "​", - "style": "IPY_MODEL_e6e15d5c14134be0b4cf86fdecfef687", - "value": " 361/361 [00:16<00:00, 22.2B/s]" - } - }, - "b3817e0ba30f449585f7641b4d3061bb": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": "2", - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "b5f184fbcba740999b205e34e23455d6": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "initial" - } - }, - "b73a326ada4d4a859e3c2c39abf5530d": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_29650c4a829b44ed9e1526b1dc5d2b83", - "IPY_MODEL_df6521155d05459882601ba8c84f3dce" - ], - "layout": "IPY_MODEL_cd942318db094680821f0d9902941977" - } - }, - "b7a073dfdeaf48fc9f3e6352b0ea2ba7": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": 
"ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "initial" - } - }, - "ba3fe1aba1b04a1fbaab268dfd3d0166": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "bb305bb378774c1586a3196eb3babd29": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_992f545bb6f1489493d89a59d046f63f", - "IPY_MODEL_44e2c572ab1641a29156ad4ee8884e12" - ], - "layout": "IPY_MODEL_ff6eead2826e4113abf7ab3a8cb31b0f" - } - }, - "bbef89e4fd9d4cf8ae4c8fcab9bc665d": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_39422514a4a04a9ba290285dc586ea9f", - "placeholder": "​", - "style": "IPY_MODEL_91a6de2063cc48b28021ef29feab7f69", - "value": " 3700/3750 [00:11<00:00, 310.13it/s, loss=1.160, v_num=0]" - } - }, - "bc4dfb14c9d14499ae72d2a30fc6bc2d": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "initial" - } - }, - "bcf69c2a0b694e0498beadb6f4509395": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - 
"box_style": "", - "children": [ - "IPY_MODEL_a7bcd18049d8493b9d3d9f17d86f0429", - "IPY_MODEL_3c99401bde8641c19978c11c9abb906a" - ], - "layout": "IPY_MODEL_2e20c741cf8a401cb90e8e230a23026a" - } - }, - "bef041a9f5a942f68b4a8488a371d3da": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": "2", - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "bf257b8a04b44a389da2e6f4c64379d4": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "c190999c2761453380f816372fcca608": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_773fd1b84c364903bc7350630e76a825", - "placeholder": "​", - "style": "IPY_MODEL_cce9ed8de0a048679453e53b71523eea", - "value": " 50/? 
[00:00<00:00, 286.31it/s]" - } - }, - "c24384195a074989a86217b2edc411cb": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "initial" - } - }, - "c293fc4171b0438595bc9a49fbb250cf": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "Epoch 500: ", - "description_tooltip": null, - "layout": "IPY_MODEL_b3817e0ba30f449585f7641b4d3061bb", - "max": 1, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_c24384195a074989a86217b2edc411cb", - "value": 1 - } - }, - "c3587b40d9f942e98b708ff0b5fc5301": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "c4200c1f957a4179af51245a797c8921": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_0f688614251d49589f320f2b2cb55344", - "placeholder": "​", - "style": "IPY_MODEL_1dabf5740f4d44d68d06629f77b001e3", - "value": " 8192/? 
[00:00<00:00, 18498.86it/s]" - } - }, - "c93f037dc6044d858ae1862d5b29f6f0": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_4b7021f73f6b4e5193454128ccf323d7", - "IPY_MODEL_6f55aa11acb14afdb2ac0a1052be1bb6" - ], - "layout": "IPY_MODEL_00ae53beaa9341f4826b1bdc0a6f88e0" - } - }, - "ca4cd1659d73446e964f9ab36d92e3a0": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "cce9ed8de0a048679453e53b71523eea": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "cd84335fb7234f3aa54dafe045614e56": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "initial" - } - }, - "cd86997da08649d7999ade2d0e7cea96": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_19c7460c565d494abbb8b9731a34294d", - "placeholder": "​", - "style": "IPY_MODEL_e10c94b1fdf84a9186ab7d87fd83f87f", - "value": " 1800/1875 [00:17<00:00, 356.02it/s]" - } - }, - "cd942318db094680821f0d9902941977": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - 
"_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": "inline-flex", - "flex": null, - "flex_flow": "row wrap", - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": "100%" - } - }, - "d01088cc378044cba4879032d74a852e": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "initial" - } - }, - "d0590d65433c4478af6a0762421f9f7a": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "d3b107acd1b1401cabe3090724e12e86": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "Downloading: 100%", - "description_tooltip": null, - "layout": "IPY_MODEL_29f35103a6e94af09c8ac9cdb2cca89c", - "max": 361, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_17f0e360e85f48d9a17b84c9b7f6c9f0", - "value": 361 - } - }, - "d7406a8b15f9439fba19ec4dab086c61": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_ea13174e5b894e93b3c59d7e599de5a9", - "placeholder": "​", - "style": "IPY_MODEL_42ede89dbd194eb6a603ccd7d4b96aae", - "value": " 50/? 
[00:00<00:00, 254.04it/s]" - } - }, - "d8bd5c9b233b41008109d14cffc89aaa": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_76c916d634c644a4a0d8f12e183822fd", - "placeholder": "​", - "style": "IPY_MODEL_d0590d65433c4478af6a0762421f9f7a", - "value": " 50/? [00:00<00:00, 576.16it/s]" - } - }, - "d8db4d3709f34c869dedbc066e60501e": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "initial" - } - }, - "d9540ab5d2394b77a65f48b501acdc18": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": "2", - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "dbb9fd5429f5416ab6a4f78f0c72867c": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - 
"de325f4002a945b4a2a15086c2a77816": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_6aafaca3c8824e2fa267f4a68d5d2ca3", - "IPY_MODEL_c4200c1f957a4179af51245a797c8921" - ], - "layout": "IPY_MODEL_5012438370764b4db215d545e9414c94" - } - }, - "df6521155d05459882601ba8c84f3dce": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_7ea8ad4e10bb465aa2b6708655a2793f", - "placeholder": "​", - "style": "IPY_MODEL_f4a052d2223a4d4fa95ed52f94ad465d", - "value": " 1900/? [00:05<00:00, 344.14it/s]" - } - }, - "e068e2b1c68c48a784c19fc716c043a3": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "initial" - } - }, - "e10c94b1fdf84a9186ab7d87fd83f87f": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "e23c0d6c117246b2a0a6681008748917": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "e4098b0091124fef8ba342783a82cc6e": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": 
"1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "initial" - } - }, - "e64a723bcf474d6699d78ec05462f995": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "e6e15d5c14134be0b4cf86fdecfef687": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "e7a06a13ea11427ea3866cec3a55b644": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "info", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_ee3d759a4e4442288599eacfd1347c8d", - "max": 1, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_05f01b8813374534a4c58ac65fe2b390", - "value": 1 - } - }, - "e7a864f4dc0f485eb045b778e981fc01": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "e918a6de59b64bd590e4f1233bbc078a": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_892246fdf6bb476abb35ec321ddf86e8", - "IPY_MODEL_88c181cd21a94ec9a43df9754c1986c9" - ], - "layout": 
"IPY_MODEL_abeb0a773f3542c39ff724ae0674b74e" - } - }, - "e9a6bf4eda3244c6bb17216715f36525": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_c293fc4171b0438595bc9a49fbb250cf", - "IPY_MODEL_819c83bf0bbd472ba417c31e957718c7" - ], - "layout": "IPY_MODEL_0922c5b2de554b4fa28dd531603f2709" - } - }, - "ea13174e5b894e93b3c59d7e599de5a9": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "ea162090fc954f0198a1d63507dfff9b": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "ee0a78c01b63443f9e51470a1b1e79a4": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "info", - "description": " 0%", - 
"description_tooltip": null, - "layout": "IPY_MODEL_ca4cd1659d73446e964f9ab36d92e3a0", - "max": 1, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_a9413692ae5040e6ae3c2a446dbe297c", - "value": 0 - } - }, - "ee3d759a4e4442288599eacfd1347c8d": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "ee908316d227495381e8cf7dcf5526f1": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_8eb2086a01cf41429a5f4adff5f2359b", - "IPY_MODEL_bbef89e4fd9d4cf8ae4c8fcab9bc665d" - ], - "layout": "IPY_MODEL_a4b49709f7464ce491324e8aa636c152" - } - }, - "f1db9d62eee44e61bb8ac26c16b3b601": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_9b6445338a69425889a8901c192d5144", - "max": 1, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_d8db4d3709f34c869dedbc066e60501e", - "value": 1 - } - }, - "f220485e332d4c3cbfc3c45ce3b5fdf1": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": "2", - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, 
- "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "f23f02d00d424574afa29311b8d0906e": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "f261b8aab86b4d6e94984bf658c1b74d": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": "2", - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "f444ab7646444b9885cfec41b5a2236e": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": "inline-flex", - "flex": null, - "flex_flow": "row wrap", - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - 
"grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": "100%" - } - }, - "f4a052d2223a4d4fa95ed52f94ad465d": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "f778d9ef70ca4f5898c423109cf82ed2": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "fad0b06dc57e4b4599cf43daad7106b8": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "", - "description": "Validation sanity check: ", - "description_tooltip": null, - "layout": "IPY_MODEL_0a96cc26343e4bb2ac2f5145be2fbacf", - "max": 5, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_a5cc9e60aff641dca27f1adf6807e5b3", - "value": 5 - } - }, - "fc262db2a53948488092a77209081319": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "fc3f14c4e83048aa9d6fe9963f95bf7a": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - 
"_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_118c0b8da0df4ff68a90a3d500f1d1b8", - "max": 1, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_4eaea330bc8e414fbf2f0e2b21af8b08", - "value": 1 - } - }, - "fce1fc72006f4e84a6497a493cbbfca2": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "initial" - } - }, - "fd8ec919352046dd84057e9763bb235a": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "fdf5c4a49602423184f6d94cd814177e": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "initial" - } - }, - "ff6eead2826e4113abf7ab3a8cb31b0f": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - } - } - } - }, - "nbformat": 4, - "nbformat_minor": 1 -} diff --git a/notebooks/README.md b/notebooks/README.md deleted file mode 100644 index a72e154c36410..0000000000000 --- a/notebooks/README.md +++ /dev/null @@ -1,15 +0,0 @@ -# Lightning Notebooks ⚡ - -## Official Notebooks - -You can easily run any of the official notebooks by clicking the 'Open in Colab' links in the table below :smile: - -| Notebook | Description | Colab Link | -| :----------------------- | :----------------------------------------------------------------------------------- | 
:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| **MNIST Hello World** | Train your first Lightning Module on the classic MNIST Handwritten Digits Dataset. | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/PytorchLightning/pytorch-lightning/blob/master/notebooks/01-mnist-hello-world.ipynb) | -| **Datamodules** | Learn about DataModules and train a dataset-agnostic model on MNIST and CIFAR10. | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/PytorchLightning/pytorch-lightning/blob/master/notebooks/02-datamodules.ipynb) | -| **GAN** | Train a GAN on the MNIST Dataset. Learn how to use multiple optimizers in Lightning. | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/PytorchLightning/pytorch-lightning/blob/master/notebooks/03-basic-gan.ipynb) | -| **BERT** | Fine-tune HuggingFace Transformers models on the GLUE Benchmark | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/PytorchLightning/pytorch-lightning/blob/master/notebooks/04-transformers-text-classification.ipynb) | -| **Trainer Flags** | Overview of the available Lightning `Trainer` flags | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/PytorchLightning/pytorch-lightning/blob/master/notebooks/05-trainer-flags-overview.ipynb) | -| **TPU Training** | Train a model on MNIST using TPUs with Lightning | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/PytorchLightning/pytorch-lightning/blob/master/notebooks/06-mnist-tpu-training.ipynb) | -| **94% Baseline CIFAR10** | Establish a quick baseline of ~94% accuracy on CIFAR10 using Resnet in Lightning | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/PytorchLightning/pytorch-lightning/blob/master/notebooks/07-cifar10-baseline.ipynb) | diff --git a/pl_examples/basic_examples/autoencoder.py b/pl_examples/basic_examples/autoencoder.py index 8ea03dabc9bdb..94e4fbfcf7ae2 100644 --- a/pl_examples/basic_examples/autoencoder.py +++ b/pl_examples/basic_examples/autoencoder.py @@ -87,6 +87,12 @@ def test_step(self, batch, batch_idx): loss = F.mse_loss(x_hat, x) self.log('test_loss', loss, on_step=True) + def predict_step(self, batch, batch_idx, dataloader_idx=None): + x, y = batch + x = x.view(x.size(0), -1) + z = self.encoder(x) + return self.decoder(z) + def configure_optimizers(self): optimizer = torch.optim.Adam(self.parameters(), lr=1e-3) return optimizer @@ -113,10 +119,15 @@ def val_dataloader(self): def test_dataloader(self): return DataLoader(self.mnist_test, batch_size=self.batch_size) + def predict_dataloader(self): + return DataLoader(self.mnist_test, batch_size=self.batch_size) + def cli_main(): - cli = LightningCLI(LitAutoEncoder, MyDataModule, seed_everything_default=1234) + cli = LightningCLI(LitAutoEncoder, MyDataModule, seed_everything_default=1234, save_config_overwrite=True) cli.trainer.test(cli.model, datamodule=cli.datamodule) + predictions = cli.trainer.predict(cli.model, datamodule=cli.datamodule) + print(predictions[0]) if 
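Both example diffs in this patch add the same prediction path: a `predict_step` hook, a `predict_dataloader`, and a `trainer.predict(...)` call that returns one entry per batch. A minimal, self-contained sketch of how these hooks interact, using a hypothetical `TinyPredictModel` that is not part of this patch:

import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset

import pytorch_lightning as pl


class TinyPredictModel(pl.LightningModule):
    # Hypothetical module, for illustration only.

    def __init__(self):
        super().__init__()
        self.layer = nn.Linear(4, 2)

    def predict_step(self, batch, batch_idx, dataloader_idx=None):
        # Called once per batch by `trainer.predict`; the return values
        # are collected into the list that `predict` hands back.
        x, _ = batch
        return self.layer(x)

    def predict_dataloader(self):
        # Used when no dataloader or datamodule is passed to `trainer.predict`.
        dataset = TensorDataset(torch.randn(8, 4), torch.zeros(8, dtype=torch.long))
        return DataLoader(dataset, batch_size=4)


trainer = pl.Trainer()
predictions = trainer.predict(TinyPredictModel())
print(predictions[0].shape)  # one tensor per batch: torch.Size([4, 2])

Since `predict` collects the per-batch returns into a list, the `predictions[0]` printed by the examples in this patch is the output for the first batch only.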
__name__ == '__main__': diff --git a/pl_examples/basic_examples/backbone_image_classifier.py b/pl_examples/basic_examples/backbone_image_classifier.py index 57cf97be00023..381cda088ea9d 100644 --- a/pl_examples/basic_examples/backbone_image_classifier.py +++ b/pl_examples/basic_examples/backbone_image_classifier.py @@ -100,6 +100,10 @@ def test_step(self, batch, batch_idx): loss = F.cross_entropy(y_hat, y) self.log('test_loss', loss) + def predict_step(self, batch, batch_idx, dataloader_idx=None): + x, y = batch + return self.backbone(x) + def configure_optimizers(self): # self.hparams available because we called self.save_hyperparameters() return torch.optim.Adam(self.parameters(), lr=self.hparams.learning_rate) @@ -126,10 +130,15 @@ def val_dataloader(self): def test_dataloader(self): return DataLoader(self.mnist_test, batch_size=self.batch_size) + def predict_dataloader(self): + return DataLoader(self.mnist_test, batch_size=self.batch_size) + def cli_main(): - cli = LightningCLI(LitClassifier, MyDataModule, seed_everything_default=1234) + cli = LightningCLI(LitClassifier, MyDataModule, seed_everything_default=1234, save_config_overwrite=True) cli.trainer.test(cli.model, datamodule=cli.datamodule) + predictions = cli.trainer.predict(cli.model, datamodule=cli.datamodule) + print(predictions[0]) if __name__ == '__main__': diff --git a/pl_examples/basic_examples/conv_sequential_example.py b/pl_examples/basic_examples/conv_sequential_example.py deleted file mode 100644 index 9747c4a939340..0000000000000 --- a/pl_examples/basic_examples/conv_sequential_example.py +++ /dev/null @@ -1,226 +0,0 @@ -# Copyright The PyTorch Lightning team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" - -Example script of running the experimental DDP Sequential Plugin. -This script splits a convolutional model onto multiple GPUs, whilst using the internal built in balancer -to balance across your GPUs. - -To run: -python conv_model_sequential_example.py --accelerator ddp --gpus 4 --max_epochs 1 --batch_size 256 --use_rpc_sequential -""" -import math -from argparse import ArgumentParser - -import torch -import torch.nn as nn -import torch.nn.functional as F -import torchvision -from torchmetrics.functional import accuracy - -import pytorch_lightning as pl -from pl_examples import cli_lightning_logo -from pytorch_lightning import Trainer -from pytorch_lightning.plugins import RPCSequentialPlugin -from pytorch_lightning.utilities import _BOLTS_AVAILABLE, _FAIRSCALE_PIPE_AVAILABLE - -if _BOLTS_AVAILABLE: - import pl_bolts - from pl_bolts.transforms.dataset_normalizations import cifar10_normalization - -##################### -# Modules # -##################### - - -class Flatten(nn.Module): - - def forward(self, x): - return x.view(x.size(0), -1) - - -############################### -# LightningModule # -############################### - - -class LitResnet(pl.LightningModule): - """ - >>> LitResnet() # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE - LitResnet( - (sequential_module): Sequential(...) 
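For reference, the `predict_step` / `predict_dataloader` pattern added to both example scripts above boils down to: the hook returns whatever the model should emit for one batch, and `Trainer.predict` collects the per-batch returns into a list. A minimal sketch of that contract (the `TinyPredictor` module and the random data are made up for illustration; only `predict_step` and `Trainer.predict` come from the patch):

```python
import torch
from torch.utils.data import DataLoader, TensorDataset

import pytorch_lightning as pl


class TinyPredictor(pl.LightningModule):

    def __init__(self):
        super().__init__()
        self.layer = torch.nn.Linear(32, 2)

    def predict_step(self, batch, batch_idx, dataloader_idx=None):
        # same shape as the examples above: unpack the batch, return the raw output
        x, _ = batch
        return self.layer(x)


if __name__ == '__main__':
    dataset = TensorDataset(torch.randn(64, 32), torch.zeros(64))
    loader = DataLoader(dataset, batch_size=16)
    predictions = pl.Trainer(weights_summary=None).predict(TinyPredictor(), dataloaders=loader)
    print(len(predictions), predictions[0].shape)  # 4 batches, each a (16, 2) tensor
```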
- ) - """ - - def __init__(self, lr=0.05, batch_size=32, manual_optimization=False): - super().__init__() - - self.save_hyperparameters() - self.sequential_module = nn.Sequential( - # Conv Layer block 1 - nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1), - nn.BatchNorm2d(32), - nn.ReLU(inplace=False), - nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1), - nn.ReLU(inplace=False), - nn.MaxPool2d(kernel_size=2, stride=2), - - # Conv Layer block 2 - nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1), - nn.BatchNorm2d(128), - nn.ReLU(inplace=False), - nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1), - nn.ReLU(inplace=False), - nn.MaxPool2d(kernel_size=2, stride=2), - nn.Dropout2d(p=0.05), - - # Conv Layer block 3 - nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1), - nn.BatchNorm2d(256), - nn.ReLU(inplace=False), - nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1), - nn.ReLU(inplace=False), - nn.MaxPool2d(kernel_size=2, stride=2), - Flatten(), - nn.Dropout(p=0.1), - nn.Linear(4096, 1024), - nn.ReLU(inplace=False), - nn.Linear(1024, 512), - nn.ReLU(inplace=False), - nn.Dropout(p=0.1), - nn.Linear(512, 10) - ) - self._example_input_array = torch.randn((1, 3, 32, 32)) - - if manual_optimization: - self.automatic_optimization = False - self.training_step = self.training_step_manual - - def forward(self, x): - out = self.sequential_module(x) - return F.log_softmax(out, dim=-1) - - def training_step_manual(self, batch, batch_idx): - opt = self.optimizers() - - def closure(): - x, y = batch - logits = self.forward(x) - loss = F.nll_loss(logits, y) - self.manual_backward(loss, opt) - self.log('train_loss', loss, prog_bar=True) - - opt.step(closure=closure) - - def training_step(self, batch, batch_idx): - x, y = batch - logits = self.forward(x) - loss = F.nll_loss(logits, y) - self.log('Training Loss', loss) - return loss - - def _evaluate(self, batch, batch_idx, stage=None): - x, y = batch - out = self.forward(x) - logits = F.log_softmax(out, dim=-1) - loss = F.nll_loss(logits, y) - preds = torch.argmax(logits, dim=-1) - acc = accuracy(preds, y) - - if stage: - self.log(f'{stage}_loss', loss, prog_bar=True) - self.log(f'{stage}_acc', acc, prog_bar=True) - - return loss, acc - - def validation_step(self, batch, batch_idx): - return self._evaluate(batch, batch_idx, 'val')[0] - - def test_step(self, batch, batch_idx): - loss, acc = self._evaluate(batch, batch_idx, 'test') - self.log_dict({'test_loss': loss, 'test_acc': acc}) - - def configure_optimizers(self): - optimizer = torch.optim.SGD(self.parameters(), lr=self.hparams.lr, momentum=0.9, weight_decay=5e-4) - return { - 'optimizer': optimizer, - 'lr_scheduler': { - 'scheduler': torch.optim.lr_scheduler.OneCycleLR( - optimizer, - 0.1, - epochs=self.trainer.max_epochs, - steps_per_epoch=math.ceil(45000 / self.hparams.batch_size) - ), - 'interval': 'step', - } - } - - -################################# -# Instantiate Data Module # -################################# - - -def instantiate_datamodule(args): - train_transforms = torchvision.transforms.Compose([ - torchvision.transforms.RandomCrop(32, padding=4), - torchvision.transforms.RandomHorizontalFlip(), - torchvision.transforms.ToTensor(), - cifar10_normalization(), - ]) - - test_transforms = torchvision.transforms.Compose([ - torchvision.transforms.ToTensor(), - cifar10_normalization(), - ]) - - cifar10_dm = pl_bolts.datamodules.CIFAR10DataModule( - data_dir=args.data_dir, - 
batch_size=args.batch_size, - train_transforms=train_transforms, - test_transforms=test_transforms, - val_transforms=test_transforms, - ) - - return cifar10_dm - - -if __name__ == "__main__": - cli_lightning_logo() - - assert _BOLTS_AVAILABLE, "Bolts is required for this example, install it via `pip install lightning-bolts`" - assert _FAIRSCALE_PIPE_AVAILABLE, "FairScale and PyTorch 1.6 is required for this example." - - parser = ArgumentParser(description="Pipe Example") - parser.add_argument("--use_rpc_sequential", action="store_true") - parser.add_argument("--manual_optimization", action="store_true") - parser = Trainer.add_argparse_args(parser) - parser = pl_bolts.datamodules.CIFAR10DataModule.add_argparse_args(parser) - args = parser.parse_args() - - cifar10_dm = instantiate_datamodule(args) - - plugins = None - if args.use_rpc_sequential: - plugins = RPCSequentialPlugin() - - model = LitResnet(batch_size=args.batch_size, manual_optimization=args.manual_optimization) - - trainer = pl.Trainer.from_argparse_args(args, plugins=[plugins] if plugins else None) - trainer.fit(model, cifar10_dm) - trainer.test(model, datamodule=cifar10_dm) - - if trainer.accelerator.rpc_enabled: - # Called at the end of trainer to ensure all processes are killed - trainer.training_type_plugin.exit_rpc_process() diff --git a/pl_examples/basic_examples/profiler_example.py b/pl_examples/basic_examples/profiler_example.py index c79214af93581..688eb15ef923f 100644 --- a/pl_examples/basic_examples/profiler_example.py +++ b/pl_examples/basic_examples/profiler_example.py @@ -62,6 +62,10 @@ def validation_step(self, batch, batch_idx): loss = self.criterion(outputs, labels) self.log("val_loss", loss) + def predict_step(self, batch, batch_idx, dataloader_idx: int = None): + inputs = batch[0] + return self.model(inputs) + def configure_optimizers(self): return torch.optim.SGD(self.parameters(), lr=0.001, momentum=0.9) diff --git a/pl_examples/basic_examples/simple_image_classifier.py b/pl_examples/basic_examples/simple_image_classifier.py index ffb6434352b2e..70aaa35931f8e 100644 --- a/pl_examples/basic_examples/simple_image_classifier.py +++ b/pl_examples/basic_examples/simple_image_classifier.py @@ -76,7 +76,7 @@ def configure_optimizers(self): def cli_main(): - cli = LightningCLI(LitClassifier, MNISTDataModule, seed_everything_default=1234) + cli = LightningCLI(LitClassifier, MNISTDataModule, seed_everything_default=1234, save_config_overwrite=True) cli.trainer.test(cli.model, datamodule=cli.datamodule) diff --git a/pl_examples/bug_report_model.py b/pl_examples/bug_report_model.py index abb65ba86fd93..f906ab9bde77c 100644 --- a/pl_examples/bug_report_model.py +++ b/pl_examples/bug_report_model.py @@ -59,8 +59,8 @@ def run(): max_epochs=1, weights_summary=None, ) - trainer.fit(model, train_dataloader=train_data, val_dataloaders=val_data) - trainer.test(model, test_dataloaders=test_data) + trainer.fit(model, train_dataloaders=train_data, val_dataloaders=val_data) + trainer.test(model, dataloaders=test_data) if __name__ == '__main__': diff --git a/pl_examples/domain_templates/reinforce_learn_Qnet.py b/pl_examples/domain_templates/reinforce_learn_Qnet.py index 70726a748818c..114097df483af 100644 --- a/pl_examples/domain_templates/reinforce_learn_Qnet.py +++ b/pl_examples/domain_templates/reinforce_learn_Qnet.py @@ -34,7 +34,7 @@ import argparse from collections import deque, namedtuple, OrderedDict -from typing import List, Tuple +from typing import Iterator, List, Tuple import gym import numpy as np @@ -139,7 +139,7 @@ 
def __init__(self, buffer: ReplayBuffer, sample_size: int = 200) -> None: self.buffer = buffer self.sample_size = sample_size - def __iter__(self) -> Tuple: + def __iter__(self) -> Iterator: states, actions, rewards, dones, new_states = self.buffer.sample(self.sample_size) for i in range(len(dones)): yield states[i], actions[i], rewards[i], dones[i], new_states[i] diff --git a/pl_examples/domain_templates/reinforce_learn_ppo.py b/pl_examples/domain_templates/reinforce_learn_ppo.py index f3453a5eb86f0..5bca67f41a7b3 100644 --- a/pl_examples/domain_templates/reinforce_learn_ppo.py +++ b/pl_examples/domain_templates/reinforce_learn_ppo.py @@ -28,7 +28,7 @@ [3] https://github.com/sid-sundrani/ppo_lightning """ import argparse -from typing import Callable, Iterable, List, Tuple +from typing import Callable, Iterator, List, Tuple import gym import torch @@ -144,7 +144,7 @@ class ExperienceSourceDataset(IterableDataset): def __init__(self, generate_batch: Callable): self.generate_batch = generate_batch - def __iter__(self) -> Iterable: + def __iter__(self) -> Iterator: iterator = self.generate_batch() return iterator @@ -413,7 +413,7 @@ def training_step(self, batch: Tuple[torch.Tensor, torch.Tensor], batch_idx, opt return loss_actor - elif optimizer_idx == 1: + if optimizer_idx == 1: loss_critic = self.critic_loss(state, action, old_logp, qval, adv) self.log('loss_critic', loss_critic, on_step=False, on_epoch=True, prog_bar=False, logger=True) diff --git a/pl_examples/ipu_examples/__init__.py b/pl_examples/ipu_examples/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pl_examples/ipu_examples/mnist.py b/pl_examples/ipu_examples/mnist.py new file mode 100644 index 0000000000000..37cb63c076e2e --- /dev/null +++ b/pl_examples/ipu_examples/mnist.py @@ -0,0 +1,89 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from torch.nn import functional as F + +import pytorch_lightning as pl +from pl_examples.basic_examples.mnist_datamodule import MNISTDataModule + + +class LitClassifier(pl.LightningModule): + + def __init__( + self, + hidden_dim: int = 128, + learning_rate: float = 0.0001, + ): + super().__init__() + self.save_hyperparameters() + + self.l1 = torch.nn.Linear(28 * 28, self.hparams.hidden_dim) + self.l2 = torch.nn.Linear(self.hparams.hidden_dim, 10) + + def forward(self, x): + x = x.view(x.size(0), -1) + x = torch.relu(self.l1(x)) + x = torch.relu(self.l2(x)) + return x + + def training_step(self, batch, batch_idx): + x, y = batch + y_hat = self(x) + loss = F.cross_entropy(y_hat, y) + return loss + + def validation_step(self, batch, batch_idx): + x, y = batch + probs = self(x) + # we currently return the accuracy as the validation_step/test_step is run on the IPU devices. + # Outputs from the step functions are sent to the host device, where we calculate the metrics in + # validation_epoch_end and test_epoch_end for the test_step. 
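+        # e.g. if three steps return accuracies [0.50, 0.75, 1.00], `validation_epoch_end`
+        # below stacks them on the host and logs their mean (0.75).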
+        acc = self.accuracy(probs, y)
+        return acc
+
+    def test_step(self, batch, batch_idx):
+        x, y = batch
+        logits = self(x)
+        acc = self.accuracy(logits, y)
+        return acc
+
+    def accuracy(self, logits, y):
+        # currently IPU poptorch doesn't implicitly convert bools to tensors
+        # hence we use an explicit calculation for accuracy here. Once fixed in poptorch
+        # we can use the accuracy metric.
+        acc = torch.sum(torch.eq(torch.argmax(logits, -1), y).to(torch.float32)) / len(y)
+        return acc
+
+    def validation_epoch_end(self, outputs) -> None:
+        # since the training step/validation step and test step are run on the IPU device
+        # we must log the average accuracy outside the step functions.
+        self.log('val_acc', torch.stack(outputs).mean(), prog_bar=True)
+
+    def test_epoch_end(self, outputs) -> None:
+        self.log('test_acc', torch.stack(outputs).mean())
+
+    def configure_optimizers(self):
+        return torch.optim.Adam(self.parameters(), lr=self.hparams.learning_rate)
+
+
+if __name__ == '__main__':
+    dm = MNISTDataModule(batch_size=32)
+
+    model = LitClassifier()
+
+    trainer = pl.Trainer(max_epochs=2, ipus=8)
+
+    trainer.fit(model, datamodule=dm)
+    trainer.test(model, datamodule=dm)
diff --git a/pyproject.toml b/pyproject.toml
index e8a3213f2b738..1f21e1f088acb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,3 +16,28 @@ profile = "black"
 line_length = 120
 force_sort_within_sections = "False"
 order_by_type = "False"
+
+[tool.vulture]
+exclude = ['pytorch_lightning/metrics']
+make_whitelist = true
+min_confidence = 95
+paths = ["pytorch_lightning"]
+ignore_names = [
+    "*_nb",
+    "*batch",
+    "*idx",
+    "*param*",
+    "cmd_line",
+    "kw",
+    "loc",
+    "mocked_device_count*",
+    "my_path",
+    "new_device",
+    "new_dtype",
+    "prediction",
+    "root",
+    "signum",
+    "torch_save",
+    "using_lbfgs",
+]
+sort_by_size = true
diff --git a/pytorch_lightning/accelerators/__init__.py b/pytorch_lightning/accelerators/__init__.py
index 05e15fe1f1767..2a460a27e373a 100644
--- a/pytorch_lightning/accelerators/__init__.py
+++ b/pytorch_lightning/accelerators/__init__.py
@@ -13,4 +13,5 @@
 from pytorch_lightning.accelerators.accelerator import Accelerator  # noqa F401
 from pytorch_lightning.accelerators.cpu import CPUAccelerator  # noqa F401
 from pytorch_lightning.accelerators.gpu import GPUAccelerator  # noqa F401
+from pytorch_lightning.accelerators.ipu import IPUAccelerator  # noqa F401
 from pytorch_lightning.accelerators.tpu import TPUAccelerator  # noqa F401
diff --git a/pytorch_lightning/accelerators/accelerator.py b/pytorch_lightning/accelerators/accelerator.py
index 4ea017ae0c208..abfb29c149bff 100644
--- a/pytorch_lightning/accelerators/accelerator.py
+++ b/pytorch_lightning/accelerators/accelerator.py
@@ -179,10 +179,6 @@ def batch_to_device(
 
         return move_data_to_device(batch, device)
 
-    def on_train_start(self) -> None:
-        """Hook to do something upon the training start"""
-        pass
-
     def training_step(
         self,
         step_kwargs: Dict[str, Union[Any, int]],
@@ -348,14 +344,6 @@ def clip_gradients(
             model=self.model,
         )
 
-    def on_train_epoch_end(self) -> None:
-        """Hook to do something on the end of an training epoch."""
-        pass
-
-    def on_train_end(self) -> None:
-        """Hook to do something at the end of the training"""
-        pass
-
     def setup_optimizers(self, trainer: 'pl.Trainer') -> None:
        """
        Creates optimizers and schedulers
@@ -394,7 +382,7 @@ def to_device(self, step_kwargs: Dict[str, Union[Any, int]]) -> Dict[str, Union[
     def amp_backend(self) -> Optional[LightningEnum]:
         if isinstance(self.precision_plugin, ApexMixedPrecisionPlugin):
             return 
AMPType.APEX - elif isinstance(self.precision_plugin, NativeMixedPrecisionPlugin): + if isinstance(self.precision_plugin, NativeMixedPrecisionPlugin): return AMPType.NATIVE return None @@ -406,10 +394,6 @@ def precision(self) -> Union[str, int]: def scaler(self) -> Optional['GradScaler']: return getattr(self.precision_plugin, 'scaler', None) - @property - def rpc_enabled(self) -> bool: - return self.training_type_plugin.rpc_enabled - def optimizer_state(self, optimizer: Optimizer) -> Dict[str, Tensor]: """ Returns state of an optimizer. Allows for syncing/collating optimizer state from processes in custom @@ -460,6 +444,22 @@ def process_dataloader(self, dataloader: Union[Iterable, DataLoader]) -> Union[I """ return self.training_type_plugin.process_dataloader(dataloader) + def on_reset_train_dataloader(self, dataloader: Union[Iterable, DataLoader]) -> Union[Iterable, DataLoader]: + """Called before resetting the train dataloader.""" + return self.training_type_plugin.on_reset_train_dataloader(dataloader) + + def on_reset_val_dataloader(self, dataloader: Union[Iterable, DataLoader]) -> Union[Iterable, DataLoader]: + """Called before resetting the val dataloader.""" + return self.training_type_plugin.on_reset_val_dataloader(dataloader) + + def on_reset_test_dataloader(self, dataloader: Union[Iterable, DataLoader]) -> Union[Iterable, DataLoader]: + """Called before resetting the test dataloader.""" + return self.training_type_plugin.on_reset_test_dataloader(dataloader) + + def on_reset_predict_dataloader(self, dataloader: Union[Iterable, DataLoader]) -> Union[Iterable, DataLoader]: + """Called before resetting the predict dataloader.""" + return self.training_type_plugin.on_reset_predict_dataloader(dataloader) + @property def results(self) -> Any: """ @@ -547,3 +547,45 @@ def setup_optimizers_in_pre_dispatch(self) -> bool: def update_global_step(self, total_batch_idx: int, current_global_step: int) -> int: return self.training_type_plugin.update_global_step(total_batch_idx, current_global_step) + + def on_train_epoch_end(self) -> None: + """Hook to do something on the end of an training epoch.""" + pass + + def on_train_start(self) -> None: + """Called when train begins.""" + return self.training_type_plugin.on_train_start() + + def on_validation_start(self) -> None: + """Called when validation begins.""" + return self.training_type_plugin.on_validation_start() + + def on_test_start(self) -> None: + """Called when test begins.""" + return self.training_type_plugin.on_test_start() + + def on_predict_start(self) -> None: + """Called when predict begins.""" + return self.training_type_plugin.on_predict_start() + + def on_validation_end(self) -> None: + """Called when validation ends.""" + return self.training_type_plugin.on_validation_end() + + def on_test_end(self) -> None: + """Called when test end.""" + return self.training_type_plugin.on_test_end() + + def on_predict_end(self) -> None: + """Called when predict ends.""" + return self.training_type_plugin.on_predict_end() + + def on_train_end(self) -> None: + """Called when train ends.""" + return self.training_type_plugin.on_train_end() + + def on_train_batch_start(self, batch: Any, batch_idx: int, dataloader_idx: int) -> None: + """ + Called in the training loop before anything happens for that batch. 
+        """
+        return self.training_type_plugin.on_train_batch_start(batch, batch_idx, dataloader_idx)
diff --git a/pytorch_lightning/accelerators/gpu.py b/pytorch_lightning/accelerators/gpu.py
index 7543a2b794b5d..1c5ff56d805a6 100644
--- a/pytorch_lightning/accelerators/gpu.py
+++ b/pytorch_lightning/accelerators/gpu.py
@@ -42,10 +42,7 @@ def setup(self, trainer: 'pl.Trainer', model: 'pl.LightningModule') -> None:
 
     def on_train_start(self) -> None:
         # clear cache before training
-        # use context because of:
-        # https://discuss.pytorch.org/t/out-of-memory-when-i-use-torch-cuda-empty-cache/57898
-        with torch.cuda.device(self.root_device):
-            torch.cuda.empty_cache()
+        torch.cuda.empty_cache()
 
     @staticmethod
     def set_nvidia_flags(local_rank: int) -> None:
diff --git a/pytorch_lightning/accelerators/ipu.py b/pytorch_lightning/accelerators/ipu.py
new file mode 100644
index 0000000000000..c9bee827af0e6
--- /dev/null
+++ b/pytorch_lightning/accelerators/ipu.py
@@ -0,0 +1,35 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from collections.abc import Callable
+from typing import Any
+
+from torch.optim import Optimizer
+
+import pytorch_lightning as pl
+from pytorch_lightning.accelerators.accelerator import Accelerator
+from pytorch_lightning.utilities.exceptions import MisconfigurationException
+
+
+class IPUAccelerator(Accelerator):
+    """ Accelerator for IPUs. """
+
+    def setup_optimizers(self, trainer: 'pl.Trainer') -> None:
+        super().setup_optimizers(trainer)
+
+        if len(self.optimizers) > 1:
+            raise MisconfigurationException("IPUs currently only support one optimizer.")
+
+    def optimizer_step(self, optimizer: Optimizer, opt_idx: int, lambda_closure: Callable, **kwargs: Any) -> None:
+        # Optimizer step is handled by the IPU accelerator.
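+        # only the closure (the forward and backward pass) is evaluated on the host;
+        # `optimizer.step()` is intentionally never called here, per the comment above.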
+ lambda_closure() diff --git a/pytorch_lightning/callbacks/early_stopping.py b/pytorch_lightning/callbacks/early_stopping.py index f0c1a3a95819e..6f9ea07c0716d 100644 --- a/pytorch_lightning/callbacks/early_stopping.py +++ b/pytorch_lightning/callbacks/early_stopping.py @@ -26,7 +26,7 @@ import pytorch_lightning as pl from pytorch_lightning.callbacks.base import Callback -from pytorch_lightning.utilities import rank_zero_warn +from pytorch_lightning.utilities import rank_zero_deprecation, rank_zero_warn from pytorch_lightning.utilities.exceptions import MisconfigurationException log = logging.getLogger(__name__) @@ -88,7 +88,7 @@ class EarlyStopping(Callback): def __init__( self, - monitor: str = 'early_stop_on', + monitor: Optional[str] = None, min_delta: float = 0.0, patience: int = 3, verbose: bool = False, @@ -100,7 +100,6 @@ def __init__( check_on_train_epoch_end: bool = True, ): super().__init__() - self.monitor = monitor self.min_delta = min_delta self.patience = patience self.verbose = verbose @@ -120,6 +119,13 @@ def __init__( torch_inf = torch.tensor(np.Inf) self.best_score = torch_inf if self.monitor_op == torch.lt else -torch_inf + if monitor is None: + rank_zero_deprecation( + "The `EarlyStopping(monitor)` argument will be required starting in v1.6." + " For backward compatibility, setting this to `early_stop_on`." + ) + self.monitor = monitor or "early_stop_on" + def _validate_condition_metric(self, logs): monitor_val = logs.get(self.monitor) @@ -190,7 +196,7 @@ def _run_early_stopping_check(self, trainer) -> None: # when in dev debugging trainer.dev_debugger.track_early_stopping_history(self, current) - should_stop, reason = self._evalute_stopping_criteria(current) + should_stop, reason = self._evalute_stopping_criteria(current, trainer) # stop every ddp process if any world process decides to stop should_stop = trainer.training_type_plugin.reduce_boolean_decision(should_stop) @@ -200,7 +206,7 @@ def _run_early_stopping_check(self, trainer) -> None: if reason and self.verbose: self._log_info(trainer, reason) - def _evalute_stopping_criteria(self, current: torch.Tensor) -> Tuple[bool, str]: + def _evalute_stopping_criteria(self, current: torch.Tensor, trainer: 'pl.Trainer') -> Tuple[bool, str]: should_stop = False reason = None if self.check_finite and not torch.isfinite(current): @@ -223,7 +229,7 @@ def _evalute_stopping_criteria(self, current: torch.Tensor) -> Tuple[bool, str]: f" {self.monitor} = {current} {self.order_dict[self.mode]} {self.divergence_threshold}." " Signaling Trainer to stop." 
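             # e.g. with divergence_threshold=10.0 and mode='min', a current value of 12.3
             # lands in this branch: the reason above is logged and training stops immediately.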
             )
-        elif self.monitor_op(current - self.min_delta, self.best_score):
+        elif self.monitor_op(current - self.min_delta, self.best_score.to(trainer.lightning_module.device)):
             should_stop = False
             reason = self._improvement_message(current)
             self.best_score = current
diff --git a/pytorch_lightning/callbacks/finetuning.py b/pytorch_lightning/callbacks/finetuning.py
index a6c13d1b0c0db..fe7e5f7bc09eb 100644
--- a/pytorch_lightning/callbacks/finetuning.py
+++ b/pytorch_lightning/callbacks/finetuning.py
@@ -20,7 +20,7 @@
 from typing import Any, Callable, Dict, Generator, Iterable, List, Optional, Union
 
 import torch
-from torch.nn import Module
+from torch.nn import Module, ModuleDict
 from torch.nn.modules.batchnorm import _BatchNorm
 from torch.optim.optimizer import Optimizer
 
@@ -63,7 +63,7 @@ def configure_optimizer(self):
 
         class FeatureExtractorFreezeUnfreeze(BaseFinetuning):
 
-            def __init__(self, unfreeze_at_epoch=10)
+            def __init__(self, unfreeze_at_epoch=10):
                 self._unfreeze_at_epoch = unfreeze_at_epoch
 
             def freeze_before_training(self, pl_module):
@@ -105,7 +105,8 @@ def on_load_checkpoint(
     @staticmethod
     def flatten_modules(modules: Union[Module, Iterable[Union[Module, Iterable]]]) -> List[Module]:
         """
-        This function is used to flatten a module or an iterable of modules into a list of its modules.
+        This function is used to flatten a module or an iterable of modules into a list of its leaf modules (modules
+        with no children) and parent modules that have parameters directly themselves.
 
         Args:
             modules: A given module or an iterable of modules
@@ -113,6 +114,9 @@ def flatten_modules(modules: Union[Module, Iterable[Union[Module, Iterable]]]) -
         Returns:
             List of modules
         """
+        if isinstance(modules, ModuleDict):
+            modules = modules.values()
+
         if isinstance(modules, Iterable):
             _modules = []
             for m in modules:
@@ -121,8 +125,8 @@ def flatten_modules(modules: Union[Module, Iterable[Union[Module, Iterable]]]) -
         else:
             _modules = modules.modules()
 
-        # Leaf nodes in the graph have no children, so we use that to filter
-        return [m for m in _modules if not list(m.children())]
+        # Capture all leaf modules as well as parent modules that have parameters directly themselves
+        return [m for m in _modules if not list(m.children()) or m._parameters]
 
     @staticmethod
     def filter_params(
@@ -136,7 +140,6 @@ def filter_params(
             modules: A given module or an iterable of modules
             train_bn: Whether to train BatchNorm module
             requires_grad: Whether to create a generator for trainable or non-trainable parameters.
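To sanity-check the new `flatten_modules` filter above (`not list(m.children()) or m._parameters`), here is a tiny self-contained probe; the `Parent` module is hypothetical:

```python
import torch
import torch.nn as nn


class Parent(nn.Module):

    def __init__(self):
        super().__init__()
        self.child = nn.Linear(4, 4)              # a leaf module
        self.scale = nn.Parameter(torch.ones(1))  # a parameter owned by the parent itself


model = Parent()
kept = [m for m in model.modules() if not list(m.children()) or m._parameters]
# the Linear leaf is kept, and so is Parent because it holds `scale` directly
assert any(isinstance(m, nn.Linear) for m in kept)
assert any(type(m) is Parent for m in kept)
```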
- Returns: Generator """ @@ -144,7 +147,8 @@ def filter_params( for mod in modules: if isinstance(mod, _BatchNorm) and not train_bn: continue - for param in mod.parameters(): + # recursion could yield duplicate parameters for parent modules w/ parameters so disabling it + for param in mod.parameters(recurse=False): if param.requires_grad == requires_grad: yield param @@ -158,7 +162,8 @@ def make_trainable(modules: Union[Module, Iterable[Union[Module, Iterable]]]) -> """ modules = BaseFinetuning.flatten_modules(modules) for module in modules: - for param in module.parameters(): + # recursion could yield duplicate parameters for parent modules w/ parameters so disabling it + for param in module.parameters(recurse=False): param.requires_grad = True @staticmethod @@ -178,7 +183,8 @@ def freeze(modules: Union[Module, Iterable[Union[Module, Iterable]]], train_bn: if isinstance(mod, _BatchNorm) and train_bn: BaseFinetuning.make_trainable(mod) else: - for param in mod.parameters(): + # recursion could yield duplicate parameters for parent modules w/ parameters so disabling it + for param in mod.parameters(recurse=False): param.requires_grad = False @staticmethod @@ -282,7 +288,7 @@ def _store( def on_train_epoch_start(self, trainer, pl_module): """Called when the epoch begins.""" - for opt_idx, optimizer in trainer.train_loop.get_active_optimizers(): + for opt_idx, optimizer in trainer.fit_loop.epoch_loop.batch_loop.get_active_optimizers(): num_param_groups = len(optimizer.param_groups) self.finetune_function(pl_module, trainer.current_epoch, optimizer, opt_idx) current_param_groups = optimizer.param_groups diff --git a/pytorch_lightning/callbacks/lr_monitor.py b/pytorch_lightning/callbacks/lr_monitor.py index 410f8b319c239..d3afcde35f55e 100644 --- a/pytorch_lightning/callbacks/lr_monitor.py +++ b/pytorch_lightning/callbacks/lr_monitor.py @@ -19,8 +19,10 @@ Monitor and logs learning rate for lr schedulers during training. """ +from collections import defaultdict +from typing import Any, DefaultDict, Dict, List, Optional, Set, Type -from typing import Dict, List, Optional +from torch.optim.optimizer import Optimizer from pytorch_lightning.callbacks.base import Callback from pytorch_lightning.utilities import rank_zero_warn @@ -53,7 +55,9 @@ class LearningRateMonitor(Callback): In case of multiple optimizers of same type, they will be named ``Adam``, ``Adam-1`` etc. If a optimizer has multiple parameter groups they will be named ``Adam/pg1``, ``Adam/pg2`` etc. To control naming, pass in a - ``name`` keyword in the construction of the learning rate schdulers + ``name`` keyword in the construction of the learning rate schedulers. + A ``name`` keyword can also be used for parameter groups in the + construction of the optimizer. Example:: @@ -65,6 +69,19 @@ def configure_optimizer(self): } return [optimizer], [lr_scheduler] + Example:: + + def configure_optimizer(self): + optimizer = torch.optim.SGD( + [{ + 'params': [p for p in self.parameters()], + 'name': 'my_parameter_group_name' + }], + lr=0.1 + ) + lr_scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, ...) 
+            return [optimizer], [lr_scheduler]
+
     """
 
     def __init__(self, logging_interval: Optional[str] = None, log_momentum: bool = False):
@@ -138,6 +155,9 @@ def on_train_epoch_start(self, trainer, *args, **kwargs):
 
     def _extract_stats(self, trainer, interval: str) -> Dict[str, float]:
         latest_stat = {}
 
+        names = self._find_names(trainer.lr_schedulers, add_lr_sch_names=False)
+        self._remap_keys(names)
+
         for name, scheduler in zip(self.lr_sch_names, trainer.lr_schedulers):
             if scheduler['interval'] == interval or interval == 'any':
                 opt = scheduler['scheduler'].optimizer
@@ -145,22 +165,33 @@ def _extract_stats(self, trainer, interval: str) -> Dict[str, float]:
                 use_betas = 'betas' in opt.defaults
 
                 for i, pg in enumerate(param_groups):
-                    suffix = f'/pg{i + 1}' if len(param_groups) > 1 else ''
-                    lr = self._extract_lr(param_group=pg, name=f'{name}{suffix}')
+                    name_and_suffix = self._add_suffix(name, param_groups, i)
+                    lr = self._extract_lr(pg, name_and_suffix)
                     latest_stat.update(lr)
                     momentum = self._extract_momentum(
-                        param_group=pg, name=f'{name}-momentum{suffix}', use_betas=use_betas
+                        param_group=pg, name=name_and_suffix.replace(name, f'{name}-momentum'), use_betas=use_betas
                     )
                     latest_stat.update(momentum)
 
         return latest_stat
 
-    def _extract_lr(self, param_group, name: str) -> Dict[str, float]:
+    def _extract_lr(self, param_group: Dict[str, Any], name: str) -> Dict[str, Any]:
         lr = param_group.get('lr')
         self.lrs[name].append(lr)
         return {name: lr}
 
-    def _extract_momentum(self, param_group, name: str, use_betas: bool) -> Dict[str, float]:
+    def _remap_keys(self, names: List[str], token: str = '/pg1') -> None:
+        """
+        This function is used to remap the keys if the number of param groups for a given optimizer increased.
+        """
+        for new_name in names:
+            old_name = new_name.replace(token, '')
+            if token in new_name and old_name in self.lrs:
+                self.lrs[new_name] = self.lrs.pop(old_name)
+            elif new_name not in self.lrs:
+                self.lrs[new_name] = []
+
+    def _extract_momentum(self, param_group: Dict[str, Any], name: str, use_betas: bool) -> Dict[str, float]:
         if not self.log_momentum:
             return {}
 
@@ -168,35 +199,65 @@ def _extract_momentum(self, param_group, name: str, use_betas: bool) -> Dict[str
             self.last_momentum_values[name] = momentum
         return {name: momentum}
 
-    def _find_names(self, lr_schedulers) -> List[str]:
-        # Create uniqe names in the case we have multiple of the same learning
-        # rate schduler + multiple parameter groups
+    def _add_prefix(
+        self, name: str, optimizer_cls: Type[Optimizer], seen_optimizer_types: DefaultDict[Type[Optimizer], int]
+    ) -> str:
+        if optimizer_cls not in seen_optimizer_types:
+            return name
+        count = seen_optimizer_types[optimizer_cls]
+        return name + f'-{count - 1}' if count > 1 else name
+
+    def _add_suffix(self, name: str, param_groups: List[Dict], param_group_index: int, use_names: bool = True) -> str:
+        if len(param_groups) > 1:
+            if not use_names:
+                return f'{name}/pg{param_group_index+1}'
+            pg_name = param_groups[param_group_index].get('name', f'pg{param_group_index+1}')
+            return f'{name}/{pg_name}'
+        elif use_names:
+            pg_name = param_groups[param_group_index].get('name')
+            return f'{name}/{pg_name}' if pg_name else name
+        return name
+
+    def _duplicate_param_group_names(self, param_groups: List[Dict]) -> Set[str]:
+        names = [pg.get('name', f'pg{i}') for i, pg in enumerate(param_groups, start=1)]
+        unique = set(names)
+        if len(names) == len(unique):
+            return set()
+        return set(n for n in names if names.count(n) > 1)
+
+    def _find_names(self, lr_schedulers: List, add_lr_sch_names: bool = 
True) -> List[str]: + # Create unique names in the case we have multiple of the same learning + # rate scheduler + multiple parameter groups names = [] + seen_optimizers = [] + seen_optimizer_types = defaultdict(int) for scheduler in lr_schedulers: sch = scheduler['scheduler'] if scheduler['name'] is not None: name = scheduler['name'] else: - opt_name = 'lr-' + sch.optimizer.__class__.__name__ - i, name = 1, opt_name + name = 'lr-' + sch.optimizer.__class__.__name__ - # Multiple schduler of the same type - while True: - if name not in names: - break - i, name = i + 1, f'{opt_name}-{i}' + seen_optimizers.append(sch.optimizer) + optimizer_cls = type(sch.optimizer) + if scheduler['name'] is None: + seen_optimizer_types[optimizer_cls] += 1 - # Multiple param groups for the same schduler + # Multiple param groups for the same scheduler param_groups = sch.optimizer.param_groups + duplicates = self._duplicate_param_group_names(param_groups) + if duplicates: + raise MisconfigurationException( + 'A single `Optimizer` cannot have multiple parameter groups with identical ' + f'`name` values. {name} has duplicated parameter group names {duplicates}' + ) - if len(param_groups) != 1: - for i, pg in enumerate(param_groups): - temp = f'{name}/pg{i + 1}' - names.append(temp) - else: - names.append(name) + name = self._add_prefix(name, optimizer_cls, seen_optimizer_types) + + names.extend(self._add_suffix(name, param_groups, i) for i in range(len(param_groups))) - self.lr_sch_names.append(name) + if add_lr_sch_names: + self.lr_sch_names.append(name) return names diff --git a/pytorch_lightning/callbacks/model_checkpoint.py b/pytorch_lightning/callbacks/model_checkpoint.py index 1bf8046dcee5b..ec2f5e0d990a5 100644 --- a/pytorch_lightning/callbacks/model_checkpoint.py +++ b/pytorch_lightning/callbacks/model_checkpoint.py @@ -26,6 +26,7 @@ from datetime import timedelta from pathlib import Path from typing import Any, Callable, Dict, Optional, Union +from weakref import proxy import numpy as np import torch @@ -101,7 +102,7 @@ class ModelCheckpoint(Callback): saved (``model.save_weights(filepath)``), else the full model is saved (``model.save(filepath)``). every_n_train_steps: Number of training steps between checkpoints. - If ``every_n_train_steps == None or every_n_train_steps == 0``, we skip saving during training + If ``every_n_train_steps == None or every_n_train_steps == 0``, we skip saving during training. To disable, set ``every_n_train_steps = 0``. This value must be ``None`` or non-negative. This must be mutually exclusive with ``train_time_interval`` and ``every_n_val_epochs``. train_time_interval: Checkpoints are monitored at the specified time interval. @@ -109,8 +110,9 @@ class ModelCheckpoint(Callback): of time it takes to process a single training batch. This is not guaranteed to execute at the exact time specified, but should be close. This must be mutually exclusive with ``every_n_train_steps`` and ``every_n_val_epochs``. + FIXME every_n_val_epochs: Number of validation epochs between checkpoints. - If ``every_n_val_epochs == None or every_n_val_epochs == 0``, we skip saving on validation end + If ``every_n_val_epochs == None or every_n_val_epochs == 0``, we skip saving on validation end. To disable, set ``every_n_val_epochs = 0``. This value must be ``None`` or non-negative. This must be mutually exclusive with ``every_n_train_steps`` and ``train_time_interval``. 
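As a usage sketch of the three mutually exclusive triggers documented here (per `__validate_init_configuration` further down, combining more than one raises a `MisconfigurationException`), each callback instance picks exactly one:

```python
from datetime import timedelta

from pytorch_lightning.callbacks import ModelCheckpoint

ckpt_by_steps = ModelCheckpoint(every_n_train_steps=1000)                   # step-count trigger
ckpt_by_time = ModelCheckpoint(train_time_interval=timedelta(minutes=30))  # wall-clock trigger
ckpt_by_epochs = ModelCheckpoint(every_n_val_epochs=2)                     # validation-epoch trigger
```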
        Setting both ``ModelCheckpoint(..., every_n_val_epochs=V)`` and
@@ -118,7 +120,7 @@ class ModelCheckpoint(Callback):
         will only save checkpoints at epochs 0 < E <= N
         where both values for ``every_n_val_epochs`` and ``check_val_every_n_epoch`` evenly divide E.
     period: Interval (number of epochs) between checkpoints.
-    save_on_train_epoch_end: TODO
+    save_on_train_epoch_end: FIXME
 
     .. warning::
        This argument has been deprecated in v1.3 and will be removed in v1.5.
@@ -203,7 +205,7 @@ def __init__(
         train_time_interval: Optional[timedelta] = None,
         every_n_val_epochs: Optional[int] = None,
         period: Optional[int] = None,
-        save_on_train_epoch_end: bool = True,
+        save_on_train_epoch_end: Optional[bool] = None,
     ):
         super().__init__()
         self.monitor = monitor
@@ -234,6 +236,10 @@ def on_pretrain_routine_start(self, trainer: 'pl.Trainer', pl_module: 'pl.Lightn
         """
         self.__resolve_ckpt_dir(trainer)
         self._save_function = trainer.save_checkpoint
+        if self._save_on_train_epoch_end is None:
+            # if the user runs validation multiple times per training epoch, we try to save the checkpoint
+            # after validation instead of at the end of the training epoch
+            self._save_on_train_epoch_end = trainer.val_check_interval == 1.0
 
     def on_train_start(self, trainer: 'pl.Trainer', pl_module: 'pl.LightningModule') -> None:
         self._last_time_checked = time.monotonic()
@@ -275,11 +281,13 @@ def on_train_epoch_end(
     ) -> None:
         """ Save a checkpoint at the end of the training epoch. """
         if (
-            self._should_skip_saving_checkpoint(trainer) or self._save_on_train_epoch_end
-            # TODO: should every_n_val_epochs be repurposed to work for this too?
+            self._should_skip_saving_checkpoint(trainer) or not self._save_on_train_epoch_end
+            # FIXME: repurpose every_n_val_epochs to work for this hook
+            or self._every_n_val_epochs < 1 or (trainer.current_epoch + 1) % self._every_n_val_epochs != 0
         ):
             return
         # as we advance one step at end of training, we use `global_step - 1` to avoid saving duplicates
+        # FIXME: last_global_step_saved wrong
         trainer.train_loop.global_step -= 1
         self.save_checkpoint(trainer)
         trainer.train_loop.global_step += 1
@@ -298,16 +306,16 @@ def on_train_end(self, trainer: 'pl.Trainer', pl_module: 'pl.LightningModule') -
         Save a checkpoint at the very end of training.
 
         This will only save a checkpoint if `save_last` is also enabled
-        as the monitor metrics produced by training or validation steps or end of epochs
-        is not guaranteed to be available at this stage.
+        as the monitor metrics logged during training/validation steps or end of epochs
+        are not guaranteed to be available at this stage.
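Distilled to a pure function, the way the new `save_on_train_epoch_end=None` default is resolved in `on_pretrain_routine_start` above looks like this (a sketch, not the actual helper; the real code inlines the final comparison):

```python
def resolve_save_on_train_epoch_end(save_on_train_epoch_end, val_check_interval):
    if save_on_train_epoch_end is not None:
        return save_on_train_epoch_end  # an explicit user choice always wins
    # validation once per epoch -> saving at train epoch end is fine;
    # mid-epoch validation -> save after validation instead
    return val_check_interval == 1.0


assert resolve_save_on_train_epoch_end(None, 1.0) is True
assert resolve_save_on_train_epoch_end(None, 0.25) is False
assert resolve_save_on_train_epoch_end(False, 1.0) is False
```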
""" - if self._should_skip_saving_checkpoint(trainer) or not trainer.checkpoint_connector.has_trained: + if self._should_skip_saving_checkpoint(trainer): return if self.save_last and self.verbose: - rank_zero_info("Saving last checkpoint...") + rank_zero_info("Saving latest checkpoint...") # as we advance one step at end of training, we use `global_step - 1` to avoid saving duplicates + monitor_candidates = self._monitor_candidates(trainer, trainer.current_epoch, trainer.global_step - 1) trainer.train_loop.global_step -= 1 - monitor_candidates = self._monitor_candidates(trainer) self._save_last_checkpoint(trainer, monitor_candidates) trainer.train_loop.global_step += 1 @@ -364,6 +372,10 @@ def save_checkpoint(self, trainer: 'pl.Trainer', unused: Optional['pl.LightningM # Mode 3: save last checkpoints self._save_last_checkpoint(trainer, monitor_candidates) + # notify loggers + if trainer.is_global_zero and trainer.logger: + trainer.logger.after_save_checkpoint(proxy(self)) + def _should_skip_saving_checkpoint(self, trainer: 'pl.Trainer') -> bool: from pytorch_lightning.trainer.states import TrainerFn return ( @@ -388,7 +400,7 @@ def __validate_init_configuration(self) -> None: every_n_train_steps_triggered = self._every_n_train_steps >= 1 every_n_val_epochs_triggered = self._every_n_val_epochs >= 1 train_time_interval_triggered = self._train_time_interval is not None - if (every_n_train_steps_triggered + every_n_val_epochs_triggered + train_time_interval_triggered > 1): + if every_n_train_steps_triggered + every_n_val_epochs_triggered + train_time_interval_triggered > 1: raise MisconfigurationException( f"Combination of parameters every_n_train_steps={self._every_n_train_steps}, " f"every_n_val_epochs={self._every_n_val_epochs} and train_time_interval={self._train_time_interval} " @@ -446,8 +458,11 @@ def __init_monitor_mode(self, mode: str) -> None: self.kth_value, self.mode = mode_dict[mode] def __init_triggers( - self, every_n_train_steps: Optional[int], every_n_val_epochs: Optional[int], - train_time_interval: Optional[timedelta], period: Optional[int] + self, + every_n_train_steps: Optional[int], + every_n_val_epochs: Optional[int], + train_time_interval: Optional[timedelta], + period: Optional[int], ) -> None: # Default to running once after each validation epoch if neither @@ -471,7 +486,6 @@ def __init_triggers( ' Please use `every_n_val_epochs` instead.' ) self._every_n_val_epochs = period - self._period = self._every_n_val_epochs @property @@ -512,15 +526,6 @@ def _del_model(self, trainer: 'pl.Trainer', filepath: str) -> None: log.debug(f"Removed checkpoint: {filepath}") def _save_model(self, trainer: 'pl.Trainer', filepath: str) -> None: - if trainer.training_type_plugin.rpc_enabled: - # RPCPlugin manages saving all model states - # TODO: the rpc plugin should wrap trainer.save_checkpoint - # instead of us having to do it here manually - trainer.training_type_plugin.rpc_save_model(trainer, self._do_save, filepath) - else: - self._do_save(trainer, filepath) - - def _do_save(self, trainer: 'pl.Trainer', filepath: str) -> None: # in debugging, track when we save checkpoints trainer.dev_debugger.track_checkpointing_history(filepath) @@ -679,10 +684,10 @@ def _add_backward_monitor_support(self, trainer: 'pl.Trainer') -> None: self.save_top_k = 1 if deprecation_warning: - warning_cache.warn( + warning_cache.deprecation( "Relying on `self.log('val_loss', ...)` to set the ModelCheckpoint monitor is deprecated in v1.2" " and will be removed in v1.4. 
Please, create your own `mc = ModelCheckpoint(monitor='your_monitor')`" - " and use it as `Trainer(callbacks=[mc])`.", DeprecationWarning + " and use it as `Trainer(callbacks=[mc])`.", ) def _validate_monitor_key(self, trainer: 'pl.Trainer') -> None: @@ -695,7 +700,10 @@ def _validate_monitor_key(self, trainer: 'pl.Trainer') -> None: f" {list(metrics.keys())}. " f"HINT: Did you call self.log('{self.monitor}', value) in the LightningModule?" ) - raise MisconfigurationException(m) + if not trainer.fit_loop.epoch_loop.val_loop._has_run: + warning_cache.warn(m) + else: + raise MisconfigurationException(m) def _get_metric_interpolated_filepath_name( self, diff --git a/pytorch_lightning/callbacks/prediction_writer.py b/pytorch_lightning/callbacks/prediction_writer.py index cbcff74ff0278..962877cc5a658 100644 --- a/pytorch_lightning/callbacks/prediction_writer.py +++ b/pytorch_lightning/callbacks/prediction_writer.py @@ -109,7 +109,7 @@ def on_predict_batch_end( if not self.interval.on_batch: return is_distributed = trainer.accelerator_connector.is_distributed - batch_indices = trainer.predict_loop.batch_indices if is_distributed else None + batch_indices = trainer.predict_loop.epoch_loop.current_batch_indices if is_distributed else None self.write_on_batch_end(trainer, pl_module, outputs, batch_indices, batch, batch_idx, dataloader_idx) def on_predict_epoch_end( diff --git a/pytorch_lightning/callbacks/progress.py b/pytorch_lightning/callbacks/progress.py index 0fe05ff812e20..2fd4b8c25df19 100644 --- a/pytorch_lightning/callbacks/progress.py +++ b/pytorch_lightning/callbacks/progress.py @@ -200,7 +200,7 @@ def on_init_end(self, trainer): self._trainer = trainer def on_train_start(self, trainer, pl_module): - self._train_batch_idx = trainer.train_loop.batch_idx + self._train_batch_idx = trainer.fit_loop.batch_idx def on_train_epoch_start(self, trainer, pl_module): self._train_batch_idx = 0 diff --git a/pytorch_lightning/callbacks/pruning.py b/pytorch_lightning/callbacks/pruning.py index e7da752d1c844..ced8d29c14424 100644 --- a/pytorch_lightning/callbacks/pruning.py +++ b/pytorch_lightning/callbacks/pruning.py @@ -259,25 +259,26 @@ def _create_pruning_fn(self, pruning_fn: str, **kwargs: Any) -> Union[Callable, def _wrap_pruning_fn(pruning_fn: Callable, **kwargs: Any) -> Callable: return partial(pruning_fn, **kwargs) - def make_pruning_permanent(self, pl_module: LightningModule) -> None: + def make_pruning_permanent(self, module: nn.Module) -> None: """ Removes pruning buffers from any pruned modules Adapted from https://github.com/pytorch/pytorch/blob/1.7.1/torch/nn/utils/prune.py#L1176-L1180 """ - for _, module in pl_module.named_modules(): + for _, module in module.named_modules(): for k in list(module._forward_pre_hooks): hook = module._forward_pre_hooks[k] if isinstance(hook, pytorch_prune.BasePruningMethod): hook.remove(module) del module._forward_pre_hooks[k] - def _restore_original_weights(self, module: nn.Module, orig_module: nn.Module, tensor_name: str) -> None: - trained = getattr(module, tensor_name) - orig = getattr(orig_module, tensor_name) - if trained is None or orig is None: + @staticmethod + def _copy_param(new: nn.Module, old: nn.Module, name: str) -> None: + dst = getattr(new, name) + src = getattr(old, name) + if dst is None or src is None or not isinstance(dst, torch.Tensor) or not isinstance(src, torch.Tensor): return - trained.data = orig.data.to(trained.device) + dst.data = src.data.to(dst.device) def apply_lottery_ticket_hypothesis(self) -> None: r""" @@ -292,14 +293,6 @@ 
def apply_lottery_ticket_hypothesis(self) -> None: The ``resample_parameters`` argument can be used to reset the parameters with a new :math:`\theta_z \sim \mathcal{D}_\theta` """ # noqa: E501 - - def copy_param(new: nn.Module, old: nn.Module, name: str) -> None: - dst = getattr(new, name) - src = getattr(old, name) - if dst is None or src is None or not isinstance(dst, torch.Tensor) or not isinstance(src, torch.Tensor): - return - dst.data = src.data.to(dst.device) - assert self._original_layers is not None for d in self._original_layers.values(): copy = d["data"] @@ -309,7 +302,7 @@ def copy_param(new: nn.Module, old: nn.Module, name: str) -> None: copy.reset_parameters() for i, name in names: new, new_name = self._parameters_to_prune[i] - copy_param(new, copy, name) + self._copy_param(new, copy, name) def _apply_local_pruning(self, amount: float) -> None: for module, name in self._parameters_to_prune: diff --git a/pytorch_lightning/callbacks/stochastic_weight_avg.py b/pytorch_lightning/callbacks/stochastic_weight_avg.py index 3ec7774d5f8b6..0cd788c8c8647 100644 --- a/pytorch_lightning/callbacks/stochastic_weight_avg.py +++ b/pytorch_lightning/callbacks/stochastic_weight_avg.py @@ -159,7 +159,7 @@ def on_fit_start(self, trainer: 'pl.Trainer', pl_module: 'pl.LightningModule'): self._max_epochs = trainer.max_epochs if self._model_contains_batch_norm: # virtually increase max_epochs to perform batch norm update on latest epoch. - trainer.train_loop.max_epochs += 1 + trainer.fit_loop.max_epochs += 1 def on_train_epoch_start(self, trainer: 'pl.Trainer', pl_module: 'pl.LightningModule'): if trainer.current_epoch == self.swa_start: @@ -220,19 +220,20 @@ def on_train_epoch_start(self, trainer: 'pl.Trainer', pl_module: 'pl.LightningMo # performing only one pass over the train data-loader to compute activation statistics # Therefore, we will virtually increase `num_training_batches` by 1 and skip backward. trainer.num_training_batches += 1 - trainer.train_loop._skip_backward = True + trainer.fit_loop._skip_backward = True self._accumulate_grad_batches = trainer.accumulate_grad_batches - trainer.accumulate_grad_batches = len(trainer.train_dataloader) + + trainer.accumulate_grad_batches = trainer.num_training_batches def on_train_epoch_end(self, trainer: 'pl.Trainer', *args): - trainer.train_loop._skip_backward = False + trainer.fit_loop._skip_backward = False def on_train_end(self, trainer: 'pl.Trainer', pl_module: 'pl.LightningModule'): if self._model_contains_batch_norm and trainer.current_epoch == self.swa_end + 1: # BatchNorm epoch update. Reset state trainer.accumulate_grad_batches = self._accumulate_grad_batches trainer.num_training_batches -= 1 - trainer.train_loop.max_epochs -= 1 + trainer.fit_loop.max_epochs -= 1 self.reset_momenta() elif trainer.current_epoch == self.swa_end: # Last SWA epoch. 
Transfer weights from average model to pl_module @@ -265,7 +266,7 @@ def reset_momenta(self): """ Adapted from https://github.com/pytorch/pytorch/blob/v1.7.1/torch/optim/swa_utils.py#L164-L165 """ - for bn_module in self.momenta.keys(): + for bn_module in self.momenta: bn_module.momentum = self.momenta[bn_module] @staticmethod diff --git a/pytorch_lightning/callbacks/timer.py b/pytorch_lightning/callbacks/timer.py index 9b93499c82ea1..ba42419141253 100644 --- a/pytorch_lightning/callbacks/timer.py +++ b/pytorch_lightning/callbacks/timer.py @@ -170,4 +170,5 @@ def _check_time_remaining(self, trainer: 'pl.Trainer') -> None: should_stop = trainer.accelerator.broadcast(should_stop) trainer.should_stop = trainer.should_stop or should_stop if should_stop and self._verbose: - rank_zero_info(f"Time limit reached. Elapsed time is {self.time_elapsed}. Signaling Trainer to stop.") + elapsed = timedelta(seconds=int(self.time_elapsed(RunningStage.TRAINING))) + rank_zero_info(f"Time limit reached. Elapsed time is {elapsed}. Signaling Trainer to stop.") diff --git a/pytorch_lightning/core/datamodule.py b/pytorch_lightning/core/datamodule.py index 84210e9d7b667..df3fa26a24a17 100644 --- a/pytorch_lightning/core/datamodule.py +++ b/pytorch_lightning/core/datamodule.py @@ -20,7 +20,7 @@ from torch.utils.data import DataLoader, Dataset, IterableDataset from pytorch_lightning.core.hooks import CheckpointHooks, DataHooks -from pytorch_lightning.utilities import rank_zero_only +from pytorch_lightning.utilities import rank_zero_deprecation from pytorch_lightning.utilities.argparse import add_argparse_args, from_argparse_args, get_init_arguments_and_types @@ -160,7 +160,13 @@ def has_prepared_data(self) -> bool: Returns: bool: True if ``datamodule.prepare_data()`` has been called. False by default. + + .. deprecated:: v1.4 + Will be removed in v1.6.0. """ + rank_zero_deprecation( + 'DataModule property `has_prepared_data` was deprecated in v1.4 and will be removed in v1.6.' + ) return self._has_prepared_data @property @@ -169,7 +175,11 @@ def has_setup_fit(self) -> bool: Returns: bool: True ``if datamodule.setup(stage='fit')`` has been called. False by default. + + .. deprecated:: v1.4 + Will be removed in v1.6.0. """ + rank_zero_deprecation('DataModule property `has_setup_fit` was deprecated in v1.4 and will be removed in v1.6.') return self._has_setup_fit @property @@ -178,7 +188,13 @@ def has_setup_validate(self) -> bool: Returns: bool: True if ``datamodule.setup(stage='validate')`` has been called. False by default. + + .. deprecated:: v1.4 + Will be removed in v1.6.0. """ + rank_zero_deprecation( + 'DataModule property `has_setup_validate` was deprecated in v1.4 and will be removed in v1.6.' + ) return self._has_setup_validate @property @@ -187,7 +203,13 @@ def has_setup_test(self) -> bool: Returns: bool: True if ``datamodule.setup(stage='test')`` has been called. False by default. + + .. deprecated:: v1.4 + Will be removed in v1.6.0. """ + rank_zero_deprecation( + 'DataModule property `has_setup_test` was deprecated in v1.4 and will be removed in v1.6.' + ) return self._has_setup_test @property @@ -196,7 +218,13 @@ def has_setup_predict(self) -> bool: Returns: bool: True if ``datamodule.setup(stage='predict')`` has been called. False by default. + + .. deprecated:: v1.4 + Will be removed in v1.6.0. """ + rank_zero_deprecation( + 'DataModule property `has_setup_predict` was deprecated in v1.4 and will be removed in v1.6.' 
+ ) return self._has_setup_predict @property @@ -205,7 +233,13 @@ def has_teardown_fit(self) -> bool: Returns: bool: True ``if datamodule.teardown(stage='fit')`` has been called. False by default. + + .. deprecated:: v1.4 + Will be removed in v1.6.0. """ + rank_zero_deprecation( + 'DataModule property `has_teardown_fit` was deprecated in v1.4 and will be removed in v1.6.' + ) return self._has_teardown_fit @property @@ -214,7 +248,13 @@ def has_teardown_validate(self) -> bool: Returns: bool: True if ``datamodule.teardown(stage='validate')`` has been called. False by default. + + .. deprecated:: v1.4 + Will be removed in v1.6.0. """ + rank_zero_deprecation( + 'DataModule property `has_teardown_validate` was deprecated in v1.4 and will be removed in v1.6.' + ) return self._has_teardown_validate @property @@ -223,7 +263,13 @@ def has_teardown_test(self) -> bool: Returns: bool: True if ``datamodule.teardown(stage='test')`` has been called. False by default. + + .. deprecated:: v1.4 + Will be removed in v1.6.0. """ + rank_zero_deprecation( + 'DataModule property `has_teardown_test` was deprecated in v1.4 and will be removed in v1.6.' + ) return self._has_teardown_test @property @@ -232,7 +278,13 @@ def has_teardown_predict(self) -> bool: Returns: bool: True if ``datamodule.teardown(stage='predict')`` has been called. False by default. + + .. deprecated:: v1.4 + Will be removed in v1.6.0. """ + rank_zero_deprecation( + 'DataModule property `has_teardown_predict` was deprecated in v1.4 and will be removed in v1.6.' + ) return self._has_teardown_predict @classmethod @@ -329,7 +381,7 @@ def test_dataloader(): def __new__(cls, *args: Any, **kwargs: Any) -> 'LightningDataModule': obj = super().__new__(cls) # track `DataHooks` calls and run `prepare_data` only on rank zero - obj.prepare_data = cls._track_data_hook_calls(obj, rank_zero_only(obj.prepare_data)) + obj.prepare_data = cls._track_data_hook_calls(obj, obj.prepare_data) obj.setup = cls._track_data_hook_calls(obj, obj.setup) obj.teardown = cls._track_data_hook_calls(obj, obj.teardown) return obj @@ -381,8 +433,13 @@ def wrapped_fn(*args: str, **kwargs: Optional[str]) -> Any: has_run = obj._has_prepared_data obj._has_prepared_data = True - if not has_run: - return fn(*args, **kwargs) + if has_run: + rank_zero_deprecation( + f"DataModule.{name} has already been called, so it will not be called again. " + f"In v1.6 this behavior will change to always call DataModule.{name}." + ) + else: + fn(*args, **kwargs) return wrapped_fn diff --git a/pytorch_lightning/core/grads.py b/pytorch_lightning/core/grads.py index 30a2f0ae7e38f..f6a0d41035460 100644 --- a/pytorch_lightning/core/grads.py +++ b/pytorch_lightning/core/grads.py @@ -18,7 +18,7 @@ from torch.nn import Module -from pytorch_lightning.utilities.distributed import rank_zero_deprecation +from pytorch_lightning.utilities import rank_zero_deprecation from pytorch_lightning.utilities.grads import grad_norm as new_grad_norm diff --git a/pytorch_lightning/core/hooks.py b/pytorch_lightning/core/hooks.py index 0ad6b131ad14b..50b058c3c24c2 100644 --- a/pytorch_lightning/core/hooks.py +++ b/pytorch_lightning/core/hooks.py @@ -13,14 +13,13 @@ # limitations under the License. 
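The deprecated `DataModule` properties above all follow the same recipe: the attribute keeps working, but reading it now emits a deprecation message. A generic sketch of that pattern, with plain `warnings` standing in for Lightning's `rank_zero_deprecation` helper:

```python
import warnings


class Thing:

    def __init__(self):
        self._has_prepared_data = False

    @property
    def has_prepared_data(self) -> bool:
        # warn on access, then return the old value unchanged
        warnings.warn(
            "`has_prepared_data` was deprecated in v1.4 and will be removed in v1.6.",
            DeprecationWarning,
        )
        return self._has_prepared_data


with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    _ = Thing().has_prepared_data
assert caught and issubclass(caught[0].category, DeprecationWarning)
```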
"""Various hooks to be used in the Lightning code.""" -from typing import Any, Dict, List, Optional, Union +from typing import Any, Dict, List, Optional import torch from torch.optim.optimizer import Optimizer -from torch.utils.data import DataLoader from pytorch_lightning.utilities import move_data_to_device, rank_zero_warn -from pytorch_lightning.utilities.types import STEP_OUTPUT +from pytorch_lightning.utilities.types import EVAL_DATALOADERS, STEP_OUTPUT, TRAIN_DATALOADERS class ModelHooks: @@ -428,14 +427,13 @@ def teardown(self, stage: Optional[str] = None) -> None: stage: either ``'fit'``, ``'validate'``, ``'test'``, or ``'predict'`` """ - def train_dataloader(self) -> Union[DataLoader, List[DataLoader], Dict[str, DataLoader]]: + def train_dataloader(self) -> TRAIN_DATALOADERS: """ Implement one or more PyTorch DataLoaders for training. Return: - Either a single PyTorch :class:`~torch.utils.data.DataLoader` or a collection of these - (list, dict, nested lists and dicts). In the case of multiple dataloaders, please see - this :ref:`page ` + A collection of :class:`torch.utils.data.DataLoader` specifying training samples. + In the case of multiple dataloaders, please see this :ref:`page `. The dataloader you return will not be called every epoch unless you set :paramref:`~pytorch_lightning.trainer.Trainer.reload_dataloaders_every_epoch` to ``True``. @@ -503,7 +501,7 @@ def train_dataloader(self): """ rank_zero_warn("`train_dataloader` must be implemented to be used with the Lightning Trainer") - def test_dataloader(self) -> Union[DataLoader, List[DataLoader]]: + def test_dataloader(self) -> EVAL_DATALOADERS: r""" Implement one or multiple PyTorch DataLoaders for testing. @@ -533,7 +531,7 @@ def test_dataloader(self) -> Union[DataLoader, List[DataLoader]]: There is no need to set it yourself. Return: - Single or multiple PyTorch DataLoaders. + A :class:`torch.utils.data.DataLoader` or a sequence of them specifying testing samples. Example:: @@ -563,7 +561,7 @@ def test_dataloader(self): will have an argument ``dataloader_idx`` which matches the order here. """ - def val_dataloader(self) -> Union[DataLoader, List[DataLoader]]: + def val_dataloader(self) -> EVAL_DATALOADERS: r""" Implement one or multiple PyTorch DataLoaders for validation. @@ -584,7 +582,7 @@ def val_dataloader(self) -> Union[DataLoader, List[DataLoader]]: There is no need to set it yourself. Return: - Single or multiple PyTorch DataLoaders. + A :class:`torch.utils.data.DataLoader` or a sequence of them specifying validation samples. Examples:: @@ -614,7 +612,7 @@ def val_dataloader(self): will have an argument ``dataloader_idx`` which matches the order here. """ - def predict_dataloader(self) -> Union[DataLoader, List[DataLoader]]: + def predict_dataloader(self) -> EVAL_DATALOADERS: r""" Implement one or multiple PyTorch DataLoaders for prediction. @@ -632,7 +630,7 @@ def predict_dataloader(self) -> Union[DataLoader, List[DataLoader]]: There is no need to set it yourself. Return: - Single or multiple PyTorch DataLoaders. + A :class:`torch.utils.data.DataLoader` or a sequence of them specifying prediction samples. Note: In the case where you return multiple prediction dataloaders, the :meth:`predict` @@ -807,7 +805,8 @@ def on_save_checkpoint(self, checkpoint: Dict[str, Any]) -> None: else you might want to save. Args: - checkpoint: Checkpoint to be saved + checkpoint: The full checkpoint dictionary before it gets dumped to a file. + Implementations of this hook can insert additional data into this dictionary. 
Example:: diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 74c1ef442f993..2478f698e659b 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -24,30 +24,31 @@ import uuid from abc import ABC from argparse import Namespace -from functools import partial from pathlib import Path -from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union +from typing import Any, Callable, Dict, List, Mapping, Optional, Sequence, Tuple, Union +import numpy as np import torch from torch import ScriptModule, Tensor from torch.nn import Module from torch.optim.optimizer import Optimizer +from torchmetrics import Metric from pytorch_lightning.core.grads import GradInformation from pytorch_lightning.core.hooks import CheckpointHooks, DataHooks, ModelHooks from pytorch_lightning.core.memory import ModelSummary from pytorch_lightning.core.optimizer import LightningOptimizer from pytorch_lightning.core.saving import ALLOWED_CONFIG_TYPES, ModelIO, PRIMITIVE_TYPES -from pytorch_lightning.core.step_result import Result +from pytorch_lightning.trainer.connectors.logger_connector.fx_validator import FxValidator from pytorch_lightning.utilities import rank_zero_deprecation, rank_zero_warn from pytorch_lightning.utilities.apply_func import apply_to_collection, convert_to_tensors from pytorch_lightning.utilities.cloud_io import get_filesystem from pytorch_lightning.utilities.device_dtype_mixin import DeviceDtypeModuleMixin -from pytorch_lightning.utilities.distributed import sync_ddp_if_available, tpu_distributed +from pytorch_lightning.utilities.distributed import distributed_available, sync_ddp from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.parsing import AttributeDict, collect_init_args, save_hyperparameters from pytorch_lightning.utilities.signature_utils import is_param_in_hook_signature -from pytorch_lightning.utilities.types import _METRIC, EPOCH_OUTPUT, STEP_OUTPUT +from pytorch_lightning.utilities.types import _METRIC_COLLECTION, EPOCH_OUTPUT, STEP_OUTPUT from pytorch_lightning.utilities.warnings import WarningCache warning_cache = WarningCache() @@ -80,6 +81,7 @@ class LightningModule( "model_size", "automatic_optimization", "truncated_bptt_steps", + "loaded_optimizer_states_dict", ] + DeviceDtypeModuleMixin.__jit_unused_properties__ def __init__(self, *args: Any, **kwargs: Any) -> None: @@ -89,7 +91,7 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: # torch/nn/modules/module.py#L227) torch._C._log_api_usage_once(f"lightning.module.{self.__class__.__name__}") - self.loaded_optimizer_states_dict = {} + self._loaded_optimizer_states_dict = {} #: Pointer to the trainer object self.trainer = None @@ -106,13 +108,13 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: # optionally can be set by user self._example_input_array = None self._datamodule = None - self._results: Optional[Result] = None self._current_fx_name: Optional[str] = None self._running_manual_backward: bool = False self._current_dataloader_idx: Optional[int] = None self._automatic_optimization: bool = True self._truncated_bptt_steps: int = 0 self._param_requires_grad_state = dict() + self._metric_attributes: Optional[Dict[int, str]] = None def optimizers(self, use_pl_optimizer: bool = True) -> Union[Optimizer, List[Optimizer], List[LightningOptimizer]]: if use_pl_optimizer: @@ -170,12 +172,31 @@ def example_input_array(self, example: Any) -> None: @property def 
datamodule(self) -> Any:
-        rank_zero_deprecation(
+        warning_cache.deprecation(
             "The `LightningModule.datamodule` property is deprecated in v1.3 and will be removed in v1.5."
-            " Access the datamodule through using `self.trainer.datamodule` instead."
+            " Access the datamodule through `self.trainer.datamodule` instead.",
+            stacklevel=6,
         )
         return self._datamodule
 
+    @property
+    def loaded_optimizer_states_dict(self) -> dict:
+        warning_cache.deprecation(
+            "The `LightningModule.loaded_optimizer_states_dict` property is deprecated in v1.4"
+            " and will be removed in v1.6.",
+            stacklevel=6,
+        )
+        return self._loaded_optimizer_states_dict
+
+    @loaded_optimizer_states_dict.setter
+    def loaded_optimizer_states_dict(self, val: dict) -> None:
+        warning_cache.deprecation(
+            "The `LightningModule.loaded_optimizer_states_dict` property is deprecated in v1.4"
+            " and will be removed in v1.6.",
+            stacklevel=6,
+        )
+        self._loaded_optimizer_states_dict = val
+
     @datamodule.setter
     def datamodule(self, datamodule: Any) -> None:
         self._datamodule = datamodule
@@ -225,10 +246,10 @@ def _apply_batch_transfer_handler(
         if is_param_in_hook_signature(self.transfer_batch_to_device, 'dataloader_idx'):
             batch = self.transfer_batch_to_device(batch, device, dataloader_idx)
         else:
-            warning_cache.warn(
+            warning_cache.deprecation(
                 "`transfer_batch_to_device` hook signature has changed in v1.4."
                 " `dataloader_idx` parameter has been added to it. Support for"
-                " the old signature will be removed in v1.6", DeprecationWarning
+                " the old signature will be removed in v1.6"
             )
             batch = self.transfer_batch_to_device(batch, device)
 
@@ -259,19 +280,22 @@ def forward(self, x):
     def log(
         self,
         name: str,
-        value: Any,
+        value: _METRIC_COLLECTION,
         prog_bar: bool = False,
         logger: bool = True,
         on_step: Optional[bool] = None,
         on_epoch: Optional[bool] = None,
-        reduce_fx: Callable = torch.mean,
+        reduce_fx: Union[str, Callable] = 'default',  # TODO: change to 'mean' when `sync_dist_op` is removed in 1.6
         tbptt_reduce_fx: Optional = None,  # noqa: Remove in 1.6
         tbptt_pad_token: Optional = None,  # noqa: Remove in 1.6
         enable_graph: bool = False,
         sync_dist: bool = False,
-        sync_dist_op: Union[Any, str] = 'mean',
+        sync_dist_op: Optional = None,  # noqa: Remove in 1.6
         sync_dist_group: Optional[Any] = None,
         add_dataloader_idx: bool = True,
+        batch_size: Optional[int] = None,
+        metric_attribute: Optional[str] = None,
+        rank_zero_only: Optional[bool] = None,
     ) -> None:
         """
         Log a key, value
@@ -294,20 +318,25 @@ def log(
             "validation_epoch_end*", "F", "T", "F", "T"
 
         Args:
-            name: key name
-            value: value name
+            name: key to log
+            value: value to log. Can be a ``float``, ``Tensor``, ``Metric``, or a dictionary of the former.
             prog_bar: if True logs to the progress bar
             logger: if True logs to the logger
             on_step: if True logs at this step. None auto-logs at the training_step but not validation/test_step
             on_epoch: if True logs epoch accumulated metrics. None auto-logs at the val/test step but not training_step
-            reduce_fx: reduction function over step values for end of epoch. Torch.mean by default
+            reduce_fx: reduction function over step values for end of epoch. :meth:`torch.mean` by default.
             enable_graph: if True, will not auto detach the graph
             sync_dist: if True, reduces the metric across GPUs/TPUs
-            sync_dist_op: the op to sync across GPUs/TPUs
             sync_dist_group: the ddp group to sync across
             add_dataloader_idx: if True, appends the index of the current dataloader to
                 the name (when using multiple).
If False, user needs to give unique names for each dataloader to not mix values
+            batch_size: Current batch_size. This will be directly inferred from the loaded batch,
+                but some data structures might need to explicitly provide it.
+            metric_attribute: To restore the metric state, Lightning requires the reference of the
+                :class:`torchmetrics.Metric` in your model. This is found automatically if it is a model attribute.
+            rank_zero_only: Whether the value will be logged only on rank 0. This will prevent synchronization which
+                would produce a deadlock as not all processes would perform this log call.
         """
         if tbptt_reduce_fx is not None:
             rank_zero_deprecation(
@@ -321,62 +350,105 @@ def log(
                 ' Please, open a discussion explaining your use-case in'
                 ' `https://github.com/PyTorchLightning/pytorch-lightning/discussions`'
             )
+        if sync_dist_op is not None:
+            rank_zero_deprecation(
+                f"`self.log(sync_dist_op='{sync_dist_op}')` is deprecated and will be removed in v1.6."
+                f" Use `self.log(reduce_fx={sync_dist_op})` instead."
+            )
+            if reduce_fx == 'default':
+                reduce_fx = sync_dist_op
+        elif reduce_fx == 'default':
+            reduce_fx = 'mean'
+
+        # check for invalid values
+        apply_to_collection(value, dict, self.__check_not_nested, name)
+        apply_to_collection(
+            value, object, self.__check_allowed, name, value, wrong_dtype=(numbers.Number, Metric, Tensor, dict)
+        )
 
-        if self._results is not None:
-            # TODO: if logged twice fail with crash
+        # set the default depending on the fx_name
+        on_step = self.__auto_choose_log_on_step(on_step)
+        on_epoch = self.__auto_choose_log_on_epoch(on_epoch)
 
-            # set the default depending on the fx_name
-            on_step = self.__auto_choose_log_on_step(on_step)
-            on_epoch = self.__auto_choose_log_on_epoch(on_epoch)
+        results = self.trainer._results
+        assert results is not None
+        assert self._current_fx_name is not None
+        FxValidator.check_logging(self._current_fx_name, on_step=on_step, on_epoch=on_epoch)
 
-            assert self._current_fx_name is not None
-            self.trainer.logger_connector.check_logging(self._current_fx_name, on_step=on_step, on_epoch=on_epoch)
+        # make sure user doesn't introduce logic for multi-dataloaders
+        if "/dataloader_idx_" in name:
+            raise MisconfigurationException(
+                f"You called `self.log` with the key `{name}`"
+                " but it should not contain information about `dataloader_idx`"
+            )
+
+        value = apply_to_collection(value, numbers.Number, self.__to_tensor)
+
+        if self.trainer.logger_connector.should_reset_tensors(self._current_fx_name):
+            # if we started a new epoch (running its first batch) the hook name has changed
+            # reset any tensors for the new hook name
+            results.reset(metrics=False, fx=self._current_fx_name)
 
-            # make sure user doesn't introduce logic for multi-dataloaders
-            if "/dataloader_idx_" in name:
+        if metric_attribute is None and isinstance(value, Metric):
+            if self._metric_attributes is None:
+                # compute once
+                self._metric_attributes = {
+                    id(module): name
+                    for name, module in self.named_modules() if isinstance(module, Metric)
+                }
+                if not self._metric_attributes:
+                    raise MisconfigurationException(
+                        "Could not find the `LightningModule` attribute for the `torchmetrics.Metric` logged."
+                        " You can fix this by setting an attribute for the metric in your `LightningModule`."
+                    )
+            # try to find the passed metric in the LightningModule
+            metric_attribute = self._metric_attributes.get(id(value), None)
+            if metric_attribute is None:
                 raise MisconfigurationException(
-                    f"Logged key: {name} should not contain information about dataloader_idx."
+ "Could not find the `LightningModule` attribute for the `torchmetrics.Metric` logged." + f" You can fix this by calling `self.log({name}, ..., metric_attribute=name)` where `name` is one" + f" of {list(self._metric_attributes.values())}" ) - value = self.__sync( - value, - sync_fn=self.trainer.training_type_plugin.reduce, - sync_dist=sync_dist, - sync_dist_op=sync_dist_op, - sync_dist_group=sync_dist_group, - device=self.device, - ) + results.log( + self._current_fx_name, + name, + value, + prog_bar=prog_bar, + logger=logger, + on_step=on_step, + on_epoch=on_epoch, + reduce_fx=reduce_fx, + enable_graph=enable_graph, + dataloader_idx=(self._current_dataloader_idx if add_dataloader_idx else None), + batch_size=batch_size, + sync_dist=sync_dist and distributed_available(), + sync_dist_fn=self.trainer.training_type_plugin.reduce or sync_ddp, + sync_dist_group=sync_dist_group, + metric_attribute=metric_attribute, + rank_zero_only=rank_zero_only, + ) - self._results.log( - name, - value, - prog_bar=prog_bar, - logger=logger, - on_step=on_step, - on_epoch=on_epoch, - reduce_fx=reduce_fx, - enable_graph=enable_graph, - dataloader_idx=(self._current_dataloader_idx if add_dataloader_idx else None), - ) + self.trainer.logger_connector._current_fx = self._current_fx_name def log_dict( self, - dictionary: dict, + dictionary: Mapping[str, _METRIC_COLLECTION], prog_bar: bool = False, logger: bool = True, on_step: Optional[bool] = None, on_epoch: Optional[bool] = None, - reduce_fx: Callable = torch.mean, - tbptt_reduce_fx: Optional = None, # noqa: Remove in 1.6 - tbptt_pad_token: Optional = None, # noqa: Remove in 1.6 + reduce_fx: Union[str, Callable] = 'default', # TODO: change to 'mean' when `sync_dist_op` is removed in 1.6 + tbptt_reduce_fx: Optional[Any] = None, # noqa: Remove in 1.6 + tbptt_pad_token: Optional[Any] = None, # noqa: Remove in 1.6 enable_graph: bool = False, sync_dist: bool = False, - sync_dist_op: Union[Any, str] = 'mean', + sync_dist_op: Optional[Any] = None, # noqa: Remove in 1.6 sync_dist_group: Optional[Any] = None, add_dataloader_idx: bool = True, ) -> None: """ - Log a dictonary of values at once + Log a dictionary of values at once Example:: @@ -384,15 +456,15 @@ def log_dict( self.log_dict(values) Args: - dictionary: key value pairs (str, tensors) + dictionary: key value pairs. + The values can be a ``float``, ``Tensor``, ``Metric``, or a dictionary of the former. prog_bar: if True logs to the progress base logger: if True logs to the logger on_step: if True logs at this step. None auto-logs for training_step but not validation/test_step on_epoch: if True logs epoch accumulated metrics. None auto-logs for val/test step but not training_step - reduce_fx: reduction function over step values for end of epoch. Torch.mean by default + reduce_fx: reduction function over step values for end of epoch. :meth:`torch.mean` by default. enable_graph: if True, will not auto detach the graph sync_dist: if True, reduces the metric across GPUs/TPUs - sync_dist_op: the op to sync across GPUs/TPUs sync_dist_group: the ddp group sync across add_dataloader_idx: if True, appends the index of the current dataloader to the name (when using multiple). 
If False, user needs to give unique names for
@@ -417,29 +489,32 @@ def log_dict(
         )
 
     @staticmethod
-    def __sync(
-        value: _METRIC,
-        sync_fn: Optional[Callable] = None,
-        sync_dist: bool = False,
-        sync_dist_op: Union[Any, str] = 'mean',
-        sync_dist_group: Optional[Any] = None,
-        device: torch.device = None,
-    ) -> _METRIC:
-        """Sync across workers when using distributed training"""
-        if not isinstance(value, (torch.Tensor, numbers.Number)):
-            return value
-
-        sync_fn = sync_fn or sync_ddp_if_available
-        dist_available = torch.distributed.is_available() and torch.distributed.is_initialized() or tpu_distributed()
-        if not sync_dist or not dist_available:
-            return value
-
-        # TODO: Find a way to make the reduction only once, so we don't need to clone.
-        if isinstance(value, torch.Tensor):
-            value = value.clone()
-        else:
-            value = torch.tensor(value, device=device, dtype=torch.float)
-        return sync_fn(value, group=sync_dist_group, reduce_op=sync_dist_op)
+    def __check_not_nested(value: dict, name: str) -> dict:
+        # self-imposed restriction. for simplicity
+        if any(isinstance(v, dict) for v in value.values()):
+            raise ValueError(f'`self.log({name}, {value})` was called, but nested dictionaries cannot be logged')
+        return value
+
+    @staticmethod
+    def __check_allowed(v: Any, name: str, value: Any) -> None:
+        raise ValueError(f'`self.log({name}, {value})` was called, but `{type(v).__name__}` values cannot be logged')
+
+    def __to_tensor(self, value: numbers.Number) -> torch.Tensor:
+        return torch.tensor(value, device=self.device)
+
+    def log_grad_norm(self, grad_norm_dict: Dict[str, torch.Tensor]) -> None:
+        """Override this method to change the default behaviour of ``log_grad_norm``.
+
+        Args:
+            grad_norm_dict: Dictionary containing current grad norm metrics
+
+        Example::
+
+            # DEFAULT
+            def log_grad_norm(self, grad_norm_dict):
+                self.log_dict(grad_norm_dict, on_step=True, on_epoch=True, prog_bar=True, logger=True)
+        """
+        self.log_dict(grad_norm_dict, on_step=True, on_epoch=True, prog_bar=True, logger=True)
 
     def write_prediction(
         self, name: str, value: Union[torch.Tensor, List[torch.Tensor]], filename: str = 'predictions.pt'
@@ -468,7 +543,7 @@ def write_prediction(
             ' and will be removed in v1.5.'
         )
 
-        self.trainer.evaluation_loop.predictions._add_prediction(name, value, filename)
+        self.trainer._evaluation_loop.predictions._add_prediction(name, value, filename)
 
     def write_prediction_dict(self, predictions_dict: Dict[str, Any], filename: str = 'predictions.pt'):
         """
@@ -535,8 +610,7 @@ def all_gather(
         group = group if group is not None else torch.distributed.group.WORLD
         all_gather = self.trainer.accelerator.all_gather
         data = convert_to_tensors(data, device=self.device)
-        all_gather = partial(all_gather, group=group, sync_grads=sync_grads)
-        return apply_to_collection(data, torch.Tensor, all_gather)
+        return apply_to_collection(data, torch.Tensor, all_gather, group=group, sync_grads=sync_grads)
 
     def forward(self, *args, **kwargs) -> Any:
         r"""
@@ -1093,6 +1167,29 @@ def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: Optional[int]
         By default, it calls :meth:`~pytorch_lightning.core.lightning.LightningModule.forward`.
         Override to add any processing logic.
 
+        The :meth:`~pytorch_lightning.core.lightning.LightningModule.predict_step` is used
+        to scale inference on multiple devices.
+
+        To prevent an OOM error, it is possible to use the :class:`~pytorch_lightning.callbacks.BasePredictionWriter`
+        callback to write the predictions to disk or database after each batch or on epoch end.
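+
+        For illustration, a minimal batch-level writer could look like the sketch below. It assumes the
+        ``BasePredictionWriter`` hook signatures from ``pytorch_lightning.callbacks``; the output directory
+        and file naming are hypothetical::
+
+            import os
+            import torch
+
+            from pytorch_lightning.callbacks import BasePredictionWriter
+
+            class SimpleWriter(BasePredictionWriter):
+
+                def __init__(self, output_dir, write_interval="batch"):
+                    super().__init__(write_interval)
+                    self.output_dir = output_dir
+
+                def write_on_batch_end(
+                    self, trainer, pl_module, prediction, batch_indices, batch, batch_idx, dataloader_idx
+                ):
+                    # persist each batch of predictions instead of accumulating them in memory
+                    torch.save(prediction, os.path.join(self.output_dir, f"{dataloader_idx}_{batch_idx}.pt"))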
+
+        When using a spawn-based accelerator, for example ``Trainer(accelerator="ddp_spawn")`` or
+        training on 8 TPU cores with ``Trainer(tpu_cores=8)``, predictions won't be returned, so the
+        :class:`~pytorch_lightning.callbacks.BasePredictionWriter` should be used to persist them.
+
+        Example::
+
+            class MyModel(LightningModule):
+
+                def predict_step(self, batch, batch_idx, dataloader_idx):
+                    return self(batch)
+
+            dm = ...
+            model = MyModel()
+            trainer = Trainer(gpus=2)
+            predictions = trainer.predict(model, dm)
+
+
         Args:
             batch: Current batch
             batch_idx: Index of current batch
@@ -1324,7 +1421,7 @@ def training_step(...):
 
         # backward
         self._running_manual_backward = True
-        self.trainer.train_loop.backward(loss, optimizer=None, opt_idx=None, *args, **kwargs)
+        self.trainer.fit_loop.epoch_loop.batch_loop.backward(loss, optimizer=None, opt_idx=None, *args, **kwargs)
         self._running_manual_backward = False
 
     def backward(self, loss: Tensor, optimizer: Optimizer, optimizer_idx: int, *args, **kwargs) -> None:
@@ -1417,13 +1514,14 @@ def optimizer_step(
         Override this method to adjust the default way the
         :class:`~pytorch_lightning.trainer.trainer.Trainer` calls each optimizer.
         By default, Lightning calls ``step()`` and ``zero_grad()`` as shown in the example
-        once per optimizer.
+        once per optimizer. This method (and ``zero_grad()``) won't be called during the
+        accumulation phase when ``Trainer(accumulate_grad_batches != 1)``.
 
         Warning:
             If you are overriding this method, make sure that you pass the ``optimizer_closure`` parameter
             to ``optimizer.step()`` function as shown in the examples. This ensures that
             ``training_step()``, ``optimizer.zero_grad()``, ``backward()`` are called within
-            :meth:`~pytorch_lightning.trainer.training_loop.TrainLoop.run_training_batch`.
+            :meth:`~pytorch_lightning.loops.training_batch_loop.TrainingBatchLoop.advance`.
 
         Args:
             epoch: Current epoch
@@ -1563,15 +1661,24 @@ def tbptt_split_batch(self, batch, split_size):
 
         return splits
 
-    def summarize(self, mode: Optional[str] = ModelSummary.MODE_DEFAULT) -> Optional[ModelSummary]:
+    def summarize(self, mode: Optional[str] = "top", max_depth: Optional[int] = None) -> Optional[ModelSummary]:
         model_summary = None
 
-        if mode in ModelSummary.MODES:
-            model_summary = ModelSummary(self, mode=mode)
-            log.info("\n" + str(model_summary))
-        elif mode is not None:
-            raise MisconfigurationException(f"`mode` can be None, {', '.join(ModelSummary.MODES)}, got {mode}")
+        # temporary mapping from mode to max_depth
+        if max_depth is None:
+            if mode in ModelSummary.MODES:
+                max_depth = ModelSummary.MODES[mode]
+                rank_zero_deprecation(
+                    f"Argument `mode` in `LightningModule.summarize` is deprecated in v1.4"
+                    f" and will be removed in v1.6. Use `max_depth={max_depth}` to replicate `mode={mode}` behavior."
+                )
+                model_summary = ModelSummary(self, max_depth=max_depth)
+            elif mode is not None:
+                raise MisconfigurationException(f"`mode` can be None, {', '.join(ModelSummary.MODES)}, got {mode}")
+        else:
+            model_summary = ModelSummary(self, max_depth=max_depth)
+        log.info("\n" + str(model_summary))
 
         return model_summary
 
     def freeze(self) -> None:
@@ -1628,7 +1735,7 @@ def get_progress_bar_dict(self):
 
         Dictionary with the items to be displayed in the progress bar.
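+
+        For illustration, a minimal override that hides the version number could look like this
+        sketch (``v_num`` is the key under which Lightning reports the logger version)::
+
+            def get_progress_bar_dict(self):
+                # don't show the version number in the progress bar
+                items = super().get_progress_bar_dict()
+                items.pop("v_num", None)
+                return items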
""" # call .item() only once but store elements without graphs - running_train_loss = self.trainer.train_loop.running_loss.mean() + running_train_loss = self.trainer.fit_loop.running_loss.mean() avg_training_loss = None if running_train_loss is not None: avg_training_loss = running_train_loss.cpu().item() @@ -1642,7 +1749,7 @@ def get_progress_bar_dict(self): module_tbptt_enabled = self.truncated_bptt_steps > 0 trainer_tbptt_enabled = self.trainer.truncated_bptt_steps is not None and self.trainer.truncated_bptt_steps > 0 if module_tbptt_enabled or trainer_tbptt_enabled: - tqdm_dict["split_idx"] = self.trainer.train_loop.split_idx + tqdm_dict["split_idx"] = self.trainer.fit_loop.split_idx if self.trainer.logger is not None and self.trainer.logger.version is not None: version = self.trainer.logger.version @@ -1926,3 +2033,30 @@ def model_size(self) -> float: size_mb = os.path.getsize(tmp_name) / 1e6 os.remove(tmp_name) return size_mb + + def add_to_queue(self, queue: torch.multiprocessing.SimpleQueue) -> None: + """Appends the :attr:`trainer.callback_metrics` dictionary to the given queue. + + To avoid issues with memory sharing, we cast the data to numpy. + + Args: + queue: the instance of the queue to append the data. + """ + callback_metrics: dict = apply_to_collection( + self.trainer.callback_metrics, torch.Tensor, lambda x: x.cpu().numpy() + ) # send as numpy to avoid issues with memory sharing + queue.put(callback_metrics) + + def get_from_queue(self, queue: torch.multiprocessing.SimpleQueue) -> None: + """Retrieve the :attr:`trainer.callback_metrics` dictionary from the given queue. + + To preserve consistency, we cast back the data to ``torch.Tensor``. + + Args: + queue: the instance of the queue from where to get the data. + """ + # NOTE: `add_to_queue` needs to be called before + callback_metrics: dict = queue.get() + self.trainer.callback_metrics.update( + apply_to_collection(callback_metrics, np.ndarray, lambda x: torch.tensor(x)) + ) diff --git a/pytorch_lightning/core/memory.py b/pytorch_lightning/core/memory.py index 2908064e3c68f..bba42d6997be3 100644 --- a/pytorch_lightning/core/memory.py +++ b/pytorch_lightning/core/memory.py @@ -21,9 +21,14 @@ import numpy as np import torch import torch.nn as nn +from torch import Tensor from torch.utils.hooks import RemovableHandle from pytorch_lightning.utilities import AMPType, DeviceType +from pytorch_lightning.utilities.imports import _TORCH_GREATER_EQUAL_1_8 +from pytorch_lightning.utilities.warnings import WarningCache + +warning_cache = WarningCache() PARAMETER_NUM_UNITS = [" ", "K", "M", "B", "T"] UNKNOWN_SIZE = "?" @@ -118,7 +123,7 @@ def layer_type(self) -> str: @property def num_parameters(self) -> int: """ Returns the number of parameters in this module. """ - return sum(np.prod(p.shape) for p in self._module.parameters()) + return sum(np.prod(p.shape) if not _is_lazy_weight_tensor(p) else 0 for p in self._module.parameters()) class ModelSummary(object): @@ -126,11 +131,17 @@ class ModelSummary(object): Generates a summary of all layers in a :class:`~pytorch_lightning.core.lightning.LightningModule`. Args: - model: The model to summarize (also referred to as the root module) + model: The model to summarize (also referred to as the root module). 
mode: Can be one of - - `top` (default): only the top-level modules will be recorded (the children of the root module) - - `full`: summarizes all layers and their submodules in the root module + - `top` (default): only the top-level modules will be recorded (the children of the root module) + - `full`: summarizes all layers and their submodules in the root module + + .. deprecated:: v1.4 + This parameter was deprecated in v1.4 in favor of `max_depth` and will be removed in v1.6. + + max_depth: Maximum depth of modules to show. Use -1 to show all modules or 0 to show no + summary. Defaults to 1. The string representation of this summary prints a table with columns containing the name, type and number of parameters for each layer. @@ -155,7 +166,7 @@ class ModelSummary(object): ... return self.net(x) ... >>> model = LitModel() - >>> ModelSummary(model, mode='top') # doctest: +NORMALIZE_WHITESPACE + >>> ModelSummary(model, max_depth=1) # doctest: +NORMALIZE_WHITESPACE | Name | Type | Params | In sizes | Out sizes ------------------------------------------------------------ 0 | net | Sequential | 132 K | [10, 256] | [10, 512] @@ -164,7 +175,7 @@ class ModelSummary(object): 0 Non-trainable params 132 K Total params 0.530 Total estimated model params size (MB) - >>> ModelSummary(model, mode='full') # doctest: +NORMALIZE_WHITESPACE + >>> ModelSummary(model, max_depth=-1) # doctest: +NORMALIZE_WHITESPACE | Name | Type | Params | In sizes | Out sizes -------------------------------------------------------------- 0 | net | Sequential | 132 K | [10, 256] | [10, 512] @@ -177,14 +188,28 @@ class ModelSummary(object): 0.530 Total estimated model params size (MB) """ - MODE_TOP = "top" - MODE_FULL = "full" - MODE_DEFAULT = MODE_TOP - MODES = [MODE_FULL, MODE_TOP] + MODES = dict(top=1, full=-1) # TODO: remove in v1.6 - def __init__(self, model, mode: str = MODE_DEFAULT): + def __init__(self, model, mode: Optional[str] = None, max_depth: Optional[int] = 1): self._model = model - self._mode = mode + + # temporary mapping from mode to max_depth + if max_depth is None or mode is not None: + if mode in ModelSummary.MODES: + max_depth = ModelSummary.MODES[mode] + from pytorch_lightning.utilities import rank_zero_deprecation + rank_zero_deprecation( + f"Argument `mode` in `ModelSummary` is deprecated in v1.4" + f" and will be removed in v1.6. Use `max_depth={max_depth}` to replicate `mode={mode}` behaviour." + ) + else: + from pytorch_lightning.utilities.exceptions import MisconfigurationException + raise MisconfigurationException(f"`mode` can be {', '.join(ModelSummary.MODES)}, got {mode}.") + + if not isinstance(max_depth, int) or max_depth < -1: + raise ValueError(f"`max_depth` can be -1, 0 or > 0, got {max_depth}.") + + self._max_depth = max_depth self._layer_summary = self.summarize() # 1 byte -> 8 bits # TODO: how do we compute precisin_megabytes in case of mixed precision? 
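As a quick reference for the `mode` to `max_depth` migration implemented above, a hedged usage sketch (reusing the `LitModel` doctest model defined earlier in this file; the printed tables are omitted):

    from pytorch_lightning.core.memory import ModelSummary

    model = LitModel()
    ModelSummary(model, mode='full')   # deprecated: warns and maps to max_depth=-1
    ModelSummary(model, max_depth=-1)  # equivalent replacement for mode='full'
    ModelSummary(model, max_depth=0)   # new option: record no modules at all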
@@ -193,14 +218,14 @@ def __init__(self, model, mode: str = MODE_DEFAULT): @property def named_modules(self) -> List[Tuple[str, nn.Module]]: - if self._mode == ModelSummary.MODE_FULL: - mods = self._model.named_modules() - mods = list(mods)[1:] # do not include root module (LightningModule) - elif self._mode == ModelSummary.MODE_TOP: + if self._max_depth == 0: + mods = [] + elif self._max_depth == 1: # the children are the top-level modules mods = self._model.named_children() else: - mods = [] + mods = self._model.named_modules() + mods = list(mods)[1:] # do not include root module (LightningModule) return list(mods) @property @@ -225,11 +250,13 @@ def param_nums(self) -> List[int]: @property def total_parameters(self) -> int: - return sum(p.numel() for p in self._model.parameters()) + return sum(p.numel() if not _is_lazy_weight_tensor(p) else 0 for p in self._model.parameters()) @property def trainable_parameters(self) -> int: - return sum(p.numel() for p in self._model.parameters() if p.requires_grad) + return sum( + p.numel() if not _is_lazy_weight_tensor(p) else 0 for p in self._model.parameters() if p.requires_grad + ) @property def model_size(self) -> float: @@ -242,6 +269,12 @@ def summarize(self) -> Dict[str, LayerSummary]: self._forward_example_input() for layer in summary.values(): layer.detach_hook() + + if self._max_depth >= 1: + # remove summary entries with depth > max_depth + for k in [k for k in summary if k.count(".") >= self._max_depth]: + del summary[k] + return summary def _forward_example_input(self) -> None: @@ -438,3 +471,15 @@ def get_human_readable_count(number: int) -> str: return f"{int(number):,d} {labels[index]}" return f"{number:,.1f} {labels[index]}" + + +def _is_lazy_weight_tensor(p: Tensor) -> bool: + if _TORCH_GREATER_EQUAL_1_8: + from torch.nn.parameter import UninitializedParameter + if isinstance(p, UninitializedParameter): + warning_cache.warn( + "A layer with UninitializedParameter was found. " + "Thus, the total number of parameters detected may be inaccurate." + ) + return True + return False diff --git a/pytorch_lightning/core/optimizer.py b/pytorch_lightning/core/optimizer.py index 174631ae73e8b..3572a79b9bd84 100644 --- a/pytorch_lightning/core/optimizer.py +++ b/pytorch_lightning/core/optimizer.py @@ -120,7 +120,7 @@ def toggle_model(self, sync_grad: bool = True): during the accumulation phase. Setting `sync_grad` to False will block this synchronization and improve performance. 
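+
+        For illustration, a manual-optimization step could use it as in the following sketch
+        (``opt_g`` and the generator loss computation are hypothetical)::
+
+            opt_g, opt_d = self.optimizers()
+            with opt_g.toggle_model(sync_grad=False):
+                # gradients accumulate locally; the DDP sync for this backward is skipped
+                loss_g = self.generator_loss(batch)
+                self.manual_backward(loss_g)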
""" - with self._trainer.train_loop.block_ddp_sync_behaviour(not sync_grad): + with self._trainer.fit_loop.epoch_loop.batch_loop.block_ddp_sync_behaviour(not sync_grad): self._toggle_model() yield self._untoggle_model() diff --git a/pytorch_lightning/core/saving.py b/pytorch_lightning/core/saving.py index ffa9b0a1359ee..74862735aba61 100644 --- a/pytorch_lightning/core/saving.py +++ b/pytorch_lightning/core/saving.py @@ -202,7 +202,17 @@ def _load_model_state(cls, checkpoint: Dict[str, Any], strict: bool = True, **cl model.on_load_checkpoint(checkpoint) # load the state_dict on the model automatically - model.load_state_dict(checkpoint['state_dict'], strict=strict) + keys = model.load_state_dict(checkpoint['state_dict'], strict=strict) + + if not strict: + if keys.missing_keys: + rank_zero_warn( + f"Found keys that are in the model state dict but not in the checkpoint: {keys.missing_keys}" + ) + if keys.unexpected_keys: + rank_zero_warn( + f"Found keys that are not in the model state dict but in the checkpoint: {keys.unexpected_keys}" + ) return model diff --git a/pytorch_lightning/core/step_result.py b/pytorch_lightning/core/step_result.py deleted file mode 100644 index c55fb14a7eed4..0000000000000 --- a/pytorch_lightning/core/step_result.py +++ /dev/null @@ -1,613 +0,0 @@ -# Copyright The PyTorch Lightning team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Result class for easier logging and epoch-wise reduction.""" - -from copy import copy -from typing import Any, Callable, Dict, Iterable, List, MutableMapping, Optional, Sequence, Tuple, Union - -import torch -from torch import Tensor -from torchmetrics import Metric - - -class Result(Dict): - - def __init__(self) -> None: - super().__init__() - self['meta'] = {'_internal': {'_reduce_on_epoch': False, 'batch_sizes': []}} - - def __getitem__(self, key: Union[str, Any]) -> Any: - try: - return super().__getitem__(key) - except KeyError: - return super().__getitem__(f'{key}_step') - - def __getattr__(self, key: str) -> Any: - try: - if key == 'batch_log_metrics': - return self.get_batch_log_metrics() - elif key == 'batch_pbar_metrics': - return self.get_batch_pbar_metrics() - elif key == 'epoch_log_metrics': - return self.get_epoch_log_metrics() - elif key == 'epoch_pbar_metrics': - return self.get_epoch_pbar_metrics() - else: - return self[key] - except KeyError: - return None - - def __setattr__(self, key: str, val: Union[Tensor, Any]): - # ensure tensors are detached - if isinstance(val, torch.Tensor) and key != 'minimize': - val = val.detach() - self[key] = val - - def __getstate__(self): - return self - - def __setstate__(self, d): - self.update(d) - - @property - def minimize(self) -> Optional[Tensor]: - return self.get('minimize', None) - - @minimize.setter - def minimize(self, val: Optional[torch.Tensor]) -> None: - if val is not None: - if not isinstance(val, Tensor): - raise ValueError(f"`Result.minimize` must be a `torch.Tensor`, found: {val}") - if val.grad_fn is None: - raise RuntimeError("`Result.minimize` must have a `grad_fn`") - self['minimize'] = val - - def log( - self, - name: str, - value: Any, - prog_bar: bool = False, - logger: bool = True, - on_step: bool = False, - on_epoch: bool = True, - reduce_fx: Callable = torch.mean, - enable_graph: bool = False, - dataloader_idx: Optional[int] = None, - ): - # no metrics should be logged with graphs - if not enable_graph and isinstance(value, torch.Tensor): - value = value.detach() - - if isinstance(value, torch.Tensor) and value.device.type == "xla": - value = value.cpu() - - if 'meta' not in self: - self.__setitem__('meta', {}) - - # if user requests both step and epoch, then we split the metric in two automatically - # one will be logged per step. 
the other per epoch - was_forked = False - if on_step and on_epoch: - was_forked = True - - # set step version - step_name = f'{name}_step' - - self.__set_meta( - step_name, - value, - prog_bar, - logger, - on_step=True, - on_epoch=False, - reduce_fx=reduce_fx, - forked=False, - dataloader_idx=dataloader_idx, - ) - - self.__setitem__(step_name, value) - - # set epoch version - epoch_name = f'{name}_epoch' - - self.__set_meta( - epoch_name, - value, - prog_bar, - logger, - on_step=False, - on_epoch=True, - reduce_fx=reduce_fx, - forked=False, - dataloader_idx=dataloader_idx, - ) - self.__setitem__(epoch_name, value) - - # always log the original metric - self.__set_meta( - name, - value, - prog_bar, - logger, - on_step, - on_epoch, - reduce_fx, - forked=was_forked, - dataloader_idx=dataloader_idx, - ) - - # set the value - self.__setitem__(name, value) - - def __set_meta( - self, - name: str, - value: Any, - prog_bar: bool, - logger: bool, - on_step: bool, - on_epoch: bool, - reduce_fx: Callable, - forked: bool, - dataloader_idx: Union[int, None], - ): - # set the meta for the item - meta_value = value - meta = dict( - prog_bar=prog_bar, - logger=logger, - on_step=on_step, - on_epoch=on_epoch, - reduce_fx=reduce_fx, - value=meta_value, - forked=forked, - dataloader_idx=dataloader_idx, - ) - - self['meta'][name] = meta - - # track whether any input requires reduction on epoch end - _internal = self['meta']['_internal'] - _internal['_reduce_on_epoch'] = max(_internal['_reduce_on_epoch'], on_epoch) - - def track_batch_size(self, batch): - batch_size = Result.extract_batch_size(batch) - Result.attach_batch_size(batch_size, self) - - @staticmethod - def extract_batch_size(batch): - try: - batch_size = Result.unpack_batch_size(batch) - except RecursionError: - batch_size = 1 - return batch_size - - @staticmethod - def attach_batch_size(batch_size: Union[int, None], result: 'Result') -> None: - if batch_size is not None: - meta = result['meta'] - meta['_internal']['batch_sizes'].append(batch_size) - - def get_batch_sizes(self): - meta = self['meta'] - return torch.tensor(meta['_internal']['batch_sizes']) - - def _add_dataloader_idx(self, k: str, dataloader_idx: Union[int, None], add_dataloader_idx: bool) -> str: - if dataloader_idx is not None and add_dataloader_idx: - return f"{k}/dataloader_idx_{dataloader_idx}" - return k - - def get_batch_log_metrics(self, include_forked_originals=True, add_dataloader_idx=False) -> dict: - """ - Gets the metrics to log at the end of the batch step - - """ - result = {} - - meta = self['meta'] - for k, options in meta.items(): - if k == '_internal': - continue - - if options['forked'] and not include_forked_originals: - continue - - dl_key = self._add_dataloader_idx(k, options["dataloader_idx"], add_dataloader_idx) - - if options['logger'] and options['on_step']: - if isinstance(self[k], Metric) and self[k]._forward_cache is not None: - result[dl_key] = self[k]._forward_cache.detach() - else: - result[dl_key] = self[k] - - return result - - def get_epoch_log_metrics(self, add_dataloader_idx=False) -> dict: - """ - Gets the metrics to log at the end of epoch - """ - result = {} - meta = self['meta'] - for k, options in meta.items(): - if k == '_internal': - continue - - if options['forked']: - continue - - dl_key = self._add_dataloader_idx(k, options["dataloader_idx"], add_dataloader_idx) - - if options['logger'] and options['on_epoch']: - if isinstance(self[k], Metric): - result[dl_key] = self[k].compute().detach() - else: - result[dl_key] = self[k] - - if k in 
self and not options['on_epoch'] and isinstance(self[k], Metric): - # compute for reuse later - self[k].compute() - - return result - - def get_epoch_pbar_metrics(self, add_dataloader_idx=False): - """ - Gets the metrics to log at the end of epoch - """ - result = {} - - meta = self['meta'] - for k, options in meta.items(): - if k == '_internal': - continue - - if options['forked']: - continue - - dl_key = self._add_dataloader_idx(k, options["dataloader_idx"], add_dataloader_idx) - - if options['prog_bar'] and options['on_epoch']: - if isinstance(self[k], Metric): - result[dl_key] = self[k].compute().detach() - else: - result[dl_key] = self[k] - - if k in self and not options['on_epoch'] and isinstance(self[k], Metric): - # compute for reuse later - self[k].compute() - - return result - - def get_forked_metrics(self, add_dataloader_idx=False): - """ - Gets the metrics to log at the end of epoch - """ - result = {} - - meta = self['meta'] - for k, options in meta.items(): - if k == '_internal': - continue - - dl_key = self._add_dataloader_idx(k, options["dataloader_idx"], add_dataloader_idx) - - if options['forked']: - if isinstance(self[k], Metric): - result[dl_key] = self[k].compute().detach() - else: - result[dl_key] = self[k] - - return result - - def get_batch_pbar_metrics(self, include_forked_originals=True, add_dataloader_idx=False): - """ - Gets the metrics to log at the end of the batch step - """ - result = {} - - meta = self['meta'] - for k, options in meta.items(): - if k == '_internal': - continue - - if options['forked'] and not include_forked_originals: - continue - - dl_key = self._add_dataloader_idx(k, options["dataloader_idx"], add_dataloader_idx) - - if options['prog_bar'] and options['on_step']: - if isinstance(self[k], Metric) and self[k]._forward_cache is not None: - result[dl_key] = self[k]._forward_cache - else: - result[dl_key] = self[k] - - return result - - def detach(self) -> 'Result': - for k, v in self.items(): - if isinstance(v, torch.Tensor): - self.__setitem__(k, v.detach()) - return self - - def to(self, *args, **kwargs) -> 'Result': - """Move all self attributes to the given device.""" - for k, v in self.items(): - if isinstance(v, torch.Tensor): - self.__setitem__(k, v.to(*args, **kwargs)) - return self - - def cpu(self) -> 'Result': - """Move all self attributes to CPU.""" - return self.to(torch.device("cpu")) - - def __repr__(self): - self_copy = self.copy() - - if 'meta' in self_copy: - del self_copy['meta'] - - return str(self_copy) - - def __str__(self): - copy = self.copy() - del copy['meta'] - - return str(copy) - - def __copy__(self): - newone = type(self)() - for k, v in self.items(): - if isinstance(v, torch.Tensor): - v = v.detach() - newone[k] = copy(v) - return newone - - @staticmethod - def unpack_batch_size(sample): - """ - Recursively unpack sample to find a torch.Tensor. - returns len(tensor) when found, or 1 when it hits an empty or non iterable. 
- """ - if isinstance(sample, torch.Tensor): - size = sample.size(0) - elif isinstance(sample, str): - return len(sample) - elif isinstance(sample, dict): - sample = next(iter(sample.values()), 1) - size = Result.unpack_batch_size(sample) - elif isinstance(sample, Iterable): - sample = next(iter(sample), 1) - size = Result.unpack_batch_size(sample) - else: - size = 1 - return size - - @classmethod - def reduce_on_epoch_end(cls, outputs): - # get the batch sizes for all outputs - batch_sizes = [] - meta = {} - for x in outputs: - batch_sizes.append(x.get_batch_sizes()) - meta.update(x['meta']) - - batch_sizes = torch.stack(batch_sizes).view(-1) - - result = cls() - result = recursive_gather(outputs, result) - recursive_stack(result) - - for k, option in meta.items(): - if k == '_internal' or isinstance(result[k], Metric): - continue - - # for forked metrics don't reduce, just take the last val - if option['forked']: - result[k] = choose_last(result[k]) - continue - - if option['on_epoch']: - fx = option['reduce_fx'] - if fx == torch.mean: - if isinstance(result[k], list): - result[k] = torch.tensor(result[k]).float() - try: - reduced_val = weighted_mean(result[k], batch_sizes) - # todo: specify the expected Exceptions to come - except Exception: - reduced_val = torch.mean(result[k]) - else: - reduced_val = fx(result[k]) - - result[k] = reduced_val - else: - del result[k] - - result['meta'] = meta - return result - - @classmethod - def reduce_across_time(cls, time_outputs): - # auto-reduce across time for tbptt - meta = time_outputs[0]['meta'] - - result = cls() - result = recursive_gather(time_outputs, result) - recursive_stack(result) - - for k, value in result.items(): - if k in ['meta', 'extra'] or isinstance(value, Metric): - continue - - if isinstance(value, list): - value = torch.tensor(value) - - if isinstance(value, dict): - # TODO: recursive reduce: - _recursive_fx_apply(value, torch.mean) - else: - result[k] = torch.mean(value.float()) - - result['meta'] = meta - return result - - def dp_reduce(self): - for k, value in self.items(): - if k == 'meta' or isinstance(value, Metric): - continue - - if isinstance(value, list): - value = torch.tensor(value) - - self[k] = value.mean(dim=-1) - - @property - def should_reduce_on_epoch_end(self) -> bool: - return self['meta']['_internal']['_reduce_on_epoch'] - - def rename_keys(self, map_dict: dict): - """ - Maps key values to the target values. Useful when renaming variables in mass. 
- - Args: - map_dict: - """ - meta = self.meta - for source, dest in map_dict.items(): - # map the main keys - self[dest] = self[source] - del self[source] - - # map meta - meta[dest] = meta[source] - del meta[source] - - def reset(self) -> None: - """ - Call at the end of epoch to reset all metric objects - """ - for k, value in self.items(): - if isinstance(value, Metric): - value.reset() - - -def choose_last(x): - if isinstance(x, (torch.Tensor, list)): - return x[-1] - if isinstance(x, dict): - for k, v in x.items(): - x[k] = x[k][-1] - - -def recursive_gather(outputs: Sequence[dict], result: Optional[MutableMapping] = None) -> Optional[MutableMapping]: - for out in outputs: - if 'meta' in out: - del out['meta'] - - for k, v in out.items(): - # support manual opt where the user does not return a minimize key - if k == 'minimize' and v is None: - continue - - if isinstance(v, dict): - in_d = result.get(k, {}) - v = recursive_gather([v], in_d) - result[k] = v - else: - if isinstance(v, Metric): - # if v is a metric, just keep one of them, - # don't keep on adding a list of them - result[k] = v - else: - if k not in result: - result[k] = [] - result[k].append(v) - - return result - - -def recursive_stack(result: MutableMapping): - for k, v in result.items(): - if isinstance(v, dict): - recursive_stack(v) - - result[k] = collate_tensors(v) - - -def _recursive_fx_apply(input: dict, fx): - for k, v in input.items(): - if isinstance(v, list): - v = torch.tensor(v) - - if isinstance(v, torch.Tensor): - v = fx(v.float()) - input[k] = v - else: - _recursive_fx_apply(v, fx) - - -def collate_tensors(items: Union[List, Tuple]) -> Union[Tensor, List, Tuple]: - if not items or not isinstance(items, (list, tuple)) or any(not isinstance(item, Tensor) for item in items): - # items is not a sequence, empty, or contains non-tensors - return items - - if all(item.ndim == 0 for item in items): - # all tensors are scalars, we need to stack - return torch.stack(items) - - if all(item.ndim >= 1 and item.shape[1:] == items[0].shape[1:] for item in items): - # we can concatenate along the first dimension - return torch.cat(items) - - return items - - -def weighted_mean(result, weights): - - if isinstance(result, dict): - _process_dataloader_aggregated_steps(result, weights) - else: - if isinstance(result, list): - result = torch.tensor(result) - - weights = weights.to(result.device)[:result.size(0)] - numerator = torch.dot(result.float(), weights.transpose(-1, 0).float()) - result = numerator / weights.sum().float() - return result - - -def _process_dataloader_aggregated_steps(result, weights): - internal_keys = {'meta'} - - moved = False - - for k, v in result.items(): - if k in internal_keys: - continue - - # make sure v is a tensor - if not isinstance(v, torch.Tensor): - v = torch.tensor(v) - - # move to memory only once - if not moved: - weights = weights.to(v.device) - moved = True - - # move weights to same device as value to reduce - weights_t = weights[:v.size(0)] - - # weighted mean - numerator = torch.dot(v.float(), weights_t.transpose(-1, 0).float()) - v = numerator / weights.sum().float() - result[k] = v diff --git a/pytorch_lightning/loggers/base.py b/pytorch_lightning/loggers/base.py index 035a42338fe68..d6875d225790c 100644 --- a/pytorch_lightning/loggers/base.py +++ b/pytorch_lightning/loggers/base.py @@ -20,11 +20,13 @@ from argparse import Namespace from functools import wraps from typing import Any, Callable, Dict, Iterable, List, Mapping, MutableMapping, Optional, Sequence, Tuple, Union +from 
weakref import ReferenceType
 
 import numpy as np
 import torch
 
-from pytorch_lightning.core.lightning import LightningModule
+import pytorch_lightning as pl
+from pytorch_lightning.callbacks.model_checkpoint import ModelCheckpoint
 from pytorch_lightning.utilities import rank_zero_only
 
@@ -71,6 +73,15 @@ def __init__(
         self._agg_key_funcs = agg_key_funcs if agg_key_funcs else {}
         self._agg_default_func = agg_default_func
 
+    def after_save_checkpoint(self, checkpoint_callback: 'ReferenceType[ModelCheckpoint]') -> None:
+        """
+        Called after the model checkpoint callback saves a new checkpoint.
+
+        Args:
+            checkpoint_callback: the model checkpoint callback instance
+        """
+        pass
+
     def update_agg_funcs(
         self,
         agg_key_funcs: Optional[Mapping[str, Callable[[Sequence[float]], float]]] = None,
@@ -289,7 +300,7 @@ def log_hyperparams(self, params: argparse.Namespace, *args, **kwargs):
             kwargs: Optional keywoard arguments, depends on the specific logger being used
         """
 
-    def log_graph(self, model: LightningModule, input_array=None) -> None:
+    def log_graph(self, model: 'pl.LightningModule', input_array=None) -> None:
         """
         Record model graph
 
@@ -355,7 +366,11 @@ def __init__(self, logger_iterable: Iterable[LightningLoggerBase]):
         self._logger_iterable = logger_iterable
 
     def __getitem__(self, index: int) -> LightningLoggerBase:
-        return [logger for logger in self._logger_iterable][index]
+        return list(self._logger_iterable)[index]
+
+    def after_save_checkpoint(self, checkpoint_callback: 'ReferenceType[ModelCheckpoint]') -> None:
+        for logger in self._logger_iterable:
+            logger.after_save_checkpoint(checkpoint_callback)
 
     def update_agg_funcs(
         self,
@@ -381,7 +396,7 @@ def log_hyperparams(self, params: Union[Dict[str, Any], Namespace]) -> None:
         for logger in self._logger_iterable:
             logger.log_hyperparams(params)
 
-    def log_graph(self, model: LightningModule, input_array=None) -> None:
+    def log_graph(self, model: 'pl.LightningModule', input_array=None) -> None:
         for logger in self._logger_iterable:
             logger.log_graph(model, input_array)
 
diff --git a/pytorch_lightning/loggers/comet.py b/pytorch_lightning/loggers/comet.py
index 148e512f5e439..498a16a9daa29 100644
--- a/pytorch_lightning/loggers/comet.py
+++ b/pytorch_lightning/loggers/comet.py
@@ -24,7 +24,7 @@
 import torch
 from torch import is_tensor
 
-from pytorch_lightning.core.lightning import LightningModule
+import pytorch_lightning as pl
 from pytorch_lightning.loggers.base import LightningLoggerBase, rank_zero_experiment
 from pytorch_lightning.utilities import _module_available, rank_zero_only
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
@@ -318,6 +318,6 @@ def __getstate__(self):
         state["_experiment"] = None
         return state
 
-    def log_graph(self, model: LightningModule, input_array=None) -> None:
+    def log_graph(self, model: 'pl.LightningModule', input_array=None) -> None:
         if self._experiment is not None:
             self._experiment.set_model_graph(model)
diff --git a/pytorch_lightning/loggers/csv_logs.py b/pytorch_lightning/loggers/csv_logs.py
index 4df672fa6e3b5..754a7cf892060 100644
--- a/pytorch_lightning/loggers/csv_logs.py
+++ b/pytorch_lightning/loggers/csv_logs.py
@@ -29,7 +29,8 @@
 from pytorch_lightning.core.saving import save_hparams_to_yaml
 from pytorch_lightning.loggers.base import LightningLoggerBase, rank_zero_experiment
-from pytorch_lightning.utilities.distributed import rank_zero_only, rank_zero_warn
+from pytorch_lightning.utilities import rank_zero_warn
+from pytorch_lightning.utilities.distributed import rank_zero_only
 
 log =
logging.getLogger(__name__) diff --git a/pytorch_lightning/loggers/neptune.py b/pytorch_lightning/loggers/neptune.py index aed09f11464f8..89b9628534e09 100644 --- a/pytorch_lightning/loggers/neptune.py +++ b/pytorch_lightning/loggers/neptune.py @@ -278,15 +278,13 @@ def save_dir(self) -> Optional[str]: def name(self) -> str: if self.offline_mode: return 'offline-name' - else: - return self.experiment.name + return self.experiment.name @property def version(self) -> str: if self.offline_mode: return 'offline-id-1234' - else: - return self.experiment.id + return self.experiment.id @rank_zero_only def log_metric( diff --git a/pytorch_lightning/loggers/tensorboard.py b/pytorch_lightning/loggers/tensorboard.py index 94268f6063f51..ea0937016550d 100644 --- a/pytorch_lightning/loggers/tensorboard.py +++ b/pytorch_lightning/loggers/tensorboard.py @@ -25,7 +25,7 @@ from torch.utils.tensorboard import SummaryWriter from torch.utils.tensorboard.summary import hparams -from pytorch_lightning.core.lightning import LightningModule +import pytorch_lightning as pl from pytorch_lightning.core.saving import save_hparams_to_yaml from pytorch_lightning.loggers.base import LightningLoggerBase, rank_zero_experiment from pytorch_lightning.utilities import _OMEGACONF_AVAILABLE, rank_zero_only, rank_zero_warn @@ -112,8 +112,7 @@ def root_dir(self) -> str: """ if self.name is None or len(self.name) == 0: return self.save_dir - else: - return os.path.join(self.save_dir, self.name) + return os.path.join(self.save_dir, self.name) @property def log_dir(self) -> str: @@ -223,7 +222,7 @@ def log_metrics(self, metrics: Dict[str, float], step: Optional[int] = None) -> raise ValueError(m) from ex @rank_zero_only - def log_graph(self, model: LightningModule, input_array=None): + def log_graph(self, model: 'pl.LightningModule', input_array=None): if self._log_graph: if input_array is None: input_array = model.example_input_array @@ -267,14 +266,16 @@ def version(self) -> int: return self._version def _get_next_version(self): - root_dir = os.path.join(self.save_dir, self.name) + root_dir = self.root_dir - if not self._fs.isdir(root_dir): + try: + listdir_info = self._fs.listdir(root_dir) + except OSError: log.warning('Missing logger folder: %s', root_dir) return 0 existing_versions = [] - for listing in self._fs.listdir(root_dir): + for listing in listdir_info: d = listing["name"] bn = os.path.basename(d) if self._fs.isdir(d) and bn.startswith("version_"): diff --git a/pytorch_lightning/loggers/test_tube.py b/pytorch_lightning/loggers/test_tube.py index 84f231b0f16d7..1650ab8f4ba49 100644 --- a/pytorch_lightning/loggers/test_tube.py +++ b/pytorch_lightning/loggers/test_tube.py @@ -18,10 +18,10 @@ from argparse import Namespace from typing import Any, Dict, Optional, Union -from pytorch_lightning.core.lightning import LightningModule +import pytorch_lightning as pl from pytorch_lightning.loggers.base import LightningLoggerBase, rank_zero_experiment -from pytorch_lightning.utilities import _module_available -from pytorch_lightning.utilities.distributed import rank_zero_only, rank_zero_warn +from pytorch_lightning.utilities import _module_available, rank_zero_warn +from pytorch_lightning.utilities.distributed import rank_zero_only _TESTTUBE_AVAILABLE = _module_available("test_tube") @@ -153,7 +153,7 @@ def log_metrics(self, metrics: Dict[str, float], step: Optional[int] = None) -> self.experiment.log(metrics, global_step=step) @rank_zero_only - def log_graph(self, model: LightningModule, input_array=None): + def log_graph(self, 
model: 'pl.LightningModule', input_array=None):
         if self._log_graph:
             if input_array is None:
                 input_array = model.example_input_array
diff --git a/pytorch_lightning/loggers/wandb.py b/pytorch_lightning/loggers/wandb.py
index 0f73153378ed4..5daf2176f3421 100644
--- a/pytorch_lightning/loggers/wandb.py
+++ b/pytorch_lightning/loggers/wandb.py
@@ -15,20 +15,26 @@
 Weights and Biases Logger
 -------------------------
 """
+import operator
 import os
 from argparse import Namespace
+from pathlib import Path
 from typing import Any, Dict, Optional, Union
+from weakref import ReferenceType
 
 import torch.nn as nn
 
+from pytorch_lightning.callbacks.model_checkpoint import ModelCheckpoint
 from pytorch_lightning.loggers.base import LightningLoggerBase, rank_zero_experiment
 from pytorch_lightning.utilities import _module_available, rank_zero_only
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
+from pytorch_lightning.utilities.imports import _compare_version
 from pytorch_lightning.utilities.warnings import WarningCache
 
 warning_cache = WarningCache()
 
 _WANDB_AVAILABLE = _module_available("wandb")
+_WANDB_GREATER_EQUAL_0_10_22 = _compare_version("wandb", operator.ge, "0.10.22")
 
 try:
     import wandb
@@ -40,7 +46,7 @@ class WandbLogger(LightningLoggerBase):
     r"""
-    Log using `Weights and Biases `_.
+    Log using `Weights and Biases `_.
 
     Install it with pip:
 
@@ -56,7 +62,15 @@ class WandbLogger(LightningLoggerBase):
         version: Same as id.
         anonymous: Enables or explicitly disables anonymous logging.
         project: The name of the project to which this run will belong.
-        log_model: Save checkpoints in wandb dir to upload on W&B servers.
+        log_model: Log checkpoints created by :class:`~pytorch_lightning.callbacks.model_checkpoint.ModelCheckpoint`
+            as W&B artifacts.
+
+            * if ``log_model == 'all'``, checkpoints are logged during training.
+            * if ``log_model == True``, checkpoints are logged at the end of training, except when
+              :paramref:`~pytorch_lightning.callbacks.model_checkpoint.ModelCheckpoint.save_top_k` ``== -1``
+              which also logs every checkpoint during training.
+            * if ``log_model == False`` (default), no checkpoint is logged.
+
         prefix: A string to put at the beginning of metric keys.
         experiment: WandB experiment object. Automatically set when creating a run.
         \**kwargs: Arguments passed to :func:`wandb.init` like `entity`, `group`, `tags`, etc.
@@ -71,15 +85,16 @@ class WandbLogger(LightningLoggerBase):
         from pytorch_lightning.loggers import WandbLogger
         from pytorch_lightning import Trainer
 
-        wandb_logger = WandbLogger()
+
+        # instrument experiment with W&B
+        wandb_logger = WandbLogger(project='MNIST', log_model='all')
         trainer = Trainer(logger=wandb_logger)
 
-    Note: When logging manually through `wandb.log` or `trainer.logger.experiment.log`,
-        make sure to use `commit=False` so the logging step does not increase.
+        # log gradients and model topology
+        wandb_logger.watch(model)
 
     See Also:
-        - `Tutorial `__
-          on how to use W&B with PyTorch Lightning
+        - `Demo in Google Colab `__ with model logging
         - `W&B Documentation `__
 
     """
@@ -114,10 +129,17 @@ def __init__(
                 'Hint: Set `offline=False` to log your model.'
             )
 
-        if sync_step is not None:
+        if log_model and not _WANDB_GREATER_EQUAL_0_10_22:
             warning_cache.warn(
+                f'Providing log_model={log_model} requires wandb version >= 0.10.22'
+                ' for logging associated model metadata.\n'
+                'Hint: Upgrade with `pip install --upgrade wandb`.'
+            )
+
+        if sync_step is not None:
+            warning_cache.deprecation(
                 "`WandbLogger(sync_step=(True|False))` is deprecated in v1.2.1 and will be removed in v1.5."
-                " Metrics are now logged separately and automatically synchronized.", DeprecationWarning
+                " Metrics are now logged separately and automatically synchronized."
             )
 
         super().__init__()
@@ -125,6 +147,8 @@ def __init__(
         self._log_model = log_model
         self._prefix = prefix
         self._experiment = experiment
+        self._logged_model_time = {}
+        self._checkpoint_callback = None
         # set wandb init arguments
         anonymous_lut = {True: 'allow', False: None}
         self._wandb_init = dict(
@@ -168,10 +192,6 @@ def experiment(self) -> Run:
                 os.environ['WANDB_MODE'] = 'dryrun'
             self._experiment = wandb.init(**self._wandb_init) if wandb.run is None else wandb.run
 
-            # save checkpoints in wandb dir to upload on W&B servers
-            if self._save_dir is None:
-                self._save_dir = self._experiment.dir
-
             # define default x-axis (for latest wandb versions)
             if getattr(self._experiment, "define_metric", None):
                 self._experiment.define_metric("trainer/global_step")
@@ -213,8 +233,49 @@ def version(self) -> Optional[str]:
         # don't create an experiment if we don't have one
         return self._experiment.id if self._experiment else self._id
 
+    def after_save_checkpoint(self, checkpoint_callback: 'ReferenceType[ModelCheckpoint]') -> None:
+        # log checkpoints as artifacts
+        if self._log_model == 'all' or self._log_model is True and checkpoint_callback.save_top_k == -1:
+            self._scan_and_log_checkpoints(checkpoint_callback)
+        elif self._log_model is True:
+            self._checkpoint_callback = checkpoint_callback
+
     @rank_zero_only
     def finalize(self, status: str) -> None:
-        # upload all checkpoints from saving dir
-        if self._log_model:
-            wandb.save(os.path.join(self.save_dir, "*.ckpt"))
+        # log checkpoints as artifacts
+        if self._checkpoint_callback:
+            self._scan_and_log_checkpoints(self._checkpoint_callback)
+
+    def _scan_and_log_checkpoints(self, checkpoint_callback: 'ReferenceType[ModelCheckpoint]') -> None:
+        # get checkpoints to be saved with associated score
+        checkpoints = {
+            checkpoint_callback.last_model_path: checkpoint_callback.current_score,
+            checkpoint_callback.best_model_path: checkpoint_callback.best_model_score,
+            **checkpoint_callback.best_k_models
+        }
+        checkpoints = sorted([(Path(p).stat().st_mtime, p, s) for p, s in checkpoints.items() if Path(p).is_file()])
+        checkpoints = [
+            c for c in checkpoints if c[1] not in self._logged_model_time.keys() or self._logged_model_time[c[1]] < c[0]
+        ]
+
+        # log iteratively all new checkpoints
+        for t, p, s in checkpoints:
+            metadata = {
+                'score': s,
+                'original_filename': Path(p).name,
+                'ModelCheckpoint': {
+                    k: getattr(checkpoint_callback, k)
+                    for k in [
+                        'monitor', 'mode', 'save_last', 'save_top_k', 'save_weights_only', '_every_n_train_steps',
+                        '_every_n_val_epochs'
+                    ]
+                    # ensure it does not break if `ModelCheckpoint` args change
+                    if hasattr(checkpoint_callback, k)
+                }
+            } if _WANDB_GREATER_EQUAL_0_10_22 else None
+            artifact = wandb.Artifact(name=f"model-{self.experiment.id}", type="model", metadata=metadata)
+            artifact.add_file(p, name='model.ckpt')
+            aliases = ["latest", "best"] if p == checkpoint_callback.best_model_path else ["latest"]
+            self.experiment.log_artifact(artifact, aliases=aliases)
+            # remember logged models - timestamp needed in case filename didn't change (last.ckpt or custom name)
+            self._logged_model_time[p] = t
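[For orientation, a minimal usage sketch of the artifact-based checkpoint logging introduced above. Illustrative only, not part of the patch; `MyModel` and `train_loader` are hypothetical placeholders.]

    from pytorch_lightning import Trainer
    from pytorch_lightning.callbacks import ModelCheckpoint
    from pytorch_lightning.loggers import WandbLogger

    # `log_model='all'` uploads every checkpoint as it is written;
    # `log_model=True` defers the upload to `finalize` at the end of training
    wandb_logger = WandbLogger(project='MNIST', log_model='all')
    checkpoint_callback = ModelCheckpoint(monitor='val_loss', save_top_k=2)
    trainer = Trainer(logger=wandb_logger, callbacks=[checkpoint_callback])
    trainer.fit(MyModel(), train_loader)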
diff --git a/pytorch_lightning/loops/__init__.py b/pytorch_lightning/loops/__init__.py
new file mode 100644
index 0000000000000..b7eb47167d26f
--- /dev/null
+++ b/pytorch_lightning/loops/__init__.py
@@ -0,0 +1,19 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from pytorch_lightning.loops.base import Loop  # noqa: F401
+from pytorch_lightning.loops.batch import TrainingBatchLoop  # noqa: F401
+from pytorch_lightning.loops.dataloader import DataLoaderLoop, EvaluationLoop, PredictionLoop  # noqa: F401
+from pytorch_lightning.loops.epoch import EvaluationEpochLoop, PredictionEpochLoop, TrainingEpochLoop  # noqa: F401
+from pytorch_lightning.loops.fit_loop import FitLoop  # noqa: F401
diff --git a/pytorch_lightning/loops/base.py b/pytorch_lightning/loops/base.py
new file mode 100644
index 0000000000000..18657fe5dbaff
--- /dev/null
+++ b/pytorch_lightning/loops/base.py
@@ -0,0 +1,158 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from abc import ABC, abstractmethod
+from typing import Any, Dict, Optional
+
+from deprecate import void
+
+import pytorch_lightning as pl
+from pytorch_lightning.utilities.exceptions import MisconfigurationException
+
+
+class Loop(ABC):
+    """
+    Basic Loops interface. All classes derived from this must implement the following properties and methods:
+
+        * :attr:`done` (property): Condition to break the loop
+        * :attr:`reset` (method): Resets the internal state between multiple calls of :attr:`run`
+        * :attr:`advance` (method): Implements one step of the loop
+
+    This class implements the following loop structure:
+
+    .. code-block:: python
+
+        on_run_start()
+
+        while not done:
+            on_advance_start()
+            advance()
+            on_advance_end()
+
+        on_run_end()
+    """
+
+    def __init__(self) -> None:
+        self.iteration_count: int = 0
+        self.trainer: Optional['pl.Trainer'] = None
+        self._restarting = False
+
+    @property
+    def restarting(self) -> bool:
+        return self._restarting
+
+    @restarting.setter
+    def restarting(self, restarting: bool) -> None:
+        self._restarting = restarting
+
+    @property
+    @abstractmethod
+    def done(self) -> bool:
+        """Property indicating when the loop is finished"""
+
+    @property
+    def skip(self) -> bool:
+        """Determine whether to return immediately from the call to :meth:`run`."""
+        return False
+
+    def connect(self, trainer: 'pl.Trainer', *args: Any, **kwargs: Any) -> None:
+        """Connects Loop with all the necessary things like connectors and accelerators."""
+        # TODO(@justusschock): Make the trainer a weakref/proxy
+        if not isinstance(trainer, pl.Trainer):
+            raise MisconfigurationException(
+                f"Loop {self.__class__.__name__} should be connected to a `Trainer`, found: {trainer}."
+            )
+        self.trainer = trainer
+
+    def on_skip(self) -> Optional[Any]:
+        """
+        The function to run when :meth:`run` should be skipped, determined by the condition in :attr:`skip`.
+
+        Returns:
+            the default output value of :meth:`on_run_end`
+        """
+
+    def run(self, *args: Any, **kwargs: Any) -> Optional[Any]:
+        """
+        The main entry point to the loop.
+
+        Will frequently check the :attr:`done` condition and call :attr:`advance`
+        until :attr:`done` evaluates to ``True``.
+
+        Returns:
+            the output of :attr:`on_run_end` (often outputs collected from each step of the loop)
+        """
+        if self.skip:
+            return self.on_skip()
+
+        if self.restarting:
+            self.restore()
+            self.restarting = False
+        else:
+            self.reset()
+
+        self.on_run_start(*args, **kwargs)
+
+        while not self.done:
+            try:
+                self.on_advance_start(*args, **kwargs)
+                self.advance(*args, **kwargs)
+                self.on_advance_end()
+                self.iteration_count += 1
+            except StopIteration:
+                break
+
+        output = self.on_run_end()
+        return output
+
+    def restore(self) -> None:
+        """Restore the internal state of the loop at the beginning of run if restarting is ``True``."""
+
+    @abstractmethod
+    def reset(self) -> None:
+        """Resets the internal state of the loop at the beginning of each call to :attr:`run`."""
+
+    def on_run_start(self, *args: Any, **kwargs: Any) -> None:
+        """
+        Hook to be called as the first thing after entering :attr:`run` (except the state reset).
+
+        Accepts all arguments passed to :attr:`run`.
+        """
+        void(*args, **kwargs)
+
+    def on_advance_start(self, *args: Any, **kwargs: Any) -> None:
+        """
+        Hook to be called each time before :attr:`advance` is called. Accepts all arguments passed to :attr:`run`.
+        """
+        void(*args, **kwargs)
+
+    @abstractmethod
+    def advance(self, *args: Any, **kwargs: Any) -> None:
+        """Performs a single step. Accepts all arguments passed to :attr:`run`."""
+
+    def on_advance_end(self) -> None:
+        """Hook to be called each time after :attr:`advance` is called."""
+
+    def on_run_end(self) -> Any:
+        """Hook to be called at the end of the run. Its return value is returned from :attr:`run`."""
+
+    def teardown(self) -> None:
+        """Use to release memory etc."""
+
+    def load_state_dict(self, state_dict: Dict) -> None:
+        """Restore the loop state from the provided state_dict."""
+
+    def state_dict(self) -> Dict:
+        """Return the loop's current state."""
+        return {}
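[For readers new to the abstraction, a toy subclass — illustrative only, not part of this patch — showing the `done`/`reset`/`advance` contract defined above.]

    from pytorch_lightning.loops.base import Loop

    class CountLoop(Loop):
        """Collects iteration indices until a limit is reached."""

        def __init__(self, limit: int):
            super().__init__()
            self.limit = limit
            self.collected = []

        @property
        def done(self) -> bool:
            # condition to break the loop
            return self.iteration_count >= self.limit

        def reset(self) -> None:
            # fresh state for every call to `run`
            self.iteration_count = 0
            self.collected = []

        def advance(self) -> None:
            # one step of the loop; `run` increments `iteration_count`
            self.collected.append(self.iteration_count)

        def on_run_end(self):
            # the return value of `run`
            return self.collected

    assert CountLoop(3).run() == [0, 1, 2]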
""" + + def __init__(self) -> None: + super().__init__() + self.accumulated_loss: Optional[Tensor] = None + self.batch_outputs: Optional[List[List[STEP_OUTPUT]]] = None + self.running_loss: TensorRunningAccum = TensorRunningAccum(window_length=20) + self.batch_idx: int = 0 + self.split_idx: Optional[int] = None + self._warning_cache: WarningCache = WarningCache() + + self._hiddens: Optional[Tensor] = None + self._optimizer_freq_cumsum: Optional[int] = None + self._remaining_splits: Optional[List[Any]] = None + self._skip_backward: bool = False + + @property + def done(self) -> bool: + """Returns if all batch splits have been processed already""" + return len(self._remaining_splits) == 0 + + @property + def optimizer_freq_cumsum(self) -> int: + """Returns the cumulated sum of optimizer frequencies""" + if self._optimizer_freq_cumsum is None: + self._optimizer_freq_cumsum = np.cumsum(self.trainer.optimizer_frequencies) + return self._optimizer_freq_cumsum + + def run(self, batch: Any, batch_idx: int, dataloader_idx: int) -> AttributeDict: + """Runs all the data splits and the ``on_batch_start`` and ``on_train_batch_start`` hooks + + Args: + batch: the current batch to run the train step on + batch_idx: the index of the current batch + dataloader_idx: the index of the dataloader producing the current batch + """ + if batch is None: + self._warning_cache.warn("train_dataloader yielded None. If this was on purpose, ignore this warning...") + return AttributeDict(signal=0, training_step_output=[[]]) + + # hook + self.trainer.logger_connector.on_batch_start() + response = self.trainer.call_hook("on_batch_start") + if response == -1: + return AttributeDict(signal=-1) + + # hook + response = self.trainer.call_hook("on_train_batch_start", batch, batch_idx, dataloader_idx) + if response == -1: + return AttributeDict(signal=-1) + + super().run(batch, batch_idx, dataloader_idx) + output = AttributeDict(signal=0, training_step_output=self.batch_outputs) + self.batch_outputs = None # free memory + return output + + def reset(self) -> None: + """Resets the loop state""" + self._hiddens = None + self.batch_idx = 0 + self.batch_outputs = [[] for _ in range(len(self.trainer.optimizers))] + + def on_run_start(self, batch: Any, batch_idx: int, dataloader_idx: int): + """Splits the data into tbptt splits + + Args: + batch: the current batch to run the trainstep on + batch_idx: the index of the current batch + dataloader_idx: the index of the dataloader producing the current batch + """ + void(batch_idx, dataloader_idx) + self._remaining_splits = list(enumerate(self._tbptt_split_batch(batch))) + + def advance(self, batch, batch_idx, dataloader_idx): + """Runs the train step together with optimization (if necessary) on the current batch split + + Args: + batch: the current batch to run the training on (this is not the split!) 
+            batch_idx: the index of the current batch
+            dataloader_idx: the index of the dataloader producing the current batch
+        """
+        void(batch, dataloader_idx)
+        split_idx, split_batch = self._remaining_splits.pop(0)
+        self.batch_idx = batch_idx
+        self.split_idx = split_idx
+
+        # let logger connector extract current batch size
+        self.trainer.logger_connector.on_train_split_start(batch_idx, split_idx, split_batch)
+
+        if self.trainer.lightning_module.automatic_optimization:
+            for opt_idx, optimizer in self.get_active_optimizers(batch_idx):
+                result = self._run_optimization(batch_idx, split_batch, opt_idx, optimizer)
+                if result:
+                    self.batch_outputs[opt_idx].append(result.training_step_output)
+        else:
+            # in manual optimization, there is no looping over optimizers
+            result = self._run_optimization(batch_idx, split_batch)
+            if result:
+                self.batch_outputs[0].append(result.training_step_output)
+
+    def num_active_optimizers(self, batch_idx: Optional[int] = None) -> int:
+        """Gets the number of active optimizers based on their frequency"""
+        return len(self.get_active_optimizers(batch_idx))
+
+    def _run_optimization(
+        self, batch_idx: int, split_batch: Any, opt_idx: int = 0, optimizer: Optional[torch.optim.Optimizer] = None
+    ):
+        """Runs closure (train step + backward) together with optimization if necessary.
+
+        Args:
+            batch_idx: the index of the current batch
+            split_batch: the current tbptt split of the whole batch
+            opt_idx: the index of the current optimizer
+            optimizer: the current optimizer
+        """
+        # TODO(@awaelchli): In v1.5, when optimizer_idx gets removed from training_step in manual_optimization, change
+        #   opt_idx=0 to opt_idx=None in the signature here
+
+        # toggle model params
+        self._run_optimization_start(opt_idx, optimizer)
+
+        result = AttributeDict()
+        closure = self._make_closure(split_batch, batch_idx, opt_idx, optimizer, self._hiddens, result)
+
+        if self.should_accumulate():
+            # For gradient accumulation
+
+            # -------------------
+            # calculate loss (train step + train step end)
+            # -------------------
+            # automatic_optimization=True: perform ddp sync only when performing optimizer_step
+            # automatic_optimization=False: don't block synchronization here
+            with self.block_ddp_sync_behaviour():
+                closure()
+
+        # ------------------------------
+        # BACKWARD PASS
+        # ------------------------------
+        # gradient update with accumulated gradients
+        else:
+            if self.trainer.lightning_module.automatic_optimization:
+                self._optimizer_step(optimizer, opt_idx, batch_idx, closure)
+                if len(self.trainer.optimizers) > 1:
+                    # revert back to previous state
+                    self.trainer.lightning_module.untoggle_optimizer(opt_idx)
+            else:
+                result = self._training_step(split_batch, batch_idx, opt_idx, self._hiddens)
+
+        if not result:
+            # user decided to skip optimization
+            return result
+
+        # update running loss + reset accumulated loss
+        self._update_running_loss(result.loss)
+
+        self._process_closure_result(result)
+        return result
+
+    def _training_step_and_backward_closure(
+        self,
+        split_batch: Any,
+        batch_idx: int,
+        opt_idx: int,
+        optimizer: Optimizer,
+        hiddens: Tensor,
+        return_result: AttributeDict,
+    ) -> Optional[Tensor]:
+        """Closure for training step and backward
+
+        Args:
+            split_batch: the current tbptt split of the batch
+            batch_idx: the index of the current batch
+            opt_idx: the index of the current optimizer
+            optimizer: the current optimizer
+            hiddens: the hidden state of the recurrent net
+            return_result: the storage of the train step results
+        """
+
+        result = self.training_step_and_backward(split_batch, batch_idx, opt_idx, optimizer, hiddens)
+        if result is not None:
+            return_result.update(result)
+            return return_result.loss
+
+    def _make_closure(self, *closure_args: Any, **closure_kwargs: Any) -> Callable:
+        """ Wraps the training step closure into a partial object which will be called within ``optimizer.step``. """
+        partial_func = partial(self._training_step_and_backward_closure, *closure_args, **closure_kwargs)
+        return update_wrapper(partial_func, self._training_step_and_backward_closure)
+
+    def _process_closure_result(self, opt_closure_result: Optional[AttributeDict]) -> None:
+        """Checks if the closure result is finite and optionally breaks if it is not
+
+        Args:
+            opt_closure_result: the result of the train step wrapped in an attribute dict
+        """
+        if not opt_closure_result:
+            return
+
+        # check if loss or model weights are nan
+        if self.trainer.terminate_on_nan:
+            self._check_finite(opt_closure_result.loss)
+
+    def _on_after_backward(self, batch_idx: int, untouched_loss: Tensor) -> None:
+        """Calls ``on_after_backward`` hook and tracks loss history
+
+        Args:
+            batch_idx: the index of the current batch
+            untouched_loss: the original loss value
+        """
+
+        # insert after step hook
+        self.trainer.call_hook("on_after_backward")
+
+        # when in dev debugging track the losses
+        self.trainer.dev_debugger.track_train_loss_history(batch_idx, untouched_loss.detach())
+
+    def _check_training_step_output(self, training_step_output: STEP_OUTPUT) -> None:
+        """Sanity checks that training produced a valid output and optimizer step has already been called in manual
+        optimization.
+
+        Args:
+            training_step_output: the output of the training step (before wrapping in an AttributeDict)
+
+        """
+        if isinstance(training_step_output, Tensor) and not self.trainer.lightning_module.automatic_optimization:
+            if training_step_output.grad_fn is None:
+                # TODO: Find why - RuntimeError: Expected to mark a variable ready only once ...
+                raise MisconfigurationException("In manual optimization, `training_step` should not return a Tensor")
+        elif self.trainer.lightning_module.automatic_optimization:
+            if not any((
+                isinstance(training_step_output, Tensor),
+                (isinstance(training_step_output, Mapping)
+                 and 'loss' in training_step_output), training_step_output is None
+            )):
+                raise MisconfigurationException(
+                    "In automatic optimization, `training_step` must either return a Tensor, "
+                    "a dict with key 'loss' or None (where the step will be skipped)."
+                )
+
+    def _training_step(
+        self,
+        split_batch: Any,
+        batch_idx: int,
+        opt_idx: int,
+        hiddens: Tensor,
+    ) -> Optional[AttributeDict]:
+        """Performs the actual train step with the tied hooks.
+
+        Args:
+            split_batch: the current tbptt split of the current batch
+            batch_idx: the index of the current batch
+            opt_idx: the index of the current optimizer
+            hiddens: the model's hidden state of the previous iteration
+
+        Returns:
+            an AttributeDict containing the loss value and the training step output.
+ """ + # give the PL module a result for logging + model_ref = self.trainer.lightning_module + + with self.trainer.profiler.profile("model_forward"): + step_kwargs = self._build_kwargs(split_batch, batch_idx, opt_idx, hiddens) + + # manually capture logged metrics + model_ref._current_fx_name = 'training_step' + with self.trainer.profiler.profile("training_step"): + training_step_output = self.trainer.accelerator.training_step(step_kwargs) + self.trainer.accelerator.post_training_step() + + training_step_output = self.trainer.call_hook("training_step_end", training_step_output) + + self._check_training_step_output(training_step_output) + + training_step_output = self._process_training_step_output(training_step_output) + if training_step_output is None: + return + + closure_loss = None + loss = None + if self.trainer.lightning_module.automatic_optimization: + # accumulate loss. if accumulate_grad_batches==1, no effect + closure_loss = training_step_output.minimize / self.trainer.accumulate_grad_batches + # the loss will get scaled for amp. avoid any modifications to it + loss = closure_loss.detach().clone() + return AttributeDict(closure_loss=closure_loss, loss=loss, training_step_output=training_step_output) + + def _process_training_step_output(self, training_step_output: STEP_OUTPUT) -> Optional[ResultCollection]: + """Adds the :param:`training_step_output` to the trainer's results + + Args: + training_step_output: the output of the training step (before wrapping into an AttributeDict) + + Returns: + the updated results if the training_step's output was not None else None + """ + if training_step_output is None: + return None + + results = self.trainer._results + + loss = None + hiddens = None + results.extra = {} + + # handle dict return + if isinstance(training_step_output, dict): + loss = training_step_output.pop("loss", None) + hiddens = training_step_output.pop("hiddens", None) + # detach hiddens to avoid `RuntimeError: Trying to backward through the graph a second time` + hiddens = apply_to_collection(hiddens, Tensor, lambda t: t.detach()) + results.extra = training_step_output + + # handle scalar return + elif isinstance(training_step_output, Tensor): + loss = training_step_output + + # map to results under the hood + results.minimize = loss + self._hiddens = hiddens + + if self.trainer.move_metrics_to_cpu: + results.cpu() + return results + + def _optimizer_step( + self, optimizer: torch.optim.Optimizer, opt_idx: int, batch_idx: int, train_step_and_backward_closure: Callable + ) -> None: + """Performs the optimizer step and some sanity checking. + + Args: + optimizer: the optimizer to perform the step with + opt_idx: the index of the current :param:`optimizer` + batch_idx: the index of the current batch + train_step_and_backward_closure: the closure function performing the train step and computing the + gradients. By default called by the optimizer (if possible) + """ + model_ref = self.trainer.lightning_module + + is_lbfgs = isinstance(optimizer, torch.optim.LBFGS) + using_native_amp = self.trainer.amp_backend == AMPType.NATIVE + + # native amp + lbfgs is a no go right now + if using_native_amp and is_lbfgs: + raise MisconfigurationException( + 'native PyTorch amp and lbfgs are not compatible.' 
+                ' To request, please file a Github issue in PyTorch and tag @mcarilli'
+            )
+
+        # wraps into LightningOptimizer only for running step
+        optimizer = LightningOptimizer._to_lightning_optimizer(optimizer, self.trainer, opt_idx)
+
+        # model hook
+        model_ref.optimizer_step(
+            self.trainer.current_epoch,
+            batch_idx,
+            optimizer,
+            opt_idx,
+            train_step_and_backward_closure,
+            on_tpu=(self.trainer._device_type == DeviceType.TPU and _TPU_AVAILABLE),
+            using_native_amp=using_native_amp,
+            using_lbfgs=is_lbfgs,
+        )
+
+    def _on_before_zero_grad(self, optimizer: torch.optim.Optimizer) -> None:
+        """Calls the ``on_before_zero_grad`` hook.
+
+        Args:
+            optimizer: the current optimizer
+        """
+        self.trainer.call_hook('on_before_zero_grad', optimizer)
+
+    def _optimizer_zero_grad(self, batch_idx: int, optimizer: torch.optim.Optimizer, opt_idx: int) -> None:
+        """Zeroes out all gradients of parameters optimized by the current optimizer.
+
+        Args:
+            batch_idx: the index of the current batch
+            optimizer: the current optimizer
+            opt_idx: the index of the current optimizer
+        """
+        self.trainer.accelerator.optimizer_zero_grad(self.trainer.current_epoch, batch_idx, optimizer, opt_idx)
+
+    def _track_and_norm_grad(self, optimizer: torch.optim.Optimizer) -> Dict[str, Tensor]:
+        """Tracks gradient norms and clips the gradients of all parameters optimized by the current optimizer.
+
+        Args:
+            optimizer: the current optimizer
+        """
+        # track gradient norms
+        grad_norm_dict = {}
+        can_log = (self.trainer.global_step + 1) % self.trainer.log_every_n_steps == 0
+        should_track = float(self.trainer.track_grad_norm) > 0
+        if should_track and can_log:
+            grad_norm_dict = grad_norm(self.trainer.lightning_module, self.trainer.track_grad_norm)
+
+        # clip gradients
+        self.trainer.accelerator.clip_gradients(
+            optimizer, self.trainer.gradient_clip_val, gradient_clip_algorithm=self.trainer.gradient_clip_algorithm
+        )
+        return grad_norm_dict
+
+    def _accumulated_batches_reached(self) -> bool:
+        """Determine if accumulation will be finished by the end of the current batch."""
+        # FIXME(@awaelchli): use progress tracking of batches instead of manual batch_idx
+        return (self.batch_idx + 1) % self.trainer.accumulate_grad_batches == 0
+
+    def _num_training_batches_reached(self, is_last_batch: bool = False) -> bool:
+        """Checks whether sufficient training batches have been processed.
+
+        Args:
+            is_last_batch: Whether the current batch is the last one
+        """
+        # FIXME(@awaelchli): use progress tracking of batches instead of manual batch_idx
+        return (self.batch_idx + 1) == self.trainer.num_training_batches or is_last_batch
+
+    def should_accumulate(self) -> bool:
+        """Checks if the optimizer step should be performed or gradients should be accumulated for the current step."""
+        # checks if backward or backward + optimizer step (via closure)
+        accumulation_done = self._accumulated_batches_reached()
+        is_final_batch = self._num_training_batches_reached()
+        return not (accumulation_done or is_final_batch)
+
+    def _tbptt_split_batch(self, batch: Any) -> List[Any]:
+        """Splits a single batch into a list of sequence steps for tbptt.
+
+        Args:
+            batch: the current batch to split
+        """
+        splits = [batch]
+        if self.trainer.truncated_bptt_steps is not None:
+            model_ref = self.trainer.lightning_module
+            with self.trainer.profiler.profile("tbptt_split_batch"):
+                splits = model_ref.tbptt_split_batch(batch, self.trainer.truncated_bptt_steps)
+        return splits
+
+    def _run_optimization_start(self, opt_idx: int, optimizer: torch.optim.Optimizer) -> None:
+        """Toggles the optimizer to ensure the correct one is used and prevent dangling grads.
+
+        Args:
+            opt_idx: the index of the optimizer to use
+            optimizer: the optimizer to use
+
+        """
+        # make sure only the gradients of the current optimizer's parameters are calculated
+        # in the training step to prevent dangling gradients in multiple-optimizer setup.
+        if self.trainer.lightning_module.automatic_optimization and len(self.trainer.optimizers) > 1:
+            model = self.trainer.lightning_module
+            model.toggle_optimizer(optimizer, opt_idx)
+
+    @contextmanager
+    def block_ddp_sync_behaviour(self, should_block_sync: bool = False) -> Generator[None, None, None]:
+        """
+        automatic_optimization = True
+        Blocks ddp sync gradients behaviour on backwards pass.
+        This is useful for skipping sync when accumulating gradients, reducing communication overhead
+
+        automatic_optimization = False
+        do not block ddp gradient sync when using manual optimization
+        as gradients are needed within the training step
+
+        Returns:
+            context manager with sync behaviour off
+        """
+        if (
+            isinstance(self.trainer.training_type_plugin, ParallelPlugin)
+            and (self.trainer.lightning_module.automatic_optimization or should_block_sync)
+        ):
+            with self.trainer.training_type_plugin.block_backward_sync():
+                yield None
+        else:
+            yield None
+
+    def training_step_and_backward(
+        self,
+        split_batch: Any,
+        batch_idx: int,
+        opt_idx: int,
+        optimizer: torch.optim.Optimizer,
+        hiddens: Optional[Tensor],
+    ) -> STEP_OUTPUT:
+        """Wrap forward, zero_grad and backward in a closure so second order methods work"""
+        with self.trainer.profiler.profile("training_step_and_backward"):
+            # lightning module hook
+            result = self._training_step(split_batch, batch_idx, opt_idx, hiddens)
+
+            if not self._skip_backward and self.trainer.lightning_module.automatic_optimization:
+                is_first_batch_to_accumulate = batch_idx % self.trainer.accumulate_grad_batches == 0
+
+                if is_first_batch_to_accumulate:
+                    self._on_before_zero_grad(optimizer)
+                    self._optimizer_zero_grad(batch_idx, optimizer, opt_idx)
+
+                # backward pass
+                if result is not None:
+                    with self.trainer.profiler.profile("backward"):
+                        self.backward(result, optimizer, opt_idx)
+
+                    # hook - call this hook only
+                    # when gradients have finished to accumulate
+                    if not self.should_accumulate():
+                        self._on_after_backward(batch_idx, result.loss)
+
+                    # check if loss or model weights are nan
+                    if self.trainer.terminate_on_nan:
+                        self._check_finite(result.loss)
+
+                else:
+                    self._warning_cache.warn(
+                        "training_step returned None. If this was on purpose, ignore this warning..."
+                    )
+
+        return result
+
+    def _check_finite(self, loss: Tensor) -> None:
+        """Checks for finite parameters and loss values.
+
+        Args:
+            loss: the loss value to check to be finite
+        """
+        if not torch.isfinite(loss).all():
+            raise ValueError(f'The loss returned in `training_step` is {loss}.')
+        model = self.trainer.lightning_module
+        detect_nan_parameters(model)
+
+    def backward(
+        self, result: STEP_OUTPUT, optimizer: torch.optim.Optimizer, opt_idx: int, *args: Any, **kwargs: Any
+    ) -> None:
+        """Performs the backward step.
+
+        Args:
+            result: The output of the train step (including the loss value)
+            optimizer: The optimizer optimizing the gradients to call backward for
+            opt_idx: the index of the current optimizer
+        """
+        self.trainer.dev_debugger.track_event("backward_call")
+
+        should_accumulate = self.should_accumulate()
+
+        # backward can be called manually in the training loop
+        if isinstance(result, Tensor):
+            self.trainer.accelerator.backward(result, optimizer, opt_idx, should_accumulate, *args, **kwargs)
+        else:
+            result.closure_loss = self.trainer.accelerator.backward(
+                result.closure_loss, optimizer, opt_idx, should_accumulate, *args, **kwargs
+            )
+
+        if not self.should_accumulate():
+            # track gradients
+            grad_norm_dict = self._track_and_norm_grad(optimizer=optimizer)
+            if grad_norm_dict:
+                self.trainer.lightning_module._current_fx_name = "on_after_backward"
+                self.trainer.lightning_module.log_grad_norm(grad_norm_dict)
+
+    def _update_running_loss(self, current_loss: Tensor) -> None:
+        """Updates the running loss value with the current value"""
+        if self.trainer.lightning_module.automatic_optimization:
+            # track total loss for logging (avoid mem leaks)
+            self.accumulated_loss.append(current_loss)
+
+        accumulated_loss = self.accumulated_loss.mean()
+
+        if accumulated_loss is not None:
+            # calculate running loss for display
+            self.running_loss.append(self.accumulated_loss.mean() * self.trainer.accumulate_grad_batches)
+
+        # reset for next set of accumulated grads
+        self.accumulated_loss.reset()
+
+    def get_active_optimizers(self, batch_idx: Optional[int] = None) -> List[Tuple[int, Optimizer]]:
+        """
+        Returns the currently active optimizers. When multiple optimizers are used with different frequencies,
+        only one of the optimizers is active at a time.
+
+        Returns:
+            A list of tuples (opt_idx, optimizer) of currently active optimizers.
+        """
+        if not self.trainer.optimizer_frequencies:
+            # call training_step once per optimizer
+            return list(enumerate(self.trainer.optimizers))
+
+        optimizers_loop_length = self.optimizer_freq_cumsum[-1]
+        current_place_in_loop = batch_idx % optimizers_loop_length
+
+        # find optimizer index by looking for the first {item > current_place} in the cumsum list
+        opt_idx = int(np.argmax(self.optimizer_freq_cumsum > current_place_in_loop))
+        return [(opt_idx, self.trainer.optimizers[opt_idx])]
+
+    def _build_kwargs(self, batch: Any, batch_idx: int, opt_idx: int, hiddens: Optional[Tensor]) -> Dict[str, Any]:
+        """Builds the keyword arguments for training_step
+
+        Args:
+            batch: the batch to train on
+            batch_idx: the index of the current batch
+            opt_idx: the index of the current optimizer
+            hiddens: the hidden state of the previous RNN iteration
+
+        Returns:
+            the keyword arguments for the training step
+        """
+        # enable not needing to add opt_idx to training_step
+        step_kwargs = OrderedDict([('batch', batch), ('batch_idx', batch_idx)])
+
+        lightning_module = self.trainer.lightning_module
+
+        if len(self.trainer.optimizers) > 1:
+            training_step_fx = getattr(lightning_module, "training_step")
+            has_opt_idx_in_train_step = is_param_in_hook_signature(training_step_fx, "optimizer_idx")
+            if has_opt_idx_in_train_step:
+                if not lightning_module.automatic_optimization:
+                    self._warning_cache.deprecation(
+                        "`training_step` hook signature has changed in v1.3."
Support for" + " the old signature will be removed in v1.5" + ) + step_kwargs['optimizer_idx'] = opt_idx + elif not has_opt_idx_in_train_step and lightning_module.automatic_optimization: + raise ValueError( + f"Your LightningModule defines {len(self.trainer.optimizers)} optimizers but" + ' `training_step` is missing the `optimizer_idx` argument.' + ) + + # pass hiddens if using tbptt + if self._truncated_bptt_enabled(): + step_kwargs['hiddens'] = hiddens + + return step_kwargs + + def _truncated_bptt_enabled(self) -> bool: + """ Temporary tbptt utilities until this flag is fully migrated to the lightning module. """ + return self._truncated_bptt_steps() > 0 + + def _truncated_bptt_steps(self) -> int: + """Returns the number of tbptt steps""" + lightning_module = self.trainer.lightning_module + # Give precedence to the LightningModule as the Trainer flag will be removed in v1.5 + if lightning_module.truncated_bptt_steps > 0: + return lightning_module.truncated_bptt_steps + return self.trainer.truncated_bptt_steps or 0 diff --git a/pytorch_lightning/loops/dataloader/__init__.py b/pytorch_lightning/loops/dataloader/__init__.py new file mode 100644 index 0000000000000..db2b2f7926d50 --- /dev/null +++ b/pytorch_lightning/loops/dataloader/__init__.py @@ -0,0 +1,17 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from pytorch_lightning.loops.dataloader.dataloader_loop import DataLoaderLoop # noqa: F401 +from pytorch_lightning.loops.dataloader.evaluation_loop import EvaluationLoop # noqa: F401 +from pytorch_lightning.loops.dataloader.prediction_loop import PredictionLoop # noqa: F401 diff --git a/pytorch_lightning/loops/dataloader/dataloader_loop.py b/pytorch_lightning/loops/dataloader/dataloader_loop.py new file mode 100644 index 0000000000000..ce255b73d0bba --- /dev/null +++ b/pytorch_lightning/loops/dataloader/dataloader_loop.py @@ -0,0 +1,53 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/pytorch_lightning/loops/dataloader/__init__.py b/pytorch_lightning/loops/dataloader/__init__.py
new file mode 100644
index 0000000000000..db2b2f7926d50
--- /dev/null
+++ b/pytorch_lightning/loops/dataloader/__init__.py
@@ -0,0 +1,17 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from pytorch_lightning.loops.dataloader.dataloader_loop import DataLoaderLoop  # noqa: F401
+from pytorch_lightning.loops.dataloader.evaluation_loop import EvaluationLoop  # noqa: F401
+from pytorch_lightning.loops.dataloader.prediction_loop import PredictionLoop  # noqa: F401
diff --git a/pytorch_lightning/loops/dataloader/dataloader_loop.py b/pytorch_lightning/loops/dataloader/dataloader_loop.py
new file mode 100644
index 0000000000000..ce255b73d0bba
--- /dev/null
+++ b/pytorch_lightning/loops/dataloader/dataloader_loop.py
@@ -0,0 +1,53 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from abc import abstractmethod
+from typing import Sequence
+
+from torch.utils.data import DataLoader
+
+from pytorch_lightning.loops.base import Loop
+
+
+class DataLoaderLoop(Loop):
+    """Base class to loop over all dataloaders"""
+
+    @property
+    @abstractmethod
+    def dataloaders(self) -> Sequence[DataLoader]:
+        """Returns the dataloaders to loop over"""
+
+    @property
+    def current_dataloader_idx(self) -> int:
+        """Returns the index of the current dataloader"""
+        return self.iteration_count
+
+    @property
+    def current_dataloader(self) -> DataLoader:
+        """Returns the current dataloader"""
+        return self.dataloaders[self.current_dataloader_idx]
+
+    @property
+    def num_dataloaders(self) -> int:
+        """Returns the number of dataloaders present"""
+        return len(self.dataloaders) if self.dataloaders is not None else 0
+
+    @property
+    def done(self) -> bool:
+        """Returns whether all dataloaders have been processed"""
+        return self.current_dataloader_idx >= self.num_dataloaders
+
+    def reset(self) -> None:
+        """Resets the internal state"""
+        self.iteration_count = 0
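[A small illustration — not part of the patch — of how `iteration_count` from `Loop` drives `current_dataloader` in this base class.]

    import torch
    from torch.utils.data import DataLoader, TensorDataset

    from pytorch_lightning.loops.dataloader.dataloader_loop import DataLoaderLoop

    class SizesLoop(DataLoaderLoop):
        """Toy loop: visits each dataloader once and prints its length."""

        @property
        def dataloaders(self):
            return [
                DataLoader(TensorDataset(torch.zeros(4))),
                DataLoader(TensorDataset(torch.zeros(2))),
            ]

        def advance(self) -> None:
            # `current_dataloader` follows `current_dataloader_idx`, i.e. `iteration_count`
            print(len(self.current_dataloader.dataset))

    SizesLoop().run()  # prints 4, then 2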
diff --git a/pytorch_lightning/loops/dataloader/evaluation_loop.py b/pytorch_lightning/loops/dataloader/evaluation_loop.py
new file mode 100644
index 0000000000000..02d802fb3fc15
--- /dev/null
+++ b/pytorch_lightning/loops/dataloader/evaluation_loop.py
@@ -0,0 +1,269 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any, List, Optional, Sequence, Union
+
+from deprecate.utils import void
+from torch.utils.data.dataloader import DataLoader
+
+import pytorch_lightning as pl
+from pytorch_lightning.loops.dataloader import DataLoaderLoop
+from pytorch_lightning.loops.epoch import EvaluationEpochLoop
+from pytorch_lightning.trainer.connectors.logger_connector.result import ResultCollection
+from pytorch_lightning.trainer.states import TrainerFn
+from pytorch_lightning.utilities.model_helpers import is_overridden
+from pytorch_lightning.utilities.types import EPOCH_OUTPUT
+
+
+class EvaluationLoop(DataLoaderLoop):
+    """Loops over all dataloaders for evaluation."""
+
+    def __init__(self):
+        super().__init__()
+        self._max_batches: Optional[Union[int, Sequence[int]]] = None
+        self.outputs = []
+
+        self.epoch_loop = EvaluationEpochLoop()
+
+        self._results = ResultCollection(training=False)
+        self._has_run: bool = False
+
+    @property
+    def num_dataloaders(self) -> int:
+        """Returns the total number of dataloaders"""
+        # case where user does:
+        # return dl1, dl2
+        dataloaders = self.dataloaders
+        if dataloaders is None:
+            return 0
+        length = len(dataloaders)
+        if length > 0 and isinstance(dataloaders[0], (list, tuple)):
+            length = len(dataloaders[0])
+        return length
+
+    @property
+    def dataloaders(self) -> Sequence[DataLoader]:
+        """Returns the validation or test dataloaders"""
+        if self.trainer.testing:
+            return self.trainer.test_dataloaders
+        return self.trainer.val_dataloaders
+
+    @property
+    def predictions(self):
+        """Returns the predictions from all dataloaders"""
+        return self.epoch_loop.predictions
+
+    def connect(self, trainer: "pl.Trainer", *args: Any, **kwargs: Any) -> None:
+        """Connects the loop to everything necessary (like trainer and accelerators)"""
+        super().connect(trainer, *args, **kwargs)
+        self.epoch_loop.connect(trainer)
+
+    @property
+    def done(self) -> bool:
+        """Returns whether all dataloaders are processed or evaluation should be skipped altogether"""
+        return (self.current_dataloader_idx >= len(self.dataloaders)) or self.skip
+
+    @property
+    def skip(self) -> bool:
+        """Returns whether the evaluation should be skipped."""
+        max_batches = self.get_max_batches()
+        return sum(max_batches) == 0
+
+    def reset(self) -> None:
+        """Resets the internal state of the loop"""
+        self.iteration_count = 0
+        self._max_batches = self.get_max_batches()
+        # bookkeeping
+        self.outputs = []
+
+        if isinstance(self._max_batches, int):
+            self._max_batches = [self._max_batches] * len(self.dataloaders)
+
+    def on_skip(self) -> List:
+        return []
+
+    def on_run_start(self, *args: Any, **kwargs: Any) -> None:
+        """Runs the ``on_evaluation_model_eval``, ``on_evaluation_start`` and ``on_evaluation_epoch_start`` hooks"""
+        void(*args, **kwargs)
+        # hook
+        self.on_evaluation_model_eval()
+        self.trainer.lightning_module.zero_grad()
+        self.on_evaluation_start()
+        self.on_evaluation_epoch_start()
+
+    def advance(self, *args: Any, **kwargs: Any) -> None:
+        """Performs evaluation on one single dataloader"""
+        void(*args, **kwargs)
+        dataloader = self.trainer.accelerator.process_dataloader(self.current_dataloader)
+        dataloader_iter = enumerate(dataloader)
+        dl_max_batches = self._max_batches[self.current_dataloader_idx]
+
+        dl_outputs = self.epoch_loop.run(
+            dataloader_iter,
+            self.current_dataloader_idx,
+            dl_max_batches,
+            self.num_dataloaders,
+        )
+
+        # store batch level output per dataloader
+        if self.should_track_batch_outputs_for_epoch_end:
+            self.outputs.append(dl_outputs)
+
+        if not self.trainer.sanity_checking:
+            # indicate the loop has run
+            self._has_run = True
+
+    def on_run_end(self) -> Any:
+        """Runs the ``on_evaluation_epoch_end`` hook"""
+        outputs = self.outputs
+
+        # free memory
+        self.outputs = []
+
+        # with a single dataloader don't pass a 2D list
+        if len(outputs) > 0 and self.num_dataloaders == 1:
+            outputs = outputs[0]
+
+        # lightning module method
+        self.evaluation_epoch_end(outputs)
+
+        # hook
+        self.on_evaluation_epoch_end()
+
+        # log epoch metrics
+        eval_loop_results = self.trainer.logger_connector.update_eval_epoch_metrics()
+
+        # hook
+        self.on_evaluation_end()
+
+        # save predictions to disk
+        self.epoch_loop.predictions.to_disk()
+
+        # enable train mode again
+        self.on_evaluation_model_train()
+
+        return eval_loop_results
+
+    def get_max_batches(self) -> List[Union[int, float]]:
+        """Returns the max number of batches for each dataloader"""
+        if self.trainer.testing:
+            max_batches = self.trainer.num_test_batches
+        else:
+            if self.trainer.sanity_checking:
+                self.trainer.num_sanity_val_batches = [
+                    min(self.trainer.num_sanity_val_steps, val_batches) for val_batches in self.trainer.num_val_batches
+                ]
+                max_batches = self.trainer.num_sanity_val_batches
+            else:
+                max_batches = self.trainer.num_val_batches
+        return max_batches
+
+    def reload_evaluation_dataloaders(self) -> None:
+        """Reloads dataloaders if necessary"""
+        model = self.trainer.lightning_module
+        if self.trainer.testing:
+            self.trainer.reset_test_dataloader(model)
+        elif self.trainer.val_dataloaders is None or self.trainer.reload_dataloaders_every_epoch:
+            self.trainer.reset_val_dataloader(model)
+
+    def on_evaluation_start(self, *args: Any, **kwargs: Any) -> None:
+        """Runs ``on_{validation/test}_start`` hooks"""
+        self.should_track_batch_outputs_for_epoch_end: bool = self._should_track_batch_outputs_for_epoch_end()
+
+        assert self._results is not None
+        self._results.to(device=self.trainer.lightning_module.device)
+
+        if self.trainer.testing:
+            self.trainer.call_hook("on_test_start", *args, **kwargs)
+        else:
+            self.trainer.call_hook("on_validation_start", *args, **kwargs)
+
+    def on_evaluation_model_eval(self) -> None:
+        """Sets model to eval mode"""
+        model_ref = self.trainer.lightning_module
+        if self.trainer.testing:
+            model_ref.on_test_model_eval()
+        else:
+            model_ref.on_validation_model_eval()
+
+    def on_evaluation_model_train(self) -> None:
+        """Sets model to train mode"""
+        model_ref = self.trainer.lightning_module
+        if self.trainer.testing:
+            model_ref.on_test_model_train()
+        else:
+            model_ref.on_validation_model_train()
+
+    def on_evaluation_end(self, *args: Any, **kwargs: Any) -> None:
+        """Runs ``on_{validation/test}_end`` hook"""
+        if self.trainer.testing:
+            self.trainer.call_hook("on_test_end", *args, **kwargs)
+        else:
+            self.trainer.call_hook("on_validation_end", *args, **kwargs)
+
+        if self.trainer.state.fn != TrainerFn.FITTING:
+            # summarize profile results
+            self.trainer.profiler.describe()
+
+        # reset any `torchmetrics.Metric` and the logger connector state
+        self.trainer.logger_connector.reset(metrics=True)
+
+    def on_evaluation_epoch_start(self, *args: Any, **kwargs: Any) -> None:
+        """Runs ``on_epoch_start`` and ``on_{validation/test}_epoch_start`` hooks"""
+        self.trainer.logger_connector.on_epoch_start()
+        self.trainer.call_hook("on_epoch_start", *args, **kwargs)
+
+        if self.trainer.testing:
+            self.trainer.call_hook("on_test_epoch_start", *args, **kwargs)
+        else:
+            self.trainer.call_hook("on_validation_epoch_start", *args, **kwargs)
+
+    def _should_track_batch_outputs_for_epoch_end(self) -> bool:
+        """Whether the batch outputs should be stored for later usage"""
+        model = self.trainer.lightning_module
+        if self.trainer.testing:
+            return is_overridden("test_epoch_end", model)
+        return is_overridden("validation_epoch_end", model)
+
+    def evaluation_epoch_end(self, outputs: EPOCH_OUTPUT) -> None:
+        """Runs ``{validation/test}_epoch_end``"""
+        # inform logger the batch loop has finished
+        self.trainer.logger_connector.epoch_end_reached()
+
+        # call the model epoch end
+        model = self.trainer.lightning_module
+
+        # unset dataloader_idx in model
+        model._current_dataloader_idx = None
+
+        if self.trainer.testing:
+            if is_overridden("test_epoch_end", model):
+                model._current_fx_name = "test_epoch_end"
+                model.test_epoch_end(outputs)
+
+        else:
+            if is_overridden("validation_epoch_end", model):
+                model._current_fx_name = "validation_epoch_end"
+                model.validation_epoch_end(outputs)
+
+    def on_evaluation_epoch_end(self) -> None:
+        """Runs ``on_{validation/test}_epoch_end`` hook"""
+        hook_name = ("on_test_epoch_end" if self.trainer.testing else "on_validation_epoch_end")
+        self.trainer.call_hook(hook_name)
+        self.trainer.call_hook("on_epoch_end")
+        self.trainer.logger_connector.on_epoch_end()
+
+    def teardown(self) -> None:
+        self._results.cpu()
+        self.epoch_loop.teardown()
diff --git a/pytorch_lightning/loops/dataloader/prediction_loop.py b/pytorch_lightning/loops/dataloader/prediction_loop.py
new file mode 100644
index 0000000000000..37b4b83a25ebe
--- /dev/null
+++ b/pytorch_lightning/loops/dataloader/prediction_loop.py
@@ -0,0 +1,151 @@
+from typing import Any, List, Optional, Sequence, Union
+
+from deprecate.utils import void
+from torch.utils.data import DataLoader
+
+import pytorch_lightning as pl
+from pytorch_lightning.loops.dataloader.dataloader_loop import DataLoaderLoop
+from pytorch_lightning.loops.epoch.prediction_epoch_loop import PredictionEpochLoop
+from pytorch_lightning.plugins import DDPSpawnPlugin
+from pytorch_lightning.utilities.exceptions import MisconfigurationException
+from pytorch_lightning.utilities.types import _PREDICT_OUTPUT
+
+
+class PredictionLoop(DataLoaderLoop):
+    """Loop to run over dataloaders for prediction"""
+
+    def __init__(self):
+        super().__init__()
+        self.predictions: Optional[List[List[Any]]] = None
+        self.epoch_batch_indices: Optional[List[List[int]]] = None
+
+        self.epoch_loop: PredictionEpochLoop = PredictionEpochLoop()
+
+        self._results = None  # for `trainer._results` access
+        self._return_predictions: bool = False
+
+    @property
+    def return_predictions(self) -> bool:
+        """Whether to return the predictions or not"""
+        return self._return_predictions
+
+    @return_predictions.setter
+    def return_predictions(self, return_predictions: Optional[bool] = None) -> None:
+        # ``DDPSpawnPlugin`` plugins and derivatives don't support returning predictions.
+        is_ddp_spawn = isinstance(self.trainer.training_type_plugin, DDPSpawnPlugin)
+        if return_predictions and is_ddp_spawn:
+            raise MisconfigurationException(
+                "`return_predictions` should be set to `False` when using the `DDPSpawnPlugin` or its child classes. "
+                f"Found {return_predictions} with training_type_plugin {type(self.trainer.training_type_plugin)}."
+            )
+        # For plugins other than ``DDPSpawnPlugin``, `return_predictions` is True by default unless the user decides otherwise.
+        self._return_predictions = not is_ddp_spawn if return_predictions is None else return_predictions
+
+    @property
+    def num_dataloaders(self) -> int:
+        """Returns the number of prediction dataloaders"""
+        # case where user does:
+        # return dl1, dl2
+        dataloaders = self.dataloaders
+        length = len(dataloaders)
+        if len(dataloaders) > 0 and isinstance(dataloaders[0], (list, tuple)):
+            length = len(dataloaders[0])
+        return length
+
+    @property
+    def max_batches(self) -> List[int]:
+        """The max number of batches this loop will run for each dataloader."""
+        max_batches = self.trainer.num_predict_batches
+        if isinstance(max_batches, int):
+            max_batches = [max_batches] * len(self.dataloaders)
+        return max_batches
+
+    @property
+    def dataloaders(self) -> Sequence[DataLoader]:
+        """Returns all prediction dataloaders"""
+        return self.trainer.predict_dataloaders
+
+    @property
+    def done(self) -> bool:
+        """Whether prediction is finished: Max batches run or all dataloaders processed"""
+        return self.current_dataloader_idx >= len(self.dataloaders)
+
+    @property
+    def skip(self) -> bool:
+        return sum(self.max_batches) == 0
+
+    def connect(self, trainer: 'pl.Trainer', *args: Any, **kwargs: Any) -> None:
+        """Connects the loop with all necessary things (like trainer)"""
+        super().connect(trainer, *args, **kwargs)
+        self.epoch_loop.connect(trainer, *args, **kwargs)
+
+    def reset(self) -> None:
+        """Resets the internal state of the loop for a new run"""
+        super().reset()
+        self.predictions = []
+        self.epoch_batch_indices = []
+
+    def on_run_start(self) -> None:
+        """Calls ``on_predict_start`` hook"""
+        self.on_predict_start()
+
+    def advance(self, *args: Any, **kwargs: Any) -> None:
+        """Predicts one entire dataloader"""
+        void(*args, **kwargs)
+        dataloader = self.trainer.accelerator.process_dataloader(self.current_dataloader)
+        dataloader_iter = enumerate(dataloader)
+        dl_max_batches = self.max_batches[self.current_dataloader_idx]
+
+        dl_predictions, dl_batch_indices = self.epoch_loop.run(
+            dataloader_iter, self.current_dataloader_idx, dl_max_batches, self.num_dataloaders, self.return_predictions
+        )
+        self.predictions.append(dl_predictions)
+        self.epoch_batch_indices.append(dl_batch_indices)
+
+    def on_run_end(self) -> Union[List[Any], List[List[Any]]]:
+        """Calls ``on_predict_epoch_end`` and ``on_predict_end`` hooks and returns results from all dataloaders"""
+        results = self.on_predict_epoch_end()
+        self.on_predict_end()
+        return results
+
+    def on_predict_start(self) -> None:
+        """
+        Sets model to eval mode and disables gradients. Also calls ``on_predict_start`` and
+        ``on_predict_epoch_start`` hooks.
+        """
+        # enable eval mode + no grads
+        self.on_predict_model_eval()
+        self.trainer.lightning_module.zero_grad()
+
+        # hook
+        self.trainer.call_hook("on_predict_start")
+        self.trainer.call_hook("on_predict_epoch_start")
+
+    def on_predict_epoch_end(self) -> Optional[_PREDICT_OUTPUT]:
+        """Calls ``on_predict_epoch_end`` hook.
+
+        Returns:
+            the results for all dataloaders
+        """
+        self.trainer.profiler.describe()
+
+        results = self.predictions
+
+        self.trainer.call_hook("on_predict_epoch_end", results)
+
+        if self.return_predictions:
+            return results[0] if self.num_dataloaders == 1 else results
+
+    def on_predict_end(self) -> None:
+        """Resets previous gradient status and calls ``on_predict_end`` hook"""
+        # clear memory. the predictions are extracted in `on_predict_epoch_end`.
+        self.predictions = []
+        self.epoch_batch_indices = []
+
+        # hook
+        self.trainer.call_hook("on_predict_end")
+
+    def on_predict_model_eval(self):
+        """Calls ``on_predict_model_eval`` hook"""
+        model_ref = self.trainer.lightning_module
+        model_ref.on_predict_model_eval()
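[From the user's perspective, this loop backs `Trainer.predict`. A hedged usage sketch; `model` and `predict_loader` are hypothetical placeholders.]

    from pytorch_lightning import Trainer

    trainer = Trainer()
    # a flat list with one dataloader, a list per dataloader with several;
    # under `DDPSpawnPlugin` predictions cannot be returned, matching the guard above
    predictions = trainer.predict(model, dataloaders=predict_loader, return_predictions=True)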
+ """ + + def __init__(self) -> None: + super().__init__() + self.predictions: Optional[PredictionCollection] = None + self.dataloader: Optional[Iterator] = None + self.dl_max_batches: Optional[int] = None + self.dataloader_idx: Optional[int] = None + self.num_dataloaders: Optional[int] = None + self.outputs: List[STEP_OUTPUT] = [] + + @property + def done(self) -> bool: + """Returns ``True`` if the current iteration count reaches the number of dataloader batches.""" + return self.iteration_count >= self.dl_max_batches + + def reset(self) -> None: + """Resets the loop's internal state.""" + self.iteration_count = 0 + self.predictions = PredictionCollection(self.trainer.global_rank, self.trainer.world_size) + self.dl_max_batches = None + self.dataloader_idx = None + self.num_dataloaders = None + self.outputs = [] + + def on_run_start( + self, + dataloader_iter: Iterator, + dataloader_idx: int, + dl_max_batches: int, + num_dataloaders: int, + ) -> None: + """Adds the passed arguments to the loop's state if necessary + + Args: + dataloader_iter: iterator over the dataloader + dataloader_idx: index of the current dataloader + dl_max_batches: maximum number of batches the dataloader can produce + num_dataloaders: the total number of dataloaders + """ + void(dataloader_iter) + + self.dl_max_batches = dl_max_batches + self.dataloader_idx = dataloader_idx + self.num_dataloaders = num_dataloaders + + def advance( + self, + dataloader_iter: Iterator, + dataloader_idx: int, + dl_max_batches: int, + num_dataloaders: int, + ) -> None: + """Calls the evaluation step with the corresponding hooks and updates the logger connector. + + Args: + dataloader_iter: iterator over the dataloader + dataloader_idx: index of the current dataloader + dl_max_batches: maximum number of batches the dataloader can produce + num_dataloaders: the total number of dataloaders + + Raises: + StopIteration: If the current batch is None + """ + void(dl_max_batches, num_dataloaders) + + batch_idx, batch = next(dataloader_iter) + + if batch is None: + raise StopIteration + + # hook + self.on_evaluation_batch_start(batch, batch_idx, dataloader_idx) + + # lightning module methods + with self.trainer.profiler.profile("evaluation_step_and_end"): + output = self.evaluation_step(batch, batch_idx, dataloader_idx) + output = self.evaluation_step_end(output) + + # hook + store predictions + self.on_evaluation_batch_end(output, batch, batch_idx, dataloader_idx) + + # log batch metrics + self.trainer.logger_connector.update_eval_step_metrics() + + # track epoch level outputs + self.outputs = self._track_output_for_epoch_end(self.outputs, output) + + def on_run_end(self) -> List[STEP_OUTPUT]: + """Returns the outputs of the whole run""" + outputs = self.outputs + # free memory + self.outputs = [] + return outputs + + def evaluation_step(self, batch: Any, batch_idx: int, dataloader_idx: int) -> Optional[STEP_OUTPUT]: + """The evaluation step (validation_step or test_step depending on the trainer's state). + + Args: + batch: The current batch to run through the step. 
+ batch_idx: The index of the current batch + dataloader_idx: the index of the dataloader producing the current batch + + Returns: + the outputs of the step + """ + # configure step_kwargs + step_kwargs = self._build_kwargs(batch, batch_idx, dataloader_idx) + + if self.trainer.testing: + self.trainer.lightning_module._current_fx_name = "test_step" + with self.trainer.profiler.profile("test_step"): + output = self.trainer.accelerator.test_step(step_kwargs) + else: + self.trainer.lightning_module._current_fx_name = "validation_step" + with self.trainer.profiler.profile("validation_step"): + output = self.trainer.accelerator.validation_step(step_kwargs) + + return output + + def evaluation_step_end(self, *args: Any, **kwargs: Any) -> Optional[STEP_OUTPUT]: + """Calls the `{validation/test}_step_end` hook""" + hook_name = "test_step_end" if self.trainer.testing else "validation_step_end" + output = self.trainer.call_hook(hook_name, *args, **kwargs) + return output + + def on_evaluation_batch_start(self, batch: Any, batch_idx: int, dataloader_idx: int) -> None: + """Calls the ``on_{validation/test}_batch_start`` hook. + + Args: + batch: The current batch to run through the step + batch_idx: The index of the current batch + dataloader_idx: The index of the dataloader producing the current batch + + Raises: + AssertionError: If the number of dataloaders is None (has not yet been set). + """ + self.trainer.logger_connector.on_batch_start() + + assert self.num_dataloaders is not None + self.trainer.logger_connector.on_evaluation_batch_start(batch, batch_idx, dataloader_idx, self.num_dataloaders) + + if self.trainer.testing: + self.trainer.call_hook("on_test_batch_start", batch, batch_idx, dataloader_idx) + else: + self.trainer.call_hook("on_validation_batch_start", batch, batch_idx, dataloader_idx) + + def on_evaluation_batch_end( + self, + output: Optional[STEP_OUTPUT], + batch: Any, + batch_idx: int, + dataloader_idx: int, + ) -> None: + """The ``on_{validation/test}_batch_end`` hook. 
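+
+        Calls the matching trainer hook, signals the end of the batch to the logger
+        connector and stores the step predictions.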
+
+        Args:
+            output: The output of the performed step
+            batch: The input batch for the step
+            batch_idx: The index of the current batch
+            dataloader_idx: Index of the dataloader producing the current batch
+        """
+        hook_name = "on_test_batch_end" if self.trainer.testing else "on_validation_batch_end"
+        self.trainer.call_hook(hook_name, output, batch, batch_idx, dataloader_idx)
+
+        self.trainer.logger_connector.on_batch_end()
+
+        # store predictions if do_write_predictions and track eval loss history
+        self.store_predictions(output, batch_idx, dataloader_idx)
+
+    def store_predictions(self, output: Optional[STEP_OUTPUT], batch_idx: int, dataloader_idx: int) -> None:
+        """Stores the predictions in the prediction collection (only if running in test mode)
+
+        Args:
+            output: the outputs of the current step
+            batch_idx: the index of the current batch
+            dataloader_idx: the index of the dataloader producing the current batch
+        """
+        # Add step predictions to prediction collection to write later
+        if output is not None and self.predictions is not None:
+            if isinstance(output, ResultCollection) and self.trainer.testing:
+                self.predictions.add(output.pop("predictions", None))
+
+        # track debug metrics
+        self.trainer.dev_debugger.track_eval_loss_history(batch_idx, dataloader_idx, output)
+
+    def _build_kwargs(self, batch: Any, batch_idx: int, dataloader_idx: int) -> Dict[str, Union[Any, int]]:
+        """Helper function to build the arguments for the current step
+
+        Args:
+            batch: The current batch to run through the step
+            batch_idx: the index of the current batch
+            dataloader_idx: the index of the dataloader producing the current batch
+
+        Returns:
+            the keyword arguments to pass to the step function
+        """
+        # make dataloader_idx arg in validation_step optional
+        step_kwargs = OrderedDict([("batch", batch), ("batch_idx", batch_idx)])
+
+        multiple_val_loaders = not self.trainer.testing and self.num_dataloaders > 1
+        multiple_test_loaders = self.trainer.testing and self.num_dataloaders > 1
+
+        if multiple_test_loaders or multiple_val_loaders:
+            step_kwargs["dataloader_idx"] = dataloader_idx
+
+        return step_kwargs
+
+    def _track_output_for_epoch_end(
+        self,
+        outputs: List[Union[ResultCollection, Dict, Tensor]],
+        output: Optional[Union[ResultCollection, Dict, Tensor]],
+    ) -> List[Union[ResultCollection, Dict, Tensor]]:
+        if output is not None:
+            if isinstance(output, ResultCollection):
+                output = output.detach()
+                if self.trainer.move_metrics_to_cpu:
+                    output = output.cpu()
+            elif isinstance(output, dict):
+                output = recursive_detach(output, to_cpu=self.trainer.move_metrics_to_cpu)
+            elif isinstance(output, Tensor) and output.is_cuda and self.trainer.move_metrics_to_cpu:
+                output = output.cpu()
+            outputs.append(output)
+        return outputs
diff --git a/pytorch_lightning/loops/epoch/prediction_epoch_loop.py b/pytorch_lightning/loops/epoch/prediction_epoch_loop.py
new file mode 100644
index 0000000000000..29a76793b4648
--- /dev/null
+++ b/pytorch_lightning/loops/epoch/prediction_epoch_loop.py
@@ -0,0 +1,151 @@
+from collections import OrderedDict
+from typing import Any, Dict, Iterator, List, Optional, Tuple
+
+from deprecate import void
+
+from pytorch_lightning.loops.base import Loop
+from pytorch_lightning.overrides.distributed import IndexBatchSamplerWrapper
+from pytorch_lightning.utilities.warnings import WarningCache
+
+
+class PredictionEpochLoop(Loop):
+    """Loop performing prediction on arbitrary sequentially used dataloaders."""
+
+    def __init__(self) -> None:
+        super().__init__()
+        self.return_predictions: bool = False
+        self.predictions: List[Any] = []
+        self.current_batch_indices: List[int] = []
+        self._dl_max_batches: Optional[int] = None
+        self._num_dataloaders: Optional[int] = None
+        self._warning_cache = WarningCache()
+        self._all_batch_indices: List[List[int]] = []
+
+    @property
+    def done(self) -> bool:
+        """Ends prediction when the iteration count reaches the total number of available batches"""
+        return self.iteration_count >= self._dl_max_batches
+
+    @property
+    def should_store_predictions(self) -> bool:
+        """Whether the predictions should be stored for later usage (e.g. aggregation or returning)"""
+        any_pred = any(cb.interval.on_epoch for cb in self.trainer.prediction_writer_callbacks)
+        return self.return_predictions or any_pred
+
+    def reset(self) -> None:
+        """Resets the loop's internal state"""
+        self.iteration_count = 0
+        self._all_batch_indices: List[List[int]] = []
+        self.predictions: List[Any] = []
+
+    def on_run_start(
+        self,
+        dataloader_iter: Iterator,
+        dataloader_idx: int,
+        dl_max_batches: int,
+        num_dataloaders: int,
+        return_predictions: bool = False
+    ) -> None:
+        """
+        Prepares the loop's internal state
+
+        Args:
+            dataloader_iter: the iterator over the current dataloader
+            dataloader_idx: the index of the current dataloader
+            dl_max_batches: the maximum number of batches the current loader can produce
+            num_dataloaders: the total number of dataloaders
+            return_predictions: whether to return the obtained predictions
+        """
+        void(dataloader_iter, dataloader_idx)
+        self._dl_max_batches = dl_max_batches
+        self._num_dataloaders = num_dataloaders
+        self.return_predictions = return_predictions
+
+    def advance(
+        self,
+        dataloader_iter: Iterator,
+        dataloader_idx: int,
+        dl_max_batches: int,
+        num_dataloaders: int,
+        return_predictions: bool = False
+    ) -> None:
+        """
+        Runs one prediction step.
+
+        Args:
+            dataloader_iter: the iterator over the current dataloader
+            dataloader_idx: the index of the current dataloader
+            dl_max_batches: the maximum number of batches the current loader can produce
+            num_dataloaders: the total number of dataloaders
+            return_predictions: whether to return the obtained predictions
+        """
+        batch_idx, batch = next(dataloader_iter)
+        if batch is None:
+            raise StopIteration
+
+        with self.trainer.profiler.profile("predict_step"):
+            self._predict_step(batch, batch_idx, dataloader_idx)
+
+    def on_run_end(self) -> Tuple[Any, Any]:
+        """Returns the predictions and the corresponding batch indices"""
+        predictions = self.predictions
+        all_batch_indices = self._all_batch_indices
+        # free memory
+        self.predictions = []
+        self._all_batch_indices = []
+        return predictions, all_batch_indices
+
+    def _predict_step(self, batch: Any, batch_idx: int, dataloader_idx: int) -> None:
+        """Runs the actual predict step together with all the
+        necessary bookkeeping and the hooks tied to the predict step.
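+        The flow is: store the current batch indices, call ``on_predict_batch_start``,
+        run ``predict_step``, call ``on_predict_batch_end`` and optionally keep the output.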
+
+        Args:
+            batch: the current batch to run the prediction on
+            batch_idx: the index of the current batch
+            dataloader_idx: the index of the dataloader producing the current batch
+        """
+        # configure step_kwargs
+        step_kwargs = self._build_kwargs(batch, batch_idx, dataloader_idx)
+
+        # extract batch_indices and store them
+        self._store_batch_indices(dataloader_idx)
+
+        model_ref = self.trainer.lightning_module
+
+        self.trainer.call_hook("on_predict_batch_start", batch, batch_idx, dataloader_idx)
+
+        model_ref._current_fx_name = "predict_step"
+        predictions = self.trainer.accelerator.predict_step(step_kwargs)
+
+        if predictions is None:
+            self._warning_cache.warn("predict returned None. If this was on purpose, ignore this warning...")
+
+        self.trainer.call_hook("on_predict_batch_end", predictions, batch, batch_idx, dataloader_idx)
+
+        if self.should_store_predictions:
+            self.predictions.append(predictions)
+
+    def _build_kwargs(self, batch: Any, batch_idx: int, dataloader_idx: int) -> Dict[str, Any]:
+        """
+        Assembles the keyword arguments for the ``predict_step``
+
+        Args:
+            batch: the current batch to run the prediction on
+            batch_idx: the index of the current batch
+            dataloader_idx: the index of the dataloader producing the current batch
+
+        Returns:
+            the dictionary containing all the keyword arguments for the predict step
+        """
+        step_kwargs = OrderedDict([('batch', batch), ('batch_idx', batch_idx)])
+        if self._num_dataloaders > 1:
+            step_kwargs['dataloader_idx'] = dataloader_idx
+        return step_kwargs
+
+    def _store_batch_indices(self, dataloader_idx: int) -> None:
+        """Stores the batch indices if the predictions should be stored"""
+        batch_sampler = self.trainer.predict_dataloaders[dataloader_idx].batch_sampler
+        if isinstance(batch_sampler, IndexBatchSamplerWrapper):
+            self.current_batch_indices = batch_sampler.batch_indices
+            if self.should_store_predictions:
+                self._all_batch_indices.append(batch_sampler.batch_indices)
diff --git a/pytorch_lightning/loops/epoch/training_epoch_loop.py b/pytorch_lightning/loops/epoch/training_epoch_loop.py
new file mode 100644
index 0000000000000..f1eb3c942b8a0
--- /dev/null
+++ b/pytorch_lightning/loops/epoch/training_epoch_loop.py
@@ -0,0 +1,426 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any, Dict, Iterator, List, Optional, Union
+
+import torch
+
+import pytorch_lightning as pl
+from pytorch_lightning import loops  # import as loops to avoid circular imports
+from pytorch_lightning.loops.batch import TrainingBatchLoop
+from pytorch_lightning.trainer.connectors.logger_connector.result import ResultCollection
+from pytorch_lightning.utilities.exceptions import MisconfigurationException
+from pytorch_lightning.utilities.model_helpers import is_overridden
+from pytorch_lightning.utilities.signature_utils import is_param_in_hook_signature
+from pytorch_lightning.utilities.types import STEP_OUTPUT
+from pytorch_lightning.utilities.warnings import WarningCache
+
+
+class TrainingEpochLoop(loops.Loop):
+    """ Runs over all batches in a dataloader (one epoch). """
+
+    def __init__(self, min_steps: int, max_steps: int):
+        super().__init__()
+        self.min_steps: int = min_steps
+        self.max_steps: int = max_steps
+        self.global_step: int = 0
+        # the total batch index across all epochs
+        self.total_batch_idx: int = 0
+        # the current batch index in the loop that runs over the dataloader(s)
+        self.iteration_count: int = 0
+        # the current split index when the batch gets split into chunks in truncated backprop through time
+        self.split_idx: Optional[int] = None
+        # the number of batches seen this run, updates immediately after batch_loop.run()
+        self.batches_seen: int = 0
+        self.is_last_batch: Optional[bool] = None
+
+        self.batch_loop = TrainingBatchLoop()
+        self.val_loop = loops.EvaluationLoop()
+
+        self._results = ResultCollection(training=True)
+        self._dataloader_idx: Optional[int] = None
+        self._warning_cache: WarningCache = WarningCache()
+        self._epoch_output: Optional[List[List[STEP_OUTPUT]]] = None
+
+    @property
+    def batch_idx(self) -> int:
+        """Returns the current batch index (within this epoch)"""
+        return self.iteration_count
+
+    @property
+    def done(self) -> bool:
+        """Returns whether the training should be stopped.
+        This is the case when the maximum number of steps is reached, the last batch
+        was processed, or the trainer signals to stop (e.g. by early stopping).
+        """
+        max_steps_reached = self.max_steps is not None and self.global_step >= self.max_steps
+        return max_steps_reached or self.trainer.should_stop or self._num_training_batches_reached(self.is_last_batch)
+
+    def connect(self, trainer: 'pl.Trainer', *args: Any, **kwargs: Any) -> None:
+        """Connects the loop with all necessary parts like trainer and accelerators"""
+        super().connect(trainer, *args, **kwargs)
+        self.batch_loop.connect(trainer)
+        self.val_loop.connect(trainer)
+
+    def reset(self) -> None:
+        """Resets the internal state of the loop for a new run"""
+        self.iteration_count = 0
+        self.batches_seen = 0
+        self.is_last_batch = False
+        self._dataloader_idx = 0
+
+        # track epoch output
+        self._epoch_output = [[] for _ in range(self.batch_loop.num_active_optimizers(self.total_batch_idx))]
+
+    def on_run_start(self, *args: Any, **kwargs: Any) -> None:
+        # hook
+        self.trainer.logger_connector.on_epoch_start()
+        self.trainer.call_hook("on_epoch_start")
+        self.trainer.call_hook("on_train_epoch_start")
+
+    def advance(self, dataloader_iter: Iterator, **kwargs: Any) -> None:
+        """Runs a single training batch.
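+
+        This runs the batch loop for one batch, updates the step-interval (non-plateau)
+        LR schedulers, fires the batch-end hooks and logs the step metrics.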
+
+        Args:
+            dataloader_iter: the iterator over the dataloader producing the new batch
+
+        Raises:
+            StopIteration: When the epoch is canceled by the user returning -1
+        """
+        _, (batch, is_last) = next(dataloader_iter)
+        self.is_last_batch = is_last
+
+        # ------------------------------------
+        # TRAINING_STEP + TRAINING_STEP_END
+        # ------------------------------------
+        with self.trainer.profiler.profile("run_training_batch"):
+            batch_output = self.batch_loop.run(batch, self.iteration_count, self._dataloader_idx)
+            self.batches_seen += 1
+
+        # when returning -1 from training_step, we end the epoch early
+        if batch_output.signal == -1:
+            raise StopIteration
+
+        # update non-plateau LR schedulers
+        # update epoch-interval ones only when we are at the end of the training epoch
+        self.update_lr_schedulers('step', update_plateau_schedulers=False)
+        if self._num_training_batches_reached(is_last):
+            self.update_lr_schedulers('epoch', update_plateau_schedulers=False)
+
+        batch_end_outputs = [opt_idx_out for opt_idx_out in batch_output.training_step_output if len(opt_idx_out)]
+        processed_batch_end_outputs = self._prepare_outputs(batch_end_outputs, batch_mode=True)
+
+        # hook
+        self.trainer.call_hook(
+            'on_train_batch_end', processed_batch_end_outputs, batch, self.iteration_count, self._dataloader_idx
+        )
+        self.trainer.call_hook('on_batch_end')
+        self.trainer.logger_connector.on_batch_end()
+
+        # figure out what to track for epoch end
+        self._track_epoch_end_reduce_metrics(self._epoch_output, batch_end_outputs)
+
+        # -----------------------------------------
+        # SAVE METRICS TO LOGGERS AND PROGRESS_BAR
+        # -----------------------------------------
+        self.trainer.logger_connector.update_train_step_metrics()
+
+    def on_advance_end(self):
+        """Runs validation and checkpointing if necessary.
+
+        Raises:
+            StopIteration: when :attr:`done` evaluates to ``True``, finishing this epoch
+        """
+        # -----------------------------------------
+        # VALIDATE IF NEEDED + CHECKPOINT CALLBACK
+        # -----------------------------------------
+        should_check_val = self._should_check_val_fx(self.iteration_count, self.is_last_batch)
+        if should_check_val:
+            self.trainer.validating = True
+            self._run_validation()
+            self.trainer.training = True
+
+        # -----------------------------------------
+        # SAVE LOGGERS (ie: Tensorboard, etc...)
+        # -----------------------------------------
+        self._save_loggers_on_train_batch_end()
+
+        # update plateau LR scheduler after metrics are logged
+        self.update_lr_schedulers('step', update_plateau_schedulers=True)
+
+        self.total_batch_idx += 1
+
+        # progress global step according to grads progress
+        self._increment_accumulated_grad_global_step()
+
+        if self.done:
+            raise StopIteration
+
+    def on_run_end(self) -> List[List[STEP_OUTPUT]]:
+        """Calls the ``on_epoch_end`` hook.
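+
+        Also runs ``training_epoch_end`` on the LightningModule (when overridden) with
+        the processed epoch outputs.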
+
+        Returns:
+            The output of each training step for each optimizer
+
+        Raises:
+            MisconfigurationException: if ``training_epoch_end`` does not return ``None``
+        """
+        if self.batches_seen == 0:
+            # dataloader/iterator did not produce a batch
+            return
+
+        # inform logger the batch loop has finished
+        self.trainer.logger_connector.epoch_end_reached()
+
+        # prepare epoch output
+        processed_outputs = self._prepare_outputs(self._epoch_output, batch_mode=False)
+
+        # get the model and call model.training_epoch_end
+        model = self.trainer.lightning_module
+
+        if is_overridden('training_epoch_end', model):
+            # run training_epoch_end
+            # refresh the result for custom logging at the epoch level
+            model._current_fx_name = 'training_epoch_end'
+
+            # lightningmodule hook
+            training_epoch_end_output = model.training_epoch_end(processed_outputs)
+
+            if training_epoch_end_output is not None:
+                raise MisconfigurationException(
+                    'training_epoch_end expects a return of None. '
+                    'HINT: remove the return statement in training_epoch_end'
+                )
+
+        # call train epoch end hooks
+        self._on_train_epoch_end_hook(processed_outputs)
+        self.trainer.call_hook('on_epoch_end')
+        self.trainer.logger_connector.on_epoch_end()
+
+        epoch_output = self._epoch_output
+        # free memory
+        self._epoch_output = None
+        return epoch_output
+
+    def teardown(self) -> None:
+        self._results.cpu()
+        self.batch_loop.teardown()
+        self.val_loop.teardown()
+
+    def _run_validation(self):
+        # reload dataloaders
+        self.val_loop.reload_evaluation_dataloaders()
+
+        with torch.no_grad():
+            self.val_loop.run()
+
+    def _on_train_epoch_end_hook(self, processed_epoch_output: List[List[STEP_OUTPUT]]) -> None:
+        """Runs the ``on_train_epoch_end`` hook."""
+        # We cannot rely on Trainer.call_hook because the signatures might be different across
+        # lightning module and callback
+        # As a result, we need to inspect if the module accepts `outputs` in `on_train_epoch_end`
+
+        # This implementation is copied from Trainer.call_hook
+        hook_name = "on_train_epoch_end"
+        prev_fx_name = self.trainer.lightning_module._current_fx_name
+        self.trainer.lightning_module._current_fx_name = hook_name
+
+        # always profile hooks
+        with self.trainer.profiler.profile(hook_name):
+
+            # first call trainer hook
+            if hasattr(self.trainer, hook_name):
+                trainer_hook = getattr(self.trainer, hook_name)
+                trainer_hook(processed_epoch_output)
+
+            # next, call the hook in the LightningModule
+            model_ref = self.trainer.lightning_module
+            if is_overridden(hook_name, model_ref):
+                hook_fx = getattr(model_ref, hook_name)
+                if is_param_in_hook_signature(hook_fx, "outputs"):
+                    self._warning_cache.deprecation(
+                        "The signature of `ModelHooks.on_train_epoch_end` has changed in v1.3."
+                        " `outputs` parameter has been deprecated."
+                        " Support for the old signature will be removed in v1.5",
+                    )
+                    model_ref.on_train_epoch_end(processed_epoch_output)
+                else:
+                    model_ref.on_train_epoch_end()
+
+            # call the accelerator hook
+            if hasattr(self.trainer.accelerator, hook_name):
+                accelerator_hook = getattr(self.trainer.accelerator, hook_name)
+                accelerator_hook()
+
+        # restore current_fx in case of a nested context
+        self.trainer.lightning_module._current_fx_name = prev_fx_name
+
+    def _num_training_batches_reached(self, is_last_batch: bool = False) -> bool:
+        """Checks if we are in the last batch or if there are more batches to follow."""
+
+        # TODO: Can we combine this with training_batch_loop's arg that does a similar check?
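+        # note: `num_training_batches` is `float('inf')` for iterable datasets without a
+        # length, in which case only `is_last_batch` can end the epoch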
+ return self.batches_seen == self.trainer.num_training_batches or is_last_batch + + def _track_epoch_end_reduce_metrics( + self, epoch_output: List[List[STEP_OUTPUT]], batch_end_outputs: STEP_OUTPUT + ) -> None: + """Adds the batch outputs to the epoch outputs and prepares reduction""" + hook_overridden = self._should_add_batch_output_to_epoch_output() + if not hook_overridden: + return + + # track the outputs to reduce at the end of the epoch + for opt_idx, opt_outputs in enumerate(batch_end_outputs): + # with 1 step (no tbptt) don't use a sequence at epoch end + if ( + isinstance(opt_outputs, list) and len(opt_outputs) == 1 + and not isinstance(opt_outputs[0], ResultCollection) + ): + opt_outputs = opt_outputs[0] + + epoch_output[opt_idx].append(opt_outputs) + + def _should_add_batch_output_to_epoch_output(self) -> bool: + """ + We add to the epoch outputs if + 1. The model defines training_epoch_end OR + 2. The model overrides on_train_epoch_end which has `outputs` in the signature + """ + # TODO: in v1.5 this only needs to check if training_epoch_end is overridden + lightning_module = self.trainer.lightning_module + if is_overridden("training_epoch_end", lightning_module): + return True + + if is_overridden("on_train_epoch_end", lightning_module): + model_hook_fx = getattr(lightning_module, "on_train_epoch_end") + if is_param_in_hook_signature(model_hook_fx, "outputs"): + return True + + return False + + @staticmethod + def _prepare_outputs( + outputs: List[List[List['ResultCollection']]], + batch_mode: bool, + ) -> Union[List[List[List[Dict]]], List[List[Dict]], List[Dict], Dict]: + """ + Extract required information from batch or epoch end results. + + Args: + outputs: A 3-dimensional list of ``ResultCollection`` objects with dimensions: + ``[optimizer outs][batch outs][tbptt steps]``. + + batch_mode: If True, ignore the batch output dimension. + + Returns: + The cleaned outputs with ``ResultCollection`` objects converted to dictionaries. + All list dimensions of size one will be collapsed. 
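+
+        Example (illustrative only; assumes a single optimizer and a single tbptt step,
+        where ``result_batch_0`` and ``result_batch_1`` are hypothetical
+        ``ResultCollection`` objects)::
+
+            # [optimizer outs][batch outs][tbptt steps]
+            outputs = [[[result_batch_0], [result_batch_1]]]
+            # epoch mode collapses the size-one tbptt and optimizer dimensions:
+            # -> [{'loss': ..., **extras_0}, {'loss': ..., **extras_1}]
+            TrainingEpochLoop._prepare_outputs(outputs, batch_mode=False)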
+ """ + processed_outputs = [] + for opt_outputs in outputs: + # handle an edge case where an optimizer output is the empty list + if len(opt_outputs) == 0: + continue + + processed_batch_outputs = [] + + if batch_mode: + opt_outputs = [opt_outputs] + + for batch_outputs in opt_outputs: + processed_tbptt_outputs = [] + + if isinstance(batch_outputs, ResultCollection): + batch_outputs = [batch_outputs] + + for tbptt_output in batch_outputs: + out = tbptt_output.extra + if tbptt_output.minimize is not None: + out['loss'] = tbptt_output.minimize.detach() + processed_tbptt_outputs.append(out) + + # if there was only one tbptt step then we can collapse that dimension + if len(processed_tbptt_outputs) == 1: + processed_tbptt_outputs = processed_tbptt_outputs[0] + processed_batch_outputs.append(processed_tbptt_outputs) + + # batch_outputs should be just one dict (or a list of dicts if using tbptt) per optimizer + if batch_mode: + processed_batch_outputs = processed_batch_outputs[0] + processed_outputs.append(processed_batch_outputs) + + # if there is only one optimiser then we collapse that dimension + if len(processed_outputs) == 1: + processed_outputs = processed_outputs[0] + return processed_outputs + + def update_lr_schedulers(self, interval: str, update_plateau_schedulers: bool) -> None: + """updates the lr schedulers based on the given interval""" + if interval == "step" and self.batch_loop.should_accumulate(): + return + self.trainer.optimizer_connector.update_learning_rates( + interval=interval, + update_plateau_schedulers=update_plateau_schedulers, + opt_indices=[opt_idx for opt_idx, _ in self.batch_loop.get_active_optimizers(self.total_batch_idx)], + ) + + def _increment_accumulated_grad_global_step(self) -> None: + """increments global step""" + num_accumulated_batches_reached = self.batch_loop._accumulated_batches_reached() + num_training_batches_reached = self._num_training_batches_reached() + + # progress global step according to grads progress + if num_accumulated_batches_reached or num_training_batches_reached: + self.global_step = self.trainer.accelerator.update_global_step( + self.total_batch_idx, self.trainer.global_step + ) + + def _should_check_val_fx(self, batch_idx: int, is_last_batch: bool) -> bool: + """ Decide if we should run validation. 
""" + if not self.trainer.enable_validation: + return False + + is_val_check_epoch = (self.trainer.current_epoch + 1) % self.trainer.check_val_every_n_epoch == 0 + if not is_val_check_epoch: + return False + + # val_check_batch is inf for iterable datasets with no length defined + is_infinite_dataset = self.trainer.val_check_batch == float('inf') + if is_last_batch and is_infinite_dataset: + return True + + if self.trainer.should_stop: + return True + + # TODO(@awaelchli): let training/eval loop handle logic around limit_*_batches and val_check_batch + is_val_check_batch = is_last_batch + if isinstance(self.trainer.limit_train_batches, int) and is_infinite_dataset: + is_val_check_batch = (batch_idx + 1) % self.trainer.limit_train_batches == 0 + elif self.trainer.val_check_batch != float('inf'): + is_val_check_batch = (batch_idx + 1) % self.trainer.val_check_batch == 0 + return is_val_check_batch + + def _save_loggers_on_train_batch_end(self) -> None: + """Flushes loggers to disk""" + # when loggers should save to disk + should_flush_logs = self.trainer.logger_connector.should_flush_logs + if should_flush_logs and self.trainer.is_global_zero and self.trainer.logger is not None: + self.trainer.logger.save() + + def state_dict(self) -> Dict: + return {"batch_loop": self.batch_loop.state_dict(), "val_loop": self.val_loop.state_dict()} + + def load_state_dict(self, state_dict: Dict) -> None: + self.batch_loop.load_state_dict(state_dict["batch_loop"]) + self.val_loop.load_state_dict(state_dict["val_loop"]) diff --git a/pytorch_lightning/loops/fit_loop.py b/pytorch_lightning/loops/fit_loop.py new file mode 100644 index 0000000000000..c7207f2cf833f --- /dev/null +++ b/pytorch_lightning/loops/fit_loop.py @@ -0,0 +1,265 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +from contextlib import suppress +from typing import Any, Dict, Optional + +import pytorch_lightning as pl +from pytorch_lightning.loops import Loop +from pytorch_lightning.loops.epoch import TrainingEpochLoop +from pytorch_lightning.trainer.connectors.logger_connector.result import ResultCollection +from pytorch_lightning.trainer.supporters import TensorRunningAccum + +log = logging.getLogger(__name__) + + +class FitLoop(Loop): + """This Loop iterates over the epochs to run the training + + Args: + min_epochs: The minimum number of epochs + max_epochs: The maximum number of epochs + min_steps: The minimum number of steps + max_steps: The maximum number of epoch + + .. note:: + If neither the minimum epochs nor steps are specified the minimum number of epochs is set to 1 + and if neither the maximum steps nor epochs are specified, the maximum epochs are set to 1000. 
+    """
+
+    def __init__(
+        self,
+        min_epochs: Optional[int] = None,
+        max_epochs: Optional[int] = None,
+        min_steps: Optional[int] = None,
+        max_steps: Optional[int] = None
+    ):
+        super().__init__()
+        self.max_epochs = 1000 if (max_epochs is None and max_steps is None) else max_epochs
+        self.min_epochs = 1 if (min_epochs is None and min_steps is None) else min_epochs
+
+        self.epoch_loop = TrainingEpochLoop(min_steps, max_steps)
+
+    @property
+    def current_epoch(self) -> int:
+        """Returns the current epoch"""
+        return self.iteration_count
+
+    @current_epoch.setter
+    def current_epoch(self, value: int) -> None:
+        """Setter for the current epoch"""
+        self.iteration_count = value
+
+    @property
+    def global_step(self) -> int:
+        """Returns the global step"""
+        return self.epoch_loop.global_step
+
+    @global_step.setter
+    def global_step(self, value: int) -> None:
+        """Sets the global step (forwards to epoch_loop)"""
+        self.epoch_loop.global_step = value
+
+    @property
+    def total_batch_idx(self) -> int:
+        """Returns the total number of batches already run (across all epochs)"""
+        return self.epoch_loop.total_batch_idx
+
+    @property
+    def batch_idx(self) -> int:
+        """Returns the number of batches already run within this epoch"""
+        return self.epoch_loop.iteration_count
+
+    @property
+    def split_idx(self) -> int:
+        """Returns the index of the current batch split (within the current batch) for truncated backprop through time"""
+        return self.epoch_loop.split_idx
+
+    @property
+    def min_steps(self) -> int:
+        # TODO(@justusschock): Why aren't we using the attribute in this class?
+        """Returns the minimum number of steps to run"""
+        return self.epoch_loop.min_steps
+
+    @min_steps.setter
+    def min_steps(self, value: int) -> None:
+        """Sets the minimum number of steps (forwards to epoch_loop)"""
+        # TODO(@awaelchli): This setter is required by debugging connector (fast dev run), should be avoided
+        self.epoch_loop.min_steps = value
+
+    @property
+    def max_steps(self) -> int:
+        """Returns the maximum number of steps to run"""
+        return self.epoch_loop.max_steps
+
+    @max_steps.setter
+    def max_steps(self, value: int) -> None:
+        """Sets the maximum number of steps (forwards to epoch_loop)"""
+        # TODO(@awaelchli): This setter is required by debugging connector (fast dev run), should be avoided
+        self.epoch_loop.max_steps = value
+
+    @property
+    def running_loss(self) -> TensorRunningAccum:
+        """Returns the running loss"""
+        return self.epoch_loop.batch_loop.running_loss
+
+    @property
+    def _skip_backward(self) -> bool:
+        """ Determines whether the loop will skip backward during automatic optimization. """
+        return self.epoch_loop.batch_loop._skip_backward
+
+    @_skip_backward.setter
+    def _skip_backward(self, value: bool) -> None:
+        """ Determines whether the loop will skip backward during automatic optimization. """
+        self.epoch_loop.batch_loop._skip_backward = value
+
+    @property
+    def _results(self) -> ResultCollection:
+        if self.trainer.training:
+            return self.epoch_loop._results
+        if self.trainer.validating:
+            return self.epoch_loop.val_loop._results
+        raise RuntimeError("`FitLoop._results` property isn't defined. Accessed outside of scope")
+
+    @property
+    def done(self) -> bool:
+        """Evaluates when to leave the loop.
+
+        Returns True if trainer.should_stop was set (e.g. by early stopping)
+        or if the maximum number of steps or epochs is reached.
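+
+        Note that ``trainer.should_stop`` alone does not end the loop: early stopping is
+        only honored once ``min_steps`` / ``min_epochs`` have also been reached.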
+        """
+        # TODO(@awaelchli): move tracked steps inside the training loop and move part of these conditions inside the training loop
+        stop_steps = self.max_steps is not None and self.global_step >= self.max_steps
+        stop_epochs = self.max_epochs is not None and self.current_epoch >= self.max_epochs
+
+        should_stop = False
+        if self.trainer.should_stop:
+            # early stopping
+            met_min_epochs = self.current_epoch >= self.min_epochs if self.min_epochs else True
+            met_min_steps = self.global_step >= self.min_steps if self.min_steps else True
+            if met_min_epochs and met_min_steps:
+                should_stop = True
+            else:
+                log.info(
+                    'Trainer was signaled to stop but required minimum epochs'
+                    f' ({self.min_epochs}) or minimum steps ({self.min_steps}) have'
+                    ' not been met. Training will continue...'
+                )
+        self.trainer.should_stop = should_stop
+
+        return stop_steps or should_stop or stop_epochs
+
+    @property
+    def skip(self) -> bool:
+        """Whether we should skip the training and immediately return from the call to :meth:`run`."""
+        return self.done or self.trainer.num_training_batches == 0
+
+    def connect(self, trainer: 'pl.Trainer', *args: Any, **kwargs: Any) -> None:
+        """Connects the loop with necessary arguments like the trainer"""
+        super().connect(trainer, *args, **kwargs)
+        self.epoch_loop.connect(trainer)
+
+    def reset(self) -> None:
+        """Resets the internal state of this loop"""
+
+    def on_run_start(self) -> None:
+        """Calls the ``on_train_start`` hook."""
+        self._results.to(device=self.trainer.lightning_module.device)
+        self.trainer.call_hook("on_train_start")
+
+    def on_advance_start(self) -> None:
+        """Prepares the dataloader for training and runs the per-epoch setup (sampler seed, accumulation scheduler)"""
+        model = self.trainer.lightning_module
+
+        # reset train dataloader
+        if self.current_epoch != 0 and self.trainer.reload_dataloaders_every_epoch:
+            self.trainer.reset_train_dataloader(model)
+
+        # TODO: specify the possible exception
+        with suppress(Exception):
+            # set seed for distributed sampler (enables shuffling for each epoch)
+            self.trainer.train_dataloader.sampler.set_epoch(self.current_epoch)
+
+        # change gradient accumulation according to the accumulation_scheduler
+        self.trainer.accumulation_scheduler.on_train_epoch_start(self.trainer, self.trainer.lightning_module)
+
+        # stores accumulated grad fractions per batch
+        self.epoch_loop.batch_loop.accumulated_loss = TensorRunningAccum(
+            window_length=self.trainer.accumulate_grad_batches
+        )
+
+    def advance(self) -> None:
+        """Runs one whole epoch."""
+        train_dataloader = self.trainer.accelerator.process_dataloader(self.trainer.train_dataloader)
+        train_dataloader = self.trainer.data_connector.get_profiled_train_dataloader(train_dataloader)
+
+        with self.trainer.profiler.profile("run_training_epoch"):
+            # run train epoch
+            epoch_output = self.epoch_loop.run(train_dataloader)
+
+            if epoch_output is None:
+                return
+
+            # the global step is manually decreased here due to backwards compatibility with existing loggers
+            # as they expect that the same step is used when logging epoch end metrics even when the batch loop has
+            # finished. this means the attribute does not exactly track the number of optimizer steps applied.
+            # TODO(@carmocca): deprecate and rename so users don't get confused
+            self.global_step -= 1
+            # log epoch metrics
+            self.trainer.logger_connector.update_train_epoch_metrics()
+            self.global_step += 1
+
+    def on_advance_end(self) -> None:
+        """Updates the LR schedulers and does some internal bookkeeping"""
+        if self.epoch_loop.batches_seen == 0:
+            return
+
+        self.epoch_loop.update_lr_schedulers('epoch', update_plateau_schedulers=True)
+
+    def on_run_end(self) -> None:
+        """Calls the ``on_train_end`` hook"""
+        # NOTE: the iteration_count/current_epoch is already incremented
+        # Lightning today does not increment the current epoch at the last epoch run in Trainer.fit
+        # To simulate that current behavior, we decrement here.
+        # TODO: must be fixed by https://github.com/PyTorchLightning/pytorch-lightning/issues/5007
+        self.current_epoch -= 1
+
+        # hook
+        self.trainer.call_hook("on_train_end")
+
+        # TODO: flushing TensorBoard hangs on TPU with 8 cores. This might apply to all loggers.
+        # It might be related to XLA tensors blocking when moved to the CPU.
+        # kill loggers
+        if self.trainer.logger is not None:
+            self.trainer.logger.finalize("success")
+
+        # summarize profile results
+        self.trainer.profiler.describe()
+
+        # give accelerators a chance to finish
+        self.trainer.accelerator.on_train_end()
+
+    def should_accumulate(self) -> bool:
+        """Whether the gradients should be accumulated"""
+        return self.epoch_loop.batch_loop.should_accumulate()
+
+    def state_dict(self) -> Dict:
+        return {"epoch_loop": self.epoch_loop.state_dict()}
+
+    def load_state_dict(self, state_dict: Dict) -> None:
+        self.epoch_loop.load_state_dict(state_dict["epoch_loop"])
+
+    def teardown(self) -> None:
+        self.epoch_loop.teardown()
diff --git a/pytorch_lightning/metrics/__init__.py b/pytorch_lightning/metrics/__init__.py
index 9b27fdf0cb253..da682e4840489 100644
--- a/pytorch_lightning/metrics/__init__.py
+++ b/pytorch_lightning/metrics/__init__.py
@@ -38,9 +38,3 @@
     R2Score,
     SSIM,
 )
-from pytorch_lightning.utilities import rank_zero_deprecation
-
-rank_zero_deprecation(
-    "`pytorch_lightning.metrics.*` module has been renamed to `torchmetrics.*` and split off to its own package"
-    " (https://github.com/PyTorchLightning/metrics) since v1.3 and will be removed in v1.5"
-)
diff --git a/pytorch_lightning/metrics/classification/accuracy.py b/pytorch_lightning/metrics/classification/accuracy.py
index 53a16a2a270d7..cf99bc5940a8f 100644
--- a/pytorch_lightning/metrics/classification/accuracy.py
+++ b/pytorch_lightning/metrics/classification/accuracy.py
@@ -15,7 +15,7 @@
 
 from torchmetrics import Accuracy as _Accuracy
 
-from pytorch_lightning.metrics.utils import deprecated_metrics
+from pytorch_lightning.metrics.utils import deprecated_metrics, void
 
 
 class Accuracy(_Accuracy):
@@ -37,4 +37,4 @@ def __init__(
 
         .. deprecated::
             Use :class:`~torchmetrics.Accuracy`. Will be removed in v1.5.0.
""" - _ = threshold, top_k, subset_accuracy, compute_on_step, dist_sync_on_step, process_group, dist_sync_fn + void(threshold, top_k, subset_accuracy, compute_on_step, dist_sync_on_step, process_group, dist_sync_fn) diff --git a/pytorch_lightning/metrics/classification/auc.py b/pytorch_lightning/metrics/classification/auc.py index 917810d57b5dd..42813620758a5 100644 --- a/pytorch_lightning/metrics/classification/auc.py +++ b/pytorch_lightning/metrics/classification/auc.py @@ -15,7 +15,7 @@ from torchmetrics import AUC as _AUC -from pytorch_lightning.metrics.utils import deprecated_metrics +from pytorch_lightning.metrics.utils import deprecated_metrics, void class AUC(_AUC): @@ -35,4 +35,4 @@ def __init__( .. deprecated:: Use :class:`~torchmetrics.AUC`. Will be removed in v1.5.0. """ - _ = reorder, compute_on_step, dist_sync_on_step, process_group, dist_sync_fn + void(reorder, compute_on_step, dist_sync_on_step, process_group, dist_sync_fn) diff --git a/pytorch_lightning/metrics/classification/auroc.py b/pytorch_lightning/metrics/classification/auroc.py index 78fb722ddf610..d1e797d956483 100644 --- a/pytorch_lightning/metrics/classification/auroc.py +++ b/pytorch_lightning/metrics/classification/auroc.py @@ -15,7 +15,7 @@ from torchmetrics import AUROC as _AUROC -from pytorch_lightning.metrics.utils import deprecated_metrics +from pytorch_lightning.metrics.utils import deprecated_metrics, void class AUROC(_AUROC): @@ -38,4 +38,4 @@ def __init__( .. deprecated:: Use :class:`~torchmetrics.AUROC`. Will be removed in v1.5.0. """ - _ = num_classes, pos_label, average, max_fpr, compute_on_step, dist_sync_on_step, process_group, dist_sync_fn + void(num_classes, pos_label, average, max_fpr, compute_on_step, dist_sync_on_step, process_group, dist_sync_fn) diff --git a/pytorch_lightning/metrics/classification/average_precision.py b/pytorch_lightning/metrics/classification/average_precision.py index d7e0d3d387d39..fdb1b26178304 100644 --- a/pytorch_lightning/metrics/classification/average_precision.py +++ b/pytorch_lightning/metrics/classification/average_precision.py @@ -15,7 +15,7 @@ from torchmetrics import AveragePrecision as _AveragePrecision -from pytorch_lightning.metrics.utils import deprecated_metrics +from pytorch_lightning.metrics.utils import deprecated_metrics, void class AveragePrecision(_AveragePrecision): @@ -35,4 +35,4 @@ def __init__( .. deprecated:: Use :class:`~torchmetrics.AveragePrecision`. Will be removed in v1.5.0. """ - _ = num_classes, pos_label, compute_on_step, dist_sync_on_step, process_group + void(num_classes, pos_label, compute_on_step, dist_sync_on_step, process_group) diff --git a/pytorch_lightning/metrics/classification/confusion_matrix.py b/pytorch_lightning/metrics/classification/confusion_matrix.py index 7a4673b9a8495..e77df43e63524 100644 --- a/pytorch_lightning/metrics/classification/confusion_matrix.py +++ b/pytorch_lightning/metrics/classification/confusion_matrix.py @@ -15,7 +15,7 @@ from torchmetrics import ConfusionMatrix as _ConfusionMatrix -from pytorch_lightning.metrics.utils import deprecated_metrics +from pytorch_lightning.metrics.utils import deprecated_metrics, void class ConfusionMatrix(_ConfusionMatrix): @@ -36,4 +36,4 @@ def __init__( .. deprecated:: Use :class:`~torchmetrics.ConfusionMatrix`. Will be removed in v1.5.0. 
""" - _ = num_classes, normalize, threshold, compute_on_step, dist_sync_on_step, process_group + void(num_classes, normalize, threshold, compute_on_step, dist_sync_on_step, process_group) diff --git a/pytorch_lightning/metrics/classification/f_beta.py b/pytorch_lightning/metrics/classification/f_beta.py index 57c5d67c6a5f1..58a50f163b08a 100644 --- a/pytorch_lightning/metrics/classification/f_beta.py +++ b/pytorch_lightning/metrics/classification/f_beta.py @@ -16,12 +16,12 @@ from torchmetrics import F1 as _F1 from torchmetrics import FBeta as _FBeta -from pytorch_lightning.metrics.utils import deprecated_metrics +from pytorch_lightning.metrics.utils import deprecated_metrics, void class FBeta(_FBeta): - @deprecated_metrics(target=_FBeta) + @deprecated_metrics(target=_FBeta, args_mapping={"multilabel": None}) def __init__( self, num_classes: int, @@ -44,7 +44,7 @@ def __init__( class F1(_F1): - @deprecated_metrics(target=_F1) + @deprecated_metrics(target=_F1, args_mapping={"multilabel": None}) def __init__( self, num_classes: int, @@ -61,4 +61,4 @@ def __init__( .. deprecated:: Use :class:`~torchmetrics.F1`. Will be removed in v1.5.0. """ - _ = num_classes, threshold, average, multilabel, compute_on_step, dist_sync_on_step, process_group + void(num_classes, threshold, average, multilabel, compute_on_step, dist_sync_on_step, process_group) diff --git a/pytorch_lightning/metrics/classification/hamming_distance.py b/pytorch_lightning/metrics/classification/hamming_distance.py index c06755d6c6c39..134bc33cf1267 100644 --- a/pytorch_lightning/metrics/classification/hamming_distance.py +++ b/pytorch_lightning/metrics/classification/hamming_distance.py @@ -15,7 +15,7 @@ from torchmetrics import HammingDistance as _HammingDistance -from pytorch_lightning.metrics.utils import deprecated_metrics +from pytorch_lightning.metrics.utils import deprecated_metrics, void class HammingDistance(_HammingDistance): @@ -35,4 +35,4 @@ def __init__( .. deprecated:: Use :class:`~torchmetrics.HammingDistance`. Will be removed in v1.5.0. """ - _ = threshold, compute_on_step, dist_sync_on_step, process_group, dist_sync_fn + void(threshold, compute_on_step, dist_sync_on_step, process_group, dist_sync_fn) diff --git a/pytorch_lightning/metrics/classification/iou.py b/pytorch_lightning/metrics/classification/iou.py index 5fe8e4f11401d..00168b1924821 100644 --- a/pytorch_lightning/metrics/classification/iou.py +++ b/pytorch_lightning/metrics/classification/iou.py @@ -15,7 +15,7 @@ from torchmetrics import IoU as _IoU -from pytorch_lightning.metrics.utils import deprecated_metrics +from pytorch_lightning.metrics.utils import deprecated_metrics, void class IoU(_IoU): @@ -38,5 +38,7 @@ def __init__( .. deprecated:: Use :class:`~torchmetrics.IoU`. Will be removed in v1.5.0. 
""" - _ = num_classes, ignore_index, absent_score, threshold, reduction, \ - compute_on_step, dist_sync_on_step, process_group + void( + num_classes, ignore_index, absent_score, threshold, reduction, compute_on_step, dist_sync_on_step, + process_group + ) diff --git a/pytorch_lightning/metrics/classification/precision_recall.py b/pytorch_lightning/metrics/classification/precision_recall.py index b40c5a0c627e0..6507f6d071000 100644 --- a/pytorch_lightning/metrics/classification/precision_recall.py +++ b/pytorch_lightning/metrics/classification/precision_recall.py @@ -16,12 +16,12 @@ from torchmetrics import Precision as _Precision from torchmetrics import Recall as _Recall -from pytorch_lightning.metrics.utils import deprecated_metrics +from pytorch_lightning.metrics.utils import deprecated_metrics, void class Precision(_Precision): - @deprecated_metrics(target=_Precision) + @deprecated_metrics(target=_Precision, args_mapping={"multilabel": None, "is_multiclass": None}) def __init__( self, num_classes: Optional[int] = None, @@ -49,7 +49,7 @@ def __init__( class Recall(_Recall): - @deprecated_metrics(target=_Recall) + @deprecated_metrics(target=_Recall, args_mapping={"multilabel": None, "is_multiclass": None}) def __init__( self, num_classes: Optional[int] = None, @@ -71,3 +71,7 @@ def __init__( .. deprecated:: Use :class:`~torchmetrics.Recall`. Will be removed in v1.5.0. """ + void( + num_classes, threshold, average, multilabel, mdmc_average, ignore_index, top_k, is_multiclass, + compute_on_step, dist_sync_on_step, process_group, dist_sync_fn + ) diff --git a/pytorch_lightning/metrics/classification/precision_recall_curve.py b/pytorch_lightning/metrics/classification/precision_recall_curve.py index a1a7f0dc665cd..c51650663073c 100644 --- a/pytorch_lightning/metrics/classification/precision_recall_curve.py +++ b/pytorch_lightning/metrics/classification/precision_recall_curve.py @@ -15,7 +15,7 @@ from torchmetrics import PrecisionRecallCurve as _PrecisionRecallCurve -from pytorch_lightning.metrics.utils import deprecated_metrics +from pytorch_lightning.metrics.utils import deprecated_metrics, void class PrecisionRecallCurve(_PrecisionRecallCurve): @@ -35,4 +35,4 @@ def __init__( .. deprecated:: Use :class:`~torchmetrics.PrecisionRecallCurve`. Will be removed in v1.5.0. """ - _ = num_classes, pos_label, compute_on_step, dist_sync_on_step, process_group + void(num_classes, pos_label, compute_on_step, dist_sync_on_step, process_group) diff --git a/pytorch_lightning/metrics/classification/roc.py b/pytorch_lightning/metrics/classification/roc.py index 55a2782e0408c..824d2a22c3951 100644 --- a/pytorch_lightning/metrics/classification/roc.py +++ b/pytorch_lightning/metrics/classification/roc.py @@ -15,7 +15,7 @@ from torchmetrics import ROC as _ROC -from pytorch_lightning.metrics.utils import deprecated_metrics +from pytorch_lightning.metrics.utils import deprecated_metrics, void class ROC(_ROC): @@ -35,4 +35,4 @@ def __init__( .. deprecated:: Use :class:`~torchmetrics.ROC`. Will be removed in v1.5.0. 
""" - _ = num_classes, pos_label, compute_on_step, dist_sync_on_step, process_group + void(num_classes, pos_label, compute_on_step, dist_sync_on_step, process_group) diff --git a/pytorch_lightning/metrics/classification/stat_scores.py b/pytorch_lightning/metrics/classification/stat_scores.py index 94fde40a392ba..806ee73e176dc 100644 --- a/pytorch_lightning/metrics/classification/stat_scores.py +++ b/pytorch_lightning/metrics/classification/stat_scores.py @@ -15,12 +15,12 @@ from torchmetrics import StatScores as _StatScores -from pytorch_lightning.metrics.utils import deprecated_metrics +from pytorch_lightning.metrics.utils import deprecated_metrics, void class StatScores(_StatScores): - @deprecated_metrics(target=_StatScores) + @deprecated_metrics(target=_StatScores, args_mapping={"is_multiclass": None}) def __init__( self, threshold: float = 0.5, @@ -41,5 +41,7 @@ def __init__( .. deprecated:: Use :class:`~torchmetrics.StatScores`. Will be removed in v1.5.0. """ - _ = threshold, top_k, reduce, num_classes, ignore_index, mdmc_reduce, is_multiclass, compute_on_step, \ + void( + threshold, top_k, reduce, num_classes, ignore_index, mdmc_reduce, is_multiclass, compute_on_step, dist_sync_on_step, process_group, dist_sync_fn + ) diff --git a/pytorch_lightning/metrics/compositional.py b/pytorch_lightning/metrics/compositional.py index 56bb1912e48e6..01189e129d69d 100644 --- a/pytorch_lightning/metrics/compositional.py +++ b/pytorch_lightning/metrics/compositional.py @@ -17,7 +17,7 @@ from torchmetrics import Metric from torchmetrics.metric import CompositionalMetric as _CompositionalMetric -from pytorch_lightning.metrics.utils import deprecated_metrics +from pytorch_lightning.metrics.utils import deprecated_metrics, void class CompositionalMetric(_CompositionalMetric): @@ -33,3 +33,4 @@ def __init__( .. deprecated:: Use :class:`torchmetrics.metric.CompositionalMetric`. Will be removed in v1.5.0. """ + void(operator, metric_a, metric_b) diff --git a/pytorch_lightning/metrics/functional/accuracy.py b/pytorch_lightning/metrics/functional/accuracy.py index 69fa9d75590e0..0dddcb37676e0 100644 --- a/pytorch_lightning/metrics/functional/accuracy.py +++ b/pytorch_lightning/metrics/functional/accuracy.py @@ -16,7 +16,7 @@ import torch from torchmetrics.functional import accuracy as _accuracy -from pytorch_lightning.metrics.utils import deprecated_metrics +from pytorch_lightning.metrics.utils import deprecated_metrics, void @deprecated_metrics(target=_accuracy) @@ -31,3 +31,4 @@ def accuracy( .. deprecated:: Use :func:`torchmetrics.functional.accuracy`. Will be removed in v1.5.0. """ + return void(preds, target, threshold, top_k, subset_accuracy) diff --git a/pytorch_lightning/metrics/functional/auc.py b/pytorch_lightning/metrics/functional/auc.py index 7cc6aa458d397..f8b43e47d6eca 100644 --- a/pytorch_lightning/metrics/functional/auc.py +++ b/pytorch_lightning/metrics/functional/auc.py @@ -14,7 +14,7 @@ import torch from torchmetrics.functional import auc as _auc -from pytorch_lightning.metrics.utils import deprecated_metrics +from pytorch_lightning.metrics.utils import deprecated_metrics, void @deprecated_metrics(target=_auc) @@ -23,3 +23,4 @@ def auc(x: torch.Tensor, y: torch.Tensor, reorder: bool = False) -> torch.Tensor .. deprecated:: Use :func:`torchmetrics.functional.auc`. Will be removed in v1.5.0. 
""" + return void(x, y, reorder) diff --git a/pytorch_lightning/metrics/functional/auroc.py b/pytorch_lightning/metrics/functional/auroc.py index c49aa1a8fdc48..4815a2e88b410 100644 --- a/pytorch_lightning/metrics/functional/auroc.py +++ b/pytorch_lightning/metrics/functional/auroc.py @@ -16,7 +16,7 @@ import torch from torchmetrics.functional import auroc as _auroc -from pytorch_lightning.metrics.utils import deprecated_metrics +from pytorch_lightning.metrics.utils import deprecated_metrics, void @deprecated_metrics(target=_auroc) @@ -33,3 +33,4 @@ def auroc( .. deprecated:: Use :func:`torchmetrics.functional.auroc`. Will be removed in v1.5.0. """ + return void(preds, target, num_classes, pos_label, average, max_fpr, sample_weights) diff --git a/pytorch_lightning/metrics/functional/average_precision.py b/pytorch_lightning/metrics/functional/average_precision.py index 017b34739a0f4..79712935ab70d 100644 --- a/pytorch_lightning/metrics/functional/average_precision.py +++ b/pytorch_lightning/metrics/functional/average_precision.py @@ -16,7 +16,7 @@ import torch from torchmetrics.functional import average_precision as _average_precision -from pytorch_lightning.metrics.utils import deprecated_metrics +from pytorch_lightning.metrics.utils import deprecated_metrics, void @deprecated_metrics(target=_average_precision) @@ -31,3 +31,4 @@ def average_precision( .. deprecated:: Use :func:`torchmetrics.functional.average_precision`. Will be removed in v1.5.0. """ + return void(preds, target, num_classes, pos_label, sample_weights) diff --git a/pytorch_lightning/metrics/functional/confusion_matrix.py b/pytorch_lightning/metrics/functional/confusion_matrix.py index 038bd8b49b730..2607abc49ec07 100644 --- a/pytorch_lightning/metrics/functional/confusion_matrix.py +++ b/pytorch_lightning/metrics/functional/confusion_matrix.py @@ -16,7 +16,7 @@ import torch from torchmetrics.functional import confusion_matrix as _confusion_matrix -from pytorch_lightning.metrics.utils import deprecated_metrics +from pytorch_lightning.metrics.utils import deprecated_metrics, void @deprecated_metrics(target=_confusion_matrix) @@ -31,3 +31,4 @@ def confusion_matrix( .. deprecated:: Use :func:`torchmetrics.functional.confusion_matrix`. Will be removed in v1.5.0. """ + return void(preds, target, num_classes, normalize, threshold) diff --git a/pytorch_lightning/metrics/functional/explained_variance.py b/pytorch_lightning/metrics/functional/explained_variance.py index 233a0851b8d56..7885c8e8b04a9 100644 --- a/pytorch_lightning/metrics/functional/explained_variance.py +++ b/pytorch_lightning/metrics/functional/explained_variance.py @@ -16,7 +16,7 @@ import torch from torchmetrics.functional import explained_variance as _explained_variance -from pytorch_lightning.metrics.utils import deprecated_metrics +from pytorch_lightning.metrics.utils import deprecated_metrics, void @deprecated_metrics(target=_explained_variance) @@ -29,3 +29,4 @@ def explained_variance( .. deprecated:: Use :func:`torchmetrics.functional.explained_variance`. Will be removed in v1.5.0. 
""" + return void(preds, target, multioutput) diff --git a/pytorch_lightning/metrics/functional/f_beta.py b/pytorch_lightning/metrics/functional/f_beta.py index 1130b700c6b8c..ed3d92e69ff23 100644 --- a/pytorch_lightning/metrics/functional/f_beta.py +++ b/pytorch_lightning/metrics/functional/f_beta.py @@ -17,10 +17,10 @@ from torchmetrics.functional import f1 as _f1 from torchmetrics.functional import fbeta as _fbeta -from pytorch_lightning.metrics.utils import deprecated_metrics +from pytorch_lightning.metrics.utils import deprecated_metrics, void -@deprecated_metrics(target=_fbeta) +@deprecated_metrics(target=_fbeta, args_mapping={"multilabel": None}) def fbeta( preds: torch.Tensor, target: torch.Tensor, @@ -34,9 +34,10 @@ def fbeta( .. deprecated:: Use :func:`torchmetrics.functional.accuracy`. Will be removed in v1.5.0. """ + return void(preds, target, num_classes, beta, threshold, average, multilabel) -@deprecated_metrics(target=_f1) +@deprecated_metrics(target=_f1, args_mapping={"multilabel": None}) def f1( preds: torch.Tensor, target: torch.Tensor, @@ -49,3 +50,4 @@ def f1( .. deprecated:: Use :func:`torchmetrics.functional.f1`. Will be removed in v1.5.0. """ + return void(preds, target, num_classes, threshold, average, multilabel) diff --git a/pytorch_lightning/metrics/functional/hamming_distance.py b/pytorch_lightning/metrics/functional/hamming_distance.py index 6a390e776f111..a501184dc3bbf 100644 --- a/pytorch_lightning/metrics/functional/hamming_distance.py +++ b/pytorch_lightning/metrics/functional/hamming_distance.py @@ -14,7 +14,7 @@ import torch from torchmetrics.functional import hamming_distance as _hamming_distance -from pytorch_lightning.metrics.utils import deprecated_metrics +from pytorch_lightning.metrics.utils import deprecated_metrics, void @deprecated_metrics(target=_hamming_distance) @@ -23,3 +23,4 @@ def hamming_distance(preds: torch.Tensor, target: torch.Tensor, threshold: float .. deprecated:: Use :func:`torchmetrics.functional.hamming_distance`. Will be removed in v1.5.0. """ + return void(preds, target, threshold) diff --git a/pytorch_lightning/metrics/functional/image_gradients.py b/pytorch_lightning/metrics/functional/image_gradients.py index e2151c5fc1d93..539dc5ee4c55e 100644 --- a/pytorch_lightning/metrics/functional/image_gradients.py +++ b/pytorch_lightning/metrics/functional/image_gradients.py @@ -16,7 +16,7 @@ import torch from torchmetrics.functional import image_gradients as _image_gradients -from pytorch_lightning.metrics.utils import deprecated_metrics +from pytorch_lightning.metrics.utils import deprecated_metrics, void @deprecated_metrics(target=_image_gradients) @@ -25,3 +25,4 @@ def image_gradients(img: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: .. deprecated:: Use :func:`torchmetrics.functional.image_gradients`. Will be removed in v1.5.0. """ + return void(img) diff --git a/pytorch_lightning/metrics/functional/iou.py b/pytorch_lightning/metrics/functional/iou.py index 5554a5d77a355..d73310e4e8ea9 100644 --- a/pytorch_lightning/metrics/functional/iou.py +++ b/pytorch_lightning/metrics/functional/iou.py @@ -16,7 +16,7 @@ import torch from torchmetrics.functional import iou as _iou -from pytorch_lightning.metrics.utils import deprecated_metrics +from pytorch_lightning.metrics.utils import deprecated_metrics, void from pytorch_lightning.utilities.imports import _TORCHMETRICS_GREATER_EQUAL_0_3, _TORCHMETRICS_LOWER_THAN_0_3 @@ -35,3 +35,4 @@ def iou( .. deprecated:: Use :func:`torchmetrics.functional.iou`. Will be removed in v1.5.0. 
""" + return void(pred, target, ignore_index, absent_score, threshold, num_classes, reduction) diff --git a/pytorch_lightning/metrics/functional/mean_absolute_error.py b/pytorch_lightning/metrics/functional/mean_absolute_error.py index 219284d79d623..2ffd340196e81 100644 --- a/pytorch_lightning/metrics/functional/mean_absolute_error.py +++ b/pytorch_lightning/metrics/functional/mean_absolute_error.py @@ -15,7 +15,7 @@ import torch from torchmetrics.functional import mean_absolute_error as _mean_absolute_error -from pytorch_lightning.metrics.utils import deprecated_metrics +from pytorch_lightning.metrics.utils import deprecated_metrics, void @deprecated_metrics(target=_mean_absolute_error) @@ -24,3 +24,4 @@ def mean_absolute_error(preds: torch.Tensor, target: torch.Tensor) -> torch.Tens .. deprecated:: Use :func:`torchmetrics.functional.mean_absolute_error`. Will be removed in v1.5.0. """ + return void(preds, target) diff --git a/pytorch_lightning/metrics/functional/mean_relative_error.py b/pytorch_lightning/metrics/functional/mean_relative_error.py index 329fe040ebc7d..4d45b0d3e6141 100644 --- a/pytorch_lightning/metrics/functional/mean_relative_error.py +++ b/pytorch_lightning/metrics/functional/mean_relative_error.py @@ -15,7 +15,7 @@ import torch from torchmetrics.functional.regression.mean_relative_error import mean_relative_error as _mean_relative_error -from pytorch_lightning.metrics.utils import deprecated_metrics +from pytorch_lightning.metrics.utils import deprecated_metrics, void @deprecated_metrics(target=_mean_relative_error) @@ -24,3 +24,4 @@ def mean_relative_error(preds: torch.Tensor, target: torch.Tensor) -> torch.Tens .. deprecated:: Use :func:`torchmetrics.functional.regression.mean_relative_error`. Will be removed in v1.5.0. """ + return void(preds, target) diff --git a/pytorch_lightning/metrics/functional/mean_squared_error.py b/pytorch_lightning/metrics/functional/mean_squared_error.py index 5bbc0bb1c6a83..3ff06569f856d 100644 --- a/pytorch_lightning/metrics/functional/mean_squared_error.py +++ b/pytorch_lightning/metrics/functional/mean_squared_error.py @@ -15,7 +15,7 @@ import torch from torchmetrics.functional import mean_squared_error as _mean_squared_error -from pytorch_lightning.metrics.utils import deprecated_metrics +from pytorch_lightning.metrics.utils import deprecated_metrics, void @deprecated_metrics(target=_mean_squared_error) @@ -24,3 +24,4 @@ def mean_squared_error(preds: torch.Tensor, target: torch.Tensor) -> torch.Tenso .. deprecated:: Use :func:`torchmetrics.functional.mean_squared_error`. Will be removed in v1.5.0. """ + return void(preds, target) diff --git a/pytorch_lightning/metrics/functional/mean_squared_log_error.py b/pytorch_lightning/metrics/functional/mean_squared_log_error.py index 29786529381d5..b799ce4847f6e 100644 --- a/pytorch_lightning/metrics/functional/mean_squared_log_error.py +++ b/pytorch_lightning/metrics/functional/mean_squared_log_error.py @@ -15,7 +15,7 @@ import torch from torchmetrics.functional import mean_squared_log_error as _mean_squared_log_error -from pytorch_lightning.metrics.utils import deprecated_metrics +from pytorch_lightning.metrics.utils import deprecated_metrics, void @deprecated_metrics(target=_mean_squared_log_error) @@ -24,3 +24,4 @@ def mean_squared_log_error(preds: torch.Tensor, target: torch.Tensor) -> torch.T .. deprecated:: Use :func:`torchmetrics.functional.mean_squared_log_error`. Will be removed in v1.5.0. 
""" + return void(preds, target) diff --git a/pytorch_lightning/metrics/functional/nlp.py b/pytorch_lightning/metrics/functional/nlp.py index c59d7cf2b8976..3eaa5eff1cc5c 100644 --- a/pytorch_lightning/metrics/functional/nlp.py +++ b/pytorch_lightning/metrics/functional/nlp.py @@ -21,7 +21,7 @@ import torch from torchmetrics.functional import bleu_score as _bleu_score -from pytorch_lightning.metrics.utils import deprecated_metrics +from pytorch_lightning.metrics.utils import deprecated_metrics, void @deprecated_metrics(target=_bleu_score) @@ -35,3 +35,4 @@ def bleu_score( .. deprecated:: Use :func:`torchmetrics.functional.bleu_score`. Will be removed in v1.5.0. """ + return void(translate_corpus, reference_corpus, n_gram, smooth) diff --git a/pytorch_lightning/metrics/functional/precision_recall.py b/pytorch_lightning/metrics/functional/precision_recall.py index 7b6c8641b5829..367c9c9111f07 100644 --- a/pytorch_lightning/metrics/functional/precision_recall.py +++ b/pytorch_lightning/metrics/functional/precision_recall.py @@ -18,10 +18,10 @@ from torchmetrics.functional import precision_recall as _precision_recall from torchmetrics.functional import recall as _recall -from pytorch_lightning.metrics.utils import deprecated_metrics +from pytorch_lightning.metrics.utils import deprecated_metrics, void -@deprecated_metrics(target=_precision) +@deprecated_metrics(target=_precision, args_mapping={"is_multiclass": None}) def precision( preds: torch.Tensor, target: torch.Tensor, @@ -37,9 +37,10 @@ def precision( .. deprecated:: Use :func:`torchmetrics.functional.precision`. Will be removed in v1.5.0. """ + return void(preds, target, average, mdmc_average, ignore_index, num_classes, threshold, top_k, is_multiclass) -@deprecated_metrics(target=_recall) +@deprecated_metrics(target=_recall, args_mapping={"is_multiclass": None}) def recall( preds: torch.Tensor, target: torch.Tensor, @@ -55,9 +56,10 @@ def recall( .. deprecated:: Use :func:`torchmetrics.functional.accuracy`. Will be removed in v1.5.0. """ + return void(preds, target, average, mdmc_average, ignore_index, num_classes, threshold, top_k, is_multiclass) -@deprecated_metrics(target=_precision_recall) +@deprecated_metrics(target=_precision_recall, args_mapping={"is_multiclass": None}) def precision_recall( preds: torch.Tensor, target: torch.Tensor, @@ -73,3 +75,4 @@ def precision_recall( .. deprecated:: Use :func:`torchmetrics.functional.precision_recall`. Will be removed in v1.5.0. """ + return void(preds, target, average, mdmc_average, ignore_index, num_classes, threshold, top_k, is_multiclass) diff --git a/pytorch_lightning/metrics/functional/precision_recall_curve.py b/pytorch_lightning/metrics/functional/precision_recall_curve.py index dc9863cbb47c4..58d35557cce11 100644 --- a/pytorch_lightning/metrics/functional/precision_recall_curve.py +++ b/pytorch_lightning/metrics/functional/precision_recall_curve.py @@ -16,7 +16,7 @@ import torch from torchmetrics.functional import precision_recall_curve as _precision_recall_curve -from pytorch_lightning.metrics.utils import deprecated_metrics +from pytorch_lightning.metrics.utils import deprecated_metrics, void @deprecated_metrics(target=_precision_recall_curve) @@ -32,3 +32,4 @@ def precision_recall_curve( .. deprecated:: Use :func:`torchmetrics.functional.accuracy`. Will be removed in v1.5.0. 
""" + return void(preds, target, num_classes, pos_label, sample_weights) diff --git a/pytorch_lightning/metrics/functional/psnr.py b/pytorch_lightning/metrics/functional/psnr.py index 51be9d47b91f9..df54ae17adb32 100644 --- a/pytorch_lightning/metrics/functional/psnr.py +++ b/pytorch_lightning/metrics/functional/psnr.py @@ -16,7 +16,7 @@ import torch from torchmetrics.functional import psnr as _psnr -from pytorch_lightning.metrics.utils import deprecated_metrics +from pytorch_lightning.metrics.utils import deprecated_metrics, void @deprecated_metrics(target=_psnr) @@ -32,3 +32,4 @@ def psnr( .. deprecated:: Use :func:`torchmetrics.functional.psnr`. Will be removed in v1.5.0. """ + return void(preds, target, data_range, base, reduction, dim) diff --git a/pytorch_lightning/metrics/functional/r2score.py b/pytorch_lightning/metrics/functional/r2score.py index fe4b541989358..d5df9c2bfb4a3 100644 --- a/pytorch_lightning/metrics/functional/r2score.py +++ b/pytorch_lightning/metrics/functional/r2score.py @@ -15,7 +15,7 @@ import torch from torchmetrics.functional import r2score as _r2score -from pytorch_lightning.metrics.utils import deprecated_metrics +from pytorch_lightning.metrics.utils import deprecated_metrics, void @deprecated_metrics(target=_r2score) @@ -29,3 +29,4 @@ def r2score( .. deprecated:: Use :func:`torchmetrics.functional.r2score`. Will be removed in v1.5.0. """ + return void(preds, target, adjusted, multioutput) diff --git a/pytorch_lightning/metrics/functional/roc.py b/pytorch_lightning/metrics/functional/roc.py index 928a0b40fca54..f7d58af15e557 100644 --- a/pytorch_lightning/metrics/functional/roc.py +++ b/pytorch_lightning/metrics/functional/roc.py @@ -16,7 +16,7 @@ from torch import Tensor from torchmetrics.functional import roc as _roc -from pytorch_lightning.metrics.utils import deprecated_metrics +from pytorch_lightning.metrics.utils import deprecated_metrics, void @deprecated_metrics(target=_roc) @@ -31,3 +31,4 @@ def roc( .. deprecated:: Use :func:`torchmetrics.functional.roc`. Will be removed in v1.5.0. """ + return void(preds, target, num_classes, pos_label, sample_weights) diff --git a/pytorch_lightning/metrics/functional/self_supervised.py b/pytorch_lightning/metrics/functional/self_supervised.py index 65dec211e938a..5de4383683844 100644 --- a/pytorch_lightning/metrics/functional/self_supervised.py +++ b/pytorch_lightning/metrics/functional/self_supervised.py @@ -14,7 +14,7 @@ import torch from torchmetrics.functional import embedding_similarity as _embedding_similarity -from pytorch_lightning.metrics.utils import deprecated_metrics +from pytorch_lightning.metrics.utils import deprecated_metrics, void @deprecated_metrics(target=_embedding_similarity) @@ -28,3 +28,4 @@ def embedding_similarity( .. deprecated:: Use :func:`torchmetrics.functional.embedding_similarity`. Will be removed in v1.5.0. """ + return void(batch, similarity, reduction, zero_diagonal) diff --git a/pytorch_lightning/metrics/functional/ssim.py b/pytorch_lightning/metrics/functional/ssim.py index 31cff7fcfb9b4..2033520f011b0 100644 --- a/pytorch_lightning/metrics/functional/ssim.py +++ b/pytorch_lightning/metrics/functional/ssim.py @@ -16,7 +16,7 @@ import torch from torchmetrics.functional import ssim as _ssim -from pytorch_lightning.metrics.utils import deprecated_metrics +from pytorch_lightning.metrics.utils import deprecated_metrics, void @deprecated_metrics(target=_ssim) @@ -34,3 +34,4 @@ def ssim( .. deprecated:: Use :func:`torchmetrics.functional.ssim`. Will be removed in v1.5.0. 
""" + return void(preds, target, kernel_size, sigma, reduction, data_range, k1, k2) diff --git a/pytorch_lightning/metrics/functional/stat_scores.py b/pytorch_lightning/metrics/functional/stat_scores.py index 30c03da237fe6..da654a54e3bf6 100644 --- a/pytorch_lightning/metrics/functional/stat_scores.py +++ b/pytorch_lightning/metrics/functional/stat_scores.py @@ -16,10 +16,10 @@ import torch from torchmetrics.functional import stat_scores as _stat_scores -from pytorch_lightning.metrics.utils import deprecated_metrics +from pytorch_lightning.metrics.utils import deprecated_metrics, void -@deprecated_metrics(target=_stat_scores) +@deprecated_metrics(target=_stat_scores, args_mapping={"is_multiclass": None}) def stat_scores( preds: torch.Tensor, target: torch.Tensor, @@ -35,3 +35,4 @@ def stat_scores( .. deprecated:: Use :func:`torchmetrics.functional.stat_scores`. Will be removed in v1.5.0. """ + return void(preds, target, reduce, mdmc_reduce, num_classes, top_k, threshold, is_multiclass, ignore_index) diff --git a/pytorch_lightning/metrics/metric.py b/pytorch_lightning/metrics/metric.py index ee0fcdb8a92e1..e5fc0866d7e8d 100644 --- a/pytorch_lightning/metrics/metric.py +++ b/pytorch_lightning/metrics/metric.py @@ -16,7 +16,7 @@ from torchmetrics import Metric as _Metric from torchmetrics.collections import MetricCollection as _MetricCollection -from pytorch_lightning.metrics.utils import deprecated_metrics +from pytorch_lightning.metrics.utils import deprecated_metrics, void class Metric(_Metric): @@ -33,6 +33,7 @@ def __init__( .. deprecated:: Use :class:`torchmetrics.Metric`. Will be removed in v1.5.0. """ + void(compute_on_step, dist_sync_on_step, process_group, dist_sync_fn) class MetricCollection(_MetricCollection): @@ -43,3 +44,4 @@ def __init__(self, metrics: Union[List[Metric], Tuple[Metric], Dict[str, Metric] .. deprecated:: Use :class:`torchmetrics.MetricCollection`. Will be removed in v1.5.0. """ + void(metrics) diff --git a/pytorch_lightning/metrics/regression/explained_variance.py b/pytorch_lightning/metrics/regression/explained_variance.py index 50c620b82f87f..64a0eaa1a171b 100644 --- a/pytorch_lightning/metrics/regression/explained_variance.py +++ b/pytorch_lightning/metrics/regression/explained_variance.py @@ -15,7 +15,7 @@ from torchmetrics import ExplainedVariance as _ExplainedVariance -from pytorch_lightning.metrics.utils import deprecated_metrics +from pytorch_lightning.metrics.utils import deprecated_metrics, void class ExplainedVariance(_ExplainedVariance): @@ -35,4 +35,4 @@ def __init__( .. deprecated:: Use :class:`~torchmetrics.ExplainedVariance`. Will be removed in v1.5.0. """ - _ = multioutput, compute_on_step, dist_sync_on_step, process_group, dist_sync_fn + void(multioutput, compute_on_step, dist_sync_on_step, process_group, dist_sync_fn) diff --git a/pytorch_lightning/metrics/regression/mean_absolute_error.py b/pytorch_lightning/metrics/regression/mean_absolute_error.py index 493294a2811b9..c0744bd1c2fa5 100644 --- a/pytorch_lightning/metrics/regression/mean_absolute_error.py +++ b/pytorch_lightning/metrics/regression/mean_absolute_error.py @@ -15,7 +15,7 @@ from torchmetrics import MeanAbsoluteError as _MeanAbsoluteError -from pytorch_lightning.metrics.utils import deprecated_metrics +from pytorch_lightning.metrics.utils import deprecated_metrics, void class MeanAbsoluteError(_MeanAbsoluteError): @@ -34,4 +34,4 @@ def __init__( .. deprecated:: Use :class:`~torchmetrics.MeanAbsoluteError`. Will be removed in v1.5.0. 
""" - _ = compute_on_step, dist_sync_on_step, process_group, dist_sync_fn + void(compute_on_step, dist_sync_on_step, process_group, dist_sync_fn) diff --git a/pytorch_lightning/metrics/regression/mean_squared_error.py b/pytorch_lightning/metrics/regression/mean_squared_error.py index ec2b34c4fd86e..bececd5633ea5 100644 --- a/pytorch_lightning/metrics/regression/mean_squared_error.py +++ b/pytorch_lightning/metrics/regression/mean_squared_error.py @@ -15,7 +15,7 @@ from torchmetrics import MeanSquaredError as _MeanSquaredError -from pytorch_lightning.metrics.utils import deprecated_metrics +from pytorch_lightning.metrics.utils import deprecated_metrics, void class MeanSquaredError(_MeanSquaredError): @@ -34,4 +34,4 @@ def __init__( .. deprecated:: Use :class:`~torchmetrics.MeanSquaredError`. Will be removed in v1.5.0. """ - _ = compute_on_step, dist_sync_on_step, process_group, dist_sync_fn + void(compute_on_step, dist_sync_on_step, process_group, dist_sync_fn) diff --git a/pytorch_lightning/metrics/regression/mean_squared_log_error.py b/pytorch_lightning/metrics/regression/mean_squared_log_error.py index b95ee46ea3fca..be010de4483d9 100644 --- a/pytorch_lightning/metrics/regression/mean_squared_log_error.py +++ b/pytorch_lightning/metrics/regression/mean_squared_log_error.py @@ -15,7 +15,7 @@ from torchmetrics import MeanSquaredLogError as _MeanSquaredLogError -from pytorch_lightning.metrics.utils import deprecated_metrics +from pytorch_lightning.metrics.utils import deprecated_metrics, void class MeanSquaredLogError(_MeanSquaredLogError): @@ -34,4 +34,4 @@ def __init__( .. deprecated:: Use :class:`~torchmetrics.MeanSquaredLogError`. Will be removed in v1.5.0. """ - _ = compute_on_step, dist_sync_on_step, process_group, dist_sync_fn + void(compute_on_step, dist_sync_on_step, process_group, dist_sync_fn) diff --git a/pytorch_lightning/metrics/regression/psnr.py b/pytorch_lightning/metrics/regression/psnr.py index d81cfb3035f3b..d647fae2622fe 100644 --- a/pytorch_lightning/metrics/regression/psnr.py +++ b/pytorch_lightning/metrics/regression/psnr.py @@ -15,7 +15,7 @@ from torchmetrics import PSNR as _PSNR -from pytorch_lightning.metrics.utils import deprecated_metrics +from pytorch_lightning.metrics.utils import deprecated_metrics, void class PSNR(_PSNR): @@ -37,4 +37,4 @@ def __init__( .. deprecated:: Use :class:`~torchmetrics.PSNR`. Will be removed in v1.5.0. """ - _ = data_range, base, reduction, dim, compute_on_step, dist_sync_on_step, process_group + void(data_range, base, reduction, dim, compute_on_step, dist_sync_on_step, process_group) diff --git a/pytorch_lightning/metrics/regression/r2score.py b/pytorch_lightning/metrics/regression/r2score.py index 7ec2f9a586c69..93b986b8620a5 100644 --- a/pytorch_lightning/metrics/regression/r2score.py +++ b/pytorch_lightning/metrics/regression/r2score.py @@ -15,7 +15,7 @@ from torchmetrics import R2Score as _R2Score -from pytorch_lightning.metrics.utils import deprecated_metrics +from pytorch_lightning.metrics.utils import deprecated_metrics, void class R2Score(_R2Score): @@ -37,4 +37,4 @@ def __init__( .. deprecated:: Use :class:`~torchmetrics.R2Score`. Will be removed in v1.5.0. 
""" - _ = num_outputs, adjusted, multioutput, compute_on_step, dist_sync_on_step, process_group, dist_sync_fn + void(num_outputs, adjusted, multioutput, compute_on_step, dist_sync_on_step, process_group, dist_sync_fn) diff --git a/pytorch_lightning/metrics/regression/ssim.py b/pytorch_lightning/metrics/regression/ssim.py index 2ea8872e6ad53..e1d2e575cd882 100644 --- a/pytorch_lightning/metrics/regression/ssim.py +++ b/pytorch_lightning/metrics/regression/ssim.py @@ -15,7 +15,7 @@ from torchmetrics import SSIM as _SSIM -from pytorch_lightning.metrics.utils import deprecated_metrics +from pytorch_lightning.metrics.utils import deprecated_metrics, void class SSIM(_SSIM): @@ -39,4 +39,4 @@ def __init__( .. deprecated:: Use :class:`~torchmetrics.SSIM`. Will be removed in v1.5.0. """ - _ = kernel_size, sigma, reduction, data_range, k1, k2, compute_on_step, dist_sync_on_step, process_group + void(kernel_size, sigma, reduction, data_range, k1, k2, compute_on_step, dist_sync_on_step, process_group) diff --git a/pytorch_lightning/metrics/utils.py b/pytorch_lightning/metrics/utils.py index 30c2975c924d1..dd58e59751eb3 100644 --- a/pytorch_lightning/metrics/utils.py +++ b/pytorch_lightning/metrics/utils.py @@ -15,7 +15,7 @@ from typing import Optional import torch -from deprecate import deprecated +from deprecate import deprecated, void from torchmetrics.utilities.data import dim_zero_cat as _dim_zero_cat from torchmetrics.utilities.data import dim_zero_mean as _dim_zero_mean from torchmetrics.utilities.data import dim_zero_sum as _dim_zero_sum @@ -34,17 +34,17 @@ @deprecated_metrics(target=_dim_zero_cat) def dim_zero_cat(x): - pass + return void(x) @deprecated_metrics(target=_dim_zero_sum) def dim_zero_sum(x): - pass + return void(x) @deprecated_metrics(target=_dim_zero_mean) def dim_zero_mean(x): - pass + return void(x) @deprecated_metrics(target=_to_onehot) @@ -53,6 +53,7 @@ def to_onehot(label_tensor: torch.Tensor, num_classes: Optional[int] = None) -> .. deprecated:: Use :func:`torchmetrics.utilities.data.to_onehot`. Will be removed in v1.5.0. """ + return void(label_tensor, num_classes) @deprecated_metrics(target=_select_topk) @@ -61,14 +62,16 @@ def select_topk(prob_tensor: torch.Tensor, topk: int = 1, dim: int = 1) -> torch .. deprecated:: Use :func:`torchmetrics.utilities.data.select_topk`. Will be removed in v1.5.0. """ + return void(prob_tensor, topk, dim) -@deprecated_metrics(target=_to_categorical) +@deprecated_metrics(target=_to_categorical, args_mapping={"tensor": "x"}) def to_categorical(tensor: torch.Tensor, argmax_dim: int = 1) -> torch.Tensor: """ .. deprecated:: Use :func:`torchmetrics.utilities.data.to_categorical`. Will be removed in v1.5.0. """ + return void(tensor, argmax_dim) @deprecated_metrics(target=_get_num_classes, skip_if=_TORCHMETRICS_GREATER_EQUAL_0_3) @@ -78,6 +81,7 @@ def get_num_classes(pred: torch.Tensor, target: torch.Tensor, num_classes: Optio .. deprecated:: Use :func:`torchmetrics.utilities.data.get_num_classes`. Will be removed in v1.5.0. """ + return void(pred, target, num_classes) @deprecated_metrics(target=_reduce) @@ -86,6 +90,7 @@ def reduce(to_reduce: torch.Tensor, reduction: str) -> torch.Tensor: .. deprecated:: Use :func:`torchmetrics.utilities.reduce`. Will be removed in v1.5.0. """ + return void(to_reduce, reduction) @deprecated_metrics(target=_class_reduce) @@ -96,3 +101,4 @@ def class_reduce( .. deprecated:: Use :func:`torchmetrics.utilities.class_reduce`. Will be removed in v1.5.0. 
""" + return void(num, denom, weights, class_reduction) diff --git a/pytorch_lightning/overrides/base.py b/pytorch_lightning/overrides/base.py index 88e8ed6375e1b..3f396c3d602f8 100644 --- a/pytorch_lightning/overrides/base.py +++ b/pytorch_lightning/overrides/base.py @@ -11,17 +11,54 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from typing import Any, Union + import torch from torch.nn import DataParallel from torch.nn.parallel import DistributedDataParallel -from pytorch_lightning.core.lightning import LightningModule +import pytorch_lightning as pl from pytorch_lightning.utilities.device_dtype_mixin import DeviceDtypeModuleMixin +class _LightningPrecisionModuleWrapperBase(DeviceDtypeModuleMixin, torch.nn.Module): + + def __init__(self, pl_module: 'pl.LightningModule') -> None: + """ + Wraps the user's LightningModule. Requires overriding all ``*_step`` methods and ``forward`` so that it can + safely be wrapped by a ``_LightningModuleWrapperBase`` and a ``*DataParallel``. + + Args: + pl_module: the model to wrap + """ + super().__init__() + self.module = pl_module + + # set the parameters_to_ignore from LightningModule. + self._ddp_params_and_buffers_to_ignore = getattr(pl_module, "_ddp_params_and_buffers_to_ignore", []) + + def training_step(self, *args: Any, **kwargs: Any) -> Any: + raise NotImplementedError + + def validation_step(self, *args: Any, **kwargs: Any) -> Any: + raise NotImplementedError + + def test_step(self, *args: Any, **kwargs: Any) -> Any: + raise NotImplementedError + + def predict_step(self, *args: Any, **kwargs: Any) -> Any: + raise NotImplementedError + + def forward(self, *args: Any, **kwargs: Any) -> Any: + raise NotImplementedError + + def on_post_move_to_device(self) -> None: + pass + + class _LightningModuleWrapperBase(DeviceDtypeModuleMixin, torch.nn.Module): - def __init__(self, pl_module: LightningModule): + def __init__(self, pl_module: Union['pl.LightningModule', _LightningPrecisionModuleWrapperBase]): """ Wraps the user's LightningModule and redirects the forward call to the appropriate method, either ``training_step``, ``validation_step`` or ``test_step``. @@ -39,8 +76,9 @@ def __init__(self, pl_module: LightningModule): # set the parameters_to_ignore from LightningModule. 
self._ddp_params_and_buffers_to_ignore = getattr(pl_module, "_ddp_params_and_buffers_to_ignore", []) - def forward(self, *inputs, **kwargs): - trainer = self.module.trainer + def forward(self, *inputs: Any, **kwargs: Any) -> Any: + lightning_module = unwrap_lightning_module(self.module) + trainer = lightning_module.trainer if trainer and trainer.training: output = self.module.training_step(*inputs, **kwargs) @@ -49,7 +87,7 @@ def forward(self, *inputs, **kwargs): # it is done manually in ``LightningModule.manual_backward`` # `require_backward_grad_sync` will be reset in the # ddp_plugin ``post_training_step`` hook - if not self.module.automatic_optimization: + if not lightning_module.automatic_optimization: trainer.model.require_backward_grad_sync = False elif trainer and trainer.testing: output = self.module.test_step(*inputs, **kwargs) @@ -62,14 +100,14 @@ def forward(self, *inputs, **kwargs): return output - def on_post_move_to_device(self): + def on_post_move_to_device(self) -> None: pass -def unwrap_lightning_module(wrapped_model) -> LightningModule: +def unwrap_lightning_module(wrapped_model) -> 'pl.LightningModule': model = wrapped_model if isinstance(model, (DistributedDataParallel, DataParallel)): - model = model.module - if isinstance(model, _LightningModuleWrapperBase): - model = model.module + model = unwrap_lightning_module(model.module) + if isinstance(model, (_LightningModuleWrapperBase, _LightningPrecisionModuleWrapperBase)): + model = unwrap_lightning_module(model.module) return model diff --git a/pytorch_lightning/overrides/data_parallel.py b/pytorch_lightning/overrides/data_parallel.py index 3d6e527ef95a9..57919db6ab221 100644 --- a/pytorch_lightning/overrides/data_parallel.py +++ b/pytorch_lightning/overrides/data_parallel.py @@ -17,7 +17,7 @@ import torch -from pytorch_lightning.core.lightning import LightningModule +import pytorch_lightning as pl from pytorch_lightning.overrides.base import _LightningModuleWrapperBase from pytorch_lightning.utilities import rank_zero_warn from pytorch_lightning.utilities.apply_func import apply_to_collection @@ -53,7 +53,7 @@ class LightningParallelModule(_LightningModuleWrapperBase): """ - def __init__(self, pl_module: LightningModule): + def __init__(self, pl_module: 'pl.LightningModule') -> None: super().__init__(pl_module) _ignore_scalar_return_in_dp() diff --git a/pytorch_lightning/overrides/distributed.py b/pytorch_lightning/overrides/distributed.py index d064040d8e019..71ed9c8018ec3 100644 --- a/pytorch_lightning/overrides/distributed.py +++ b/pytorch_lightning/overrides/distributed.py @@ -18,13 +18,13 @@ from torch.nn.parallel import DistributedDataParallel from torch.utils.data import BatchSampler, DistributedSampler, Sampler -from pytorch_lightning.core.lightning import LightningModule +import pytorch_lightning as pl from pytorch_lightning.overrides.base import _LightningModuleWrapperBase class LightningDistributedModule(_LightningModuleWrapperBase): - def __init__(self, pl_module: LightningModule): + def __init__(self, pl_module: 'pl.LightningModule') -> None: """ Wraps the user's LightningModule and redirects the forward call to the appropriate method, either ``training_step``, ``validation_step``, ``test_step`` or ``predict``. 
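The recursion added to `unwrap_lightning_module` above is what makes nested wrappers work: a `DistributedDataParallel` instance can now hold a `_LightningModuleWrapperBase`, which in turn may hold a `_LightningPrecisionModuleWrapperBase` around the actual `LightningModule`, and every layer must be peeled off. A minimal sketch of the pattern, using hypothetical stand-in classes rather than Lightning's real wrappers:

    class _Wrapper:
        # stand-in for DistributedDataParallel / _LightningModuleWrapperBase /
        # _LightningPrecisionModuleWrapperBase, all of which expose `.module`
        def __init__(self, module):
            self.module = module

    def unwrap(model):
        # recurse instead of unwrapping a fixed number of layers,
        # so any nesting depth resolves to the innermost module
        if isinstance(model, _Wrapper):
            return unwrap(model.module)
        return model

    core = object()
    assert unwrap(_Wrapper(_Wrapper(core))) is core  # two layers peeled off
    assert unwrap(core) is core                      # unwrapped input passes through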
@@ -63,6 +63,9 @@ def _find_tensors(obj): # pragma: no-cover # Note: Keep track of Pytorch DDP and update if there is a change # https://github.com/pytorch/pytorch/blob/v1.7.1/torch/nn/parallel/distributed.py#L626-L638 def prepare_for_backward(model: DistributedDataParallel, output: Any): + # `prepare_for_backward` is `DistributedDataParallel` specific. + if not isinstance(model, DistributedDataParallel): + return if torch.is_grad_enabled() and model.require_backward_grad_sync: model.require_forward_param_sync = True # We'll return the output object verbatim since it is a freeform @@ -132,6 +135,9 @@ def __iter__(self) -> Iterator[List[int]]: self.batch_indices = batch yield batch + def __len__(self) -> int: + return len(self._sampler) + @property def drop_last(self) -> bool: return self._sampler.drop_last diff --git a/pytorch_lightning/overrides/fairscale.py b/pytorch_lightning/overrides/fairscale.py index f7c3b8d5fd575..e531db6de77f3 100644 --- a/pytorch_lightning/overrides/fairscale.py +++ b/pytorch_lightning/overrides/fairscale.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from pytorch_lightning.core.lightning import LightningModule +import pytorch_lightning as pl from pytorch_lightning.overrides.base import _LightningModuleWrapperBase, unwrap_lightning_module from pytorch_lightning.utilities import _FAIRSCALE_AVAILABLE @@ -23,7 +23,7 @@ class LightningShardedDataParallel(_LightningModuleWrapperBase): # Just do this for later docstrings pass - def unwrap_lightning_module_sharded(wrapped_model) -> LightningModule: + def unwrap_lightning_module_sharded(wrapped_model) -> 'pl.LightningModule': model = wrapped_model if isinstance(model, ShardedDataParallel): model = model.module diff --git a/pytorch_lightning/plugins/__init__.py b/pytorch_lightning/plugins/__init__.py index 58d43dc54cb7f..f620ee28afe9a 100644 --- a/pytorch_lightning/plugins/__init__.py +++ b/pytorch_lightning/plugins/__init__.py @@ -9,6 +9,7 @@ from pytorch_lightning.plugins.precision.fully_sharded_native_amp import ( # noqa: F401 FullyShardedNativeMixedPrecisionPlugin, ) +from pytorch_lightning.plugins.precision.ipu_precision import IPUPrecisionPlugin # noqa: F401 from pytorch_lightning.plugins.precision.native_amp import NativeMixedPrecisionPlugin # noqa: F401 from pytorch_lightning.plugins.precision.precision_plugin import PrecisionPlugin # noqa: F401 from pytorch_lightning.plugins.precision.sharded_native_amp import ShardedNativeMixedPrecisionPlugin # noqa: F401 @@ -20,9 +21,8 @@ from pytorch_lightning.plugins.training_type.dp import DataParallelPlugin # noqa: F401 from pytorch_lightning.plugins.training_type.fully_sharded import DDPFullyShardedPlugin # noqa: F401 from pytorch_lightning.plugins.training_type.horovod import HorovodPlugin # noqa: F401 +from pytorch_lightning.plugins.training_type.ipu import IPUPlugin # noqa: F401 from pytorch_lightning.plugins.training_type.parallel import ParallelPlugin # noqa: F401 -from pytorch_lightning.plugins.training_type.rpc import RPCPlugin # noqa: F401 -from pytorch_lightning.plugins.training_type.rpc_sequential import RPCSequentialPlugin # noqa: F401 from pytorch_lightning.plugins.training_type.sharded import DDPShardedPlugin # noqa: F401 from pytorch_lightning.plugins.training_type.sharded_spawn import DDPSpawnShardedPlugin # noqa: F401 from pytorch_lightning.plugins.training_type.single_device import SingleDevicePlugin # 
noqa: F401 @@ -41,6 +41,8 @@ "DeepSpeedPrecisionPlugin", "DoublePrecisionPlugin", "HorovodPlugin", + "IPUPlugin", + "IPUPrecisionPlugin", "NativeMixedPrecisionPlugin", "PrecisionPlugin", "ShardedNativeMixedPrecisionPlugin", @@ -49,8 +51,6 @@ "SingleTPUPlugin", "TPUHalfPrecisionPlugin", "TPUSpawnPlugin", - "RPCPlugin", - "RPCSequentialPlugin", "TrainingTypePlugin", "ParallelPlugin", "Plugin", diff --git a/pytorch_lightning/plugins/precision/apex_amp.py b/pytorch_lightning/plugins/precision/apex_amp.py index 71c2119e734fd..b2565e7dd34b4 100644 --- a/pytorch_lightning/plugins/precision/apex_amp.py +++ b/pytorch_lightning/plugins/precision/apex_amp.py @@ -11,14 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import Any, Callable, ContextManager, Sequence +from typing import Any, Callable, ContextManager, Dict, Sequence import torch from torch import Tensor from torch.optim import Optimizer import pytorch_lightning as pl -from pytorch_lightning.core.lightning import LightningModule from pytorch_lightning.plugins.precision.mixed import MixedPrecisionPlugin from pytorch_lightning.utilities import _APEX_AVAILABLE, AMPType from pytorch_lightning.utilities.types import _PARAMETERS @@ -39,7 +38,7 @@ def __init__(self, amp_level: str = "O2") -> None: def master_params(self, optimizer: Optimizer) -> _PARAMETERS: return amp.master_params(optimizer) - def dispatch(self, trainer: "pl.Trainer") -> None: + def dispatch(self, trainer: 'pl.Trainer') -> None: if not self._connected: accelerator = trainer.accelerator _, accelerator.optimizers = amp.initialize( @@ -50,7 +49,7 @@ def dispatch(self, trainer: "pl.Trainer") -> None: def backward( self, - model: LightningModule, + model: 'pl.LightningModule', closure_loss: Tensor, optimizer: Optimizer, opt_idx: int, @@ -76,7 +75,7 @@ def backward( # do backward pass # TODO: not entirely sure, why we need this - if model is not None and isinstance(model, LightningModule): + if model is not None and isinstance(model, pl.LightningModule): model.backward(closure_loss, optimizer, opt_idx, **kwargs) # TODO: avoid dev_debugger and track these calls with mock @@ -118,7 +117,7 @@ def reinit_scheduler_properties(optimizers: Sequence[Optimizer], schedulers: Seq def pre_optimizer_step( self, - pl_module: LightningModule, + pl_module: 'pl.LightningModule', optimizer: Optimizer, optimizer_idx: int, lambda_closure: Callable, @@ -135,3 +134,10 @@ def pre_optimizer_step( optimizer.step(**kwargs) return False + + def on_load_checkpoint(self, checkpoint: Dict[str, Any]) -> None: + if "amp_scaling_state" in checkpoint: + amp.load_state_dict(checkpoint["amp_scaling_state"]) + + def on_save_checkpoint(self, checkpoint: Dict[str, Any]) -> None: + checkpoint["amp_scaling_state"] = amp.state_dict() diff --git a/pytorch_lightning/plugins/precision/double.py b/pytorch_lightning/plugins/precision/double.py index 6d985a0f4eb9d..387fac81c8614 100644 --- a/pytorch_lightning/plugins/precision/double.py +++ b/pytorch_lightning/plugins/precision/double.py @@ -12,28 +12,26 @@ # See the License for the specific language governing permissions and # limitations under the License. 
from contextlib import contextmanager -from functools import wraps -from typing import Any, Generator, List, Tuple +from typing import Any, cast, Generator, List, Tuple import torch import torch.nn as nn from torch.optim import Optimizer -from pytorch_lightning.core.lightning import LightningModule +import pytorch_lightning as pl +from pytorch_lightning.overrides.base import _LightningPrecisionModuleWrapperBase from pytorch_lightning.plugins.precision.precision_plugin import PrecisionPlugin from pytorch_lightning.utilities.apply_func import apply_to_collection -class _DoublePrecisionPatch: - """Class to handle patching of methods in the ``LightningModule`` and subsequent teardown.""" +class LightningDoublePrecisionModule(_LightningPrecisionModuleWrapperBase): + """ + LightningModule wrapper which converts incoming floating point data in ``*_step`` and ``forward`` to double + (``torch.float64``) precision. - def __init__(self, model: nn.Module, method_name: str, old_method: Any) -> None: - self.model = model - self.method_name = method_name - self.old_method = old_method - - def teardown(self) -> None: - setattr(self.model, self.method_name, self.old_method) + Args: + pl_module: the model to wrap + """ @staticmethod def _to_double_precision(data: torch.Tensor) -> torch.Tensor: @@ -43,55 +41,63 @@ def _to_double_precision(data: torch.Tensor) -> torch.Tensor: @staticmethod def _move_float_tensors_to_double(collection: Any) -> Any: - return apply_to_collection(collection, torch.Tensor, function=_DoublePrecisionPatch._to_double_precision) - - @classmethod - def patch(cls, model: nn.Module, method_name: str) -> '_DoublePrecisionPatch': - old_method = getattr(model, method_name) - - @wraps(old_method) - def new_method(*args: Any, **kwargs: Any) -> Any: - return old_method( - *_DoublePrecisionPatch._move_float_tensors_to_double(args), - **_DoublePrecisionPatch._move_float_tensors_to_double(kwargs) - ) - - setattr(model, method_name, new_method if callable(old_method) else old_method) - return cls(model, method_name, old_method) + return apply_to_collection( + collection, + torch.Tensor, + LightningDoublePrecisionModule._to_double_precision, + ) + + def training_step(self, *args: Any, **kwargs: Any) -> Any: + return self.module.training_step( + *LightningDoublePrecisionModule._move_float_tensors_to_double(args), + **LightningDoublePrecisionModule._move_float_tensors_to_double(kwargs), + ) + + def validation_step(self, *args: Any, **kwargs: Any) -> Any: + return self.module.validation_step( + *LightningDoublePrecisionModule._move_float_tensors_to_double(args), + **LightningDoublePrecisionModule._move_float_tensors_to_double(kwargs), + ) + + def test_step(self, *args: Any, **kwargs: Any) -> Any: + return self.module.test_step( + *LightningDoublePrecisionModule._move_float_tensors_to_double(args), + **LightningDoublePrecisionModule._move_float_tensors_to_double(kwargs), + ) + + def predict_step(self, *args: Any, **kwargs: Any) -> Any: + return self.module.predict_step( + *LightningDoublePrecisionModule._move_float_tensors_to_double(args), + **LightningDoublePrecisionModule._move_float_tensors_to_double(kwargs), + ) + + def forward(self, *args: Any, **kwargs: Any) -> Any: + return self.module( + *LightningDoublePrecisionModule._move_float_tensors_to_double(args), + **LightningDoublePrecisionModule._move_float_tensors_to_double(kwargs), + ) class DoublePrecisionPlugin(PrecisionPlugin): - """Plugin for training with double (``torch.float64``) precision.""" + """ Plugin for training with double 
(``torch.float64``) precision. """ precision: int = 64 - def __init__(self) -> None: - super().__init__() - self.patches: List[_DoublePrecisionPatch] = [] - def connect( self, model: nn.Module, optimizers: List[Optimizer], lr_schedulers: List[Any], - ) -> Tuple[nn.Module, List[Optimizer], List[Any]]: - """Converts the model to double precision and wraps the `training_step`, `validation_step`, `test_step`, - `predict_step`, and `forward` methods to convert incoming floating point data to double. Does not alter - `optimizers` or `lr_schedulers`.""" - model = model.to(dtype=torch.float64) - if isinstance(model, LightningModule): - self.patches.append(_DoublePrecisionPatch.patch(model, 'training_step')) - self.patches.append(_DoublePrecisionPatch.patch(model, 'validation_step')) - self.patches.append(_DoublePrecisionPatch.patch(model, 'test_step')) - self.patches.append(_DoublePrecisionPatch.patch(model, 'predict_step')) - self.patches.append(_DoublePrecisionPatch.patch(model, 'forward')) + ) -> Tuple[nn.Module, List['Optimizer'], List[Any]]: + """Converts the model to double precision and wraps it in a ``LightningDoublePrecisionModule`` to convert + incoming floating point data to double (``torch.float64``) precision. Does not alter `optimizers` or + `lr_schedulers`. + """ + model = cast(pl.LightningModule, model.double()) + model = LightningDoublePrecisionModule(model) return super().connect(model, optimizers, lr_schedulers) - def post_dispatch(self) -> None: - while len(self.patches) > 0: - self.patches.pop().teardown() - @contextmanager def train_step_context(self) -> Generator[None, None, None]: """ diff --git a/pytorch_lightning/plugins/precision/ipu_precision.py b/pytorch_lightning/plugins/precision/ipu_precision.py new file mode 100644 index 0000000000000..e6983966e166b --- /dev/null +++ b/pytorch_lightning/plugins/precision/ipu_precision.py @@ -0,0 +1,60 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Optional, Union + +from torch import Tensor +from torch.nn import Module +from torch.optim import Optimizer + +import pytorch_lightning as pl +from pytorch_lightning.plugins.precision.precision_plugin import PrecisionPlugin +from pytorch_lightning.utilities import GradClipAlgorithmType +from pytorch_lightning.utilities.exceptions import MisconfigurationException + + +class IPUPrecisionPlugin(PrecisionPlugin): + + def __init__(self, precision: int) -> None: + super().__init__() + self.precision = precision + + def backward( + self, + model: 'pl.LightningModule', + closure_loss: Tensor, + optimizer: Optimizer, + opt_idx: int, + should_accumulate: bool, + *args: Any, + **kwargs: Any, + ) -> Tensor: + # IPU internally manages bwd step. 
+ return closure_loss + + def clip_gradients( + self, + optimizer: Optimizer, + clip_val: Union[int, float], + gradient_clip_algorithm: GradClipAlgorithmType = GradClipAlgorithmType.NORM, + model: Optional[Module] = None + ) -> None: + """Clips the gradients""" + if clip_val is None: + return + + clip_val = float(clip_val) + if clip_val <= 0: + return + + raise MisconfigurationException("IPUs currently do not support clipping gradients.") diff --git a/pytorch_lightning/plugins/precision/native_amp.py b/pytorch_lightning/plugins/precision/native_amp.py index 994b7f26135ff..e25f46d9ec239 100644 --- a/pytorch_lightning/plugins/precision/native_amp.py +++ b/pytorch_lightning/plugins/precision/native_amp.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. from contextlib import contextmanager -from typing import Any, Callable, Generator +from typing import Any, Callable, Dict, Generator import torch from torch.optim import LBFGS, Optimizer @@ -83,19 +83,21 @@ def pre_optimizer_step( f"native PyTorch amp and lbfgs are not compatible (optimizer {optimizer_idx})." " To request, please file a Github issue in PyTorch and tag @mcarilli" ) - lambda_closure() if not pl_module.automatic_optimization: self.scaler.unscale_(optimizer) pl_module.trainer.call_hook("on_after_backward") + self.scaler.step(optimizer) + self.scaler.update() + else: + result = lambda_closure() + # lambda_closure returning None indicates that backward has been skipped + if result is not None: + self.scaler.step(optimizer) + self.scaler.update() return False - def post_optimizer_step(self, optimizer: Optimizer, optimizer_idx: int) -> None: - """Updates the GradScaler""" - self.scaler.step(optimizer) - self.scaler.update() - @contextmanager def train_step_context(self) -> Generator[None, None, None]: """Enable autocast context""" @@ -119,3 +121,10 @@ def predict_step_context(self) -> Generator[None, None, None]: """Enable autocast context""" with torch.cuda.amp.autocast(): yield + + def on_load_checkpoint(self, checkpoint: Dict[str, Any]) -> None: + if "native_amp_scaling_state" in checkpoint: + self.scaler.load_state_dict(checkpoint["native_amp_scaling_state"]) + + def on_save_checkpoint(self, checkpoint: Dict[str, Any]) -> None: + checkpoint["native_amp_scaling_state"] = self.scaler.state_dict() diff --git a/pytorch_lightning/plugins/precision/precision_plugin.py b/pytorch_lightning/plugins/precision/precision_plugin.py index a5488623dc592..e8dccbed741fa 100644 --- a/pytorch_lightning/plugins/precision/precision_plugin.py +++ b/pytorch_lightning/plugins/precision/precision_plugin.py @@ -19,12 +19,13 @@ from torch.optim import Optimizer import pytorch_lightning as pl +from pytorch_lightning.core.hooks import CheckpointHooks from pytorch_lightning.plugins.base_plugin import Plugin from pytorch_lightning.utilities import GradClipAlgorithmType from pytorch_lightning.utilities.types import _PARAMETERS -class PrecisionPlugin(Plugin): +class PrecisionPlugin(Plugin, CheckpointHooks): """ Base class for all plugins handling the precision-specific parts of the training. The class attribute precision must be overwritten in child classes. 
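Both AMP plugins above gain `on_save_checkpoint`/`on_load_checkpoint` hooks, which `PrecisionPlugin` can now receive because it mixes in `CheckpointHooks`. The effect is a simple round-trip of the scaler state through the checkpoint dictionary; a rough sketch, assuming a CUDA-style `GradScaler` is in use:

    import torch

    scaler = torch.cuda.amp.GradScaler()
    checkpoint = {}

    # on_save_checkpoint: stash the scaler state under the key used in the diff
    checkpoint["native_amp_scaling_state"] = scaler.state_dict()

    # on_load_checkpoint: restore only if the key is present, so checkpoints
    # written without native AMP still load cleanly
    restored = torch.cuda.amp.GradScaler()
    if "native_amp_scaling_state" in checkpoint:
        restored.load_state_dict(checkpoint["native_amp_scaling_state"])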
diff --git a/pytorch_lightning/plugins/training_type/__init__.py b/pytorch_lightning/plugins/training_type/__init__.py index 3cb43e44f5565..6a56d68e17db9 100644 --- a/pytorch_lightning/plugins/training_type/__init__.py +++ b/pytorch_lightning/plugins/training_type/__init__.py @@ -6,8 +6,6 @@ from pytorch_lightning.plugins.training_type.fully_sharded import DDPFullyShardedPlugin # noqa: F401 from pytorch_lightning.plugins.training_type.horovod import HorovodPlugin # noqa: F401 from pytorch_lightning.plugins.training_type.parallel import ParallelPlugin # noqa: F401 -from pytorch_lightning.plugins.training_type.rpc import RPCPlugin # noqa: F401 -from pytorch_lightning.plugins.training_type.rpc_sequential import RPCSequentialPlugin # noqa: F401 from pytorch_lightning.plugins.training_type.sharded import DDPShardedPlugin # noqa: F401 from pytorch_lightning.plugins.training_type.sharded_spawn import DDPSpawnShardedPlugin # noqa: F401 from pytorch_lightning.plugins.training_type.single_device import SingleDevicePlugin # noqa: F401 diff --git a/pytorch_lightning/plugins/training_type/ddp.py b/pytorch_lightning/plugins/training_type/ddp.py index e65a6512d3846..a882390b78b0d 100644 --- a/pytorch_lightning/plugins/training_type/ddp.py +++ b/pytorch_lightning/plugins/training_type/ddp.py @@ -13,14 +13,19 @@ # limitations under the License. import logging import os +import shutil +import signal import subprocess import sys +import tempfile +import time from time import sleep from typing import Any, Dict, List, Optional, Union +import __main__ import numpy as np import torch -import torch.distributed as torch_distrib +import torch.distributed from torch.nn.parallel.distributed import DistributedDataParallel from torch.optim import Optimizer @@ -36,8 +41,14 @@ rank_zero_deprecation, rank_zero_warn, ) -from pytorch_lightning.utilities.distributed import rank_zero_only, ReduceOp, sync_ddp_if_available -from pytorch_lightning.utilities.exceptions import MisconfigurationException +from pytorch_lightning.utilities.distributed import ( + distributed_available, + rank_zero_info, + rank_zero_only, + ReduceOp, + sync_ddp_if_available, +) +from pytorch_lightning.utilities.exceptions import DeadlockDetectedException, MisconfigurationException from pytorch_lightning.utilities.seed import reset_seed if _HYDRA_AVAILABLE: @@ -89,12 +100,18 @@ def __init__( self.num_processes = len(self.parallel_devices) if self.parallel_devices is not None else 0 self._ddp_kwargs = kwargs self._has_spawned_children = False - self.task_idx = None + self._task_idx = None self._ddp_comm_state = ddp_comm_state self._ddp_comm_hook = ddp_comm_hook self._ddp_comm_wrapper = ddp_comm_wrapper + self._pids: Optional[List[int]] = None + self._sync_dir: Optional[str] = None self.set_world_ranks() + @property + def is_distributed(self) -> bool: + return True + @property def root_device(self) -> torch.device: return self.parallel_devices[self.local_rank] @@ -117,6 +134,18 @@ def sync_batchnorm(self) -> bool: def sync_batchnorm(self, sync_batchnorm: bool) -> None: self._sync_batchnorm = sync_batchnorm + @property + def task_idx(self) -> Optional[int]: + rank_zero_deprecation( + f'`{self.__class__.__name__}.task_idx` is deprecated in v1.4 and will be removed in v1.6. Use ' + f'`{self.__class__.__name__}.local_rank` instead.' 
+ ) + return self._task_idx + + @task_idx.setter + def task_idx(self, task_idx: int) -> None: + self._task_idx = task_idx + @property def distributed_sampler_kwargs(self): distributed_sampler_kwargs = dict(num_replicas=(self.num_nodes * self.num_processes), rank=self.global_rank) @@ -137,7 +166,6 @@ def setup_environment(self) -> None: self.setup_distributed() def _call_children_scripts(self): - # bookkeeping of spawned processes assert self.local_rank == 0 self._check_can_spawn_children() @@ -151,19 +179,28 @@ def _call_children_scripts(self): os.environ["NODE_RANK"] = str(self.cluster_environment.node_rank()) os.environ["LOCAL_RANK"] = str(self.cluster_environment.local_rank()) - # when user is using hydra find the absolute path - path_lib = os.path.abspath if not _HYDRA_AVAILABLE else to_absolute_path - - # pull out the commands used to run the script and resolve the abs file path - command = sys.argv - try: - full_path = path_lib(command[0]) - except Exception: - full_path = os.path.abspath(command[0]) - - command[0] = full_path - # use the same python interpreter and actually running - command = [sys.executable] + command + # create a temporary directory used to synchronize processes on deadlock. + os.environ["PL_DDP_SYNC_TMPDIR"] = self._sync_dir = tempfile.mkdtemp() + + # Check if the current calling command looked like `python a/b/c.py` or `python -m a.b.c` + # See https://docs.python.org/3/reference/import.html#main-spec + if __main__.__spec__ is None: # pragma: no-cover + # Script called as `python a/b/c.py` + # when user is using hydra find the absolute path + path_lib = os.path.abspath if not _HYDRA_AVAILABLE else to_absolute_path + + # pull out the commands used to run the script and resolve the abs file path + command = sys.argv + try: + full_path = path_lib(command[0]) + except Exception: + full_path = os.path.abspath(command[0]) + + command[0] = full_path + # use the same python interpreter and actually running + command = [sys.executable] + command + else: # Script called as `python -m a.b.c` + command = [sys.executable, "-m", __main__.__spec__.name] + sys.argv[1:] # the visible devices tell us how many GPUs we want to use. # when the trainer script was called the device has already been scoped by the time @@ -172,11 +209,9 @@ def _call_children_scripts(self): if self.parallel_devices is None: raise MisconfigurationException("you selected (distribute_backend = ddp) but did not set Trainer(gpus=?)") - os.environ["PL_TRAINER_GPUS"] = ",".join([str(device.index) for device in self.parallel_devices]) os.environ["PL_IN_DDP_SUBPROCESS"] = "1" - num_gpus = len(self.parallel_devices) - os.environ["WORLD_SIZE"] = f"{num_gpus * self.num_nodes}" + os.environ["WORLD_SIZE"] = f"{self.num_processes * self.num_nodes}" self.interactive_ddp_procs = [] @@ -222,13 +257,6 @@ def setup_distributed(self): # where to store ip_table self.init_ddp_connection() - # on world_size=0 let everyone know training is starting - if self.is_global_zero and not torch.distributed.is_initialized(): - log.info("-" * 100) - log.info(f"distributed_backend={self.distributed_backend}") - log.info(f"All DDP processes registered. 
Starting ddp with {self.world_size} processes")
-            log.info("-" * 100)
-
         # set the ranks and devices
         self.dist.rank = self.global_rank
         self.dist.device = self.root_device
@@ -295,7 +323,17 @@ def init_ddp_connection(self, global_rank: Optional[int] = None, world_size: Opt
             os.environ["MASTER_PORT"] = str(self.cluster_environment.master_port())
         if not torch.distributed.is_initialized():
             log.info(f"initializing ddp: GLOBAL_RANK: {global_rank}, MEMBER: {global_rank + 1}/{world_size}")
-            torch_distrib.init_process_group(self.torch_distributed_backend, rank=global_rank, world_size=world_size)
+            torch.distributed.init_process_group(
+                self.torch_distributed_backend, rank=global_rank, world_size=world_size
+            )
+
+            # on rank=0 let everyone know training is starting
+            rank_zero_info(
+                f"{'-' * 100}\n"
+                f"distributed_backend={self.torch_distributed_backend}\n"
+                f"All DDP processes registered. Starting ddp with {self.world_size} processes\n"
+                f"{'-' * 100}\n"
+            )
 
     def pre_dispatch(self):
         # move the model to the correct device
@@ -306,21 +344,26 @@
         self.configure_ddp()
 
-        self.barrier()
+        # share ddp pids to all processes
+        self._share_information_to_prevent_deadlock()
 
     def post_dispatch(self) -> None:
         self.cluster_environment.teardown()
 
-    def barrier(self, *args, **kwargs):
-        if torch_distrib.is_available() and torch_distrib.is_initialized():
-            torch_distrib.barrier()
+    def barrier(self, *args, **kwargs) -> None:
+        if not distributed_available():
+            return
+        if _TORCH_GREATER_EQUAL_1_8 and torch.distributed.get_backend() == "nccl":
+            torch.distributed.barrier(device_ids=self.determine_ddp_device_ids())
+        else:
+            torch.distributed.barrier()
 
     def broadcast(self, obj: object, src: int = 0) -> object:
         return self.dist.broadcast(obj)
 
     def pre_backward(self, closure_loss: torch.Tensor, should_accumulate: bool, optimizer: Optimizer, opt_idx: int):
         """Run before precision plugin executes backward"""
-        if not self.lightning_module.automatic_optimization and self.model.require_backward_grad_sync:
+        if not self.lightning_module.automatic_optimization:
             prepare_for_backward(self.model, closure_loss)
 
     def model_to_device(self):
@@ -328,7 +371,7 @@
         torch.cuda.set_device(self.root_device)
         self.model.to(self.root_device)
 
-    def reduce(self, tensor, group: Optional[Any] = None, reduce_op: Optional[Union[ReduceOp, str]] = "mean"):
+    def reduce(self, tensor, group: Optional[Any] = None, reduce_op: Union[ReduceOp, str] = "mean") -> torch.Tensor:
         """
         Reduces a tensor from several distributed processes to one aggregated tensor.
 
@@ -342,7 +385,7 @@
             the reduced value, except when the input was not a tensor, in which case the output remains unchanged
         """
         if isinstance(tensor, torch.Tensor):
-            tensor = sync_ddp_if_available(tensor, group, reduce_op=(reduce_op or "mean"))
+            tensor = sync_ddp_if_available(tensor, group, reduce_op=reduce_op)
         return tensor
 
     def training_step(self, *args, **kwargs):
@@ -369,3 +412,41 @@ def register_plugins(cls, plugin_registry: Dict) -> None:
             description="DDP Plugin with `find_unused_parameters` as False",
             find_unused_parameters=False
         )
+
+    def _share_information_to_prevent_deadlock(self):
+        self._share_pids()
+
+        # remove `PL_DDP_SYNC_TMPDIR` from os.environ
+        self._sync_dir = os.environ.pop("PL_DDP_SYNC_TMPDIR", None)
+
+    def _share_pids(self):
+        """
+        Make all DDP processes aware of all processes' pids.
+ """ + self.barrier() + pids = self.all_gather(torch.tensor(os.getpid(), device=self.root_device)) + pids = pids.cpu().numpy().tolist() + self._pids = pids if isinstance(pids, list) else [pids] + + def reconciliate_processes(self, trace: str): + if self.world_size < 2: + return + + sync_dir = self._sync_dir + + # save a file locally. + torch.save(True, os.path.join(sync_dir, f"{self.global_rank}.pl")) + + # sleep for a short time + time.sleep(3) + + # return if all processes wrote a file in the `sync_dir`. + # todo (tchaton) Add support for non-shared file-system which will fail. + if len(os.listdir(sync_dir)) == self.world_size: + return + + for pid in self._pids: + if pid != os.getpid(): + os.kill(pid, signal.SIGKILL) + shutil.rmtree(sync_dir) + raise DeadlockDetectedException(f"DeadLock detected from rank: {self.global_rank} \n {trace}") diff --git a/pytorch_lightning/plugins/training_type/ddp2.py b/pytorch_lightning/plugins/training_type/ddp2.py index b6d21904d1933..185e955135141 100644 --- a/pytorch_lightning/plugins/training_type/ddp2.py +++ b/pytorch_lightning/plugins/training_type/ddp2.py @@ -13,8 +13,9 @@ # limitations under the License. import torch -from pytorch_lightning.core.step_result import Result from pytorch_lightning.plugins.training_type.ddp import DDPPlugin +from pytorch_lightning.utilities.apply_func import apply_to_collection +from pytorch_lightning.utilities.types import _METRIC_COLLECTION class DDP2Plugin(DDPPlugin): @@ -34,26 +35,25 @@ def setup(self, model): self.task_idx = self.cluster_environment.local_rank() # the difference to DDP is that we don't call children processes here - def reduce(self, tensor, *args, **kwargs): + def reduce(self, collection: _METRIC_COLLECTION, *args, **kwargs) -> _METRIC_COLLECTION: """ - Reduces a tensor from all processes to one aggregated tensor. + Reduces a collection of tensors from all processes. It can be applied to just a single tensor. In DDP2, the reduction here is only across local devices within the node. Args: - tensor: the tensor to sync and reduce + collection: The collection of tensors to sync and reduce. *args: ignored for DDP2 **kwargs: ignored for DDP2 Return: - reduced value, except when the input was not a tensor the output remains is unchanged + Reduced tensor values or the same value if it was not or did not contain a tensor. 
""" - if isinstance(tensor, Result): - tensor.dp_reduce() - elif isinstance(tensor, torch.Tensor): - tensor = tensor.mean() + def mean(t: torch.Tensor) -> torch.Tensor: + original_dtype = t.dtype + return t.float().mean().to(original_dtype) - return tensor + return apply_to_collection(collection, torch.Tensor, mean) @property def root_device(self): diff --git a/pytorch_lightning/plugins/training_type/ddp_spawn.py b/pytorch_lightning/plugins/training_type/ddp_spawn.py index df9f0ee158ba3..e5084adb1a63e 100644 --- a/pytorch_lightning/plugins/training_type/ddp_spawn.py +++ b/pytorch_lightning/plugins/training_type/ddp_spawn.py @@ -17,7 +17,7 @@ from typing import Any, List, Optional, Union import torch -import torch.distributed as torch_distrib +import torch.distributed import torch.multiprocessing as mp from torch.nn.parallel.distributed import DistributedDataParallel from torch.optim import Optimizer @@ -28,13 +28,18 @@ from pytorch_lightning.plugins.environments.cluster_environment import ClusterEnvironment from pytorch_lightning.plugins.training_type.parallel import ParallelPlugin from pytorch_lightning.trainer.states import TrainerFn -from pytorch_lightning.utilities import _TORCH_GREATER_EQUAL_1_7, _TORCH_GREATER_EQUAL_1_8 +from pytorch_lightning.utilities import ( + _TORCH_GREATER_EQUAL_1_7, + _TORCH_GREATER_EQUAL_1_8, + rank_zero_deprecation, + rank_zero_warn, +) from pytorch_lightning.utilities.cloud_io import atomic_save from pytorch_lightning.utilities.cloud_io import load as pl_load from pytorch_lightning.utilities.distributed import ( - rank_zero_deprecation, + distributed_available, + rank_zero_info, rank_zero_only, - rank_zero_warn, ReduceOp, sync_ddp_if_available, ) @@ -68,13 +73,13 @@ def __init__( super().__init__(parallel_devices=parallel_devices, cluster_environment=cluster_environment) if num_nodes is not None: rank_zero_deprecation( - "Argument `num_nodes` in `DDPPlugin` is deprecated in v1.4, and will be removed in v1.6. " + "Argument `num_nodes` in `DDPSpawnPlugin` is deprecated in v1.4, and will be removed in v1.6. " "Notice that it will be overriden by the trainer setting." ) self._num_nodes = num_nodes or 1 if sync_batchnorm is not None: rank_zero_deprecation( - "Argument `sync_batchnorm` in `DDPPlugin` is deprecated in v1.4, and will be removed in v1.6. " + "Argument `sync_batchnorm` in `DDPSpawnPlugin` is deprecated in v1.4, and will be removed in v1.6. " "Notice that it will be overriden by the trainer setting." ) self._sync_batchnorm = sync_batchnorm or False @@ -183,13 +188,6 @@ def new_process(self, process_idx, trainer, mp_queue): # ... need to double check that it is the correct place # self.trainer.call_setup_hook(self.model) - # on world_size=0 let everyone know training is starting - if self.is_global_zero and not torch.distributed.is_initialized(): - log.info("-" * 100) - log.info(f"distributed_backend={self.distributed_backend}") - log.info(f"All DDP processes registered. Starting ddp with {self.world_size} processes") - log.info("-" * 100) - # set the ranks and devices self.dist.rank = self.global_rank self.dist.device = self.root_device @@ -214,6 +212,9 @@ def post_dispatch(self): best_path = self.mp_queue.get() last_path = self.mp_queue.get() self._results = self.mp_queue.get() + # get the `callback_metrics` and set it to the trainer + # only in case the user does not override it. 
+        self.lightning_module.get_from_queue(self.mp_queue)

         # recover the weights of the processes trained in the children
         self.__recover_child_process_weights(best_path, last_path)
@@ -263,7 +264,7 @@ def init_ddp_connection(self, global_rank: Optional[int], world_size: Optional[i
         if not torch.distributed.is_initialized():
             log.info(f"initializing ddp: GLOBAL_RANK: {global_rank}, MEMBER: {global_rank + 1}/{world_size}")
-            torch_distrib.init_process_group(self.torch_distributed_backend, rank=global_rank, world_size=world_size)
+            torch.distributed.init_process_group(
+                self.torch_distributed_backend, rank=global_rank, world_size=world_size
+            )
+
+        # on rank=0 let everyone know training is starting
+        rank_zero_info(
+            f"{'-' * 100}\n"
+            f"distributed_backend={self.torch_distributed_backend}\n"
+            f"All DDP processes registered. Starting ddp with {self.world_size} processes\n"
+            f"{'-' * 100}\n"
+        )

     def determine_ddp_device_ids(self):
         if self.root_device.type == "cpu":
@@ -274,6 +285,9 @@ def transfer_distrib_spawn_state_on_fit_end(self, results):
         checkpoint_callback = self.lightning_module.trainer.checkpoint_callback
         best_model_path = checkpoint_callback.best_model_path if checkpoint_callback else None

+        # the state_dict must be computed on all processes in case Metrics are present
+        state_dict = self.lightning_module.state_dict()
+
         if self.global_rank == 0 and self.mp_queue is not None:
             rank_zero_warn("cleaning up ddp environment...")
@@ -284,12 +298,13 @@ def transfer_distrib_spawn_state_on_fit_end(self, results):
                 and len(best_model_path) > 0
             ):
                 last_path = re.sub(".ckpt", ".tmp_end.ckpt", best_model_path)
-                atomic_save(self.on_save(self.lightning_module.state_dict()), last_path)
+                atomic_save(self.on_save(state_dict), last_path)

             # todo, pass complete checkpoint as state dictionary
             self.mp_queue.put(best_model_path)
             self.mp_queue.put(last_path)
             self.mp_queue.put(results)
+            self.lightning_module.add_to_queue(self.mp_queue)  # adds the `callback_metrics` to the queue

     def __recover_child_process_weights(self, best_path, last_path):
         # transfer back the best path to the trainer
@@ -302,9 +317,13 @@ def __recover_child_process_weights(self, best_path, last_path):
             ckpt = pl_load(last_path, map_location=lambda storage, loc: storage)
             self.lightning_module.load_state_dict(ckpt)

-    def barrier(self, *args, **kwargs):
-        if torch_distrib.is_initialized():
-            torch_distrib.barrier()
+    def barrier(self, *args, **kwargs) -> None:
+        if not distributed_available():
+            return
+        if _TORCH_GREATER_EQUAL_1_8 and torch.distributed.get_backend() == "nccl":
+            torch.distributed.barrier(device_ids=self.determine_ddp_device_ids())
+        else:
+            torch.distributed.barrier()

     def broadcast(self, obj: object, src: int = 0) -> object:
         return self.dist.broadcast(obj)
@@ -319,7 +338,7 @@ def pre_backward(self, closure_loss: torch.Tensor, should_accumulate: bool, opti
         if not self.lightning_module.automatic_optimization and self.model.require_backward_grad_sync:
             prepare_for_backward(self.model, closure_loss)

-    def reduce(self, tensor, group: Optional[Any] = None, reduce_op: Optional[Union[ReduceOp, str]] = "mean"):
+    def reduce(self, tensor, group: Optional[Any] = None, reduce_op: Union[ReduceOp, str] = "mean") -> torch.Tensor:
         """
         Reduces a tensor from several distributed processes to one aggregated tensor.
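For orientation, the spawn handoff changed in the hunks above is an ordering contract between the child process (`transfer_distrib_spawn_state_on_fit_end`) and the parent (`post_dispatch`): puts and gets on the `mp_queue` must happen in exactly the same sequence, with the new `add_to_queue`/`get_from_queue` pair appended at the end. A minimal standalone sketch of that contract (the `child` function and its payloads are illustrative, not code from this patch):

    import torch.multiprocessing as mp

    def child(queue):
        # puts mirror the parent's gets, in the same order
        queue.put("best.ckpt")              # best_model_path
        queue.put("last.tmp_end.ckpt")      # last_path
        queue.put({"status": "finished"})   # results
        queue.put({"val_loss": 0.123})      # extra state, e.g. the callback metrics

    if __name__ == "__main__":
        queue = mp.SimpleQueue()
        process = mp.Process(target=child, args=(queue,))
        process.start()
        process.join()
        best_path = queue.get()
        last_path = queue.get()
        results = queue.get()
        callback_metrics = queue.get()      # what `get_from_queue` consumes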
@@ -333,7 +352,7 @@ def reduce(self, tensor, group: Optional[Any] = None, reduce_op: Optional[Union[ reduced value, except when the input was not a tensor the output remains is unchanged """ if isinstance(tensor, torch.Tensor): - tensor = sync_ddp_if_available(tensor, group, reduce_op=(reduce_op or "mean")) + tensor = sync_ddp_if_available(tensor, group, reduce_op=reduce_op) return tensor def training_step(self, *args, **kwargs): diff --git a/pytorch_lightning/plugins/training_type/deepspeed.py b/pytorch_lightning/plugins/training_type/deepspeed.py index 8dd04aafa6b86..4d229e4bff43a 100644 --- a/pytorch_lightning/plugins/training_type/deepspeed.py +++ b/pytorch_lightning/plugins/training_type/deepspeed.py @@ -17,13 +17,12 @@ import os from collections import OrderedDict from pathlib import Path -from types import SimpleNamespace -from typing import Any, Callable, Dict, Generator, List, Optional, Tuple, Union +from typing import Any, Callable, Dict, Generator, List, Mapping, Optional, Tuple, Union import torch +import pytorch_lightning as pl from pytorch_lightning.callbacks import GradientAccumulationScheduler -from pytorch_lightning.core.lightning import LightningModule from pytorch_lightning.overrides.base import _LightningModuleWrapperBase from pytorch_lightning.plugins.environments.cluster_environment import ClusterEnvironment from pytorch_lightning.plugins.training_type.ddp import DDPPlugin @@ -33,6 +32,7 @@ from pytorch_lightning.utilities.distributed import rank_zero_info, rank_zero_only from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.imports import _DEEPSPEED_AVAILABLE +from pytorch_lightning.utilities.warnings import _warn, LightningDeprecationWarning if _DEEPSPEED_AVAILABLE: import deepspeed @@ -51,7 +51,7 @@ def remove_module_hooks(model: torch.nn.Module) -> None: class LightningDeepSpeedModule(_LightningModuleWrapperBase): - def __init__(self, pl_module: LightningModule, precision: int): + def __init__(self, pl_module: 'pl.LightningModule', precision: int) -> None: super().__init__(pl_module) self.precision = precision @@ -78,9 +78,23 @@ def __init__( self, zero_optimization: bool = True, stage: int = 2, - cpu_offload: bool = False, - cpu_offload_params: bool = False, - cpu_offload_use_pin_memory: bool = False, + remote_device: str = 'cpu', + offload_optimizer: bool = False, + offload_parameters: bool = False, + offload_params_device: str = 'cpu', + nvme_path: str = '/local_nvme', + params_buffer_count: int = 5, + params_buffer_size: int = 1e8, + max_in_cpu: int = 1e9, + offload_optimizer_device: str = 'cpu', + optimizer_buffer_count: int = 4, + block_size: int = 1048576, + queue_depth: int = 8, + single_submit: bool = False, + overlap_events: bool = True, + thread_count: int = 1, + pin_memory: bool = False, + sub_group_size: int = 1e12, contiguous_gradients: bool = True, overlap_comm: bool = True, allgather_partitions: bool = True, @@ -104,11 +118,14 @@ def __init__( contiguous_memory_optimization: bool = False, synchronize_checkpoint_boundary: bool = False, save_full_weights: bool = True, + cpu_offload: bool = False, + cpu_offload_params: bool = False, + cpu_offload_use_pin_memory: bool = False, ) -> None: """ Provides capabilities to run training using the DeepSpeed library, with training optimizations for large billion parameter models. - `For more information: https://www.deepspeed.ai/`. + `For more information: https://pytorch-lightning.readthedocs.io/en/latest/advanced/multi_gpu.html#deepspeed`. .. 
warning::
    ``DeepSpeedPlugin`` is in beta and subject to change.

@@ -118,36 +135,81 @@ def __init__(

     Arguments:

-        zero_optimization: Enable ZeRO optimization. This is only compatible with precision=16. (default: True)
+        zero_optimization: Enable ZeRO optimization. This is only compatible with precision=16.

         stage: Different stages of the ZeRO Optimizer. 0 is disabled,
-            1 is optimizer state partitioning, 2 is optimizer+gradient state partitioning (default: 2)
+            1 is optimizer state partitioning, 2 is optimizer+gradient state partitioning,
+            3 is optimizer+gradient_parameter partitioning using the infinity engine.
+
+        remote_device: Device to instantiate the model on initially (``cpu`` or ``nvme``).
+
+        offload_optimizer: Enable offloading optimizer memory and computation to CPU or NVMe
+            based on ``offload_optimizer_device``.
+
+        offload_parameters: When using ZeRO Stage 3, enable offloading parameter memory and computation
+            to CPU or NVMe based on ``offload_params_device``.
+
+        offload_params_device: When offloading parameters choose the device to offload to, ``cpu`` or ``nvme``.
+
+        offload_optimizer_device: When offloading optimizer state choose the device to offload to,
+            ``cpu`` or ``nvme``.
+
+        params_buffer_count: Number of buffers in buffer pool for
+            parameter offloading when ``offload_params_device`` is ``nvme``.
+
+        params_buffer_size: Size of buffers in buffer pool for parameter offloading
+            when ``offload_params_device`` is ``nvme``.
+
+        max_in_cpu: Number of parameter elements to maintain in CPU memory when offloading to NVMe is enabled.

-        cpu_offload: Enable offloading optimizer memory and computation to CPU
+        nvme_path: Filesystem path for NVMe device for optimizer/parameter state offloading.

-        cpu_offload_params: When using ZeRO stage 3, offload parameters to CPU
+        optimizer_buffer_count: Number of buffers in buffer pool for optimizer state offloading
+            when ``offload_optimizer_device`` is set to ``nvme``.
+            This should be at least the number of states maintained per parameter by the optimizer.
+            For example, Adam optimizer has 4 states (parameter, gradient, momentum, and variance).

-        cpu_offload_use_pin_memory: When using ZeRO stage 3, pin memory on CPU
+        block_size: When using NVMe Offloading, the I/O block size in bytes.
+
+        queue_depth: When using NVMe Offloading, the I/O queue depth.
+
+        single_submit: When using NVMe Offloading,
+            submit requests to storage device as multiple individual requests,
+            as opposed to one block of requests.
+
+        overlap_events: When using NVMe Offloading,
+            submit requests to storage device in an overlapped fashion
+            without waiting for completion of earlier requests.
+
+        thread_count: When using NVMe Offloading,
+            intra-request parallelism for each read/write submitted by a user thread.
+
+        pin_memory: When using ZeRO stage 3, pin optimizer state memory on CPU.
+            This could boost throughput at the cost of extra memory overhead.
+
+        sub_group_size: When using ZeRO stage 3, defines the number of parameters
+            within a sub group to offload at a time.
+            Smaller numbers require more communication, but improve memory efficiency.

         contiguous_gradients: Copies gradients to a continuous buffer as they are produced.
-            Avoids memory fragmentation during backwards. Useful when training large models. (default: True)
+            Avoids memory fragmentation during backwards. Useful when training large models.

         overlap_comm: Overlap the reduction (synchronization) of gradients with the backwards computation.
- This is a speed optimization when training across multiple GPUs/machines. (default: True) + This is a speed optimization when training across multiple GPUs/machines. allgather_partitions: All gather updated parameters at the end of training step, - instead of using a series of broadcast collectives (default: True) + instead of using a series of broadcast collectives. - reduce_scatter: Use reduce/scatter instead of allreduce to average gradients (default:True) + reduce_scatter: Use reduce/scatter instead of allreduce to average gradients. allgather_bucket_size: Number of elements to allgather at once. - Used to limit the memory required for larger model sizes, with a tradeoff with speed. (default: 2e8) + Used to limit the memory required for larger model sizes, with a tradeoff with speed. reduce_bucket_size: Number of elements to reduce at once. - Used to limit the memory required for larger model sizes, with a tradeoff with speed (default: 2e8) + Used to limit the memory required for larger model sizes, with a tradeoff with speed. zero_allow_untested_optimizer: Allow untested optimizers to be used with ZeRO. Currently only Adam is a - DeepSpeed supported optimizer when using ZeRO (default: True) + DeepSpeed supported optimizer when using ZeRO. logging_batch_size_per_gpu: Config used in DeepSpeed to calculate verbose timing for logging on a per sample per second basis (only displayed if logging=logging.INFO). @@ -158,45 +220,56 @@ def __init__( config: Pass in a deepspeed formatted config dict, or path to a deepspeed config: https://www.deepspeed.ai/docs/config-json. - All defaults will be ignored if a config is passed in. (Default: ``None``) + All defaults will be ignored if a config is passed in. - logging_level: Set logging level for deepspeed. (Default: ``logging.WARN``) + logging_level: Set logging level for deepspeed. loss_scale: Loss scaling value for FP16 training. - 0.0 results in dynamic loss scaling, otherwise static (Default: 0) + 0.0 results in dynamic loss scaling, otherwise static. initial_scale_power: Power of the initial dynamic loss scale value. Loss scale is computed - by ``2^initial_scale_power`` (Default: 32) + by ``2^initial_scale_power``. - loss_scale_window: Window in which to raise/lower the dynamic FP16 loss scaling value (Default: 1000) + loss_scale_window: Window in which to raise/lower the dynamic FP16 loss scaling value. - hysteresis: FP16 Delay shift in Dynamic Loss scaling (Default: 2) + hysteresis: FP16 Delay shift in Dynamic Loss scaling. - min_loss_scale: The minimum FP16 dynamic loss scaling value (Default: 1000) + min_loss_scale: The minimum FP16 dynamic loss scaling value. - partition_activations: Enables partition activation when used with ZeRO stage 3. + partition_activations: Enables partition activation when used with ZeRO stage 3 and model parallelism. Still requires you to wrap your forward functions in deepspeed.checkpointing.checkpoint. See `deepspeed tutorial - `_ + `_. - cpu_checkpointing: Offloads partitioned activations to CPU if ``partition_activations`` is enabled + cpu_checkpointing: Offloads partitioned activations to CPU if ``partition_activations`` is enabled. contiguous_memory_optimization: Copies partitioned activations so that they are contiguous in memory. - Not supported by all models + Not supported by all models. synchronize_checkpoint_boundary: Insert :func:`torch.cuda.synchronize` at each checkpoint boundary. save_full_weights: Gathers weights across all processes before saving to disk when using ZeRO Stage 3. 
This allows a single weight file to contain the entire model, rather than individual sharded weight files. - Disable to save sharded states individually. (Default: True) - + Disable to save sharded states individually. """ if not _DEEPSPEED_AVAILABLE: raise MisconfigurationException( "To use the DeepSpeed plugin, you must have DeepSpeed installed." " pip install deepspeed" ) + + if cpu_offload or cpu_offload_params or cpu_offload_use_pin_memory: + _warn( + "The usage of `cpu_offload`, `cpu_offload_params`, and `cpu_offload_use_pin_memory` " + "is deprecated since v1.4 and will be removed in v1.5." + " From now on use `offload_optimizer`, `offload_parameters` and `pin_memory`.", + category=LightningDeprecationWarning + ) + offload_optimizer = cpu_offload + offload_parameters = cpu_offload_params + pin_memory = cpu_offload_use_pin_memory + super().__init__( parallel_devices=parallel_devices, num_nodes=num_nodes, cluster_environment=cluster_environment ) @@ -207,24 +280,38 @@ def __init__( zero_optimization, zero_allow_untested_optimizer, logging_batch_size_per_gpu, + offload_optimizer=offload_optimizer, + offload_parameters=offload_parameters, + nvme_path=nvme_path, + offload_params_device=offload_params_device, + params_buffer_count=params_buffer_count, + params_buffer_size=params_buffer_size, + max_in_cpu=max_in_cpu, + pin_memory=pin_memory, + offload_optimizer_device=offload_optimizer_device, + optimizer_buffer_count=optimizer_buffer_count, + block_size=block_size, + queue_depth=queue_depth, + single_submit=single_submit, + overlap_events=overlap_events, + thread_count=thread_count, partition_activations=partition_activations, cpu_checkpointing=cpu_checkpointing, contiguous_memory_optimization=contiguous_memory_optimization, synchronize_checkpoint_boundary=synchronize_checkpoint_boundary, stage=stage, - cpu_offload=cpu_offload, - cpu_offload_params=cpu_offload_params, - cpu_offload_use_pin_memory=cpu_offload_use_pin_memory, contiguous_gradients=contiguous_gradients, overlap_comm=overlap_comm, allgather_partitions=allgather_partitions, reduce_scatter=reduce_scatter, allgather_bucket_size=allgather_bucket_size, reduce_bucket_size=reduce_bucket_size, + sub_group_size=sub_group_size, ) self._config_initialized = False deepspeed.utils.logging.logger.setLevel(logging_level) + self.remote_device = remote_device self.save_full_weights = save_full_weights # default FP16 parameters. 
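To make the new offload surface easier to follow: the flags introduced above are folded into the DeepSpeed ZeRO config that `_create_default_config` assembles further down in this diff. Below is an illustrative sketch of the resulting ``zero_optimization`` section for stage 3 with both offload switches turned on, using the constructor defaults; the exact dict is built by the code later in this file:

    # Python dict mirroring the DeepSpeed JSON schema (https://www.deepspeed.ai/docs/config-json)
    zero_optimization = {
        "stage": 3,
        "offload_optimizer": {           # added when offload_optimizer=True
            "device": "cpu",             # offload_optimizer_device
            "nvme_path": "/local_nvme",  # nvme_path
            "buffer_count": 4,           # optimizer_buffer_count
            "pin_memory": False,         # pin_memory
        },
        "offload_param": {               # added when offload_parameters=True
            "device": "cpu",             # offload_params_device
            "nvme_path": "/local_nvme",  # nvme_path
            "buffer_count": 5,           # params_buffer_count
            "buffer_size": 1e8,          # params_buffer_size
            "max_in_cpu": 1e9,           # max_in_cpu
            "pin_memory": False,         # pin_memory
        },
    }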
@@ -247,22 +334,30 @@ def _load_config(self, config): config = json.load(f) return config + def setup_distributed(self): + super().setup_distributed() + if not self._config_initialized: + self._format_config() + self._config_initialized = True + if self.on_gpu: + torch.cuda.set_device(self.root_device) + def pre_dispatch(self): self.init_deepspeed() self.barrier() def init_deepspeed(self): - if not self._config_initialized: - self._format_config() - self._config_initialized = True - self._handle_gradient_accumulation_steps() precision = self.lightning_module.trainer.accelerator.precision model = LightningDeepSpeedModule(pl_module=self.model, precision=precision) - if self.on_gpu: - torch.cuda.set_device(self.root_device) + if self.zero_stage_3: + # Ensure the entire model has been moved to the appropriate device + dtype = torch.float16 if self.precision in (16, "mixed") else torch.float32 + deepspeed.zero.Init( + module=model, remote_device=self.remote_device, pin_memory=True, config=self.config, dtype=dtype + ) if self.lightning_module.trainer and self.lightning_module.trainer.training: self._initialize_deepspeed_train(model) @@ -287,6 +382,7 @@ def zero_stage_3(self) -> bool: def _initialize_deepspeed_train(self, model): optimizer, lightning_scheduler, optimizer_frequencies = None, None, None + if "optimizer" not in self.config: rank_zero_info( "You have not specified an optimizer or scheduler within the DeepSpeed config." @@ -295,12 +391,12 @@ def _initialize_deepspeed_train(self, model): optimizer, lightning_scheduler, optimizer_frequencies = self._init_scheduler_optimizer() model_parameters = filter(lambda p: p.requires_grad, self.model.parameters()) model, optimizer, _, lr_scheduler = deepspeed.initialize( - args=SimpleNamespace(local_rank=self.local_rank), + config=self.config, model=model, model_parameters=model_parameters, optimizer=optimizer, lr_scheduler=lightning_scheduler, - config_params=self.config, + dist_init_required=False ) self._set_deepspeed_activation_checkpointing() @@ -312,13 +408,21 @@ def _initialize_deepspeed_train(self, model): @contextlib.contextmanager def model_sharded_context(self) -> Generator[None, None, None]: if self.zero_stage_3: - model_parallel_context = deepspeed.zero.Init(remote_device="cpu", pin_memory=True) + assert self._config_initialized + dtype = torch.float16 if self.precision in (16, "mixed") else torch.float32 + model_parallel_context = deepspeed.zero.Init( + remote_device=self.remote_device, pin_memory=True, config=self.config, dtype=dtype + ) else: model_parallel_context = super().model_sharded_context() with model_parallel_context: yield + @property + def precision(self) -> Union[str, int]: + return self.lightning_module.trainer.precision + def _set_deepspeed_activation_checkpointing(self): if self.config.get('activation_checkpointing'): checkpoint_config = self.config['activation_checkpointing'] @@ -353,12 +457,12 @@ def _initialize_deepspeed_inference(self, model): # Remove all module hooks before initializing new model remove_module_hooks(model) model, _, _, _ = deepspeed.initialize( - args=SimpleNamespace(local_rank=self.local_rank), + config=inference_config, model=model, optimizer=optimizer, lr_scheduler=lightning_scheduler, - config_params=inference_config, model_parameters=[], + dist_init_required=False ) self.model = model @@ -378,7 +482,7 @@ def distributed_sampler_kwargs(self): distributed_sampler_kwargs = dict(num_replicas=self.world_size, rank=self.global_rank) return distributed_sampler_kwargs - def init_optimizers(self, 
trainer, model: LightningModule) -> Tuple[List, List, List]: + def init_optimizers(self, trainer: 'pl.Trainer', model: 'pl.LightningModule') -> Tuple[List, List, List]: # Skip initializing optimizers here as DeepSpeed handles optimizers via config. # User may have specified config options instead in configure_optimizers, but this is handled # via `_initialize_deepspeed_train` @@ -469,6 +573,21 @@ def _create_default_config( cpu_checkpointing: bool, contiguous_memory_optimization: bool, synchronize_checkpoint_boundary: bool, + offload_optimizer: bool, + offload_parameters: bool, + nvme_path: str, + offload_params_device: str, + params_buffer_count: int, + params_buffer_size: int, + max_in_cpu: int, + offload_optimizer_device: str, + optimizer_buffer_count: int, + pin_memory: bool, + block_size: int, + queue_depth: int, + single_submit: bool, + overlap_events: bool, + thread_count: int, **zero_kwargs, ) -> Dict: cfg = { @@ -477,12 +596,37 @@ def _create_default_config( "cpu_checkpointing": cpu_checkpointing, "contiguous_memory_optimization": contiguous_memory_optimization, "synchronize_checkpoint_boundary": synchronize_checkpoint_boundary - } + }, + "aio": { + "block_size": block_size, + "queue_depth": queue_depth, + "single_submit": single_submit, + "overlap_events": overlap_events, + "thread_count": thread_count + }, } if zero_optimization: + zero_config = zero_kwargs + + if offload_optimizer: + zero_config["offload_optimizer"] = { + 'device': offload_optimizer_device, + 'nvme_path': nvme_path, + 'buffer_count': optimizer_buffer_count, + 'pin_memory': pin_memory + } + if offload_parameters: + zero_config['offload_param'] = { + 'device': offload_params_device, + 'nvme_path': nvme_path, + 'buffer_count': params_buffer_count, + 'buffer_size': params_buffer_size, + 'max_in_cpu': max_in_cpu, + 'pin_memory': pin_memory + } cfg = { "zero_allow_untested_optimizer": zero_allow_untested_optimizer, - "zero_optimization": zero_kwargs, + "zero_optimization": zero_config, **cfg } if logging_batch_size_per_gpu != 'auto': @@ -524,45 +668,41 @@ def save_checkpoint(self, checkpoint: Dict, filepath: str) -> None: else: super().save_checkpoint(checkpoint, filepath) - def restore_model_state_from_ckpt_path( - self, - ckpt_path: str, - map_location: Callable = lambda storage, loc: storage, - ) -> Tuple[Dict, bool]: - if not self.save_full_weights and self.world_size > 1: - # Rely on deepspeed to load the checkpoint and necessary information - from pytorch_lightning.trainer.states import TrainerFn - is_fitting = self.lightning_module.trainer.state.fn == TrainerFn.FITTING - save_dir = self._filepath_to_dir(ckpt_path) - - if self.zero_stage_3: - # TODO: Currently required as this call is missing within the deepspeed engine. 
- self.deepspeed_engine.optimizer._partition_all_parameters() - - _, client_state = self.deepspeed_engine.load_checkpoint( - save_dir, load_optimizer_states=is_fitting, load_lr_scheduler_states=is_fitting - ) + def load_checkpoint_file(self, checkpoint_path: Union[str, Path]) -> Dict[str, Any]: + if self.save_full_weights or self.world_size == 1: + # Broadcast to ensure we load from the rank 0 checkpoint + # This doesn't have to be the case when using deepspeed sharded checkpointing + checkpoint_path = self.broadcast(checkpoint_path) + return super().load_checkpoint_file(checkpoint_path) + + # Rely on deepspeed to load the checkpoint and necessary information + from pytorch_lightning.trainer.states import TrainerFn + is_fitting = self.lightning_module.trainer.state.fn == TrainerFn.FITTING + save_dir = self._filepath_to_dir(checkpoint_path) + + if self.zero_stage_3: + # TODO: Currently required as this call is missing within the deepspeed engine. + self.deepspeed_engine.optimizer._partition_all_parameters() - # restore datamodule states - if self.lightning_module.trainer.datamodule is not None: - self.lightning_module.trainer.datamodule.on_load_checkpoint(client_state) + _, client_state = self.deepspeed_engine.load_checkpoint( + save_dir, load_optimizer_states=is_fitting, load_lr_scheduler_states=is_fitting + ) + return client_state - # hook: give user access to checkpoint if needed. - self.lightning_module.on_load_checkpoint(client_state) - return client_state, False + def load_model_state_dict(self, checkpoint: Mapping[str, Any]) -> None: + # override to do nothing, deepspeed engine already loaded the weights in `load_checkpoint_file()` + pass - # Broadcast to ensure we load from the rank 0 checkpoint - # This doesn't have to be the case when using deepspeed sharded checkpointing - ckpt_path = self.broadcast(ckpt_path) - return super().restore_model_state_from_ckpt_path(ckpt_path, map_location=map_location) + def load_optimizer_state_dict(self, checkpoint: Mapping[str, Any]) -> None: + # override to do nothing, deepspeed engine already loaded the states in `load_checkpoint_file()` + pass def update_global_step(self, total_batch_idx: int, current_global_step: int) -> int: if self._original_accumulate_grad_batches is None: return super().update_global_step(total_batch_idx, current_global_step) - else: - if total_batch_idx % self._original_accumulate_grad_batches == 0: - current_global_step += 1 - return current_global_step + if total_batch_idx % self._original_accumulate_grad_batches == 0: + current_global_step += 1 + return current_global_step @classmethod def register_plugins(cls, plugin_registry: Dict) -> None: @@ -573,7 +713,7 @@ def register_plugins(cls, plugin_registry: Dict) -> None: cls, description="DeepSpeed ZeRO Stage 2 and CPU Offload", stage=2, - cpu_offload=True + offload_optimizer=True ) plugin_registry.register("deepspeed_stage_3", cls, description="DeepSpeed ZeRO Stage 3", stage=3) plugin_registry.register( @@ -581,5 +721,17 @@ def register_plugins(cls, plugin_registry: Dict) -> None: cls, description="DeepSpeed ZeRO Stage 3 and CPU Offload", stage=3, - cpu_offload=True + offload_optimizer=True, + offload_parameters=True, + ) + plugin_registry.register( + "deepspeed_stage_3_offload_nvme", + cls, + description="DeepSpeed ZeRO Stage 3 and NVMe Offload", + stage=3, + offload_optimizer=True, + offload_parameters=True, + remote_device='nvme', + offload_params_device='nvme', + offload_optimizer_device='nvme' ) diff --git a/pytorch_lightning/plugins/training_type/dp.py 
b/pytorch_lightning/plugins/training_type/dp.py
index 08caa7398ab8c..2787ab5644ccd 100644
--- a/pytorch_lightning/plugins/training_type/dp.py
+++ b/pytorch_lightning/plugins/training_type/dp.py
@@ -16,10 +16,11 @@
 import torch
 from torch.nn import DataParallel

-from pytorch_lightning.core.step_result import Result
 from pytorch_lightning.overrides.data_parallel import LightningParallelModule
 from pytorch_lightning.plugins.training_type.parallel import ParallelPlugin
 from pytorch_lightning.utilities.apply_func import apply_to_collection
+from pytorch_lightning.utilities.model_helpers import is_overridden
+from pytorch_lightning.utilities.types import _METRIC_COLLECTION


 class DataParallelPlugin(ParallelPlugin):
@@ -52,30 +53,24 @@ def setup(self, model):
         model.to(self.root_device)
         self._model = DataParallel(LightningParallelModule(model), self.parallel_devices)

-    def reduce(self, tensor, *args, **kwargs):
+    def reduce(self, collection: _METRIC_COLLECTION, *args, **kwargs) -> _METRIC_COLLECTION:
         """
-        Reduces a tensor from all parallel processes to one aggregated tensor.
+        Reduces a collection of tensors from all processes. It can be applied to just a single tensor.

         Args:
-            tensor: the tensor to sync and reduce
+            collection: The collection of tensors to sync and reduce.
             *args: ignored for DP
             **kwargs: ignored for DP

         Return:
-            reduced value, except when the input was not a tensor the output remains is unchanged
+            Reduced tensor values, or the input unchanged if it was not (or did not contain) a tensor.
         """
-        if isinstance(tensor, Result):
-            tensor.dp_reduce()
-        else:
+        def mean(t: torch.Tensor) -> torch.Tensor:
+            original_dtype = t.dtype
+            return t.float().mean().to(original_dtype)

-            def _reduce(t: torch.Tensor):
-                dtype_tensor = t.dtype
-                return t.float().mean().type(dtype_tensor)
-
-            tensor = apply_to_collection(tensor, torch.Tensor, _reduce)
-
-        return tensor
+        return apply_to_collection(collection, torch.Tensor, mean)

     @property
     def root_device(self):
@@ -107,10 +102,16 @@ def predict_step(self, *args, **kwargs):
         return self.model(*args, **kwargs)

     def training_step_end(self, output):
-        return self.reduce(output)
+        if not is_overridden("training_step_end", self.lightning_module):
+            return self.reduce(output)
+        return output

     def validation_step_end(self, output):
-        return self.reduce(output)
+        if not is_overridden("validation_step_end", self.lightning_module):
+            return self.reduce(output)
+        return output

     def test_step_end(self, output):
-        return self.reduce(output)
+        if not is_overridden("test_step_end", self.lightning_module):
+            return self.reduce(output)
+        return output
diff --git a/pytorch_lightning/plugins/training_type/horovod.py b/pytorch_lightning/plugins/training_type/horovod.py
index 99899aed11753..a402f4b19a36f 100644
--- a/pytorch_lightning/plugins/training_type/horovod.py
+++ b/pytorch_lightning/plugins/training_type/horovod.py
@@ -15,13 +15,12 @@
 from typing import Any, List, Optional, Union

 import torch
-import torch.distributed as torch_distrib
 from torch.optim.lr_scheduler import _LRScheduler, Optimizer

 from pytorch_lightning.core.optimizer import LightningOptimizer
 from pytorch_lightning.plugins.training_type.parallel import ParallelPlugin
 from pytorch_lightning.utilities import _HOROVOD_AVAILABLE
-from pytorch_lightning.utilities.distributed import group, rank_zero_only, ReduceOp
+from pytorch_lightning.utilities.distributed import distributed_available, group, rank_zero_only, ReduceOp

 if _HOROVOD_AVAILABLE:
     import horovod.torch as hvd
@@ -125,7 +124,7 @@ def start_predicting(self,
trainer): self.join() def barrier(self, *args, **kwargs): - if torch_distrib.is_initialized(): + if distributed_available(): self.join() def broadcast(self, obj: object, src: int = 0) -> object: diff --git a/pytorch_lightning/plugins/training_type/ipu.py b/pytorch_lightning/plugins/training_type/ipu.py new file mode 100644 index 0000000000000..9de4e81447f0e --- /dev/null +++ b/pytorch_lightning/plugins/training_type/ipu.py @@ -0,0 +1,393 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import inspect +import json +import os +from typing import Any, Iterable, List, Optional, Union + +import torch +from torch.utils.data import DataLoader + +import pytorch_lightning as pl +from pytorch_lightning.callbacks import GradientAccumulationScheduler +from pytorch_lightning.overrides.base import _LightningModuleWrapperBase +from pytorch_lightning.plugins.environments.cluster_environment import ClusterEnvironment +from pytorch_lightning.plugins.training_type.parallel import ParallelPlugin +from pytorch_lightning.trainer.states import RunningStage +from pytorch_lightning.trainer.supporters import CombinedLoader +from pytorch_lightning.utilities import _POPTORCH_AVAILABLE, rank_zero_warn +from pytorch_lightning.utilities.apply_func import apply_to_collection +from pytorch_lightning.utilities.cloud_io import get_filesystem +from pytorch_lightning.utilities.exceptions import MisconfigurationException + +if _POPTORCH_AVAILABLE: + import poptorch + + +class LightningIPUModule(_LightningModuleWrapperBase): + + def __init__(self, pl_module: 'pl.LightningModule', precision: Union[str, int]): + super().__init__(pl_module) + self.precision = precision + + def forward(self, *inputs: Any, **kwargs: Any) -> Any: + if self.precision in ("mixed", 16): + inputs = self._move_float_tensors_to_half(inputs) + + return super().forward(*inputs, **kwargs) + + @staticmethod + def batch_to(data: torch.Tensor) -> torch.Tensor: + return data.half() + + def _move_float_tensors_to_half(self, batch: Any) -> Any: + batch = apply_to_collection(batch, (torch.FloatTensor, torch.cuda.FloatTensor), function=self.batch_to) + return batch + + +class IPUPlugin(ParallelPlugin): + """ + Plugin for training on IPU devices. + """ + + def __init__( + self, + device_iterations: int = 1, + autoreport: bool = True, + autoreport_dir: Optional[str] = None, + parallel_devices: Optional[List[torch.device]] = None, + cluster_environment: Optional[ClusterEnvironment] = None, + training_opts: Optional['poptorch.Options'] = None, + inference_opts: Optional['poptorch.Options'] = None + ) -> None: + """ + Arguments: + + device_iterations: Number of iterations to run on device at once before returning to host. + This can be used as an optimization to speed up training. 
+                https://docs.graphcore.ai/projects/poptorch-user-guide/en/0.1.67/batching.html
+            autoreport: Enable auto-reporting for IPUs using PopVision
+                https://docs.graphcore.ai/projects/graphcore-popvision-user-guide/en/latest/graph/graph.html
+            autoreport_dir: Optional directory to store autoReport output.
+            training_opts: Optional ``poptorch.Options`` to override the default options created for training.
+            inference_opts: Optional ``poptorch.Options`` to override the default
+                options created for validation/testing and predicting.
+        """
+        super().__init__(parallel_devices, cluster_environment)
+        if not _POPTORCH_AVAILABLE or not poptorch.ipuHardwareIsAvailable():
+            raise MisconfigurationException(
+                "The IPU Accelerator requires IPU devices to run. "
+                "Learn more or get started with IPUs at https://www.graphcore.ai/getstarted"
+            )
+
+        self.device_iterations = device_iterations
+        self.autoreport = autoreport
+        self.autoreport_dir = autoreport_dir
+        self.poptorch_models = {}
+        self._original_accumulate_grad_batches = None
+        self._training_opts = training_opts
+        self._inference_opts = inference_opts
+
+        if self.autoreport:
+            options = {"autoReport.all": self.autoreport}
+            if self.autoreport_dir:
+                self._fs = get_filesystem(str(self.autoreport_dir))
+                self._fs.makedirs(self.autoreport_dir, exist_ok=True)
+                options["autoReport.directory"] = self.autoreport_dir
+            os.environ["POPLAR_ENGINE_OPTIONS"] = json.dumps(options)
+
+    def pre_dispatch(self) -> None:
+        precision = self.lightning_module.trainer.precision
+        model = LightningIPUModule(self.lightning_module, precision)
+        self.model = model
+
+        # Separate models are instantiated for different stages, but they share the same weights on host.
+        # When validation/test models are run, weights are synced first.
+
+        if self.lightning_module.trainer.state.stage is RunningStage.TRAINING:
+            # Create the model for the training stage.
+            optimizer = self.lightning_module.trainer.optimizers[0]
+            model = poptorch.trainingModel(model=model, options=self.training_opts, optimizer=optimizer)
+            self.poptorch_models[RunningStage.TRAINING] = model
+        for x in (RunningStage.VALIDATING, RunningStage.TESTING, RunningStage.PREDICTING):
+            model = poptorch.inferenceModel(
+                model=model,
+                options=self.inference_opts,
+            )
+            self.poptorch_models[x] = model
+        self._handle_gradient_accumulation_steps()
+
+    @property
+    def replication_factor(self):
+        return len(self.parallel_devices)
+
+    def _create_opts(self, training: bool):
+        opts = poptorch.Options()
+        opts.deviceIterations(self.device_iterations)
+        opts.replicationFactor(self.replication_factor)
+        gradient_accumulation = self.accumulate_grad_batches if training else 1
+        opts.Training.gradientAccumulation(gradient_accumulation)
+
+        if os.environ.get("PL_GLOBAL_SEED"):
+            opts.randomSeed(int(os.environ["PL_GLOBAL_SEED"]))
+        return opts
+
+    @property
+    def training_opts(self) -> 'poptorch.Options':
+        if self._training_opts is None:
+            self._training_opts = self._create_opts(training=True)
+        self._validate_opts(self._training_opts, training=True)
+        return self._training_opts
+
+    @property
+    def inference_opts(self) -> 'poptorch.Options':
+        if self._inference_opts is None:
+            self._inference_opts = self._create_opts(training=False)
+        self._validate_opts(self._inference_opts, training=False)
+        return self._inference_opts
+
+    def _validate_opts(self, opts: 'poptorch.Options', training: bool) -> None:
+        if opts is not None:
+            if opts.replication_factor != self.replication_factor:
+                rank_zero_warn(
+                    f"Manual poptorch.Options set replicationFactor to {opts.replication_factor} "
+                    f"which differs from the ipus={self.replication_factor} flag passed to the Trainer. "
+                    f"Setting to {self.replication_factor} in the poptorch.Options."
+                )
+                opts.set(replication_factor=self.replication_factor)
+            if training:
+                accumulate_grad_batches = self.accumulate_grad_batches
+                if opts.Training.gradient_accumulation != accumulate_grad_batches:
+                    rank_zero_warn(
+                        f"Training poptorch.Options set gradientAccumulation to {opts.Training.gradient_accumulation}. "
+                        f"This is different from accumulate_grad_batches which was set to {accumulate_grad_batches}. "
+                        f"To change gradientAccumulation, please set accumulate_grad_batches in the Trainer. "
+                        f"Setting poptorch.Options gradientAccumulation to {accumulate_grad_batches}"
+                    )
+                    opts.Training.set(gradient_accumulation=accumulate_grad_batches)
+            elif opts.Training.gradient_accumulation != 1:
+                rank_zero_warn(
+                    "Inference poptorch.Options should set gradientAccumulation to 1. "
+                    "Setting gradientAccumulation to 1 for inference options."
+                )
+                opts.Training.set(gradient_accumulation=1)
+
+    @property
+    def lightning_module(self) -> Optional['pl.LightningModule']:
+        return self.model.module if isinstance(self.model, LightningIPUModule) else self.model
+
+    def on_reset_train_dataloader(self, dataloader: Union[Iterable, DataLoader]) -> Union[Iterable, DataLoader]:
+        return self.process_dataloader(dataloader)
+
+    def on_reset_val_dataloader(self, dataloader: Union[Iterable, DataLoader]) -> Union[Iterable, DataLoader]:
+        return self.process_dataloader(dataloader)
+
+    def on_reset_test_dataloader(self, dataloader: Union[Iterable, DataLoader]) -> Union[Iterable, DataLoader]:
+        return self.process_dataloader(dataloader)
+
+    def on_reset_predict_dataloader(self, dataloader: Union[Iterable, DataLoader]) -> Union[Iterable, DataLoader]:
+        return self.process_dataloader(dataloader)
+
+    def process_dataloader(self, dataloader: Union[Iterable, DataLoader]) -> Union[Iterable, DataLoader]:
+        if isinstance(dataloader, CombinedLoader):
+            dataloader.loaders = apply_to_collection(
+                dataloader.loaders,
+                DataLoader,
+                self.process_dataloader,
+            )
+            return dataloader
+        if isinstance(dataloader, list):
+            dataloader = apply_to_collection(dataloader, DataLoader, self.process_dataloader)
+            return dataloader
+        if not isinstance(dataloader, poptorch.DataLoader):
+            is_training = self.lightning_module.trainer.training
+            opts = self.training_opts if is_training else self.inference_opts
+            dataloader = self._convert_to_poptorch_loader(dataloader=dataloader, opts=opts)
+        return dataloader
+
+    def _convert_to_poptorch_loader(self, dataloader: Union[Iterable, DataLoader],
+                                    opts: 'poptorch.Options') -> Union[Iterable, DataLoader]:
+        skip_keys = ('sampler', 'batch_sampler', 'dataset_kind')
+
+        attrs = {k: v for k, v in vars(dataloader).items() if not k.startswith("_")}
+
+        params = set(inspect.signature(dataloader.__init__).parameters)
+        contains_dataset = True
+
+        if type(dataloader) is not DataLoader:
+            contains_dataset = "dataset" in params
+            params.update(inspect.signature(DataLoader.__init__).parameters)
+
+        dl_args = {name: attrs[name] for name in params if name in attrs and name not in skip_keys}
+
+        multiprocessing_context = dataloader.multiprocessing_context
+        dl_args['multiprocessing_context'] = multiprocessing_context
+        if not contains_dataset:
+            dl_args.pop('dataset')
+        # Override to drop last uneven batch, as IPUs do not support uneven inputs.
+        dl_args['drop_last'] = True
+
+        dataloader = poptorch.DataLoader(**dl_args, options=opts)
+        dataloader.multiprocessing_context = multiprocessing_context
+        return dataloader
+
+    @property
+    def accumulate_grad_batches(self) -> int:
+        """
+        Lazily tracks the ``accumulate_grad_batches`` set on the trainer.
+        The IPUPlugin replaces the original accumulate_grad_batches.
+        """
+        if self._original_accumulate_grad_batches is None:
+            self._original_accumulate_grad_batches = self.lightning_module.trainer.accumulate_grad_batches
+            if not isinstance(self._original_accumulate_grad_batches, int):
+                raise MisconfigurationException(
+                    "IPUs currently only support accumulate_grad_batches being an integer value. "
+                    f"Received {self.accumulate_grad_batches}"
+                )
+        return self._original_accumulate_grad_batches
+
+    def _handle_gradient_accumulation_steps(self):
+        """
+        This function overrides the trainer.accumulation_scheduler to generate
+        ``accumulate_grad_batches=1``.
+        Therefore, ``optimizer_step`` will be called on every batch, and the IPU will handle grad accumulation.
+ """ + if self.accumulate_grad_batches > 1: + self.lightning_module.trainer.accumulation_scheduler = GradientAccumulationScheduler({0: 1}) + + def update_global_step(self, total_batch_idx: int, current_global_step: int) -> int: + if self.accumulate_grad_batches > 1: + if total_batch_idx % self.accumulate_grad_batches == 0: + current_global_step += 1 + return current_global_step + return super().update_global_step(total_batch_idx, current_global_step) + + @property + def _n_replicate(self): + opts = self.training_opts if self.lightning_module.training else self.inference_opts + accumulate_grad_batches = opts.Training.gradient_accumulation + device_iterations = opts.device_iterations + replication_factor = opts.replication_factor + return replication_factor * device_iterations * accumulate_grad_batches + + def _prepare_input(self, args: Any): + + def to_tuple(x): + return tuple(x) + + def to_tensor(x): + return torch.tensor(x).unsqueeze(0).repeat(self._n_replicate) + + args = apply_to_collection(args, dtype=list, function=to_tuple) + args = apply_to_collection(args, dtype=(int, float), function=to_tensor) + return args + + def training_step(self, *args, **kwargs): + args = self._prepare_input(args) + return self.poptorch_models[RunningStage.TRAINING](*args, **kwargs) + + def validation_step(self, *args, **kwargs): + args = self._prepare_input(args) + return self.poptorch_models[RunningStage.VALIDATING](*args, **kwargs) + + def test_step(self, *args, **kwargs): + args = self._prepare_input(args) + return self.poptorch_models[RunningStage.TESTING](*args, **kwargs) + + def predict_step(self, *args, **kwargs): + args = self._prepare_input(args) + return self.poptorch_models[RunningStage.PREDICTING](*args, **kwargs) + + def teardown(self) -> None: + for model in self.poptorch_models.values(): + model.destroy() + + def _compiled(self, model: Any): + # Required to ensure we only attach compiled models, as they are compiled lazily. + return model._executable is not None + + def _detach_models(self): + """ + Detaches all stage specific models from IPU devices. + """ + for k, model in self.poptorch_models.items(): + if self._compiled(model) and model.isAttachedToDevice(): + model.detachFromDevice() + + def _load_model(self, stage: str): + """ + Loads the stage specific accelerator model onto device if compiled and not attached to IPU devices. 
+        Args:
+            stage: The stage to load
+        """
+        self._detach_models()
+        model = self.poptorch_models[stage]
+        if self._compiled(model) and not model.isAttachedToDevice():
+            model.attachToDevice()
+
+    def on_train_start(self):
+        self._load_model(RunningStage.TRAINING)
+
+    def on_validation_start(self):
+        self._load_model(RunningStage.VALIDATING)
+
+    def on_test_start(self):
+        self._load_model(RunningStage.TESTING)
+
+    def on_predict_start(self):
+        self._load_model(RunningStage.PREDICTING)
+
+    def on_train_end(self):
+        self._detach_models()
+
+    def on_validation_end(self):
+        self._detach_models()
+
+    def on_test_end(self):
+        self._detach_models()
+
+    def on_predict_end(self):
+        self._detach_models()
+
+    def on_train_batch_start(self, batch: Any, batch_idx: int, dataloader_idx: int) -> None:
+        # Updates optimizer stats if LR scheduler modified the optimizer state
+        optimizer = self.lightning_module.trainer.optimizers[0]
+        self.poptorch_models[RunningStage.TRAINING].setOptimizer(optimizer)
+
+    @property
+    def on_gpu(self) -> bool:
+        return False
+
+    @property
+    def root_device(self) -> torch.device:
+        pass
+
+    def model_to_device(self) -> None:
+        pass
+
+    @property
+    def is_global_zero(self) -> bool:
+        return True
+
+    def reduce(self, tensor: Union[torch.Tensor, Any], *args: Any, **kwargs: Any) -> Union[torch.Tensor, Any]:
+        return tensor
+
+    def barrier(self, name: Optional[str] = None) -> None:
+        pass
+
+    def all_gather(self, tensor: torch.Tensor, group: Optional[Any] = None, sync_grads: bool = False) -> torch.Tensor:
+        return tensor
+
+    def broadcast(self, obj: object, src: int = 0) -> object:
+        return obj
diff --git a/pytorch_lightning/plugins/training_type/parallel.py b/pytorch_lightning/plugins/training_type/parallel.py
index a8028e5be1a69..e1c9a7149d066 100644
--- a/pytorch_lightning/plugins/training_type/parallel.py
+++ b/pytorch_lightning/plugins/training_type/parallel.py
@@ -19,7 +19,7 @@
 import torch
 from torch.nn.parallel import DistributedDataParallel

-from pytorch_lightning.core.lightning import LightningModule
+import pytorch_lightning as pl
 from pytorch_lightning.overrides.base import unwrap_lightning_module
 from pytorch_lightning.plugins.environments.cluster_environment import ClusterEnvironment
 from pytorch_lightning.plugins.training_type.training_type_plugin import TrainingTypePlugin
@@ -81,6 +81,11 @@ def distributed_sampler_kwargs(self):
         distributed_sampler_kwargs = dict(num_replicas=len(self.parallel_devices), rank=self.global_rank)
         return distributed_sampler_kwargs

+    def reconciliate_processes(self, trace: str):
+        """
+        Reconciles processes after a failure.
+        """
+
     def all_gather(self, tensor: torch.Tensor, group: Optional[Any] = None, sync_grads: bool = False) -> torch.Tensor:
         """Perform a all_gather on all processes """
         return all_gather_ddp_if_available(tensor, group=group, sync_grads=sync_grads)
@@ -99,7 +104,7 @@ def torch_distributed_backend(self):
         return torch_backend

     @staticmethod
-    def configure_sync_batchnorm(model: LightningModule) -> LightningModule:
+    def configure_sync_batchnorm(model: 'pl.LightningModule') -> 'pl.LightningModule':
         """
         Add global batchnorm for a model spread across multiple GPUs and nodes.
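A brief usage note on `configure_sync_batchnorm`, which the hunk below reduces to a single return: `torch.nn.SyncBatchNorm.convert_sync_batchnorm` walks the module tree and swaps every `BatchNorm*D` layer for `SyncBatchNorm`, returning the converted module; the conversion must happen before the model is wrapped in `DistributedDataParallel`. A minimal sketch with a toy model (the layer sizes are illustrative):

    import torch

    model = torch.nn.Sequential(
        torch.nn.Conv2d(3, 8, kernel_size=3),
        torch.nn.BatchNorm2d(8),  # replaced by torch.nn.SyncBatchNorm below
        torch.nn.ReLU(),
    )
    model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
    assert isinstance(model[1], torch.nn.SyncBatchNorm)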
@@ -112,8 +117,7 @@ def configure_sync_batchnorm(model: LightningModule) -> LightningModule: Return: LightningModule with batchnorm layers synchronized between process groups """ - model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model) - return model + return torch.nn.SyncBatchNorm.convert_sync_batchnorm(model) @contextmanager def block_backward_sync(self): @@ -133,5 +137,4 @@ def teardown(self) -> None: # GPU teardown self.lightning_module.cpu() # clean up memory - with torch.cuda.device(self.root_device): - torch.cuda.empty_cache() + torch.cuda.empty_cache() diff --git a/pytorch_lightning/plugins/training_type/rpc.py b/pytorch_lightning/plugins/training_type/rpc.py deleted file mode 100644 index 3e0f57daef001..0000000000000 --- a/pytorch_lightning/plugins/training_type/rpc.py +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright The PyTorch Lightning team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -from contextlib import suppress -from typing import Callable, List, Optional - -import torch - -from pytorch_lightning.plugins.environments.cluster_environment import ClusterEnvironment -from pytorch_lightning.plugins.training_type.ddp import DDPPlugin -from pytorch_lightning.utilities import _RPC_AVAILABLE - -DEFAULT_RPC_TIMEOUT_SEC = 60. -if _RPC_AVAILABLE: - from torch.distributed import rpc - - with suppress(ModuleNotFoundError, ImportError): - from torch.distributed.rpc.constants import DEFAULT_RPC_TIMEOUT_SEC - - -class RPCPlugin(DDPPlugin): - """ - Backbone for RPC Plugins built on top of DDP. - RPC introduces different communication behaviour than DDP. Unlike DDP, processes potentially are not - required to run the same code as the main process. - This leads to edge cases where logic needs to be re-defined. This class contains special cases - that need to be addressed when using RPC communication when building custom RPC Plugins. - """ - - def __init__( - self, - rpc_timeout_sec: float = DEFAULT_RPC_TIMEOUT_SEC, - parallel_devices: Optional[List[torch.device]] = None, - num_nodes: Optional[int] = None, - cluster_environment: Optional[ClusterEnvironment] = None, - sync_batchnorm: Optional[bool] = None, - **kwargs - ): - self.rpc_timeout_sec = rpc_timeout_sec - self._is_rpc_initialized = False - super().__init__( - parallel_devices=parallel_devices, - num_nodes=num_nodes, - cluster_environment=cluster_environment, - sync_batchnorm=sync_batchnorm, - **kwargs - ) - - def init_rpc_connection(self, global_rank: int, world_size: int) -> None: - os.environ['MASTER_PORT'] = os.getenv('RPC_MASTER_PORT', '15000') - rpc.init_rpc(f"worker{global_rank}", rank=global_rank, world_size=world_size) - rpc._set_rpc_timeout(self.rpc_timeout_sec) - self._is_rpc_initialized = True - - def rpc_save_model(self, trainer, save_model_fn: Callable, filepath: str) -> None: - """ - Override to save model to disk. - This is required as the main process will be required to handle aggregating model states from RPC processes. - - Args: - trainer: The trainer object. 
- save_model_fn: The saving function to save final model. - filepath: The filepath to save the model to. - """ - raise NotImplementedError - - def exit_rpc_process(self): - if self._is_rpc_initialized: - torch.distributed.rpc.shutdown() - self._is_rpc_initialized = False - - @property - def rpc_enabled(self) -> bool: - return True diff --git a/pytorch_lightning/plugins/training_type/rpc_sequential.py b/pytorch_lightning/plugins/training_type/rpc_sequential.py deleted file mode 100644 index a75839cbdb714..0000000000000 --- a/pytorch_lightning/plugins/training_type/rpc_sequential.py +++ /dev/null @@ -1,408 +0,0 @@ -# Copyright The PyTorch Lightning team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License -import logging -import os -from typing import Callable, List, Optional - -import torch -import torch.distributed as torch_distrib -from torch import nn -from torch.nn.parallel import DistributedDataParallel -from torch.optim import Optimizer - -from pytorch_lightning.core.lightning import LightningModule -from pytorch_lightning.overrides.distributed import LightningDistributedModule -from pytorch_lightning.plugins.training_type.rpc import DEFAULT_RPC_TIMEOUT_SEC, RPCPlugin -from pytorch_lightning.trainer.states import TrainerFn -from pytorch_lightning.utilities import _FAIRSCALE_PIPE_AVAILABLE, rank_zero_only -from pytorch_lightning.utilities.exceptions import MisconfigurationException - -if _FAIRSCALE_PIPE_AVAILABLE: - import fairscale.nn.model_parallel as mpu - from fairscale.nn import PipeRPCWrapper - from fairscale.nn.pipe import balance as pipe_balance - from fairscale.nn.pipe import rpc as rpc_pipe - from fairscale.nn.pipe.pipeline import PipelineStyle - -log = logging.getLogger(__name__) - - -class RPCSequentialPlugin(RPCPlugin): - - def __init__( - self, - balance: Optional[List[int]] = None, - microbatches: int = 8, - checkpoint: str = 'except_last', - balance_mode: str = "balance_by_size", - pipelined_backward: Optional[bool] = True, - rpc_timeout_sec: float = DEFAULT_RPC_TIMEOUT_SEC, - **kwargs - ): - """ - Provides sequential model parallelism for :class:`nn.Sequential ` module. - If the module requires lots of memory, Pipe can be used to reduce this by leveraging multiple GPUs. - - .. _RPCSequentialPlugin: https://arxiv.org/abs/1811.06965 - - Pipeline parallelism comes with with checkpointing to reduce peak - memory required to train while minimizing device under-utilization. - This is turned on by default and can be turned off via the checkpoint argument. - - You should determine the balance when defining the plugin, - or you can pass an example input array via the LightningModule to infer a balance. - The module will be partitioned into multiple devices according to the given balance. You may also rely on - your own heuristics to find your own optimal configuration. - - Args: - balance: The balance of the model, i.e [2, 2] (two layers on each GPU). - If not provided assumes user provides an input example array to find a balance on all GPUs. 
- - microbatches: Allows for parallelization to reduce device utilization - by splitting the batch into further smaller batches. - - checkpoint: Enables gradient checkpointing. ['always', 'except_last', 'never'] - - balance_mode: Type of balance heuristic to use if balance to be inferred. - - - 'balance_by_size': checks memory usage of each layer and determines balance - - - 'balance_by_time': checks time of each layer and determines balance - - pipelined_backward: if True, call torch.autograd.backward once per microbatch on the - - backward pass (instead of once for the whole batch). This works - around a potential deadlock in pytorch when using tensor parallelism - at the same time. Defaults to `True` if - `get_model_parallel_world_size() > 1` - """ - self._check_pipe_available() - super().__init__(rpc_timeout_sec=rpc_timeout_sec, **kwargs) - - self.balance = balance - - self.microbatches = microbatches - self.checkpoint = checkpoint - self.balance_mode = balance_mode - self.pipelined_backward = pipelined_backward - self._main_rpc_process = True - - def init_ddp_connection( - self, - global_rank: Optional[int] = None, - world_size: Optional[int] = None, - ) -> None: - if self.lightning_module.trainer.amp_backend is not None: - raise MisconfigurationException( - '`RPCSequentialPlugin` is currently not supported in Automatic Mixed Precision' - ) - - if self._skip_init_connections(): - return - - global_rank = global_rank if global_rank is not None else self.cluster_environment.global_rank() - world_size = world_size if world_size is not None else self.cluster_environment.world_size() - super().init_ddp_connection(global_rank, world_size) - super().init_rpc_connection(global_rank=global_rank, world_size=world_size) - model = self.lightning_module - self.gpus_per_model = self._infer_check_num_gpus() - self.init_model_parallel_groups() - self.set_main_rpc_process() - - self._check_sequential_model_exists(model) - - # check if user given balance is valid - if self.balance is not None: - self._assert_valid_model_balance() - - if self.main_rpc_process: - if self.balance is None: - self._infer_model_balance() - self.init_pipe_module() - else: - self.handle_transferred_pipe_module() - self.exit_rpc_process() - - def _infer_model_balance(self): - log.info(f'Inferring model balance using {self.balance_mode} mode') - model = self.lightning_module - if model.example_input_array is None: - raise MisconfigurationException( - 'Please set example_input_array to your model, so we can infer the right model balance for you' - ) - balance_func = getattr(pipe_balance, self.balance_mode) - self.balance = balance_func(self.gpus_per_model, model.sequential_module, model.example_input_array) - self._sync_balance_to_all_parallel_groups() - - log.info(f'The following model balance {self.balance.tolist()} was inferred using {self.balance_mode} mode') - - def _sync_balance_to_all_parallel_groups(self, main_rank=0): - """ - Ensures that we sync the balance to all main processes, so that the balance is the same per replica. - Args: - main_rank: The rank with the balance we'd like to replicate. 
- """ - self.balance = torch.tensor(self.balance, dtype=torch.int, device='cuda') - # Ensure we sync to all processes within the main data parallel group - # We use the data parallel group as all main processes are found within the same group - torch_distrib.broadcast(self.balance, src=main_rank, group=mpu.get_data_parallel_group()) - self.balance = self.balance.cpu() - - def _check_sequential_model_exists(self, model): - if not hasattr(model, "sequential_module") or not isinstance(model.sequential_module, nn.Sequential): - raise MisconfigurationException( - 'Could not find a PipeLightningModule within the model. ' - 'Did you set your sequential model as the `sequential_module` attribute of your model?' - ) - - def _find_and_init_pipe_module(self, model): - if hasattr(model, "sequential_module") and isinstance(model.sequential_module, LightningPipeModule): - # model has been wrapped already - return - elif hasattr(model, "sequential_module") and isinstance(model.sequential_module, nn.Sequential): - # try to wrap model for the user - model.sequential_module = LightningPipeModule( - model.sequential_module, - balance=self.balance, - microbatches=self.microbatches, - checkpoint=self.checkpoint, - ) - # Update references for workers to access correct lightning functions when calling RPC - model.sequential_module.trainer = model.trainer - model.sequential_module.configure_optimizers = model.configure_optimizers - - # Update references for main process to access correct lightning functions when calling RPC - model.sequential_module.module.model.trainer = model.trainer - model.sequential_module.module.model.configure_optimizers = model.configure_optimizers - - self.model = model - - else: - raise MisconfigurationException( - 'Could not find a PipeLightningModule within the model. ' - 'Did you defined set your sequential model as a `sequential_module` attribute of your model?' - ) - - def _assert_valid_model_balance(self): - model = self.lightning_module - if sum(self.balance) != len(model.sequential_module): - raise MisconfigurationException( - f'The provided balance sum: {sum(self.balance)} does not' - f' match your Sequential length: {len(model.sequential_module)}' - ) - - def _skip_init_connections(self): - """ - Skip initialization if torch is already initialized and we're in testing. - Returns: Whether to skip initialization - - """ - return torch_distrib.is_initialized() and self.lightning_module.trainer.state.fn != TrainerFn.FITTING - - def init_model_parallel_groups(self): - num_model_parallel = 1 # TODO currently no support for vertical model parallel - mpu.initialize_model_parallel(model_parallel_size_=num_model_parallel, pipeline_length=self.gpus_per_model) - - def _infer_check_num_gpus(self): - """ - Infer the number of GPUs per model. 
- - Returns: The appropriate balance for the model - """ - if isinstance(self.balance, list): - if len(self.balance) != (self.world_size / self.num_nodes): - raise MisconfigurationException( - "Pipe currently only supports splitting the module onto all available GPUs" - ) - # User has defined a balance for his model - return len(self.balance) - # Assume that the user wants to balance his model on all GPUs - return self.world_size - - def handle_transferred_pipe_module(self) -> None: - if self.lightning_module.trainer.state.fn == TrainerFn.FITTING: - torch_distrib.barrier() # Ensure we await main process initialization - # Add trainer/configure_optimizers to the pipe model for access in all worker processes - rpc_pipe.PipeModel.trainer = self.lightning_module.trainer - del rpc_pipe.PipeModel.trainer.model.sequential_module - rpc_pipe.PipeModel.trainer.model.sequential_module = rpc_pipe.PipeModel - rpc_pipe.PipeModel.configure_optimizers = self.lightning_module.configure_optimizers - - def init_pipe_module(self) -> None: - # Create pipe_module - model = self.lightning_module - self._find_and_init_pipe_module(model) - if self.lightning_module.trainer.state.fn == TrainerFn.FITTING: - torch_distrib.barrier() # Ensure we join main process initialization - model.sequential_module.foreach_worker(register_optimizers, include_self=True) - - # TODO: Move this to the connector - - def pre_backward(self, closure_loss: torch.Tensor, should_accumulate: bool, optimizer: Optimizer, opt_idx: int): - """Run before precision plugin executes backward""" - - def configure_ddp(self): - if self.main_rpc_process: - self.pre_configure_ddp() - - self._model = DistributedDataParallel( - LightningDistributedModule(self.model), - device_ids=self.determine_ddp_device_ids(), - process_group=mpu.get_data_parallel_group(), - **self._ddp_kwargs, - ) - # Plugin handle backwards across processes. 
Currently not supported for DDP + pipe parallel - self._model.require_backward_grad_sync = False - - @rank_zero_only - def rpc_save_model(self, trainer, save_model_fn: Callable, filepath: str) -> None: - model = self.lightning_module - if not hasattr(model.sequential_module, "foreach_worker"): - return - current_layers = model.sequential_module - model.sequential_module.foreach_worker( - save_layers_on_all_rank_zero_workers, {"gpus_per_model": self.gpus_per_model}, include_self=True - ) - model.sequential_module = load_sequential_from_saved_layers(self.gpus_per_model) - save_model_fn(trainer, filepath) - model.sequential_module = current_layers - - def worker_optimizer_step(self, model: LightningModule, opt_idx: int, *args, **kwargs) -> None: - model.sequential_module.foreach_worker( - run_optimizer, { - "opt_idx": opt_idx, - "args": args, - "kwargs": kwargs - }, include_self=False - ) - - @property - def distributed_sampler_kwargs(self): - return dict( - num_replicas=mpu.get_data_parallel_world_size(), - rank=mpu.get_data_parallel_rank(), - ) - - @property - def data_parallel_group(self): - return mpu.get_data_parallel_group() - - def set_main_rpc_process(self): - self.main_rpc_process = torch_distrib.get_rank(group=mpu.get_pipeline_parallel_group()) == 0 - - @property - def main_rpc_process(self) -> bool: - return self._main_rpc_process - - @main_rpc_process.setter - def main_rpc_process(self, is_main_process): - self._main_rpc_process = is_main_process - - def barrier(self, name: Optional[str] = None) -> None: - if torch_distrib.is_initialized() and self.main_rpc_process: - torch_distrib.barrier(group=self.data_parallel_group) - - def _check_pipe_available(self): - if not _FAIRSCALE_PIPE_AVAILABLE: - raise MisconfigurationException( - 'PipeRPCPlugin requires FairScale and currently is only supported on PyTorch 1.6.' - ) - - def post_optimizer_step(self, optimizer: Optimizer, optimizer_idx: int, **kwargs) -> None: - """Hook to do something after each optimizer step.""" - if self.rpc_enabled and self.main_rpc_process: - # Initialize optimizer step on main process - self.worker_optimizer_step(model=self.lightning_module, opt_idx=optimizer_idx, **kwargs) - - def post_training_step(self): - if self.main_rpc_process: - super().post_training_step() - - def start_training(self, trainer) -> None: - if self.main_rpc_process: - super().start_training(trainer) - - def start_evaluating(self, trainer) -> None: - if self.main_rpc_process: - super().start_evaluating(trainer) - - -class LightningPipeModule(nn.Module): - """ - This class wraps Fairscale Pipe and PipeRCPWrapper class. - """ - - def __init__(self, module: nn.Sequential, balance: List[int], microbatches: int = 8, checkpoint='never'): - super().__init__() - self.module = module - self.balance = balance - self.microbatches = microbatches - self.checkpoint = checkpoint - self._init_pipe() - - def _init_pipe(self): - device = torch.device("cuda", torch_distrib.get_rank()) - - self.module = PipeRPCWrapper( - module=self.module, - balance=self.balance, - chunks=self.microbatches, - style=PipelineStyle.MultiProcess, - input_device=device, - worker_map=self.get_worker_map(), - checkpoint=self.checkpoint, - ) - - def foreach_worker(self, *args, **kwargs): - self.module.foreach_worker(*args, **kwargs) - - def forward(self, *args, **kwargs): - return self.module(*args, **kwargs) - - def get_worker_map(self): - # TODO, is this correct with multinodes? 
We also assume "worker" is the same as defined in the RPCPlugin - return {rank: f"worker{rank}" for rank in range(torch_distrib.get_world_size())} - - -def register_optimizers(ctx, model): - optimizers, lr_schedulers, optimizer_frequencies = model.trainer.init_optimizers(model) - model.trainer.optimizers = optimizers - model.trainer.lr_schedulers = lr_schedulers - model.trainer.optimizer_frequencies = optimizer_frequencies - - -def run_optimizer(ctx, model): - trainer = model.trainer - opt_idx = ctx["opt_idx"] - optimizer = trainer.optimizers[opt_idx] - optimizer.step(*ctx["args"], **ctx["kwargs"]) - - -def save_layers_on_all_rank_zero_workers(ctx, model): - gpus_per_model = ctx["gpus_per_model"] - rank = torch_distrib.get_rank() - if rank in range(gpus_per_model): - seq = list(model.children())[0] - torch.save(seq, f"seq_{rank}.pt") - - -def load_sequential_from_saved_layers(gpus_per_model): - partial_seqs = [torch.load(f"seq_{rank}.pt", map_location='cpu') for rank in range(gpus_per_model)] - seq = nn.Sequential() - for p_seq in partial_seqs: - for name, child in p_seq.named_children(): - seq.add_module(name, child) - # delete tmp files - [os.remove(f"seq_{rank}.pt") for rank in range(gpus_per_model)] - return seq diff --git a/pytorch_lightning/plugins/training_type/sharded.py b/pytorch_lightning/plugins/training_type/sharded.py index 02da937286dcc..7e5796d5b5668 100644 --- a/pytorch_lightning/plugins/training_type/sharded.py +++ b/pytorch_lightning/plugins/training_type/sharded.py @@ -16,7 +16,7 @@ import torch from torch.optim import Optimizer -from pytorch_lightning.core.lightning import LightningModule +import pytorch_lightning as pl from pytorch_lightning.core.optimizer import is_lightning_optimizer from pytorch_lightning.plugins.training_type.ddp import DDPPlugin from pytorch_lightning.trainer.states import TrainerFn @@ -54,7 +54,8 @@ def _reinit_optimizers_with_oss(self): optim_class = type(optimizer) zero_optimizer = OSS(params=optimizer.param_groups, optim=optim_class, **optimizer.defaults) if _FAIRSCALE_OSS_FP16_BROADCAST_AVAILABLE: - is_fp16 = self.lightning_module.trainer.precision == 16 + precision = self.lightning_module.trainer.precision + is_fp16 = precision in ("mixed", 16) # For multi-node training, compressing the model shards in fp16 before broadcasting # improves performance. When using PyTorch AMP, it will not degrade # the model performance. @@ -85,7 +86,7 @@ def _optim_state_dict(self, optimizer): return optimizer.state_dict() @property - def lightning_module(self) -> LightningModule: + def lightning_module(self) -> 'pl.LightningModule': if not _FAIRSCALE_AVAILABLE: # pragma: no cover raise MisconfigurationException( "`DDPShardedPlugin` requires `fairscale` to be installed." 
diff --git a/pytorch_lightning/plugins/training_type/sharded_spawn.py b/pytorch_lightning/plugins/training_type/sharded_spawn.py index 5daf4e5be3735..c583ac756cd0f 100644 --- a/pytorch_lightning/plugins/training_type/sharded_spawn.py +++ b/pytorch_lightning/plugins/training_type/sharded_spawn.py @@ -16,7 +16,7 @@ import torch from torch.optim import Optimizer -from pytorch_lightning.core.lightning import LightningModule +import pytorch_lightning as pl from pytorch_lightning.plugins.precision.sharded_native_amp import ShardedNativeMixedPrecisionPlugin from pytorch_lightning.plugins.training_type.ddp_spawn import DDPSpawnPlugin from pytorch_lightning.trainer.states import TrainerFn @@ -71,7 +71,7 @@ def _optim_state_dict(self, optimizer): return optimizer.state_dict() @property - def lightning_module(self) -> LightningModule: + def lightning_module(self) -> 'pl.LightningModule': if not _FAIRSCALE_AVAILABLE: # pragma: no cover raise MisconfigurationException( "`DDPSpawnShardedPlugin` requires `fairscale` to be installed." diff --git a/pytorch_lightning/plugins/training_type/single_device.py b/pytorch_lightning/plugins/training_type/single_device.py index 1816f5838c948..d4a328902eba0 100644 --- a/pytorch_lightning/plugins/training_type/single_device.py +++ b/pytorch_lightning/plugins/training_type/single_device.py @@ -85,5 +85,4 @@ def teardown(self) -> None: # GPU teardown self.lightning_module.cpu() # clean up memory - with torch.cuda.device(self.root_device): - torch.cuda.empty_cache() + torch.cuda.empty_cache() diff --git a/pytorch_lightning/plugins/training_type/single_tpu.py b/pytorch_lightning/plugins/training_type/single_tpu.py index 99abff992ebeb..afc692951ce80 100644 --- a/pytorch_lightning/plugins/training_type/single_tpu.py +++ b/pytorch_lightning/plugins/training_type/single_tpu.py @@ -12,17 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. import os - -import torch +from typing import Any, Dict from pytorch_lightning.core.decorators import parameter_validation from pytorch_lightning.plugins.training_type.single_device import SingleDevicePlugin -from pytorch_lightning.utilities import _TPU_AVAILABLE -from pytorch_lightning.utilities.apply_func import move_data_to_device +from pytorch_lightning.utilities import _OMEGACONF_AVAILABLE, _TPU_AVAILABLE +from pytorch_lightning.utilities.apply_func import apply_to_collection if _TPU_AVAILABLE: import torch_xla.core.xla_model as xm +if _OMEGACONF_AVAILABLE: + from omegaconf import DictConfig, ListConfig, OmegaConf + class SingleTPUPlugin(SingleDevicePlugin): """ Plugin for training on a single TPU device. """ @@ -54,13 +56,20 @@ def pre_dispatch(self) -> None: self.tpu_local_core_rank = xm.get_local_ordinal() self.tpu_global_core_rank = xm.get_ordinal() - def on_save(self, checkpoint: dict) -> dict: - """ - Move XLA tensors to CPU before saving - Recommended on XLA Guide: - https://github.com/pytorch/xla/blob/master/API_GUIDE.md#saving-and-loading-xla-tensors + def save(self, state_dict: Dict, path: str) -> None: + xm.save(state_dict, path) + + def save_checkpoint(self, checkpoint: Dict[str, Any], filepath: str) -> None: + """Save model/training states as a checkpoint file through state-dump and file-write. 
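+
+        Saving delegates to ``xm.save`` through :meth:`save`, which takes care of
+        moving XLA tensors off the device before writing. A hedged usage sketch
+        (``ckpt`` stands in for a fully assembled checkpoint dict)::
+
+            plugin.save_checkpoint(ckpt, "model.ckpt")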
+
+        Args:
+            checkpoint: dict containing model and trainer state
+            filepath: write-target file's path
         """
-        return move_data_to_device(checkpoint, torch.device("cpu"))
+        # Related Issue: https://github.com/pytorch/xla/issues/2773
+        if _OMEGACONF_AVAILABLE:
+            checkpoint = apply_to_collection(checkpoint, (DictConfig, ListConfig), OmegaConf.to_container)
+        self.save({k: v for k, v in checkpoint.items() if k != "callbacks"}, filepath)
 
     def teardown(self) -> None:
         # TPU teardown
diff --git a/pytorch_lightning/plugins/training_type/tpu_spawn.py b/pytorch_lightning/plugins/training_type/tpu_spawn.py
index 9a27e6230b201..2a30ddce23841 100644
--- a/pytorch_lightning/plugins/training_type/tpu_spawn.py
+++ b/pytorch_lightning/plugins/training_type/tpu_spawn.py
@@ -52,7 +52,7 @@ class TPUSpawnPlugin(DDPSpawnPlugin):
     """ Plugin for training multiple TPU devices using the :func:`torch.multiprocessing.spawn` method. """
 
     def __init__(self, parallel_devices: Optional[List[int]] = None, debug: bool = False, **_: Any) -> None:
-        super().__init__(parallel_devices, num_nodes=1, cluster_environment=None, sync_batchnorm=False)
+        super().__init__(parallel_devices)
         self.debug = debug
         self.tpu_local_core_rank = 0
         self.tpu_global_core_rank = 0
@@ -185,6 +185,9 @@ def transfer_distrib_spawn_state_on_fit_end(self, results):
         checkpoint_callback = self.lightning_module.trainer.checkpoint_callback
         best_model_path = checkpoint_callback.best_model_path if checkpoint_callback else None
 
+        # the state_dict must be computed on all processes in case Metrics are present
+        state_dict = self.lightning_module.state_dict()
+
         if self.mp_queue is not None:
             rank_zero_warn("cleaning up tpu spawn environment...")
 
@@ -195,13 +198,14 @@ def transfer_distrib_spawn_state_on_fit_end(self, results):
                 and len(best_model_path) > 0
             ):
                 last_path = re.sub(".ckpt", ".tmp_end.ckpt", best_model_path)
-                self.save(self.lightning_module.state_dict(), last_path)
+                self.save(state_dict, last_path)
 
             if self.local_rank == 0:
                 # todo, pass complete checkpoint as state dictionary
                 self.mp_queue.put(best_model_path)
                 self.mp_queue.put(last_path)
                 self.mp_queue.put(results)
+                self.lightning_module.add_to_queue(self.mp_queue)  # adds the `callback_metrics` to the queue
 
     def save(self, state_dict: Dict, path: str) -> None:
         xm.save(state_dict, path)
@@ -312,3 +316,7 @@ def teardown(self) -> None:
     @property
     def should_rank_save_checkpoint(self) -> bool:
         return self.local_rank == 0
+
+    @classmethod
+    def register_plugins(cls, plugin_registry: Dict) -> None:
+        plugin_registry.register("tpu_spawn_debug", cls, description="TPUSpawn Plugin with `debug` as True", debug=True)
diff --git a/pytorch_lightning/plugins/training_type/training_type_plugin.py b/pytorch_lightning/plugins/training_type/training_type_plugin.py
index 8d27fd4ac6a2f..e7ca73bc9f40d 100644
--- a/pytorch_lightning/plugins/training_type/training_type_plugin.py
+++ b/pytorch_lightning/plugins/training_type/training_type_plugin.py
@@ -13,7 +13,8 @@
 # limitations under the License.
import contextlib from abc import ABC, abstractmethod -from typing import Any, Callable, Dict, Generator, Iterable, Optional, Tuple, TypeVar, Union +from pathlib import Path +from typing import Any, Callable, Dict, Generator, Iterable, Mapping, Optional, TypeVar, Union import torch from torch import Tensor @@ -144,9 +145,16 @@ def results(self) -> Optional[Union[_EVALUATE_OUTPUT, _PREDICT_OUTPUT]]: """ return self._results - @property - def rpc_enabled(self) -> bool: - return False + def load_checkpoint_file(self, checkpoint_path: Union[str, Path]) -> Dict[str, Any]: + return pl_load(checkpoint_path, map_location=(lambda storage, loc: storage)) + + def load_model_state_dict(self, checkpoint: Mapping[str, Any]) -> None: + self.lightning_module.load_state_dict(checkpoint["state_dict"]) + + def load_optimizer_state_dict(self, checkpoint: Mapping[str, Any]) -> None: + optimizer_states = checkpoint["optimizer_states"] + for optimizer, opt_state in zip(self.lightning_module.trainer.accelerator.optimizers, optimizer_states): + optimizer.load_state_dict(opt_state) def start_training(self, trainer: 'pl.Trainer') -> None: # double dispatch to initiate the training loop @@ -161,19 +169,19 @@ def start_predicting(self, trainer: 'pl.Trainer') -> None: self._results = trainer.run_stage() def training_step(self, *args, **kwargs): - return self.lightning_module.training_step(*args, **kwargs) + return self.model.training_step(*args, **kwargs) def post_training_step(self): pass def validation_step(self, *args, **kwargs): - return self.lightning_module.validation_step(*args, **kwargs) + return self.model.validation_step(*args, **kwargs) def test_step(self, *args, **kwargs): - return self.lightning_module.test_step(*args, **kwargs) + return self.model.test_step(*args, **kwargs) def predict_step(self, *args, **kwargs): - return self.lightning_module.predict_step(*args, **kwargs) + return self.model.predict_step(*args, **kwargs) def training_step_end(self, output): return output @@ -195,6 +203,22 @@ def process_dataloader(self, dataloader: Union[Iterable, DataLoader]) -> Union[I """ return dataloader + def on_reset_train_dataloader(self, dataloader: Union[Iterable, DataLoader]) -> Union[Iterable, DataLoader]: + """Called before resetting the train dataloader.""" + return dataloader + + def on_reset_val_dataloader(self, dataloader: Union[Iterable, DataLoader]) -> Union[Iterable, DataLoader]: + """Called before resetting the val dataloader.""" + return dataloader + + def on_reset_test_dataloader(self, dataloader: Union[Iterable, DataLoader]) -> Union[Iterable, DataLoader]: + """Called before resetting the test dataloader.""" + return dataloader + + def on_reset_predict_dataloader(self, dataloader: Union[Iterable, DataLoader]) -> Union[Iterable, DataLoader]: + """Called before resetting the predict dataloader.""" + return dataloader + def init_optimizers(self, trainer: 'pl.Trainer', model: 'pl.LightningModule'): return trainer.init_optimizers(model) @@ -211,33 +235,6 @@ def setup_optimizers_in_pre_dispatch(self) -> bool: """ return False - def restore_model_state_from_ckpt_path( - self, - ckpt_path: str, - map_location: Callable = lambda storage, loc: storage, - ) -> Tuple[Dict, bool]: - """ - This function is used to load and restore the model state. 
-
-        Args:
-            ckpt_path: Path to a checkpoint
-            map_location: lambda function to map checkpoint location
-
-        Return
-            checkpoint: Return loaded checkpoint
-            bool: Wether to load optimizer / lr_schedulers states from checkpoint
-
-        """
-        ckpt = pl_load(ckpt_path, map_location=map_location)
-        # restore datamodule states
-        if self.lightning_module.trainer.datamodule is not None:
-            self.lightning_module.trainer.datamodule.on_load_checkpoint(ckpt)
-
-        # hook: give user access to checkpoint if needed.
-        self.lightning_module.on_load_checkpoint(ckpt)
-        self.lightning_module.load_state_dict(ckpt['state_dict'])
-        return ckpt, True
-
     def update_global_step(self, total_batch_idx: int, current_global_step: int) -> int:
         """
         Provide a hook to count optimizer step calls.
@@ -314,3 +311,41 @@ def register_plugins(cls, plugin_registry):
     def should_rank_save_checkpoint(self) -> bool:
         """Returns whether the checkpoint should be saved (rank based)"""
         return self.is_global_zero
+
+    def on_train_start(self) -> None:
+        """Called when train begins."""
+        pass
+
+    def on_validation_start(self) -> None:
+        """Called when validation begins."""
+        pass
+
+    def on_test_start(self) -> None:
+        """Called when test begins."""
+        pass
+
+    def on_predict_start(self) -> None:
+        """Called when predict begins."""
+        pass
+
+    def on_train_end(self) -> None:
+        """Called when train ends."""
+        pass
+
+    def on_validation_end(self) -> None:
+        """Called when validation ends."""
+        pass
+
+    def on_test_end(self) -> None:
+        """Called when test ends."""
+        pass
+
+    def on_predict_end(self) -> None:
+        """Called when predict ends."""
+        pass
+
+    def on_train_batch_start(self, batch: Any, batch_idx: int, dataloader_idx: int) -> None:
+        """
+        Called in the training loop before anything happens for that batch.
+        """
+        pass
diff --git a/pytorch_lightning/profiler/__init__.py b/pytorch_lightning/profiler/__init__.py
index 6ac6e16c18529..a21b3f173c26e 100644
--- a/pytorch_lightning/profiler/__init__.py
+++ b/pytorch_lightning/profiler/__init__.py
@@ -194,14 +194,18 @@ def custom_processing_step(self, data):
     python -c 'import torch; print(torch.autograd.profiler.load_nvprof("trace_name.prof"))'
 
 """
-
-from pytorch_lightning.profiler.profilers import AdvancedProfiler, BaseProfiler, PassThroughProfiler, SimpleProfiler
+from pytorch_lightning.profiler.advanced import AdvancedProfiler
+from pytorch_lightning.profiler.base import AbstractProfiler, BaseProfiler, PassThroughProfiler
 from pytorch_lightning.profiler.pytorch import PyTorchProfiler
+from pytorch_lightning.profiler.simple import SimpleProfiler
+from pytorch_lightning.profiler.xla import XLAProfiler
 
 __all__ = [
+    'AbstractProfiler',
     'BaseProfiler',
-    'SimpleProfiler',
     'AdvancedProfiler',
     'PassThroughProfiler',
-    "PyTorchProfiler",
+    'PyTorchProfiler',
+    'SimpleProfiler',
+    'XLAProfiler',
 ]
diff --git a/pytorch_lightning/profiler/advanced.py b/pytorch_lightning/profiler/advanced.py
new file mode 100644
index 0000000000000..3a017d72ff5e0
--- /dev/null
+++ b/pytorch_lightning/profiler/advanced.py
@@ -0,0 +1,92 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Profiler to check if there are any bottlenecks in your code."""
+import cProfile
+import io
+import logging
+import pstats
+from pathlib import Path
+from typing import Dict, Optional, Union
+
+from pytorch_lightning.profiler.base import BaseProfiler
+
+log = logging.getLogger(__name__)
+
+
+class AdvancedProfiler(BaseProfiler):
+    """
+    This profiler uses Python's ``cProfile`` to record more detailed information about
+    time spent in each function call recorded during a given action. The output is quite
+    verbose and you should only use this if you want very detailed reports.
+    """
+
+    def __init__(
+        self,
+        dirpath: Optional[Union[str, Path]] = None,
+        filename: Optional[str] = None,
+        line_count_restriction: float = 1.0,
+        output_filename: Optional[str] = None,
+    ) -> None:
+        """
+        Args:
+            dirpath: Directory path for the ``filename``. If ``dirpath`` is ``None`` but ``filename`` is present, the
+                ``trainer.log_dir`` (from :class:`~pytorch_lightning.loggers.tensorboard.TensorBoardLogger`)
+                will be used.
+
+            filename: If present, filename where the profiler results will be saved instead of printing to stdout.
+                The ``.txt`` extension will be used automatically.
+
+            line_count_restriction: this can be used to limit the number of functions
+                reported for each action: either an integer (to select a count of lines),
+                or a decimal fraction between 0.0 and 1.0 inclusive (to select a percentage of lines)
+
+        Raises:
+            ValueError:
+                If you attempt to stop recording an action which was never started.
+        """
+        super().__init__(dirpath=dirpath, filename=filename, output_filename=output_filename)
+        self.profiled_actions: Dict[str, cProfile.Profile] = {}
+        self.line_count_restriction = line_count_restriction
+
+    def start(self, action_name: str) -> None:
+        if action_name not in self.profiled_actions:
+            self.profiled_actions[action_name] = cProfile.Profile()
+        self.profiled_actions[action_name].enable()
+
+    def stop(self, action_name: str) -> None:
+        pr = self.profiled_actions.get(action_name)
+        if pr is None:
+            raise ValueError(f"Attempting to stop recording an action ({action_name}) which was never started.")
+        pr.disable()
+
+    def summary(self) -> str:
+        recorded_stats = {}
+        for action_name, pr in self.profiled_actions.items():
+            s = io.StringIO()
+            ps = pstats.Stats(pr, stream=s).strip_dirs().sort_stats('cumulative')
+            ps.print_stats(self.line_count_restriction)
+            recorded_stats[action_name] = s.getvalue()
+        return self._stats_to_str(recorded_stats)
+
+    def teardown(self, stage: Optional[str] = None) -> None:
+        super().teardown(stage=stage)
+        self.profiled_actions = {}
+
+    def __reduce__(self):
+        # avoids `TypeError: cannot pickle 'cProfile.Profile' object`
+        return (
+            self.__class__,
+            tuple(),
+            dict(dirpath=self.dirpath, filename=self.filename, line_count_restriction=self.line_count_restriction),
+        )
diff --git a/pytorch_lightning/profiler/base.py b/pytorch_lightning/profiler/base.py
new file mode 100644
index 0000000000000..8b5bf5483d976
--- /dev/null
+++ b/pytorch_lightning/profiler/base.py
@@ -0,0 +1,219 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Profiler to check if there are any bottlenecks in your code.""" +import logging +import os +from abc import ABC, abstractmethod +from contextlib import contextmanager +from pathlib import Path +from typing import Any, Callable, Dict, Optional, TextIO, Union + +from pytorch_lightning.utilities import rank_zero_deprecation +from pytorch_lightning.utilities.cloud_io import get_filesystem + +log = logging.getLogger(__name__) + + +class AbstractProfiler(ABC): + """Specification of a profiler.""" + + @abstractmethod + def start(self, action_name: str) -> None: + """Defines how to start recording an action.""" + + @abstractmethod + def stop(self, action_name: str) -> None: + """Defines how to record the duration once an action is complete.""" + + @abstractmethod + def summary(self) -> str: + """Create profiler summary in text format.""" + + @abstractmethod + def setup(self, **kwargs: Any) -> None: + """Execute arbitrary pre-profiling set-up steps as defined by subclass.""" + + @abstractmethod + def teardown(self, **kwargs: Any) -> None: + """Execute arbitrary post-profiling tear-down steps as defined by subclass.""" + + +class BaseProfiler(AbstractProfiler): + """ + If you wish to write a custom profiler, you should inherit from this class. + """ + + def __init__( + self, + dirpath: Optional[Union[str, Path]] = None, + filename: Optional[str] = None, + output_filename: Optional[str] = None, + ) -> None: + self.dirpath = dirpath + self.filename = filename + if output_filename is not None: + rank_zero_deprecation( + "`Profiler` signature has changed in v1.3. The `output_filename` parameter has been removed in" + " favor of `dirpath` and `filename`. Support for the old signature will be removed in v1.5", + ) + filepath = Path(output_filename) + self.dirpath = filepath.parent + self.filename = filepath.stem + + self._output_file: Optional[TextIO] = None + self._write_stream: Optional[Callable] = None + self._local_rank: Optional[int] = None + self._log_dir: Optional[str] = None + self._stage: Optional[str] = None + + @contextmanager + def profile(self, action_name: str) -> None: + """ + Yields a context manager to encapsulate the scope of a profiled action. + + Example:: + + with self.profile('load training data'): + # load training data code + + The profiler will start once you've entered the context and will automatically + stop once you exit the code block. 
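+
+        This is roughly equivalent to wrapping the block with :meth:`start` and
+        :meth:`stop` yourself (a minimal sketch)::
+
+            self.start('load training data')
+            try:
+                ...  # load training data code
+            finally:
+                self.stop('load training data')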
+ """ + try: + self.start(action_name) + yield action_name + finally: + self.stop(action_name) + + def profile_iterable(self, iterable, action_name: str) -> None: + iterator = iter(iterable) + while True: + try: + self.start(action_name) + value = next(iterator) + self.stop(action_name) + yield value + except StopIteration: + self.stop(action_name) + break + + def _rank_zero_info(self, *args, **kwargs) -> None: + if self._local_rank in (None, 0): + log.info(*args, **kwargs) + + def _prepare_filename( + self, action_name: Optional[str] = None, extension: str = ".txt", split_token: str = "-" + ) -> str: + args = [] + if self._stage is not None: + args.append(self._stage) + if self.filename: + args.append(self.filename) + if self._local_rank is not None: + args.append(str(self._local_rank)) + if action_name is not None: + args.append(action_name) + filename = split_token.join(args) + extension + return filename + + def _prepare_streams(self) -> None: + if self._write_stream is not None: + return + if self.filename: + filepath = os.path.join(self.dirpath, self._prepare_filename()) + fs = get_filesystem(filepath) + file = fs.open(filepath, "a") + self._output_file = file + self._write_stream = file.write + else: + self._write_stream = self._rank_zero_info + + def describe(self) -> None: + """Logs a profile report after the conclusion of run.""" + # there are pickling issues with open file handles in Python 3.6 + # so to avoid them, we open and close the files within this function + # by calling `_prepare_streams` and `teardown` + self._prepare_streams() + summary = self.summary() + if summary: + self._write_stream(summary) + if self._output_file is not None: + self._output_file.flush() + self.teardown(stage=self._stage) + + def _stats_to_str(self, stats: Dict[str, str]) -> str: + stage = f"{self._stage.upper()} " if self._stage is not None else "" + output = [stage + "Profiler Report"] + for action, value in stats.items(): + header = f"Profile stats for: {action}" + if self._local_rank is not None: + header += f" rank: {self._local_rank}" + output.append(header) + output.append(value) + return os.linesep.join(output) + + def setup( + self, + stage: Optional[str] = None, + local_rank: Optional[int] = None, + log_dir: Optional[str] = None, + ) -> None: + """Execute arbitrary pre-profiling set-up steps.""" + self._stage = stage + self._local_rank = local_rank + self._log_dir = log_dir + self.dirpath = self.dirpath or log_dir + + def teardown(self, stage: Optional[str] = None) -> None: + """ + Execute arbitrary post-profiling tear-down steps. + + Closes the currently open file and stream. + """ + self._write_stream = None + if self._output_file is not None: + self._output_file.close() + self._output_file = None # can't pickle TextIOWrapper + + def __del__(self) -> None: + self.teardown(stage=self._stage) + + def start(self, action_name: str) -> None: + raise NotImplementedError + + def stop(self, action_name: str) -> None: + raise NotImplementedError + + def summary(self) -> str: + raise NotImplementedError + + @property + def local_rank(self) -> int: + return 0 if self._local_rank is None else self._local_rank + + +class PassThroughProfiler(BaseProfiler): + """ + This class should be used when you don't want the (small) overhead of profiling. + The Trainer uses this class by default. 
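+
+    For example, building a ``Trainer`` without a profiler falls back to this
+    class, so every profiling call becomes a no-op (a hedged sketch)::
+
+        from pytorch_lightning import Trainer
+
+        trainer = Trainer()  # roughly Trainer(profiler=PassThroughProfiler())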
+ """ + + def start(self, action_name: str) -> None: + pass + + def stop(self, action_name: str) -> None: + pass + + def summary(self) -> str: + return "" diff --git a/pytorch_lightning/profiler/profilers.py b/pytorch_lightning/profiler/profilers.py index 78327fa0a91d8..fb29ec5289744 100644 --- a/pytorch_lightning/profiler/profilers.py +++ b/pytorch_lightning/profiler/profilers.py @@ -1,387 +1,22 @@ -# Copyright The PyTorch Lightning team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Profiler to check if there are any bottlenecks in your code.""" -import cProfile -import io -import logging -import os -import pstats -import time -from abc import ABC, abstractmethod -from collections import defaultdict -from contextlib import contextmanager -from pathlib import Path -from typing import Any, Callable, Dict, Optional, TextIO, Tuple, Union - -import numpy as np - -from pytorch_lightning.utilities import rank_zero_warn -from pytorch_lightning.utilities.cloud_io import get_filesystem - -log = logging.getLogger(__name__) - - -class AbstractProfiler(ABC): - """Specification of a profiler.""" - - @abstractmethod - def start(self, action_name: str) -> None: - """Defines how to start recording an action.""" - - @abstractmethod - def stop(self, action_name: str) -> None: - """Defines how to record the duration once an action is complete.""" - - @abstractmethod - def summary(self) -> str: - """Create profiler summary in text format.""" - - @abstractmethod - def setup(self, **kwargs: Any) -> None: - """Execute arbitrary pre-profiling set-up steps as defined by subclass.""" - - @abstractmethod - def teardown(self, **kwargs: Any) -> None: - """Execute arbitrary post-profiling tear-down steps as defined by subclass.""" - - -class BaseProfiler(AbstractProfiler): - """ - If you wish to write a custom profiler, you should inherit from this class. - """ - - def __init__( - self, - dirpath: Optional[Union[str, Path]] = None, - filename: Optional[str] = None, - output_filename: Optional[str] = None, - ) -> None: - self.dirpath = dirpath - self.filename = filename - if output_filename is not None: - rank_zero_warn( - "`Profiler` signature has changed in v1.3. The `output_filename` parameter has been removed in" - " favor of `dirpath` and `filename`. Support for the old signature will be removed in v1.5", - DeprecationWarning - ) - filepath = Path(output_filename) - self.dirpath = filepath.parent - self.filename = filepath.stem - - self._output_file: Optional[TextIO] = None - self._write_stream: Optional[Callable] = None - self._local_rank: Optional[int] = None - self._log_dir: Optional[str] = None - self._stage: Optional[str] = None - - @contextmanager - def profile(self, action_name: str) -> None: - """ - Yields a context manager to encapsulate the scope of a profiled action. - - Example:: - - with self.profile('load training data'): - # load training data code - - The profiler will start once you've entered the context and will automatically - stop once you exit the code block. 
- """ - try: - self.start(action_name) - yield action_name - finally: - self.stop(action_name) - - def profile_iterable(self, iterable, action_name: str) -> None: - iterator = iter(iterable) - while True: - try: - self.start(action_name) - value = next(iterator) - self.stop(action_name) - yield value - except StopIteration: - self.stop(action_name) - break - - def _rank_zero_info(self, *args, **kwargs) -> None: - if self._local_rank in (None, 0): - log.info(*args, **kwargs) - - def _prepare_filename(self, extension: str = ".txt") -> str: - filename = "" - if self._stage is not None: - filename += f"{self._stage}-" - filename += str(self.filename) - if self._local_rank is not None: - filename += f"-{self._local_rank}" - filename += extension - return filename - - def _prepare_streams(self) -> None: - if self._write_stream is not None: - return - if self.filename: - filepath = os.path.join(self.dirpath, self._prepare_filename()) - fs = get_filesystem(filepath) - file = fs.open(filepath, "a") - self._output_file = file - self._write_stream = file.write - else: - self._write_stream = self._rank_zero_info - - def describe(self) -> None: - """Logs a profile report after the conclusion of run.""" - # there are pickling issues with open file handles in Python 3.6 - # so to avoid them, we open and close the files within this function - # by calling `_prepare_streams` and `teardown` - self._prepare_streams() - summary = self.summary() - if summary: - self._write_stream(summary) - if self._output_file is not None: - self._output_file.flush() - self.teardown(stage=self._stage) - - def _stats_to_str(self, stats: Dict[str, str]) -> str: - stage = f"{self._stage.upper()} " if self._stage is not None else "" - output = [stage + "Profiler Report"] - for action, value in stats.items(): - header = f"Profile stats for: {action}" - if self._local_rank is not None: - header += f" rank: {self._local_rank}" - output.append(header) - output.append(value) - return os.linesep.join(output) - - def setup( - self, - stage: Optional[str] = None, - local_rank: Optional[int] = None, - log_dir: Optional[str] = None, - ) -> None: - """Execute arbitrary pre-profiling set-up steps.""" - self._stage = stage - self._local_rank = local_rank - self._log_dir = log_dir - self.dirpath = self.dirpath or log_dir - - def teardown(self, stage: Optional[str] = None) -> None: - """ - Execute arbitrary post-profiling tear-down steps. - - Closes the currently open file and stream. - """ - self._write_stream = None - if self._output_file is not None: - self._output_file.close() - self._output_file = None # can't pickle TextIOWrapper - - def __del__(self) -> None: - self.teardown(stage=self._stage) - - def start(self, action_name: str) -> None: - raise NotImplementedError - - def stop(self, action_name: str) -> None: - raise NotImplementedError - - def summary(self) -> str: - raise NotImplementedError - - @property - def local_rank(self) -> int: - return 0 if self._local_rank is None else self._local_rank - - -class PassThroughProfiler(BaseProfiler): - """ - This class should be used when you don't want the (small) overhead of profiling. - The Trainer uses this class by default. 
- """ - - def start(self, action_name: str) -> None: - pass - - def stop(self, action_name: str) -> None: - pass - - def summary(self) -> str: - return "" - - -class SimpleProfiler(BaseProfiler): - """ - This profiler simply records the duration of actions (in seconds) and reports - the mean duration of each action and the total time spent over the entire training run. - """ - - def __init__( - self, - dirpath: Optional[Union[str, Path]] = None, - filename: Optional[str] = None, - extended: bool = True, - output_filename: Optional[str] = None, - ) -> None: - """ - Args: - dirpath: Directory path for the ``filename``. If ``dirpath`` is ``None`` but ``filename`` is present, the - ``trainer.log_dir`` (from :class:`~pytorch_lightning.loggers.tensorboard.TensorBoardLogger`) - will be used. - - filename: If present, filename where the profiler results will be saved instead of printing to stdout. - The ``.txt`` extension will be used automatically. - - Raises: - ValueError: - If you attempt to start an action which has already started, or - if you attempt to stop recording an action which was never started. - """ - super().__init__(dirpath=dirpath, filename=filename, output_filename=output_filename) - self.current_actions: Dict[str, float] = {} - self.recorded_durations = defaultdict(list) - self.extended = extended - self.start_time = time.monotonic() - - def start(self, action_name: str) -> None: - if action_name in self.current_actions: - raise ValueError(f"Attempted to start {action_name} which has already started.") - self.current_actions[action_name] = time.monotonic() - - def stop(self, action_name: str) -> None: - end_time = time.monotonic() - if action_name not in self.current_actions: - raise ValueError(f"Attempting to stop recording an action ({action_name}) which was never started.") - start_time = self.current_actions.pop(action_name) - duration = end_time - start_time - self.recorded_durations[action_name].append(duration) - - def _make_report(self) -> Tuple[list, float]: - total_duration = time.monotonic() - self.start_time - report = [[a, d, 100. 
* np.sum(d) / total_duration] for a, d in self.recorded_durations.items()] - report.sort(key=lambda x: x[2], reverse=True) - return report, total_duration - - def summary(self) -> str: - sep = os.linesep - output_string = "" - if self._stage is not None: - output_string += f"{self._stage.upper()} " - output_string += f"Profiler Report{sep}" - - if self.extended: - - if len(self.recorded_durations) > 0: - max_key = np.max([len(k) for k in self.recorded_durations.keys()]) - - def log_row(action, mean, num_calls, total, per): - row = f"{sep}{action:<{max_key}s}\t| {mean:<15}\t|" - row += f"{num_calls:<15}\t| {total:<15}\t| {per:<15}\t|" - return row - - output_string += log_row("Action", "Mean duration (s)", "Num calls", "Total time (s)", "Percentage %") - output_string_len = len(output_string) - output_string += f"{sep}{'-' * output_string_len}" - report, total_duration = self._make_report() - output_string += log_row("Total", "-", "_", f"{total_duration:.5}", "100 %") - output_string += f"{sep}{'-' * output_string_len}" - for action, durations, duration_per in report: - output_string += log_row( - action, - f"{np.mean(durations):.5}", - f"{len(durations):}", - f"{np.sum(durations):.5}", - f"{duration_per:.5}", - ) - else: - - def log_row(action, mean, total): - return f"{sep}{action:<20s}\t| {mean:<15}\t| {total:<15}" - - output_string += log_row("Action", "Mean duration (s)", "Total time (s)") - output_string += f"{sep}{'-' * 65}" - - for action, durations in self.recorded_durations.items(): - output_string += log_row(action, f"{np.mean(durations):.5}", f"{np.sum(durations):.5}") - output_string += sep - return output_string - - -class AdvancedProfiler(BaseProfiler): - """ - This profiler uses Python's cProfiler to record more detailed information about - time spent in each function call recorded during a given action. The output is quite - verbose and you should only use this if you want very detailed reports. - """ - - def __init__( - self, - dirpath: Optional[Union[str, Path]] = None, - filename: Optional[str] = None, - line_count_restriction: float = 1.0, - output_filename: Optional[str] = None, - ) -> None: - """ - Args: - dirpath: Directory path for the ``filename``. If ``dirpath`` is ``None`` but ``filename`` is present, the - ``trainer.log_dir`` (from :class:`~pytorch_lightning.loggers.tensorboard.TensorBoardLogger`) - will be used. - - filename: If present, filename where the profiler results will be saved instead of printing to stdout. - The ``.txt`` extension will be used automatically. - - line_count_restriction: this can be used to limit the number of functions - reported for each action. either an integer (to select a count of lines), - or a decimal fraction between 0.0 and 1.0 inclusive (to select a percentage of lines) - - Raises: - ValueError: - If you attempt to stop recording an action which was never started. 
- """ - super().__init__(dirpath=dirpath, filename=filename, output_filename=output_filename) - self.profiled_actions: Dict[str, cProfile.Profile] = {} - self.line_count_restriction = line_count_restriction - - def start(self, action_name: str) -> None: - if action_name not in self.profiled_actions: - self.profiled_actions[action_name] = cProfile.Profile() - self.profiled_actions[action_name].enable() - - def stop(self, action_name: str) -> None: - pr = self.profiled_actions.get(action_name) - if pr is None: - raise ValueError(f"Attempting to stop recording an action ({action_name}) which was never started.") - pr.disable() - - def summary(self) -> str: - recorded_stats = {} - for action_name, pr in self.profiled_actions.items(): - s = io.StringIO() - ps = pstats.Stats(pr, stream=s).strip_dirs().sort_stats('cumulative') - ps.print_stats(self.line_count_restriction) - recorded_stats[action_name] = s.getvalue() - return self._stats_to_str(recorded_stats) - - def teardown(self, stage: Optional[str] = None) -> None: - super().teardown(stage=stage) - self.profiled_actions = {} - - def __reduce__(self): - # avoids `TypeError: cannot pickle 'cProfile.Profile' object` - return ( - self.__class__, - tuple(), - dict(dirpath=self.dirpath, filename=self.filename, line_count_restriction=self.line_count_restriction), - ) +from pytorch_lightning.utilities import rank_zero_deprecation + +rank_zero_deprecation( + "Using ``import pytorch_lightning.profiler.profilers`` is deprecated in v1.4, and will be removed in v1.6. " + "HINT: Use ``import pytorch_lightning.profiler`` directly." +) + +from pytorch_lightning.profiler.advanced import AdvancedProfiler # noqa E402 +from pytorch_lightning.profiler.base import AbstractProfiler, BaseProfiler, PassThroughProfiler # noqa E402 +from pytorch_lightning.profiler.pytorch import PyTorchProfiler # noqa E402 +from pytorch_lightning.profiler.simple import SimpleProfiler # noqa E402 +from pytorch_lightning.profiler.xla import XLAProfiler # noqa E402 + +__all__ = [ + 'AbstractProfiler', + 'BaseProfiler', + 'AdvancedProfiler', + 'PassThroughProfiler', + 'PyTorchProfiler', + 'SimpleProfiler', + 'XLAProfiler', +] diff --git a/pytorch_lightning/profiler/pytorch.py b/pytorch_lightning/profiler/pytorch.py index fa2c2917f98a2..6e8e21456e915 100644 --- a/pytorch_lightning/profiler/pytorch.py +++ b/pytorch_lightning/profiler/pytorch.py @@ -23,8 +23,8 @@ from torch import nn, Tensor from torch.autograd.profiler import record_function -from pytorch_lightning.profiler.profilers import BaseProfiler -from pytorch_lightning.utilities.distributed import rank_zero_warn +from pytorch_lightning.profiler.base import BaseProfiler +from pytorch_lightning.utilities import rank_zero_deprecation, rank_zero_warn from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.imports import _KINETO_AVAILABLE @@ -90,7 +90,7 @@ def __enter__(self) -> None: self._handles[module_name] = [pre_forward_handle, post_forward_handle] - def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None: + def __exit__(self, type: Any, value: Any, traceback: Any) -> None: for handles in self._handles.values(): for h in handles: h.remove() @@ -132,14 +132,13 @@ def reset(self): def num_step(self) -> int: if self._current_action == "training_step_and_backward": return self._num_training_step_and_backward - elif self._current_action == "validation_step": + if self._current_action == "validation_step": return self._num_validation_step - elif self._current_action 
== "test_step": + if self._current_action == "test_step": return self._num_test_step - elif self._current_action == "predict_step": + if self._current_action == "predict_step": return self._num_predict_step - else: - return 0 + return 0 def _step(self) -> None: if self._current_action == "training_step_and_backward": @@ -159,11 +158,11 @@ def _step(self) -> None: def has_finished(self) -> bool: if self._current_action == "training_step_and_backward": return self._training_step_and_backward_reached_end - elif self._current_action == "validation_step": + if self._current_action == "validation_step": return self._validation_step_reached_end - elif self._current_action == "test_step": + if self._current_action == "test_step": return self._test_step_reached_end - elif self._current_action == "predict_step": + if self._current_action == "predict_step": return self._predict_step_reached_end return False @@ -349,9 +348,9 @@ def __deprecation_check( record_functions = set() if profiled_functions is not None: - rank_zero_warn( + rank_zero_deprecation( "`PyTorchProfiler.profiled_functions` has been renamed to" - " `record_functions` in v1.3 and will be removed in v1.5", DeprecationWarning + " `record_functions` in v1.3 and will be removed in v1.5" ) if not record_functions: record_functions |= set(profiled_functions) @@ -427,11 +426,15 @@ def stop(self, action_name: str) -> None: def on_trace_ready(profiler): if self.dirpath is not None: if self._export_to_chrome: - handler = tensorboard_trace_handler(self.dirpath, self._prepare_filename(extension="")) + handler = tensorboard_trace_handler( + self.dirpath, self._prepare_filename(action_name=action_name, extension="") + ) handler(profiler) if self._export_to_flame_graph: - path = os.path.join(self.dirpath, self._prepare_filename(extension=".stack")) + path = os.path.join( + self.dirpath, self._prepare_filename(action_name=action_name, extension=".stack") + ) profiler.export_stacks(path, metric=self._metric) else: rank_zero_warn("The PyTorchProfiler failed to export trace as `dirpath` is None") diff --git a/pytorch_lightning/profiler/simple.py b/pytorch_lightning/profiler/simple.py new file mode 100644 index 0000000000000..7fb8ac5be0c92 --- /dev/null +++ b/pytorch_lightning/profiler/simple.py @@ -0,0 +1,123 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Profiler to check if there are any bottlenecks in your code.""" +import logging +import os +import time +from collections import defaultdict +from pathlib import Path +from typing import Dict, Optional, Tuple, Union + +import numpy as np + +from pytorch_lightning.profiler.base import BaseProfiler + +log = logging.getLogger(__name__) + + +class SimpleProfiler(BaseProfiler): + """ + This profiler simply records the duration of actions (in seconds) and reports + the mean duration of each action and the total time spent over the entire training run. 
+ """ + + def __init__( + self, + dirpath: Optional[Union[str, Path]] = None, + filename: Optional[str] = None, + extended: bool = True, + output_filename: Optional[str] = None, + ) -> None: + """ + Args: + dirpath: Directory path for the ``filename``. If ``dirpath`` is ``None`` but ``filename`` is present, the + ``trainer.log_dir`` (from :class:`~pytorch_lightning.loggers.tensorboard.TensorBoardLogger`) + will be used. + + filename: If present, filename where the profiler results will be saved instead of printing to stdout. + The ``.txt`` extension will be used automatically. + + Raises: + ValueError: + If you attempt to start an action which has already started, or + if you attempt to stop recording an action which was never started. + """ + super().__init__(dirpath=dirpath, filename=filename, output_filename=output_filename) + self.current_actions: Dict[str, float] = {} + self.recorded_durations = defaultdict(list) + self.extended = extended + self.start_time = time.monotonic() + + def start(self, action_name: str) -> None: + if action_name in self.current_actions: + raise ValueError(f"Attempted to start {action_name} which has already started.") + self.current_actions[action_name] = time.monotonic() + + def stop(self, action_name: str) -> None: + end_time = time.monotonic() + if action_name not in self.current_actions: + raise ValueError(f"Attempting to stop recording an action ({action_name}) which was never started.") + start_time = self.current_actions.pop(action_name) + duration = end_time - start_time + self.recorded_durations[action_name].append(duration) + + def _make_report(self) -> Tuple[list, float]: + total_duration = time.monotonic() - self.start_time + report = [[a, d, 100. * np.sum(d) / total_duration] for a, d in self.recorded_durations.items()] + report.sort(key=lambda x: x[2], reverse=True) + return report, total_duration + + def summary(self) -> str: + sep = os.linesep + output_string = "" + if self._stage is not None: + output_string += f"{self._stage.upper()} " + output_string += f"Profiler Report{sep}" + + if self.extended: + + if len(self.recorded_durations) > 0: + max_key = np.max([len(k) for k in self.recorded_durations.keys()]) + + def log_row(action, mean, num_calls, total, per): + row = f"{sep}{action:<{max_key}s}\t| {mean:<15}\t|" + row += f"{num_calls:<15}\t| {total:<15}\t| {per:<15}\t|" + return row + + output_string += log_row("Action", "Mean duration (s)", "Num calls", "Total time (s)", "Percentage %") + output_string_len = len(output_string) + output_string += f"{sep}{'-' * output_string_len}" + report, total_duration = self._make_report() + output_string += log_row("Total", "-", "_", f"{total_duration:.5}", "100 %") + output_string += f"{sep}{'-' * output_string_len}" + for action, durations, duration_per in report: + output_string += log_row( + action, + f"{np.mean(durations):.5}", + f"{len(durations):}", + f"{np.sum(durations):.5}", + f"{duration_per:.5}", + ) + else: + + def log_row(action, mean, total): + return f"{sep}{action:<20s}\t| {mean:<15}\t| {total:<15}" + + output_string += log_row("Action", "Mean duration (s)", "Total time (s)") + output_string += f"{sep}{'-' * 65}" + + for action, durations in self.recorded_durations.items(): + output_string += log_row(action, f"{np.mean(durations):.5}", f"{np.sum(durations):.5}") + output_string += sep + return output_string diff --git a/pytorch_lightning/profiler/xla.py b/pytorch_lightning/profiler/xla.py new file mode 100644 index 0000000000000..35b8e7f264c31 --- /dev/null +++ 
b/pytorch_lightning/profiler/xla.py
@@ -0,0 +1,110 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+The XLA Profiler will help you debug and optimize training workload performance
+for your models using Cloud TPU performance tools.
+
+Manual capture via TensorBoard
+
+The following instructions are for capturing a trace from a running program:
+
+0. This [guide](https://cloud.google.com/tpu/docs/pytorch-xla-performance-profiling-tpu-vm#tpu-vm) will
+help you with the Cloud TPU setup and the required installations.
+
+1. Start a TensorBoard server:
+
+>> tensorboard --logdir ./tensorboard --port 9001
+
+You can view the TensorBoard output at http://localhost:9001 on your local machine, and then open the
+``PROFILE`` plugin from the top-right dropdown or open http://localhost:9001/#profile
+
+2. Once the code you'd like to profile is running, click the ``CAPTURE PROFILE`` button. Enter
+``localhost:9012`` (the default port for the XLA Profiler) as the Profile Service URL. Then enter
+the number of milliseconds for the profiling duration and click ``CAPTURE``.
+
+3. Make sure the code is still running while you are capturing the trace. You will also get better
+performance insights if the profiling duration is longer than the step time.
+
+4. Once the capture is finished, the page will refresh and you can browse through the insights using the
+``Tools`` dropdown at the top left.
+
+"""
+import logging
+from typing import Dict
+
+from pytorch_lightning.profiler.base import BaseProfiler
+from pytorch_lightning.utilities import _TPU_AVAILABLE
+
+if _TPU_AVAILABLE:
+    import torch_xla.debug.profiler as xp
+
+log = logging.getLogger(__name__)
+
+
+class XLAProfiler(BaseProfiler):
+
+    STEP_FUNCTIONS = {
+        "training_step_and_backward",
+        "validation_step",
+        "test_step",
+        "predict_step",
+    }
+    RECORD_FUNCTIONS = {
+        "training_step_and_backward",
+        "training_step",
+        "backward",
+        "validation_step",
+        "test_step",
+        "predict_step",
+    }
+
+    def __init__(self, port: int = 9012) -> None:
+        """
+        This Profiler will help you debug and optimize training workload performance
+        for your models using Cloud TPU performance tools.
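+
+        A minimal usage sketch (assumes a TPU environment is available; the
+        ``tpu_cores`` value is illustrative only)::
+
+            from pytorch_lightning import Trainer
+            from pytorch_lightning.profiler import XLAProfiler
+
+            trainer = Trainer(profiler=XLAProfiler(port=9012), tpu_cores=8)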
+        """
+        super().__init__(dirpath=None, filename=None, output_filename=None)
+        self.port = port
+        self._recording_map: Dict = {}
+        self._step_recording_map: Dict = {}
+        self._start_trace: bool = False
+
+    def start(self, action_name: str) -> None:
+        if action_name in self.RECORD_FUNCTIONS:
+            if not self._start_trace:
+                self.server = xp.start_server(self.port)
+                self._start_trace = True
+
+            if action_name in self.STEP_FUNCTIONS:
+                step = self._get_step_num(action_name)
+                recording = xp.StepTrace(action_name, step_num=step)
+            else:
+                recording = xp.Trace(action_name)
+            recording.__enter__()
+            self._recording_map[action_name] = recording
+
+    def stop(self, action_name: str) -> None:
+        if action_name in self._recording_map:
+            self._recording_map[action_name].__exit__(None, None, None)
+            del self._recording_map[action_name]
+
+    def _get_step_num(self, action_name: str) -> int:
+        if action_name not in self._step_recording_map:
+            self._step_recording_map[action_name] = 1
+        else:
+            self._step_recording_map[action_name] += 1
+        return self._step_recording_map[action_name]
+
+    def summary(self) -> str:
+        return ""
diff --git a/pytorch_lightning/trainer/callback_hook.py b/pytorch_lightning/trainer/callback_hook.py
index 23df26b410a03..4f4e44e57d3a3 100644
--- a/pytorch_lightning/trainer/callback_hook.py
+++ b/pytorch_lightning/trainer/callback_hook.py
@@ -17,8 +17,8 @@
 from inspect import signature
 from typing import Any, Callable, Dict, List, Optional, Type
 
+import pytorch_lightning as pl
 from pytorch_lightning.callbacks import Callback
-from pytorch_lightning.core.lightning import LightningModule
 from pytorch_lightning.utilities import rank_zero_deprecation, rank_zero_warn
 from pytorch_lightning.utilities.signature_utils import is_param_in_hook_signature
 from pytorch_lightning.utilities.types import EPOCH_OUTPUT, STEP_OUTPUT
@@ -32,27 +32,27 @@ class TrainerCallbackHookMixin(ABC):
     # this is just a summary on variables used in this abstract class,
     # the proper values/initialisation should be done in child class
     callbacks: List[Callback] = []
-    lightning_module: LightningModule
+    lightning_module: 'pl.LightningModule'
 
-    def on_before_accelerator_backend_setup(self, model: LightningModule) -> None:
+    def on_before_accelerator_backend_setup(self, model: 'pl.LightningModule') -> None:
         """Called at the beginning of fit (train + validate), validate, test, or predict, or tune."""
         for callback in self.callbacks:
             callback.on_before_accelerator_backend_setup(self, model)
 
-    def configure_sharded_model(self, model: LightningModule) -> None:
+    def configure_sharded_model(self, model: 'pl.LightningModule') -> None:
         """Called at the beginning of fit (train + validate), validate, test, or predict, or tune."""
         for callback in self.callbacks:
             callback.on_configure_sharded_model(self, model)
 
-    def setup(self, model: LightningModule, stage: Optional[str]) -> None:
+    def setup(self, model: 'pl.LightningModule', stage: Optional[str]) -> None:
         """Called at the beginning of fit (train + validate), validate, test, or predict, or tune."""
         for callback in self.callbacks:
-            callback.setup(self, model, stage)
+            callback.setup(self, model, stage=stage)
 
     def teardown(self, stage: Optional[str] = None) -> None:
         """Called at the end of fit (train + validate), validate, test, or predict, or tune."""
         for callback in self.callbacks:
-            callback.teardown(self, self.lightning_module, stage)
+            callback.teardown(self, self.lightning_module, stage=stage)
 
     def on_init_start(self):
         """Called when the trainer initialization begins, model has not yet been set."""
@@ -97,10 +97,10 @@ def on_train_epoch_end(self, outputs: EPOCH_OUTPUT):
         """
         for callback in self.callbacks:
             if is_param_in_hook_signature(callback.on_train_epoch_end, "outputs"):
-                warning_cache.warn(
+                warning_cache.deprecation(
                     "The signature of `Callback.on_train_epoch_end` has changed in v1.3."
                     " `outputs` parameter has been removed."
-                    " Support for the old signature will be removed in v1.5", DeprecationWarning
+                    " Support for the old signature will be removed in v1.5"
                 )
                 callback.on_train_epoch_end(self, self.lightning_module, outputs)
             else:
@@ -254,7 +254,7 @@ def on_keyboard_interrupt(self):
 
     @staticmethod
     def __is_old_signature_on_save_checkpoint(fn: Callable) -> bool:
         parameters = list(signature(fn).parameters)
-        return len(parameters) == 2 and parameters[1] != "args"
+        return len(parameters) == 2 and parameters[0] != "args"
 
     @staticmethod
     def __is_old_signature_on_load_checkpoint(fn: Callable) -> bool:
diff --git a/pytorch_lightning/trainer/configuration_validator.py b/pytorch_lightning/trainer/configuration_validator.py
index e73bee761a241..8caeebb9ed3dd 100644
--- a/pytorch_lightning/trainer/configuration_validator.py
+++ b/pytorch_lightning/trainer/configuration_validator.py
@@ -34,6 +34,7 @@ def verify_loop_configurations(self, model: 'pl.LightningModule') -> None:
         if self.trainer.state.fn in (TrainerFn.FITTING, TrainerFn.TUNING):
             self.__verify_train_loop_configuration(model)
             self.__verify_eval_loop_configuration(model, 'val')
+            self.__verify_manual_optimization_support(model)
         elif self.trainer.state.fn == TrainerFn.VALIDATING:
             self.__verify_eval_loop_configuration(model, 'val')
         elif self.trainer.state.fn == TrainerFn.TESTING:
@@ -82,10 +83,10 @@ def __verify_train_loop_configuration(self, model: 'pl.LightningModule') -> None
 
         has_overriden_optimization_functions = trainer.overriden_optimizer_step or trainer.overriden_optimizer_zero_grad
         if has_overriden_optimization_functions and going_to_accumulate_grad_batches and automatic_optimization:
-            raise MisconfigurationException(
-                'When overriding `LightningModule` optimizer_step or optimizer_zero_grad,'
-                ' `accumulate_grad_batches` in `Trainer` should be 1.'
-                ' It ensures optimizer_step or optimizer_zero_grad are called on every batch.'
+            rank_zero_warn(
+                'When using `Trainer(accumulate_grad_batches != 1)` and overriding'
+                ' `LightningModule.optimizer_{step,zero_grad}`, the hooks will not be called on every batch'
+                ' (rather, they are called on every optimization step).'
             )
 
     def __verify_eval_loop_configuration(self, model: 'pl.LightningModule', stage: str) -> None:
@@ -112,3 +113,19 @@ def __verify_dp_batch_transfer_support(self, model: 'pl.LightningModule') -> Non
         for hook in batch_transfer_hooks:
             if self.trainer.accelerator_connector.use_dp and is_overridden(hook, model):
                 raise MisconfigurationException(f'Overriding `{hook}` is not supported in DP mode.')
+
+    def __verify_manual_optimization_support(self, model: 'pl.LightningModule') -> None:
+        if model.automatic_optimization:
+            return
+        if self.trainer.gradient_clip_val > 0:
+            raise MisconfigurationException(
+                f"Automatic gradient clipping is not supported for manual optimization."
+                f" Remove `Trainer(gradient_clip_val={self.trainer.gradient_clip_val})`"
+                f" or switch to automatic optimization."
+            )
+        if self.trainer.accumulate_grad_batches != 1:
+            raise MisconfigurationException(
+                f"Automatic gradient accumulation is not supported for manual optimization."
+ f" Remove `Trainer(accumulate_grad_batches={self.trainer.accumulate_grad_batches})`" + f" or switch to automatic optimization." + ) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 4d692ec517d19..f283c38d4dd7b 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -15,12 +15,14 @@ import logging import os from typing import List, Optional, Sequence, Union +from weakref import proxy import torch from pytorch_lightning.accelerators.accelerator import Accelerator from pytorch_lightning.accelerators.cpu import CPUAccelerator from pytorch_lightning.accelerators.gpu import GPUAccelerator +from pytorch_lightning.accelerators.ipu import IPUAccelerator from pytorch_lightning.accelerators.tpu import TPUAccelerator from pytorch_lightning.plugins import ( ApexMixedPrecisionPlugin, @@ -36,6 +38,8 @@ DoublePrecisionPlugin, FullyShardedNativeMixedPrecisionPlugin, HorovodPlugin, + IPUPlugin, + IPUPrecisionPlugin, NativeMixedPrecisionPlugin, PrecisionPlugin, ShardedNativeMixedPrecisionPlugin, @@ -58,13 +62,14 @@ _APEX_AVAILABLE, _HOROVOD_AVAILABLE, _NATIVE_AMP_AVAILABLE, - _TPU_AVAILABLE, AMPType, device_parser, DeviceType, DistributedType, + rank_zero_deprecation, + rank_zero_info, + rank_zero_warn, ) -from pytorch_lightning.utilities.distributed import rank_zero_deprecation, rank_zero_info, rank_zero_warn from pytorch_lightning.utilities.exceptions import MisconfigurationException if _HOROVOD_AVAILABLE: @@ -79,6 +84,7 @@ def __init__( self, num_processes, tpu_cores, + ipus, distributed_backend, auto_select_gpus, gpus, @@ -98,6 +104,7 @@ def __init__( self.num_processes = num_processes self.tpu_cores = device_parser.parse_tpu_cores(tpu_cores) + self.ipus = ipus self.distributed_backend = distributed_backend self.auto_select_gpus = auto_select_gpus self.gpus = gpus @@ -238,6 +245,8 @@ def training_type_plugin(self) -> TrainingTypePlugin: @property def cluster_environment(self) -> ClusterEnvironment: + if self._cluster_environment is None: + self._cluster_environment = self.select_cluster_environment() return self._cluster_environment @property @@ -248,6 +257,10 @@ def on_cpu(self) -> bool: def on_tpu(self) -> bool: return self.tpu_cores is not None + @property + def on_ipu(self) -> bool: + return self.ipus is not None + @property def tpu_id(self) -> Optional[int]: if self.on_tpu and isinstance(self.tpu_cores, list): @@ -323,13 +336,18 @@ def parallel_devices(self) -> List[Union[torch.device, int]]: # https://github.com/PyTorchLightning/pytorch-lightning/issues/3169 if isinstance(self.tpu_cores, int): devices = list(range(self.tpu_cores)) + elif self.on_ipu: + if isinstance(self.ipus, int): + devices = list(range(self.ipus)) else: devices = [torch.device("cpu")] * self.num_processes return devices @property def root_gpu(self) -> Optional[int]: - return self.accelerator.root_device.index if not isinstance(self.accelerator, TPUAccelerator) else None + return self.accelerator.root_device.index if not isinstance( + self.accelerator, (IPUAccelerator, TPUAccelerator) + ) else None @property def is_training_type_in_plugins(self) -> bool: @@ -353,14 +371,17 @@ def select_precision_plugin(self) -> PrecisionPlugin: # set precision type self.amp_type = AMPType.from_str(self.amp_type) + if self.on_ipu: + return IPUPrecisionPlugin(self.precision) + if self._distrib_type == DistributedType.DEEPSPEED or 
isinstance(self._training_type_plugin, DeepSpeedPlugin): return DeepSpeedPrecisionPlugin(self.precision) if self.precision == 32: return PrecisionPlugin() - elif self.precision == 64: + if self.precision == 64: return DoublePrecisionPlugin() - elif self.precision == 16: + if self.precision == 16: if self.on_tpu: return TPUHalfPrecisionPlugin() @@ -403,7 +424,11 @@ def select_precision_plugin(self) -> PrecisionPlugin: raise NotImplementedError("We only support precisions 64, 32 and 16!") def select_training_type_plugin(self) -> TrainingTypePlugin: - if self.use_ddp2: + if isinstance( + self.distributed_backend, Accelerator + ) and self.distributed_backend.training_type_plugin is not None: + plugin = self.distributed_backend.training_type_plugin + elif self.use_ddp2: plugin = DDP2Plugin( parallel_devices=self.parallel_devices, cluster_environment=self.cluster_environment, @@ -459,6 +484,8 @@ def select_training_type_plugin(self) -> TrainingTypePlugin: plugin = HorovodPlugin(parallel_devices=self.parallel_devices) elif self.on_tpu and isinstance(self.tpu_cores, list): plugin = SingleTPUPlugin(self.tpu_id) + elif self.on_ipu: + plugin = IPUPlugin(parallel_devices=self.parallel_devices) else: single_gpu_ordinal = device_parser.determine_root_gpu_device(self.parallel_device_ids) plugin = SingleDevicePlugin(device=torch.device(f"cuda:{single_gpu_ordinal}" if self.on_gpu else "cpu")) @@ -472,7 +499,9 @@ def resolve_training_type_plugin(self, training_type: TrainingTypePlugin) -> Tra training_type.num_processes = len(self.parallel_devices) if hasattr(training_type, 'cluster_environment') and getattr(training_type, 'cluster_environment') is None: - training_type.cluster_environment = self.select_cluster_environment() + # transfer ownership of the cluster environment to the training type + training_type.cluster_environment = self.cluster_environment + self._cluster_environment = proxy(self.cluster_environment) if hasattr(training_type, 'num_nodes'): # set num_nodes for training_type from trainer setting @@ -499,14 +528,21 @@ def select_accelerator(self) -> Accelerator: acc_cls = GPUAccelerator elif self.on_tpu: acc_cls = TPUAccelerator + elif self.on_ipu: + acc_cls = IPUAccelerator else: acc_cls = CPUAccelerator # as precision_plugin is dependent on training_type_plugin, make sure # that we first select training_type_plugin, then precision_plugin - return acc_cls( + accelerator = acc_cls( training_type_plugin=self.training_type_plugin, precision_plugin=self.precision_plugin, ) + # transfer ownership of the plugins to the accelerator + self._training_type_plugin = proxy(self.training_type_plugin) + self._precision_plugin = proxy(self.precision_plugin) + + return accelerator def select_cluster_environment(self) -> ClusterEnvironment: if self._cluster_environment is not None: @@ -562,6 +598,8 @@ def set_distributed_mode(self, distributed_backend: Optional[str] = None): self._device_type = DeviceType.TPU if isinstance(self.tpu_cores, int): self._distrib_type = DistributedType.TPU_SPAWN + elif self.distributed_backend == 'ipu': + self._device_type = DeviceType.IPU elif self.distributed_backend and self._distrib_type is None: self._distrib_type = DistributedType(self.distributed_backend) @@ -608,16 +646,6 @@ def set_distributed_mode(self, distributed_backend: Optional[str] = None): 'Please set accelerator=ddp or accelerator=ddp2.' 
) - rank_zero_info(f'GPU available: {torch.cuda.is_available()}, used: {self._device_type == DeviceType.GPU}') - num_cores = self.tpu_cores if self.tpu_cores is not None else 0 - rank_zero_info(f'TPU available: {_TPU_AVAILABLE}, using: {num_cores} TPU cores') - - if torch.cuda.is_available() and self._device_type != DeviceType.GPU: - rank_zero_warn( - "GPU available but not used. Set the gpus flag in your trainer" - " `Trainer(gpus=1)` or script `--gpus=1`." - ) - def _set_horovod_backend(self): self.check_horovod() self._distrib_type = DistributedType.HOROVOD diff --git a/pytorch_lightning/trainer/connectors/callback_connector.py b/pytorch_lightning/trainer/connectors/callback_connector.py index 98d0c292f92d0..2b14a229ce4f6 100644 --- a/pytorch_lightning/trainer/connectors/callback_connector.py +++ b/pytorch_lightning/trainer/connectors/callback_connector.py @@ -13,12 +13,11 @@ # limitations under the License. import os from datetime import timedelta -from pathlib import Path from typing import Dict, List, Optional, Union +import pytorch_lightning as pl from pytorch_lightning.callbacks import Callback, ModelCheckpoint, ProgressBar, ProgressBarBase from pytorch_lightning.callbacks.timer import Timer -from pytorch_lightning.core.lightning import LightningModule from pytorch_lightning.utilities import rank_zero_info from pytorch_lightning.utilities.exceptions import MisconfigurationException @@ -36,12 +35,9 @@ def on_trainer_init( process_position: int, default_root_dir: Optional[str], weights_save_path: Optional[str], - resume_from_checkpoint: Optional[Union[Path, str]], stochastic_weight_avg: bool, max_time: Optional[Union[str, timedelta, Dict[str, int]]] = None, ): - self.trainer.resume_from_checkpoint = resume_from_checkpoint - # init folder paths for checkpoint + weights save callbacks self.trainer._default_root_dir = default_root_dir or os.getcwd() self.trainer._weights_save_path = weights_save_path or self.trainer._default_root_dir @@ -141,7 +137,7 @@ def attach_model_logging_functions(self, model): callback.log_dict = model.log_dict @staticmethod - def _attach_model_callbacks(model: LightningModule, trainer) -> None: + def _attach_model_callbacks(model: 'pl.LightningModule', trainer) -> None: """ Attaches the callbacks defined in the model. 
If a callback returned by the model's configure_callbacks method has the same type as one or several
@@ -157,8 +153,8 @@ def _attach_model_callbacks(model: LightningModule, trainer) -> None:
         model_callbacks = model.configure_callbacks()
         if not model_callbacks:
             return
-        model_callback_types = set(type(c) for c in model_callbacks)
-        trainer_callback_types = set(type(c) for c in trainer.callbacks)
+        model_callback_types = {type(c) for c in model_callbacks}
+        trainer_callback_types = {type(c) for c in trainer.callbacks}
         override_types = model_callback_types.intersection(trainer_callback_types)
         if override_types:
             rank_zero_info(
diff --git a/pytorch_lightning/trainer/connectors/checkpoint_connector.py b/pytorch_lightning/trainer/connectors/checkpoint_connector.py
index 1181c4f3efd1e..ab74c3bccfc8d 100644
--- a/pytorch_lightning/trainer/connectors/checkpoint_connector.py
+++ b/pytorch_lightning/trainer/connectors/checkpoint_connector.py
@@ -19,147 +19,183 @@
 
 import torch
 
-import pytorch_lightning
-from pytorch_lightning.core.lightning import LightningModule
-from pytorch_lightning.utilities import (
-    _APEX_AVAILABLE,
-    _OMEGACONF_AVAILABLE,
-    AMPType,
-    DeviceType,
-    rank_zero_info,
-    rank_zero_warn,
-)
+import pytorch_lightning as pl
+from pytorch_lightning.utilities import _OMEGACONF_AVAILABLE, rank_zero_deprecation, rank_zero_info, rank_zero_warn
 from pytorch_lightning.utilities.cloud_io import atomic_save, get_filesystem
-from pytorch_lightning.utilities.cloud_io import load as pl_load
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from pytorch_lightning.utilities.upgrade_checkpoint import KEYS_MAPPING as DEPRECATED_CHECKPOINT_KEYS
 
-if _APEX_AVAILABLE:
-    from apex import amp
-
 if _OMEGACONF_AVAILABLE:
     from omegaconf import Container
 
 
 class CheckpointConnector:
 
-    def __init__(self, trainer):
+    def __init__(self, trainer, resume_from_checkpoint: Optional[Union[str, Path]] = None):
         self.trainer = trainer
+        self.resume_checkpoint_path = resume_from_checkpoint
+        self._loaded_checkpoint = dict()
 
-        # used to validate checkpointing logic
-        self.has_trained = False
+    @property
+    def hpc_resume_path(self) -> Optional[str]:
+        dir_path_hpc = str(self.trainer.weights_save_path)
+        max_version = self.max_ckpt_version_in_folder(dir_path_hpc, "hpc_ckpt_")
+        if max_version is not None:
+            return os.path.join(dir_path_hpc, f"hpc_ckpt_{max_version}.ckpt")
 
-    def restore_weights(self) -> None:
+    def resume_start(self) -> None:
         """
-        Attempt to restore a checkpoint (e.g. weights) in this priority:
-        1. from HPC weights
-        2. from `resume_from_checkpoint` file
+        Attempts to pre-load the checkpoint file into memory, with the source path determined in this priority:
+
+        1. from HPC weights if found
+        2. from `resume_from_checkpoint` file if provided
         3. don't restore
+
+        Raises:
+            FileNotFoundError: If the path to the checkpoint file is provided but the file does not exist.
         """
+        self.resume_checkpoint_path = self.hpc_resume_path or self.resume_checkpoint_path
+        checkpoint_path = self.resume_checkpoint_path
+        if not checkpoint_path:
+            return
+
         # clear cache before restore
-        if self.trainer._device_type == DeviceType.GPU:
-            torch.cuda.empty_cache()
+        torch.cuda.empty_cache()
 
-        # 1. Attempt to restore states from HPC checkpoint
-        dir_path_hpc = str(self.trainer.weights_save_path)
-        max_suffix = self.max_ckpt_in_folder(dir_path_hpc, "hpc_ckpt_")
-        if max_suffix is not None:
-            checkpoint_path = f'{dir_path_hpc}/hpc_ckpt_{max_suffix}.ckpt'
-            self.hpc_load(checkpoint_path, self.trainer._device_type == DeviceType.GPU)
-            rank_zero_info(f'restored hpc model from: {checkpoint_path}')
+        # Try to read the checkpoint file at `checkpoint_path`. If not exist, do not restore checkpoint.
+        fs = get_filesystem(checkpoint_path)
+        if not fs.exists(checkpoint_path):
+            raise FileNotFoundError(f"Checkpoint at {checkpoint_path} not found. Aborting training.")
 
-        # 2. Attempt to restore states from `resume_from_checkpoint` file
-        elif self.trainer.resume_from_checkpoint is not None:
-            self.restore(self.trainer.resume_from_checkpoint, on_gpu=self.trainer._device_type == DeviceType.GPU)
+        rank_zero_info(f"Restoring states from the checkpoint file at {checkpoint_path}")
+        self._loaded_checkpoint = self.trainer.training_type_plugin.load_checkpoint_file(checkpoint_path)
 
-        # wait for all to catch up
-        self.trainer.training_type_plugin.barrier('TrainerIOMixin.restore_weights')
+    def resume_end(self) -> None:
+        """ Signal the connector that all states have resumed and memory for the checkpoint object can be released. """
+        if self.resume_checkpoint_path:
+            rank_zero_info(f"Restored all states from the checkpoint file at {self.resume_checkpoint_path}")
+        self.resume_checkpoint_path = None
+        self._loaded_checkpoint = dict()
 
         # clear cache after restore
-        if self.trainer._device_type == DeviceType.GPU:
-            torch.cuda.empty_cache()
+        torch.cuda.empty_cache()
+
+        # wait for all to catch up
+        self.trainer.training_type_plugin.barrier("CheckpointConnector.resume_end")
 
-    def restore(self, checkpoint_path: str, on_gpu: bool) -> bool:
+    def restore(self, checkpoint_path: Optional[Union[Path, str]] = None) -> None:
         """
-        Load model/training states from a 'PyTorch-Lightning checkpoint' file through file-read and state-restore.
+        Attempt to restore everything at once from a 'PyTorch-Lightning checkpoint' file
+        through file-read and state-restore, in this priority:
+
+        1. from HPC weights if found
+        2. from `resume_from_checkpoint` file if provided
+        3. don't restore
+
         All restored states are listed in the return value description of `dump_checkpoint`.
-        """
-        # Try to read the checkpoint file at `checkpoint_path`. If not exist, do not restore checkpoint.
-        fs = get_filesystem(checkpoint_path)
-        if not fs.exists(checkpoint_path):
-            raise FileNotFoundError(f"Checkpoint at {checkpoint_path} not found. Aborting training.")
 
-        checkpoint, load_optimizer_states = self.trainer.training_type_plugin.restore_model_state_from_ckpt_path(
-            checkpoint_path, map_location=lambda storage, loc: storage
-        )
+        Args:
+            checkpoint_path: Path to a PyTorch Lightning checkpoint file.
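+
+        Example (an illustrative sketch; the path below is a placeholder for a real checkpoint)::
+
+            # reloads model weights, datamodule/callback state, and training progress in one call
+            trainer.checkpoint_connector.restore("path/to/checkpoint.ckpt")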
+ """ + self.resume_checkpoint_path = checkpoint_path + self.resume_start() - model = self.trainer.lightning_module + # restore module states + self.restore_datamodule() + self.restore_model() - if on_gpu: - model.cuda(self.trainer.root_gpu) + # restore callback states + self.restore_callbacks() # restore training state - self.restore_training_state(checkpoint, load_optimizer_states) + self.restore_training_state() + self.resume_end() - rank_zero_info(f"Restored states from the checkpoint file at {checkpoint_path}") - return True + def restore_datamodule(self) -> None: + """ Calls hooks on the datamodule to give it a chance to restore its state from the checkpoint. """ + if not self._loaded_checkpoint: + return + + datamodule = self.trainer.datamodule + if datamodule is not None: + datamodule.on_load_checkpoint(self._loaded_checkpoint) - def restore_model_state(self, model: LightningModule, checkpoint) -> None: + def restore_model(self) -> None: """ - Restore model states from a 'PyTorch-Lightning checkpoint' dictionary object + Restores a model's weights from a PyTorch Lightning checkpoint. Hooks are called first go give + the LightningModule a chance to modify the contents, then finally the model gets updated with + the loaded weights. """ + if not self._loaded_checkpoint: + return - # restore datamodule states - if self.trainer.datamodule is not None: - self.trainer.datamodule.on_load_checkpoint(checkpoint) + model = self.trainer.lightning_module # hook: give user access to checkpoint if needed. - model.on_load_checkpoint(checkpoint) + model.on_load_checkpoint(self._loaded_checkpoint) + + # call hpc specific hook + if self.hpc_resume_path is not None: + model.on_hpc_load(self._loaded_checkpoint) # restore model state_dict - model.load_state_dict(checkpoint['state_dict']) + self.trainer.training_type_plugin.load_model_state_dict(self._loaded_checkpoint) + + def restore_model_weights(self, checkpoint_path: Optional[Union[str, Path]]) -> None: + """ Restore only the model weights. """ + checkpoint = self._loaded_checkpoint + if checkpoint_path is not None: + checkpoint = self.trainer.training_type_plugin.load_checkpoint_file(checkpoint_path) + + self.trainer.lightning_module.on_load_checkpoint(checkpoint) + self.trainer.training_type_plugin.load_model_state_dict(checkpoint) - def restore_training_state(self, checkpoint, load_optimizer_states: bool = True): + def restore_training_state(self) -> None: """ - Restore trainer state. - Model will get its change to update - :param checkpoint: - :return: + Restore the trainer state from the pre-loaded checkpoint. This includes the precision settings, loop progress, + optimizer states and learning rate scheduler states. """ - # validation - if load_optimizer_states and ('optimizer_states' not in checkpoint or 'lr_schedulers' not in checkpoint): - raise KeyError( - 'Trying to restore training state but checkpoint contains only the model.' - ' This is probably due to `ModelCheckpoint.save_weights_only` being set to `True`.' - ) + if not self._loaded_checkpoint: + return + + # restore precision plugin (scaler etc.) + self.trainer.precision_plugin.on_load_checkpoint(self._loaded_checkpoint) + # restore progress (loops etc.) + self.restore_progress() - if any([key in checkpoint for key in DEPRECATED_CHECKPOINT_KEYS]): + self.restore_optimizers_and_schedulers() + + def restore_callbacks(self) -> None: + """ Restores all callbacks from the pre-loaded checkpoint. 
""" + if not self._loaded_checkpoint: + return + + if any(key in self._loaded_checkpoint for key in DEPRECATED_CHECKPOINT_KEYS): raise ValueError( "The checkpoint you're attempting to load follows an" " outdated schema. You can upgrade to the current schema by running" " `python -m pytorch_lightning.utilities.upgrade_checkpoint --file model.ckpt`" " where `model.ckpt` is your checkpoint file." ) + self.trainer.on_load_checkpoint(self._loaded_checkpoint) - # restore amp scaling - if self.trainer.amp_backend == AMPType.NATIVE and 'native_amp_scaling_state' in checkpoint: - self.trainer.scaler.load_state_dict(checkpoint['native_amp_scaling_state']) - elif self.trainer.amp_backend == AMPType.APEX and 'amp_scaling_state' in checkpoint: - amp.load_state_dict(checkpoint['amp_scaling_state']) - - # restore callback states - self.trainer.on_load_checkpoint(checkpoint) + def restore_progress(self) -> None: + """ + Restores the training progress from the pre-loaded checkpoint. This currently includes only the global step + and current epoch. + """ + if not self._loaded_checkpoint: + return - self.trainer.train_loop.global_step = checkpoint['global_step'] - self.trainer.train_loop.current_epoch = checkpoint['epoch'] + self.trainer.fit_loop.global_step = self._loaded_checkpoint['global_step'] + self.trainer.fit_loop.current_epoch = self._loaded_checkpoint['epoch'] # crash if max_epochs is lower then the current epoch from the checkpoint if self.trainer.max_epochs is not None and self.trainer.current_epoch > self.trainer.max_epochs: - m = f""" - you restored a checkpoint with current_epoch={self.trainer.current_epoch} - but the Trainer(max_epochs={self.trainer.max_epochs}) - """ - raise MisconfigurationException(m) + raise MisconfigurationException( + f"You restored a checkpoint with current_epoch={self.trainer.current_epoch}," + f" but you have set Trainer(max_epochs={self.trainer.max_epochs})." + ) # Division deals with global step stepping once per accumulated batch # Inequality deals with different global step for odd vs even num_training_batches @@ -173,14 +209,28 @@ def restore_training_state(self, checkpoint, load_optimizer_states: bool = True) " consider using an end of epoch checkpoint." ) - if not load_optimizer_states: + def restore_optimizers_and_schedulers(self) -> None: + """ Restores the optimizers and learning rate scheduler states from the pre-loaded checkpoint. """ + if not self._loaded_checkpoint: return - # restore the optimizers - optimizer_states = checkpoint['optimizer_states'] - for optimizer, opt_state in zip(self.trainer.optimizers, optimizer_states): - optimizer.load_state_dict(opt_state) + # validation + if "optimizer_states" not in self._loaded_checkpoint or "lr_schedulers" not in self._loaded_checkpoint: + raise KeyError( + "Trying to restore training state but checkpoint contains only the model." + " This is probably due to `ModelCheckpoint.save_weights_only` being set to `True`." + ) + self.restore_optimizers() + self.restore_lr_schedulers() + def restore_optimizers(self) -> None: + """ Restores the optimizer states from the pre-loaded checkpoint. 
""" + if not self._loaded_checkpoint: + return + + # restore the optimizers + self.trainer.training_type_plugin.load_optimizer_state_dict(self._loaded_checkpoint) + for optimizer in self.trainer.optimizers: # move optimizer to GPU 1 weight at a time # avoids OOM if self.trainer.root_gpu is not None: @@ -189,14 +239,20 @@ def restore_training_state(self, checkpoint, load_optimizer_states: bool = True) if isinstance(v, torch.Tensor): state[k] = v.cuda(self.trainer.root_gpu) + def restore_lr_schedulers(self) -> None: + """ Restores the learning rate scheduler states from the pre-loaded checkpoint. """ + if not self._loaded_checkpoint: + return + # restore the lr schedulers - lr_schedulers = checkpoint['lr_schedulers'] + lr_schedulers = self._loaded_checkpoint['lr_schedulers'] for scheduler, lrs_state in zip(self.trainer.lr_schedulers, lr_schedulers): scheduler['scheduler'].load_state_dict(lrs_state) # ---------------------------------- # PRIVATE OPS # ---------------------------------- + def hpc_save(self, folderpath: str, logger): # make sure the checkpoint folder exists folderpath = str(folderpath) # because the tests pass a path object @@ -206,7 +262,7 @@ def hpc_save(self, folderpath: str, logger): # save logger to make sure we get all the metrics logger.save() - max_suffix = self.max_ckpt_in_folder(folderpath) + max_suffix = self.max_ckpt_version_in_folder(folderpath) ckpt_number = (max_suffix if max_suffix is not None else 0) + 1 fs.makedirs(folderpath, exist_ok=True) @@ -225,8 +281,8 @@ def hpc_save(self, folderpath: str, logger): try: atomic_save(checkpoint, filepath) except AttributeError as err: - if LightningModule.CHECKPOINT_HYPER_PARAMS_KEY in checkpoint: - del checkpoint[LightningModule.CHECKPOINT_HYPER_PARAMS_KEY] + if pl.LightningModule.CHECKPOINT_HYPER_PARAMS_KEY in checkpoint: + del checkpoint[pl.LightningModule.CHECKPOINT_HYPER_PARAMS_KEY] rank_zero_warn( 'warning, `hyper_parameters` dropped from checkpoint.' 
                    f' An attribute is not picklable {err}'
                 )
                 atomic_save(checkpoint, filepath)
@@ -272,7 +328,7 @@ def dump_checkpoint(self, weights_only: bool = False) -> dict:
         checkpoint = {
             'epoch': current_epoch,
             'global_step': global_step,
-            'pytorch-lightning_version': pytorch_lightning.__version__,
+            'pytorch-lightning_version': pl.__version__,
             'state_dict': self.trainer.accelerator.lightning_module_state_dict(),
         }
 
@@ -294,25 +350,18 @@ def dump_checkpoint(self, weights_only: bool = False) -> dict:
             lr_schedulers.append(scheduler['scheduler'].state_dict())
             checkpoint['lr_schedulers'] = lr_schedulers
 
-        # dump amp scaling
-        if (
-            self.trainer.amp_backend == AMPType.NATIVE and self.trainer._device_type != DeviceType.TPU
-            and self.trainer.scaler is not None
-        ):
-            checkpoint['native_amp_scaling_state'] = self.trainer.scaler.state_dict()
-        elif self.trainer.amp_backend == AMPType.APEX:
-            checkpoint['amp_scaling_state'] = amp.state_dict()
+        self.trainer.precision_plugin.on_save_checkpoint(checkpoint)
 
         # dump hyper-parameters
         if model.hparams:
             if hasattr(model, '_hparams_name'):
-                checkpoint[LightningModule.CHECKPOINT_HYPER_PARAMS_NAME] = model._hparams_name
+                checkpoint[pl.LightningModule.CHECKPOINT_HYPER_PARAMS_NAME] = model._hparams_name
             # dump arguments
             if _OMEGACONF_AVAILABLE and isinstance(model.hparams, Container):
-                checkpoint[LightningModule.CHECKPOINT_HYPER_PARAMS_KEY] = model.hparams
-                checkpoint[LightningModule.CHECKPOINT_HYPER_PARAMS_TYPE] = type(model.hparams)
+                checkpoint[pl.LightningModule.CHECKPOINT_HYPER_PARAMS_KEY] = model.hparams
+                checkpoint[pl.LightningModule.CHECKPOINT_HYPER_PARAMS_TYPE] = type(model.hparams)
             else:
-                checkpoint[LightningModule.CHECKPOINT_HYPER_PARAMS_KEY] = dict(model.hparams)
+                checkpoint[pl.LightningModule.CHECKPOINT_HYPER_PARAMS_KEY] = dict(model.hparams)
 
         # give the model a chance to dump a few things
         model.on_save_checkpoint(checkpoint)
@@ -321,31 +370,20 @@ def dump_checkpoint(self, weights_only: bool = False) -> dict:
 
         return checkpoint
 
-    def hpc_load(self, checkpoint_path: str, on_gpu: bool):
-        """
-        Load model/training states from a 'PyTorch-Lightning checkpoint' file for hpc.
-        All restored states are listed in return value description of `dump_checkpoint`.
+    def hpc_load(self, checkpoint_path: str) -> None:
         """
+        Attempts to restore the full training and model state from an HPC checkpoint file.
 
-        # read a checkpoint dictionary object from the 'PyTorch-Lightning checkpoint' file at `checkpoint_path`
-        checkpoint = pl_load(checkpoint_path, map_location=lambda storage, loc: storage)
-
-        # acquire the model
-        model = self.trainer.lightning_module
-
-        # restore model and datamodule state
-        self.restore_model_state(model, checkpoint)
-
-        if self.trainer.root_gpu is not None:
-            model.cuda(self.trainer.root_gpu)
-
-        # restore training state
-        self.restore_training_state(checkpoint)
-
-        # call hpc specific hook
-        model.on_hpc_load(checkpoint)
+        .. deprecated:: v1.4
+            Will be removed in v1.6. Use :meth:`restore` instead.
+        """
+        rank_zero_deprecation(
+            "`CheckpointConnector.hpc_load()` was deprecated in v1.4 and will be removed in v1.6."
+            " Use `CheckpointConnector.restore()` instead."
+        )
+        self.restore(checkpoint_path)
 
-    def max_ckpt_in_folder(self, dir_path: Union[str, Path], name_key: str = 'ckpt_') -> Optional[int]:
+    def max_ckpt_version_in_folder(self, dir_path: Union[str, Path], name_key: str = 'ckpt_') -> Optional[int]:
         """List up files in `dir_path` with `name_key`, then yield maximum suffix number.
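+
+        For example (an illustrative case), a folder containing ``hpc_ckpt_3.ckpt`` and
+        ``hpc_ckpt_7.ckpt`` yields ``7`` when called with ``name_key="hpc_ckpt_"``.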
Args: dir_path: path of directory which may contain files whose name include `name_key` @@ -377,7 +415,7 @@ def max_ckpt_in_folder(self, dir_path: Union[str, Path], name_key: str = 'ckpt_' def get_max_ckpt_path_from_folder(self, folder_path: Union[str, Path]) -> str: """Get path of maximum-epoch checkpoint in the folder.""" - max_suffix = self.max_ckpt_in_folder(folder_path) + max_suffix = self.max_ckpt_version_in_folder(folder_path) ckpt_number = max_suffix if max_suffix is not None else 0 return f'{folder_path}/hpc_ckpt_{ckpt_number}.ckpt' diff --git a/pytorch_lightning/trainer/connectors/data_connector.py b/pytorch_lightning/trainer/connectors/data_connector.py index a867bf96a8d77..6785b25a2112c 100644 --- a/pytorch_lightning/trainer/connectors/data_connector.py +++ b/pytorch_lightning/trainer/connectors/data_connector.py @@ -12,17 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import List, Optional, Union - -from torch.utils.data import DataLoader +from typing import Optional, Union import pytorch_lightning as pl from pytorch_lightning.trainer.supporters import prefetch_iterator from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.model_helpers import is_overridden +from pytorch_lightning.utilities.types import EVAL_DATALOADERS, TRAIN_DATALOADERS -class DataConnector(object): +class DataConnector: def __init__(self, trainer: "pl.Trainer", multiple_trainloader_mode: str = "max_size_cycle"): self.trainer = trainer @@ -65,22 +64,21 @@ def can_prepare_data(self): if self.trainer.prepare_data_per_node: return self.trainer.local_rank == 0 and should_call_dm_prepare_data - else: - return self.trainer.node_rank == 0 and self.trainer.local_rank == 0 and should_call_dm_prepare_data + return self.trainer.node_rank == 0 and self.trainer.local_rank == 0 and should_call_dm_prepare_data def attach_data( self, model: 'pl.LightningModule', - train_dataloader: Optional[Union[DataLoader, List[DataLoader]]] = None, - val_dataloaders: Optional[Union[DataLoader, List[DataLoader]]] = None, - test_dataloaders: Optional[Union[DataLoader, List[DataLoader]]] = None, - predict_dataloaders: Optional[Union[DataLoader, List[DataLoader]]] = None, + train_dataloaders: Optional[TRAIN_DATALOADERS] = None, + val_dataloaders: Optional[EVAL_DATALOADERS] = None, + test_dataloaders: Optional[EVAL_DATALOADERS] = None, + predict_dataloaders: Optional[EVAL_DATALOADERS] = None, datamodule: Optional['pl.LightningDataModule'] = None ) -> None: # set up the passed in dataloaders (if needed) self.attach_dataloaders( model, - train_dataloader=train_dataloader, + train_dataloaders=train_dataloaders, val_dataloaders=val_dataloaders, test_dataloaders=test_dataloaders, predict_dataloaders=predict_dataloaders, @@ -92,15 +90,15 @@ def attach_data( def attach_dataloaders( self, model: 'pl.LightningModule', - train_dataloader: Optional[Union[DataLoader, List[DataLoader]]] = None, - val_dataloaders: Optional[Union[DataLoader, List[DataLoader]]] = None, - test_dataloaders: Optional[Union[DataLoader, List[DataLoader]]] = None, - predict_dataloaders: Optional[Union[DataLoader, List[DataLoader]]] = None, + train_dataloaders: Optional[TRAIN_DATALOADERS] = None, + val_dataloaders: Optional[EVAL_DATALOADERS] = None, + test_dataloaders: Optional[EVAL_DATALOADERS] = None, + predict_dataloaders: Optional[EVAL_DATALOADERS] = None, ) -> None: # when dataloader is passed via fit, patch the train_dataloader # functions to 
overwrite with these implementations - if train_dataloader is not None: - model.train_dataloader = _PatchDataLoader(train_dataloader) + if train_dataloaders is not None: + model.train_dataloader = _PatchDataLoader(train_dataloaders) if val_dataloaders is not None: model.val_dataloader = _PatchDataLoader(val_dataloaders) @@ -114,43 +112,40 @@ def attach_dataloaders( def attach_datamodule( self, model: 'pl.LightningModule', datamodule: Optional['pl.LightningDataModule'] = None ) -> None: - # We use datamodule if it's been provided, otherwise we check model for it - datamodule = datamodule or getattr(model, 'datamodule', None) - # If we have a datamodule, attach necessary hooks + dataloaders - if datamodule: + if datamodule is None: + return - # Override loader hooks - dl_methods = ('train_dataloader', 'val_dataloader', 'test_dataloader', 'predict_dataloader') - for method in dl_methods: - if is_overridden(method, datamodule): - setattr(model, method, getattr(datamodule, method)) + # Override loader hooks + dl_methods = ('train_dataloader', 'val_dataloader', 'test_dataloader', 'predict_dataloader') + for method in dl_methods: + if is_overridden(method, datamodule): + setattr(model, method, getattr(datamodule, method)) - # Override data transfer hooks if dataset-specific to_device logic has been defined in datamodule - batch_transfer_hooks = ('on_before_batch_transfer', 'transfer_batch_to_device', 'on_after_batch_transfer') - for hook in batch_transfer_hooks: - if is_overridden(hook, datamodule): - setattr(model, hook, getattr(datamodule, hook)) + # Override data transfer hooks if dataset-specific to_device logic has been defined in datamodule + batch_transfer_hooks = ('on_before_batch_transfer', 'transfer_batch_to_device', 'on_after_batch_transfer') + for hook in batch_transfer_hooks: + if is_overridden(hook, datamodule): + setattr(model, hook, getattr(datamodule, hook)) - self.trainer.datamodule = datamodule - datamodule.trainer = self.trainer + self.trainer.datamodule = datamodule + datamodule.trainer = self.trainer - # experimental feature for Flash - if hasattr(datamodule, "data_pipeline"): - model.data_pipeline = datamodule.data_pipeline + # experimental feature for Flash + if hasattr(datamodule, "data_pipeline"): + model.data_pipeline = datamodule.data_pipeline -class _PatchDataLoader(object): +class _PatchDataLoader: r""" Callable object for patching dataloaders passed into trainer.fit(). Use this class to override model.*_dataloader() and be pickle-compatible. Args: dataloader: Dataloader object to return when called. 
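+
+    Example (an illustrative sketch; ``train_loader`` stands in for any ``DataLoader`` passed to ``trainer.fit``)::
+
+        model.train_dataloader = _PatchDataLoader(train_loader)
+        model.train_dataloader()  # calling the patch returns ``train_loader``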
- """ - def __init__(self, dataloader: Union[List[DataLoader], DataLoader]): + def __init__(self, dataloader: Union[TRAIN_DATALOADERS, EVAL_DATALOADERS]) -> None: self.dataloader = dataloader # cannot pickle __code__ so cannot verify if PatchDataloader @@ -158,5 +153,5 @@ def __init__(self, dataloader: Union[List[DataLoader], DataLoader]): # so, we hack it by using the string representation self.patch_loader_code = str(self.__call__.__code__) - def __call__(self) -> Union[List[DataLoader], DataLoader]: + def __call__(self) -> Union[TRAIN_DATALOADERS, EVAL_DATALOADERS]: return self.dataloader diff --git a/pytorch_lightning/trainer/connectors/debugging_connector.py b/pytorch_lightning/trainer/connectors/debugging_connector.py index 0108a1045698f..9691f416a0c23 100644 --- a/pytorch_lightning/trainer/connectors/debugging_connector.py +++ b/pytorch_lightning/trainer/connectors/debugging_connector.py @@ -58,9 +58,9 @@ def on_init_start( limit_val_batches = fast_dev_run limit_test_batches = fast_dev_run limit_predict_batches = fast_dev_run - self.trainer.train_loop.max_steps = fast_dev_run + self.trainer.fit_loop.max_steps = fast_dev_run self.trainer.num_sanity_val_steps = 0 - self.trainer.train_loop.max_epochs = 1 + self.trainer.fit_loop.max_epochs = 1 val_check_interval = 1.0 self.trainer.check_val_every_n_epoch = 1 self.trainer.logger = DummyLogger() @@ -89,9 +89,8 @@ def determine_data_use_amount(self, overfit_batches: float) -> None: def _determine_batch_limits(batches: Union[int, float], name: str) -> Union[int, float]: if 0 <= batches <= 1: return batches - elif batches > 1 and batches % 1.0 == 0: + if batches > 1 and batches % 1.0 == 0: return int(batches) - else: - raise MisconfigurationException( - f'You have passed invalid value {batches} for {name}, it has to be in [0.0, 1.0] or an int.' - ) + raise MisconfigurationException( + f'You have passed invalid value {batches} for {name}, it has to be in [0.0, 1.0] or an int.' + ) diff --git a/pytorch_lightning/trainer/connectors/env_vars_connector.py b/pytorch_lightning/trainer/connectors/env_vars_connector.py index 1f1c41c6eb2f0..d3084e3e4ece5 100644 --- a/pytorch_lightning/trainer/connectors/env_vars_connector.py +++ b/pytorch_lightning/trainer/connectors/env_vars_connector.py @@ -31,7 +31,7 @@ def insert_env_defaults(self, *args, **kwargs): # parse only the argument names cls_arg_names = [arg[0] for arg in get_init_arguments_and_types(cls)] # convert args to kwargs - kwargs.update({k: v for k, v in zip(cls_arg_names, args)}) + kwargs.update(dict(zip(cls_arg_names, args))) env_variables = vars(parse_env_variables(cls)) # update the kwargs by env variables kwargs = dict(list(env_variables.items()) + list(kwargs.items())) diff --git a/pytorch_lightning/trainer/connectors/logger_connector/epoch_result_store.py b/pytorch_lightning/trainer/connectors/logger_connector/epoch_result_store.py deleted file mode 100644 index 3d6370e3eb658..0000000000000 --- a/pytorch_lightning/trainer/connectors/logger_connector/epoch_result_store.py +++ /dev/null @@ -1,493 +0,0 @@ -# Copyright The PyTorch Lightning team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -from collections import defaultdict -from typing import Any, Dict, List, Optional, Tuple -from weakref import proxy - -import torch - -import pytorch_lightning as pl -from pytorch_lightning.core.step_result import Result -from pytorch_lightning.trainer.states import TrainerFn -from pytorch_lightning.utilities import DistributedType, LightningEnum - - -class ResultStoreType(LightningEnum): - INSIDE_BATCH_TRAIN_LOOP = "inside_batch_train_loop" - OUTSIDE_BATCH_TRAIN_LOOP = "outside_batch_train_loop" - - -class HookResultStore: - """ - This class is defined for internal usage. - It holds all metrics logged using the self.log function - in the scope of ModelHooks or Callback functions. - - We need to differentiate 3 different scenarios: - - (1): We are outside of a batch loop - * It means no dataloader_idx, no optimizer idx, etc.. - - (2): We are inside the training batch loop - * We have an optimizer idx and split idx to track - - (3): We are inside the evaluation loop - * We have a dataloader_idx to track - - The data store `Result` objects for those 3 scenarios in `self._internals`. - - (1): self._internals = {dataloader_idx: [Result(), ..., Result()]} - * dataloader_idx not being defined, it is set to 0 b default - (2): self._internals = {dataloader_idx: {optimizer_idx: {batch_idx: [Result(), ..., Result()]}}} - (3): Same as (1) for simplicity - - Those data structures enables us to reduce properly Result object when batch loop is finished. - """ - - def __init__(self, fx_name: str) -> None: - self._fx_name = fx_name - self._internals = {} - self._internals_reduced = {} - self._internal_type: Optional[ResultStoreType] = None - self.has_reduced = False - self._latest_ref = {} - - @property - def num_dataloaders(self) -> int: - return len(self._internals_reduced if self.has_reduced else self._internals) - - def check_dataloader_idx(self, result: Result) -> bool: - random_key = list(result.keys())[-1] - return result["meta"][random_key]["dataloader_idx"] is not None - - def get_latest_from_func_name(self, latest_result_opt, func_name: str, *args, **kwargs) -> Dict: - results = {} - for opt_idx in latest_result_opt: - latest_result = latest_result_opt[opt_idx] - add_dataloader_idx = self.check_dataloader_idx(latest_result) - func = getattr(latest_result, func_name) - results.update(func(*args, add_dataloader_idx=add_dataloader_idx, **kwargs)) - return results - - def run_latest_batch_metrics_with_func_name(self, func_name, *args, **kwargs) -> List[Dict]: - """ - This function used cache_ref and cache_result to optimize loading metrics - - Context: As we update the logger_connector metrics on every `self.log` call, - and it can be pretty time consuming, especially when logging outside batch loop. 
- - HookResultStore keeps track of its latest added result object, - and cache its pbar and log metrics if already called on, - """ - return [ - self.get_latest_from_func_name(self._latest_ref[dl_idx], func_name, *args, **kwargs) - for dl_idx in range(self.num_dataloaders) - ] - - def get_batch_pbar_metrics(self, *args, **kwargs): - return self.run_latest_batch_metrics_with_func_name("get_batch_pbar_metrics", *args, **kwargs) - - def get_batch_log_metrics(self, *args, **kwargs): - return self.run_latest_batch_metrics_with_func_name("get_batch_log_metrics", *args, **kwargs) - - def run_epoch_func(self, results, opt_metric, func_name, *args, **kwargs) -> None: - if not isinstance(opt_metric, Result): - raise Exception("The provided opt_metric should be a Result Object. Something is wrong") - - func = getattr(opt_metric, func_name) - metrics_to_log = func(*args, add_dataloader_idx=self.num_dataloaders > 1, **kwargs) - - results.append(metrics_to_log) - - def get_epoch_from_func_name(self, func_name, *args, **kwargs) -> List[Dict]: - results = [] - for dl_idx in range(self.num_dataloaders): - opt_metrics = self._internals_reduced[dl_idx] - if isinstance(opt_metrics, defaultdict): - for opt_metric in opt_metrics.values(): - self.run_epoch_func(results, opt_metric, func_name, *args, **kwargs) - else: - self.run_epoch_func(results, opt_metrics, func_name, *args, **kwargs) - return results - - def get_epoch_pbar_metrics(self, *_, **__) -> List[Dict]: - return self.get_epoch_from_func_name("get_epoch_pbar_metrics") - - def get_epoch_log_metrics(self, *_, **__) -> List[Dict]: - return self.get_epoch_from_func_name("get_epoch_log_metrics") - - def get_forked_metrics(self, *_, **__) -> List[Dict]: - return self.get_epoch_from_func_name("get_forked_metrics") - - def append(self, result: Result, info: Dict) -> None: - dataloader_idx = info["dataloader_idx"] - self._internal_type = info["type"] - opt_idx = info["opt_idx"] - - if self._internal_type == ResultStoreType.INSIDE_BATCH_TRAIN_LOOP: - if dataloader_idx not in self._internals: - self._internals_reduced[dataloader_idx] = defaultdict(dict) - self._latest_ref[dataloader_idx] = {} - self._internals.setdefault(dataloader_idx, {}) - - batch_idx = info["batch_idx"] - self._internals[dataloader_idx].setdefault(opt_idx, {}) - self._internals[dataloader_idx][opt_idx].setdefault(batch_idx, []) - self._internals[dataloader_idx][opt_idx][batch_idx].append(result) - else: - self._internals.setdefault(dataloader_idx, []) - self._internals[dataloader_idx].append(result) - self._latest_ref.setdefault(dataloader_idx, {}) - - self._latest_ref[dataloader_idx].setdefault(opt_idx, {}) - self._latest_ref[dataloader_idx][opt_idx] = result - - def auto_reduce_results_on_epoch_end(self) -> None: - """ - This function is called to reduce `self._internals` Result object. - The reduced Result object will be saved into `self._internals_reduced` - The `self._internals` stored Result objects will be deleted to save memory. 
- """ - if self.has_reduced: - return - for dl_idx in range(self.num_dataloaders): - epoch_metrics = self._internals[dl_idx] - - if self._internal_type == ResultStoreType.INSIDE_BATCH_TRAIN_LOOP: - for opt_idx in list(epoch_metrics): - # TODO: Figure out to reduce memory - # TODO: How to start training in middle of epoch - outputs = epoch_metrics[opt_idx] - # reduce across time first - time_reduced_outputs = [] - for tbptt_outputs in outputs.values(): - tbptt_outputs = type(tbptt_outputs[0]).reduce_across_time(tbptt_outputs) - if len(tbptt_outputs) > 1: - time_reduced_outputs.append(tbptt_outputs) - - if len(time_reduced_outputs) == 0: - continue - - # reduce across training steps - outputs = type(time_reduced_outputs[0]).reduce_on_epoch_end(time_reduced_outputs) - - # with manual opt need 1 + metrics because meta is always there - if outputs.minimize is not None: - outputs.minimize = outputs.minimize.mean() - - self._internals_reduced[dl_idx][opt_idx] = outputs - - # free memory - del self._internals[dl_idx][opt_idx] - else: - reduced_epoch_metrics = epoch_metrics[0] - if len(epoch_metrics) != 1: - reduced_epoch_metrics = type(reduced_epoch_metrics).reduce_on_epoch_end(epoch_metrics) - - self._internals_reduced[dl_idx] = reduced_epoch_metrics - - # free memory - del self._internals[dl_idx] - - self.has_reduced = True - - def reset(self) -> None: - """ - Call at the end of epoch to reset Result objects - """ - for dl_idx in range(self.num_dataloaders): - epoch_metrics = self._internals[dl_idx] if not self.has_reduced else self._internals_reduced[dl_idx] - if self._internal_type == ResultStoreType.INSIDE_BATCH_TRAIN_LOOP: - for opt_idx in list(epoch_metrics): - epoch_metrics[opt_idx].reset() - else: - epoch_metrics.reset() - - def __getitem__(self, key: str) -> Any: - return self._internals.get(key, None) - - def __repr__(self): - return self._internals.__repr__() - - -class EpochResultStore: - """ - This class is defined for internal usage. - It holds all metrics logged using the self.log function inside `HookResultStore` objects. - - The internal data-structure is as follow: - self._internals = {"fx_name_0": HookResultStore(), ..., "fx_name_n": HookResultStore()} - - ..example:: - - model._results = Result() - model._current_fx_name = 'something' - model.log('a', ...) 
- epoch_result_store.cache_result() - """ - - def __init__(self, trainer: 'pl.Trainer') -> None: - self.trainer = proxy(trainer) - self._internals = {} - self.reset() - - def __getitem__(self, key: str) -> Any: - return self._internals.get(key, None) - - @property - def info(self): - """ - This function provides necessary parameters to properly configure HookResultStore obj - """ - model_ref = self.trainer.lightning_module - return { - "batch_idx": self.trainer.train_loop.batch_idx, - "fx_name": model_ref._current_fx_name, - "dataloader_idx": model_ref._current_dataloader_idx or 0, - "opt_idx": self._opt_idx or 0, - "split_idx": self._split_idx or 0, - "type": ( - ResultStoreType.INSIDE_BATCH_TRAIN_LOOP if self._opt_idx is not None and self._split_idx is not None - else ResultStoreType.OUTSIDE_BATCH_TRAIN_LOOP - ) - } - - def reset_model(self): - """ - This function is used to reset model state at the end of the capture - """ - model_ref = self.trainer.lightning_module - model_ref._results = Result() - model_ref._current_fx_name = None - - def cache_result(self) -> None: - """ - This function is called after every hook and stores the result object - """ - with self.trainer.profiler.profile("cache_result"): - model_ref = self.trainer.lightning_module - - # extract hook results - hook_result = model_ref._results - - if len(hook_result) == 1: - model_ref._current_fx_name = None - return - - info = self.info - fx_name = info["fx_name"] - - self._internals.setdefault(fx_name, HookResultStore(fx_name)) - - # attach capture batch_size - Result.attach_batch_size(self._batch_size, hook_result) - - hook_result = hook_result.detach() - if self.trainer.move_metrics_to_cpu: - hook_result = hook_result.cpu() - elif self.trainer._distrib_type == DistributedType.DP: - hook_result = hook_result.to(torch.device("cuda", self.trainer.root_gpu)) - - self._internals[fx_name].append(hook_result, info) - - # update logged_metrics, progress_bar_metrics, callback_metrics - if "epoch_end" in fx_name: - self.update_logger_connector() - - self.reset_model() - - def update_logger_connector(self) -> Tuple[Dict, Dict]: - """ - This function is called every time we capture a hook - It automatically updates the logger_connector followings: - - progress_bar_metrics with pbar_metrics - - logged_metrics with log_metrics - - callback_metrics with progress_bar_metrics + logged_metrics - """ - - logger_connector = self.trainer.logger_connector - - callback_metrics = {} - batch_pbar_metrics = {} - batch_log_metrics = {} - - if not self._has_batch_loop_finished: - # get pbar - batch_pbar_metrics = self.get_latest_batch_pbar_metrics() - logger_connector.add_progress_bar_metrics(batch_pbar_metrics) - batch_log_metrics = self.get_latest_batch_log_metrics() - - if self.trainer.training: - logger_connector._logged_metrics.update(batch_log_metrics) - callback_metrics.update(batch_pbar_metrics) - callback_metrics.update(batch_log_metrics) - else: - # get pbar - epoch_pbar_metrics = self.get_epoch_pbar_metrics() - logger_connector.add_progress_bar_metrics(epoch_pbar_metrics) - - # get logged_metrics - epoch_log_metrics = self.get_epoch_log_metrics() - logger_connector._logged_metrics.update(epoch_log_metrics) - logger_connector._logged_metrics.update({"epoch": self.trainer.current_epoch}) - - # get forked_metrics - forked_metrics = self.get_forked_metrics() - - callback_metrics.update(epoch_pbar_metrics) - callback_metrics.update(epoch_log_metrics) - callback_metrics.update(forked_metrics) - - # TODO(carmocca): when we implement flushing 
the logger connector metrics after - # the trainer.state changes, this should check trainer.evaluating instead - if self.trainer.state.fn in (TrainerFn.TESTING, TrainerFn.VALIDATING): - logger_connector.evaluation_callback_metrics.update(callback_metrics) - - # update callback_metrics - logger_connector._callback_metrics.update(callback_metrics) - - batch_pbar_metrics.pop("debug_epoch", None) - return batch_pbar_metrics, batch_log_metrics - - def run_batch_from_func_name(self, func_name) -> Dict: - results = [getattr(hook_result, func_name) for hook_result in self._internals.values()] - results = [func(include_forked_originals=False) for func in results] - return {k: v for d in sum(results, []) for k, v in d.items()} # List[List[dict]] -> dict - - def get_latest_batch_log_metrics(self) -> Dict: - batch_log_metrics = self.run_batch_from_func_name("get_batch_log_metrics") - return batch_log_metrics - - def get_latest_batch_pbar_metrics(self) -> Dict: - batch_pbar_metrics = self.run_batch_from_func_name("get_batch_pbar_metrics") - return batch_pbar_metrics - - @property - def has_reduced(self) -> bool: - hook_results = self._internals.values() - return len(hook_results) == sum(h.has_reduced for h in hook_results) - - def auto_reduce_results_on_epoch_end(self) -> None: - if not self.has_reduced: - for hook_result in self._internals.values(): - hook_result.auto_reduce_results_on_epoch_end() - - @property - def has_batch_loop_finished(self) -> bool: - return self._has_batch_loop_finished - - @has_batch_loop_finished.setter - def has_batch_loop_finished(self, has_batch_loop_finished): - if has_batch_loop_finished: - # If batch loop has finished, reduce metrics - self.auto_reduce_results_on_epoch_end() - - # batch_size should be none as we finished batch loop - self._batch_size = None - - self._has_batch_loop_finished = has_batch_loop_finished - self.update_logger_connector() - - def run_epoch_by_func_name(self, func_name) -> Dict: - if not self.has_reduced: - self.auto_reduce_results_on_epoch_end() - results = [getattr(hook_result, func_name) for hook_result in self._internals.values()] - results = [func() for func in results] - return {k: v for d in sum(results, []) for k, v in d.items()} # List[List[dict]] -> dict - - def get_epoch_pbar_metrics(self) -> Dict: - return self.run_epoch_by_func_name("get_epoch_pbar_metrics") - - def get_epoch_log_metrics(self) -> Dict: - return self.run_epoch_by_func_name("get_epoch_log_metrics") - - def get_forked_metrics(self) -> Dict: - return self.run_epoch_by_func_name("get_forked_metrics") - - def reset(self) -> None: - for value in self._internals.values(): - value.reset() - self._internals = {} - self._dataloader_idx: Optional[int] = None - self._split_idx: Optional[int] = None - self._opt_idx: Optional[int] = None - self._batch_size: Optional[int] = None - self._has_batch_loop_finished = False - - def __call__( - self, - fx_name: str, - dl_idx: Optional[int] = None, - opt_idx: Optional[int] = None, - batch_idx: Optional[int] = None, - split_idx: Optional[int] = None, - reduced: bool = False, - ): - """ - This function is a helper to access stored data - - It access data from the HookResultStore. 
Please, - check its data structure for better understanding - - Data can be accessed with the following chains: - - IF REDUCED: - * IF accessing a fx_name defined in batch training loop: - fx_name -> dl_idx -> opt_idx -> batch_idx -> split_idx - * ELSE fx_name -> dl_idx -> batch_idx - ELSE: - * IF accessing a fx_name defined in batch training loop: - fx_name -> dl_idx -> opt_idx - * ELSE fx_name -> dl_idx - - Note: - As soon as a param is None, it breaks the chain and returns associated stored data. - - Example:: - - result: Result = self(fx_name="training_step", dl_idx=0, opt_idx=0, reduced=True) - result['train_loss_epoch'] # aggregated train_loss over one epoch. - - Args: - - fx_name: Hook name from ModelHooks or Callback. Example: ``"training_step"`` - - dl_idx: Dataloader index in short. From ``0`` to ``num_dataloaders - 1`` - - opt_idx: Optimizer index in short. From ``0`` to ``num_optimizers - 1`` - - batch_idx: Batch index seen during batch training or evaluation. - Works only with ``reduced=False`` - - split_idx: Index of split idx in training loop when tbptt is used. - - reduced: Data are being aggregated on on_epoch_end. - Indicates if we want to access the aggregated Result or not. - """ - hook_result = self[fx_name] - internal_type = hook_result._internal_type - result = hook_result._internals_reduced if reduced else hook_result._internals - - if dl_idx is not None: - result = result[dl_idx] - if internal_type == ResultStoreType.INSIDE_BATCH_TRAIN_LOOP: - if opt_idx is not None: - result = result[opt_idx] - if not reduced and batch_idx is not None: - result = result[batch_idx] - if split_idx is not None: - result = result[split_idx] - elif not reduced and batch_idx is not None: - result = result[batch_idx] - return result - - def __repr__(self): - return f"{self.__class__.__name__}(internals={self._internals})" diff --git a/pytorch_lightning/trainer/connectors/logger_connector/fx_validator.py b/pytorch_lightning/trainer/connectors/logger_connector/fx_validator.py index 3db8aace451dd..8d079f8b4a637 100644 --- a/pytorch_lightning/trainer/connectors/logger_connector/fx_validator.py +++ b/pytorch_lightning/trainer/connectors/logger_connector/fx_validator.py @@ -29,26 +29,26 @@ class FxValidator: on_fit_end=None, on_sanity_check_start=None, on_sanity_check_end=None, - on_train_start=dict(on_step=(False, True), on_epoch=(False, True)), + on_train_start=dict(on_step=(False, ), on_epoch=(True, )), on_train_end=None, - on_validation_start=dict(on_step=(False, True), on_epoch=(False, True)), + on_validation_start=dict(on_step=(False, ), on_epoch=(True, )), on_validation_end=None, - on_test_start=dict(on_step=(False, True), on_epoch=(False, True)), + on_test_start=dict(on_step=(False, ), on_epoch=(True, )), on_test_end=None, on_predict_start=None, on_predict_end=None, on_pretrain_routine_start=None, on_pretrain_routine_end=None, - on_train_epoch_start=dict(on_step=(False, True), on_epoch=(False, True)), - on_train_epoch_end=dict(on_step=(False, ), on_epoch=(False, True)), - on_validation_epoch_start=dict(on_step=(False, True), on_epoch=(False, True)), - on_validation_epoch_end=dict(on_step=(False, ), on_epoch=(False, True)), - on_test_epoch_start=dict(on_step=(False, True), on_epoch=(False, True)), - on_test_epoch_end=dict(on_step=(False, ), on_epoch=(False, True)), + on_train_epoch_start=dict(on_step=(False, True), on_epoch=(True, )), + on_train_epoch_end=dict(on_step=(False, ), on_epoch=(True, )), + on_validation_epoch_start=dict(on_step=(False, True), on_epoch=(True, )), + 
on_validation_epoch_end=dict(on_step=(False, ), on_epoch=(True, )), + on_test_epoch_start=dict(on_step=(False, True), on_epoch=(True, )), + on_test_epoch_end=dict(on_step=(False, ), on_epoch=(True, )), on_predict_epoch_start=None, on_predict_epoch_end=None, - on_epoch_start=dict(on_step=(False, True), on_epoch=(False, True)), - on_epoch_end=dict(on_step=(False, ), on_epoch=(False, True)), + on_epoch_start=dict(on_step=(False, True), on_epoch=(True, )), + on_epoch_end=dict(on_step=(False, ), on_epoch=(True, )), on_batch_start=dict(on_step=(False, True), on_epoch=(False, True)), on_batch_end=dict(on_step=(False, True), on_epoch=(False, True)), on_train_batch_start=dict(on_step=(False, True), on_epoch=(False, True)), @@ -72,19 +72,26 @@ class FxValidator: training_step_end=dict(on_step=(False, True), on_epoch=(False, True)), validation_step_end=dict(on_step=(False, True), on_epoch=(False, True)), test_step_end=dict(on_step=(False, True), on_epoch=(False, True)), - training_epoch_end=dict(on_step=(False, ), on_epoch=(False, True)), - validation_epoch_end=dict(on_step=(False, ), on_epoch=(False, True)), - test_epoch_end=dict(on_step=(False, ), on_epoch=(False, True)), + training_epoch_end=dict(on_step=(False, ), on_epoch=(True, )), + validation_epoch_end=dict(on_step=(False, ), on_epoch=(True, )), + test_epoch_end=dict(on_step=(False, ), on_epoch=(True, )), + on_before_batch_transfer=None, + transfer_batch_to_device=None, + on_after_batch_transfer=None, + backward=None, + optimizer_step=None, # TODO(@carmocca): some {step,epoch}_{start,end} are missing ) - def check_logging(self, fx_name: str, on_step: bool, on_epoch: bool) -> None: - if fx_name not in self.functions: + @classmethod + def check_logging(cls, fx_name: str, on_step: bool, on_epoch: bool) -> None: + """Check if the given function name is allowed to log""" + if fx_name not in cls.functions: raise RuntimeError( f'You are trying to `self.log()` inside `{fx_name}` but it is not implemented.' ' Please, open an issue in `https://github.com/PyTorchLightning/pytorch-lightning/issues`' ) - allowed = self.functions[fx_name] + allowed = cls.functions[fx_name] if allowed is None: raise MisconfigurationException(f"{fx_name} function doesn't support logging using `self.log()`") diff --git a/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py b/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py index a16f5119abff2..e248b5ff8cf13 100644 --- a/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py +++ b/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py @@ -12,18 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
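
The `FxValidator` table above now pins each hook to the `on_step`/`on_epoch` flag values it supports, and `check_logging` became a classmethod, so it can be called without an instance. A minimal sketch of the resulting behaviour, using only entries visible in this diff; the error raised for a disallowed flag combination is truncated above, so just the `None` case is exercised here:

    from pytorch_lightning.trainer.connectors.logger_connector.fx_validator import FxValidator
    from pytorch_lightning.utilities.exceptions import MisconfigurationException

    # `training_step_end` allows any on_step/on_epoch combination per the table
    FxValidator.check_logging('training_step_end', on_step=True, on_epoch=True)

    # `backward` maps to None, so `self.log()` calls inside it are rejected
    try:
        FxValidator.check_logging('backward', on_step=False, on_epoch=True)
    except MisconfigurationException as err:
        print(err)  # backward function doesn't support logging using `self.log()`
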
import os -from copy import deepcopy from pprint import pprint -from typing import Dict, Iterable, List, Optional, Union +from typing import Any, Dict, Iterable, Mapping, Optional, Union import torch +import pytorch_lightning as pl from pytorch_lightning.core import memory -from pytorch_lightning.core.step_result import Result -from pytorch_lightning.loggers import LoggerCollection, TensorBoardLogger -from pytorch_lightning.trainer.connectors.logger_connector.epoch_result_store import EpochResultStore -from pytorch_lightning.trainer.connectors.logger_connector.fx_validator import FxValidator -from pytorch_lightning.trainer.connectors.logger_connector.metrics_holder import MetricsHolder +from pytorch_lightning.loggers import LightningLoggerBase, LoggerCollection, TensorBoardLogger +from pytorch_lightning.trainer.connectors.logger_connector.result import _METRIC, MetricSource from pytorch_lightning.trainer.states import RunningStage, TrainerFn from pytorch_lightning.utilities import DeviceType from pytorch_lightning.utilities.metrics import metrics_to_scalars @@ -32,110 +29,44 @@ class LoggerConnector: - def __init__(self, trainer, log_gpu_memory: Optional[str] = None): + def __init__(self, trainer: 'pl.Trainer', log_gpu_memory: Optional[str] = None) -> None: self.trainer = trainer self.log_gpu_memory = log_gpu_memory - self._callback_metrics = MetricsHolder() - self._evaluation_callback_metrics = MetricsHolder(to_float=True) - self._logged_metrics = MetricsHolder() - self._progress_bar_metrics = MetricsHolder(to_float=True) self.eval_loop_results = [] - self._cached_results = {stage: EpochResultStore(trainer) for stage in RunningStage} - self._cached_results[None] = EpochResultStore(trainer) - self._fx_validator = FxValidator() self._val_log_step: int = 0 self._test_log_step: int = 0 - - @property - def callback_metrics(self) -> Dict: - return self.get_metrics("callback_metrics") - - @callback_metrics.setter - def callback_metrics(self, callback_metrics: Dict) -> None: - self.set_metrics("callback_metrics", callback_metrics) - - @property - def evaluation_callback_metrics(self) -> Dict: - return self.get_metrics("evaluation_callback_metrics") - - @evaluation_callback_metrics.setter - def evaluation_callback_metrics(self, evaluation_callback_metrics: Dict) -> None: - self.set_metrics("evaluation_callback_metrics", evaluation_callback_metrics) - - @property - def logged_metrics(self) -> Dict: - return self.get_metrics("logged_metrics") - - @logged_metrics.setter - def logged_metrics(self, logged_metrics: Dict) -> None: - self.set_metrics("logged_metrics", logged_metrics) - - @property - def progress_bar_metrics(self) -> Dict: - return self.get_metrics("progress_bar_metrics") - - @progress_bar_metrics.setter - def progress_bar_metrics(self, progress_bar_metrics: Dict) -> None: - self.set_metrics("progress_bar_metrics", progress_bar_metrics) - - @property - def cached_results(self) -> Union[EpochResultStore, None]: - return self._cached_results.get(self.trainer.state.stage) - - def get_metrics(self, key: str) -> Dict: - metrics_holder: MetricsHolder = getattr(self, f"_{key}") - model = self.trainer.lightning_module - metrics_holder.convert(model.device if model is not None else None) - return metrics_holder.metrics - - def set_metrics(self, key: str, val: Dict) -> None: - metrics_holder: MetricsHolder = getattr(self, f"_{key}") - metrics_holder.reset(val) - - def reset(self) -> None: - self.cached_results.reset() - - def check_logging(self, fx_name: str, on_step: bool, on_epoch: bool) -> None: 
- self._fx_validator.check_logging(fx_name=fx_name, on_step=on_step, on_epoch=on_epoch) - - def on_evaluation_batch_start(self, batch, dataloader_idx, num_dataloaders): - model = self.trainer.lightning_module - # set dataloader_idx only if multiple ones - model._current_dataloader_idx = dataloader_idx if num_dataloaders > 1 else None - # track batch_size - self.cached_results._batch_size = Result.extract_batch_size(batch) - - def on_train_split_start(self, split_idx: int, opt_idx: int, split_batch) -> None: - self.cached_results._split_idx = split_idx - self.cached_results._opt_idx = opt_idx - self.cached_results._batch_size = Result.extract_batch_size(split_batch) - - def on_train_batch_end(self) -> None: - self.cached_results._split_idx = None - self.cached_results._opt_idx = None - self.cached_results._batch_size = None - - def cache_logged_metrics(self): - self._cached_results[self.trainer.state.stage].cache_result() - - def on_trainer_init(self, logger, flush_logs_every_n_steps: int, log_every_n_steps: int, move_metrics_to_cpu: bool): - # logging + self._progress_bar_metrics: Dict[str, float] = {} + self._logged_metrics: Dict[str, _METRIC] = {} + self._callback_metrics: Dict[str, _METRIC] = {} + self._gpus_metrics: Dict[str, str] = {} + self._epoch_end_reached = False + self._current_fx: Optional[str] = None + self._batch_idx: Optional[int] = None + self._split_idx: Optional[int] = None + + def on_trainer_init( + self, + logger: LightningLoggerBase, + flush_logs_every_n_steps: int, + log_every_n_steps: int, + move_metrics_to_cpu: bool, + ) -> None: self.configure_logger(logger) self.trainer.flush_logs_every_n_steps = flush_logs_every_n_steps self.trainer.log_every_n_steps = log_every_n_steps self.trainer.move_metrics_to_cpu = move_metrics_to_cpu @property - def should_flush_logs(self): + def should_flush_logs(self) -> bool: should_flush = (self.trainer.global_step + 1) % self.trainer.flush_logs_every_n_steps == 0 return should_flush or self.trainer.should_stop @property - def should_update_logs(self): + def should_update_logs(self) -> bool: should_log_every_n_steps = (self.trainer.global_step + 1) % self.trainer.log_every_n_steps == 0 return should_log_every_n_steps or self.trainer.should_stop - def configure_logger(self, logger): + def configure_logger(self, logger: Union[bool, Iterable, LightningLoggerBase]) -> None: if logger is True: version = os.environ.get('PL_EXP_VERSION', self.trainer.slurm_job_id) @@ -151,131 +82,100 @@ def configure_logger(self, logger): else: self.trainer.logger = logger - def cache_training_step_metrics(self, opt_closure_result): - """ - This function is responsible to update - logger_connector internals metrics holder based for depreceated logging - """ - using_results_obj = isinstance(opt_closure_result.training_step_output, Result) - - # temporary dict to collect metrics - logged_metrics_tmp = {} - pbar_metrics_tmp = {} - callback_metrics_tmp = {} - - if using_results_obj: - batch_log_metrics = opt_closure_result.training_step_output.get_batch_log_metrics( - include_forked_originals=False - ) - logged_metrics_tmp.update(batch_log_metrics) - - batch_pbar_metrics = opt_closure_result.training_step_output.get_batch_pbar_metrics( - include_forked_originals=False - ) - pbar_metrics_tmp.update(batch_pbar_metrics) - - forked_metrics = opt_closure_result.training_step_output.get_forked_metrics() - callback_metrics_tmp.update(forked_metrics) - callback_metrics_tmp.update(logged_metrics_tmp) - - else: - batch_log_metrics = 
opt_closure_result.training_step_output.log_metrics - logged_metrics_tmp.update(batch_log_metrics) - - batch_pbar_metrics = opt_closure_result.training_step_output.pbar_on_batch_end - pbar_metrics_tmp.update(batch_pbar_metrics) - - # track progress bar metrics - if len(pbar_metrics_tmp) > 0: - self.add_progress_bar_metrics(pbar_metrics_tmp) - - self._callback_metrics.update(callback_metrics_tmp) - self._logged_metrics.update(logged_metrics_tmp) - - def log_metrics(self, metrics, grad_norm_dict, step=None): + def log_metrics(self, metrics: Dict[str, _METRIC], step: Optional[int] = None) -> None: """Logs the metric dict passed in. If `step` parameter is None and `step` key is presented is metrics, uses metrics["step"] as a step Args: - metrics (dict): Metric values - grad_norm_dict (dict): Gradient norms - step (int): Step for which metrics should be logged. Default value is `self.global_step` during training or + metrics: Metric values + step: Step for which metrics should be logged. Default value is `self.global_step` during training or the total validation / test log step count during validation and testing. """ - # add gpu memory - if self.trainer._device_type == DeviceType.GPU and self.log_gpu_memory: - mem_map = memory.get_memory_profile(self.log_gpu_memory) - metrics.update(mem_map) - - # add norms - metrics.update(grad_norm_dict) + if self.trainer.logger is None or not metrics: + return # turn all tensors to scalars scalar_metrics = metrics_to_scalars(metrics) - if "step" in scalar_metrics and step is None: - step = scalar_metrics.pop("step") - - elif step is None: - # added metrics by Lightning for convenience - scalar_metrics['epoch'] = self.trainer.current_epoch + if step is None: + step = scalar_metrics.pop("step", None) + if step is None: + # added metrics for convenience + scalar_metrics.setdefault("epoch", self.trainer.current_epoch) step = self.trainer.global_step # log actual metrics - if self.trainer.logger is not None: - if self.trainer.is_global_zero: - self.trainer.logger.agg_and_log_metrics(scalar_metrics, step=step) - self.trainer.logger.save() - - # track the logged metrics - self.logged_metrics.update(scalar_metrics) - self.trainer.dev_debugger.track_logged_metrics_history(scalar_metrics) - - def add_progress_bar_metrics(self, metrics): - for k, v in metrics.items(): - if isinstance(v, torch.Tensor): - v = v.item() - - self._progress_bar_metrics.metrics[k] = v - - self.trainer.dev_debugger.track_pbar_metrics_history(metrics) - - def evaluation_epoch_end(self): - # reset dataloader idx - model_ref = self.trainer.lightning_module - model_ref._current_dataloader_idx = None - - # setting `has_batch_loop_finished` to True - # will perform Results reduction accross entire epoch. - self.cached_results.has_batch_loop_finished = True - - def add_to_eval_loop_results(self, dl_idx, has_been_initialized): - callback_metrics = deepcopy(self.evaluation_callback_metrics) - for key in list(callback_metrics.keys()): - if "dataloader_idx" in key: - if f"dataloader_idx_{dl_idx}" not in key: - # remove dl_idx from self.callback_metrics not belonging to this dataset. 
- del callback_metrics[key] - if has_been_initialized: - self.eval_loop_results[dl_idx].update(callback_metrics) - else: - self.eval_loop_results.append(callback_metrics) + if self.trainer.is_global_zero: + self.trainer.logger.agg_and_log_metrics(scalar_metrics, step=step) + self.trainer.logger.save() + + self._logged_metrics.update(scalar_metrics) + + """ + Evaluation metric updates + """ - def prepare_eval_loop_results(self): - num_dataloaders = self.trainer.evaluation_loop.num_dataloaders + @property + def _eval_log_step(self) -> Optional[int]: + if self.trainer.state.stage is RunningStage.VALIDATING: + return self._val_log_step + if self.trainer.state.stage is RunningStage.TESTING: + return self._test_log_step + return None + + def _increment_eval_log_step(self) -> None: + if self.trainer.state.stage is RunningStage.VALIDATING: + self._val_log_step += 1 + elif self.trainer.state.stage is RunningStage.TESTING: + self._test_log_step += 1 + + def on_evaluation_batch_start(self, batch: Any, batch_idx: int, dataloader_idx: int, num_dataloaders: int) -> None: + model = self.trainer.lightning_module + # set dataloader_idx only if multiple ones + model._current_dataloader_idx = dataloader_idx if num_dataloaders > 1 else None + + # track batch_size + self.trainer._results.extract_batch_size(batch) + self._batch_idx = batch_idx + + def update_eval_step_metrics(self) -> None: + if self.trainer.sanity_checking: + return + + # logs user requested information to logger + assert not self._epoch_end_reached + self.log_metrics(self.metrics[MetricSource.LOG], step=self._eval_log_step) + + # increment the step even if nothing was logged + self._increment_eval_log_step() + + def _prepare_eval_loop_results(self, metrics: Mapping[str, _METRIC]) -> None: + if self.trainer.sanity_checking: + return + + num_dataloaders = self.trainer._evaluation_loop.num_dataloaders has_been_initialized = len(self.eval_loop_results) == num_dataloaders - for dl_idx in range(self.trainer.evaluation_loop.num_dataloaders): - self.add_to_eval_loop_results(dl_idx, has_been_initialized) + for dl_idx in range(self.trainer._evaluation_loop.num_dataloaders): + # remove callback metrics that don't belong to this dataloader + callback_metrics = { + k: v + for k, v in metrics.items() if "dataloader_idx" not in k or f"dataloader_idx_{dl_idx}" in k + } + if has_been_initialized: + self.eval_loop_results[dl_idx].update(callback_metrics) + else: + self.eval_loop_results.append(callback_metrics) + + def update_eval_epoch_metrics(self) -> _EVALUATE_OUTPUT: + assert self._epoch_end_reached + metrics = self.metrics - def get_evaluate_epoch_results(self) -> _EVALUATE_OUTPUT: if not self.trainer.sanity_checking: # log all the metrics as a single dict - metrics_to_log = self.cached_results.get_epoch_log_metrics() - if len(metrics_to_log) > 0: - self.log_metrics(metrics_to_log, {}) + self.log_metrics(metrics[MetricSource.LOG]) - self.prepare_eval_loop_results() + self._prepare_eval_loop_results(metrics[MetricSource.CALLBACK]) # log results of evaluation if ( @@ -292,110 +192,123 @@ def get_evaluate_epoch_results(self) -> _EVALUATE_OUTPUT: print('-' * 80) results = self.eval_loop_results - # clear mem self.eval_loop_results = [] return results - def on_train_epoch_end(self): - # inform cached logger connector epoch finished - self.cached_results.has_batch_loop_finished = True - - def log_train_epoch_end_metrics(self, epoch_output: List[List[List[Result]]]) -> None: - # epoch output is a list. 
Each item in that list has all the outputs per optimizer - # epoch_output[optimizer_idx][training_step_idx][tbptt_index] - # remember that not using truncated backprop is equivalent with truncated back prop of len(1) - - # log/aggregate metrics automatically - epoch_log_metrics, epoch_progress_bar_metrics = self.__auto_reduce_results_on_epoch_end(epoch_output) - - # it will perform reduction over epoch and return log metrics - cached_epoch_log_metrics = self.cached_results.get_epoch_log_metrics() - cached_epoch_pbar_metrics = self.cached_results.get_epoch_pbar_metrics() - - # update - epoch_log_metrics.update(cached_epoch_log_metrics) - epoch_progress_bar_metrics.update(cached_epoch_pbar_metrics) - - # -------------------------- - # track results - # -------------------------- - # add the metrics to the loggers and callbacks - if epoch_log_metrics and len(epoch_log_metrics) > 0: - self.log_metrics(epoch_log_metrics, {}) - self._callback_metrics.update(epoch_log_metrics) - - # add metrics to progress_bar and callbacks - if len(epoch_progress_bar_metrics) > 0: - self.add_progress_bar_metrics(epoch_progress_bar_metrics) - self._callback_metrics.update(epoch_progress_bar_metrics) - - # reset epoch loop result for next epoch - self.cached_results.reset() - - def __auto_reduce_results_on_epoch_end(self, epoch_output): - epoch_log_metrics = {} - epoch_progress_bar_metrics = {} - for opt_outputs in epoch_output: - # reduce across time first - time_reduced_outputs = [] - for tbptt_outs in opt_outputs: - tbptt_outs = tbptt_outs[0].__class__.reduce_across_time(tbptt_outs) - if len(tbptt_outs) > 1: - time_reduced_outputs.append(tbptt_outs) - - if len(time_reduced_outputs) == 0: - continue - - # reduce across training steps - opt_outputs = time_reduced_outputs[0].__class__.reduce_on_epoch_end(time_reduced_outputs) - - # with manual opt need 1 + metrics because meta is always there - if opt_outputs.minimize is not None: - opt_outputs.minimize = opt_outputs.minimize.mean() - epoch_log_metrics.update(opt_outputs.epoch_log_metrics) - epoch_progress_bar_metrics.update(opt_outputs.epoch_pbar_metrics) - - return epoch_log_metrics, epoch_progress_bar_metrics - - def log_train_step_metrics(self, batch_output): - if self.trainer.train_loop.should_accumulate() and self.trainer.lightning_module.automatic_optimization: + """ + Train metric updates + """ + + def on_train_split_start(self, batch_idx: int, split_idx: int, split_batch: Any) -> None: + self.trainer._results.extract_batch_size(split_batch) + self._batch_idx = batch_idx + self._split_idx = split_idx + + def update_train_step_metrics(self) -> None: + if self.trainer.fit_loop.should_accumulate() and self.trainer.lightning_module.automatic_optimization: return - _, batch_log_metrics = self.cached_results.update_logger_connector() + + self._log_gpus_metrics() + # when metrics should be logged - if self.should_update_logs or self.trainer.fast_dev_run is True: - # logs user requested information to logger - grad_norm_dict = batch_output.grad_norm_dict - if grad_norm_dict is None: - grad_norm_dict = {} - if len(batch_log_metrics) > 0 or len(grad_norm_dict) > 0: - self.log_metrics(batch_log_metrics, grad_norm_dict) - self._callback_metrics.update(batch_log_metrics) + assert not self._epoch_end_reached + if self.should_update_logs or self.trainer.fast_dev_run: + self.log_metrics(self.metrics[MetricSource.LOG]) + + def update_train_epoch_metrics(self) -> None: + # add the metrics to the loggers + assert self._epoch_end_reached + 
self.log_metrics(self.metrics[MetricSource.LOG]) + + # reset result collection for next epoch + self.trainer._results.reset(metrics=True) + + def _log_gpus_metrics(self): + for key, mem in self.gpus_metrics.items(): + gpu_id = int(key.split('/')[0].split(':')[1]) + if gpu_id in self.trainer.accelerator_connector.parallel_device_ids: + self.trainer.lightning_module.log(key, mem, prog_bar=False, logger=True, on_step=True, on_epoch=False) + + """ + Utilities and properties + """ + + def on_epoch_start(self) -> None: + self._epoch_end_reached = False + + def on_batch_start(self) -> None: + self._epoch_end_reached = False + + def epoch_end_reached(self): + self.trainer.logger_connector._epoch_end_reached = True + self.trainer.logger_connector._batch_idx = None + self.trainer.logger_connector._split_idx = None + + def on_epoch_end(self) -> None: + assert self._epoch_end_reached + metrics = self.metrics + self._progress_bar_metrics.update(metrics[MetricSource.PBAR]) + self._callback_metrics.update(metrics[MetricSource.CALLBACK]) + self._logged_metrics.update(metrics[MetricSource.LOG]) + self._current_fx = None + + def on_batch_end(self) -> None: + assert not self._epoch_end_reached + metrics = self.metrics + self._progress_bar_metrics.update(metrics[MetricSource.PBAR]) + self._callback_metrics.update(metrics[MetricSource.CALLBACK]) + self._logged_metrics.update(metrics[MetricSource.LOG]) + + def should_reset_tensors(self, fx: str) -> bool: + is_different_fx = self._current_fx != fx + if self._split_idx is None: + is_first_batch = self._batch_idx in (None, 0) + else: + is_first_batch = self._batch_idx + self._split_idx == 0 + return is_different_fx and is_first_batch + + def reset(self, metrics: Optional[bool] = None) -> None: + if self.trainer.sanity_checking: + # reset metrics + self._progress_bar_metrics = {} + self._logged_metrics = {} + self._callback_metrics = {} + self.trainer._results.reset(metrics=metrics) + self._batch_idx = None + self._split_idx = None + self._current_fx = None @property - def evaluation_log_step(self) -> Optional[int]: - if self.trainer.state.stage is RunningStage.VALIDATING: - return self._val_log_step - elif self.trainer.state.stage is RunningStage.TESTING: - return self._test_log_step - else: - return None + def metrics(self) -> Dict[MetricSource, Dict[str, _METRIC]]: + """This function returns either batch or epoch metrics depending on ``_epoch_end_reached``.""" + on_step = not self._epoch_end_reached + return self.trainer._results.metrics(on_step) - def increment_evaluation_log_step(self) -> None: - if self.trainer.state.stage is RunningStage.VALIDATING: - self._val_log_step += 1 - elif self.trainer.state.stage is RunningStage.TESTING: - self._test_log_step += 1 + @property + def gpus_metrics(self) -> Dict[str, str]: + if self.trainer._device_type == DeviceType.GPU and self.log_gpu_memory: + mem_map = memory.get_memory_profile(self.log_gpu_memory) + self._gpus_metrics.update(mem_map) + return self._gpus_metrics - def log_evaluation_step_metrics(self) -> None: - if self.trainer.sanity_checking: - return - _, batch_log_metrics = self.cached_results.update_logger_connector() + @property + def callback_metrics(self) -> Dict[str, _METRIC]: + if self.trainer._results: + metrics = self.metrics[MetricSource.CALLBACK] + self._callback_metrics.update(metrics) + return self._callback_metrics - # logs user requested information to logger - if len(batch_log_metrics) > 0: - kwargs = dict() if "step" in batch_log_metrics else dict(step=self.evaluation_log_step) - 
self.log_metrics(batch_log_metrics, {}, **kwargs) + @property + def logged_metrics(self) -> Dict[str, _METRIC]: + if self.trainer._results: + metrics = self.metrics[MetricSource.LOG] + self._logged_metrics.update(metrics) + return self._logged_metrics - # increment the step even if nothing was logged - self.increment_evaluation_log_step() + @property + def progress_bar_metrics(self) -> Dict[str, float]: + if self.trainer._results: + metrics = self.metrics[MetricSource.PBAR] + self._progress_bar_metrics.update(metrics) + return self._progress_bar_metrics diff --git a/pytorch_lightning/trainer/connectors/logger_connector/metrics_holder.py b/pytorch_lightning/trainer/connectors/logger_connector/metrics_holder.py deleted file mode 100644 index 8f12f57c640b0..0000000000000 --- a/pytorch_lightning/trainer/connectors/logger_connector/metrics_holder.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright The PyTorch Lightning team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import numbers -from typing import Dict, Optional - -import torch -from torchmetrics import Metric - -from pytorch_lightning.utilities.exceptions import MisconfigurationException -from pytorch_lightning.utilities.types import _METRIC - - -class MetricsHolder: - """ - This class acts as a dictionary holder. - It holds metrics and implements conversion functions. - Those functions will be triggered within LoggerConnector - when the property is being requested from the user. - """ - - def __init__(self, to_float: bool = False) -> None: - self.metrics: Dict[str, _METRIC] = {} - self._to_float = to_float - - def update(self, metrics: dict) -> None: - self.metrics.update(metrics) - - def pop(self, key: str, default: _METRIC) -> _METRIC: - return self.metrics.pop(key, default) - - def reset(self, metrics: Dict[str, _METRIC]) -> None: - self.metrics = metrics - - def convert(self, device: Optional[torch.device]) -> None: - for key, value in self.metrics.items(): - if self._to_float: - if isinstance(value, torch.Tensor) and value.numel() != 1: - raise MisconfigurationException( - f"The metric `{key}` does not contain a single element" - f" thus it cannot be converted to float. 
Found `{value}`" - ) - converted = self._convert_to_float(value) - else: - converted = self._convert_to_tensor(value, device) - self.metrics[key] = converted - - @staticmethod - def _convert_to_float(current: _METRIC) -> float: - if isinstance(current, Metric): - current = current.compute().detach() - - if isinstance(current, torch.Tensor): - current = float(current.item()) - - elif isinstance(current, int): - current = float(current) - - return current - - @staticmethod - def _convert_to_tensor(current: _METRIC, device: Optional[torch.device]) -> torch.Tensor: - if isinstance(current, Metric): - current = current.compute().detach() - - elif isinstance(current, numbers.Number): - current = torch.tensor(current, device=device, dtype=torch.float) - - if isinstance(current, torch.Tensor) and current.device.type == "xla": - current = current.cpu() - - return current diff --git a/pytorch_lightning/trainer/connectors/logger_connector/result.py b/pytorch_lightning/trainer/connectors/logger_connector/result.py new file mode 100644 index 0000000000000..d97156fdb4b24 --- /dev/null +++ b/pytorch_lightning/trainer/connectors/logger_connector/result.py @@ -0,0 +1,700 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from collections.abc import Generator +from dataclasses import asdict, dataclass, replace +from functools import partial, wraps +from typing import Any, Callable, Dict, Iterable, List, Mapping, Optional, Tuple, Union + +import torch +from torchmetrics import Metric + +from pytorch_lightning.utilities import rank_zero_warn +from pytorch_lightning.utilities.apply_func import apply_to_collection, apply_to_collections +from pytorch_lightning.utilities.device_dtype_mixin import DeviceDtypeModuleMixin +from pytorch_lightning.utilities.distributed import distributed_available +from pytorch_lightning.utilities.enums import LightningEnum +from pytorch_lightning.utilities.exceptions import MisconfigurationException +from pytorch_lightning.utilities.metrics import metrics_to_scalars +from pytorch_lightning.utilities.warnings import WarningCache + +# re-define the ones from pytorch_lightning.utilities.types without the `Number` type +# TODO(@tchaton): Typing-pickle issue on python<3.7 (https://github.com/cloudpipe/cloudpickle/pull/318) +_METRIC = Any # Union[Metric, torch.Tensor] +_METRIC_COLLECTION = Union[_METRIC, Mapping[str, _METRIC]] + +warning_cache = WarningCache() + + +class MetricSource(LightningEnum): + CALLBACK = "callback" + PBAR = "pbar" + LOG = "log" + + +@dataclass +class _Sync: + fn: Optional[Callable] = None + should: bool = False + rank_zero_only: bool = False + op: Optional[str] = None + group: Optional[Any] = None + + def __post_init__(self) -> None: + if self.fn is None: + self.fn = self.no_op + + @property + def __call__(self) -> Any: + return ( + partial(self.fn, reduce_op=self.op, group=self.group) + if self.should and not self.rank_zero_only else self.no_op + ) + + @staticmethod + def no_op(value: Any, *_, **__) -> Any: + return value + + 
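
The `_Sync` helper above decides whether a logged value goes through the distributed reduction: `__call__` is declared as a property, so accessing it yields either a `partial` of `fn` (when `should` is set and not `rank_zero_only`) or the `no_op` identity. A minimal sketch with the defaults:

    import torch

    sync = _Sync()               # `fn` defaults to `_Sync.no_op` in `__post_init__`
    value = torch.tensor(3.0)
    assert sync(value) is value  # `should=False`, so the value passes through untouched
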
+@dataclass +class _Metadata: + fx: str + name: str + prog_bar: bool = False + logger: bool = True + on_step: bool = False + on_epoch: bool = True + _reduce_fx: Callable = torch.mean + enable_graph: bool = False + dataloader_idx: Optional[int] = None + metric_attribute: Optional[str] = None + _sync: Optional[_Sync] = None + + @property + def reduce_fx(self) -> Callable: + return self._reduce_fx + + @reduce_fx.setter + def reduce_fx(self, reduce_fx: Union[str, Callable]) -> None: + error = ( + 'Only `self.log(..., reduce_fx={min,max,mean,sum})` are currently supported.' + ' Please, open an issue in `https://github.com/PyTorchLightning/pytorch-lightning/issues`.' + f' Found: {reduce_fx}' + ) + self._reduce_fx = reduce_fx + if isinstance(reduce_fx, str): + reduce_fx = reduce_fx.lower() + if reduce_fx == 'avg': + reduce_fx = 'mean' + if reduce_fx not in ('min', 'max', 'mean', 'sum'): + raise MisconfigurationException(error) + self._reduce_fx = getattr(torch, reduce_fx) + elif self.is_custom_reduction: + raise MisconfigurationException(error) + + @property + def sync(self) -> Optional[_Sync]: + return self._sync + + @sync.setter + def sync(self, sync: _Sync) -> None: + if sync.op is None: + sync.op = self.reduce_fx.__name__ + self._sync = sync + + @property + def forked(self) -> bool: + return self.on_step and self.on_epoch + + def forked_name(self, on_step: bool) -> str: + if self.forked: + return f'{self.name}_{"step" if on_step else "epoch"}' + return self.name + + @property + def is_mean_reduction(self) -> bool: + return self.reduce_fx is torch.mean + + @property + def is_sum_reduction(self) -> bool: + return self.reduce_fx in (torch.sum, sum) + + @property + def is_max_reduction(self) -> bool: + return self.reduce_fx in (torch.max, max) + + @property + def is_min_reduction(self) -> bool: + return self.reduce_fx in (torch.min, min) + + @property + def is_custom_reduction(self) -> bool: + return not (self.is_mean_reduction or self.is_max_reduction or self.is_min_reduction or self.is_sum_reduction) + + def __getstate__(self) -> dict: + # drop the `sync.fn` to avoid potential pickle errors + # need to drop `fn` first otherwise `asdict` produces a `RecursionError` + copy = replace(self, _sync=replace(self.sync, fn=None)) + d = asdict(copy) + # delete the `None` value so it does not override + del d['_sync']['fn'] + return d + + def __setstate__(self, state: dict, sync_fn: Optional[Callable] = None) -> None: + d = {**state, '_sync': _Sync(**state['_sync'], fn=sync_fn)} + self.__dict__.update(d) + + @classmethod + def _reconstruct(cls, state: dict, sync_fn: Optional[Callable] = None) -> '_Metadata': + meta = cls(state['fx'], state['name']) + meta.__setstate__(state, sync_fn=sync_fn) + return meta + + +class ResultMetric(Metric, DeviceDtypeModuleMixin): + """Wraps the value provided to `:meth:`~pytorch_lightning.core.lightning.LightningModule.log`""" + + def __init__(self, metadata: _Metadata, is_tensor: bool) -> None: + super().__init__() + self.is_tensor = is_tensor + self.meta = metadata + self.has_reset = False + if is_tensor: + self.add_state("value", torch.tensor(0, dtype=torch.float), dist_reduce_fx=torch.sum) + if self.meta.is_mean_reduction: + self.add_state("cumulated_batch_size", torch.tensor(0, dtype=torch.float), dist_reduce_fx=torch.sum) + + def update(self, value: _METRIC, batch_size: torch.Tensor) -> None: + if self.is_tensor: + value = value.float() + self._forward_cache = value + # performance: no need to accumulate on values only logged on_step + if self.meta.on_step and not 
self.meta.on_epoch: + self.value = self.meta.sync(value) + return + # perform accumulation with reduction + if self.meta.is_mean_reduction: + self.value += value.mean() * batch_size + self.cumulated_batch_size += batch_size + elif self.meta.is_max_reduction or self.meta.is_min_reduction: + self.value = self.meta.reduce_fx(self.value, value.mean()) + elif self.meta.is_sum_reduction: + self.value += value.mean() * batch_size + else: + self.value = value # noqa: attribute-defined-outside-init + self._forward_cache = value._forward_cache + + def compute(self) -> torch.Tensor: + if self.is_tensor: + value = self.meta.sync(self.value) + if self.meta.is_mean_reduction: + cumulated_batch_size = self.meta.sync(self.cumulated_batch_size) + return value / cumulated_batch_size + elif self.meta.is_max_reduction or self.meta.is_min_reduction or self.meta.is_sum_reduction: + return value + return self.value.compute() + + def reset(self) -> None: + if self.is_tensor: + super().reset() + else: + self.value.reset() + self.has_reset = True + + def forward(self, value: _METRIC, batch_size: torch.Tensor) -> None: + if self.meta.enable_graph: + with torch.no_grad(): + self.update(value, batch_size) + else: + # performance: skip the `torch.no_grad` context manager by calling `update` directly + self.update(value, batch_size) + + def _wrap_compute(self, compute: Any) -> Any: + # Override to avoid syncing - we handle it ourselves. + @wraps(compute) + def wrapped_func(*args, **kwargs): + if not self._update_called: + rank_zero_warn( + f"The ``compute`` method of metric {self.__class__.__name__}" + " was called before the ``update`` method which may lead to errors," + " as metric states have not yet been updated.", UserWarning + ) + + # return cached value + if self._computed is not None: + return self._computed + self._computed = compute(*args, **kwargs) + return self._computed + + return wrapped_func + + def __setattr__(self, key: str, value: Any) -> None: + # performance: skip the `torch.nn.Module.__setattr__` checks + object.__setattr__(self, key, value) + + def __repr__(self) -> str: + state = f"{repr(self.meta.name)}, value={self.value}" + if self.is_tensor and self.meta.is_mean_reduction: + state += f", cumulated_batch_size={self.cumulated_batch_size}" + return f"{self.__class__.__name__}({state})" + + def __getstate__(self, drop_value: bool = False) -> dict: + skip = ['update', 'compute', '_update_signature'] + if not self.is_tensor and drop_value: + # Avoid serializing ResultMetrics which are passed Metrics + skip.append('value') + with self.sync_context( + should_sync=not self.meta.sync.rank_zero_only, + process_group=self.meta.sync.group, + distributed_available=distributed_available + ): + d = {k: v for k, v in self.__dict__.items() if k not in skip} + d['meta'] = d['meta'].__getstate__() + d['_class'] = self.__class__.__name__ + return d + + def __setstate__(self, state: dict, sync_fn: Optional[Callable] = None) -> None: + d = {**state, 'meta': _Metadata._reconstruct(state['meta'], sync_fn=sync_fn)} + super().__setstate__(d) + + @classmethod + def _reconstruct(cls, state: dict, sync_fn: Optional[Callable] = None) -> 'ResultMetric': + # need to reconstruct twice because `meta` is used in `__init__` + meta = _Metadata._reconstruct(state['meta']) + result_metric = cls(meta, state['is_tensor']) + result_metric.__setstate__(state, sync_fn=sync_fn) + return result_metric + + +class ResultMetricCollection(dict): + """ + Dict wrapper for easy access to metadata. 
+ + All of the leaf items should be instances of + :class:`~pytorch_lightning.trainer.connectors.logger_connector.result.ResultMetric` + with the same metadata. + """ + + def __init__(self, *args, metadata: Optional[_Metadata] = None) -> None: + super().__init__(*args) + self.meta = metadata + + def __getstate__(self, drop_value: bool = False) -> dict: + + def getstate(item: ResultMetric) -> dict: + return item.__getstate__(drop_value=drop_value) + + items = apply_to_collection(dict(self), (ResultMetric, ResultMetricCollection), getstate) + return {"items": items, "meta": self.meta.__getstate__(), "_class": self.__class__.__name__} + + def __setstate__(self, state: dict, sync_fn: Optional[Callable] = None) -> None: + + def setstate(item: dict) -> Union[Dict[str, ResultMetric], ResultMetric, Any]: + # recurse through dictionaries to set the state. can't use `apply_to_collection` + # as it does not recurse items of the same type. + if not isinstance(item, dict): + return item + if item.get('_class') == ResultMetric.__name__: + return ResultMetric._reconstruct(item, sync_fn=sync_fn) + return {k: setstate(v) for k, v in item.items()} + + items = setstate(state["items"]) + self.update(items) + + any_result_metric = next(iter(items.values())) + self.meta = any_result_metric.meta + + @classmethod + def _reconstruct(cls, state: dict, sync_fn: Optional[Callable] = None) -> 'ResultMetricCollection': + rmc = cls() + rmc.__setstate__(state, sync_fn=sync_fn) + return rmc + + +class ResultCollection(dict): + """ + Collection (dictionary) of :class:`~pytorch_lightning.trainer.connectors.logger_connector.result.ResultMetric` or + :class:`~pytorch_lightning.trainer.connectors.logger_connector.result.ResultMetricCollection` + + Example: + + # `device` needs to be provided before logging + result = ResultCollection(training=True, torch.device("cpu")) + + # you can log to a specific collection. + # arguments: fx, key, value, metadata + result.log('training_step', 'acc', torch.tensor(...), on_step=True, on_epoch=True) + result.log('validation_step', 'recall', torch.tensor(...), on_step=True, on_epoch=True) + """ + + DATALOADER_SUFFIX = "/dataloader_idx_{}" + + def __init__(self, training: bool, device: Optional[Union[str, torch.device]] = None) -> None: + super().__init__() + self.training = training + self._minimize = None + self._batch_size = torch.tensor(1, device=device) + self.device: Optional[Union[str, torch.device]] = device + + @property + def result_metrics(self) -> List[ResultMetric]: + o = [] + + def append_fn(v: ResultMetric) -> None: + nonlocal o + o.append(v) + + apply_to_collection(list(self.values()), ResultMetric, append_fn) + return o + + @property + def batch_size(self) -> torch.Tensor: + # performance: cache the `batch_size` tensor instead of re-creating it + return self._batch_size + + @batch_size.setter + def batch_size(self, value: int) -> None: + self._batch_size = torch.tensor(value, device=self.device) + + @property + def minimize(self) -> Optional[torch.Tensor]: + """ + The :meth:`~pytorch_lightning.core.lightning.LightningModule.training_step` loss + will be saved as the ``minimize`` attribute. 
+ """ + return self._minimize + + @minimize.setter + def minimize(self, loss: Optional[torch.Tensor]) -> None: + if loss is not None: + if not isinstance(loss, torch.Tensor): + raise ValueError(f"`Result.minimize` must be a `torch.Tensor`, found: {loss}") + self._minimize = loss + + @property + def extra(self) -> Dict[str, Any]: + """ + Extras are any keys other than the loss returned by + :meth:`~pytorch_lightning.core.lightning.LightningModule.training_step` + """ + return self.get('_extra', {}) + + @extra.setter + def extra(self, extra: Dict[str, Any]) -> None: + + def check_fn(v): + if v.grad_fn is not None: + warning_cache.deprecation( + f"One of the returned values {set(extra.keys())} has a `grad_fn`. We will detach it automatically" + " but this behaviour will change in v1.6. Please detach it manually:" + " `return {'loss': ..., 'something': something.detach()}`" + ) + return v.detach() + return v + + # update instead of replace to keep the extra dict reference. TODO: remove with v1.6 deprecation removal + extra.update(apply_to_collection(extra, torch.Tensor, check_fn)) + self['_extra'] = extra + + def log( + self, + fx: str, + name: str, + value: _METRIC_COLLECTION, + prog_bar: bool = False, + logger: bool = True, + on_step: bool = False, + on_epoch: bool = True, + reduce_fx: Callable = torch.mean, + enable_graph: bool = False, + sync_dist: bool = False, + sync_dist_fn: Callable = _Sync.no_op, + sync_dist_group: Optional[Any] = None, + dataloader_idx: Optional[int] = None, + batch_size: Optional[int] = None, + metric_attribute: Optional[str] = None, + rank_zero_only: bool = False, + ) -> None: + """See :meth:`~pytorch_lightning.core.lightning.LightningModule.log`""" + # no metrics should be logged with graphs + if not enable_graph and isinstance(value, torch.Tensor): + value = value.detach() + + # move metrics to cpu on TPU. + if isinstance(value, torch.Tensor) and value.device.type == "xla": + value = value.cpu() + + # storage key + key = f"{fx}.{name}" + # add dataloader_suffix to both key and fx + if dataloader_idx is not None: + key += f'.{dataloader_idx}' + fx += f'.{dataloader_idx}' + + meta = _Metadata( + fx=fx, + name=name, + prog_bar=prog_bar, + logger=logger, + on_step=on_step, + on_epoch=on_epoch, + enable_graph=enable_graph, + dataloader_idx=dataloader_idx, + metric_attribute=metric_attribute, + ) + meta.reduce_fx = reduce_fx + meta.sync = _Sync( + should=sync_dist, + fn=sync_dist_fn, + group=sync_dist_group, + rank_zero_only=rank_zero_only, + ) + + # register logged value if it doesn't exist + if key not in self: + self.register_key(key, meta, value) + + # check the stored metadata and the current one match + elif meta != self[key].meta: + raise MisconfigurationException( + f'You called `self.log({name}, ...)` twice in `{fx}` with different arguments. This is not allowed' + ) + + if batch_size is not None: + self.batch_size = batch_size + + self.update_metrics(key, value) + + def register_key(self, key: str, meta: _Metadata, value: _METRIC_COLLECTION) -> None: + """Create one ResultMetric object per value. 
Value can be provided as a nested collection""" + + def fn(v: _METRIC) -> ResultMetric: + metric = ResultMetric(meta, isinstance(v, torch.Tensor)) + return metric.to(self.device) + + value = apply_to_collection(value, (torch.Tensor, Metric), fn) + if isinstance(value, dict): + value = ResultMetricCollection(value, metadata=meta) + self[key] = value + + def update_metrics(self, key: str, value: _METRIC_COLLECTION) -> None: + + def fn(result_metric, v): + # performance: avoid calling `__call__` to avoid the checks in `torch.nn.Module._call_impl` + result_metric.forward(v.to(self.device), self.batch_size) + result_metric.has_reset = False + + apply_to_collections(self[key], value, ResultMetric, fn) + + @staticmethod + def _get_cache(result_metric: ResultMetric, on_step: bool) -> Optional[torch.Tensor]: + cache = None + if on_step and result_metric.meta.on_step: + cache = result_metric._forward_cache + elif not on_step and result_metric.meta.on_epoch: + if not result_metric._computed: + # always reduce on epoch end + should = result_metric.meta.sync.should + result_metric.meta.sync.should = True + result_metric.compute() + result_metric.meta.sync.should = should + cache = result_metric._computed + if cache is not None and not result_metric.meta.enable_graph: + return cache.detach() + return cache + + def valid_items(self) -> Generator: + """This function is used to iterate over current valid metrics.""" + return ((k, v) for k, v in self.items() + if not k == "_extra" and not (isinstance(v, ResultMetric) and v.has_reset)) + + def _forked_name(self, result_metric: ResultMetric, on_step: bool) -> Tuple[str, str]: + name = result_metric.meta.name + forked_name = result_metric.meta.forked_name(on_step) + dl_idx = result_metric.meta.dataloader_idx + if dl_idx is not None: + dataloader_suffix = self.DATALOADER_SUFFIX.format(dl_idx) + name += dataloader_suffix + forked_name += dataloader_suffix + return name, forked_name + + def metrics(self, on_step: bool) -> Dict[MetricSource, Dict[str, _METRIC]]: + metrics = {k: {} for k in MetricSource} + + for _, result_metric in self.valid_items(): + + # extract forward_cache or computed from the ResultMetric. ignore when the output is None + value = apply_to_collection(result_metric, ResultMetric, self._get_cache, on_step, include_none=False) + + # convert metric collection to dict container. + if isinstance(value, ResultMetricCollection): + value = dict(value.items()) + + # check if the collection is empty + has_tensor = False + + def any_tensor(_): + nonlocal has_tensor + has_tensor = True + + apply_to_collection(value, torch.Tensor, any_tensor) + if not has_tensor: + continue + + name, forked_name = self._forked_name(result_metric, on_step) + + # populate logging metrics + if result_metric.meta.logger: + metrics[MetricSource.LOG][forked_name] = value + + # populate callback metrics. callback metrics don't take `_step` forked metrics + if self.training or result_metric.meta.on_epoch and not on_step: + metrics[MetricSource.CALLBACK][name] = value + metrics[MetricSource.CALLBACK][forked_name] = value + + # populate progress_bar metrics. convert tensors to numbers + if result_metric.meta.prog_bar: + metrics[MetricSource.PBAR][forked_name] = metrics_to_scalars(value) + + return metrics + + def reset(self, metrics: Optional[bool] = None, fx: Optional[str] = None) -> None: + """ + Reset the result collection + + Args: + metrics: If True, only ``torchmetrics.Metric`` results are reset, + if False, only ``torch.Tensors`` are reset, + if ``None``, both are. 
+ fx: Function to reset + """ + + def fn(item: ResultMetric) -> None: + requested_type = metrics is None or metrics ^ item.is_tensor + same_fx = fx is None or fx == item.meta.fx + if requested_type and same_fx: + item.reset() + + apply_to_collection(self, ResultMetric, fn) + + def extract_batch_size(self, batch: Any) -> None: + try: + self.batch_size = self._extract_batch_size(batch) + except RecursionError: + self.batch_size = 1 + + def _extract_batch_size(self, batch: Any) -> int: + """ + Recursively unpack a batch to find a torch.Tensor. + + Returns: + ``len(tensor)`` when found, or ``1`` when it hits an empty or non iterable. + """ + if isinstance(batch, torch.Tensor): + size = batch.size(0) + elif isinstance(batch, str): + return len(batch) + elif isinstance(batch, dict): + sample = next(iter(batch.values()), 1) + size = self._extract_batch_size(sample) + elif isinstance(batch, Iterable): + sample = next(iter(batch), 1) + size = self._extract_batch_size(sample) + else: + size = 1 + return size + + def to(self, *args, **kwargs) -> 'ResultCollection': + """Move all data to the given device.""" + + def to_(item: Union[torch.Tensor, Metric], *args: Any, **kwargs: Any) -> Union[torch.Tensor, Metric]: + return item.to(*args, **kwargs) + + apply_to_collection(self, (torch.Tensor, Metric), to_, *args, **kwargs) + + if self.minimize is not None: + self.minimize = self.minimize.to(*args, **kwargs) + self._batch_size = self._batch_size.to(*args, **kwargs) + if 'device' in kwargs: + self.device = kwargs['device'] + return self + + def cpu(self) -> 'ResultCollection': + """Move all data to CPU.""" + return self.to(device="cpu") + + def __str__(self) -> str: + return f'{self.__class__.__name__}({self.training}, {self.device}, {repr(self)})' + + def __getstate__(self, drop_value: bool = True) -> dict: + d = self.__dict__.copy() + + # can't deepcopy tensors with grad_fn + minimize = d['_minimize'] + if minimize is not None: + d['_minimize'] = minimize.detach() + + extra = self.get('_extra') + if extra is not None: + d['_extra'] = extra + + # all the items should be either `ResultMetric`s or `ResultMetricCollection`s + items = {k: v.__getstate__(drop_value=drop_value) for k, v in self.items() if k != '_extra'} + return {**d, 'items': items} + + def __setstate__( + self, + state: dict, + map_location: Optional[Union[str, torch.device]] = None, + sync_fn: Optional[Callable] = None, + ) -> None: + self.__dict__.update({k: v for k, v in state.items() if k != 'items'}) + + def setstate(k: str, item: dict) -> Union[ResultMetric, ResultMetricCollection]: + if not isinstance(item, dict): + raise ValueError(f'Unexpected value: {item}') + cls = item['_class'] + if cls == ResultMetric.__name__: + cls = ResultMetric + elif cls == ResultMetricCollection.__name__: + cls = ResultMetricCollection + else: + raise ValueError(f"Unexpected class name: {cls}") + _sync_fn = sync_fn or (self[k].meta.sync.fn if k in self else None) + return cls._reconstruct(item, sync_fn=_sync_fn) + + items = {k: setstate(k, v) for k, v in state['items'].items()} + self.update(items) + + device = map_location or self.device + self.to(device) + + def state_dict(self, drop_value: bool = True) -> dict: + return self.__getstate__(drop_value) + + def load_state_dict( + self, + state_dict: dict, + map_location: Optional[Union[str, torch.device]] = None, + sync_fn: Optional[Callable] = None, + metrics: Optional[Dict[str, Metric]] = None, + ) -> None: + self.__setstate__(state_dict, map_location=map_location, sync_fn=sync_fn) + + if not metrics: + 
return + result_metrics = self.result_metrics + for metric_attribute, metric in metrics.items(): + for result_metric in result_metrics: + if result_metric.meta.metric_attribute == metric_attribute: + result_metric.value = metric diff --git a/pytorch_lightning/trainer/connectors/optimizer_connector.py b/pytorch_lightning/trainer/connectors/optimizer_connector.py index 2797504288bd3..083d35d4cbb94 100644 --- a/pytorch_lightning/trainer/connectors/optimizer_connector.py +++ b/pytorch_lightning/trainer/connectors/optimizer_connector.py @@ -29,11 +29,17 @@ def on_trainer_init(self) -> None: self.trainer.optimizers = [] self.trainer.optimizer_frequencies = [] - def update_learning_rates(self, interval: str, opt_indices: Optional[List[int]] = None) -> None: + def update_learning_rates( + self, interval: str, update_plateau_schedulers: bool, opt_indices: Optional[List[int]] = None + ) -> None: """Update learning rates. Args: interval: either 'epoch' or 'step'. + update_plateau_schedulers: control whether ``ReduceLROnPlateau`` or non-plateau schedulers get updated. + This is used so non-plateau schedulers can be updated before running validation. Checkpoints are + commonly saved during validation, however, on-plateau schedulers might monitor a validation metric + so they have to be updated separately. opt_indices: indices of the optimizers to update. """ if not self.trainer.lr_schedulers or not self.trainer.lightning_module.automatic_optimization: @@ -46,7 +52,10 @@ def update_learning_rates(self, interval: str, opt_indices: Optional[List[int]] if isinstance(lr_scheduler['opt_idx'], int) and lr_scheduler['opt_idx'] not in opt_indices: continue - current_idx = self.trainer.train_loop.batch_idx if interval == 'step' else self.trainer.current_epoch + if update_plateau_schedulers ^ lr_scheduler["reduce_on_plateau"]: + continue + + current_idx = self.trainer.fit_loop.batch_idx if interval == 'step' else self.trainer.current_epoch current_idx += 1 # account for both batch and epoch starts from 0 # Take step if call to update_learning_rates matches the interval key and # the current step modulo the schedulers frequency is zero @@ -83,7 +92,7 @@ def update_learning_rates(self, interval: str, opt_indices: Optional[List[int]] if self.trainer.dev_debugger.enabled: self.trainer.dev_debugger.track_lr_schedulers_update( - self.trainer.train_loop.batch_idx, + self.trainer.fit_loop.batch_idx, interval, scheduler_idx, old_lr, diff --git a/pytorch_lightning/trainer/connectors/training_trick_connector.py b/pytorch_lightning/trainer/connectors/training_trick_connector.py index f27288d2b13f4..4d93fa5977d13 100644 --- a/pytorch_lightning/trainer/connectors/training_trick_connector.py +++ b/pytorch_lightning/trainer/connectors/training_trick_connector.py @@ -14,8 +14,7 @@ from typing import Dict, List, Optional, Union from pytorch_lightning.callbacks import GradientAccumulationScheduler -from pytorch_lightning.utilities import GradClipAlgorithmType -from pytorch_lightning.utilities.distributed import rank_zero_deprecation +from pytorch_lightning.utilities import GradClipAlgorithmType, rank_zero_deprecation from pytorch_lightning.utilities.exceptions import MisconfigurationException diff --git a/pytorch_lightning/trainer/data_loading.py b/pytorch_lightning/trainer/data_loading.py index 53c9b07dffbaf..ce6caa4e2f330 100644 --- a/pytorch_lightning/trainer/data_loading.py +++ b/pytorch_lightning/trainer/data_loading.py @@ -22,8 +22,8 @@ from torch.utils.data import BatchSampler, DataLoader, RandomSampler, SequentialSampler 
from torch.utils.data.distributed import DistributedSampler +import pytorch_lightning as pl from pytorch_lightning.accelerators import Accelerator -from pytorch_lightning.core import LightningModule from pytorch_lightning.overrides.distributed import IndexBatchSamplerWrapper, UnrepeatedDistributedSampler from pytorch_lightning.trainer.connectors.accelerator_connector import AcceleratorConnector from pytorch_lightning.trainer.states import RunningStage @@ -51,6 +51,7 @@ class TrainerDataLoadingMixin(ABC): test_dataloaders: Optional[List[DataLoader]] num_test_batches: List[Union[int, float]] limit_train_batches: Union[int, float] + log_every_n_steps: int overfit_batches: Union[int, float] distributed_sampler_kwargs: dict accelerator: Accelerator @@ -225,7 +226,7 @@ def _get_distributed_sampler( sampler = cls(dataloader.dataset, **kwargs) return sampler - def reset_train_dataloader(self, model: LightningModule) -> None: + def reset_train_dataloader(self, model: 'pl.LightningModule') -> None: """Resets the train dataloader and initialises required variables (number of batches, when to validate, etc.). @@ -261,6 +262,9 @@ def reset_train_dataloader(self, model: LightningModule) -> None: # wrap the sequence of train loaders to a CombinedLoader object for computing the num_training_batches self.train_dataloader = CombinedLoader(self.train_dataloader, self.data_connector.multiple_trainloader_mode) + # allow accelerator to modify dataloader + self.train_dataloader = self.accelerator.on_reset_train_dataloader(self.train_dataloader) + self.num_training_batches = len(self.train_dataloader) if has_len(self.train_dataloader) else float('inf') if isinstance(self.limit_train_batches, int) or self.limit_train_batches == 0.0: @@ -299,9 +303,16 @@ def reset_train_dataloader(self, model: LightningModule) -> None: self.val_check_batch = int(self.num_training_batches * self.val_check_interval) self.val_check_batch = max(1, self.val_check_batch) + if self.logger and self.num_training_batches < self.log_every_n_steps: + rank_zero_warn( + f"The number of training samples ({self.num_training_batches}) is smaller than the logging interval" + f" Trainer(log_every_n_steps={self.log_every_n_steps}). Set a lower value for log_every_n_steps if" + f" you want to see logs for the training epoch." + ) + def _reset_eval_dataloader( self, - model: LightningModule, + model: 'pl.LightningModule', mode: str, ) -> Tuple[List[Union[int, float]], List[DataLoader]]: """Generic method to reset a dataloader for evaluation. @@ -361,6 +372,10 @@ def _reset_eval_dataloader( # add worker_init_fn for correct seeding in worker processes apply_to_collection(dataloaders, dtype=DataLoader, function=self.auto_add_worker_init_fn) + # allow accelerator to modify dataloader + hook_name = f"on_reset_{mode}_dataloader" + dataloaders = getattr(self.accelerator, hook_name)(dataloaders) + loader_num_batches = [] # determine number of batches @@ -397,7 +412,7 @@ def _reset_eval_dataloader( return loader_num_batches, dataloaders - def reset_val_dataloader(self, model: LightningModule) -> None: + def reset_val_dataloader(self, model: 'pl.LightningModule') -> None: """Resets the validation dataloader and determines the number of batches. 
Args: @@ -429,7 +444,20 @@ def reset_predict_dataloader(self, model) -> None: if has_loader: self.num_predict_batches, self.predict_dataloaders = self._reset_eval_dataloader(model, 'predict') - def request_dataloader(self, model: LightningModule, stage: str) -> DataLoader: + def reset_train_val_dataloaders(self, model) -> None: + """ + Resets train and val dataloaders if none are attached to the trainer. + + The val dataloader must be initialized before training loop starts, as the training loop + inspects the val dataloader to determine whether to run the evaluation loop. + """ + if self.train_dataloader is None: + self.reset_train_dataloader(model) + + if self.val_dataloaders is None: + self.reset_val_dataloader(model) + + def request_dataloader(self, model: 'pl.LightningModule', stage: str) -> DataLoader: """Handles downloading data in the GPU or TPU case. Args: diff --git a/pytorch_lightning/trainer/deprecated_api.py b/pytorch_lightning/trainer/deprecated_api.py index 7e7817d277dae..a650c6bfe73e8 100644 --- a/pytorch_lightning/trainer/deprecated_api.py +++ b/pytorch_lightning/trainer/deprecated_api.py @@ -11,13 +11,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +from pytorch_lightning.loops import FitLoop from pytorch_lightning.utilities import rank_zero_deprecation class DeprecatedTrainerAttributes: sanity_checking: bool + fit_loop: FitLoop @property def running_sanity_check(self) -> bool: @@ -25,3 +26,10 @@ def running_sanity_check(self) -> bool: "`Trainer.running_sanity_check` has been renamed to `Trainer.sanity_checking` and will be removed in v1.5." ) return self.sanity_checking + + @property + def train_loop(self) -> FitLoop: + rank_zero_deprecation( + "`Trainer.train_loop` has been renamed to `Trainer.fit_loop` and will be removed in v1.6." + ) + return self.fit_loop diff --git a/pytorch_lightning/trainer/evaluation_loop.py b/pytorch_lightning/trainer/evaluation_loop.py deleted file mode 100644 index 810efef3fa52b..0000000000000 --- a/pytorch_lightning/trainer/evaluation_loop.py +++ /dev/null @@ -1,252 +0,0 @@ -# Copyright The PyTorch Lightning team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
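# [Editor's note] A hypothetical usage sketch for the `deprecated_api.py` shim above; it is
# not part of this patch. Until v1.6 both names resolve to the same `FitLoop` instance, with
# the old name warning on access:
from pytorch_lightning import Trainer

trainer = Trainer()
assert trainer.train_loop is trainer.fit_loop  # `train_loop` emits a rank-zero deprecation warning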
-from collections import OrderedDict -from typing import Any, Dict, List, Optional, Tuple, Union - -from torch.utils.data import DataLoader - -import pytorch_lightning as pl -from pytorch_lightning.core.step_result import Result -from pytorch_lightning.trainer.states import TrainerFn -from pytorch_lightning.trainer.supporters import PredictionCollection -from pytorch_lightning.utilities.model_helpers import is_overridden -from pytorch_lightning.utilities.types import EPOCH_OUTPUT, STEP_OUTPUT -from pytorch_lightning.utilities.warnings import WarningCache - - -class EvaluationLoop(object): - - def __init__(self, trainer: 'pl.Trainer'): - self.trainer: 'pl.Trainer' = trainer - self.outputs: EPOCH_OUTPUT = [] - self.predictions: Optional[PredictionCollection] = None - self.max_batches: Optional[List[Union[int, float]]] = None - self.warning_cache = WarningCache() - self.num_dataloaders: Optional[int] = None - - def on_trainer_init(self) -> None: - self.trainer.num_sanity_val_batches = [] - self.trainer.num_test_batches = [] - self.trainer.num_val_batches = [] - self.trainer.test_dataloaders = None - self.trainer.val_dataloaders = None - - # .validate() and .test() set this when they load a checkpoint - self.trainer.validated_ckpt_path = None - self.trainer.tested_ckpt_path = None - - # when true, print evaluation results in .validate() and .test() - self.trainer.verbose_evaluate = True - - def get_evaluation_dataloaders(self) -> Tuple[Optional[List[DataLoader]], List[Union[int, float]]]: - model = self.trainer.lightning_module - - # select dataloaders - if self.trainer.testing: - self.trainer.reset_test_dataloader(model) - - dataloaders = self.trainer.test_dataloaders - max_batches = self.trainer.num_test_batches - else: - # val - if self.trainer.val_dataloaders is None or self.trainer.reload_dataloaders_every_epoch: - self.trainer.reset_val_dataloader(model) - if self.trainer.sanity_checking: - self.trainer.num_sanity_val_batches = [ - min(self.trainer.num_sanity_val_steps, val_batches) for val_batches in self.trainer.num_val_batches - ] - max_batches = self.trainer.num_sanity_val_batches - else: - max_batches = self.trainer.num_val_batches - dataloaders = self.trainer.val_dataloaders - return dataloaders, max_batches - - def on_evaluation_start(self, *args: Any, **kwargs: Any) -> None: - self.should_track_batch_outputs_for_epoch_end: bool = self._should_track_batch_outputs_for_epoch_end() - if self.trainer.testing: - self.trainer.call_hook('on_test_start', *args, **kwargs) - else: - self.trainer.call_hook('on_validation_start', *args, **kwargs) - - def on_evaluation_model_eval(self) -> None: - model_ref = self.trainer.lightning_module - if self.trainer.testing: - model_ref.on_test_model_eval() - else: - model_ref.on_validation_model_eval() - - def on_evaluation_model_train(self) -> None: - model_ref = self.trainer.lightning_module - if self.trainer.testing: - model_ref.on_test_model_train() - else: - model_ref.on_validation_model_train() - - def on_evaluation_end(self, *args: Any, **kwargs: Any) -> None: - if self.trainer.testing: - self.trainer.call_hook('on_test_end', *args, **kwargs) - else: - self.trainer.call_hook('on_validation_end', *args, **kwargs) - - if self.trainer.state.fn != TrainerFn.FITTING: - # summarize profile results - self.trainer.profiler.describe() - - def reload_evaluation_dataloaders(self) -> None: - model = self.trainer.lightning_module - if self.trainer.testing: - self.trainer.reset_test_dataloader(model) - else: - self.trainer.reset_val_dataloader(model) - - def 
setup(self, max_batches: List[Union[int, float]], dataloaders: List[DataLoader]) -> None: - # bookkeeping - self.outputs = [] - self.predictions = PredictionCollection(self.trainer.global_rank, self.trainer.world_size) - - # convert max_batches to list - if isinstance(max_batches, int): - max_batches = [max_batches] * len(dataloaders) - - self.max_batches = max_batches - self.num_dataloaders = self._get_num_dataloaders(dataloaders) - - def on_evaluation_epoch_start(self, *args: Any, **kwargs: Any) -> None: - self.trainer.call_hook('on_epoch_start', *args, **kwargs) - - if self.trainer.testing: - self.trainer.call_hook('on_test_epoch_start', *args, **kwargs) - else: - self.trainer.call_hook('on_validation_epoch_start', *args, **kwargs) - - def _build_kwargs(self, batch: Any, batch_idx: int, dataloader_idx: int) -> Dict[str, Union[Any, int]]: - # make dataloader_idx arg in validation_step optional - step_kwargs = OrderedDict([('batch', batch), ('batch_idx', batch_idx)]) - - multiple_val_loaders = ( - not self.trainer.testing and self._get_num_dataloaders(self.trainer.val_dataloaders) > 1 - ) - multiple_test_loaders = (self.trainer.testing and self._get_num_dataloaders(self.trainer.test_dataloaders) > 1) - - if multiple_test_loaders or multiple_val_loaders: - step_kwargs['dataloader_idx'] = dataloader_idx - - return step_kwargs - - def _get_num_dataloaders(self, dataloaders: Optional[List[DataLoader]]) -> int: - # case where user does: - # return dl1, dl2 - if dataloaders is not None: - length = len(dataloaders) - if len(dataloaders) > 0 and isinstance(dataloaders[0], (list, tuple)): - length = len(dataloaders[0]) - return length - else: - return 0 - - def evaluation_step(self, batch: Any, batch_idx: int, dataloader_idx: int) -> Optional[STEP_OUTPUT]: - # configure step_kwargs - step_kwargs = self._build_kwargs(batch, batch_idx, dataloader_idx) - - model_ref = self.trainer.lightning_module - model_ref._results = Result() - - if self.trainer.testing: - model_ref._current_fx_name = "test_step" - with self.trainer.profiler.profile("test_step"): - output = self.trainer.accelerator.test_step(step_kwargs) - else: - model_ref._current_fx_name = "validation_step" - with self.trainer.profiler.profile("validation_step"): - output = self.trainer.accelerator.validation_step(step_kwargs) - - # capture any logged information - self.trainer.logger_connector.cache_logged_metrics() - # track batch size for weighted average - if isinstance(output, Result): - output.track_batch_size(batch) - - return output - - def evaluation_step_end(self, *args: Any, **kwargs: Any) -> Optional[STEP_OUTPUT]: - if self.trainer.testing: - output = self.trainer.call_hook('test_step_end', *args, **kwargs) - else: - output = self.trainer.call_hook('validation_step_end', *args, **kwargs) - return output - - def _should_track_batch_outputs_for_epoch_end(self) -> bool: - model = self.trainer.lightning_module - if self.trainer.testing: - return is_overridden('test_epoch_end', model=model) - else: - return is_overridden('validation_epoch_end', model=model) - - def evaluation_epoch_end(self, outputs: EPOCH_OUTPUT) -> None: - # unset dataloder_idx in model - self.trainer.logger_connector.evaluation_epoch_end() - - # call the model epoch end - model = self.trainer.lightning_module - - if self.trainer.testing: - if is_overridden('test_epoch_end', model=model): - model._current_fx_name = 'test_epoch_end' - model.test_epoch_end(outputs) - - else: - if is_overridden('validation_epoch_end', model=model): - model._current_fx_name = 
'validation_epoch_end' - model.validation_epoch_end(outputs) - - # capture logging - self.trainer.logger_connector.cache_logged_metrics() - - def on_evaluation_batch_start(self, batch: Any, batch_idx: int, dataloader_idx: int) -> None: - # set dataloader_idx to model and track batch_size - self.trainer.logger_connector.on_evaluation_batch_start(batch, dataloader_idx, self.num_dataloaders) - - if self.trainer.testing: - self.trainer.call_hook('on_test_batch_start', batch, batch_idx, dataloader_idx) - else: - self.trainer.call_hook('on_validation_batch_start', batch, batch_idx, dataloader_idx) - - def on_evaluation_batch_end( - self, - output: Optional[STEP_OUTPUT], - batch: Any, - batch_idx: int, - dataloader_idx: int, - ) -> None: - if self.trainer.testing: - self.trainer.call_hook('on_test_batch_end', output, batch, batch_idx, dataloader_idx) - else: - self.trainer.call_hook('on_validation_batch_end', output, batch, batch_idx, dataloader_idx) - - # store predicitons if do_write_predictions and track eval loss history - self.store_predictions(output, batch_idx, dataloader_idx) - - def store_predictions(self, output: Optional[STEP_OUTPUT], batch_idx: int, dataloader_idx: int) -> None: - # Add step predictions to prediction collection to write later - if output is not None and self.predictions is not None: - if isinstance(output, Result) and self.trainer.testing: - self.predictions.add(output.pop('predictions', None)) - - # track debug metrics - self.trainer.dev_debugger.track_eval_loss_history(batch_idx, dataloader_idx, output) - - def on_evaluation_epoch_end(self) -> None: - hook_name = "on_test_epoch_end" if self.trainer.testing else "on_validation_epoch_end" - self.trainer.call_hook(hook_name) - self.trainer.call_hook('on_epoch_end') diff --git a/pytorch_lightning/trainer/logging.py b/pytorch_lightning/trainer/logging.py index 0a59b9d8d4c36..74603782f3293 100644 --- a/pytorch_lightning/trainer/logging.py +++ b/pytorch_lightning/trainer/logging.py @@ -14,7 +14,7 @@ from abc import ABC -from pytorch_lightning.utilities.distributed import rank_zero_deprecation +from pytorch_lightning.utilities import rank_zero_deprecation from pytorch_lightning.utilities.metrics import metrics_to_scalars as new_metrics_to_scalars diff --git a/pytorch_lightning/trainer/model_hooks.py b/pytorch_lightning/trainer/model_hooks.py index 86cb1334a7067..2336379fc3d49 100644 --- a/pytorch_lightning/trainer/model_hooks.py +++ b/pytorch_lightning/trainer/model_hooks.py @@ -15,8 +15,8 @@ from abc import ABC from typing import Optional -from pytorch_lightning.core.lightning import LightningModule -from pytorch_lightning.utilities.distributed import rank_zero_deprecation +import pytorch_lightning as pl +from pytorch_lightning.utilities import rank_zero_deprecation from pytorch_lightning.utilities.signature_utils import is_param_in_hook_signature @@ -27,9 +27,9 @@ class TrainerModelHooksMixin(ABC): Use the utilities from ``pytorch_lightning.utilities.signature_utils`` instead. """ - lightning_module: LightningModule + lightning_module: 'pl.LightningModule' - def is_function_implemented(self, f_name: str, model: Optional[LightningModule] = None) -> bool: + def is_function_implemented(self, f_name: str, model: Optional['pl.LightningModule'] = None) -> bool: rank_zero_deprecation( "Internal: TrainerModelHooksMixin.is_function_implemented is deprecated in v1.4" " and will be removed in v1.6." 
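Editor's note: the recurring `LightningModule` -> `'pl.LightningModule'` annotation changes
throughout this patch follow the standard pattern for breaking an import cycle: import the
package as a whole and defer name lookup to a string annotation. A minimal sketch of the
pattern (the `describe` helper below is hypothetical, for illustration only):

    import pytorch_lightning as pl  # bind the package, not individual names, at import time


    def describe(model: 'pl.LightningModule') -> str:
        # the string annotation is never evaluated at import time, so no cycle forms
        return type(model).__name__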
diff --git a/pytorch_lightning/trainer/optimizers.py b/pytorch_lightning/trainer/optimizers.py index b5afe7bf75168..80ec5857de287 100644 --- a/pytorch_lightning/trainer/optimizers.py +++ b/pytorch_lightning/trainer/optimizers.py @@ -19,7 +19,7 @@ from torch import optim from torch.optim.optimizer import Optimizer -from pytorch_lightning.core.lightning import LightningModule +import pytorch_lightning as pl from pytorch_lightning.core.optimizer import LightningOptimizer from pytorch_lightning.utilities import rank_zero_warn from pytorch_lightning.utilities.exceptions import MisconfigurationException @@ -29,7 +29,7 @@ class TrainerOptimizersMixin(ABC): _lightning_optimizers: Optional[List[LightningOptimizer]] - def init_optimizers(self, model: LightningModule) -> Tuple[List, List, List]: + def init_optimizers(self, model: 'pl.LightningModule') -> Tuple[List, List, List]: self._lightning_optimizers = None optim_conf = model.configure_optimizers() if optim_conf is None: diff --git a/pytorch_lightning/trainer/predict_loop.py b/pytorch_lightning/trainer/predict_loop.py deleted file mode 100644 index c06ced6662d81..0000000000000 --- a/pytorch_lightning/trainer/predict_loop.py +++ /dev/null @@ -1,164 +0,0 @@ -# Copyright The PyTorch Lightning team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from collections import OrderedDict -from typing import Any, List, Optional - -import torch -from torch.utils.data.dataloader import DataLoader - -from pytorch_lightning.overrides.distributed import IndexBatchSamplerWrapper -from pytorch_lightning.plugins import DDPSpawnPlugin -from pytorch_lightning.utilities.exceptions import MisconfigurationException -from pytorch_lightning.utilities.types import _PREDICT_OUTPUT -from pytorch_lightning.utilities.warnings import WarningCache - - -class PredictLoop(object): - - def __init__(self, trainer): - self.trainer = trainer - self.max_batches = None - self.num_dataloaders = None - self.warning_cache = WarningCache() - self.batch_indices: Optional[List[int]] = None - self.epoch_batch_indices: Optional[List[List[int]]] = None - self.predictions: Optional[List[List[Any]]] = None - # `DDPSpawnPlugin` plugins and derivate don't support return predictions. - self._return_predictions: Optional[bool] = None - self._previous_grad_status: Optional[bool] = None - - @property - def return_predictions(self) -> bool: - return self._return_predictions - - @return_predictions.setter - def return_predictions(self, return_predictions: Optional[bool] = None) -> None: - # ``DDPSpawnPlugin`` plugins and derivate don't support return predictions. - is_ddp_spawn = isinstance(self.trainer.training_type_plugin, DDPSpawnPlugin) - if return_predictions and is_ddp_spawn: - raise MisconfigurationException( - "`return_predictions` should be set to `False` when using the `DDPSpawnPlugin` or children class. " - f"Found {return_predictions} with training_type_plugin {type(self.trainer.training_type_plugin)}." 
- ) - # For non ``DDPSpawnPlugin`` plugin, the `return_predictions` is True by default unless user decide otherwise. - self._return_predictions = not is_ddp_spawn if return_predictions is None else return_predictions - - @property - def should_store_predictions(self) -> bool: - any_pred = any(cb.interval.on_epoch for cb in self.trainer.prediction_writer_callbacks) - return self.return_predictions or any_pred - - def on_trainer_init(self): - self.trainer.num_predict_batches = [] - self.trainer.predicted_ckpt_path = None - - def get_predict_dataloaders(self): - self.trainer.reset_predict_dataloader(self.trainer.lightning_module) - - dataloaders = self.trainer.predict_dataloaders - max_batches = self.trainer.num_predict_batches - - return dataloaders, max_batches - - def should_skip_predict(self, max_batches): - return sum(max_batches) == 0 - - def on_predict_model_eval(self): - model_ref = self.trainer.lightning_module - model_ref.on_predict_model_eval() - - def setup(self, max_batches, dataloaders): - # convert max_batches to list - if isinstance(max_batches, int): - max_batches = [max_batches] * len(dataloaders) - - self.max_batches = max_batches - self.num_dataloaders = self._get_num_dataloaders(dataloaders) - self.predictions = [[] for _ in range(self.num_dataloaders)] - self.epoch_batch_indices = [[] for _ in range(self.num_dataloaders)] - - def _get_num_dataloaders(self, dataloaders: List[DataLoader]) -> int: - # case where user does: - # return dl1, dl2 - length = len(dataloaders) - if len(dataloaders) > 0 and isinstance(dataloaders[0], (list, tuple)): - length = len(dataloaders[0]) - return length - - def _build_kwargs(self, batch, batch_idx, dataloader_idx): - step_kwargs = OrderedDict([('batch', batch), ('batch_idx', batch_idx)]) - if self.num_dataloaders: - step_kwargs['dataloader_idx'] = dataloader_idx - return step_kwargs - - def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: int) -> None: - # configure step_kwargs - step_kwargs = self._build_kwargs(batch, batch_idx, dataloader_idx) - - # extract batch_indices and store them - self._store_batch_indices(dataloader_idx) - - model_ref = self.trainer.lightning_module - - self.trainer.call_hook("on_predict_batch_start", batch, batch_idx, dataloader_idx) - - model_ref._current_fx_name = "predict_step" - predictions = self.trainer.accelerator.predict_step(step_kwargs) - - if predictions is None: - self.warning_cache.warn("predict returned None if it was on purpose, ignore this warning...") - - self.trainer.call_hook("on_predict_batch_end", predictions, batch, batch_idx, dataloader_idx) - - if self.should_store_predictions: - self.predictions[dataloader_idx].append(predictions) - - def _store_batch_indices(self, dataloader_idx: int) -> None: - batch_sampler = self.trainer.predict_dataloaders[dataloader_idx].batch_sampler - if isinstance(batch_sampler, IndexBatchSamplerWrapper): - self.batch_indices = batch_sampler.batch_indices - if self.should_store_predictions: - self.epoch_batch_indices[dataloader_idx].append(batch_sampler.batch_indices) - - def on_predict_start(self) -> None: - # enable eval mode + no grads - self.on_predict_model_eval() - self.trainer.lightning_module.zero_grad() - self._previous_grad_status = torch.is_grad_enabled() - torch.set_grad_enabled(False) - - # hook - self.trainer.call_hook("on_predict_start") - self.trainer.call_hook("on_predict_epoch_start") - - def on_predict_epoch_end(self) -> Optional[_PREDICT_OUTPUT]: - self.trainer.profiler.describe() - - results = self.predictions - - 
self.trainer.call_hook("on_predict_epoch_end", results) - - if self.return_predictions: - return results[0] if self.num_dataloaders == 1 else results - - def on_predict_end(self): - # clear memory. the predictions are extracted in `on_predict_epoch_end`. - self.predictions = None - self.batch_indices = None - - # reset grad to its previous status. - torch.set_grad_enabled(self._previous_grad_status) - - # hook - self.trainer.call_hook("on_predict_end") diff --git a/pytorch_lightning/trainer/progress.py b/pytorch_lightning/trainer/progress.py index fce4b431b347c..2d7a1d7e8f53a 100644 --- a/pytorch_lightning/trainer/progress.py +++ b/pytorch_lightning/trainer/progress.py @@ -11,12 +11,31 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from dataclasses import dataclass, field +from dataclasses import asdict, dataclass, field from typing import Optional @dataclass -class Tracker: +class _DataclassStateDictMixin: + + def __getstate__(self) -> dict: + return asdict(self) + + def __setstate__(self, state: dict) -> None: + self.__dict__.update(state) + + def state_dict(self) -> dict: + return self.__getstate__() + + @classmethod + def from_state_dict(cls, state_dict: dict) -> "_DataclassStateDictMixin": + obj = cls() + obj.__setstate__(state_dict) + return obj + + +@dataclass +class Tracker(_DataclassStateDictMixin): """ Track an event's progress. @@ -28,6 +47,7 @@ class Tracker: Attributes set to ``None`` are treated as unused and are restricted. """ + ready: Optional[int] = 0 started: Optional[int] = 0 processed: Optional[int] = 0 @@ -55,7 +75,7 @@ def __repr__(self): @dataclass -class Progress: +class Progress(_DataclassStateDictMixin): """ Track aggregated and current progress. @@ -63,6 +83,7 @@ class Progress: total: Intended to track the total progress of an event current: Intended to track the current progress of an event """ + total: Tracker = field(default_factory=Tracker) current: Tracker = field(default_factory=Tracker) @@ -91,35 +112,70 @@ def increment_completed(self) -> None: self.current.completed += 1 @classmethod - def from_defaults(cls, **kwargs: Optional[int]) -> 'Progress': + def from_defaults(cls, **kwargs: Optional[int]) -> "Progress": return cls(total=Tracker(**kwargs), current=Tracker(**kwargs)) + def __setstate__(self, state: dict) -> None: + self.total.__setstate__(state["total"]) + self.current.__setstate__(state["current"]) -@dataclass -class LoopProgress: + +class BatchProgress(Progress): + """ + Tracks the batch progress + + Args: + total: Tracks the total epoch progress + current: Tracks the current epoch progress """ - Track loop progress during execution. + +@dataclass +class EpochProgress(Progress): + """ + Tracks the epoch progress These counters are local to a trainer rank. By default, they are not globally synced across all ranks. Args: - epoch: Tracks epochs progress. + total: Tracks the total epoch progress + current: Tracks the current epoch progress batch: Tracks batch progress. 
""" - epoch: Progress = field(default_factory=Progress) - batch: Progress = field(default_factory=Progress) - def increment_epoch_completed(self) -> None: - self.epoch.increment_completed() - self.reset_on_epoch() + batch: BatchProgress = field(default_factory=BatchProgress) def reset_on_epoch(self) -> None: self.batch.current.reset() - self.epoch.current.reset() + + def __setstate__(self, state: dict) -> None: + super().__setstate__(state) + self.batch.__setstate__(state["batch"]) @dataclass -class OptimizationProgress: +class OptimizerProgress(_DataclassStateDictMixin): + """ + Track optimizer progress. + + Args: + step: Tracks ``optimizer.step`` calls. + zero_grad: Tracks ``optimizer.zero_grad`` calls. + """ + + step: Progress = field(default_factory=lambda: Progress.from_defaults(processed=None)) + zero_grad: Progress = field(default_factory=lambda: Progress.from_defaults(processed=None)) + + def reset_on_epoch(self) -> None: + self.step.current.reset() + self.zero_grad.current.reset() + + def __setstate__(self, state: dict) -> None: + self.step.__setstate__(state["step"]) + self.zero_grad.__setstate__(state["zero_grad"]) + + +@dataclass +class OptimizationProgress(_DataclassStateDictMixin): """ Track optimization progress. @@ -127,54 +183,86 @@ class OptimizationProgress: optimizer: Tracks optimizer progress. scheduler: Tracks scheduler progress. """ - optimizer: Progress = Progress.from_defaults(processed=None) - scheduler: Progress = Progress.from_defaults(started=None, processed=None) - zero_grad: Progress = Progress.from_defaults(processed=None) + + # TODO: support for multiple optimizers + optimizer: OptimizerProgress = field(default_factory=OptimizerProgress) + scheduler: Progress = field(default_factory=lambda: Progress.from_defaults(started=None, processed=None)) @property def optimizer_steps(self) -> int: - return self.optimizer.total.completed + return self.optimizer.step.total.completed @property def scheduler_steps(self) -> int: return self.scheduler.total.completed + def reset_on_epoch(self) -> None: + self.optimizer.reset_on_epoch() + self.scheduler.current.reset() + + def __setstate__(self, state: dict) -> None: + self.optimizer.__setstate__(state["optimizer"]) + self.scheduler.__setstate__(state["scheduler"]) + @dataclass -class TrainingProgress(Progress): +class EpochLoopProgress(_DataclassStateDictMixin): """ - Extends ``Progress`` with training specific attributes + Tracks epoch loop progress. + These counters are local to a trainer rank. By default, they are not globally synced across all ranks. Args: - optimization: Tracks optimization progress + epoch: Tracks epochs progress. 
""" - optimization: OptimizationProgress = field(default_factory=OptimizationProgress) + epoch: EpochProgress = field(default_factory=EpochProgress) -@dataclass -class TrainingLoopProgress(LoopProgress): - epoch: TrainingProgress = field(default_factory=TrainingProgress) + def increment_epoch_completed(self) -> None: + self.epoch.increment_completed() + self.reset_on_epoch() def reset_on_epoch(self) -> None: - # override to avoid resetting `epoch.current` - self.batch.current.reset() + self.epoch.reset_on_epoch() + self.epoch.current.reset() + + def __setstate__(self, state: dict) -> None: + self.epoch.__setstate__(state["epoch"]) @dataclass -class FitLoopProgress: - train: TrainingLoopProgress = field(default_factory=TrainingLoopProgress) - val: LoopProgress = field(default_factory=LoopProgress) +class TrainingEpochProgress(EpochProgress): + """ + Extends ``EpochProgress`` with training specific attributes + + Args: + total: Tracks the total epoch progress. + current: Tracks the current epoch progress. + batch: Tracks batch progress. + optim: Tracks optimization progress. + val: Tracks val_loop progress. + """ + + optim: OptimizationProgress = field(default_factory=OptimizationProgress) + val: EpochLoopProgress = field(default_factory=EpochLoopProgress) + + def __setstate__(self, state: dict) -> None: + super().__setstate__(state) + self.optim.__setstate__(state["optim"]) + self.val.__setstate__(state["val"]) @dataclass -class LoopState: +class FitLoopProgress(EpochLoopProgress): """ - Basic dataclass to track loop progress across trainer functions during trainer execution. + Extends ``EpochLoopProgress`` with fit specific attributes - This class will be removed and these attributes will live in each loop. + Args: + epoch: Tracks epochs progress. """ - fit: FitLoopProgress = field(default_factory=FitLoopProgress) - val: LoopProgress = field(default_factory=LoopProgress) - test: LoopProgress = field(default_factory=LoopProgress) - predict: LoopProgress = field(default_factory=LoopProgress) + epoch: TrainingEpochProgress = field(default_factory=TrainingEpochProgress) + + def reset_on_epoch(self) -> None: + # do not reset `epoch.current` as it should track the number of epochs this `fit` call + self.epoch.reset_on_epoch() + self.epoch.optim.reset_on_epoch() diff --git a/pytorch_lightning/trainer/properties.py b/pytorch_lightning/trainer/properties.py index 440a6693aba43..edf30d7f3f79f 100644 --- a/pytorch_lightning/trainer/properties.py +++ b/pytorch_lightning/trainer/properties.py @@ -15,25 +15,29 @@ import os from abc import ABC from argparse import ArgumentParser, Namespace +from pathlib import Path from typing import cast, List, Optional, Type, TypeVar, Union import torch from torch.optim import Optimizer +import pytorch_lightning as pl from pytorch_lightning.accelerators import Accelerator from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint, ProgressBarBase from pytorch_lightning.callbacks.base import Callback from pytorch_lightning.callbacks.prediction_writer import BasePredictionWriter -from pytorch_lightning.core.lightning import LightningModule from pytorch_lightning.core.optimizer import LightningOptimizer from pytorch_lightning.loggers import LightningLoggerBase from pytorch_lightning.loggers.tensorboard import TensorBoardLogger +from pytorch_lightning.loops import PredictionLoop +from pytorch_lightning.loops.dataloader.evaluation_loop import EvaluationLoop +from pytorch_lightning.loops.fit_loop import FitLoop from pytorch_lightning.plugins import 
ParallelPlugin, PrecisionPlugin, TrainingTypePlugin from pytorch_lightning.trainer.connectors.accelerator_connector import AcceleratorConnector from pytorch_lightning.trainer.connectors.checkpoint_connector import CheckpointConnector from pytorch_lightning.trainer.connectors.logger_connector import LoggerConnector -from pytorch_lightning.trainer.states import RunningStage, TrainerState, TrainerStatus -from pytorch_lightning.trainer.training_loop import TrainLoop +from pytorch_lightning.trainer.connectors.logger_connector.result import ResultCollection +from pytorch_lightning.trainer.states import RunningStage, TrainerFn, TrainerState, TrainerStatus from pytorch_lightning.utilities import DeviceType, DistributedType, rank_zero_warn from pytorch_lightning.utilities.argparse import ( add_argparse_args, @@ -59,7 +63,13 @@ class TrainerProperties(ABC): logger: LightningLoggerBase logger_connector: LoggerConnector state: TrainerState - train_loop: TrainLoop + fit_loop: FitLoop + validate_loop: EvaluationLoop + test_loop: EvaluationLoop + predict_loop: PredictionLoop + """ + Accelerator properties + """ @property def accelerator(self) -> Accelerator: @@ -125,6 +135,10 @@ def root_gpu(self) -> Optional[int]: def tpu_cores(self) -> int: return self.accelerator_connector.tpu_cores + @property + def ipus(self) -> int: + return self.accelerator_connector.ipus + @property def num_gpus(self) -> int: return self.accelerator_connector.num_gpus @@ -134,46 +148,92 @@ def data_parallel_device_ids(self) -> Optional[List[int]]: return self.accelerator_connector.parallel_device_ids @property - def log_dir(self) -> Optional[str]: - if self.logger is None: - dirpath = self.default_root_dir - else: - dirpath = getattr(self.logger, 'log_dir' if isinstance(self.logger, TensorBoardLogger) else 'save_dir') + def lightning_module(self) -> 'pl.LightningModule': + return self.accelerator.lightning_module - dirpath = self.accelerator.broadcast(dirpath) - return dirpath + @property + def optimizers(self) -> Optional[List[Optimizer]]: + return self.accelerator.optimizers + + @optimizers.setter + def optimizers(self, new_optims: Optional[List[Optimizer]]) -> None: + # Necessary to rewrap optimizers to lightning + # They will be re-created when accessing + # the `lightning_optimizers` trainer property + self._lightning_optimizers = None + + self.accelerator.optimizers = new_optims @property - def use_amp(self) -> bool: - return self.precision == 16 + def lr_schedulers(self) -> Optional[list]: + return self.accelerator.lr_schedulers + + @lr_schedulers.setter + def lr_schedulers(self, new_schedulers: Optional[list]) -> None: + self.accelerator.lr_schedulers = new_schedulers @property - def callback_metrics(self) -> dict: - return self.logger_connector.callback_metrics + def optimizer_frequencies(self) -> list: + return self.accelerator.optimizer_frequencies - @callback_metrics.setter - def callback_metrics(self, x: dict) -> None: - self.logger_connector.callback_metrics = x + @optimizer_frequencies.setter + def optimizer_frequencies(self, new_freqs: list) -> None: + self.accelerator.optimizer_frequencies = new_freqs @property - def logged_metrics(self) -> dict: - return self.logger_connector.logged_metrics + def amp_backend(self) -> Optional[str]: + return self.accelerator.amp_backend - @logged_metrics.setter - def logged_metrics(self, x: dict) -> None: - self.logger_connector.logged_metrics = x + @property + def precision(self) -> Union[str, int]: + return self.accelerator.precision @property - def progress_bar_metrics(self) 
-> dict: - return self.logger_connector.progress_bar_metrics + def scaler(self): + return self.accelerator.scaler - @progress_bar_metrics.setter - def progress_bar_metrics(self, x: dict) -> None: - self.logger_connector.progress_bar_metrics = x + @property + def gpus(self) -> Optional[Union[List[int], str, int]]: + return self.accelerator_connector.gpus @property - def interrupted(self) -> bool: - return self.state.status == TrainerStatus.INTERRUPTED + def model(self) -> torch.nn.Module: + """ + The LightningModule, but possibly wrapped into DataParallel or DistributedDataParallel. + To access the pure LightningModule, use + :meth:`~pytorch_lightning.trainer.trainer.Trainer.lightning_module` instead. + """ + return self.accelerator.model + + @model.setter + def model(self, model: torch.nn.Module) -> None: + """ + Setter for the model, pass-through to accelerator and plugin where the model reference is stored. + Used by the Tuner to reset the state of Trainer and Accelerator. + + Args: + model: The LightningModule, possibly wrapped into DataParallel or DistributedDataParallel, depending + on the backend. + """ + self.accelerator.model = model + + """ + General properties + """ + + @property + def log_dir(self) -> Optional[str]: + if self.logger is None: + dirpath = self.default_root_dir + else: + dirpath = getattr(self.logger, 'log_dir' if isinstance(self.logger, TensorBoardLogger) else 'save_dir') + + dirpath = self.accelerator.broadcast(dirpath) + return dirpath + + @property + def use_amp(self) -> bool: + return self.precision == 16 @property def is_global_zero(self) -> bool: @@ -194,39 +254,16 @@ def slurm_job_id(self) -> Optional[int]: job_id = None return job_id - @classmethod - def default_attributes(cls) -> dict: - init_signature = inspect.signature(cls) - return {k: v.default for k, v in init_signature.parameters.items()} - - @classmethod - def get_deprecated_arg_names(cls) -> List: - """Returns a list with deprecated Trainer arguments.""" - depr_arg_names = [] - for name, val in cls.__dict__.items(): - if name.startswith('DEPRECATED') and isinstance(val, (tuple, list)): - depr_arg_names.extend(val) - return depr_arg_names - - @classmethod - def from_argparse_args(cls: Type['_T'], args: Union[Namespace, ArgumentParser], **kwargs) -> '_T': - return from_argparse_args(cls, args, **kwargs) - - @classmethod - def parse_argparser(cls, arg_parser: Union[ArgumentParser, Namespace]) -> Namespace: - return parse_argparser(cls, arg_parser) - - @classmethod - def match_env_arguments(cls) -> Namespace: - return parse_env_variables(cls) - - @classmethod - def add_argparse_args(cls, parent_parser: ArgumentParser, **kwargs) -> ArgumentParser: - return add_argparse_args(cls, parent_parser, **kwargs) + @property + def lightning_optimizers(self) -> List[LightningOptimizer]: + if self._lightning_optimizers is None: + self.convert_to_lightning_optimizers() + return self._lightning_optimizers @property - def gpus(self) -> Optional[Union[List[int], str, int]]: - return self.accelerator_connector.gpus + def distributed_sampler_kwargs(self) -> Optional[dict]: + if isinstance(self.training_type_plugin, ParallelPlugin): + return self.training_type_plugin.distributed_sampler_kwargs @property def data_parallel(self) -> bool: @@ -242,11 +279,11 @@ def progress_bar_callback(self) -> Optional[ProgressBarBase]: def progress_bar_dict(self) -> dict: """ Read-only for progress bar metrics. 
""" ref_model = self.lightning_module - ref_model = cast(LightningModule, ref_model) + ref_model = cast(pl.LightningModule, ref_model) standard_metrics = ref_model.get_progress_bar_dict() - logged_metrics = self.progress_bar_metrics - duplicates = list(standard_metrics.keys() & logged_metrics.keys()) + pbar_metrics = self.progress_bar_metrics + duplicates = list(standard_metrics.keys() & pbar_metrics.keys()) if duplicates: rank_zero_warn( f"The progress bar already tracks a metric with the name(s) '{', '.join(duplicates)}' and" @@ -254,9 +291,7 @@ def progress_bar_dict(self) -> dict: f" If this is undesired, change the name or override `get_progress_bar_dict()`" f" in `LightingModule`.", UserWarning ) - all_metrics = dict(**standard_metrics) - all_metrics.update(**logged_metrics) - return all_metrics + return {**standard_metrics, **pbar_metrics} @property def enable_validation(self) -> bool: @@ -327,94 +362,54 @@ def checkpoint_callbacks(self) -> List[ModelCheckpoint]: """ return [c for c in self.callbacks if isinstance(c, ModelCheckpoint)] - def save_checkpoint(self, filepath, weights_only: bool = False) -> None: - self.checkpoint_connector.save_checkpoint(filepath, weights_only) - @property - def model(self) -> torch.nn.Module: - """ - The LightningModule, but possibly wrapped into DataParallel or DistributedDataParallel. - To access the pure LightningModule, use - :meth:`~pytorch_lightning.trainer.trainer.Trainer.lightning_module` instead. - """ - return self.accelerator.model - - @model.setter - def model(self, model: torch.nn.Module) -> None: - """ - Setter for the model, pass-through to accelerator and plugin where the model reference is stored. - Used by the Tuner to reset the state of Trainer and Accelerator. - - Args: - model: The LightningModule, possibly wrapped into DataParallel or DistributedDataParallel, depending - on the backend. 
- """ - self.accelerator.model = model + def resume_from_checkpoint(self) -> Optional[Union[str, Path]]: + return self.checkpoint_connector.resume_checkpoint_path - @property - def lightning_optimizers(self) -> List[LightningOptimizer]: - if self._lightning_optimizers is None: - self.convert_to_lightning_optimizers() - return self._lightning_optimizers - - @property - def lightning_module(self) -> LightningModule: - return self.accelerator.lightning_module - - @property - def optimizers(self) -> Optional[List[Optimizer]]: - return self.accelerator.optimizers - - @optimizers.setter - def optimizers(self, new_optims: Optional[List[Optimizer]]) -> None: - # Necessary to rewrap optimizers to lightning - # They will be re-created when accessing - # the `lightning_optimizers` trainer property - self._lightning_optimizers = None - - self.accelerator.optimizers = new_optims - - @property - def lr_schedulers(self) -> Optional[list]: - return self.accelerator.lr_schedulers + def save_checkpoint(self, filepath, weights_only: bool = False) -> None: + self.checkpoint_connector.save_checkpoint(filepath, weights_only) - @lr_schedulers.setter - def lr_schedulers(self, new_schedulers: Optional[list]) -> None: - self.accelerator.lr_schedulers = new_schedulers + """ + Parsing properties + """ - @property - def optimizer_frequencies(self) -> list: - return self.accelerator.optimizer_frequencies + @classmethod + def default_attributes(cls) -> dict: + init_signature = inspect.signature(cls) + return {k: v.default for k, v in init_signature.parameters.items()} - @optimizer_frequencies.setter - def optimizer_frequencies(self, new_freqs: list) -> None: - self.accelerator.optimizer_frequencies = new_freqs + @classmethod + def get_deprecated_arg_names(cls) -> List: + """Returns a list with deprecated Trainer arguments.""" + depr_arg_names = [] + for name, val in cls.__dict__.items(): + if name.startswith('DEPRECATED') and isinstance(val, (tuple, list)): + depr_arg_names.extend(val) + return depr_arg_names - @property - def amp_backend(self) -> Optional[str]: - return self.accelerator.amp_backend + @classmethod + def from_argparse_args(cls: Type['_T'], args: Union[Namespace, ArgumentParser], **kwargs) -> '_T': + return from_argparse_args(cls, args, **kwargs) - @property - def precision(self) -> Union[str, int]: - return self.accelerator.precision + @classmethod + def parse_argparser(cls, arg_parser: Union[ArgumentParser, Namespace]) -> Namespace: + return parse_argparser(cls, arg_parser) - @property - def scaler(self): - return self.accelerator.scaler + @classmethod + def match_env_arguments(cls) -> Namespace: + return parse_env_variables(cls) - # TODO: refactor this so that it can be done in LightningOptimizer - def __getstate__(self): - # remove lightning_optimizers - self._lightning_optimizers = None - return self.__dict__ + @classmethod + def add_argparse_args(cls, parent_parser: ArgumentParser, **kwargs) -> ArgumentParser: + return add_argparse_args(cls, parent_parser, **kwargs) - def __setstate__(self, state): - self.__dict__ = state + """ + State properties + """ @property - def distributed_sampler_kwargs(self) -> Optional[dict]: - if isinstance(self.training_type_plugin, ParallelPlugin): - return self.training_type_plugin.distributed_sampler_kwargs + def interrupted(self) -> bool: + return self.state.status == TrainerStatus.INTERRUPTED @property def training(self) -> bool: @@ -486,29 +481,91 @@ def sanity_checking(self, val: bool) -> None: elif self.sanity_checking: self.state.stage = None + """ + Loop 
properties + """ + @property def global_step(self) -> int: - return self.train_loop.global_step + return self.fit_loop.global_step @property def current_epoch(self) -> int: - return self.train_loop.current_epoch + return self.fit_loop.current_epoch @property def max_epochs(self) -> Optional[int]: - return self.train_loop.max_epochs + return self.fit_loop.max_epochs @property def min_epochs(self) -> Optional[int]: - return self.train_loop.min_epochs + return self.fit_loop.min_epochs @property def max_steps(self) -> Optional[int]: - return self.train_loop.max_steps + return self.fit_loop.max_steps @property def min_steps(self) -> Optional[int]: - return self.train_loop.min_steps + return self.fit_loop.min_steps + + @property + def is_last_batch(self) -> bool: + return self.fit_loop.epoch_loop.is_last_batch + + @property + def _evaluation_loop(self) -> EvaluationLoop: + if self.state.fn in (TrainerFn.FITTING, TrainerFn.TUNING): + return self.fit_loop.epoch_loop.val_loop + if self.state.fn == TrainerFn.VALIDATING: + return self.validate_loop + if self.state.fn == TrainerFn.TESTING: + return self.test_loop + raise RuntimeError("The `Trainer._evaluation_loop` property isn't defined. Accessed outside of scope") + + @property + def _active_loop(self) -> Optional[Union[FitLoop, EvaluationLoop, PredictionLoop]]: + if self.training: + return self.fit_loop + if self.sanity_checking or self.evaluating: + return self._evaluation_loop + if self.predicting: + return self.predict_loop + + """ + Logging properties + """ + + @property + def callback_metrics(self) -> dict: + return self.logger_connector.callback_metrics + + @property + def logged_metrics(self) -> dict: + return self.logger_connector.logged_metrics + + @property + def progress_bar_metrics(self) -> dict: + return self.logger_connector.progress_bar_metrics + + @property + def _results(self) -> Optional[ResultCollection]: + active_loop = self._active_loop + if active_loop is not None: + return active_loop._results + + """ + Other + """ + + # TODO: refactor this so that it can be done in LightningOptimizer + def __getstate__(self): + # remove lightning_optimizers + self._lightning_optimizers = None + return self.__dict__ + + def __setstate__(self, state): + self.__dict__ = state # Used to represent the concrete type TrainerProperties class methods are called on. 
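Editor's note: a minimal round trip through the `_DataclassStateDictMixin` API introduced in
`trainer/progress.py` above, which parallels the `__getstate__`/`__setstate__` pair kept on
`TrainerProperties` for pickling. Illustrative sketch only, assuming the module layout this
patch sets up:

    from pytorch_lightning.trainer.progress import Progress

    progress = Progress.from_defaults(processed=None)  # trackers where `processed` is unused
    progress.increment_completed()

    state = progress.state_dict()  # plain nested dicts via `dataclasses.asdict`, checkpoint-friendly
    restored = Progress.from_state_dict(state)  # fresh instance, then `__setstate__` in place
    assert restored.current.completed == progress.current.completed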
diff --git a/pytorch_lightning/trainer/supporters.py b/pytorch_lightning/trainer/supporters.py index df6db1e180c24..e93d87291193d 100644 --- a/pytorch_lightning/trainer/supporters.py +++ b/pytorch_lightning/trainer/supporters.py @@ -102,8 +102,7 @@ def _agg_memory(self, how: str): if self.last_idx is not None: if self.rotated: return getattr(self.memory, how)() - else: - return getattr(self.memory[:self.current_idx], how)() + return getattr(self.memory[:self.current_idx], how)() class PredictionCollection(object): @@ -158,7 +157,7 @@ def to_disk(self) -> None: # Switch predictions so each entry has its own dict outputs = [] for values in zip(*predictions.values()): - output_element = {k: v for k, v in zip(predictions.keys(), values)} + output_element = dict(zip(predictions.keys(), values)) outputs.append(output_element) # Write predictions for current file to disk @@ -295,10 +294,10 @@ def _get_len_recursive(self, data) -> int: if isinstance(data, Dataset): return len(data) - elif isinstance(data, (float, int)): + if isinstance(data, (float, int)): return data - elif isinstance(data, Mapping): + if isinstance(data, Mapping): if any(isinstance(v, (Mapping, Sequence, Dataset, Iterable)) for v in data.values()): return {k: self._get_len_recursive(v) for k, v in data.items()} elif isinstance(data, Sequence): @@ -417,9 +416,7 @@ def _calc_num_batches(loaders: Any) -> Union[int, float]: if isinstance(all_lengths, (int, float)): return all_lengths - - else: - return _nested_calc_num_data(all_lengths, min) + return _nested_calc_num_data(all_lengths, min) def __len__(self) -> int: return self._calc_num_batches(self.loaders) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index b24d6d7b2da48..78aa5b2b73d7f 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -13,23 +13,22 @@ # limitations under the License. 
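# [Editor's note - illustrative micro-example, not from this patch] The `supporters.py` cleanup
# above swaps a redundant dict comprehension for the equivalent, more direct constructor call:
keys, values = ("a", "b"), (1, 2)
assert {k: v for k, v in zip(keys, values)} == dict(zip(keys, values)) == {"a": 1, "b": 2}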
"""Trainer to automate the training.""" import logging +import traceback import warnings from datetime import timedelta -from itertools import count from pathlib import Path from typing import Any, Dict, Iterable, List, Optional, Union from weakref import proxy import torch -from torch.utils.data import DataLoader +import pytorch_lightning as pl from pytorch_lightning.accelerators import Accelerator from pytorch_lightning.callbacks import Callback from pytorch_lightning.core.datamodule import LightningDataModule -from pytorch_lightning.core.lightning import LightningModule from pytorch_lightning.core.memory import ModelSummary -from pytorch_lightning.core.step_result import Result from pytorch_lightning.loggers import LightningLoggerBase +from pytorch_lightning.loops import EvaluationLoop, FitLoop, PredictionLoop from pytorch_lightning.plugins import Plugin from pytorch_lightning.plugins.environments import ClusterEnvironment from pytorch_lightning.profiler import ( @@ -38,6 +37,7 @@ PassThroughProfiler, PyTorchProfiler, SimpleProfiler, + XLAProfiler, ) from pytorch_lightning.trainer.callback_hook import TrainerCallbackHookMixin from pytorch_lightning.trainer.configuration_validator import ConfigValidator @@ -54,24 +54,29 @@ from pytorch_lightning.trainer.connectors.training_trick_connector import TrainingTricksConnector from pytorch_lightning.trainer.data_loading import TrainerDataLoadingMixin from pytorch_lightning.trainer.deprecated_api import DeprecatedTrainerAttributes -from pytorch_lightning.trainer.evaluation_loop import EvaluationLoop from pytorch_lightning.trainer.logging import TrainerLoggingMixin from pytorch_lightning.trainer.model_hooks import TrainerModelHooksMixin from pytorch_lightning.trainer.optimizers import TrainerOptimizersMixin -from pytorch_lightning.trainer.predict_loop import PredictLoop from pytorch_lightning.trainer.properties import TrainerProperties from pytorch_lightning.trainer.states import TrainerFn, TrainerState, TrainerStatus -from pytorch_lightning.trainer.training_loop import TrainLoop from pytorch_lightning.trainer.training_tricks import TrainerTrainingTricksMixin from pytorch_lightning.tuner.lr_finder import _LRFinder from pytorch_lightning.tuner.tuning import Tuner -from pytorch_lightning.utilities import DeviceType, parsing, rank_zero_warn +from pytorch_lightning.utilities import ( + _IPU_AVAILABLE, + _TPU_AVAILABLE, + DeviceType, + parsing, + rank_zero_deprecation, + rank_zero_info, + rank_zero_warn, +) from pytorch_lightning.utilities.debugging import InternalDebugger +from pytorch_lightning.utilities.distributed import distributed_available from pytorch_lightning.utilities.exceptions import MisconfigurationException -from pytorch_lightning.utilities.memory import recursive_detach from pytorch_lightning.utilities.model_helpers import is_overridden from pytorch_lightning.utilities.seed import reset_seed -from pytorch_lightning.utilities.types import _EVALUATE_OUTPUT, _PREDICT_OUTPUT +from pytorch_lightning.utilities.types import _EVALUATE_OUTPUT, _PREDICT_OUTPUT, EVAL_DATALOADERS, TRAIN_DATALOADERS log = logging.getLogger(__name__) # warnings to ignore in trainer @@ -107,6 +112,7 @@ def __init__( gpus: Optional[Union[List[int], str, int]] = None, auto_select_gpus: bool = False, tpu_cores: Optional[Union[List[int], str, int]] = None, + ipus: Optional[int] = None, log_gpu_memory: Optional[str] = None, progress_bar_refresh_rate: Optional[int] = None, overfit_batches: Union[int, float] = 0.0, @@ -283,6 +289,8 @@ def __init__( tpu_cores: How many TPU 
cores to train on (1 or 8) / Single TPU to train on [1] + ipus: How many IPUs to train on. + track_grad_norm: -1 no tracking. Otherwise tracks that p-norm. May be set to 'inf' infinity-norm. truncated_bptt_steps: Deprecated in v1.3 to be removed in 1.5. @@ -323,20 +331,26 @@ def __init__( self.optimizer_connector = OptimizerConnector(self) self.accelerator_connector = AcceleratorConnector( - num_processes, tpu_cores, distributed_backend, auto_select_gpus, gpus, num_nodes, sync_batchnorm, benchmark, - replace_sampler_ddp, deterministic, precision, amp_backend, amp_level, plugins + num_processes, tpu_cores, ipus, distributed_backend, auto_select_gpus, gpus, num_nodes, sync_batchnorm, + benchmark, replace_sampler_ddp, deterministic, precision, amp_backend, amp_level, plugins ) self.logger_connector = LoggerConnector(self, log_gpu_memory) self.model_connector = ModelConnector(self) self.callback_connector = CallbackConnector(self) self.debugging_connector = DebuggingConnector(self) self.training_tricks_connector = TrainingTricksConnector(self) - self.checkpoint_connector = CheckpointConnector(self) + self.checkpoint_connector = CheckpointConnector(self, resume_from_checkpoint) self.slurm_connector = SLURMConnector(self) self.tuner = Tuner(self) - self.train_loop = TrainLoop(self, max_epochs, min_epochs, max_steps, min_steps, num_sanity_val_steps) - self.evaluation_loop = EvaluationLoop(self) - self.predict_loop = PredictLoop(self) + + self.fit_loop = FitLoop(min_epochs, max_epochs, min_steps, max_steps) + self.validate_loop = EvaluationLoop() + self.test_loop = EvaluationLoop() + self.predict_loop = PredictionLoop() + self.fit_loop.connect(self) + self.validate_loop.connect(self) + self.test_loop.connect(self) + self.predict_loop.connect(self) # training state if weights_summary is not None and weights_summary not in ModelSummary.MODES: @@ -355,7 +369,6 @@ def __init__( process_position, default_root_dir, weights_save_path, - resume_from_checkpoint, stochastic_weight_avg, max_time, ) @@ -380,8 +393,7 @@ def __init__( truncated_bptt_steps, terminate_on_nan, ) - self.evaluation_loop.on_trainer_init() - self.predict_loop.on_trainer_init() + self._setup_on_init(num_sanity_val_steps) # configure tuner self.tuner.on_trainer_init(auto_lr_find, auto_scale_batch_size) @@ -411,12 +423,45 @@ def __init__( # Callback system self.on_init_end() + def _setup_on_init( + self, + num_sanity_val_steps: int, + ) -> None: + self._log_device_info() + + self.should_stop = False + self.state = TrainerState() + self.num_training_batches = 0 + self.train_dataloader = None + + if num_sanity_val_steps == -1: + self.num_sanity_val_steps = float("inf") + else: + self.num_sanity_val_steps = num_sanity_val_steps + + self.num_sanity_val_batches = [] + self.num_test_batches = [] + self.num_val_batches = [] + self.test_dataloaders = None + self.val_dataloaders = None + + # .validate() and .test() set this when they load a checkpoint + self.validated_ckpt_path = None + self.tested_ckpt_path = None + + # when true, print evaluation results in .validate() and .test() + self.verbose_evaluate = True + + self.num_predict_batches = [] + self.predicted_ckpt_path = None + def fit( self, - model: LightningModule, - train_dataloader: Any = None, - val_dataloaders: Optional[Union[DataLoader, List[DataLoader]]] = None, + model: 'pl.LightningModule', + train_dataloaders: Optional[Union[TRAIN_DATALOADERS, LightningDataModule]] = None, + val_dataloaders: Optional[EVAL_DATALOADERS] = None, datamodule: Optional[LightningDataModule] = None, + 
train_dataloader=None, # noqa TODO: remove with 1.6 ) -> None: r""" Runs the full optimization routine. @@ -424,12 +469,11 @@ def fit( Args: model: Model to fit. - train_dataloader: Either a single PyTorch DataLoader or a collection of these - (list, dict, nested lists and dicts). In the case of multiple dataloaders, please - see this :ref:`page ` + train_dataloaders: A collection of :class:`torch.utils.data.DataLoader` or a + :class:`~pytorch_lightning.core.datamodule.LightningDataModule` specifying training samples. + In the case of multiple dataloaders, please see this :ref:`page `. - val_dataloaders: Either a single Pytorch Dataloader or a list of them, specifying validation samples. - If the model has a predefined val_dataloaders method this will be skipped + val_dataloaders: A :class:`torch.utils.data.DataLoader` or a sequence of them specifying validation samples. datamodule: An instance of :class:`~pytorch_lightning.core.datamodule.LightningDataModule`. """ @@ -439,21 +483,29 @@ def fit( self.state.status = TrainerStatus.RUNNING self.training = True + if train_dataloader is not None: + rank_zero_deprecation( + "`trainer.fit(train_dataloader)` is deprecated in v1.4 and will be removed in v1.6." + " Use `trainer.fit(train_dataloaders)` instead. HINT: added 's'" + ) + train_dataloaders = train_dataloader # if a datamodule comes in as the second arg, then fix it for the user - if isinstance(train_dataloader, LightningDataModule): - datamodule = train_dataloader - train_dataloader = None + if isinstance(train_dataloaders, LightningDataModule): + datamodule = train_dataloaders + train_dataloaders = None # If you supply a datamodule you can't supply train_dataloader or val_dataloaders - if (train_dataloader is not None or val_dataloaders is not None) and datamodule is not None: + if (train_dataloaders is not None or val_dataloaders is not None) and datamodule is not None: raise MisconfigurationException( 'You cannot pass `train_dataloader` or `val_dataloaders` to `trainer.fit(datamodule=...)`' ) # links data to the trainer self.data_connector.attach_data( - model, train_dataloader=train_dataloader, val_dataloaders=val_dataloaders, datamodule=datamodule + model, train_dataloaders=train_dataloaders, val_dataloaders=val_dataloaders, datamodule=datamodule ) + self.checkpoint_connector.resume_start() + self._run(model) assert self.state.stopped @@ -461,11 +513,12 @@ def fit( def validate( self, - model: Optional[LightningModule] = None, - val_dataloaders: Optional[Union[DataLoader, List[DataLoader]]] = None, + model: Optional['pl.LightningModule'] = None, + dataloaders: Optional[Union[EVAL_DATALOADERS, LightningDataModule]] = None, ckpt_path: Optional[str] = 'best', verbose: bool = True, datamodule: Optional[LightningDataModule] = None, + val_dataloaders=None, # noqa TODO: remove with 1.6 ) -> _EVALUATE_OUTPUT: r""" Perform one evaluation epoch over the validation set. @@ -473,8 +526,8 @@ def validate( Args: model: The model to validate. - val_dataloaders: Either a single PyTorch DataLoader or a list of them, - specifying validation samples. + dataloaders: A :class:`torch.utils.data.DataLoader` or a sequence of them, + or a :class:`~pytorch_lightning.core.datamodule.LightningDataModule` specifying validation samples. ckpt_path: Either ``best`` or path to the checkpoint you wish to validate. If ``None``, use the current weights of the model. 
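# [Editor's note - hypothetical call sites, not part of this patch] The renamed arguments above
# keep old spellings working until v1.6 while steering users toward the new names:
#
#     trainer.fit(model, train_dataloaders=train_dl, val_dataloaders=val_dl)  # new spelling
#     trainer.fit(model, train_dataloader=train_dl)    # still works, emits a deprecation warning
#     trainer.validate(model, dataloaders=val_dl)      # `val_dataloaders=` is likewise deprecated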
@@ -499,11 +552,19 @@ def validate( self.state.status = TrainerStatus.RUNNING self.validating = True - # If you supply a datamodule you can't supply val_dataloaders - if val_dataloaders is not None and datamodule: - raise MisconfigurationException( - 'You cannot pass both `trainer.validate(val_dataloaders=..., datamodule=...)`' + if val_dataloaders is not None: + rank_zero_deprecation( + "`trainer.validate(val_dataloaders)` is deprecated in v1.4 and will be removed in v1.6." + " Use `trainer.validate(dataloaders)` instead." ) + dataloaders = val_dataloaders + # if a datamodule comes in as the second arg, then fix it for the user + if isinstance(dataloaders, LightningDataModule): + datamodule = dataloaders + dataloaders = None + # If you supply a datamodule you can't supply val_dataloaders + if dataloaders is not None and datamodule: + raise MisconfigurationException('You cannot pass both `trainer.validate(dataloaders=..., datamodule=...)`') model_provided = model is not None model = model or self.lightning_module @@ -513,7 +574,7 @@ def validate( ) # links data to the trainer - self.data_connector.attach_data(model, val_dataloaders=val_dataloaders, datamodule=datamodule) + self.data_connector.attach_data(model, val_dataloaders=dataloaders, datamodule=datamodule) if not model_provided: self.validated_ckpt_path = self.__load_ckpt_weights(ckpt_path) @@ -528,11 +589,12 @@ def validate( def test( self, - model: Optional[LightningModule] = None, - test_dataloaders: Optional[Union[DataLoader, List[DataLoader]]] = None, + model: Optional['pl.LightningModule'] = None, + dataloaders: Optional[Union[EVAL_DATALOADERS, LightningDataModule]] = None, ckpt_path: Optional[str] = 'best', verbose: bool = True, datamodule: Optional[LightningDataModule] = None, + test_dataloaders=None, # noqa TODO: remove with 1.6 ) -> _EVALUATE_OUTPUT: r""" Perform one evaluation epoch over the test set. It's separated from @@ -541,8 +603,8 @@ def test( Args: model: The model to test. - test_dataloaders: Either a single PyTorch DataLoader or a list of them, - specifying test samples. + dataloaders: A :class:`torch.utils.data.DataLoader` or a sequence of them, + or a :class:`~pytorch_lightning.core.datamodule.LightningDataModule` specifying test samples. ckpt_path: Either ``best`` or path to the checkpoint you wish to test. If ``None``, use the current weights of the model. @@ -565,9 +627,19 @@ def test( self.state.status = TrainerStatus.RUNNING self.testing = True + if test_dataloaders is not None: + rank_zero_deprecation( + "`trainer.test(test_dataloaders)` is deprecated in v1.4 and will be removed in v1.6." + " Use `trainer.test(dataloaders)` instead." 
+ ) + dataloaders = test_dataloaders + # if a datamodule comes in as the second arg, then fix it for the user + if isinstance(dataloaders, LightningDataModule): + datamodule = dataloaders + dataloaders = None # If you supply a datamodule you can't supply test_dataloaders - if test_dataloaders is not None and datamodule: - raise MisconfigurationException('You cannot pass both `trainer.test(test_dataloaders=..., datamodule=...)`') + if dataloaders is not None and datamodule: + raise MisconfigurationException('You cannot pass both `trainer.test(dataloaders=..., datamodule=...)`') model_provided = model is not None model = model or self.lightning_module @@ -577,7 +649,7 @@ def test( ) # links data to the trainer - self.data_connector.attach_data(model, test_dataloaders=test_dataloaders, datamodule=datamodule) + self.data_connector.attach_data(model, test_dataloaders=dataloaders, datamodule=datamodule) if not model_provided: self.tested_ckpt_path = self.__load_ckpt_weights(ckpt_path) @@ -592,8 +664,8 @@ def test( def predict( self, - model: Optional[LightningModule] = None, - dataloaders: Optional[Union[DataLoader, List[DataLoader]]] = None, + model: Optional['pl.LightningModule'] = None, + dataloaders: Optional[Union[EVAL_DATALOADERS, LightningDataModule]] = None, datamodule: Optional[LightningDataModule] = None, return_predictions: Optional[bool] = None, ckpt_path: Optional[str] = 'best', @@ -606,7 +678,8 @@ def predict( Args: model: The model to predict with. - dataloaders: Either a single PyTorch DataLoader or a list of them, specifying inference samples. + dataloaders: A :class:`torch.utils.data.DataLoader` or a sequence of them, + or a :class:`~pytorch_lightning.core.datamodule.LightningDataModule` specifying prediction samples. datamodule: The datamodule with a predict_dataloader method that returns one or more dataloaders. @@ -632,6 +705,10 @@ def predict( self.predict_loop.return_predictions = return_predictions + # if a datamodule comes in as the second arg, then fix it for the user + if isinstance(dataloaders, LightningDataModule): + datamodule = dataloaders + dataloaders = None if dataloaders is not None and datamodule: raise MisconfigurationException('You cannot pass both `trainer.predict(dataloaders=..., datamodule=...)`') @@ -657,12 +734,13 @@ def predict( def tune( self, - model: LightningModule, - train_dataloader: Optional[DataLoader] = None, - val_dataloaders: Optional[Union[DataLoader, List[DataLoader]]] = None, + model: 'pl.LightningModule', + train_dataloaders: Optional[Union[TRAIN_DATALOADERS, LightningDataModule]] = None, + val_dataloaders: Optional[EVAL_DATALOADERS] = None, datamodule: Optional[LightningDataModule] = None, scale_batch_size_kwargs: Optional[Dict[str, Any]] = None, lr_find_kwargs: Optional[Dict[str, Any]] = None, + train_dataloader=None, # noqa TODO: remove with 1.6 ) -> Dict[str, Optional[Union[int, _LRFinder]]]: r""" Runs routines to tune hyperparameters before training. @@ -670,11 +748,11 @@ def tune( Args: model: Model to tune. - train_dataloader: A Pytorch DataLoader with training samples. If the model has - a predefined train_dataloader method this will be skipped. + train_dataloaders: A collection of :class:`torch.utils.data.DataLoader` or a + :class:`~pytorch_lightning.core.datamodule.LightningDataModule` specifying training samples. + In the case of multiple dataloaders, please see this :ref:`page `. - val_dataloaders: Either a single Pytorch Dataloader or a list of them, specifying validation samples. 
- If the model has a predefined val_dataloaders method this will be skipped + val_dataloaders: A :class:`torch.utils.data.DataLoader` or a sequence of them specifying validation samples. datamodule: An instance of :class:`~pytorch_lightning.core.datamodule.LightningDataModule`. @@ -688,19 +766,25 @@ def tune( self.state.status = TrainerStatus.RUNNING self.tuning = True + if train_dataloader is not None: + rank_zero_deprecation( + "`trainer.tune(train_dataloader)` is deprecated in v1.4 and will be removed in v1.6." + " Use `trainer.tune(train_dataloaders)` instead. HINT: added 's'" + ) + train_dataloaders = train_dataloader # if a datamodule comes in as the second arg, then fix it for the user - if isinstance(train_dataloader, LightningDataModule): - datamodule = train_dataloader - train_dataloader = None + if isinstance(train_dataloaders, LightningDataModule): + datamodule = train_dataloaders + train_dataloaders = None # If you supply a datamodule you can't supply train_dataloader or val_dataloaders - if (train_dataloader is not None or val_dataloaders is not None) and datamodule is not None: + if (train_dataloaders is not None or val_dataloaders is not None) and datamodule is not None: raise MisconfigurationException( 'You cannot pass `train_dataloader` or `val_dataloaders` to `trainer.tune(datamodule=...)`' ) # links data to the trainer self.data_connector.attach_data( - model, train_dataloader=train_dataloader, val_dataloaders=val_dataloaders, datamodule=datamodule + model, train_dataloaders=train_dataloaders, val_dataloaders=val_dataloaders, datamodule=datamodule ) result = self.tuner._tune(model, scale_batch_size_kwargs=scale_batch_size_kwargs, lr_find_kwargs=lr_find_kwargs) @@ -710,7 +794,7 @@ def tune( return result - def _run(self, model: LightningModule) -> Optional[Union[_EVALUATE_OUTPUT, _PREDICT_OUTPUT]]: + def _run(self, model: 'pl.LightningModule') -> Optional[Union[_EVALUATE_OUTPUT, _PREDICT_OUTPUT]]: # clean hparams if hasattr(model, "hparams"): parsing.clean_namespace(model.hparams) @@ -731,6 +815,13 @@ def _run(self, model: LightningModule) -> Optional[Union[_EVALUATE_OUTPUT, _PRED self.accelerator.connect(model) self.accelerator.setup_environment() self._call_setup_hook(model) # allow user to setup lightning_module in accelerator environment + + # restore modules after setup + self.checkpoint_connector.restore_datamodule() + self.checkpoint_connector.restore_model() + # restore callback states + self.checkpoint_connector.restore_callbacks() + self._call_configure_sharded_model(model) # allow user to setup in model sharded environment self.accelerator.setup(self, model) # note: this sets up self.lightning_module @@ -752,7 +843,7 @@ def _run(self, model: LightningModule) -> Optional[Union[_EVALUATE_OUTPUT, _PRED {self.run_stage} || | || DIRECTION {self._run_train} || - or {self._run_evaluation} || + or {self._run_evaluate} || or {self._run_predict} || | || results \/ @@ -772,6 +863,9 @@ def _run(self, model: LightningModule) -> Optional[Union[_EVALUATE_OUTPUT, _PRED # plugin will setup fitting (e.g. ddp will launch child processes) self._pre_dispatch() + # restore optimizers, etc. + self.checkpoint_connector.restore_training_state() + # dispatch `start_training` or `start_evaluating` or `start_predicting` self._dispatch() @@ -806,7 +900,10 @@ def _pre_dispatch(self): def _post_dispatch(self): self.accelerator.post_dispatch(self) + # these `teardown` calls are here instead of in `_call_teardown_hook` since they are internal teardowns + # which need to happen before. 
        self.accelerator.teardown()
+        self._active_loop.teardown()
 
     def _dispatch(self):
         if self.evaluating:
@@ -833,6 +930,8 @@ def _pre_training_routine(self):
         # register auto-resubmit when on SLURM
         self.slurm_connector.register_slurm_signal_handlers()
 
+        self.checkpoint_connector.resume_end()
+
         # --------------------------
         # Pre-train
         # --------------------------
@@ -844,10 +943,8 @@ def _pre_training_routine(self):
 
         # print model summary
         if self.is_global_zero and self.weights_summary is not None and not self.testing:
-            ref_model.summarize(mode=self.weights_summary)
-
-        # restore training and model before hpc is called
-        self.checkpoint_connector.restore_weights()
+            max_depth = ModelSummary.MODES[self.weights_summary]
+            ref_model.summarize(max_depth=max_depth)
 
         # on pretrain routine end
         self.on_pretrain_routine_end()
@@ -861,56 +958,17 @@ def _run_train(self) -> None:
 
         self._run_sanity_check(self.lightning_module)
 
-        self.checkpoint_connector.has_trained = False
-
         # enable train mode
         self.model.train()
         torch.set_grad_enabled(True)
 
         # reload data when needed
         model = self.lightning_module
-        self.train_loop.reset_train_val_dataloaders(model)
 
-        # hook
-        self.train_loop.on_train_start()
+        self.reset_train_val_dataloaders(model)
 
         try:
-            if self.train_loop.should_skip_training():
-                return
-            # run all epochs
-            epochs = range(self.current_epoch, self.max_epochs) if self.max_epochs else count(self.current_epoch)
-            for epoch in epochs:
-
-                # hook
-                self.train_loop.on_train_epoch_start(epoch)
-
-                with self.profiler.profile("run_training_epoch"):
-                    # run train epoch
-                    self.train_loop.run_training_epoch()
-
-                if self.max_steps and self.max_steps <= self.global_step:
-                    self.train_loop.on_train_end()
-                    return
-
-                # early stopping
-                met_min_epochs = (epoch >= self.min_epochs - 1) if self.min_epochs else True
-                met_min_steps = self.global_step >= self.min_steps if self.min_steps else True
-
-                if self.should_stop:
-                    if met_min_epochs and met_min_steps:
-                        self.train_loop.on_train_end()
-                        return
-                    else:
-                        log.info(
-                            'Trainer was signaled to stop but required minimum epochs'
-                            f' ({self.min_epochs}) or minimum steps ({self.min_steps}) has'
-                            ' not been met. Training will continue...'
-                        )
-                        self.should_stop = False
-
-            # hook
-            self.train_loop.on_train_end()
-
+            self.fit_loop.run()
         except KeyboardInterrupt:
            rank_zero_warn('Detected KeyboardInterrupt, attempting graceful shutdown...')
            # user could press Ctrl+c many times... only shutdown once
@@ -919,137 +977,28 @@ def _run_train(self) -> None:
             if not self.interrupted:
                 self.state.status = TrainerStatus.INTERRUPTED
                 self.on_keyboard_interrupt()
                 # same treatment as below
                 self.accelerator.on_train_end()
-                self.state.stage = None
         except BaseException:
             self.state.status = TrainerStatus.INTERRUPTED
+            if distributed_available() and self.world_size > 1:
+                # try syncing remaining processes, kill otherwise
+                self.training_type_plugin.reconciliate_processes(traceback.format_exc())
             # give accelerators a chance to finish
             self.accelerator.on_train_end()
             # reset bookkeeping
             self.state.stage = None
             raise
 
-    def _run_evaluation(self) -> _EVALUATE_OUTPUT:
-        if not (self.evaluating or self.sanity_checking):
-            rank_zero_warn(
-                f"`trainer._run_evaluation()` was called but the running stage is set to {self.state.stage}."
-                " This should not happen normally.
Setting it to `RunningStage.VALIDATING`", RuntimeWarning - ) - self.validating = True - - # prepare dataloaders - dataloaders, max_batches = self.evaluation_loop.get_evaluation_dataloaders() - - # check if we want to skip this evaluation - if sum(max_batches) == 0: - return [], [] - - # enable eval mode + no grads - self.evaluation_loop.on_evaluation_model_eval() - # ref model - model = self.lightning_module - model.zero_grad() - torch.set_grad_enabled(False) - - # hook - self.evaluation_loop.on_evaluation_start() - - # set up the eval loop - self.evaluation_loop.setup(max_batches, dataloaders) - - # hook - self.evaluation_loop.on_evaluation_epoch_start() - - # run validation/testing - for dataloader_idx, dataloader in enumerate(dataloaders): - # bookkeeping - dl_outputs = [] - dataloader = self.accelerator.process_dataloader(dataloader) - dl_max_batches = self.evaluation_loop.max_batches[dataloader_idx] - - for batch_idx, batch in enumerate(dataloader): - if batch is None: - continue - - # stop short when running on limited batches - if batch_idx >= dl_max_batches: - break - - # hook - self.evaluation_loop.on_evaluation_batch_start(batch, batch_idx, dataloader_idx) - - # lightning module methods - with self.profiler.profile("evaluation_step_and_end"): - output = self.evaluation_loop.evaluation_step(batch, batch_idx, dataloader_idx) - output = self.evaluation_loop.evaluation_step_end(output) - - # hook + store predictions - self.evaluation_loop.on_evaluation_batch_end(output, batch, batch_idx, dataloader_idx) - - # log batch metrics - self.logger_connector.log_evaluation_step_metrics() - - # track epoch level outputs - dl_outputs = self._track_output_for_epoch_end(dl_outputs, output) - - # store batch level output per dataloader - if self.evaluation_loop.should_track_batch_outputs_for_epoch_end: - self.evaluation_loop.outputs.append(dl_outputs) - - outputs = self.evaluation_loop.outputs - - # reset outputs - self.evaluation_loop.outputs = [] - - # with a single dataloader don't pass a 2D list - if len(outputs) > 0 and self.evaluation_loop.num_dataloaders == 1: - outputs = outputs[0] - - # lightning module method - self.evaluation_loop.evaluation_epoch_end(outputs) - - # hook - self.evaluation_loop.on_evaluation_epoch_end() - - # log epoch metrics - eval_loop_results = self.logger_connector.get_evaluate_epoch_results() - - # hook - self.evaluation_loop.on_evaluation_end() - - # save predictions to disk - self.evaluation_loop.predictions.to_disk() - - # enable train mode again - self.evaluation_loop.on_evaluation_model_train() - - # reset cached results - self.logger_connector.reset() - - torch.set_grad_enabled(True) - - return eval_loop_results - - def _track_output_for_epoch_end(self, outputs, output): - if output is not None: - if isinstance(output, Result): - output = output.detach() - if self.move_metrics_to_cpu: - output = output.cpu() - elif isinstance(output, dict): - output = recursive_detach(output, to_cpu=self.move_metrics_to_cpu) - elif isinstance(output, torch.Tensor) and output.is_cuda and self.move_metrics_to_cpu: - output = output.cpu() - outputs.append(output) - return outputs - def _run_evaluate(self) -> _EVALUATE_OUTPUT: if not self.is_global_zero and self.progress_bar_callback is not None: self.progress_bar_callback.disable() assert self.evaluating - with self.profiler.profile(f"run_{self.state.stage}_evaluation"): - eval_loop_results = self._run_evaluation() + # reload dataloaders + self._evaluation_loop.reload_evaluation_dataloaders() + + with 
self.profiler.profile(f"run_{self.state.stage}_evaluation"), torch.no_grad():
            eval_loop_results = self._evaluation_loop.run()
 
         # remove the tensors from the eval results
         for i, result in enumerate(eval_loop_results):
@@ -1061,42 +1010,9 @@ def _run_evaluate(self) -> _EVALUATE_OUTPUT:
         return eval_loop_results
 
     def _run_predict(self) -> Optional[_PREDICT_OUTPUT]:
-        # prepare dataloaders
-        dataloaders, max_batches = self.predict_loop.get_predict_dataloaders()
-
-        # check if we want to skip this evaluation
-        if self.predict_loop.should_skip_predict(max_batches):
-            return []
-
-        # set up the eval loop
-        self.predict_loop.setup(max_batches, dataloaders)
-
-        # call hook
-        self.predict_loop.on_predict_start()
-
-        # run validation/testing
-        for dataloader_idx, dataloader in enumerate(dataloaders):
-            dataloader = self.accelerator.process_dataloader(dataloader)
-            dl_max_batches = self.predict_loop.max_batches[dataloader_idx]
-            for batch_idx, batch in enumerate(dataloader):
-                if batch is None:
-                    continue
-
-                # stop short when running on limited batches
-                if batch_idx >= dl_max_batches:
-                    break
-
-                # lightning module methods
-                with self.profiler.profile("predict_step"):
-                    self.predict_loop.predict_step(batch, batch_idx, dataloader_idx)
-
-        # call hook
-        results = self.predict_loop.on_predict_epoch_end()
-
-        # call hook
-        self.predict_loop.on_predict_end()
-
-        return results
+        self.reset_predict_dataloader(self.lightning_module)
+        with torch.no_grad():
+            return self.predict_loop.run()
 
     def _run_sanity_check(self, ref_model):
         using_val_step = ref_model.val_dataloader is not None and is_overridden('validation_step', ref_model)
@@ -1111,17 +1027,25 @@ def _run_sanity_check(self, ref_model):
             # hook and callback
             self.on_sanity_check_start()
 
+            # reload dataloaders
+            self._evaluation_loop.reload_evaluation_dataloaders()
+
             # run eval step
-            self._run_evaluation()
+            with torch.no_grad():
+                self._evaluation_loop.run()
 
             self.on_sanity_check_end()
 
-            self.state.stage = stage
+            # reset validation metrics
+            self.logger_connector.reset()
 
             # reset the seed to what it was before sanity check
             # prevents sanity check to affect random sampling in training
            reset_seed()
 
+            # restore the previous stage when the sanity check is finished
+            self.state.stage = stage
+
     def __load_ckpt_weights(self, ckpt_path: Optional[str]) -> Optional[str]:
         if ckpt_path is None:
             return
@@ -1153,12 +1077,10 @@ def __load_ckpt_weights(self, ckpt_path: Optional[str]) -> Optional[str]:
         if not self._device_type == DeviceType.TPU:
             self.training_type_plugin.barrier()
 
-        self.training_type_plugin.restore_model_state_from_ckpt_path(
-            ckpt_path, map_location=lambda storage, loc: storage
-        )
+        self.checkpoint_connector.restore_model_weights(ckpt_path)
         return ckpt_path
 
-    def _call_setup_hook(self, model: LightningModule) -> None:
+    def _call_setup_hook(self, model: 'pl.LightningModule') -> None:
         fn = self.state.fn._setup_fn
 
         self.accelerator.barrier("pre_setup")
@@ -1170,7 +1092,7 @@ def _call_setup_hook(self, model: LightningModule) -> None:
 
         self.accelerator.barrier("post_setup")
 
-    def _call_configure_sharded_model(self, model: LightningModule) -> None:
+    def _call_configure_sharded_model(self, model: 'pl.LightningModule') -> None:
         # Call configure sharded model hook if accelerator requests. In some cases
         # we will not call the hook; the hook has initialized the sharded model for example.
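# --- illustrative sketch (editorial, not part of this patch) ---------------
# `__load_ckpt_weights` above implements the `ckpt_path` contract documented in
# `validate`/`test`: `'best'` resolves to the best model saved by a checkpoint
# callback, `None` keeps the current in-memory weights, anything else is
# treated as a filesystem path. A reduced, illustrative version:
from typing import Optional


def _resolve_ckpt_path(ckpt_path: Optional[str], best_model_path: Optional[str]) -> Optional[str]:
    if ckpt_path is None:
        return None  # evaluate with the current in-memory weights
    if ckpt_path == "best":
        if not best_model_path:
            raise ValueError('`ckpt_path="best"` requires a checkpoint callback that has saved a best model')
        return best_model_path
    return ckpt_path  # an explicit checkpoint path


assert _resolve_ckpt_path("best", "epoch=4.ckpt") == "epoch=4.ckpt"
assert _resolve_ckpt_path(None, "epoch=4.ckpt") is None
# ---------------------------------------------------------------------------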
@@ -1183,7 +1105,7 @@ def _call_configure_sharded_model(self, model: LightningModule) -> None:
             model.call_configure_sharded_model_hook = True
             self.accelerator.call_configure_sharded_model_hook = False
 
-    def _call_teardown_hook(self, model: LightningModule) -> None:
+    def _call_teardown_hook(self, model: 'pl.LightningModule') -> None:
         fn = self.state.fn._setup_fn
 
         if self.datamodule is not None:
@@ -1194,33 +1116,17 @@ def _call_teardown_hook(self, model: LightningModule) -> None:
 
         model._current_fx_name = None
         model._current_dataloader_idx = None
-
-    def _reset_result_and_set_fx_name(self, hook_name: str) -> bool:
-        # on_before_zero_grad is called within training_step
-        # TODO(@carmocca): Result should handle this logic
-        if "batch_start" in hook_name or hook_name in ("on_before_zero_grad", "on_after_backward"):
-            return True
-        model_ref = self.lightning_module
-        if model_ref is not None:
-            # used to track current hook name called
-            model_ref._results = Result()
-            model_ref._current_fx_name = hook_name
-        return False
-
-    def _cache_logged_metrics(self):
-        model_ref = self.lightning_module
-        if model_ref is not None:
-            # capture logging for this hook
-            self.logger_connector.cache_logged_metrics()
+        # these could have become stale if metrics are defined in `setup`
+        model._metric_attributes = None
 
     def call_hook(self, hook_name: str, *args, **kwargs) -> Any:
-        # Note this implementation is copy/pasted into the TrainLoop class in TrainLoop._on_train_epoch_end_hook
+        # Note this implementation is copy/pasted into the TrainingEpochLoop class in TrainingEpochLoop._on_train_epoch_end_hook
         # This was done to manage the deprecation of the `outputs` argument to on_train_epoch_end
         # If making changes to this function, ensure that those changes are also made to
-        # TrainLoop._on_train_epoch_end_hook
-
-        # set hook_name to model + reset Result obj
-        skip = self._reset_result_and_set_fx_name(hook_name)
+        # TrainingEpochLoop._on_train_epoch_end_hook
+        if self.lightning_module:
+            prev_fx_name = self.lightning_module._current_fx_name
+            self.lightning_module._current_fx_name = hook_name
 
         # always profile hooks
         with self.profiler.profile(hook_name):
@@ -1237,14 +1143,19 @@ def call_hook(self, hook_name: str, *args, **kwargs) -> Any:
                 hook_fx = getattr(model_ref, hook_name)
                 output = hook_fx(*args, **kwargs)
 
-            # if the PL module doesn't have the hook then call the accelerator
-            # used to auto-reduce things for the user with Results obj
-            elif hasattr(self.accelerator, hook_name):
+            # call the accelerator hook
+            if hasattr(self.accelerator, hook_name):
                 accelerator_hook = getattr(self.accelerator, hook_name)
-                output = accelerator_hook(*args, **kwargs)
+                accelerator_output = accelerator_hook(*args, **kwargs)
+                # Rely on the accelerator output if the LightningModule hook returns nothing
+                # Required for cases such as DataParallel where we reduce the output for the user
+                # todo: move this data parallel logic into the data parallel plugin
+                output = accelerator_output if output is None else output
+
+        if self.lightning_module:
+            # restore the previous fx name to support nested hook calls
+            self.lightning_module._current_fx_name = prev_fx_name
 
-        if not skip:
-            self._cache_logged_metrics()
         return output
 
     @staticmethod
@@ -1257,6 +1168,7 @@ def __init_profiler(self, profiler: Optional[Union[BaseProfiler, str]]) -> None:
             "simple": SimpleProfiler,
             "advanced": AdvancedProfiler,
             "pytorch": PyTorchProfiler,
+            "xla": XLAProfiler,
         }
         profiler = profiler.lower()
         if profiler not in PROFILERS:
@@ -1272,3 +1184,30 @@ def __setup_profiler(self) -> None:
         local_rank =
self.local_rank if self.world_size > 1 else None self.profiler._lightning_module = proxy(self.lightning_module) self.profiler.setup(stage=self.state.fn._setup_fn, local_rank=local_rank, log_dir=self.log_dir) + + def _log_device_info(self) -> None: + rank_zero_info(f'GPU available: {torch.cuda.is_available()}, used: {self._device_type == DeviceType.GPU}') + + num_tpu_cores = self.tpu_cores if self.tpu_cores is not None else 0 + rank_zero_info(f'TPU available: {_TPU_AVAILABLE}, using: {num_tpu_cores} TPU cores') + + num_ipus = self.ipus if self.ipus is not None else 0 + rank_zero_info(f'IPU available: {_IPU_AVAILABLE}, using: {num_ipus} IPUs') + + if torch.cuda.is_available() and self._device_type != DeviceType.GPU: + rank_zero_warn( + "GPU available but not used. Set the gpus flag in your trainer" + " `Trainer(gpus=1)` or script `--gpus=1`." + ) + + if _TPU_AVAILABLE and self._device_type != DeviceType.TPU: + rank_zero_warn( + "TPU available but not used. Set the `tpu_cores` flag in your trainer" + " `Trainer(tpu_cores=8)` or script `--tpu_cores=8`." + ) + + if _IPU_AVAILABLE and self._device_type != DeviceType.IPU: + rank_zero_warn( + "IPU available but not used. Set the `ipus` flag in your trainer" + " `Trainer(ipus=8)` or script `--ipus=8`." + ) diff --git a/pytorch_lightning/trainer/training_loop.py b/pytorch_lightning/trainer/training_loop.py deleted file mode 100644 index ea33241b7a4af..0000000000000 --- a/pytorch_lightning/trainer/training_loop.py +++ /dev/null @@ -1,944 +0,0 @@ -# Copyright The PyTorch Lightning team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
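# --- illustrative sketch (editorial, not part of this patch) ---------------
# `_log_device_info` above applies the same report-then-warn pattern once per
# accelerator type (GPU/TPU/IPU): report availability and usage, then warn
# when the hardware is available but unused. The pattern reduced to a single
# helper, with illustrative names rather than the real Trainer API:
import logging

log = logging.getLogger(__name__)


def _log_device(name: str, available: bool, used: int, flag: str) -> None:
    log.info(f"{name} available: {available}, using: {used} {name}s")
    if available and not used:
        log.warning(f"{name} available but not used. Set the `{flag}` flag in your `Trainer`.")


_log_device("GPU", available=True, used=0, flag="gpus")  # would emit the warning
# ---------------------------------------------------------------------------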
- -from collections import OrderedDict -from contextlib import contextmanager, suppress -from copy import copy -from functools import partial, update_wrapper -from typing import Any, Callable, Dict, List, Optional, Tuple, Union - -import numpy as np -import torch -from torch.optim import Optimizer - -from pytorch_lightning.core.optimizer import LightningOptimizer -from pytorch_lightning.core.step_result import Result -from pytorch_lightning.plugins import ParallelPlugin -from pytorch_lightning.trainer.supporters import TensorRunningAccum -from pytorch_lightning.utilities import _TPU_AVAILABLE, AMPType, DeviceType -from pytorch_lightning.utilities.exceptions import MisconfigurationException -from pytorch_lightning.utilities.finite_checks import detect_nan_parameters -from pytorch_lightning.utilities.grads import grad_norm -from pytorch_lightning.utilities.model_helpers import is_overridden -from pytorch_lightning.utilities.parsing import AttributeDict -from pytorch_lightning.utilities.signature_utils import is_param_in_hook_signature -from pytorch_lightning.utilities.warnings import WarningCache - - -class TrainLoop: - - def __init__( - self, - trainer, - max_epochs: Optional[int], - min_epochs: Optional[int], - max_steps: Optional[int], - min_steps: Optional[int], - num_sanity_val_steps: int, - ): - self.trainer = trainer - self.accumulated_loss = None - self.warning_cache = WarningCache() - self._teardown_already_run = False - self.running_loss = TensorRunningAccum(window_length=20) - self._skip_backward = False - self._optimizer_freq_cumsum = None - self._hiddens = None - - self.global_step = 0 - self.current_epoch = 0 - self.trainer.should_stop = False - - # the total batch index across all epochs - self.total_batch_idx = 0 - # the current batch index in the loop that runs over the dataloader(s) - self.batch_idx = 0 - # the current split index when the batch gets split into chunks in truncated backprop through time - self.split_idx = None - - self.trainer.num_training_batches = 0 - self.trainer.train_dataloader = None - - # If neither max_epochs or max_steps is set, then use existing default of max_epochs = 1000 - self.max_epochs = 1000 if (max_epochs is None and max_steps is None) else max_epochs - # If neither min_epochs or min_steps is set, then use existing default of min_epochs = 1 - self.min_epochs = 1 if (min_epochs is None and min_steps is None) else min_epochs - self.max_steps = max_steps - self.min_steps = min_steps - - if num_sanity_val_steps == -1: - self.trainer.num_sanity_val_steps = float("inf") - else: - self.trainer.num_sanity_val_steps = num_sanity_val_steps - - @property - def num_active_optimizers(self) -> int: - return len(self.get_active_optimizers()) - - @property - def optimizer_freq_cumsum(self): - if self._optimizer_freq_cumsum is None: - self._optimizer_freq_cumsum = np.cumsum(self.trainer.optimizer_frequencies) - return self._optimizer_freq_cumsum - - def should_skip_training(self) -> bool: - should_by_max_steps = self.max_steps is not None and self.global_step >= self.max_steps - should_by_epoch = self.max_epochs is not None and self.current_epoch >= self.max_epochs - return should_by_max_steps or should_by_epoch or self.trainer.num_training_batches == 0 - - def on_train_start(self): - # hook - self.trainer.call_hook("on_train_start") - - def on_train_end(self): - if self._teardown_already_run: - return - self._teardown_already_run = True - - # hook - self.trainer.call_hook("on_train_end") - - # todo: TPU 8 cores hangs in flush with TensorBoard. 
Might do for all loggers. - # It might be related to xla tensors blocked when moving the cpu - # kill loggers - if self.trainer.logger is not None: - self.trainer.logger.finalize("success") - - # summarize profile results - self.trainer.profiler.describe() - - # give accelerators a chance to finish - self.trainer.accelerator.on_train_end() - - # reset bookkeeping - self.trainer.state.stage = None - - def on_train_epoch_start(self, epoch): - - # update training progress in trainer - self.current_epoch = epoch - - model = self.trainer.lightning_module - - # reset train dataloader - if epoch != 0 and self.trainer.reload_dataloaders_every_epoch: - self.trainer.reset_train_dataloader(model) - - # todo: specify the possible exception - with suppress(Exception): - # set seed for distributed sampler (enables shuffling for each epoch) - self.trainer.train_dataloader.sampler.set_epoch(epoch) - - # changing gradient according accumulation_scheduler - self.trainer.accumulation_scheduler.on_train_epoch_start(self.trainer, self.trainer.lightning_module) - - # stores accumulated grad fractions per batch - self.accumulated_loss = TensorRunningAccum(window_length=self.trainer.accumulate_grad_batches) - - # hook - self.trainer.call_hook("on_epoch_start") - self.trainer.call_hook("on_train_epoch_start") - - def on_train_batch_end(self, epoch_output, batch_end_outputs, batch, batch_idx, dataloader_idx): - batch_end_outputs = [opt_idx_out for opt_idx_out in batch_end_outputs if len(opt_idx_out)] - - processed_batch_end_outputs = TrainLoop._prepare_outputs(batch_end_outputs, batch_mode=True) - - # hook - self.trainer.call_hook('on_train_batch_end', processed_batch_end_outputs, batch, batch_idx, dataloader_idx) - self.trainer.call_hook('on_batch_end') - - # figure out what to track for epoch end - self.track_epoch_end_reduce_metrics(epoch_output, batch_end_outputs) - - # reset batch logger internals - self.trainer.logger_connector.on_train_batch_end() - - def reset_train_val_dataloaders(self, model) -> None: - """ - Resets train and val dataloaders if none are attached to the trainer. - - The val dataloader must be initialized before training loop starts, as the training loop - inspects the val dataloader to determine whether to run the evaluation loop. - """ - if self.trainer.train_dataloader is None: - self.trainer.reset_train_dataloader(model) - - if self.trainer.val_dataloaders is None: - self.trainer.reset_val_dataloader(model) - - def track_epoch_end_reduce_metrics(self, epoch_output, batch_end_outputs): - - hook_overridden = self._should_add_batch_output_to_epoch_output() - - # track the outputs to reduce at the end of the epoch - for opt_idx, opt_outputs in enumerate(batch_end_outputs): - sample_output = opt_outputs[-1] - - # decide if we need to reduce at the end of the epoch automatically - auto_reduce_tng_result = isinstance(sample_output, Result) and sample_output.should_reduce_on_epoch_end - - # only track when a) it needs to be autoreduced OR b) the user wants to manually reduce on epoch end - if not (hook_overridden or auto_reduce_tng_result): - continue - - # with 1 step (no tbptt) don't use a sequence at epoch end - if isinstance(opt_outputs, list) and len(opt_outputs) == 1 and not isinstance(opt_outputs[0], Result): - opt_outputs = opt_outputs[0] - - epoch_output[opt_idx].append(opt_outputs) - - def _should_add_batch_output_to_epoch_output(self) -> bool: - # We add to the epoch outputs if - # 1. The model defines training_epoch_end OR - # 2. 
The model overrides on_train_epoch_end which has `outputs` in the signature - # TODO: in v1.5 this only needs to check if training_epoch_end is overridden - lightning_module = self.trainer.lightning_module - if is_overridden("training_epoch_end", model=lightning_module): - return True - - if is_overridden("on_train_epoch_end", model=lightning_module): - model_hook_fx = getattr(lightning_module, "on_train_epoch_end") - if is_param_in_hook_signature(model_hook_fx, "outputs"): - return True - - return False - - def get_active_optimizers(self, batch_idx: Optional[int] = None) -> List[Tuple[int, Optimizer]]: - """ - Returns the currently active optimizers. When multiple optimizers are used with different frequencies, - only one of the optimizers is active at a time. - - Returns: - A list of tuples (opt_idx, optimizer) of currently active optimizers. - """ - if not self.trainer.optimizer_frequencies: - # call training_step once per optimizer - return list(enumerate(self.trainer.optimizers)) - - batch_idx = self.total_batch_idx if batch_idx is None else batch_idx - optimizers_loop_length = self.optimizer_freq_cumsum[-1] - current_place_in_loop = batch_idx % optimizers_loop_length - - # find optimzier index by looking for the first {item > current_place} in the cumsum list - opt_idx = int(np.argmax(self.optimizer_freq_cumsum > current_place_in_loop)) - return [(opt_idx, self.trainer.optimizers[opt_idx])] - - def on_after_backward(self, training_step_output, batch_idx, untouched_loss): - training_step_output.detach() - - # insert after step hook - self.trainer.call_hook("on_after_backward") - - # when in dev debugging track the losses - self.trainer.dev_debugger.track_train_loss_history(batch_idx, untouched_loss.detach()) - - def _check_training_step_output(self, training_step_output): - if isinstance(training_step_output, torch.Tensor) and not self.trainer.lightning_module.automatic_optimization: - if training_step_output.grad_fn is None: - # TODO: Find why - RuntimeError: Expected to mark a variable ready only once ... - raise MisconfigurationException("In manual optimization, `training_step` should not return a Tensor") - - def training_step(self, split_batch, batch_idx, opt_idx, hiddens): - # give the PL module a result for logging - model_ref = self.trainer.lightning_module - - with self.trainer.profiler.profile("model_forward"): - step_kwargs = self._build_kwargs(split_batch, batch_idx, opt_idx, hiddens) - - # manually capture logged metrics - model_ref._current_fx_name = 'training_step' - model_ref._results = Result() - with self.trainer.profiler.profile("training_step"): - training_step_output = self.trainer.accelerator.training_step(step_kwargs) - self.trainer.accelerator.post_training_step() - - self.trainer.logger_connector.cache_logged_metrics() - - self._check_training_step_output(training_step_output) - - training_step_output = self.trainer.call_hook("training_step_end", training_step_output) - - training_step_output_for_epoch_end, training_step_output = self._process_training_step_output( - training_step_output, split_batch - ) - if training_step_output_for_epoch_end is None: - return - - # enable empty loss when using manual opt - closure_loss = None - untouched_loss = None - - if self.trainer.lightning_module.automatic_optimization: - # accumulate loss. if accumulate_grad_batches==1, no effect - closure_loss = training_step_output.minimize / self.trainer.accumulate_grad_batches - - # the loss will get scaled for amp. 
avoid any modifications to it - untouched_loss = closure_loss.detach().clone() - - # result - result = AttributeDict( - closure_loss=closure_loss, - loss=untouched_loss, - training_step_output=training_step_output, - training_step_output_for_epoch_end=training_step_output_for_epoch_end, - ) - return result - - def _process_training_step_output(self, training_step_output, split_batch): - training_step_output_for_epoch_end = training_step_output - - # enable validation_step return None - if training_step_output_for_epoch_end is None: - return None, None - - result = self.trainer.lightning_module._results - - loss = None - hiddens = None - result["extra"] = {} - - # handle dict return - if isinstance(training_step_output, dict): - loss = training_step_output.pop("loss", None) - hiddens = training_step_output.pop("hiddens", None) - if hiddens is not None: - hiddens = hiddens.detach() - result["extra"] = training_step_output - - # handle scalar return - elif isinstance(training_step_output, torch.Tensor): - loss = training_step_output - - # map to results under the hood - result.minimize = loss - self._hiddens = hiddens - - # track batch for manual reduction with result - result.track_batch_size(len(split_batch)) - - # track metrics without grads for epoch reduction - training_step_output_for_epoch_end = copy(result) - training_step_output_for_epoch_end = training_step_output_for_epoch_end.detach() - if self.trainer.move_metrics_to_cpu: - training_step_output_for_epoch_end = training_step_output_for_epoch_end.cpu() - - return training_step_output_for_epoch_end, result - - @staticmethod - def _prepare_outputs( - outputs: List[List[List[Result]]], - batch_mode: bool, - ) -> Union[List[List[List[Dict]]], List[List[Dict]], List[Dict], Dict]: - """ - Extract required information from batch or epoch end results. - - Args: - outputs: A 3-dimensional list of ``Result`` objects with dimensions: - [optimizer outs][batch outs][tbptt steps]. - - batch_mode: If True, ignore the batch output dimension. - - Returns: - The cleaned outputs with ``Result`` objects converted to dictionaries. All list dimensions of size one will - be collapsed. 
- """ - processed_outputs = [] - for opt_outputs in outputs: - # handle an edge case where an optimizer output is the empty list - if len(opt_outputs) == 0: - continue - - processed_batch_outputs = [] - - if batch_mode: - opt_outputs = [opt_outputs] - - for batch_outputs in opt_outputs: - processed_tbptt_outputs = [] - - for tbptt_output in batch_outputs: - out = tbptt_output.extra - out['loss'] = tbptt_output.minimize - processed_tbptt_outputs.append(out) - - # if there was only one tbptt step then we can collapse that dimension - if len(processed_tbptt_outputs) == 1: - processed_tbptt_outputs = processed_tbptt_outputs[0] - processed_batch_outputs.append(processed_tbptt_outputs) - - # batch_outputs should be just one dict (or a list of dicts if using tbptt) per optimizer - if batch_mode: - processed_batch_outputs = processed_batch_outputs[0] - processed_outputs.append(processed_batch_outputs) - - # if there is only one optimiser then we collapse that dimension - if len(processed_outputs) == 1: - processed_outputs = processed_outputs[0] - return processed_outputs - - def optimizer_step(self, optimizer, opt_idx, batch_idx, train_step_and_backward_closure): - model_ref = self.trainer.lightning_module - - is_lbfgs = isinstance(optimizer, torch.optim.LBFGS) - using_native_amp = self.trainer.amp_backend == AMPType.NATIVE - - # native amp + lbfgs is a no go right now - if using_native_amp and is_lbfgs: - raise MisconfigurationException( - 'native PyTorch amp and lbfgs are not compatible.' - ' To request, please file a Github issue in PyTorch and tag @mcarilli' - ) - - # wraps into LightningOptimizer only for running step - optimizer = LightningOptimizer._to_lightning_optimizer(optimizer, self.trainer, opt_idx) - - # model hook - model_ref.optimizer_step( - self.trainer.current_epoch, - batch_idx, - optimizer, - opt_idx, - train_step_and_backward_closure, - on_tpu=self.trainer._device_type == DeviceType.TPU and _TPU_AVAILABLE, - using_native_amp=using_native_amp, - using_lbfgs=is_lbfgs, - ) - - def on_before_zero_grad(self, optimizer): - self.trainer.call_hook('on_before_zero_grad', optimizer) - - def optimizer_zero_grad(self, batch_idx, optimizer, opt_idx): - self.trainer.accelerator.optimizer_zero_grad(self.trainer.current_epoch, batch_idx, optimizer, opt_idx) - - def track_and_norm_grad(self, optimizer) -> dict: - # track gradient norms - grad_norm_dict = self._track_gradient_norm() - - # clip gradients - self.trainer.accelerator.clip_gradients( - optimizer, self.trainer.gradient_clip_val, gradient_clip_algorithm=self.trainer.gradient_clip_algorithm - ) - return grad_norm_dict - - def _track_gradient_norm(self): - grad_norm_dict = {} - if (self.global_step + 1) % self.trainer.log_every_n_steps == 0: - if float(self.trainer.track_grad_norm) > 0: - model = self.trainer.lightning_module - grad_norm_dict = grad_norm(model, self.trainer.track_grad_norm) - return grad_norm_dict - - def _tbptt_split_batch(self, batch: Any) -> List[Any]: - splits = [batch] - truncated_bptt_enabled = self._truncated_bptt_enabled() - if truncated_bptt_enabled: - model_ref = self.trainer.lightning_module - with self.trainer.profiler.profile("tbptt_split_batch"): - splits = model_ref.tbptt_split_batch(batch, self._truncated_bptt_steps()) - return splits - - def run_training_epoch(self): - # modify dataloader if needed (ddp, etc...) 
- train_dataloader = self.trainer.accelerator.process_dataloader(self.trainer.train_dataloader) - - # track epoch output - epoch_output = [[] for _ in range(self.num_active_optimizers)] - - train_dataloader = self.trainer.data_connector.get_profiled_train_dataloader(train_dataloader) - dataloader_idx = 0 - batch_idx = None - - for batch_idx, (batch, is_last_batch) in train_dataloader: - self.batch_idx = batch_idx - - # ------------------------------------ - # TRAINING_STEP + TRAINING_STEP_END - # ------------------------------------ - with self.trainer.profiler.profile("run_training_batch"): - batch_output = self.run_training_batch(batch, batch_idx, dataloader_idx) - - # when returning -1 from train_step, we end epoch early - if batch_output.signal == -1: - break - - # hook - # TODO: add outputs to batches - self.on_train_batch_end( - epoch_output, - batch_output.training_step_output_for_epoch_end, - batch, - batch_idx, - dataloader_idx, - ) - - # ----------------------------------------- - # SAVE METRICS TO LOGGERS - # ----------------------------------------- - self.trainer.logger_connector.log_train_step_metrics(batch_output) - - # ----------------------------------------- - # VALIDATE IF NEEDED - # ----------------------------------------- - should_check_val = self._should_check_val_fx(batch_idx, is_last_batch) - if should_check_val: - self.trainer.validating = True - self.trainer._run_evaluation() - self.trainer.training = True - - # ----------------------------------------- - # SAVE LOGGERS (ie: Tensorboard, etc...) - # ----------------------------------------- - self.save_loggers_on_train_batch_end() - - # update LR schedulers - self.update_lr_schedulers('step') - self.trainer.checkpoint_connector.has_trained = True - - self.total_batch_idx += 1 - - # progress global step according to grads progress - self.increment_accumulated_grad_global_step() - - max_steps_reached = (self.max_steps is not None and self.max_steps <= self.global_step) - if max_steps_reached or self.trainer.should_stop or self._num_training_batches_reached(is_last_batch): - break - - if batch_idx is None: - # dataloader/iterator did not produce a batch - return - - # handle epoch_output on epoch end - # TODO: this can log so ModelCheckpoint won't have access to them since the logger conector is updated after. - self.on_train_epoch_end(epoch_output) - - # the global step is manually decreased here due to backwards compatibility with existing loggers - # as they expect that the same step is used when logging epoch end metrics even when the batch loop has - # finished. this means the attribute does not exactly track the number of optimizer steps applied. 
- # TODO(@carmocca): deprecate and rename so users don't get confused - self.global_step -= 1 - # log epoch metrics - self.trainer.logger_connector.log_train_epoch_end_metrics(epoch_output) - self.global_step += 1 - - self.update_lr_schedulers('epoch') - - def on_train_epoch_end(self, epoch_output: List[List[List[Result]]]) -> None: - # inform logger the batch loop has finished - self.trainer.logger_connector.on_train_epoch_end() - - # prepare epoch output - processed_epoch_output = TrainLoop._prepare_outputs(epoch_output, batch_mode=False) - - # get the model and call model.training_epoch_end - model = self.trainer.lightning_module - - if is_overridden('training_epoch_end', model=model): - # run training_epoch_end - # refresh the result for custom logging at the epoch level - model._current_fx_name = 'training_epoch_end' - training_epoch_end_output = model.training_epoch_end(processed_epoch_output) - - if training_epoch_end_output is not None: - raise MisconfigurationException( - 'training_epoch_end expects a return of None. ' - 'HINT: remove the return statement in training_epoch_end' - ) - - # capture logging - self.trainer.logger_connector.cache_logged_metrics() - - # call train epoch end hooks - self._on_train_epoch_end_hook(processed_epoch_output) - self.trainer.call_hook('on_epoch_end') - - def _on_train_epoch_end_hook(self, processed_epoch_output) -> None: - # We cannot rely on Trainer.call_hook because the signatures might be different across - # lightning module and callback - # As a result, we need to inspect if the module accepts `outputs` in `on_train_epoch_end` - - # This implementation is copied from Trainer.call_hook - hook_name = "on_train_epoch_end" - - # set hook_name to model + reset Result obj - skip = self.trainer._reset_result_and_set_fx_name(hook_name) - - # always profile hooks - with self.trainer.profiler.profile(hook_name): - - # first call trainer hook - if hasattr(self.trainer, hook_name): - trainer_hook = getattr(self.trainer, hook_name) - trainer_hook(processed_epoch_output) - - # next call hook in lightningModule - model_ref = self.trainer.lightning_module - if is_overridden(hook_name, model_ref): - hook_fx = getattr(model_ref, hook_name) - if is_param_in_hook_signature(hook_fx, "outputs"): - self.warning_cache.warn( - "The signature of `ModelHooks.on_train_epoch_end` has changed in v1.3." - " `outputs` parameter has been deprecated." - " Support for the old signature will be removed in v1.5", DeprecationWarning - ) - model_ref.on_train_epoch_end(processed_epoch_output) - else: - model_ref.on_train_epoch_end() - - # if the PL module doesn't have the hook then call the accelerator - # used to auto-reduce things for the user with Results obj - elif hasattr(self.trainer.accelerator, hook_name): - accelerator_hook = getattr(self.trainer.accelerator, hook_name) - accelerator_hook() - - if not skip: - self.trainer._cache_logged_metrics() - - def run_training_batch(self, batch, batch_idx, dataloader_idx): - # track grad norms - grad_norm_dict = {} - - # bookkeeping - self._hiddens = None - - optimizers = list(enumerate(self.trainer.optimizers)) - - # track all outputs across time and num of optimizers - batch_outputs = [[] for _ in range(len(optimizers))] - - if batch is None: - self.warning_cache.warn("train_dataloader yielded None. 
If this was on purpose, ignore this warning...") - return AttributeDict( - signal=0, - grad_norm_dict={}, - training_step_output_for_epoch_end=batch_outputs, - ) - - # hook - response = self.trainer.call_hook("on_batch_start") - if response == -1: - return AttributeDict(signal=-1, grad_norm_dict={}) - - # hook - response = self.trainer.call_hook("on_train_batch_start", batch, batch_idx, dataloader_idx) - if response == -1: - return AttributeDict(signal=-1, grad_norm_dict={}) - - # lightning module hook - splits = self._tbptt_split_batch(batch) - - for split_idx, split_batch in enumerate(splits): - self.split_idx = split_idx - - if self.trainer.lightning_module.automatic_optimization: - for opt_idx, optimizer in self.get_active_optimizers(batch_idx): - result = self._run_optimization(batch_idx, split_idx, split_batch, opt_idx, optimizer) - if result: - batch_outputs[opt_idx].append(result.training_step_output_for_epoch_end) - grad_norm_dict = result.get("grad_norm_dict", {}) - else: - # in manual optimization, there is no looping over optimizers - result = self._run_optimization(batch_idx, split_idx, split_batch) - if result: - batch_outputs[0].append(result.training_step_output_for_epoch_end) - - output = AttributeDict( - signal=0, - # todo: Properly aggregate grad_norm accros opt_idx and split_idx - grad_norm_dict=grad_norm_dict, - training_step_output_for_epoch_end=batch_outputs, - ) - return output - - def _run_optimization(self, batch_idx, split_idx, split_batch, opt_idx=0, optimizer=None): - # TODO: In v1.5, when optimizer_idx gets removed from training_step in manual_optimization, change - # opt_idx=0 to opt_idx=None in the signature here - - # toggle model params + set info to logger_connector - self.run_train_split_start(split_idx, split_batch, opt_idx, optimizer) - - result = AttributeDict() - closure = self.make_closure(split_batch, batch_idx, opt_idx, optimizer, self._hiddens, result) - - if self.should_accumulate(): - # For gradient accumulation - - # ------------------- - # calculate loss (train step + train step end) - # ------------------- - # automatic_optimization=True: perform ddp sync only when performing optimizer_step - # automatic_optimization=False: don't block synchronization here - with self.block_ddp_sync_behaviour(): - closure() - - # ------------------------------ - # BACKWARD PASS - # ------------------------------ - # gradient update with accumulated gradients - else: - if self.trainer.lightning_module.automatic_optimization: - self.optimizer_step(optimizer, opt_idx, batch_idx, closure) - if len(self.trainer.optimizers) > 1: - # revert back to previous state - self.trainer.lightning_module.untoggle_optimizer(opt_idx) - else: - result = self.training_step(split_batch, batch_idx, opt_idx, self._hiddens) - - if not result: - # user decided to skip optimization - return result - - # update running loss + reset accumulated loss - self.update_running_loss(result.loss) - - self._process_closure_result(result) - return result - - def training_step_and_backward_closure( - self, - split_batch: Any, - batch_idx: int, - opt_idx: int, - optimizer: Optimizer, - hiddens, - return_result: AttributeDict, - ) -> Optional[torch.Tensor]: - - step_result = self.training_step_and_backward(split_batch, batch_idx, opt_idx, optimizer, hiddens) - if step_result is not None: - return_result.update(step_result) - return return_result.loss - - def make_closure(self, *closure_args, **closure_kwargs: Any) -> Callable: - """ Wraps the training step closure into a partial object which will be 
called within ``optimizer.step``. """ - partial_func = partial(self.training_step_and_backward_closure, *closure_args, **closure_kwargs) - return update_wrapper(partial_func, self.training_step_and_backward_closure) - - @contextmanager - def block_ddp_sync_behaviour(self, should_block_sync: bool = False): - """ - automatic_optimization = True - Blocks ddp sync gradients behaviour on backwards pass. - This is useful for skipping sync when accumulating gradients, reducing communication overhead - - automatic_optimization = False - do not block ddp gradient sync when using manual optimization - as gradients are needed within the training step - - Returns: - context manager with sync behaviour off - - """ - if ( - isinstance(self.trainer.training_type_plugin, ParallelPlugin) - and (self.trainer.lightning_module.automatic_optimization or should_block_sync) - ): - with self.trainer.training_type_plugin.block_backward_sync(): - yield None - else: - yield None - - def _process_closure_result(self, opt_closure_result: Optional[AttributeDict]) -> None: - if not opt_closure_result: - return - - # cache metrics - self.trainer.logger_connector.cache_training_step_metrics(opt_closure_result) - - # check if loss or model weights are nan - if self.trainer.terminate_on_nan: - self._check_finite(opt_closure_result.loss) - - def training_step_and_backward(self, split_batch, batch_idx, opt_idx, optimizer, hiddens): - """Wrap forward, zero_grad and backward in a closure so second order methods work""" - with self.trainer.profiler.profile("training_step_and_backward"): - # lightning module hook - result = self.training_step(split_batch, batch_idx, opt_idx, hiddens) - - if not self._skip_backward and self.trainer.lightning_module.automatic_optimization: - is_first_batch_to_accumulate = batch_idx % self.trainer.accumulate_grad_batches == 0 - - if is_first_batch_to_accumulate: - self.on_before_zero_grad(optimizer) - self.optimizer_zero_grad(batch_idx, optimizer, opt_idx) - - # backward pass - if result is not None: - with self.trainer.profiler.profile("backward"): - self.backward(result, optimizer, opt_idx) - - # hook - call this hook only - # when gradients have finished to accumulate - if not self.should_accumulate(): - self.on_after_backward(result.training_step_output, batch_idx, result.loss) - - # check if loss or model weights are nan - if self.trainer.terminate_on_nan: - self._check_finite(result.loss) - - else: - self.warning_cache.warn( - "training_step returned None. If this was on purpose, ignore this warning..." 
- ) - - return result - - def _check_finite(self, loss: torch.Tensor) -> None: - if not torch.isfinite(loss).all(): - raise ValueError(f'The loss returned in `training_step` is {loss}.') - model = self.trainer.lightning_module - detect_nan_parameters(model) - - def backward(self, result, optimizer, opt_idx, *args, **kwargs): - self.trainer.dev_debugger.track_event("backward_call") - - should_accumulate = self.should_accumulate() - - # backward can be called manually in the training loop - if isinstance(result, torch.Tensor): - self.trainer.accelerator.backward(result, optimizer, opt_idx, should_accumulate, *args, **kwargs) - else: - result.closure_loss = self.trainer.accelerator.backward( - result.closure_loss, optimizer, opt_idx, should_accumulate, *args, **kwargs - ) - - if not self.should_accumulate(): - # track gradients - result.grad_norm_dict = self.track_and_norm_grad(optimizer=optimizer) - - def update_lr_schedulers(self, interval: str) -> None: - if interval == "step": - finished_accumulation = self._accumulated_batches_reached() - finished_epoch = self._num_training_batches_reached() - if not finished_accumulation and not finished_epoch: - return - self.trainer.optimizer_connector.update_learning_rates( - interval=interval, - opt_indices=[opt_idx for opt_idx, _ in self.get_active_optimizers()], - ) - - def increment_accumulated_grad_global_step(self): - num_accumulated_batches_reached = self._accumulated_batches_reached() - num_training_batches_reached = self._num_training_batches_reached() - - # progress global step according to grads progress - if num_accumulated_batches_reached or num_training_batches_reached: - self.global_step = self.trainer.accelerator.update_global_step(self.total_batch_idx, self.global_step) - - def _accumulated_batches_reached(self): - return (self.batch_idx + 1) % self.trainer.accumulate_grad_batches == 0 - - def _num_training_batches_reached(self, is_last_batch=False): - return (self.batch_idx + 1) == self.trainer.num_training_batches or is_last_batch - - def should_accumulate(self): - # checks if backward or backward + optimizer step (via closure) - accumulation_done = self._accumulated_batches_reached() - is_final_batch = self._num_training_batches_reached() - return not (accumulation_done or is_final_batch) - - def _should_check_val_fx(self, batch_idx: int, is_last_batch: bool) -> bool: - """ Decide if we should run validation. 
""" - if not self.trainer.enable_validation: - return False - - is_val_check_epoch = (self.trainer.current_epoch + 1) % self.trainer.check_val_every_n_epoch == 0 - if not is_val_check_epoch: - return False - - # val_check_batch is inf for iterable datasets with no length defined - is_infinite_dataset = self.trainer.val_check_batch == float('inf') - if is_last_batch and is_infinite_dataset: - return True - - if self.trainer.should_stop: - return True - - # TODO: let training/eval loop handle logic around limit_*_batches and val_check_batch - is_val_check_batch = is_last_batch - if isinstance(self.trainer.limit_train_batches, int) and is_infinite_dataset: - is_val_check_batch = (batch_idx + 1) % self.trainer.limit_train_batches == 0 - elif self.trainer.val_check_batch != float('inf'): - is_val_check_batch = (batch_idx + 1) % self.trainer.val_check_batch == 0 - return is_val_check_batch - - def _build_kwargs(self, batch, batch_idx, opt_idx, hiddens): - # enable not needing to add opt_idx to training_step - step_kwargs = OrderedDict([('batch', batch), ('batch_idx', batch_idx)]) - - lightning_module = self.trainer.lightning_module - - if len(self.trainer.optimizers) > 1: - training_step_fx = getattr(lightning_module, "training_step") - has_opt_idx_in_train_step = is_param_in_hook_signature(training_step_fx, "optimizer_idx") - if has_opt_idx_in_train_step: - if not lightning_module.automatic_optimization: - self.warning_cache.warn( - "`training_step` hook signature has changed in v1.3." - " `optimizer_idx` argument has been removed in case of manual optimization. Support for" - " the old signature will be removed in v1.5", DeprecationWarning - ) - step_kwargs['optimizer_idx'] = opt_idx - elif not has_opt_idx_in_train_step and self.trainer.lightning_module.automatic_optimization: - raise ValueError( - f"Your LightningModule defines {len(self.trainer.optimizers)} optimizers but" - ' `training_step` is missing the `optimizer_idx` argument.' - ) - - # pass hiddens if using tbptt - if self._truncated_bptt_enabled(): - step_kwargs['hiddens'] = hiddens - - return step_kwargs - - def _truncated_bptt_enabled(self) -> bool: - """ Temporary tbptt utilities until this flag is fully migrated to the lightning module. """ - return self._truncated_bptt_steps() > 0 - - def _truncated_bptt_steps(self) -> int: - lightning_module = self.trainer.lightning_module - # Give precedence to the LightningModule as the Trainer flag will be removed in v1.5 - if lightning_module.truncated_bptt_steps > 0: - return lightning_module.truncated_bptt_steps - return self.trainer.truncated_bptt_steps or 0 - - def save_loggers_on_train_batch_end(self): - # when loggers should save to disk - should_flush_logs = self.trainer.logger_connector.should_flush_logs - if should_flush_logs and self.trainer.is_global_zero and self.trainer.logger is not None: - self.trainer.logger.save() - - def run_train_split_start(self, split_idx, split_batch, opt_idx, optimizer): - # make sure only the gradients of the current optimizer's parameters are calculated - # in the training step to prevent dangling gradients in multiple-optimizer setup. 
- if self.trainer.lightning_module.automatic_optimization and len(self.trainer.optimizers) > 1: - model = self.trainer.lightning_module - model.toggle_optimizer(optimizer, opt_idx) - - # use to track metrics internally - self.trainer.logger_connector.on_train_split_start(split_idx, opt_idx, split_batch) - - def update_running_loss(self, current_loss: torch.Tensor) -> None: - if self.trainer.lightning_module.automatic_optimization: - # track total loss for logging (avoid mem leaks) - self.accumulated_loss.append(current_loss) - - accumulated_loss = self.accumulated_loss.mean() - - if accumulated_loss is not None: - # calculate running loss for display - self.running_loss.append(self.accumulated_loss.mean() * self.trainer.accumulate_grad_batches) - - # reset for next set of accumulated grads - self.accumulated_loss.reset() diff --git a/pytorch_lightning/trainer/training_tricks.py b/pytorch_lightning/trainer/training_tricks.py index a45c9436dbdb7..beecc5e2a764d 100644 --- a/pytorch_lightning/trainer/training_tricks.py +++ b/pytorch_lightning/trainer/training_tricks.py @@ -18,7 +18,7 @@ import torch from torch import Tensor -from pytorch_lightning.core.lightning import LightningModule +import pytorch_lightning as pl from pytorch_lightning.utilities import rank_zero_deprecation from pytorch_lightning.utilities.finite_checks import detect_nan_parameters, print_nan_gradients @@ -34,7 +34,7 @@ class TrainerTrainingTricksMixin(ABC): # this is just a summary on variables used in this abstract class, # the proper values/initialisation should be done in child class - lightning_module: LightningModule + lightning_module: 'pl.LightningModule' def print_nan_gradients(self) -> None: rank_zero_deprecation( diff --git a/pytorch_lightning/tuner/batch_size_scaling.py b/pytorch_lightning/tuner/batch_size_scaling.py index d114c36a60104..f23a7f883c5a2 100644 --- a/pytorch_lightning/tuner/batch_size_scaling.py +++ b/pytorch_lightning/tuner/batch_size_scaling.py @@ -17,7 +17,7 @@ import pytorch_lightning as pl from pytorch_lightning.loggers.base import DummyLogger -from pytorch_lightning.utilities import DeviceType, rank_zero_warn +from pytorch_lightning.utilities import rank_zero_warn from pytorch_lightning.utilities.cloud_io import get_filesystem from pytorch_lightning.utilities.data import has_len from pytorch_lightning.utilities.exceptions import MisconfigurationException @@ -83,7 +83,7 @@ def scale_batch_size( # Restore initial state of model if trainer.is_global_zero: - trainer.checkpoint_connector.restore(str(save_path), on_gpu=trainer._device_type == DeviceType.GPU) + trainer.checkpoint_connector.restore(str(save_path)) fs = get_filesystem(str(save_path)) if fs.exists(save_path): fs.rm(save_path) @@ -115,8 +115,8 @@ def __scale_batch_dump_params(trainer: 'pl.Trainer') -> None: def __scale_batch_reset_params(trainer: 'pl.Trainer', model: 'pl.LightningModule', steps_per_trial: int) -> None: trainer.auto_scale_batch_size = None # prevent recursion trainer.auto_lr_find = False # avoid lr find being called multiple times - trainer.train_loop.current_epoch = 0 - trainer.train_loop.max_steps = steps_per_trial # take few steps + trainer.fit_loop.current_epoch = 0 + trainer.fit_loop.max_steps = steps_per_trial # take few steps trainer.weights_summary = None # not needed before full run trainer.logger = DummyLogger() trainer.callbacks = [] # not needed before full run @@ -127,8 +127,8 @@ def __scale_batch_reset_params(trainer: 'pl.Trainer', model: 'pl.LightningModule def __scale_batch_restore_params(trainer: 
'pl.Trainer') -> None: trainer.auto_lr_find = trainer.__dumped_params['auto_lr_find'] - trainer.train_loop.current_epoch = trainer.__dumped_params['current_epoch'] - trainer.train_loop.max_steps = trainer.__dumped_params['max_steps'] + trainer.fit_loop.current_epoch = trainer.__dumped_params['current_epoch'] + trainer.fit_loop.max_steps = trainer.__dumped_params['max_steps'] trainer.weights_summary = trainer.__dumped_params['weights_summary'] trainer.logger = trainer.__dumped_params['logger'] trainer.callbacks = trainer.__dumped_params['callbacks'] @@ -144,7 +144,7 @@ def _run_power_scaling( """ Batch scaling mode where the size is doubled at each iteration until an OOM error is encountered. """ for _ in range(max_trials): garbage_collection_cuda() - trainer.train_loop.global_step = 0 # reset after each try + trainer.fit_loop.global_step = 0 # reset after each try try: # Try fit trainer.tuner._run(model) @@ -178,7 +178,7 @@ def _run_binsearch_scaling( count = 0 while True: garbage_collection_cuda() - trainer.train_loop.global_step = 0 # reset after each try + trainer.fit_loop.global_step = 0 # reset after each try try: # Try fit trainer.tuner._run(model) diff --git a/pytorch_lightning/tuner/lr_finder.py b/pytorch_lightning/tuner/lr_finder.py index 71d145d921ff4..29a93d3916aea 100644 --- a/pytorch_lightning/tuner/lr_finder.py +++ b/pytorch_lightning/tuner/lr_finder.py @@ -25,7 +25,7 @@ import pytorch_lightning as pl from pytorch_lightning.callbacks import Callback from pytorch_lightning.loggers.base import DummyLogger -from pytorch_lightning.utilities import DeviceType, rank_zero_warn +from pytorch_lightning.utilities import rank_zero_warn from pytorch_lightning.utilities.cloud_io import get_filesystem from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.parsing import lightning_hasattr, lightning_setattr @@ -230,7 +230,7 @@ def lr_find( trainer.logger = DummyLogger() # Max step set to number of iterations - trainer.train_loop.max_steps = num_training + trainer.fit_loop.max_steps = num_training # Disable standard progress bar for fit if trainer.progress_bar_callback: @@ -255,11 +255,11 @@ def lr_find( # Transfer results from callback to lr finder object lr_finder.results.update({'lr': trainer.callbacks[0].lrs, 'loss': trainer.callbacks[0].losses}) - lr_finder._total_batch_idx = trainer.train_loop.total_batch_idx # for debug purpose + lr_finder._total_batch_idx = trainer.fit_loop.total_batch_idx # for debug purpose # Reset model state if trainer.is_global_zero: - trainer.checkpoint_connector.restore(str(save_path), on_gpu=trainer._device_type == DeviceType.GPU) + trainer.checkpoint_connector.restore(str(save_path)) fs = get_filesystem(str(save_path)) if fs.exists(save_path): fs.rm(save_path) @@ -297,8 +297,8 @@ def __lr_finder_restore_params(trainer, model): trainer.auto_lr_find = trainer.__dumped_params['auto_lr_find'] trainer.logger = trainer.__dumped_params['logger'] trainer.callbacks = trainer.__dumped_params['callbacks'] - trainer.train_loop.max_steps = trainer.__dumped_params['max_steps'] - trainer.train_loop.current_epoch = trainer.__dumped_params['current_epoch'] + trainer.fit_loop.max_steps = trainer.__dumped_params['max_steps'] + trainer.fit_loop.current_epoch = trainer.__dumped_params['current_epoch'] model.configure_optimizers = trainer.__dumped_params['configure_optimizers'] del trainer.__dumped_params @@ -340,7 +340,7 @@ def __init__( def on_batch_start(self, trainer, pl_module): """ Called before each training batch, 
logs the lr that will be used """ - if (trainer.train_loop.batch_idx + 1) % trainer.accumulate_grad_batches != 0: + if (trainer.fit_loop.batch_idx + 1) % trainer.accumulate_grad_batches != 0: return if self.progress_bar_refresh_rate and self.progress_bar is None: @@ -350,13 +350,13 @@ def on_batch_start(self, trainer, pl_module): def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, dataloader_idx): """ Called when the training batch ends, logs the calculated loss """ - if (trainer.train_loop.batch_idx + 1) % trainer.accumulate_grad_batches != 0: + if (trainer.fit_loop.batch_idx + 1) % trainer.accumulate_grad_batches != 0: return if self.progress_bar: self.progress_bar.update() - current_loss = trainer.train_loop.running_loss.last().item() + current_loss = trainer.fit_loop.running_loss.last().item() current_step = trainer.global_step # Avg loss (loss with momentum) + smoothing @@ -366,7 +366,7 @@ def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, data # Check if we diverging if self.early_stop_threshold is not None: if current_step > 1 and smoothed_loss > self.early_stop_threshold * self.best_loss: - trainer.train_loop.max_steps = current_step # stop signal + trainer.fit_loop.max_steps = current_step # stop signal if self.progress_bar: self.progress_bar.close() diff --git a/pytorch_lightning/tuner/tuning.py b/pytorch_lightning/tuner/tuning.py index a25b950ee3fca..449f9d862ecef 100644 --- a/pytorch_lightning/tuner/tuning.py +++ b/pytorch_lightning/tuner/tuning.py @@ -11,14 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import Any, Dict, List, Optional, Union - -from torch.utils.data import DataLoader +from typing import Any, Dict, Optional, Union import pytorch_lightning as pl from pytorch_lightning.trainer.states import TrainerStatus from pytorch_lightning.tuner.batch_size_scaling import scale_batch_size from pytorch_lightning.tuner.lr_finder import _LRFinder, lr_find +from pytorch_lightning.utilities.types import EVAL_DATALOADERS, TRAIN_DATALOADERS class Tuner: @@ -67,14 +66,15 @@ def _run(self, *args: Any, **kwargs: Any) -> None: def scale_batch_size( self, model: 'pl.LightningModule', - train_dataloader: Optional[DataLoader] = None, - val_dataloaders: Optional[Union[DataLoader, List[DataLoader]]] = None, + train_dataloaders: Optional[Union[TRAIN_DATALOADERS, 'pl.LightningDataModule']] = None, + val_dataloaders: Optional[EVAL_DATALOADERS] = None, datamodule: Optional['pl.LightningDataModule'] = None, mode: str = 'power', steps_per_trial: int = 3, init_val: int = 2, max_trials: int = 25, batch_arg_name: str = 'batch_size', + train_dataloader=None, # noqa TODO: remove with 1.6 ) -> Optional[int]: """ Iteratively try to find the largest batch size for a given model @@ -83,11 +83,11 @@ def scale_batch_size( Args: model: Model to tune. - train_dataloader: A Pytorch DataLoader with training samples. If the model has - a predefined train_dataloader method this will be skipped. + train_dataloaders: A collection of :class:`torch.utils.data.DataLoader` or a + :class:`~pytorch_lightning.core.datamodule.LightningDataModule` specifying training samples. + In the case of multiple dataloaders, please see this :ref:`page `. - val_dataloaders: Either a single Pytorch Dataloader or a list of them, specifying validation samples. 
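# Editorial sketch, not part of the diff: the LR-finder callback above keeps an
# exponential running average of the loss with Adam-style bias correction, and stops
# the search when the smoothed loss diverges past a multiple of the best loss seen.
# Standalone and illustrative only; `beta` mirrors the callback's smoothing factor.
def _smoothed_losses(losses, beta=0.98, early_stop_threshold=4.0):
    avg_loss, best_loss, out = 0.0, float('inf'), []
    for step, loss in enumerate(losses, start=1):
        avg_loss = beta * avg_loss + (1 - beta) * loss
        smoothed = avg_loss / (1 - beta ** step)  # bias-corrected running average
        if step > 1 and smoothed > early_stop_threshold * best_loss:
            break  # diverging: the callback signals this by capping `fit_loop.max_steps`
        best_loss = min(best_loss, smoothed)
        out.append(smoothed)
    return out

# the first smoothed value equals the first loss thanks to the bias correction
assert _smoothed_losses([2.0, 1.0])[0] == 2.0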
- If the model has a predefined val_dataloaders method this will be skipped + val_dataloaders: A :class:`torch.utils.data.DataLoader` or a sequence of them specifying validation samples. datamodule: An instance of :class:`~pytorch_lightning.core.datamodule.LightningDataModule`. @@ -118,7 +118,8 @@ def scale_batch_size( self.trainer.auto_scale_batch_size = True result = self.trainer.tune( model, - train_dataloader=train_dataloader, + train_dataloaders=train_dataloaders, + train_dataloader=train_dataloader, # TODO: deprecated - remove with 1.6 val_dataloaders=val_dataloaders, datamodule=datamodule, scale_batch_size_kwargs={ @@ -135,8 +136,8 @@ def scale_batch_size( def lr_find( self, model: 'pl.LightningModule', - train_dataloader: Optional[DataLoader] = None, - val_dataloaders: Optional[Union[DataLoader, List[DataLoader]]] = None, + train_dataloaders: Optional[Union[TRAIN_DATALOADERS, 'pl.LightningDataModule']] = None, + val_dataloaders: Optional[EVAL_DATALOADERS] = None, datamodule: Optional['pl.LightningDataModule'] = None, min_lr: float = 1e-8, max_lr: float = 1, @@ -144,6 +145,7 @@ def lr_find( mode: str = 'exponential', early_stop_threshold: float = 4.0, update_attr: bool = False, + train_dataloader=None, # noqa TODO: remove with 1.6 ) -> Optional[_LRFinder]: """ Enables the user to do a range test of good initial learning rates, @@ -152,11 +154,11 @@ def lr_find( Args: model: Model to tune. - train_dataloader: A Pytorch DataLoader with training samples. If the model has - a predefined train_dataloader method this will be skipped. + train_dataloaders: A collection of :class:`torch.utils.data.DataLoader` or a + :class:`~pytorch_lightning.core.datamodule.LightningDataModule` specifying training samples. + In the case of multiple dataloaders, please see this :ref:`page `. - val_dataloaders: Either a single Pytorch Dataloader or a list of them, specifying validation samples. - If the model has a predefined val_dataloaders method this will be skipped + val_dataloaders: A :class:`torch.utils.data.DataLoader` or a sequence of them specifying validation samples. datamodule: An instance of :class:`~pytorch_lightning.core.datamodule.LightningDataModule`. 
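# Editorial sketch, not part of the diff: the tuner entry points above keep the
# deprecated `train_dataloader` kwarg alongside the new `train_dataloaders` and
# forward both to `Trainer.tune` until removal in 1.6. A generic rendering of that
# keep-and-forward deprecation pattern; the names and the exact merging performed
# inside `Trainer.tune` are assumptions for illustration.
import warnings

def tune(train_dataloaders=None, train_dataloader=None):
    if train_dataloader is not None:
        warnings.warn(
            '`train_dataloader` is deprecated, use `train_dataloaders` instead', DeprecationWarning
        )
        train_dataloaders = train_dataloaders if train_dataloaders is not None else train_dataloader
    return train_dataloaders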
@@ -185,7 +187,8 @@ def lr_find( self.trainer.auto_lr_find = True result = self.trainer.tune( model, - train_dataloader=train_dataloader, + train_dataloaders=train_dataloaders, + train_dataloader=train_dataloader, # TODO: deprecated - remove with 1.6 val_dataloaders=val_dataloaders, datamodule=datamodule, lr_find_kwargs={ diff --git a/pytorch_lightning/utilities/__init__.py b/pytorch_lightning/utilities/__init__.py index 6664be43bef88..536b36ceb81b0 100644 --- a/pytorch_lightning/utilities/__init__.py +++ b/pytorch_lightning/utilities/__init__.py @@ -16,13 +16,7 @@ import numpy from pytorch_lightning.utilities.apply_func import move_data_to_device # noqa: F401 -from pytorch_lightning.utilities.distributed import ( # noqa: F401 - AllGatherGrad, - rank_zero_deprecation, - rank_zero_info, - rank_zero_only, - rank_zero_warn, -) +from pytorch_lightning.utilities.distributed import AllGatherGrad, rank_zero_info, rank_zero_only # noqa: F401 from pytorch_lightning.utilities.enums import ( # noqa: F401 AMPType, DeviceType, @@ -38,16 +32,16 @@ _FAIRSCALE_AVAILABLE, _FAIRSCALE_FULLY_SHARDED_AVAILABLE, _FAIRSCALE_OSS_FP16_BROADCAST_AVAILABLE, - _FAIRSCALE_PIPE_AVAILABLE, _GROUP_AVAILABLE, _HOROVOD_AVAILABLE, _HYDRA_AVAILABLE, _HYDRA_EXPERIMENTAL_AVAILABLE, + _IPU_AVAILABLE, _IS_INTERACTIVE, _module_available, _NATIVE_AMP_AVAILABLE, _OMEGACONF_AVAILABLE, - _RPC_AVAILABLE, + _POPTORCH_AVAILABLE, _TORCH_GREATER_EQUAL_1_5, _TORCH_GREATER_EQUAL_1_6, _TORCH_GREATER_EQUAL_1_7, @@ -61,6 +55,7 @@ _XLA_AVAILABLE, ) from pytorch_lightning.utilities.parsing import AttributeDict, flatten_dict, is_picklable # noqa: F401 +from pytorch_lightning.utilities.warnings import rank_zero_deprecation, rank_zero_warn # noqa: F401 FLOAT16_EPSILON = numpy.finfo(numpy.float16).eps FLOAT32_EPSILON = numpy.finfo(numpy.float32).eps diff --git a/pytorch_lightning/utilities/apply_func.py b/pytorch_lightning/utilities/apply_func.py index 1cbab2fb8dee9..606eb37dd9730 100644 --- a/pytorch_lightning/utilities/apply_func.py +++ b/pytorch_lightning/utilities/apply_func.py @@ -11,8 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
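# Editorial note on the `utilities/__init__.py` change above: the warning helpers now
# live in `utilities.warnings` and are merely re-exported from the package root, so
# both import paths below should resolve to the same object. A quick sanity check,
# not code from the patch:
from pytorch_lightning.utilities import rank_zero_warn as from_root
from pytorch_lightning.utilities.warnings import rank_zero_warn as from_module
assert from_root is from_module  # re-export, not a copy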
+import dataclasses import operator from abc import ABC +from collections import OrderedDict from collections.abc import Mapping, Sequence from copy import copy from functools import partial @@ -54,12 +56,23 @@ def from_numpy(value, device: torch.device = None): ] +def _is_namedtuple(obj: object) -> bool: + # https://github.com/pytorch/pytorch/blob/v1.8.1/torch/nn/parallel/scatter_gather.py#L4-L8 + return isinstance(obj, tuple) and hasattr(obj, "_asdict") and hasattr(obj, "_fields") + + +def _is_dataclass_instance(obj): + # https://docs.python.org/3/library/dataclasses.html#module-level-decorators-classes-and-functions + return dataclasses.is_dataclass(obj) and not isinstance(obj, type) + + def apply_to_collection( data: Any, dtype: Union[type, tuple], function: Callable, *args, wrong_dtype: Optional[Union[type, tuple]] = None, + include_none: bool = True, **kwargs ) -> Any: """ @@ -70,38 +83,108 @@ def apply_to_collection( dtype: the given function will be applied to all elements of this dtype function: the function to apply *args: positional arguments (will be forwarded to calls of ``function``) - wrong_dtype: the given function won't be applied if this type is specified and the given collections is of - the :attr:`wrong_type` even if it is of type :attr`dtype` + wrong_dtype: the given function won't be applied if this type is specified and the given collections + is of the ``wrong_dtype`` even if it is of type ``dtype`` + include_none: Whether to include an element if the output of ``function`` is ``None``. **kwargs: keyword arguments (will be forwarded to calls of ``function``) Returns: - the resulting collection + The resulting collection """ - elem_type = type(data) - # Breaking condition if isinstance(data, dtype) and (wrong_dtype is None or not isinstance(data, wrong_dtype)): return function(data, *args, **kwargs) + elem_type = type(data) + # Recursively apply to collection items if isinstance(data, Mapping): + out = [] + for k, v in data.items(): + v = apply_to_collection(v, dtype, function, *args, wrong_dtype=wrong_dtype, **kwargs) + if include_none or v is not None: + out.append((k, v)) + return elem_type(OrderedDict(out)) + + is_namedtuple = _is_namedtuple(data) + is_sequence = isinstance(data, Sequence) and not isinstance(data, str) + if is_namedtuple or is_sequence: + out = [] + for d in data: + v = apply_to_collection(d, dtype, function, *args, wrong_dtype=wrong_dtype, **kwargs) + if include_none or v is not None: + out.append(v) + return elem_type(*out) if is_namedtuple else elem_type(out) + + if _is_dataclass_instance(data): + out = dict() + for field in data.__dataclass_fields__: + v = apply_to_collection(getattr(data, field), dtype, function, *args, wrong_dtype=wrong_dtype, **kwargs) + if include_none or v is not None: + out[field] = v + return elem_type(**out) + + # data is neither of dtype, nor a collection + return data + + +def apply_to_collections( + data1: Optional[Any], + data2: Optional[Any], + dtype: Union[type, tuple], + function: Callable, + *args, + wrong_dtype: Optional[Union[type, tuple]] = None, + **kwargs +) -> Any: + """ + Zips two collections and applies a function to their items of a certain dtype. 
+ + Args: + data1: The first collection + data2: The second collection + dtype: the given function will be applied to all elements of this dtype + function: the function to apply + *args: positional arguments (will be forwarded to calls of ``function``) + wrong_dtype: the given function won't be applied if this type is specified and the given collections + is of the ``wrong_dtype`` even if it is of type ``dtype`` + **kwargs: keyword arguments (will be forwarded to calls of ``function``) + + Returns: + The resulting collection + + Raises: + AssertionError: + If sequence collections have different data sizes. + """ + if data1 is None and data2 is not None: + # in case they were passed reversed + data1, data2 = data2, None + + elem_type = type(data1) + + if isinstance(data1, dtype) and data2 is not None and (wrong_dtype is None or not isinstance(data1, wrong_dtype)): + return function(data1, data2, *args, **kwargs) + + if isinstance(data1, Mapping) and data2 is not None: + # use union because we want to fail if a key does not exist in both + zipped = {k: (data1[k], data2[k]) for k in data1.keys() | data2.keys()} return elem_type({ - k: apply_to_collection(v, dtype, function, *args, wrong_dtype=wrong_dtype, **kwargs) - for k, v in data.items() + k: apply_to_collections(*v, dtype, function, *args, wrong_dtype=wrong_dtype, **kwargs) + for k, v in zipped.items() }) - if isinstance(data, tuple) and hasattr(data, '_fields'): # named tuple - return elem_type( - *(apply_to_collection(d, dtype, function, *args, wrong_dtype=wrong_dtype, **kwargs) for d in data) - ) + is_namedtuple = _is_namedtuple(data1) + is_sequence = isinstance(data1, Sequence) and not isinstance(data1, str) + if (is_namedtuple or is_sequence) and data2 is not None: + assert len(data1) == len(data2), 'Sequence collections have different sizes' + out = [ + apply_to_collections(v1, v2, dtype, function, *args, wrong_dtype=wrong_dtype, **kwargs) + for v1, v2 in zip(data1, data2) + ] + return elem_type(*out) if is_namedtuple else elem_type(out) - if isinstance(data, Sequence) and not isinstance(data, str): - return elem_type([ - apply_to_collection(d, dtype, function, *args, wrong_dtype=wrong_dtype, **kwargs) for d in data - ]) - - # data is neither of dtype, nor a collection - return data + return apply_to_collection(data1, dtype, function, *args, wrong_dtype=wrong_dtype, **kwargs) class TransferableDataType(ABC): @@ -168,15 +251,15 @@ def batch_to(data): return apply_to_collection(batch, dtype=dtype, function=batch_to) -def convert_to_tensors(data, device: torch.device = None): +def convert_to_tensors(data: Any, device: torch.device) -> Any: if device is None: - raise MisconfigurationException("device (torch.device) should be provided.") + raise MisconfigurationException("`torch.device` should be provided.") for src_dtype, conversion_func in CONVERSION_DTYPES: - data = apply_to_collection(data, src_dtype, partial(conversion_func, device=device)) + data = apply_to_collection(data, src_dtype, conversion_func, device=device) - def _move_to_device_and_make_contiguous(t: torch.Tensor, device: torch.device): + def _move_to_device_and_make_contiguous(t: torch.Tensor, device: torch.device) -> torch.Tensor: return t.to(device).contiguous() - data = apply_to_collection(data, torch.Tensor, partial(_move_to_device_and_make_contiguous, device=device)) + data = apply_to_collection(data, torch.Tensor, _move_to_device_and_make_contiguous, device=device) return data diff --git a/pytorch_lightning/utilities/argparse.py 
b/pytorch_lightning/utilities/argparse.py index 6f91397bd0306..aebbcb41ac34f 100644 --- a/pytorch_lightning/utilities/argparse.py +++ b/pytorch_lightning/utilities/argparse.py @@ -46,7 +46,7 @@ def from_argparse_args(cls, args: Union[Namespace, ArgumentParser], **kwargs): # we only want to pass in valid Trainer args, the rest may be user specific valid_kwargs = inspect.signature(cls.__init__).parameters - trainer_kwargs = dict((name, params[name]) for name in valid_kwargs if name in params) + trainer_kwargs = {name: params[name] for name in valid_kwargs if name in params} trainer_kwargs.update(**kwargs) return cls(**trainer_kwargs) @@ -139,9 +139,8 @@ def _get_abbrev_qualified_cls_name(cls): if cls.__module__.startswith("pytorch_lightning."): # Abbreviate. return f"pl.{cls.__name__}" - else: - # Fully qualified. - return f"{cls.__module__}.{cls.__qualname__}" + # Fully qualified. + return f"{cls.__module__}.{cls.__qualname__}" def add_argparse_args( @@ -169,6 +168,10 @@ def add_argparse_args( Only arguments of the allowed types (str, float, int, bool) will extend the ``parent_parser``. + Raises: + RuntimeError: + If ``parent_parser`` is not an ``ArgumentParser`` instance + Examples: # Option 1: Default usage. @@ -254,8 +257,7 @@ def add_argparse_args( if use_argument_group: return parent_parser - else: - return parser + return parser def _parse_args_from_docstring(docstring: str) -> Dict[str, str]: @@ -284,8 +286,7 @@ def _parse_args_from_docstring(docstring: str) -> Dict[str, str]: def _gpus_allowed_type(x) -> Union[int, str]: if ',' in x: return str(x) - else: - return int(x) + return int(x) def _gpus_arg_default(x) -> Union[int, str]: # pragma: no-cover @@ -298,5 +299,4 @@ def _gpus_arg_default(x) -> Union[int, str]: # pragma: no-cover def _int_or_float_type(x) -> Union[int, float]: if '.' in str(x): return float(x) - else: - return int(x) + return int(x) diff --git a/pytorch_lightning/utilities/cli.py b/pytorch_lightning/utilities/cli.py index 5dccad4ab9135..0edc50c14e8c4 100644 --- a/pytorch_lightning/utilities/cli.py +++ b/pytorch_lightning/utilities/cli.py @@ -12,15 +12,22 @@ # See the License for the specific language governing permissions and # limitations under the License. 
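# Editorial sketch, not part of the diff: the `apply_to_collections` helper added to
# apply_func.py above zips two collections of identical structure and applies a binary
# function at the leaves. A small illustration of the intended semantics, assuming
# matching structures (mismatched sequence lengths raise an AssertionError):
import torch
from pytorch_lightning.utilities.apply_func import apply_to_collections

a = {'x': torch.tensor(1.), 'y': [torch.tensor(2.), torch.tensor(3.)]}
b = {'x': torch.tensor(10.), 'y': [torch.tensor(20.), torch.tensor(30.)]}
summed = apply_to_collections(a, b, torch.Tensor, torch.add)
# summed == {'x': tensor(11.), 'y': [tensor(22.), tensor(33.)]}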
import os +import warnings from argparse import Namespace -from typing import Any, Dict, Optional, Type, Union +from types import MethodType +from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union + +from torch.optim import Optimizer from pytorch_lightning.callbacks import Callback from pytorch_lightning.core.datamodule import LightningDataModule from pytorch_lightning.core.lightning import LightningModule from pytorch_lightning.trainer.trainer import Trainer from pytorch_lightning.utilities import _module_available +from pytorch_lightning.utilities.exceptions import MisconfigurationException +from pytorch_lightning.utilities.model_helpers import is_overridden from pytorch_lightning.utilities.seed import seed_everything +from pytorch_lightning.utilities.types import LRSchedulerType, LRSchedulerTypeTuple _JSONARGPARSE_AVAILABLE = _module_available("jsonargparse") if _JSONARGPARSE_AVAILABLE: @@ -33,7 +40,7 @@ class LightningArgumentParser(ArgumentParser): """Extension of jsonargparse's ArgumentParser for pytorch-lightning""" - def __init__(self, *args, parse_as_dict: bool = True, **kwargs) -> None: + def __init__(self, *args: Any, parse_as_dict: bool = True, **kwargs: Any) -> None: """Initialize argument parser that supports configuration file input For full details of accepted arguments see `ArgumentParser.__init__ @@ -48,44 +55,124 @@ def __init__(self, *args, parse_as_dict: bool = True, **kwargs) -> None: self.add_argument( '--config', action=ActionConfigFile, help='Path to a configuration file in json or yaml format.' ) + self.callback_keys: List[str] = [] + self.optimizers_and_lr_schedulers: Dict[str, Tuple[Union[Type, Tuple[Type, ...]], str]] = {} def add_lightning_class_args( self, - lightning_class: Union[Type[Trainer], Type[LightningModule], Type[LightningDataModule]], + lightning_class: Union[Type[Trainer], Type[LightningModule], Type[LightningDataModule], Type[Callback]], nested_key: str, subclass_mode: bool = False - ) -> None: + ) -> List[str]: """ Adds arguments from a lightning class to a nested key of the parser Args: - lightning_class: Any subclass of {Trainer,LightningModule,LightningDataModule}. + lightning_class: Any subclass of {Trainer, LightningModule, LightningDataModule, Callback}. nested_key: Name of the nested namespace to store arguments. subclass_mode: Whether allow any subclass of the given class. """ - assert issubclass(lightning_class, (Trainer, LightningModule, LightningDataModule)) + assert issubclass(lightning_class, (Trainer, LightningModule, LightningDataModule, Callback)) + if issubclass(lightning_class, Callback): + self.callback_keys.append(nested_key) if subclass_mode: return self.add_subclass_arguments(lightning_class, nested_key, required=True) - return self.add_class_arguments(lightning_class, nested_key, fail_untyped=False) + return self.add_class_arguments( + lightning_class, + nested_key, + fail_untyped=False, + instantiate=not issubclass(lightning_class, Trainer), + ) + + def add_optimizer_args( + self, + optimizer_class: Union[Type[Optimizer], Tuple[Type[Optimizer], ...]], + nested_key: str = 'optimizer', + link_to: str = 'AUTOMATIC', + ) -> None: + """ + Adds arguments from an optimizer class to a nested key of the parser + + Args: + optimizer_class: Any subclass of torch.optim.Optimizer. + nested_key: Name of the nested namespace to store arguments. + link_to: Dot notation of a parser key to set arguments or AUTOMATIC. 
+ """ + if isinstance(optimizer_class, tuple): + assert all(issubclass(o, Optimizer) for o in optimizer_class) + else: + assert issubclass(optimizer_class, Optimizer) + kwargs = { + 'instantiate': False, + 'fail_untyped': False, + 'skip': {'params'}, + } + if isinstance(optimizer_class, tuple): + self.add_subclass_arguments(optimizer_class, nested_key, required=True, **kwargs) + else: + self.add_class_arguments(optimizer_class, nested_key, **kwargs) + self.optimizers_and_lr_schedulers[nested_key] = (optimizer_class, link_to) + + def add_lr_scheduler_args( + self, + lr_scheduler_class: Union[LRSchedulerType, Tuple[LRSchedulerType, ...]], + nested_key: str = 'lr_scheduler', + link_to: str = 'AUTOMATIC', + ) -> None: + """ + Adds arguments from a learning rate scheduler class to a nested key of the parser + + Args: + lr_scheduler_class: Any subclass of ``torch.optim.lr_scheduler.{_LRScheduler, ReduceLROnPlateau}``. + nested_key: Name of the nested namespace to store arguments. + link_to: Dot notation of a parser key to set arguments or AUTOMATIC. + """ + if isinstance(lr_scheduler_class, tuple): + assert all(issubclass(o, LRSchedulerTypeTuple) for o in lr_scheduler_class) + else: + assert issubclass(lr_scheduler_class, LRSchedulerTypeTuple) + kwargs = { + 'instantiate': False, + 'fail_untyped': False, + 'skip': {'optimizer'}, + } + if isinstance(lr_scheduler_class, tuple): + self.add_subclass_arguments(lr_scheduler_class, nested_key, required=True, **kwargs) + else: + self.add_class_arguments(lr_scheduler_class, nested_key, **kwargs) + self.optimizers_and_lr_schedulers[nested_key] = (lr_scheduler_class, link_to) class SaveConfigCallback(Callback): - """Saves a LightningCLI config to the log_dir when training starts""" + """Saves a LightningCLI config to the log_dir when training starts + + Raises: + RuntimeError: If the config file already exists in the directory to avoid overwriting a previous run + """ def __init__( self, parser: LightningArgumentParser, config: Union[Namespace, Dict[str, Any]], - config_filename: str = 'config.yaml' + config_filename: str, + overwrite: bool = False, ) -> None: self.parser = parser self.config = config self.config_filename = config_filename + self.overwrite = overwrite def on_train_start(self, trainer: Trainer, pl_module: LightningModule) -> None: log_dir = trainer.log_dir or trainer.default_root_dir config_path = os.path.join(log_dir, self.config_filename) - self.parser.save(self.config, config_path, skip_none=False) + if not self.overwrite and os.path.isfile(config_path): + raise RuntimeError( + f'{self.__class__.__name__} expected {config_path} to NOT exist. Aborting to avoid overwriting' + ' results of a previous run. You can delete the previous config file,' + ' set `LightningCLI(save_config_callback=None)` to disable config saving,' + ' or set `LightningCLI(save_config_overwrite=True)` to overwrite the config file.' 
+ ) + self.parser.save(self.config, config_path, skip_none=False, overwrite=self.overwrite) class LightningCLI: @@ -95,7 +182,9 @@ def __init__( self, model_class: Type[LightningModule], datamodule_class: Type[LightningDataModule] = None, - save_config_callback: Type[SaveConfigCallback] = SaveConfigCallback, + save_config_callback: Optional[Type[SaveConfigCallback]] = SaveConfigCallback, + save_config_filename: str = 'config.yaml', + save_config_overwrite: bool = False, trainer_class: Type[Trainer] = Trainer, trainer_defaults: Dict[str, Any] = None, seed_everything_default: int = None, @@ -132,6 +221,8 @@ def __init__( model_class: :class:`~pytorch_lightning.core.lightning.LightningModule` class to train on. datamodule_class: An optional :class:`~pytorch_lightning.core.datamodule.LightningDataModule` class. save_config_callback: A callback class to save the training config. + save_config_filename: Filename for the config file. + save_config_overwrite: Whether to overwrite an existing config file. trainer_class: An optional subclass of the :class:`~pytorch_lightning.trainer.trainer.Trainer` class. trainer_defaults: Set to override Trainer defaults or add persistent callbacks. seed_everything_default: Default value for the :func:`~pytorch_lightning.utilities.seed.seed_everything` @@ -154,6 +245,8 @@ def __init__( self.model_class = model_class self.datamodule_class = datamodule_class self.save_config_callback = save_config_callback + self.save_config_filename = save_config_filename + self.save_config_overwrite = save_config_overwrite self.trainer_class = trainer_class self.trainer_defaults = {} if trainer_defaults is None else trainer_defaults self.seed_everything_default = seed_everything_default @@ -165,11 +258,13 @@ def __init__( self.init_parser() self.add_core_arguments_to_parser() self.add_arguments_to_parser(self.parser) + self.link_optimizers_and_lr_schedulers() self.parse_arguments() if self.config['seed_everything'] is not None: seed_everything(self.config['seed_everything'], workers=True) self.before_instantiate_classes() self.instantiate_classes() + self.add_configure_optimizers_method_to_model() self.prepare_fit_kwargs() self.before_fit() self.fit() @@ -201,6 +296,17 @@ def add_arguments_to_parser(self, parser: LightningArgumentParser) -> None: parser: The argument parser object to which arguments can be added """ + def link_optimizers_and_lr_schedulers(self) -> None: + """Creates argument links for optimizers and lr_schedulers that specified a link_to""" + for key, (class_type, link_to) in self.parser.optimizers_and_lr_schedulers.items(): + if link_to == 'AUTOMATIC': + continue + if isinstance(class_type, tuple): + self.parser.link_arguments(key, link_to) + else: + add_class_path = _add_class_path_generator(class_type) + self.parser.link_arguments(key, link_to, compute_fn=add_class_path) + def parse_arguments(self) -> None: """Parses command line arguments and stores it in self.config""" self.config = self.parser.parse_args() @@ -210,40 +316,89 @@ def before_instantiate_classes(self) -> None: def instantiate_classes(self) -> None: """Instantiates the classes using settings from self.config""" - self.config_init = self.parser.instantiate_subclasses(self.config) - self.instantiate_datamodule() - self.instantiate_model() + self.config_init = self.parser.instantiate_classes(self.config) + self.datamodule = self.config_init.get('data') + self.model = self.config_init['model'] self.instantiate_trainer() - def instantiate_datamodule(self) -> None: - """Instantiates the datamodule 
using self.config_init['data'] if given""" - if self.datamodule_class is None: - self.datamodule = None - elif self.subclass_mode_data: - self.datamodule = self.config_init['data'] - else: - self.datamodule = self.datamodule_class(**self.config_init.get('data', {})) - - def instantiate_model(self) -> None: - """Instantiates the model using self.config_init['model']""" - if self.subclass_mode_model: - self.model = self.config_init['model'] - else: - self.model = self.model_class(**self.config_init.get('model', {})) - def instantiate_trainer(self) -> None: """Instantiates the trainer using self.config_init['trainer']""" if self.config_init['trainer'].get('callbacks') is None: self.config_init['trainer']['callbacks'] = [] + callbacks = [self.config_init[c] for c in self.parser.callback_keys] + self.config_init['trainer']['callbacks'].extend(callbacks) if 'callbacks' in self.trainer_defaults: if isinstance(self.trainer_defaults['callbacks'], list): self.config_init['trainer']['callbacks'].extend(self.trainer_defaults['callbacks']) else: self.config_init['trainer']['callbacks'].append(self.trainer_defaults['callbacks']) - if self.save_config_callback is not None: - self.config_init['trainer']['callbacks'].append(self.save_config_callback(self.parser, self.config)) + if self.save_config_callback and not self.config_init['trainer']['fast_dev_run']: + config_callback = self.save_config_callback( + self.parser, self.config, self.save_config_filename, overwrite=self.save_config_overwrite + ) + self.config_init['trainer']['callbacks'].append(config_callback) self.trainer = self.trainer_class(**self.config_init['trainer']) + def add_configure_optimizers_method_to_model(self) -> None: + """ + Adds to the model an automatically generated configure_optimizers method + + If a single optimizer and optionally a scheduler argument groups are added to the parser as 'AUTOMATIC', + then a `configure_optimizers` method is automatically implemented in the model class. + """ + + def get_automatic(class_type: Union[Type, Tuple[Type, ...]]) -> List[str]: + automatic = [] + for key, (base_class, link_to) in self.parser.optimizers_and_lr_schedulers.items(): + if not isinstance(base_class, tuple): + base_class = (base_class, ) + if link_to == 'AUTOMATIC' and any(issubclass(c, class_type) for c in base_class): + automatic.append(key) + return automatic + + optimizers = get_automatic(Optimizer) + lr_schedulers = get_automatic(LRSchedulerTypeTuple) + + if len(optimizers) == 0: + return + + if len(optimizers) > 1 or len(lr_schedulers) > 1: + raise MisconfigurationException( + f"`{self.__class__.__name__}.add_configure_optimizers_method_to_model` expects at most one optimizer " + f"and one lr_scheduler to be 'AUTOMATIC', but found {optimizers+lr_schedulers}. In this case the user " + "is expected to link the argument groups and implement `configure_optimizers`, see " + "https://pytorch-lightning.readthedocs.io/en/stable/common/lightning_cli.html" + "#optimizers-and-learning-rate-schedulers" + ) + + if is_overridden('configure_optimizers', self.model): + warnings.warn( + f"`{self.model.__class__.__name__}.configure_optimizers` will be overridden by " + f"`{self.__class__.__name__}.add_configure_optimizers_method_to_model`." 
+ ) + + optimizer_class = self.parser.optimizers_and_lr_schedulers[optimizers[0]][0] + optimizer_init = self.config_init.get(optimizers[0], {}) + if not isinstance(optimizer_class, tuple): + optimizer_init = _global_add_class_path(optimizer_class, optimizer_init) + lr_scheduler_init = None + if lr_schedulers: + lr_scheduler_class = self.parser.optimizers_and_lr_schedulers[lr_schedulers[0]][0] + lr_scheduler_init = self.config_init.get(lr_schedulers[0], {}) + if not isinstance(lr_scheduler_class, tuple): + lr_scheduler_init = _global_add_class_path(lr_scheduler_class, lr_scheduler_init) + + def configure_optimizers( + self: LightningModule + ) -> Union[Optimizer, Tuple[List[Optimizer], List[LRSchedulerType]]]: + optimizer = instantiate_class(self.parameters(), optimizer_init) + if not lr_scheduler_init: + return optimizer + lr_scheduler = instantiate_class(optimizer, lr_scheduler_init) + return [optimizer], [lr_scheduler] + + self.model.configure_optimizers = MethodType(configure_optimizers, self.model) + def prepare_fit_kwargs(self) -> None: """Prepares fit_kwargs including datamodule using self.config_init['data'] if given""" self.fit_kwargs = {'model': self.model} @@ -259,3 +414,37 @@ def fit(self) -> None: def after_fit(self) -> None: """Implement to run some code after fit has finished""" + + +def _global_add_class_path(class_type: Type, init_args: Dict[str, Any]) -> Dict[str, Any]: + return { + 'class_path': class_type.__module__ + '.' + class_type.__name__, + 'init_args': init_args, + } + + +def _add_class_path_generator(class_type: Type) -> Callable[[Dict[str, Any]], Dict[str, Any]]: + + def add_class_path(init_args: Dict[str, Any]) -> Dict[str, Any]: + return _global_add_class_path(class_type, init_args) + + return add_class_path + + +def instantiate_class(args: Union[Any, Tuple[Any, ...]], init: Dict[str, Any]) -> Any: + """Instantiates a class with the given args and init. + + Args: + args: Positional arguments required for instantiation. + init: Dict of the form {"class_path":...,"init_args":...}. + + Returns: + The instantiated class object. + """ + kwargs = init.get('init_args', {}) + if not isinstance(args, tuple): + args = (args, ) + class_module, class_name = init['class_path'].rsplit('.', 1) + module = __import__(class_module, fromlist=[class_name]) + args_class = getattr(module, class_name) + return args_class(*args, **kwargs) diff --git a/pytorch_lightning/utilities/cloud_io.py b/pytorch_lightning/utilities/cloud_io.py index 9e8240981feda..6bd6a172a7a41 100644 --- a/pytorch_lightning/utilities/cloud_io.py +++ b/pytorch_lightning/utilities/cloud_io.py @@ -38,9 +38,8 @@ def get_filesystem(path: Union[str, Path]): if "://" in path: # use the fileystem from the protocol specified return fsspec.filesystem(path.split(":", 1)[0]) - else: - # use local filesystem - return LocalFileSystem() + # use local filesystem + return LocalFileSystem() def atomic_save(checkpoint, filepath: str): diff --git a/pytorch_lightning/utilities/data.py b/pytorch_lightning/utilities/data.py index 27345fda3b110..9d36206748197 100644 --- a/pytorch_lightning/utilities/data.py +++ b/pytorch_lightning/utilities/data.py @@ -24,8 +24,14 @@ def has_iterable_dataset(dataloader: DataLoader): def has_len(dataloader: DataLoader) -> bool: - """ Checks if a given Dataloader has __len__ method implemented i.e. if - it is a finite dataloader or infinite dataloader. """ + """ + Checks if a given Dataloader has ``__len__`` method implemented i.e. if + it is a finite dataloader or infinite dataloader. 
+ + Raises: + ValueError: + If the length of Dataloader is 0, as it requires at least one batch + """ try: # try getting the length diff --git a/pytorch_lightning/utilities/debugging.py b/pytorch_lightning/utilities/debugging.py index 56833fd03735a..b4388bf89c195 100644 --- a/pytorch_lightning/utilities/debugging.py +++ b/pytorch_lightning/utilities/debugging.py @@ -39,8 +39,6 @@ class InternalDebugger(object): def __init__(self, trainer): self.enabled = os.environ.get('PL_DEV_DEBUG', '0') == '1' self.trainer = trainer - self.logged_metrics = [] - self.pbar_added_metrics = [] self.saved_train_losses = [] self.saved_val_losses = [] self.saved_test_losses = [] @@ -53,6 +51,7 @@ def __init__(self, trainer): self.test_dataloader_calls = [] self.dataloader_sequence_calls = [] + @enabled_only def track_event( self, evt_type: str, @@ -110,11 +109,6 @@ def track_load_dataloader_call(self, name, dataloaders): elif 'test' in name: self.test_dataloader_calls.append(values) - @enabled_only - def track_logged_metrics_history(self, scalar_metrics): - scalar_metrics['global_step'] = self.trainer.global_step - self.logged_metrics.append(scalar_metrics) - @enabled_only def track_train_loss_history(self, batch_idx, loss): loss_dict = {'batch_idx': batch_idx, 'epoch': self.trainer.current_epoch, 'loss': loss.detach()} @@ -151,11 +145,6 @@ def track_eval_loss_history(self, batch_idx, dataloader_idx, output): else: self.saved_val_losses.append(loss_dict) - @enabled_only - def track_pbar_metrics_history(self, metrics): - metrics['debug_epoch'] = self.trainer.current_epoch - self.pbar_added_metrics.append(metrics) - @enabled_only def track_early_stopping_history(self, callback, current): debug_dict = { diff --git a/pytorch_lightning/utilities/device_parser.py b/pytorch_lightning/utilities/device_parser.py index 511a91326953d..ffa11c053f83a 100644 --- a/pytorch_lightning/utilities/device_parser.py +++ b/pytorch_lightning/utilities/device_parser.py @@ -16,7 +16,8 @@ import torch -from pytorch_lightning.utilities import _TPU_AVAILABLE, rank_zero_warn +from pytorch_lightning.plugins.environments import TorchElasticEnvironment +from pytorch_lightning.utilities import _TPU_AVAILABLE, rank_zero_deprecation from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.imports import _compare_version @@ -28,6 +29,12 @@ def determine_root_gpu_device(gpus: List[int]) -> Optional[int]: Returns: designated root GPU device id + + Raises: + TypeError: + If ``gpus`` is not a list + AssertionError: + If GPU list is empty """ if gpus is None: return None @@ -78,6 +85,11 @@ def parse_gpu_ids(gpus: Optional[Union[int, str, List[int]]]) -> Optional[List[i gpus = _normalize_parse_gpu_input_to_list(gpus) if not gpus: raise MisconfigurationException("GPUs requested but none are available.") + + if TorchElasticEnvironment.is_using_torchelastic() and len(gpus) != 1 and len(_get_all_available_gpus()) == 1: + # omit sanity check on torchelastic as by default shows one visible GPU per process + return gpus + gpus = _sanitize_gpu_ids(gpus) return gpus @@ -96,6 +108,10 @@ def parse_tpu_cores(tpu_cores: Union[int, str, List]) -> Optional[Union[List[int Returns: a list of tpu_cores to be used or ``None`` if no TPU cores were requested + + Raises: + MisconfigurationException: + If TPU cores aren't 1 or 8 cores, or no TPU devices are found """ _check_data_type(tpu_cores) @@ -116,20 +132,18 @@ def _normalize_parse_gpu_string_input(s: Union[int, str, List[int]]) -> Union[in return s if s == '-1': 
return -1 - elif ',' in s: + if ',' in s: return [int(x.strip()) for x in s.split(',') if len(x) > 0] - else: - num_gpus = int(s.strip()) - if _compare_version("pytorch_lightning", operator.lt, "1.5"): - rank_zero_warn( - f"Parsing of the Trainer argument gpus='{s}' (string) will change in the future." - " In the current version of Lightning, this will select" - f" CUDA device with index {num_gpus}, but from v1.5 it will select gpus" - f" {list(range(num_gpus))} (same as gpus={s} (int)).", - DeprecationWarning, - ) - return [num_gpus] - return num_gpus + num_gpus = int(s.strip()) + if _compare_version("pytorch_lightning", operator.lt, "1.5"): + rank_zero_deprecation( + f"Parsing of the Trainer argument gpus='{s}' (string) will change in the future." + " In the current version of Lightning, this will select" + f" CUDA device with index {num_gpus}, but from v1.5 it will select gpus" + f" {list(range(num_gpus))} (same as gpus={s} (int)).", + ) + return [num_gpus] + return num_gpus def _sanitize_gpu_ids(gpus: List[int]) -> List[int]: @@ -142,6 +156,10 @@ def _sanitize_gpu_ids(gpus: List[int]) -> List[int]: Returns: unmodified gpus variable + + Raises: + MisconfigurationException: + If machine has fewer available GPUs than requested. """ all_available_gpus = _get_all_available_gpus() for gpu in gpus: @@ -181,6 +199,10 @@ def _check_data_type(device_ids: Any) -> None: Args: device_ids: gpus/tpu_cores parameter as passed to the Trainer + + Raises: + MisconfigurationException: + If ``device_ids`` of GPU/TPUs aren't ``int``, ``str``, sequence of ``int`` or ``None`` """ if device_ids is not None and \ (not isinstance(device_ids, (int, str, MutableSequence, tuple)) or isinstance(device_ids, bool)): diff --git a/pytorch_lightning/utilities/distributed.py b/pytorch_lightning/utilities/distributed.py index a54d00a983d9e..6ca2de7eb2ca2 100644 --- a/pytorch_lightning/utilities/distributed.py +++ b/pytorch_lightning/utilities/distributed.py @@ -14,8 +14,8 @@ import logging import os -import warnings -from functools import partial, wraps +from functools import wraps +from platform import python_version from typing import Any, Optional, Union import torch @@ -65,22 +65,44 @@ def _get_rank() -> int: rank_zero_only.rank = getattr(rank_zero_only, 'rank', _get_rank()) -def _warn(*args, **kwargs): - warnings.warn(*args, **kwargs) +def rank_zero_warn(*args, stacklevel: int = 5, **kwargs): + from pytorch_lightning.utilities.warnings import rank_zero_deprecation, rank_zero_warn + rank_zero_deprecation( + '`pytorch_lightning.utilities.distributed.rank_zero_warn` has been moved to' + ' `pytorch_lightning.utilities.rank_zero_warn` in v1.3.7 and will be removed in v1.6' + ) + return rank_zero_warn(*args, stacklevel=stacklevel, **kwargs) + +def rank_zero_deprecation(*args, stacklevel: int = 5, **kwargs): + from pytorch_lightning.utilities.warnings import rank_zero_deprecation + rank_zero_deprecation( + '`pytorch_lightning.utilities.distributed.rank_zero_deprecation` has been moved to' + ' `pytorch_lightning.utilities.rank_zero_deprecation` in v1.3.7 and will be removed in v1.6' + ) + return rank_zero_deprecation(*args, stacklevel=stacklevel, **kwargs) -def _info(*args, **kwargs): + +def _info(*args, stacklevel: int = 2, **kwargs): + if python_version() >= "3.8.0": + kwargs['stacklevel'] = stacklevel log.info(*args, **kwargs) -def _debug(*args, **kwargs): +def _debug(*args, stacklevel: int = 2, **kwargs): + if python_version() >= "3.8.0": + kwargs['stacklevel'] = stacklevel log.debug(*args, **kwargs) -rank_zero_debug = 
rank_zero_only(_debug) -rank_zero_info = rank_zero_only(_info) -rank_zero_warn = rank_zero_only(_warn) -rank_zero_deprecation = partial(rank_zero_warn, category=DeprecationWarning) +@rank_zero_only +def rank_zero_debug(*args, stacklevel: int = 4, **kwargs): + _debug(*args, stacklevel=stacklevel, **kwargs) + + +@rank_zero_only +def rank_zero_info(*args, stacklevel: int = 4, **kwargs): + _info(*args, stacklevel=stacklevel, **kwargs) def gather_all_tensors(result: Union[torch.Tensor], group: Optional[Any] = None): @@ -113,6 +135,10 @@ def gather_all_tensors(result: Union[torch.Tensor], group: Optional[Any] = None) return gathered_result +def distributed_available() -> bool: + return torch.distributed.is_available() and torch.distributed.is_initialized() or tpu_distributed() + + def sync_ddp_if_available( result: Union[torch.Tensor], group: Optional[Any] = None, @@ -129,7 +155,7 @@ def sync_ddp_if_available( Return: reduced value """ - if torch.distributed.is_available() and torch.distributed.is_initialized(): + if distributed_available(): return sync_ddp(result, group=group, reduce_op=reduce_op) return result @@ -208,12 +234,11 @@ def all_gather_ddp_if_available( A tensor of shape (world_size, batch, ...) """ group = group if group is not None else torch.distributed.group.WORLD - if torch.distributed.is_available() and torch.distributed.is_initialized(): + if distributed_available(): if sync_grads: return AllGatherGrad.apply(tensor, group) - else: - with torch.no_grad(): - return AllGatherGrad.apply(tensor, group) + with torch.no_grad(): + return AllGatherGrad.apply(tensor, group) return tensor @@ -294,6 +319,7 @@ def register_ddp_comm_hook( ddp_comm_wrapper=default.fp16_compress_wrapper, ) """ + from pytorch_lightning.utilities import rank_zero_warn if not _TORCH_GREATER_EQUAL_1_8: rank_zero_warn("Not registering DDP comm hook. To use communication hooks, please use pytorch>=1.8.0.") return diff --git a/pytorch_lightning/utilities/enums.py b/pytorch_lightning/utilities/enums.py index 98e10a9126a44..98f2770d03cf9 100644 --- a/pytorch_lightning/utilities/enums.py +++ b/pytorch_lightning/utilities/enums.py @@ -79,7 +79,6 @@ def is_interactive_compatible(self) -> bool: HOROVOD = 'horovod' DDP_SHARDED = 'ddp_sharded' DDP_SHARDED_SPAWN = 'ddp_sharded_spawn' - RPC_SEQUENTIAL_PLUGIN = 'rpc_sequential' DDP_FULLY_SHARDED = "ddp_fully_sharded" @@ -97,6 +96,7 @@ class DeviceType(LightningEnum): """ CPU = 'CPU' GPU = 'GPU' + IPU = 'IPU' TPU = 'TPU' diff --git a/pytorch_lightning/utilities/exceptions.py b/pytorch_lightning/utilities/exceptions.py index 01b1e8c053950..bf5258f4f5f36 100644 --- a/pytorch_lightning/utilities/exceptions.py +++ b/pytorch_lightning/utilities/exceptions.py @@ -14,4 +14,12 @@ class MisconfigurationException(Exception): - pass + """ + Exception used to inform users of mis-use with PyTorch Lightning + """ + + +class DeadlockDetectedException(Exception): + """ + Exception used when a deadlock has been detected and processes are being killed + """ diff --git a/pytorch_lightning/utilities/finite_checks.py b/pytorch_lightning/utilities/finite_checks.py index 770ea7a2276f0..b40e97c9b45e9 100644 --- a/pytorch_lightning/utilities/finite_checks.py +++ b/pytorch_lightning/utilities/finite_checks.py @@ -29,7 +29,13 @@ def print_nan_gradients(model: nn.Module) -> None: def detect_nan_parameters(model: nn.Module) -> None: - """ Iterates over model parameters and prints gradients if any parameter is not finite. 
""" + """ + Iterates over model parameters and prints gradients if any parameter is not finite. + + Raises: + ValueError: + If ``NaN`` or ``inf`` values are found + """ for name, param in model.named_parameters(): if not torch.isfinite(param).all(): print_nan_gradients(model) diff --git a/pytorch_lightning/utilities/imports.py b/pytorch_lightning/utilities/imports.py index f40d092f68e9f..3125a2d38f15e 100644 --- a/pytorch_lightning/utilities/imports.py +++ b/pytorch_lightning/utilities/imports.py @@ -75,7 +75,6 @@ def _compare_version(package: str, op, version) -> bool: _BOLTS_AVAILABLE = _module_available('pl_bolts') _DEEPSPEED_AVAILABLE = not _IS_WINDOWS and _module_available('deepspeed') _FAIRSCALE_AVAILABLE = _TORCH_GREATER_EQUAL_1_6 and not _IS_WINDOWS and _module_available('fairscale.nn') -_FAIRSCALE_PIPE_AVAILABLE = _FAIRSCALE_AVAILABLE and _compare_version("fairscale", operator.le, "0.1.3") _FAIRSCALE_OSS_FP16_BROADCAST_AVAILABLE = _FAIRSCALE_AVAILABLE and _compare_version("fairscale", operator.ge, "0.3.3") _FAIRSCALE_FULLY_SHARDED_AVAILABLE = _FAIRSCALE_AVAILABLE and _compare_version("fairscale", operator.ge, "0.3.4") _GROUP_AVAILABLE = not _IS_WINDOWS and _module_available('torch.distributed.group') @@ -85,7 +84,7 @@ def _compare_version(package: str, op, version) -> bool: _KINETO_AVAILABLE = _TORCH_GREATER_EQUAL_1_8_1 and torch.profiler.kineto_available() _NATIVE_AMP_AVAILABLE = _module_available("torch.cuda.amp") and hasattr(torch.cuda.amp, "autocast") _OMEGACONF_AVAILABLE = _module_available("omegaconf") -_RPC_AVAILABLE = not _IS_WINDOWS and _module_available('torch.distributed.rpc') +_POPTORCH_AVAILABLE = _module_available('poptorch') _TORCH_QUANTIZE_AVAILABLE = bool([eg for eg in torch.backends.quantized.supported_engines if eg != 'none']) _TORCHTEXT_AVAILABLE = _module_available("torchtext") _TORCHVISION_AVAILABLE = _module_available('torchvision') @@ -96,3 +95,9 @@ def _compare_version(package: str, op, version) -> bool: from pytorch_lightning.utilities.xla_device import XLADeviceUtils # noqa: E402 _TPU_AVAILABLE = XLADeviceUtils.tpu_device_exists() + +if _POPTORCH_AVAILABLE: + import poptorch + _IPU_AVAILABLE = poptorch.ipuHardwareIsAvailable() +else: + _IPU_AVAILABLE = False diff --git a/pytorch_lightning/utilities/memory.py b/pytorch_lightning/utilities/memory.py index 6c01390a8c81e..0ae88e8995614 100644 --- a/pytorch_lightning/utilities/memory.py +++ b/pytorch_lightning/utilities/memory.py @@ -76,11 +76,10 @@ def is_out_of_cpu_memory(exception): def garbage_collection_cuda(): """Garbage collection Torch (CUDA) memory.""" gc.collect() - if torch.cuda.is_available(): - try: - # This is the last thing that should cause an OOM error, but seemingly it can. - torch.cuda.empty_cache() - except RuntimeError as exception: - if not is_oom_error(exception): - # Only handle OOM errors - raise + try: + # This is the last thing that should cause an OOM error, but seemingly it can. + torch.cuda.empty_cache() + except RuntimeError as exception: + if not is_oom_error(exception): + # Only handle OOM errors + raise diff --git a/pytorch_lightning/utilities/metrics.py b/pytorch_lightning/utilities/metrics.py index bd57470dc270e..5db2ff5d83360 100644 --- a/pytorch_lightning/utilities/metrics.py +++ b/pytorch_lightning/utilities/metrics.py @@ -12,29 +12,30 @@ # See the License for the specific language governing permissions and # limitations under the License. """Helper functions to operate on metric values. 
""" +import numbers +from typing import Any import torch +from pytorch_lightning.utilities.apply_func import apply_to_collection from pytorch_lightning.utilities.exceptions import MisconfigurationException -def metrics_to_scalars(metrics: dict) -> dict: - """ Recursively walk through a dictionary of metrics and convert single-item tensors to scalar values. """ +def metrics_to_scalars(metrics: Any) -> Any: + """ + Recursively walk through a collection and convert single-item tensors to scalar values - # TODO: this is duplicated in MetricsHolder. should be unified - new_metrics = {} - for k, v in metrics.items(): - if isinstance(v, torch.Tensor): - if v.numel() != 1: - raise MisconfigurationException( - f"The metric `{k}` does not contain a single element" - f" thus it cannot be converted to float. Found `{v}`" - ) - v = v.item() + Raises: + MisconfigurationException: + If ``value`` contains multiple elements, hence preventing conversion to ``float`` + """ - if isinstance(v, dict): - v = metrics_to_scalars(v) + def to_item(value: torch.Tensor) -> numbers.Number: + if value.numel() != 1: + raise MisconfigurationException( + f"The metric `{value}` does not contain a single element" + f" thus it cannot be converted to float." + ) + return value.item() - new_metrics[k] = v - - return new_metrics + return apply_to_collection(metrics, torch.Tensor, to_item) diff --git a/pytorch_lightning/utilities/model_helpers.py b/pytorch_lightning/utilities/model_helpers.py index 87bd9e6c4545d..e52f8efa2689f 100644 --- a/pytorch_lightning/utilities/model_helpers.py +++ b/pytorch_lightning/utilities/model_helpers.py @@ -11,33 +11,58 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from functools import partial +from typing import Optional, Type, Union +from unittest.mock import Mock -from typing import Union +import pytorch_lightning as pl +from pytorch_lightning.utilities import rank_zero_deprecation -from pytorch_lightning.core.datamodule import LightningDataModule -from pytorch_lightning.core.lightning import LightningModule +def is_overridden( + method_name: str, + instance: Optional[object] = None, + parent: Optional[Type[object]] = None, + model: Optional[Union['pl.LightningModule', 'pl.LightningDataModule']] = None, +) -> bool: + if model is not None and instance is None: + rank_zero_deprecation( + '`is_overriden(model=...)` has been deprecated and will be removed in v1.6.' 
+ 'Please use `is_overriden(instance=...)`' + ) + instance = model -def is_overridden(method_name: str, model: Union[LightningModule, LightningDataModule]) -> bool: - # if you pass DataModule instead of None or a LightningModule, we use LightningDataModule as super - # TODO - refector this function to accept model_name, instance, parent so it makes more sense - super_object = LightningModule if not isinstance(model, LightningDataModule) else LightningDataModule - - if not hasattr(model, method_name) or not hasattr(super_object, method_name): - # in case of calling deprecated method + if instance is None: + # if `self.lightning_module` was passed as instance, it can be `None` return False - instance_attr = getattr(model, method_name) - if not instance_attr: + if parent is None: + if isinstance(instance, pl.LightningModule): + parent = pl.LightningModule + elif isinstance(instance, pl.LightningDataModule): + parent = pl.LightningDataModule + if parent is None: + raise ValueError("Expected a parent") + + instance_attr = getattr(instance, method_name, None) + # `Mock(wraps=...)` support + if isinstance(instance_attr, Mock): + # access the wrapped function + instance_attr = instance_attr._mock_wraps + # `partial` support + elif isinstance(instance_attr, partial): + instance_attr = instance_attr.func + if instance_attr is None: return False - super_attr = getattr(super_object, method_name) - - # when code pointers are different, it was implemented - if hasattr(instance_attr, 'patch_loader_code'): - # cannot pickle __code__ so cannot verify if PatchDataloader - # exists which shows dataloader methods have been overwritten. - # so, we hack it by using the string representation - is_overridden = instance_attr.patch_loader_code != str(super_attr.__code__) - else: - is_overridden = instance_attr.__code__ is not super_attr.__code__ - return is_overridden + + parent_attr = getattr(parent, method_name, None) + if parent_attr is None: + raise ValueError("The parent should define the method") + + # cannot pickle `__code__` so cannot verify if `PatchDataloader` + # exists which shows dataloader methods have been overwritten. + # so, we hack it by using the string representation + instance_code = getattr(instance_attr, 'patch_loader_code', None) or instance_attr.__code__ + parent_code = parent_attr.__code__ + + return instance_code != parent_code diff --git a/pytorch_lightning/utilities/parsing.py b/pytorch_lightning/utilities/parsing.py index 6141a80b5f97c..c7b57fe3fd4e9 100644 --- a/pytorch_lightning/utilities/parsing.py +++ b/pytorch_lightning/utilities/parsing.py @@ -16,9 +16,10 @@ import pickle import types from argparse import Namespace +from dataclasses import fields, is_dataclass from typing import Any, Dict, Optional, Sequence, Tuple, Union -from pytorch_lightning.utilities import rank_zero_warn +from pytorch_lightning.utilities.warnings import rank_zero_warn def str_to_bool_or_str(val: str) -> Union[str, bool]: @@ -32,18 +33,20 @@ def str_to_bool_or_str(val: str) -> Union[str, bool]: lower = val.lower() if lower in ('y', 'yes', 't', 'true', 'on', '1'): return True - elif lower in ('n', 'no', 'f', 'false', 'off', '0'): + if lower in ('n', 'no', 'f', 'false', 'off', '0'): return False - else: - return val + return val def str_to_bool(val: str) -> bool: """Convert a string representation of truth to bool. True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values - are 'n', 'no', 'f', 'false', 'off', and '0'. Raises ValueError if - 'val' is anything else. 
+ are 'n', 'no', 'f', 'false', 'off', and '0'. + + Raises: + ValueError: + If ``val`` isn't in one of the aforementioned true or false values. >>> str_to_bool('YES') True @@ -97,7 +100,7 @@ def clean_namespace(hparams): del_attrs = [k for k, v in hparams_dict.items() if not is_picklable(v)] for k in del_attrs: - rank_zero_warn(f"attribute '{k}' removed from hparams because it cannot be pickled", UserWarning) + rank_zero_warn(f"attribute '{k}' removed from hparams because it cannot be pickled") del hparams_dict[k] @@ -164,10 +167,9 @@ def collect_init_args(frame, path_args: list, inside: bool = False) -> list: # recursive update path_args.append(local_args) return collect_init_args(frame.f_back, path_args, inside=True) - elif not inside: + if not inside: return collect_init_args(frame.f_back, path_args, inside) - else: - return path_args + return path_args def flatten_dict(source, result=None): @@ -197,7 +199,11 @@ def save_hyperparameters( if not frame: frame = inspect.currentframe().f_back - init_args = get_init_args(frame) + + if is_dataclass(obj): + init_args = {f.name: getattr(obj, f.name) for f in fields(obj)} + else: + init_args = get_init_args(frame) assert init_args, "failed to inspect the obj init" if ignore is not None: diff --git a/pytorch_lightning/utilities/seed.py b/pytorch_lightning/utilities/seed.py index 51547d5576e74..d5e712b8385bc 100644 --- a/pytorch_lightning/utilities/seed.py +++ b/pytorch_lightning/utilities/seed.py @@ -34,7 +34,7 @@ def seed_everything(seed: Optional[int] = None, workers: bool = False) -> int: In addition, sets the following environment variables: - `PL_GLOBAL_SEED`: will be passed to spawned subprocesses (e.g. ddp_spawn backend). - - `PL_SEED_WORKERS`: (optional) is set to 1 if ```workers=True``. + - `PL_SEED_WORKERS`: (optional) is set to 1 if ``workers=True``. Args: seed: the integer value seed for global random state in Lightning. @@ -84,8 +84,9 @@ def reset_seed() -> None: If :func:`pytorch_lightning.utilities.seed.seed_everything` is unused, this function will do nothing. 
""" seed = os.environ.get("PL_GLOBAL_SEED", None) + workers = os.environ.get("PL_SEED_WORKERS", False) if seed is not None: - seed_everything(int(seed)) + seed_everything(int(seed), workers=bool(workers)) def pl_worker_init_function(worker_id: int, rank: Optional = None) -> None: # pragma: no cover @@ -100,6 +101,9 @@ def pl_worker_init_function(worker_id: int, rank: Optional = None) -> None: # p process_seed = torch.initial_seed() # back out the base seed so we can use all the bits base_seed = process_seed - worker_id + log.debug( + f'Initializing random number generators of process {global_rank} worker {worker_id} with base seed {base_seed}' + ) ss = np.random.SeedSequence([base_seed, worker_id, global_rank]) # use 128 bits (4 x 32-bit words) np.random.seed(ss.generate_state(4)) diff --git a/pytorch_lightning/utilities/types.py b/pytorch_lightning/utilities/types.py index 8a81040af07db..ecbfa4c84f523 100644 --- a/pytorch_lightning/utilities/types.py +++ b/pytorch_lightning/utilities/types.py @@ -17,14 +17,31 @@ - Types used in public hooks (as those in the `LightningModule` and `Callback`) should be public (no trailing `_`) """ from numbers import Number -from typing import Any, Dict, Iterator, List, Union +from typing import Any, Dict, Iterator, List, Mapping, Sequence, Type, Union import torch +from torch.optim.lr_scheduler import _LRScheduler, ReduceLROnPlateau +from torch.utils.data import DataLoader from torchmetrics import Metric _METRIC = Union[Metric, torch.Tensor, Number] +_METRIC_COLLECTION = Union[_METRIC, Mapping[str, _METRIC]] STEP_OUTPUT = Union[torch.Tensor, Dict[str, Any]] EPOCH_OUTPUT = List[STEP_OUTPUT] _EVALUATE_OUTPUT = List[Dict[str, float]] # 1 dict per DataLoader _PREDICT_OUTPUT = Union[List[Any], List[List[Any]]] _PARAMETERS = Iterator[torch.nn.Parameter] +# yapf: disable +TRAIN_DATALOADERS = Union[ + DataLoader, + Sequence[DataLoader], + Sequence[Sequence[DataLoader]], + Sequence[Dict[str, DataLoader]], + Dict[str, DataLoader], + Dict[str, Dict[str, DataLoader]], + Dict[str, Sequence[DataLoader]], +] +# yapf: enable +EVAL_DATALOADERS = Union[DataLoader, Sequence[DataLoader]] +LRSchedulerTypeTuple = (_LRScheduler, ReduceLROnPlateau) +LRSchedulerType = Union[Type[_LRScheduler], Type[ReduceLROnPlateau]] diff --git a/pytorch_lightning/utilities/warnings.py b/pytorch_lightning/utilities/warnings.py index a3dde95fa928f..0595a41ea5aa0 100644 --- a/pytorch_lightning/utilities/warnings.py +++ b/pytorch_lightning/utilities/warnings.py @@ -11,18 +11,40 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from pytorch_lightning.utilities.distributed import rank_zero_warn +"""Warning-related utilities""" +import warnings +from functools import partial +from pytorch_lightning.utilities.distributed import rank_zero_only -class WarningCache: - def __init__(self): - self.warnings = set() +def _warn(*args, stacklevel: int = 2, **kwargs): + warnings.warn(*args, stacklevel=stacklevel, **kwargs) - def warn(self, m, *args, **kwargs): - if m not in self.warnings: - self.warnings.add(m) - rank_zero_warn(m, *args, **kwargs) - def clear(self): - self.warnings.clear() +@rank_zero_only +def rank_zero_warn(*args, stacklevel: int = 4, **kwargs): + _warn(*args, stacklevel=stacklevel, **kwargs) + + +class LightningDeprecationWarning(DeprecationWarning): + ... 
+ + +# enable our warnings +warnings.simplefilter('default', LightningDeprecationWarning) + +rank_zero_deprecation = partial(rank_zero_warn, category=LightningDeprecationWarning) + + +class WarningCache(set): + + def warn(self, m, *args, stacklevel: int = 5, **kwargs): + if m not in self: + self.add(m) + rank_zero_warn(m, *args, stacklevel=stacklevel, **kwargs) + + def deprecation(self, m, *args, stacklevel: int = 5, **kwargs): + if m not in self: + self.add(m) + rank_zero_deprecation(m, *args, stacklevel=stacklevel, **kwargs) diff --git a/requirements.txt b/requirements.txt index 964bb493a2637..15c0fcbbab8cc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,9 +5,10 @@ torch>=1.4 future>=0.17.1 # required for builtins in setup.py tqdm>=4.41.0 PyYAML>=5.1,<=5.4.1 -fsspec[http]>=2021.4.0 +fsspec[http]>=2021.05.0, !=2021.06.0 tensorboard>=2.2.0, !=2.5.0 # 2.5.0 GPU CI error: 'Couldn't build proto file into descriptor pool!' -torchmetrics>=0.2.0 -pyDeprecate==0.3.0 -packaging +torchmetrics>=0.4.0 +pyDeprecate==0.3.1 +packaging>=17.0 typing-extensions # TypedDict support for python<3.8 +pillow!=8.3.0 # TODO: delete line after https://github.com/python-pillow/Pillow/issues/5571 diff --git a/requirements/adjust_versions.py b/requirements/adjust_versions.py index 3d9da2a2f1a22..84879b4e48a34 100644 --- a/requirements/adjust_versions.py +++ b/requirements/adjust_versions.py @@ -4,7 +4,8 @@ from typing import Dict, Optional VERSIONS = [ - dict(torch="1.9.0", torchvision="", torchtext=""), # nightly + dict(torch="1.10.0", torchvision="", torchtext=""), # nightly + dict(torch="1.9.0", torchvision="0.10.0", torchtext="0.10.0"), dict(torch="1.8.1", torchvision="0.9.1", torchtext="0.9.1"), dict(torch="1.8.0", torchvision="0.9.0", torchtext="0.9.0"), dict(torch="1.7.1", torchvision="0.8.2", torchtext="0.8.1"), @@ -40,6 +41,8 @@ def main(path_req: str, torch_version: Optional[str] = None) -> None: with open(path_req, "r") as fp: req = fp.read() + # remove comments + req = re.sub(rf"\s*#.*{os.linesep}", os.linesep, req) latest = find_latest(torch_version) for lib, version in latest.items(): diff --git a/requirements/docs.txt b/requirements/docs.txt index b53549e087e4f..5328c679d1f6c 100644 --- a/requirements/docs.txt +++ b/requirements/docs.txt @@ -1,7 +1,7 @@ sphinx>=4.0 -recommonmark # fails with badges -m2r # fails with multi-line text -nbsphinx>=0.8 +myst-parser +nbsphinx>=0.8.5 +ipython[notebook] pandoc>=1.0 docutils>=0.16 sphinxcontrib-fulltoc>=1.0 @@ -11,3 +11,5 @@ sphinx-autodoc-typehints>=1.0 sphinx-paramlinks>=0.5.1 sphinx-togglebutton>=0.2 sphinx-copybutton>=0.3 + +-r ../_notebooks/.actions/requirements.txt diff --git a/requirements/extra.txt b/requirements/extra.txt index c41f464ef383b..291813e05edcd 100644 --- a/requirements/extra.txt +++ b/requirements/extra.txt @@ -7,4 +7,4 @@ torchtext>=0.5 # onnx>=1.7.0 onnxruntime>=1.3.0 hydra-core>=1.0 -jsonargparse[signatures]>=3.12.0 +jsonargparse[signatures]>=3.15.0 diff --git a/setup.cfg b/setup.cfg index 5a68adb27b443..74e02d932dc3c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -129,6 +129,10 @@ ignore_errors = True [mypy-pytorch_lightning.loggers.*] ignore_errors = True +# todo: add proper typing to this module... +[mypy-pytorch_lightning.loops.*] +ignore_errors = True + # todo: add proper typing to this module... 
[mypy-pytorch_lightning.metrics.*] ignore_errors = True @@ -163,6 +167,8 @@ ignore_errors = True # whitelist [mypy-pytorch_lightning.trainer.evaluation_loop] ignore_errors = False +[mypy-pytorch_lightning.trainer.connectors.logger_connector] +ignore_errors = False # todo: add proper typing to this module... [mypy-pytorch_lightning.distributed.*] @@ -175,6 +181,8 @@ ignore_errors = True # todo: add proper typing to this module... [mypy-pytorch_lightning.utilities.*] ignore_errors = True +[mypy-pytorch_lightning.utilities.cli] +ignore_errors = False # todo: add proper typing to this module... [mypy-pl_examples.*] diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py index e60b86513e5ff..4a9b01281f784 100644 --- a/tests/accelerators/test_accelerator_connector.py +++ b/tests/accelerators/test_accelerator_connector.py @@ -18,6 +18,7 @@ import pytest import torch +import torch.distributed from pytorch_lightning import Trainer from pytorch_lightning.accelerators.accelerator import Accelerator @@ -385,6 +386,35 @@ def on_fit_start(self, trainer, pl_module): trainer.fit(model) +@RunIf(special=True) +def test_accelerator_choice_ddp_cpu_and_plugin(tmpdir): + """ Test that accelerator="ddp_cpu" can work together with an instance of DDPPlugin. """ + _test_accelerator_choice_ddp_cpu_and_plugin(tmpdir, ddp_plugin_class=DDPPlugin) + + +@RunIf(special=True) +def test_accelerator_choice_ddp_cpu_and_plugin_spawn(tmpdir): + """ Test that accelerator="ddp_cpu" can work together with an instance of DDPPSpawnPlugin. """ + _test_accelerator_choice_ddp_cpu_and_plugin(tmpdir, ddp_plugin_class=DDPSpawnPlugin) + + +def _test_accelerator_choice_ddp_cpu_and_plugin(tmpdir, ddp_plugin_class): + + model = BoringModel() + trainer = Trainer( + default_root_dir=tmpdir, + plugins=[ddp_plugin_class(find_unused_parameters=True)], + fast_dev_run=True, + accelerator='ddp_cpu', + num_processes=2, + ) + assert isinstance(trainer.training_type_plugin, ddp_plugin_class) + assert isinstance(trainer.accelerator, CPUAccelerator) + assert trainer.training_type_plugin.num_processes == 2 + assert trainer.training_type_plugin.parallel_devices == [torch.device("cpu")] * 2 + trainer.fit(model) + + @mock.patch.dict( os.environ, { "SLURM_NTASKS": "2", @@ -396,11 +426,8 @@ def on_fit_start(self, trainer, pl_module): } ) @mock.patch('torch.cuda.device_count', return_value=0) -@mock.patch('pytorch_lightning.plugins.DDPPlugin.setup_distributed', autospec=True) -def test_accelerator_choice_ddp_cpu_custom_cluster(device_count_mock, setup_distributed_mock): - """ - Test that we choose the custom cluster even when SLURM or TE flags are around - """ +def test_accelerator_choice_ddp_cpu_custom_cluster(_, tmpdir): + """ Test that we choose the custom cluster even when SLURM or TE flags are around """ class CustomCluster(LightningEnvironment): @@ -410,25 +437,16 @@ def master_address(self): def creates_children(self) -> bool: return True - class CB(Callback): - - def on_fit_start(self, trainer, pl_module): - assert isinstance(trainer.accelerator, CPUAccelerator) - assert isinstance(trainer.training_type_plugin, DDPPlugin) - assert isinstance(trainer.training_type_plugin.cluster_environment, CustomCluster) - raise SystemExit() - - model = BoringModel() trainer = Trainer( + default_root_dir=tmpdir, plugins=[CustomCluster()], fast_dev_run=True, accelerator='ddp_cpu', num_processes=2, - callbacks=[CB()], ) - - with pytest.raises(SystemExit): - trainer.fit(model) + assert isinstance(trainer.accelerator, 
CPUAccelerator) + assert isinstance(trainer.training_type_plugin, DDPPlugin) + assert isinstance(trainer.training_type_plugin.cluster_environment, CustomCluster) @mock.patch.dict( @@ -453,8 +471,9 @@ class Prec(PrecisionPlugin): class TrainTypePlugin(SingleDevicePlugin): pass + ttp = TrainTypePlugin(device=torch.device("cpu")) accelerator = Accel( - training_type_plugin=TrainTypePlugin(device=torch.device("cpu")), + training_type_plugin=ttp, precision_plugin=Prec(), ) trainer = Trainer( @@ -465,6 +484,25 @@ class TrainTypePlugin(SingleDevicePlugin): assert isinstance(trainer.accelerator, Accel) assert isinstance(trainer.training_type_plugin, TrainTypePlugin) assert isinstance(trainer.precision_plugin, Prec) + assert trainer.accelerator_connector.training_type_plugin is ttp + + class DistributedPlugin(DDPPlugin): + pass + + ttp = DistributedPlugin() + accelerator = Accel( + training_type_plugin=ttp, + precision_plugin=Prec(), + ) + trainer = Trainer( + accelerator=accelerator, + fast_dev_run=True, + num_processes=2, + ) + assert isinstance(trainer.accelerator, Accel) + assert isinstance(trainer.training_type_plugin, DistributedPlugin) + assert isinstance(trainer.precision_plugin, Prec) + assert trainer.accelerator_connector.training_type_plugin is ttp @mock.patch.dict( diff --git a/tests/accelerators/test_cpu.py b/tests/accelerators/test_cpu.py index c7d7f98ae995d..7be1c6b9d1b65 100644 --- a/tests/accelerators/test_cpu.py +++ b/tests/accelerators/test_cpu.py @@ -7,6 +7,7 @@ from pytorch_lightning.accelerators import CPUAccelerator from pytorch_lightning.plugins import SingleDevicePlugin from pytorch_lightning.plugins.precision import MixedPrecisionPlugin +from pytorch_lightning.plugins.precision.precision_plugin import PrecisionPlugin from pytorch_lightning.utilities.exceptions import MisconfigurationException from tests.helpers.boring_model import BoringModel @@ -50,3 +51,112 @@ def setup_optimizers_in_pre_dispatch(self) -> bool: model = TestModel() trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, plugins=CustomPlugin(device=torch.device("cpu"))) trainer.fit(model) + + +def test_accelerator_on_reset_dataloader_hooks(tmpdir): + """ + Ensure data-loader hooks are called using an Accelerator. 
+ """ + + class CustomAccelerator(CPUAccelerator): + train_count: int = 0 + val_count: int = 0 + test_count: int = 0 + predict_count: int = 0 + + def on_reset_train_dataloader(self, dataloader): + self.train_count += 1 + assert self.lightning_module.trainer.training + return super().on_reset_train_dataloader(dataloader) + + def on_reset_val_dataloader(self, dataloader): + self.val_count += 1 + assert self.lightning_module.trainer.training or self.lightning_module.trainer.validating + return super().on_reset_val_dataloader(dataloader) + + def on_reset_test_dataloader(self, dataloader): + self.test_count += 1 + assert self.lightning_module.trainer.testing + return super().on_reset_test_dataloader(dataloader) + + def on_reset_predict_dataloader(self, dataloader): + self.predict_count += 1 + assert self.lightning_module.trainer.predicting + return super().on_reset_predict_dataloader(dataloader) + + model = BoringModel() + accelerator = CustomAccelerator(PrecisionPlugin(), SingleDevicePlugin(device=torch.device('cpu'))) + trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, accelerator=accelerator) + trainer.fit(model) + trainer.validate(model) + trainer.test(model) + trainer.predict(model, dataloaders=model.test_dataloader()) + # assert that all loader hooks were called + assert accelerator.train_count == 1 + assert accelerator.val_count == 1 # only called once during the entire session + assert accelerator.test_count == 1 + assert accelerator.predict_count == 1 + + accelerator = CustomAccelerator(PrecisionPlugin(), SingleDevicePlugin(device=torch.device('cpu'))) + trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, accelerator=accelerator) + trainer.validate(model) + trainer.test(model) + trainer.predict(model) + # assert val/test/predict loader hooks were called + assert accelerator.val_count == 1 + assert accelerator.test_count == 1 + assert accelerator.predict_count == 1 + + +def test_plugin_on_reset_dataloader_hooks(tmpdir): + """ + Ensure data-loader hooks are called using a Plugin. 
+ """ + + class CustomPlugin(SingleDevicePlugin): + train_count: int = 0 + val_count: int = 0 + test_count: int = 0 + predict_count: int = 0 + + def on_reset_train_dataloader(self, dataloader): + self.train_count += 1 + assert self.lightning_module.trainer.training + return super().on_reset_train_dataloader(dataloader) + + def on_reset_val_dataloader(self, dataloader): + self.val_count += 1 + assert self.lightning_module.trainer.training or self.lightning_module.trainer.validating + return super().on_reset_val_dataloader(dataloader) + + def on_reset_test_dataloader(self, dataloader): + self.test_count += 1 + assert self.lightning_module.trainer.testing + return super().on_reset_test_dataloader(dataloader) + + def on_reset_predict_dataloader(self, dataloader): + self.predict_count += 1 + assert self.lightning_module.trainer.predicting + return super().on_reset_predict_dataloader(dataloader) + + plugin = CustomPlugin(device=torch.device('cpu')) + model = BoringModel() + trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, plugins=plugin) + trainer.fit(model) + trainer.validate(model) + trainer.test(model) + trainer.predict(model, dataloaders=model.test_dataloader()) + # assert that all loader hooks were called + assert plugin.train_count == 1 + assert plugin.val_count == 1 # only called once during the entire session + assert plugin.test_count == 1 + assert plugin.predict_count == 1 + plugin = CustomPlugin(device=torch.device('cpu')) + trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, plugins=plugin) + trainer.validate(model) + trainer.test(model) + trainer.predict(model) + # assert val/test/predict loader hooks were called + assert plugin.val_count == 1 + assert plugin.test_count == 1 + assert plugin.predict_count == 1 diff --git a/tests/accelerators/test_ddp.py b/tests/accelerators/test_ddp.py index 80580b63bb6e7..f38d08df3daf9 100644 --- a/tests/accelerators/test_ddp.py +++ b/tests/accelerators/test_ddp.py @@ -32,9 +32,10 @@ @RunIf(min_gpus=2) -def test_multi_gpu_model_ddp_fit_only(tmpdir): +@pytest.mark.parametrize("as_module", [True, False]) +def test_multi_gpu_model_ddp_fit_only(tmpdir, as_module): # call the script - call_training_script(ddp_model, CLI_ARGS, 'fit', tmpdir, timeout=120) + call_training_script(ddp_model, CLI_ARGS, 'fit', tmpdir, timeout=120, as_module=as_module) # load the results of the script result_path = os.path.join(tmpdir, 'ddp.result') @@ -45,9 +46,10 @@ def test_multi_gpu_model_ddp_fit_only(tmpdir): @RunIf(min_gpus=2) -def test_multi_gpu_model_ddp_test_only(tmpdir): +@pytest.mark.parametrize("as_module", [True, False]) +def test_multi_gpu_model_ddp_test_only(tmpdir, as_module): # call the script - call_training_script(ddp_model, CLI_ARGS, 'test', tmpdir) + call_training_script(ddp_model, CLI_ARGS, 'test', tmpdir, as_module=as_module) # load the results of the script result_path = os.path.join(tmpdir, 'ddp.result') @@ -58,9 +60,10 @@ def test_multi_gpu_model_ddp_test_only(tmpdir): @RunIf(min_gpus=2) -def test_multi_gpu_model_ddp_fit_test(tmpdir): +@pytest.mark.parametrize("as_module", [True, False]) +def test_multi_gpu_model_ddp_fit_test(tmpdir, as_module): # call the script - call_training_script(ddp_model, CLI_ARGS, 'fit_test', tmpdir, timeout=20) + call_training_script(ddp_model, CLI_ARGS, 'fit_test', tmpdir, timeout=20, as_module=as_module) # load the results of the script result_path = os.path.join(tmpdir, 'ddp.result') @@ -123,7 +126,16 @@ def setup(self, stage: Optional[str] = None) -> None: @RunIf(min_gpus=2, min_torch="1.8.1", 
special=True) -def test_ddp_wrapper(tmpdir): +def test_ddp_wrapper_16(tmpdir): + _test_ddp_wrapper(tmpdir, precision=16) + + +@RunIf(min_gpus=2, min_torch="1.8.1", special=True) +def test_ddp_wrapper_32(tmpdir): + _test_ddp_wrapper(tmpdir, precision=32) + + +def _test_ddp_wrapper(tmpdir, precision): """ Test parameters to ignore are carried over for DDP. """ @@ -150,5 +162,12 @@ def on_train_start(self, trainer: 'pl.Trainer', pl_module: 'pl.LightningModule') assert trainer.training_type_plugin.model.module._ddp_params_and_buffers_to_ignore == ('something') model = CustomModel() - trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, accelerator="ddp", gpus=2, callbacks=CustomCallback()) + trainer = Trainer( + default_root_dir=tmpdir, + fast_dev_run=True, + precision=precision, + accelerator="ddp", + gpus=2, + callbacks=CustomCallback(), + ) trainer.fit(model) diff --git a/tests/accelerators/test_ipu.py b/tests/accelerators/test_ipu.py new file mode 100644 index 0000000000000..363648c9f681d --- /dev/null +++ b/tests/accelerators/test_ipu.py @@ -0,0 +1,547 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +from typing import Optional + +import pytest +import torch +import torch.nn.functional as F + +from pytorch_lightning import Callback, seed_everything, Trainer +from pytorch_lightning.accelerators import IPUAccelerator +from pytorch_lightning.core.lightning import LightningModule +from pytorch_lightning.plugins import IPUPlugin, IPUPrecisionPlugin +from pytorch_lightning.trainer.states import RunningStage +from pytorch_lightning.utilities import _IPU_AVAILABLE +from pytorch_lightning.utilities.exceptions import MisconfigurationException +from tests.helpers.boring_model import BoringModel +from tests.helpers.datamodules import ClassifDataModule +from tests.helpers.runif import RunIf +from tests.helpers.simple_models import ClassificationModel + +if _IPU_AVAILABLE: + import poptorch + + +class IPUModel(BoringModel): + + def training_step(self, batch, batch_idx): + output = self(batch) + loss = self.loss(batch, output) + return loss + + def validation_step(self, batch, batch_idx): + output = self(batch) + loss = self.loss(batch, output) + return loss + + def test_step(self, batch, batch_idx): + output = self(batch) + loss = self.loss(batch, output) + return loss + + def training_epoch_end(self, outputs) -> None: + pass + + def validation_epoch_end(self, outputs) -> None: + pass + + def test_epoch_end(self, outputs) -> None: + pass + + +class IPUClassificationModel(ClassificationModel): + + def training_step(self, batch, batch_idx): + x, y = batch + logits = self(x) + loss = F.cross_entropy(logits, y) + return loss + + def validation_step(self, batch, batch_idx): + x, y = batch + logits = self(x) + acc = self.accuracy(logits, y) + return acc + + def test_step(self, batch, batch_idx): + x, y = batch + logits = self(x) + acc = self.accuracy(logits, y) + return acc + + def accuracy(self, logits, y): + # todo (sean): currently IPU poptorch 
doesn't implicit convert bools to tensor + # hence we use an explicit calculation for accuracy here. Once fixed in poptorch + # we can use the accuracy metric. + acc = torch.sum(torch.eq(torch.argmax(logits, -1), y).to(torch.float32)) / len(y) + return acc + + def validation_epoch_end(self, outputs) -> None: + self.log('val_acc', torch.stack(outputs).mean()) + + def test_epoch_end(self, outputs) -> None: + self.log('test_acc', torch.stack(outputs).mean()) + + +@pytest.mark.skipif(_IPU_AVAILABLE, reason="test requires non-IPU machine") +def test_fail_if_no_ipus(tmpdir): + with pytest.raises(MisconfigurationException, match="IPU Accelerator requires IPU devices to run"): + Trainer(default_root_dir=tmpdir, ipus=1) + + with pytest.raises(MisconfigurationException, match="IPU Accelerator requires IPU devices to run"): + Trainer(default_root_dir=tmpdir, ipus=1, accelerator='ipu') + + +@RunIf(ipu=True) +def test_accelerator_selected(tmpdir): + trainer = Trainer(default_root_dir=tmpdir, ipus=1) + assert isinstance(trainer.accelerator, IPUAccelerator) + trainer = Trainer(default_root_dir=tmpdir, ipus=1, accelerator='ipu') + assert isinstance(trainer.accelerator, IPUAccelerator) + + +@RunIf(ipu=True) +@pytest.mark.parametrize('ipus', [1, 4]) +def test_all_stages(tmpdir, ipus): + model = IPUModel() + trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, ipus=ipus) + trainer.fit(model) + trainer.validate(model) + trainer.test(model) + trainer.predict(model, model.val_dataloader()) + + +@RunIf(ipu=True) +@pytest.mark.parametrize('ipus', [1, 4]) +def test_inference_only(tmpdir, ipus): + model = IPUModel() + + trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, ipus=ipus) + trainer.validate(model) + trainer.test(model) + trainer.predict(model, model.val_dataloader()) + + +@RunIf(ipu=True) +def test_optimization(tmpdir): + seed_everything(42) + + dm = ClassifDataModule(length=1024) + model = IPUClassificationModel() + + trainer = Trainer( + default_root_dir=tmpdir, + max_epochs=1, + ipus=2, + ) + + # fit model + trainer.fit(model, dm) + assert trainer.state.finished, f"Training failed with {trainer.state}" + assert dm.trainer is not None + + # validate + result = trainer.validate(datamodule=dm) + assert dm.trainer is not None + assert result[0]['val_acc'] > 0.7 + + # test + result = trainer.test(model, datamodule=dm) + assert dm.trainer is not None + test_result = result[0]['test_acc'] + assert test_result > 0.6 + + # test saved model + model_path = os.path.join(tmpdir, 'model.pt') + trainer.save_checkpoint(model_path) + + model = IPUClassificationModel.load_from_checkpoint(model_path) + + trainer = Trainer(default_root_dir=tmpdir, ipus=2) + + result = trainer.test(model, datamodule=dm) + saved_result = result[0]['test_acc'] + assert saved_result == test_result + + +@RunIf(ipu=True) +def test_mixed_precision(tmpdir): + + class TestCallback(Callback): + + def setup(self, trainer: Trainer, pl_module: LightningModule, stage: Optional[str] = None) -> None: + assert trainer.accelerator.model.precision == 16 + raise SystemExit + + model = IPUModel() + trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, ipus=1, precision=16, callbacks=TestCallback()) + assert isinstance(trainer.accelerator.precision_plugin, IPUPrecisionPlugin) + assert trainer.accelerator.precision_plugin.precision == 16 + with pytest.raises(SystemExit): + trainer.fit(model) + + +@RunIf(ipu=True) +def test_pure_half_precision(tmpdir): + + class TestCallback(Callback): + + def on_train_start(self, trainer: Trainer, 
pl_module: LightningModule) -> None: + assert trainer.accelerator.model.precision == 16 + for param in trainer.accelerator.model.parameters(): + assert param.dtype == torch.float16 + raise SystemExit + + model = IPUModel() + model = model.half() + trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, ipus=1, precision=16, callbacks=TestCallback()) + + assert isinstance(trainer.accelerator.training_type_plugin, IPUPlugin) + assert isinstance(trainer.accelerator.precision_plugin, IPUPrecisionPlugin) + assert trainer.accelerator.precision_plugin.precision == 16 + + with pytest.raises(SystemExit): + trainer.fit(model) + + +@RunIf(ipu=True) +def test_device_iterations_ipu_plugin(tmpdir): + + class TestCallback(Callback): + + def on_train_start(self, trainer: Trainer, pl_module: LightningModule) -> None: + assert trainer.accelerator.training_type_plugin.device_iterations == 2 + # assert device iterations has been set correctly within the poptorch options + poptorch_model = trainer.accelerator.training_type_plugin.poptorch_models[RunningStage.TRAINING] + assert poptorch_model._options.toDict()['device_iterations'] == 2 + raise SystemExit + + model = IPUModel() + trainer = Trainer( + default_root_dir=tmpdir, + fast_dev_run=True, + ipus=1, + plugins=IPUPlugin(device_iterations=2), + callbacks=TestCallback() + ) + assert isinstance(trainer.accelerator.training_type_plugin, IPUPlugin) + with pytest.raises(SystemExit): + trainer.fit(model) + + +@RunIf(ipu=True) +def test_accumulated_batches(tmpdir): + + class TestCallback(Callback): + + def on_train_start(self, trainer: Trainer, pl_module: LightningModule) -> None: + # ensure the accumulation_scheduler is overridden to accumulate every batch + # since ipu handle accumulation + assert trainer.accumulation_scheduler.scheduling == {0: 1} + # assert poptorch option have been set correctly + poptorch_model = trainer.accelerator.training_type_plugin.poptorch_models[RunningStage.TRAINING] + assert poptorch_model._options.Training.toDict()['gradient_accumulation'] == 2 + raise SystemExit + + model = IPUModel() + trainer = Trainer( + default_root_dir=tmpdir, fast_dev_run=True, ipus=1, accumulate_grad_batches=2, callbacks=TestCallback() + ) + with pytest.raises(SystemExit): + trainer.fit(model) + + +@RunIf(ipu=True) +def test_stages_correct(tmpdir): + """Ensure all stages correctly are traced correctly by asserting the output for each stage""" + + class StageModel(IPUModel): + + def training_step(self, batch, batch_idx): + loss = super().training_step(batch, batch_idx) + # tracing requires a loss value that depends on the model. + # force it to be a value but ensure we use the loss. 
+ return (loss - loss) + torch.tensor(1) + + def validation_step(self, batch, batch_idx): + loss = super().validation_step(batch, batch_idx) + return (loss - loss) + torch.tensor(2) + + def test_step(self, batch, batch_idx): + loss = super().validation_step(batch, batch_idx) + return (loss - loss) + torch.tensor(3) + + def predict_step(self, batch, batch_idx, dataloader_idx=None): + output = super().predict_step(batch, batch_idx) + return (output - output) + torch.tensor(4) + + class TestCallback(Callback): + + def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, dataloader_idx) -> None: + assert outputs['loss'].item() == 1 + + def on_validation_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, dataloader_idx) -> None: + assert outputs.item() == 2 + + def on_test_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, dataloader_idx) -> None: + assert outputs.item() == 3 + + def on_predict_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, dataloader_idx) -> None: + assert torch.all(outputs == 4).item() + + model = StageModel() + trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, ipus=1, callbacks=TestCallback()) + trainer.fit(model) + trainer.test(model) + trainer.validate(model) + trainer.predict(model, model.test_dataloader()) + + +@RunIf(ipu=True) +def test_accumulate_grad_batches_dict_fails(tmpdir): + model = IPUModel() + trainer = Trainer(default_root_dir=tmpdir, ipus=1, accumulate_grad_batches={0: 1}) + with pytest.raises( + MisconfigurationException, match="IPUs currently only support accumulate_grad_batches being an integer value." + ): + trainer.fit(model) + + +@RunIf(ipu=True) +def test_clip_gradients_fails(tmpdir): + model = IPUModel() + trainer = Trainer(default_root_dir=tmpdir, ipus=1, gradient_clip_val=10) + with pytest.raises(MisconfigurationException, match="IPUs currently do not support clipping gradients."): + trainer.fit(model) + + +@RunIf(ipu=True) +def test_autoreport(tmpdir): + """Ensure autoreport dumps to a file.""" + model = IPUModel() + autoreport_path = os.path.join(tmpdir, 'report/') + trainer = Trainer( + default_root_dir=tmpdir, + ipus=1, + fast_dev_run=True, + plugins=IPUPlugin(autoreport=True, autoreport_dir=autoreport_path) + ) + trainer.fit(model) + assert os.path.exists(autoreport_path) + assert os.path.isfile(autoreport_path + 'profile.pop') + + +@RunIf(ipu=True) +def test_manual_poptorch_opts(tmpdir): + """Ensure if the user passes manual poptorch Options, we run with the correct object.""" + model = IPUModel() + inference_opts = poptorch.Options() + training_opts = poptorch.Options() + + trainer = Trainer( + default_root_dir=tmpdir, + ipus=1, + fast_dev_run=True, + plugins=IPUPlugin(inference_opts=inference_opts, training_opts=training_opts) + ) + trainer.fit(model) + + assert isinstance(trainer.accelerator.training_type_plugin, IPUPlugin) + assert trainer.accelerator.training_type_plugin.training_opts == training_opts + assert trainer.accelerator.training_type_plugin.inference_opts == inference_opts + + +@RunIf(ipu=True) +def test_manual_poptorch_opts_ipu_count(tmpdir): + """ + Ensure if the user passes manual poptorch Options + and the number of ipus do not match, we warn and we set it for the user. 
+ """ + + manual_ipus = 1 + expected_ipus = 2 + model = IPUModel() + inference_opts = poptorch.Options() + inference_opts.replicationFactor(manual_ipus) + + training_opts = poptorch.Options() + training_opts.replicationFactor(manual_ipus) + + trainer = Trainer( + default_root_dir=tmpdir, + ipus=expected_ipus, + fast_dev_run=True, + plugins=IPUPlugin(inference_opts=inference_opts, training_opts=training_opts) + ) + with pytest.warns( + UserWarning, + match=f"Manual poptorch.Options set replicationFactor to {manual_ipus} " + f"which differs to the ipus={expected_ipus} flag passed to the Trainer. " + f"Setting to {expected_ipus} in the poptorch.Options." + ): + trainer.fit(model) + assert isinstance(trainer.accelerator.training_type_plugin, IPUPlugin) + assert trainer.accelerator.training_type_plugin.training_opts.replication_factor == 2 + assert trainer.accelerator.training_type_plugin.inference_opts.replication_factor == 2 + + +@RunIf(ipu=True) +def test_manual_poptorch_opts_inference_grad_accum(tmpdir): + """ + Ensure if the user passes manual poptorch Options + and grad accumulation is set greater than 1 for inference, we warn and set to 1. + """ + + model = IPUModel() + inference_opts = poptorch.Options() + inference_opts.Training.gradientAccumulation(4) + + training_opts = poptorch.Options() + training_opts.Training.gradientAccumulation(1) + + trainer = Trainer( + default_root_dir=tmpdir, + ipus=1, + fast_dev_run=True, + plugins=IPUPlugin(inference_opts=inference_opts, training_opts=training_opts) + ) + with pytest.warns( + UserWarning, + match="Inference poptorch.Options should set gradientAccumulation to 1. " + "Setting gradientAccumulation to 1 for inference options.", + ): + trainer.fit(model) + assert isinstance(trainer.accelerator.training_type_plugin, IPUPlugin) + assert trainer.accelerator.training_type_plugin.inference_opts.Training.gradient_accumulation == 1 + + +@RunIf(ipu=True) +def test_manual_poptorch_opts_train_grad_accum(tmpdir): + """ + Ensure if the user passes manual poptorch Options + and grad accumulation differs to accumulate_grad_batches, we + """ + + model = IPUModel() + inference_opts = poptorch.Options() + inference_opts.Training.gradientAccumulation(1) + + training_opts = poptorch.Options() + training_opts.Training.gradientAccumulation(2) + + trainer = Trainer( + default_root_dir=tmpdir, + ipus=1, + fast_dev_run=True, + accumulate_grad_batches=1, + plugins=IPUPlugin(inference_opts=inference_opts, training_opts=training_opts) + ) + with pytest.warns( + UserWarning, + match=f"Training poptorch.Options set gradientAccumulation to {2}. " + f"This is different to accumulate_grad_batches which was set to {1}. " + f"To change gradientAccumulation, please set accumulate_grad_batches in the Trainer. " + f"Setting poptorch.Options gradientAccumulation to {1}", + ): + trainer.fit(model) + assert isinstance(trainer.accelerator.training_type_plugin, IPUPlugin) + assert trainer.accelerator.training_type_plugin.inference_opts.Training.gradient_accumulation == 1 + + +@RunIf(ipu=True) +def test_manual_poptorch_opts_custom(tmpdir): + """ + Ensure if the user passes manual poptorch Options with custom parameters set, + we respect them in our poptorch options. 
+ """ + + model = IPUModel() + inference_opts = poptorch.Options() + inference_opts.deviceIterations(16) + inference_opts.replicationFactor(2) + inference_opts.Training.gradientAccumulation(1) + + training_opts = poptorch.Options() + training_opts.deviceIterations(8) + training_opts.replicationFactor(2) + training_opts.Training.gradientAccumulation(2) + + trainer = Trainer( + default_root_dir=tmpdir, + ipus=2, + fast_dev_run=True, + accumulate_grad_batches=2, + plugins=IPUPlugin(inference_opts=inference_opts, training_opts=training_opts) + ) + trainer.fit(model) + plugin = trainer.accelerator.training_type_plugin + assert isinstance(plugin, IPUPlugin) + inference_opts = plugin.inference_opts + training_opts = plugin.training_opts + assert inference_opts.device_iterations == 16 + assert inference_opts.replication_factor == 2 + assert inference_opts.Training.gradient_accumulation == 1 + + assert training_opts.device_iterations == 8 + assert training_opts.replication_factor == 2 + assert training_opts.Training.gradient_accumulation == 2 + + +@RunIf(ipu=True) +def test_default_opts(tmpdir): + """ + Ensure default opts are set correctly in the IPUPlugin. + """ + + model = IPUModel() + + trainer = Trainer(default_root_dir=tmpdir, ipus=1, fast_dev_run=True) + trainer.fit(model) + assert isinstance(trainer.accelerator.training_type_plugin, IPUPlugin) + inference_opts = trainer.accelerator.training_type_plugin.inference_opts + training_opts = trainer.accelerator.training_type_plugin.training_opts + for opts in (inference_opts, training_opts): + assert isinstance(opts, poptorch.Options) + assert opts.Training.gradient_accumulation == 1 + assert opts.device_iterations == 1 + assert opts.replication_factor == 1 + + +@RunIf(ipu=True) +def test_multi_optimizers_fails(tmpdir): + """ + Ensure if there are multiple optimizers, we throw an exception + """ + + class TestModel(IPUModel): + + def configure_optimizers(self): + return [torch.optim.Adam(self.parameters()), torch.optim.Adam(self.parameters())] + + model = TestModel() + + trainer = Trainer(default_root_dir=tmpdir, ipus=1) + with pytest.raises(MisconfigurationException, match="IPUs currently only support one optimizer."): + trainer.fit(model) + + +@RunIf(ipu=True) +def test_precision_plugin(tmpdir): + """ + Ensure precision plugin value is set correctly. + """ + + plugin = IPUPrecisionPlugin(precision=16) + assert plugin.precision == 16 diff --git a/tests/accelerators/test_multi_nodes_gpu.py b/tests/accelerators/test_multi_nodes_gpu.py index 42a9b1c064199..463307ead8717 100644 --- a/tests/accelerators/test_multi_nodes_gpu.py +++ b/tests/accelerators/test_multi_nodes_gpu.py @@ -13,7 +13,6 @@ # limitations under the License. 
import os import sys -from unittest import mock import pytest import torch @@ -73,7 +72,6 @@ def validation_step(self, batch, batch_idx): # use an environment variable `PL_RUNNING_MULTINODE_TESTS` and set `RunIf(multinode=True)` @pytest.mark.skip("Multi-node testing is currently disabled") @RunIf(special=True) -@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) def test__validation_step__log(tmpdir): """ Tests that validation_step can log @@ -117,7 +115,7 @@ def backward(self, loss, optimizer, optimizer_idx): trainer.fit(model) # make sure all the metrics are available for callbacks - expected_logged_metrics = { + assert set(trainer.logged_metrics) == { 'a2', 'a_step', 'a_epoch', @@ -125,12 +123,7 @@ def backward(self, loss, optimizer, optimizer_idx): 'b_epoch', 'epoch', } - logged_metrics = set(trainer.logged_metrics.keys()) - assert expected_logged_metrics == logged_metrics # we don't want to enable val metrics during steps because it is not something that users should do - # on purpose DO NOT allow step_b... it's silly to monitor val step metrics - callback_metrics = set(trainer.callback_metrics.keys()) - callback_metrics.remove('debug_epoch') - expected_cb_metrics = {'a', 'a2', 'b', 'a_epoch', 'b_epoch', 'a_step'} - assert expected_cb_metrics == callback_metrics + # on purpose DO NOT allow b_step... it's silly to monitor val step metrics + assert set(trainer.callback_metrics) == {'a', 'a2', 'b', 'a_epoch', 'b_epoch', 'a_step'} diff --git a/tests/base/model_train_steps.py b/tests/base/model_train_steps.py index c24cf5ded575a..2ef83ffd5a2de 100644 --- a/tests/base/model_train_steps.py +++ b/tests/base/model_train_steps.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. from abc import ABC -from collections import OrderedDict class TrainingStepVariations(ABC): @@ -31,18 +30,7 @@ def training_step(self, batch, batch_idx, optimizer_idx=None): # calculate loss loss_train = self.loss(y, y_hat) - log_train = loss_train - - # alternate between tensors and scalars for "log" and "progress_bar" - if batch_idx % 2 == 0: - log_train = log_train.item() - - output = OrderedDict({ - 'loss': loss_train, - 'progress_bar': dict(some_val=log_train * log_train), - 'log': dict(train_some_val=log_train * log_train), - }) - return output + return {'loss': loss_train} def training_step__multiple_dataloaders(self, batch, batch_idx, optimizer_idx=None): """Training step for multiple train loaders""" @@ -61,19 +49,4 @@ def training_step__multiple_dataloaders(self, batch, batch_idx, optimizer_idx=No # calculate loss loss_val = self.loss(y, y_hat) - log_val = loss_val - - # alternate between tensors and scalars for "log" and "progress_bar" - if batch_idx % 2 == 0: - log_val = log_val.item() - - output = OrderedDict({ - 'loss': loss_val, - 'progress_bar': { - 'some_val': log_val * log_val - }, - 'log': { - 'train_some_val': log_val * log_val - }, - }) - return output + return {'loss': loss_val} diff --git a/tests/callbacks/test_callback_hook_outputs.py b/tests/callbacks/test_callback_hook_outputs.py index 36322482c5eba..eac95e9bf18c6 100644 --- a/tests/callbacks/test_callback_hook_outputs.py +++ b/tests/callbacks/test_callback_hook_outputs.py @@ -70,7 +70,7 @@ def test_free_memory_on_eval_outputs(tmpdir): class CB(Callback): def on_epoch_end(self, trainer, pl_module): - assert len(trainer.evaluation_loop.outputs) == 0 + assert len(trainer._evaluation_loop.outputs) == 0 model = BoringModel() diff --git a/tests/callbacks/test_callbacks.py 
b/tests/callbacks/test_callbacks.py index a22e72ce09184..57fdd1bf66322 100644 --- a/tests/callbacks/test_callbacks.py +++ b/tests/callbacks/test_callbacks.py @@ -11,168 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from unittest import mock -from unittest.mock import ANY, call, MagicMock, Mock +from unittest.mock import call, Mock from pytorch_lightning import Trainer from tests.helpers import BoringModel -@mock.patch("torch.save") # need to mock torch.save or we get pickle error -def test_trainer_callback_hook_system_fit(_, tmpdir): - """Test the callback hook system for fit.""" - - model = BoringModel() - callback_mock = MagicMock() - trainer = Trainer( - default_root_dir=tmpdir, - callbacks=[callback_mock], - max_epochs=1, - limit_val_batches=1, - limit_train_batches=3, - progress_bar_refresh_rate=0, - ) - - # check that only the to calls exists - assert trainer.callbacks[0] == callback_mock - assert callback_mock.method_calls == [ - call.on_init_start(trainer), - call.on_init_end(trainer), - ] - - # fit model - trainer.fit(model) - - assert callback_mock.method_calls == [ - call.on_init_start(trainer), - call.on_init_end(trainer), - call.on_before_accelerator_backend_setup(trainer, model), - call.setup(trainer, model, 'fit'), - call.on_configure_sharded_model(trainer, model), - call.on_fit_start(trainer, model), - call.on_pretrain_routine_start(trainer, model), - call.on_pretrain_routine_end(trainer, model), - call.on_sanity_check_start(trainer, model), - call.on_validation_start(trainer, model), - call.on_epoch_start(trainer, model), - call.on_validation_epoch_start(trainer, model), - call.on_validation_batch_start(trainer, model, ANY, 0, 0), - call.on_validation_batch_end(trainer, model, ANY, ANY, 0, 0), - call.on_validation_epoch_end(trainer, model), - call.on_epoch_end(trainer, model), - call.on_validation_end(trainer, model), - call.on_sanity_check_end(trainer, model), - call.on_train_start(trainer, model), - call.on_epoch_start(trainer, model), - call.on_train_epoch_start(trainer, model), - call.on_batch_start(trainer, model), - call.on_train_batch_start(trainer, model, ANY, 0, 0), - call.on_before_zero_grad(trainer, model, trainer.optimizers[0]), - call.on_after_backward(trainer, model), - call.on_train_batch_end(trainer, model, ANY, ANY, 0, 0), - call.on_batch_end(trainer, model), - call.on_batch_start(trainer, model), - call.on_train_batch_start(trainer, model, ANY, 1, 0), - call.on_before_zero_grad(trainer, model, trainer.optimizers[0]), - call.on_after_backward(trainer, model), - call.on_train_batch_end(trainer, model, ANY, ANY, 1, 0), - call.on_batch_end(trainer, model), - call.on_batch_start(trainer, model), - call.on_train_batch_start(trainer, model, ANY, 2, 0), - call.on_before_zero_grad(trainer, model, trainer.optimizers[0]), - call.on_after_backward(trainer, model), - call.on_train_batch_end(trainer, model, ANY, ANY, 2, 0), - call.on_batch_end(trainer, model), - call.on_validation_start(trainer, model), - call.on_epoch_start(trainer, model), - call.on_validation_epoch_start(trainer, model), - call.on_validation_batch_start(trainer, model, ANY, 0, 0), - call.on_validation_batch_end(trainer, model, ANY, ANY, 0, 0), - call.on_validation_epoch_end(trainer, model), - call.on_epoch_end(trainer, model), - call.on_validation_end(trainer, model), - call.on_save_checkpoint(trainer, model), # should take ANY but we are inspecting signature 
for BC - call.on_train_epoch_end(trainer, model, ANY), - call.on_epoch_end(trainer, model), - call.on_train_end(trainer, model), - call.on_fit_end(trainer, model), - call.teardown(trainer, model, 'fit'), - ] - - -def test_trainer_callback_hook_system_test(tmpdir): - """Test the callback hook system for test.""" - - model = BoringModel() - callback_mock = MagicMock() - trainer = Trainer( - default_root_dir=tmpdir, - callbacks=[callback_mock], - max_epochs=1, - limit_test_batches=2, - progress_bar_refresh_rate=0, - ) - - trainer.test(model) - - assert callback_mock.method_calls == [ - call.on_init_start(trainer), - call.on_init_end(trainer), - call.on_before_accelerator_backend_setup(trainer, model), - call.setup(trainer, model, 'test'), - call.on_configure_sharded_model(trainer, model), - call.on_test_start(trainer, model), - call.on_epoch_start(trainer, model), - call.on_test_epoch_start(trainer, model), - call.on_test_batch_start(trainer, model, ANY, 0, 0), - call.on_test_batch_end(trainer, model, ANY, ANY, 0, 0), - call.on_test_batch_start(trainer, model, ANY, 1, 0), - call.on_test_batch_end(trainer, model, ANY, ANY, 1, 0), - call.on_test_epoch_end(trainer, model), - call.on_epoch_end(trainer, model), - call.on_test_end(trainer, model), - call.teardown(trainer, model, 'test'), - ] - - -def test_trainer_callback_hook_system_validate(tmpdir): - """Test the callback hook system for validate.""" - - model = BoringModel() - callback_mock = MagicMock() - trainer = Trainer( - default_root_dir=tmpdir, - callbacks=[callback_mock], - max_epochs=1, - limit_val_batches=2, - progress_bar_refresh_rate=0, - ) - - trainer.validate(model) - - assert callback_mock.method_calls == [ - call.on_init_start(trainer), - call.on_init_end(trainer), - call.on_before_accelerator_backend_setup(trainer, model), - call.setup(trainer, model, 'validate'), - call.on_configure_sharded_model(trainer, model), - call.on_validation_start(trainer, model), - call.on_epoch_start(trainer, model), - call.on_validation_epoch_start(trainer, model), - call.on_validation_batch_start(trainer, model, ANY, 0, 0), - call.on_validation_batch_end(trainer, model, ANY, ANY, 0, 0), - call.on_validation_batch_start(trainer, model, ANY, 1, 0), - call.on_validation_batch_end(trainer, model, ANY, ANY, 1, 0), - call.on_validation_epoch_end(trainer, model), - call.on_epoch_end(trainer, model), - call.on_validation_end(trainer, model), - call.teardown(trainer, model, 'validate'), - ] - - -# TODO: add callback tests for predict and tune - - def test_callbacks_configured_in_model(tmpdir): """ Test the callback system with callbacks added through the model hook. 
""" diff --git a/tests/callbacks/test_early_stopping.py b/tests/callbacks/test_early_stopping.py index 7d303e6ed00d6..d7a6f15459912 100644 --- a/tests/callbacks/test_early_stopping.py +++ b/tests/callbacks/test_early_stopping.py @@ -86,7 +86,7 @@ def test_resume_early_stopping_from_checkpoint(tmpdir): callbacks=[early_stop_callback], ) - with pytest.raises(MisconfigurationException, match=r'.*you restored a checkpoint with current_epoch*'): + with pytest.raises(MisconfigurationException, match=r'You restored a checkpoint with current_epoch'): new_trainer.fit(model) @@ -123,7 +123,7 @@ def test_early_stopping_patience(tmpdir, loss_values: list, patience: int, expec """Test to ensure that early stopping is not triggered before patience is exhausted.""" class ModelOverrideValidationReturn(BoringModel): - validation_return_values = torch.Tensor(loss_values) + validation_return_values = torch.tensor(loss_values) def validation_epoch_end(self, outputs): loss = self.validation_return_values[self.current_epoch] @@ -137,6 +137,7 @@ def validation_epoch_end(self, outputs): val_check_interval=1.0, num_sanity_val_steps=0, max_epochs=10, + progress_bar_refresh_rate=0, ) trainer.fit(model) assert trainer.current_epoch == expected_stop_epoch @@ -176,6 +177,7 @@ def training_epoch_end(self, outputs): callbacks=[early_stop_callback], num_sanity_val_steps=0, max_epochs=10, + progress_bar_refresh_rate=0, ) trainer.fit(model) assert trainer.current_epoch == expected_stop_epoch diff --git a/tests/callbacks/test_finetuning_callback.py b/tests/callbacks/test_finetuning_callback.py index 53d34c4645bef..7492bcac7804a 100644 --- a/tests/callbacks/test_finetuning_callback.py +++ b/tests/callbacks/test_finetuning_callback.py @@ -27,7 +27,8 @@ class TestBackboneFinetuningCallback(BackboneFinetuning): - def on_train_epoch_end(self, trainer, pl_module): + def on_train_epoch_start(self, trainer, pl_module): + super().on_train_epoch_start(trainer, pl_module) epoch = trainer.current_epoch if self.unfreeze_backbone_at_epoch <= epoch: optimizer = trainer.optimizers[0] @@ -275,7 +276,7 @@ def configure_optimizers(self): model = FreezeModel() cb = OnEpochLayerFinetuning() trainer = Trainer(max_epochs=10, resume_from_checkpoint=chk.last_model_path, callbacks=[cb]) - with pytest.raises(IndexError, match="index 6 is out of range"): + with pytest.raises(ValueError, match="loaded state dict has a different number of parameter groups"): trainer.fit(model) @@ -307,7 +308,11 @@ def configure_optimizers(self): trainer.fit(model) -def test_deep_nested_model(): +def test_complex_nested_model(): + """ + Test flattening, freezing, and thawing of models which contain parent (non-leaf) modules with parameters + directly themselves rather than exclusively their submodules containing parameters. 
+ """ class ConvBlock(nn.Module): @@ -322,23 +327,41 @@ def forward(self, x): x = self.act(x) return self.bn(x) + class ConvBlockParam(nn.Module): + + def __init__(self, in_channels, out_channels): + super().__init__() + self.module_dict = nn.ModuleDict({ + "conv": nn.Conv2d(in_channels, out_channels, 3), + "act": nn.ReLU(), + }) + # add trivial test parameter to convblock to validate parent (non-leaf) module parameter handling + self.parent_param = nn.Parameter(torch.zeros((1), dtype=torch.float)) + self.bn = nn.BatchNorm2d(out_channels) + + def forward(self, x): + x = self.module_dict["conv"](x) + x = self.module_dict["act"](x) + return self.bn(x) + model = nn.Sequential( OrderedDict([ - ("encoder", nn.Sequential(ConvBlock(3, 64), ConvBlock(64, 128))), + ("encoder", nn.Sequential(ConvBlockParam(3, 64), ConvBlock(64, 128))), ("decoder", ConvBlock(128, 10)), ]) ) - # There's 9 leaf layers in that model - assert len(BaseFinetuning.flatten_modules(model)) == 9 + # There are 10 leaf modules or parent modules w/ parameters in the test model + assert len(BaseFinetuning.flatten_modules(model)) == 10 BaseFinetuning.freeze(model.encoder, train_bn=True) - assert not model.encoder[0].conv.weight.requires_grad + assert not model.encoder[0].module_dict["conv"].weight.requires_grad # Validate a leaf module parameter is frozen + assert not model.encoder[0].parent_param.requires_grad # Validate the parent module parameter is frozen assert model.encoder[0].bn.weight.requires_grad BaseFinetuning.make_trainable(model) encoder_params = list(BaseFinetuning.filter_params(model.encoder, train_bn=True)) - # The 8 parameters of the encoder are: - # conv0.weight, conv0.bias, bn0.weight, bn0.bias + # The 9 parameters of the encoder are: + # conv0.weight, conv0.bias, bn0.weight, bn0.bias, parent_param # conv1.weight, conv1.bias, bn1.weight, bn1.bias - assert len(encoder_params) == 8 + assert len(encoder_params) == 9 diff --git a/tests/callbacks/test_lambda_function.py b/tests/callbacks/test_lambda_function.py index 8d9f85fa56e8a..845846dfd1cfc 100644 --- a/tests/callbacks/test_lambda_function.py +++ b/tests/callbacks/test_lambda_function.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import inspect +from functools import partial from pytorch_lightning import seed_everything, Trainer from pytorch_lightning.callbacks import Callback, LambdaCallback @@ -28,9 +29,13 @@ def on_train_epoch_start(self): raise KeyboardInterrupt checker = set() - hooks = [m for m, _ in inspect.getmembers(Callback, predicate=inspect.isfunction)] - hooks_args = {h: (lambda x: lambda *_: checker.add(x))(h) for h in hooks} - hooks_args["on_save_checkpoint"] = (lambda x: lambda *_: [checker.add(x)])("on_save_checkpoint") + + def call(hook, *_, **__): + checker.add(hook) + + hooks = {m for m, _ in inspect.getmembers(Callback, predicate=inspect.isfunction)} + hooks_args = {h: partial(call, h) for h in hooks} + hooks_args["on_save_checkpoint"] = lambda *_: [checker.add('on_save_checkpoint')] model = CustomModel() @@ -59,4 +64,4 @@ def on_train_epoch_start(self): trainer.test(model) trainer.predict(model) - assert checker == set(hooks) + assert checker == hooks diff --git a/tests/callbacks/test_lr_monitor.py b/tests/callbacks/test_lr_monitor.py index bea6c45e95ced..7956b756dcb3c 100644 --- a/tests/callbacks/test_lr_monitor.py +++ b/tests/callbacks/test_lr_monitor.py @@ -12,11 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. import pytest +import torch from torch import optim import tests.helpers.utils as tutils from pytorch_lightning import Trainer from pytorch_lightning.callbacks import LearningRateMonitor +from pytorch_lightning.callbacks.base import Callback +from pytorch_lightning.callbacks.finetuning import BackboneFinetuning from pytorch_lightning.utilities.exceptions import MisconfigurationException from tests.helpers import BoringModel from tests.helpers.datamodules import ClassifDataModule @@ -278,3 +281,173 @@ def configure_optimizers(self): ) trainer.fit(TestModel()) assert lr_monitor.lr_sch_names == list(lr_monitor.lrs.keys()) == ['my_logging_name'] + + +def test_lr_monitor_custom_pg_name(tmpdir): + + class TestModel(BoringModel): + + def configure_optimizers(self): + optimizer = torch.optim.SGD([{'params': list(self.layer.parameters()), 'name': 'linear'}], lr=0.1) + lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1) + return [optimizer], [lr_scheduler] + + lr_monitor = LearningRateMonitor() + trainer = Trainer( + default_root_dir=tmpdir, + max_epochs=2, + limit_val_batches=2, + limit_train_batches=2, + callbacks=[lr_monitor], + progress_bar_refresh_rate=0, + weights_summary=None, + ) + trainer.fit(TestModel()) + assert lr_monitor.lr_sch_names == ['lr-SGD'] + assert list(lr_monitor.lrs) == ['lr-SGD/linear'] + + +def test_lr_monitor_duplicate_custom_pg_names(tmpdir): + tutils.reset_seed() + + class TestModel(BoringModel): + + def __init__(self): + super().__init__() + self.linear_a = torch.nn.Linear(32, 16) + self.linear_b = torch.nn.Linear(16, 2) + + def forward(self, x): + x = self.linear_a(x) + x = self.linear_b(x) + return x + + def configure_optimizers(self): + param_groups = [ + { + 'params': list(self.linear_a.parameters()), + 'name': 'linear' + }, + { + 'params': list(self.linear_b.parameters()), + 'name': 'linear' + }, + ] + optimizer = torch.optim.SGD(param_groups, lr=0.1) + lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1) + return [optimizer], [lr_scheduler] + + lr_monitor = LearningRateMonitor() + trainer = Trainer( + default_root_dir=tmpdir, + max_epochs=2, + limit_val_batches=2, + limit_train_batches=2, + callbacks=[lr_monitor], + progress_bar_refresh_rate=0, + 
weights_summary=None, + ) + + with pytest.raises( + MisconfigurationException, match='A single `Optimizer` cannot have multiple parameter groups with identical' + ): + trainer.fit(TestModel()) + + +def test_multiple_optimizers_basefinetuning(tmpdir): + + class TestModel(BoringModel): + + def __init__(self): + super().__init__() + self.backbone = torch.nn.Sequential( + torch.nn.Linear(32, 32), + torch.nn.Linear(32, 32), + torch.nn.Linear(32, 32), + torch.nn.ReLU(True), + ) + self.layer = torch.nn.Linear(32, 2) + + def training_step(self, batch, batch_idx, optimizer_idx): + return super().training_step(batch, batch_idx) + + def forward(self, x): + return self.layer(self.backbone(x)) + + def configure_optimizers(self): + parameters = list(filter(lambda p: p.requires_grad, self.parameters())) + opt = optim.Adam(parameters, lr=0.1) + opt_2 = optim.Adam(parameters, lr=0.1) + opt_3 = optim.Adam(parameters, lr=0.1) + optimizers = [opt, opt_2, opt_3] + schedulers = [ + optim.lr_scheduler.StepLR(opt, step_size=1, gamma=0.5), + optim.lr_scheduler.StepLR(opt_2, step_size=1, gamma=0.5), + ] + return optimizers, schedulers + + class Check(Callback): + + def on_train_epoch_start(self, trainer, pl_module) -> None: + num_param_groups = sum([len(opt.param_groups) for opt in trainer.optimizers]) + assert lr_monitor.lr_sch_names == ['lr-Adam', 'lr-Adam-1'] + if trainer.current_epoch == 0: + assert num_param_groups == 3 + elif trainer.current_epoch == 1: + assert num_param_groups == 4 + assert list(lr_monitor.lrs) == ['lr-Adam-1', 'lr-Adam/pg1', 'lr-Adam/pg2'] + elif trainer.current_epoch == 2: + assert num_param_groups == 5 + assert list(lr_monitor.lrs) == ['lr-Adam/pg1', 'lr-Adam/pg2', 'lr-Adam-1/pg1', 'lr-Adam-1/pg2'] + else: + expected = ['lr-Adam/pg1', 'lr-Adam/pg2', 'lr-Adam-1/pg1', 'lr-Adam-1/pg2', 'lr-Adam-1/pg3'] + assert list(lr_monitor.lrs) == expected + + class TestFinetuning(BackboneFinetuning): + + def freeze_before_training(self, pl_module): + self.freeze(pl_module.backbone[0]) + self.freeze(pl_module.backbone[1]) + self.freeze(pl_module.layer) + + def finetune_function(self, pl_module, epoch: int, optimizer, opt_idx: int): + """Called when the epoch begins.""" + if epoch == 1 and opt_idx == 0: + self.unfreeze_and_add_param_group(pl_module.backbone[0], optimizer, lr=0.1) + if epoch == 2 and opt_idx == 1: + self.unfreeze_and_add_param_group(pl_module.layer, optimizer, lr=0.1) + + if epoch == 3 and opt_idx == 1: + assert len(optimizer.param_groups) == 2 + self.unfreeze_and_add_param_group(pl_module.backbone[1], optimizer, lr=0.1) + assert len(optimizer.param_groups) == 3 + + lr_monitor = LearningRateMonitor() + trainer = Trainer( + default_root_dir=tmpdir, + max_epochs=5, + limit_val_batches=0, + limit_train_batches=2, + callbacks=[TestFinetuning(), lr_monitor, Check()], + progress_bar_refresh_rate=0, + weights_summary=None, + checkpoint_callback=False + ) + model = TestModel() + model.training_epoch_end = None + trainer.fit(model) + + expected = [0.1, 0.05, 0.025, 0.0125, 0.00625] + assert lr_monitor.lrs['lr-Adam/pg1'] == expected + + expected = [0.1, 0.05, 0.025, 0.0125] + assert lr_monitor.lrs['lr-Adam/pg2'] == expected + + expected = [0.1, 0.05, 0.025, 0.0125, 0.00625] + assert lr_monitor.lrs['lr-Adam-1/pg1'] == expected + + expected = [0.1, 0.05, 0.025] + assert lr_monitor.lrs['lr-Adam-1/pg2'] == expected + + expected = [0.1, 0.05] + assert lr_monitor.lrs['lr-Adam-1/pg3'] == expected diff --git a/tests/callbacks/test_progress_bar.py b/tests/callbacks/test_progress_bar.py index 
f4f8f34c1b4c1..aafb29d51b161 100644 --- a/tests/callbacks/test_progress_bar.py +++ b/tests/callbacks/test_progress_bar.py @@ -20,12 +20,14 @@ import pytest import torch +from torch.utils.data.dataloader import DataLoader from pytorch_lightning import Trainer from pytorch_lightning.callbacks import ModelCheckpoint, ProgressBar, ProgressBarBase from pytorch_lightning.callbacks.progress import tqdm from pytorch_lightning.utilities.exceptions import MisconfigurationException -from tests.helpers import BoringModel +from tests.helpers.boring_model import BoringModel, RandomDataset +from tests.helpers.runif import RunIf @pytest.mark.parametrize( @@ -192,11 +194,11 @@ class CurrentProgressBar(ProgressBar): def on_train_batch_start(self, trainer, pl_module, batch, batch_idx, dataloader_idx): super().on_train_batch_start(trainer, pl_module, batch, batch_idx, dataloader_idx) - assert self.train_batch_idx == trainer.train_loop.batch_idx + assert self.train_batch_idx == trainer.fit_loop.batch_idx def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, dataloader_idx): super().on_train_batch_end(trainer, pl_module, outputs, batch, batch_idx, dataloader_idx) - assert self.train_batch_idx == trainer.train_loop.batch_idx + 1 + assert self.train_batch_idx == trainer.fit_loop.batch_idx + 1 if not self.is_disabled and self.train_batch_idx % self.refresh_rate == 0: assert self.main_progress_bar.n == self.train_batch_idx self.train_batches_seen += 1 @@ -350,7 +352,8 @@ def test_main_progress_bar_update_amount( checkpoint_callback=False, ) trainer.fit(model) - progress_bar.main_progress_bar.update.assert_has_calls([call(delta) for delta in train_deltas]) + if train_batches > 0: + progress_bar.main_progress_bar.update.assert_has_calls([call(delta) for delta in train_deltas]) if val_batches > 0: progress_bar.val_progress_bar.update.assert_has_calls([call(delta) for delta in val_deltas]) @@ -384,8 +387,9 @@ def test_tensor_to_float_conversion(tmpdir): class TestModel(BoringModel): def training_step(self, batch, batch_idx): - self.log('foo', torch.tensor(0.123), prog_bar=True) - self.log('bar', {"baz": torch.tensor([1])}, prog_bar=True) + self.log('a', torch.tensor(0.123), prog_bar=True, on_epoch=False) + self.log('b', {"b1": torch.tensor([1])}, prog_bar=True, on_epoch=False) + self.log('c', {"c1": 2}, prog_bar=True, on_epoch=False) return super().training_step(batch, batch_idx) trainer = Trainer( @@ -397,9 +401,12 @@ def training_step(self, batch, batch_idx): ) trainer.fit(TestModel()) + torch.testing.assert_allclose(trainer.progress_bar_metrics['a'], 0.123) + assert trainer.progress_bar_metrics['b'] == {'b1': 1.0} + assert trainer.progress_bar_metrics['c'] == {'c1': 2.0} pbar = trainer.progress_bar_callback.main_progress_bar actual = str(pbar.postfix) - assert actual.endswith("foo=0.123, bar={'baz': tensor([1])}") + assert actual.endswith("a=0.123, b={'b1': 1.0}, c={'c1': 2.0}"), actual @pytest.mark.parametrize( @@ -533,3 +540,58 @@ def test_progress_bar_can_be_pickled(): pickle.dumps(bar) trainer.predict(model) pickle.dumps(bar) + + +@RunIf(min_gpus=2, special=True) +def test_progress_bar_max_val_check_interval_0(tmpdir): + _test_progress_bar_max_val_check_interval( + tmpdir, + total_train_samples=8, + train_batch_size=4, + total_val_samples=2, + val_batch_size=1, + val_check_interval=0.2 + ) + + +@RunIf(min_gpus=2, special=True) +def test_progress_bar_max_val_check_interval_1(tmpdir): + _test_progress_bar_max_val_check_interval( + tmpdir, + total_train_samples=8, + train_batch_size=4, + 
total_val_samples=2, + val_batch_size=1, + val_check_interval=0.5 + ) + + +def _test_progress_bar_max_val_check_interval( + tmpdir, total_train_samples, train_batch_size, total_val_samples, val_batch_size, val_check_interval +): + world_size = 2 + train_data = DataLoader(RandomDataset(32, total_train_samples), batch_size=train_batch_size) + val_data = DataLoader(RandomDataset(32, total_val_samples), batch_size=val_batch_size) + + model = BoringModel() + trainer = Trainer( + default_root_dir=tmpdir, + num_sanity_val_steps=0, + max_epochs=1, + weights_summary=None, + val_check_interval=val_check_interval, + gpus=world_size, + accelerator="ddp", + ) + trainer.fit(model, train_dataloader=train_data, val_dataloaders=val_data) + + total_train_batches = total_train_samples // (train_batch_size * world_size) + val_check_batch = max(1, int(total_train_batches * val_check_interval)) + assert trainer.val_check_batch == val_check_batch + val_checks_per_epoch = total_train_batches / val_check_batch + total_val_batches = total_val_samples // (val_batch_size * world_size) + assert trainer.progress_bar_callback.total_train_batches == total_train_batches + assert trainer.progress_bar_callback.total_val_batches == total_val_batches + total_val_batches = total_val_batches * val_checks_per_epoch + if trainer.is_global_zero: + assert trainer.progress_bar_callback.main_progress_bar.total == total_train_batches + total_val_batches diff --git a/tests/callbacks/test_pruning.py b/tests/callbacks/test_pruning.py index d4957905454d8..1a5ddad64106e 100644 --- a/tests/callbacks/test_pruning.py +++ b/tests/callbacks/test_pruning.py @@ -161,14 +161,45 @@ def test_pruning_callback( ) -@RunIf(special=True) -@pytest.mark.parametrize("parameters_to_prune", [False, True]) -@pytest.mark.parametrize("use_global_unstructured", [False, True]) -def test_pruning_callback_ddp(tmpdir, use_global_unstructured: bool, parameters_to_prune: bool): +@RunIf(special=True, min_gpus=2) +def test_pruning_callback_ddp_0(tmpdir): train_with_pruning_callback( tmpdir, - parameters_to_prune=parameters_to_prune, - use_global_unstructured=use_global_unstructured, + parameters_to_prune=False, + use_global_unstructured=False, + accelerator="ddp", + gpus=2, + ) + + +@RunIf(special=True, min_gpus=2) +def test_pruning_callback_ddp_1(tmpdir): + train_with_pruning_callback( + tmpdir, + parameters_to_prune=False, + use_global_unstructured=True, + accelerator="ddp", + gpus=2, + ) + + +@RunIf(special=True, min_gpus=2) +def test_pruning_callback_ddp_2(tmpdir): + train_with_pruning_callback( + tmpdir, + parameters_to_prune=True, + use_global_unstructured=False, + accelerator="ddp", + gpus=2, + ) + + +@RunIf(special=True, min_gpus=2) +def test_pruning_callback_ddp_3(tmpdir): + train_with_pruning_callback( + tmpdir, + parameters_to_prune=True, + use_global_unstructured=True, accelerator="ddp", gpus=2, ) diff --git a/tests/callbacks/test_stochastic_weight_avg.py b/tests/callbacks/test_stochastic_weight_avg.py index 81efc12b34662..8518fe16f0359 100644 --- a/tests/callbacks/test_stochastic_weight_avg.py +++ b/tests/callbacks/test_stochastic_weight_avg.py @@ -23,7 +23,7 @@ from pytorch_lightning import Trainer from pytorch_lightning.utilities import _TORCH_GREATER_EQUAL_1_6 from pytorch_lightning.utilities.exceptions import MisconfigurationException -from tests.helpers import BoringModel, RandomDataset +from tests.helpers.boring_model import BoringModel, RandomDataset, RandomIterableDataset from tests.helpers.runif import RunIf if _TORCH_GREATER_EQUAL_1_6: @@ -33,7 
+33,7 @@ class SwaTestModel(BoringModel): - def __init__(self, batchnorm: bool = True, interval: str = "epoch"): + def __init__(self, batchnorm: bool = True, interval: str = "epoch", iterable_dataset: bool = False): super().__init__() layers = [nn.Linear(32, 32)] if batchnorm: @@ -41,6 +41,7 @@ def __init__(self, batchnorm: bool = True, interval: str = "epoch"): layers += [nn.ReLU(), nn.Linear(32, 2)] self.layer = nn.Sequential(*layers) self.interval = interval + self.iterable_dataset = iterable_dataset def training_step(self, batch, batch_idx): output = self.forward(batch) @@ -48,7 +49,11 @@ def training_step(self, batch, batch_idx): return {"loss": loss} def train_dataloader(self): - return DataLoader(RandomDataset(32, 64), batch_size=2) + + dset_cls = RandomIterableDataset if self.iterable_dataset else RandomDataset + dset = dset_cls(32, 64) + + return DataLoader(dset, batch_size=2) def configure_optimizers(self): optimizer = torch.optim.SGD(self.layer.parameters(), lr=0.1) @@ -74,7 +79,7 @@ def transfer_weights(self, *args, **kwargs): def on_train_epoch_start(self, trainer, *args): super().on_train_epoch_start(trainer, *args) - assert trainer.train_loop._skip_backward == (trainer.current_epoch > self.swa_end) + assert trainer.fit_loop._skip_backward == (trainer.current_epoch > self.swa_end) if self.swa_start <= trainer.current_epoch: assert isinstance(trainer.lr_schedulers[0]["scheduler"], SWALR) assert trainer.lr_schedulers[0]["interval"] == "epoch" @@ -92,7 +97,7 @@ def on_train_end(self, trainer, pl_module): super().on_train_end(trainer, pl_module) # make sure these are correctly set again - assert not trainer.train_loop._skip_backward + assert not trainer.fit_loop._skip_backward assert trainer.accumulate_grad_batches == 2 assert trainer.num_training_batches == 5 @@ -107,8 +112,10 @@ def on_train_end(self, trainer, pl_module): @mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) -def train_with_swa(tmpdir, batchnorm=True, accelerator=None, gpus=None, num_processes=1, interval="epoch"): - model = SwaTestModel(batchnorm=batchnorm, interval=interval) +def train_with_swa( + tmpdir, batchnorm=True, accelerator=None, gpus=None, num_processes=1, interval="epoch", iterable_dataset=False +): + model = SwaTestModel(batchnorm=batchnorm, interval=interval, iterable_dataset=iterable_dataset) swa_start = 2 max_epochs = 5 swa_callback = SwaTestCallback(swa_epoch_start=swa_start, swa_lrs=0.1) @@ -155,8 +162,9 @@ def test_swa_callback_1_gpu(tmpdir): @RunIf(min_torch="1.6.0") @pytest.mark.parametrize("batchnorm", (True, False)) -def test_swa_callback(tmpdir, batchnorm: bool): - train_with_swa(tmpdir, batchnorm=batchnorm) +@pytest.mark.parametrize('iterable_dataset', (True, False)) +def test_swa_callback(tmpdir, batchnorm: bool, iterable_dataset: bool): + train_with_swa(tmpdir, batchnorm=batchnorm, iterable_dataset=iterable_dataset) @RunIf(min_torch="1.6.0") diff --git a/tests/callbacks/test_timer.py b/tests/callbacks/test_timer.py index c27eebbeb7805..16e01a6adcaf4 100644 --- a/tests/callbacks/test_timer.py +++ b/tests/callbacks/test_timer.py @@ -95,7 +95,7 @@ def test_timer_time_remaining(time_mock): assert round(timer.time_elapsed()) == 3 -def test_timer_stops_training(tmpdir): +def test_timer_stops_training(tmpdir, caplog): """ Test that the timer stops training before reaching max_epochs """ model = BoringModel() duration = timedelta(milliseconds=100) @@ -106,9 +106,12 @@ def test_timer_stops_training(tmpdir): max_epochs=1000, callbacks=[timer], ) - trainer.fit(model) + with 
caplog.at_level(logging.INFO): + trainer.fit(model) assert trainer.global_step > 1 assert trainer.current_epoch < 999 + assert "Time limit reached." in caplog.text + assert "Signaling Trainer to stop." in caplog.text @pytest.mark.parametrize("interval", ["step", "epoch"]) diff --git a/tests/checkpointing/test_checkpoint_callback_frequency.py b/tests/checkpointing/test_checkpoint_callback_frequency.py index 9fdd69dba7a9a..8617a9f8f7050 100644 --- a/tests/checkpointing/test_checkpoint_callback_frequency.py +++ b/tests/checkpointing/test_checkpoint_callback_frequency.py @@ -107,8 +107,17 @@ def training_step(self, batch, batch_idx): @mock.patch('torch.save') @RunIf(special=True, min_gpus=2) -@pytest.mark.parametrize(['k', 'epochs', 'val_check_interval', 'expected'], [(1, 1, 1.0, 1), (2, 2, 0.3, 5)]) -def test_top_k_ddp(save_mock, tmpdir, k, epochs, val_check_interval, expected): +def test_top_k_ddp_0(save_mock, tmpdir): + _top_k_ddp(save_mock, tmpdir, k=1, epochs=1, val_check_interval=1.0, expected=1) + + +@mock.patch('torch.save') +@RunIf(special=True, min_gpus=2) +def test_top_k_ddp_1(save_mock, tmpdir): + _top_k_ddp(save_mock, tmpdir, k=2, epochs=2, val_check_interval=0.3, expected=5) + + +def _top_k_ddp(save_mock, tmpdir, k, epochs, val_check_interval, expected): class TestModel(BoringModel): @@ -120,7 +129,7 @@ def training_step(self, batch, batch_idx): def training_epoch_end(self, outputs) -> None: local_rank = int(os.getenv("LOCAL_RANK")) if self.trainer.is_global_zero: - self.log('my_loss_2', (1 + local_rank), on_epoch=True) + self.log('my_loss_2', (1 + local_rank), on_epoch=True, rank_zero_only=True) data = str(self.global_rank) obj = [[data], (data, ), set(data)] out = self.trainer.training_type_plugin.broadcast(obj) diff --git a/tests/checkpointing/test_legacy_checkpoints.py b/tests/checkpointing/test_legacy_checkpoints.py index fbcb700e4a3d2..13ae7300375e8 100644 --- a/tests/checkpointing/test_legacy_checkpoints.py +++ b/tests/checkpointing/test_legacy_checkpoints.py @@ -65,6 +65,12 @@ "1.3.0", "1.3.1", "1.3.2", + "1.3.3", + "1.3.4", + "1.3.5", + "1.3.6", + "1.3.7", + "1.3.8", ] ) def test_resume_legacy_checkpoints(tmpdir, pl_version: str): diff --git a/tests/checkpointing/test_model_checkpoint.py b/tests/checkpointing/test_model_checkpoint.py index 2f867d4e998b4..82432cfc7c601 100644 --- a/tests/checkpointing/test_model_checkpoint.py +++ b/tests/checkpointing/test_model_checkpoint.py @@ -83,6 +83,7 @@ def __init__(self): super().__init__() self.train_log_epochs = torch.randn(max_epochs, limit_train_batches) self.val_logs = torch.randn(max_epochs, limit_val_batches) + self.scores = [] def training_step(self, batch, batch_idx): log_value = self.train_log_epochs[self.current_epoch, batch_idx] @@ -109,6 +110,14 @@ def configure_optimizers(self): return [optimizer], [lr_scheduler] + def on_train_epoch_end(self): + if 'train' in monitor: + self.scores.append(self.trainer.logged_metrics[monitor]) + + def on_validation_epoch_end(self): + if not self.trainer.sanity_checking and 'val' in monitor: + self.scores.append(self.trainer.logged_metrics[monitor]) + filename = '{' + f'{monitor}' + ':.4f}-{epoch}' checkpoint = ModelCheckpoint(dirpath=tmpdir, filename=filename, monitor=monitor, save_top_k=-1) @@ -131,13 +140,12 @@ def configure_optimizers(self): assert trainer.state.finished, f"Training failed with {trainer.state}" ckpt_files = list(Path(tmpdir).glob('*.ckpt')) - scores = [metric[monitor] for metric in trainer.dev_debugger.logged_metrics if monitor in metric] lr_scheduler_debug = 
trainer.dev_debugger.saved_lr_scheduler_updates - assert len(ckpt_files) == len(scores) == max_epochs + assert len(ckpt_files) == len(model.scores) == max_epochs assert len(lr_scheduler_debug) == max_epochs for epoch in range(max_epochs): - score = scores[epoch] + score = model.scores[epoch] expected_score = getattr(model, f'{monitor}s')[epoch].mean().item() expected_filename = f'{monitor}={score:.4f}-epoch={epoch}.ckpt' assert math.isclose(score, expected_score, rel_tol=1e-4) @@ -154,10 +162,9 @@ def configure_optimizers(self): if not reduce_lr_on_plateau: actual_step_count = chk['lr_schedulers'][0]['_step_count'] actual_lr = chk['lr_schedulers'][0]['_last_lr'][0] - # if validation_step_none, the checkpoint gets saved after the learning rate update - # so we need to increase the count by one - assert actual_step_count == epoch + 1 + validation_step_none - assert actual_lr == lr * gamma**(epoch + validation_step_none) + # checkpoint is saved after updating lr_scheduler states + assert actual_step_count == epoch + 2 # step_count starts at 1 + assert actual_lr == lr * gamma**(epoch + 1) assert lr_scheduler_debug[epoch]['monitor_val'] == (score if reduce_lr_on_plateau else None) assert lr_scheduler_debug[epoch]['monitor_key'] == (monitor if reduce_lr_on_plateau else None) @@ -193,6 +200,7 @@ def __init__(self): super().__init__() self.val_logs = torch.randn(per_epoch_val_checks * max_epochs, limit_val_batches) self.val_loop_count = 0 + self.scores = [] def validation_step(self, batch, batch_idx): log_value = self.val_logs[self.val_loop_count, batch_idx] @@ -202,6 +210,7 @@ def validation_step(self, batch, batch_idx): def validation_epoch_end(self, outputs): self.val_loop_count += 1 super().validation_epoch_end(outputs) + self.scores.append(self.trainer.logged_metrics[monitor]) def configure_optimizers(self): optimizer = optim.SGD(self.parameters(), lr=lr) @@ -236,24 +245,21 @@ def configure_optimizers(self): assert trainer.state.finished, f"Training failed with {trainer.state}" ckpt_files = list(Path(tmpdir).glob('*.ckpt')) - scores = [metric[monitor] for metric in trainer.dev_debugger.logged_metrics if monitor in metric] lr_scheduler_debug = trainer.dev_debugger.saved_lr_scheduler_updates - # on_train_end ckpt callback is called which creates an additional ckpt in case no ckpt is created at the - # end of epoch, thus if val_check_interval doesn't align with the training steps we create an additional ckpt - additional_ckpt, additional_ckpt_path = False, None - if not epoch_aligned: - additional_ckpt_path = [f for f in ckpt_files if 'v1' in f.stem][0] - additional_ckpt = True - - assert len(ckpt_files) == len(scores) + additional_ckpt == per_epoch_val_checks * max_epochs + additional_ckpt + assert len(ckpt_files) == len(model.scores) == per_epoch_val_checks * max_epochs assert len(lr_scheduler_debug) == max_epochs def _make_assertions(epoch, ix, version=''): global_ix = ix + per_epoch_val_checks * epoch duplicated = bool(version) - score = scores[global_ix] + # checkpoint saved at the end of training epoch will have updated lr_scheduler states + epoch_end_checkpoint = duplicated + if epoch_aligned: + epoch_end_checkpoint = ix == (per_epoch_val_checks - 1) + + score = model.scores[global_ix] expected_score = getattr(model, f'{monitor}s')[global_ix].mean().item() expected_filename = f'{monitor}={score:.4f}-epoch={epoch}{version}.ckpt' assert math.isclose(score, expected_score, rel_tol=1e-4) @@ -272,8 +278,8 @@ def _make_assertions(epoch, ix, version=''): if not reduce_lr_on_plateau: 
actual_step_count = chk['lr_schedulers'][0]['_step_count'] actual_lr = chk['lr_schedulers'][0]['_last_lr'][0] - assert actual_step_count == epoch + 1 + duplicated - assert actual_lr == lr * gamma**(epoch + duplicated) + assert actual_step_count == epoch + 1 + epoch_end_checkpoint + assert actual_lr == lr * gamma**(epoch + epoch_end_checkpoint) return score @@ -284,10 +290,6 @@ def _make_assertions(epoch, ix, version=''): assert lr_scheduler_debug[epoch]['monitor_val'] == (score if reduce_lr_on_plateau else None) assert lr_scheduler_debug[epoch]['monitor_key'] == (monitor if reduce_lr_on_plateau else None) - # check the ckpt file saved on_train_end - if additional_ckpt_path: - _make_assertions(max_epochs - 1, per_epoch_val_checks - 1, version='-v1') - @pytest.mark.parametrize("save_top_k", [-1, 0, 1, 2]) def test_model_checkpoint_with_non_string_input(tmpdir, save_top_k: int): @@ -327,7 +329,7 @@ def test_model_checkpoint_to_yaml(tmpdir, save_top_k: int): path_yaml = os.path.join(tmpdir, 'best_k_models.yaml') checkpoint.to_yaml(path_yaml) d = yaml.full_load(open(path_yaml, 'r')) - best_k = {k: v for k, v in checkpoint.best_k_models.items()} + best_k = dict(checkpoint.best_k_models.items()) assert d == best_k @@ -810,7 +812,7 @@ def test_model_checkpoint_topk_all(tmpdir): assert checkpoint_callback.best_model_path == tmpdir / "epoch=2.ckpt" assert checkpoint_callback.best_model_score == epochs - 1 assert len(os.listdir(tmpdir)) == len(checkpoint_callback.best_k_models) == epochs - assert set(checkpoint_callback.best_k_models.keys()) == set(str(tmpdir / f"epoch={i}.ckpt") for i in range(epochs)) + assert set(checkpoint_callback.best_k_models.keys()) == {str(tmpdir / f"epoch={i}.ckpt") for i in range(epochs)} assert checkpoint_callback.kth_best_model_path == tmpdir / 'epoch=0.ckpt' @@ -879,6 +881,8 @@ def test_model_checkpoint_save_last_warning( default_root_dir=tmpdir, callbacks=[ckpt], max_epochs=max_epochs, + limit_train_batches=1, + limit_val_batches=1, ) with caplog.at_level(logging.INFO): trainer.fit(model) @@ -897,6 +901,8 @@ def test_model_checkpoint_save_last_checkpoint_contents(tmpdir): default_root_dir=tmpdir, callbacks=[model_checkpoint], max_epochs=num_epochs, + limit_train_batches=2, + limit_val_batches=2, ) trainer.fit(model) @@ -907,7 +913,9 @@ def test_model_checkpoint_save_last_checkpoint_contents(tmpdir): ckpt_last_epoch = torch.load(path_last_epoch) ckpt_last = torch.load(path_last) - assert all(ckpt_last_epoch[k] == ckpt_last[k] for k in ("epoch", "global_step")) + + assert ckpt_last_epoch["epoch"] == ckpt_last["epoch"] + assert ckpt_last_epoch["global_step"] == ckpt_last["global_step"] ch_type = type(model_checkpoint) assert ckpt_last["callbacks"][ch_type] == ckpt_last_epoch["callbacks"][ch_type] @@ -1008,7 +1016,6 @@ def validation_epoch_end(self, *_): ... 
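The scheduler arithmetic asserted above follows from how torch's `StepLR` does its bookkeeping. A minimal sketch, assuming current PyTorch behavior for the private `_step_count` attribute: construction already counts as the first step, and with `step_size=1` every subsequent `step()` multiplies the learning rate by `gamma`, so a checkpoint written after `epoch + 1` scheduler steps sees `_step_count == epoch + 2` and `lr == lr0 * gamma ** (epoch + 1)`.

    import torch
    from torch import optim

    lr0, gamma = 0.1, 0.5
    opt = optim.SGD([torch.nn.Parameter(torch.zeros(1))], lr=lr0)
    sched = optim.lr_scheduler.StepLR(opt, step_size=1, gamma=gamma)
    assert sched._step_count == 1  # construction performs the first "step"

    for epoch in range(3):
        opt.step()   # avoid the "scheduler stepped before optimizer" warning
        sched.step()
        assert sched._step_count == epoch + 2
        assert sched.get_last_lr()[0] == lr0 * gamma ** (epoch + 1)

Note that the float comparison is exact here only because `gamma` is a power of two; a real test should compare with a tolerance.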
def assert_trainer_init(trainer): - assert not trainer.checkpoint_connector.has_trained assert trainer.global_step == 0 assert trainer.current_epoch == 0 @@ -1044,7 +1051,6 @@ def assert_checkpoint_log_dir(idx): model = ExtendedBoringModel() trainer.fit(model) - assert trainer.checkpoint_connector.has_trained assert trainer.global_step == epochs * limit_train_batches assert trainer.current_epoch == epochs - 1 assert_checkpoint_log_dir(0) @@ -1068,19 +1074,16 @@ def assert_checkpoint_log_dir(idx): model = ExtendedBoringModel() trainer.test(model) - assert not trainer.checkpoint_connector.has_trained # resume_from_checkpoint is resumed when calling `.fit` assert trainer.global_step == 0 assert trainer.current_epoch == 0 trainer.fit(model) - assert not trainer.checkpoint_connector.has_trained assert trainer.global_step == epochs * limit_train_batches assert trainer.current_epoch == epochs assert_checkpoint_log_dir(idx) trainer.validate(model) - assert not trainer.checkpoint_connector.has_trained assert trainer.global_step == epochs * limit_train_batches assert trainer.current_epoch == epochs @@ -1259,10 +1262,11 @@ def test_ckpt_version_after_rerun_new_trainer(tmpdir): # check best_k_models state expected = {"epoch=0-v1.ckpt", "epoch=1-v1.ckpt"} if i else {"epoch=0.ckpt", "epoch=1.ckpt"} - assert {Path(f).name for f in mc.best_k_models.keys()} == expected + assert {Path(f).name for f in mc.best_k_models} == expected # check created ckpts - assert set(f.basename for f in tmpdir.listdir()) == { + actual = {f.basename for f in tmpdir.listdir()} + assert actual == { "epoch=0.ckpt", "epoch=1.ckpt", "epoch=0-v1.ckpt", @@ -1288,13 +1292,13 @@ def test_ckpt_version_after_rerun_same_trainer(tmpdir): progress_bar_refresh_rate=0, ) trainer.fit(BoringModel()) - trainer.train_loop.max_epochs = 4 + trainer.fit_loop.max_epochs = 4 trainer.fit(BoringModel()) ckpt_range = range(mc.STARTING_VERSION, trainer.max_epochs + mc.STARTING_VERSION) expected = {'test.ckpt', *[f"test-v{i}.ckpt" for i in ckpt_range]} # check best_k_models state - assert {Path(f).name for f in mc.best_k_models.keys()} == expected + assert {Path(f).name for f in mc.best_k_models} == expected # check created ckpts assert set(os.listdir(tmpdir)) == expected diff --git a/tests/conftest.py b/tests/conftest.py index 7f6407ecfd82b..3f767d8b6fad2 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -18,6 +18,7 @@ from http.server import SimpleHTTPRequestHandler import pytest +import torch.distributed import torch.multiprocessing as mp @@ -41,6 +42,14 @@ def restore_env_variables(): os.environ.update(env_backup) +@pytest.fixture(scope="function", autouse=True) +def teardown_process_group(): + """ Ensures that the distributed process group gets closed before the next test runs. 
""" + yield + if torch.distributed.is_available() and torch.distributed.is_initialized(): + torch.distributed.destroy_process_group() + + def pytest_configure(config): config.addinivalue_line("markers", "spawn: spawn test in a separate process using torch.multiprocessing.spawn") diff --git a/tests/core/test_datamodules.py b/tests/core/test_datamodules.py index e6500a15eeed1..30131cdcc80d2 100644 --- a/tests/core/test_datamodules.py +++ b/tests/core/test_datamodules.py @@ -34,6 +34,7 @@ @mock.patch("pytorch_lightning.trainer.trainer.Trainer.local_rank", new_callable=PropertyMock) def test_can_prepare_data(local_rank, node_rank): + model = BoringModel() dm = BoringDataModule() trainer = Trainer() trainer.datamodule = dm @@ -43,30 +44,54 @@ def test_can_prepare_data(local_rank, node_rank): # local rank = 0 (True) trainer.prepare_data_per_node = True + dm.random_full = None + dm._has_prepared_data = False local_rank.return_value = 0 assert trainer.local_rank == 0 assert trainer.data_connector.can_prepare_data() + trainer.data_connector.prepare_data(model) + assert dm.random_full is not None + # local rank = 1 (False) + dm.random_full = None + dm._has_prepared_data = False local_rank.return_value = 1 assert trainer.local_rank == 1 assert not trainer.data_connector.can_prepare_data() + trainer.data_connector.prepare_data(model) + assert dm.random_full is None + # prepare_data_per_node = False (prepare across all nodes) # global rank = 0 (True) + dm.random_full = None + dm._has_prepared_data = False trainer.prepare_data_per_node = False node_rank.return_value = 0 local_rank.return_value = 0 assert trainer.data_connector.can_prepare_data() + trainer.data_connector.prepare_data(model) + assert dm.random_full is not None + # global rank = 1 (False) + dm.random_full = None + dm._has_prepared_data = False node_rank.return_value = 1 local_rank.return_value = 0 assert not trainer.data_connector.can_prepare_data() + + trainer.data_connector.prepare_data(model) + assert dm.random_full is None + node_rank.return_value = 0 local_rank.return_value = 1 assert not trainer.data_connector.can_prepare_data() + trainer.data_connector.prepare_data(model) + assert dm.random_full is None + # 2 dm # prepar per node = True # local rank = 0 (True) @@ -355,12 +380,12 @@ def test_full_loop(tmpdir): assert dm.trainer is not None # validate - result = trainer.validate(datamodule=dm) + result = trainer.validate(model, dm) assert dm.trainer is not None assert result[0]['val_acc'] > 0.7 # test - result = trainer.test(datamodule=dm) + result = trainer.test(model, dm) assert dm.trainer is not None assert result[0]['test_acc'] > 0.6 @@ -524,46 +549,3 @@ def test_dm_init_from_datasets_dataloaders(iterable): call(test_dss[0], batch_size=4, shuffle=False, num_workers=0, pin_memory=True), call(test_dss[1], batch_size=4, shuffle=False, num_workers=0, pin_memory=True) ]) - - -def test_datamodule_hooks_calls(tmpdir): - """Test that repeated calls to DataHooks' hooks have no effect""" - - class TestDataModule(BoringDataModule): - setup_calls = [] - teardown_calls = [] - prepare_data_calls = 0 - - def setup(self, stage=None): - super().setup(stage=stage) - self.setup_calls.append(stage) - - def teardown(self, stage=None): - super().teardown(stage=stage) - self.teardown_calls.append(stage) - - def prepare_data(self): - super().prepare_data() - self.prepare_data_calls += 1 - - dm = TestDataModule() - dm.prepare_data() - dm.prepare_data() - dm.setup('fit') - dm.setup('fit') - dm.setup() - dm.setup() - dm.teardown('validate') - 
dm.teardown('validate')
-
-    assert dm.prepare_data_calls == 1
-    assert dm.setup_calls == ['fit', None]
-    assert dm.teardown_calls == ['validate']
-
-    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=1)
-    trainer.test(BoringModel(), datamodule=dm)
-
-    # same number of calls
-    assert dm.prepare_data_calls == 1
-    assert dm.setup_calls == ['fit', None]
-    assert dm.teardown_calls == ['validate', 'test']
diff --git a/tests/core/test_lightning_module.py b/tests/core/test_lightning_module.py
index 84d206dead22c..f05305c785c7e 100644
--- a/tests/core/test_lightning_module.py
+++ b/tests/core/test_lightning_module.py
@@ -13,14 +13,12 @@
 # limitations under the License.
 from unittest.mock import Mock
 
-import pytest
 import torch
 from torch import nn
 from torch.optim import Adam, SGD
 
 from pytorch_lightning import Trainer
 from pytorch_lightning.loggers import TensorBoardLogger
-from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from tests.helpers import BoringModel
 from tests.helpers.runif import RunIf
 
@@ -76,27 +74,6 @@ def test_property_logger(tmpdir):
     assert model.logger == logger
 
 
-def test_automatic_optimization_raises(tmpdir):
-
-    class TestModel(BoringModel):
-
-        def optimizer_step(self, *_, **__):
-            pass
-
-    model = TestModel()
-    trainer = Trainer(
-        default_root_dir=tmpdir,
-        limit_train_batches=2,
-        limit_val_batches=2,
-        accumulate_grad_batches=2,
-    )
-
-    with pytest.raises(
-        MisconfigurationException, match='overriding .* optimizer_step .* `accumulate_grad_batches` .* should be 1'
-    ):
-        trainer.fit(model)
-
-
 def test_params_groups_and_state_are_accessible(tmpdir):
 
     class TestModel(BoringModel):
diff --git a/tests/core/test_lightning_optimizer.py b/tests/core/test_lightning_optimizer.py
index d79cae75956a2..b0e96c6d42fbf 100644
--- a/tests/core/test_lightning_optimizer.py
+++ b/tests/core/test_lightning_optimizer.py
@@ -123,7 +123,6 @@ def configure_optimizers(self):
         limit_val_batches=1,
         max_epochs=1,
         weights_summary=None,
-        accumulate_grad_batches=999,  # does not do anything if manual optimization
    )
 
    with patch.multiple(torch.optim.SGD, zero_grad=DEFAULT, step=DEFAULT) as sgd, \
@@ -243,7 +242,7 @@ def training_epoch_end(self, outputs):
             ...
 
         def optimizer_step(self, epoch, batch_idx, optimizer, optimizer_idx, optimizer_closure, **_):
-            assert optimizer_closure.__name__ == "training_step_and_backward_closure"
+            assert optimizer_closure.__name__ == "_training_step_and_backward_closure"
             # not passing the closure to the optimizer because step is mocked
             # zero_grad is called inside the closure
             if isinstance(optimizer, SGD) and batch_idx % 2 == 0:
diff --git a/tests/core/test_memory.py b/tests/core/test_memory.py
index 3088743f71488..96e1bfaec14cb 100644
--- a/tests/core/test_memory.py
+++ b/tests/core/test_memory.py
@@ -17,6 +17,7 @@
 
 from pytorch_lightning import LightningModule, Trainer
 from pytorch_lightning.core.memory import ModelSummary, UNKNOWN_SIZE
+from pytorch_lightning.utilities import _TORCH_GREATER_EQUAL_1_9
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from tests.helpers import BoringModel
 from tests.helpers.advanced_models import ParityModuleRNN
@@ -101,6 +102,41 @@ def forward(self, x):
         return self.layer2(self.layer1(x))
 
 
+class LazyModel(LightningModule):
+    """ A model which contains lazy layers with uninitialized parameters. 
""" + + def __init__(self): + super().__init__() + self.layer1 = nn.LazyLinear(5) + self.layer2 = nn.LazyLinear(2) + + def forward(self, inp): + return self.layer2(self.layer1(inp)) + + +class DeepNestedModel(LightningModule): + """ A model with deep nested layers. """ + + def __init__(self): + super().__init__() + self.branch1 = nn.Sequential( + nn.Linear(5, 5), + nn.Sequential( + nn.Linear(5, 5), + nn.Sequential( + nn.Linear(5, 5), + nn.Sequential(nn.Linear(5, 5), nn.Sequential(nn.Linear(5, 5), nn.Sequential(nn.Linear(5, 3)))) + ) + ) + ) + self.branch2 = nn.Linear(5, 10) + self.head = UnorderedModel() + self.example_input_array = torch.rand(2, 5) + + def forward(self, inp): + return self.head(self.branch1(inp), self.branch2(inp)) + + def test_invalid_weights_summmary(): """ Test that invalid value for weights_summary raises an error. """ with pytest.raises(MisconfigurationException, match='`mode` can be None, .* got temp'): @@ -110,8 +146,8 @@ def test_invalid_weights_summmary(): Trainer(weights_summary='temp') -@pytest.mark.parametrize('mode', [ModelSummary.MODE_FULL, ModelSummary.MODE_TOP]) -def test_empty_model_summary_shapes(mode: ModelSummary): +@pytest.mark.parametrize('mode', ["full", "top"]) +def test_empty_model_summary_shapes(mode: str): """ Test that the summary works for models that have no submodules. """ model = EmptyModule() summary = model.summarize(mode=mode) @@ -121,7 +157,7 @@ def test_empty_model_summary_shapes(mode: ModelSummary): @RunIf(min_gpus=1) -@pytest.mark.parametrize('mode', [ModelSummary.MODE_FULL, ModelSummary.MODE_TOP]) +@pytest.mark.parametrize('mode', ["full", "top"]) @pytest.mark.parametrize(['device'], [ pytest.param(torch.device('cpu')), pytest.param(torch.device('cuda', 0)), @@ -164,18 +200,18 @@ def test_mixed_dtype_model_summary(): ] -@pytest.mark.parametrize('mode', [ModelSummary.MODE_FULL, ModelSummary.MODE_TOP]) -def test_hooks_removed_after_summarize(mode): +@pytest.mark.parametrize('max_depth', [-1, 0]) +def test_hooks_removed_after_summarize(max_depth): """ Test that all hooks were properly removed after summary, even ones that were not run. """ model = UnorderedModel() - summary = ModelSummary(model, mode=mode) + summary = ModelSummary(model, max_depth=max_depth) # hooks should be removed for _, layer in summary.summarize().items(): handle = layer._hook_handle assert handle.id not in handle.hooks_dict_ref() -@pytest.mark.parametrize('mode', [ModelSummary.MODE_FULL, ModelSummary.MODE_TOP]) +@pytest.mark.parametrize('mode', ["full", "top"]) def test_rnn_summary_shapes(mode): """ Test that the model summary works for RNNs. """ model = ParityModuleRNN() @@ -199,7 +235,7 @@ def test_rnn_summary_shapes(mode): ] -@pytest.mark.parametrize('mode', [ModelSummary.MODE_FULL, ModelSummary.MODE_TOP]) +@pytest.mark.parametrize('mode', ["full", "top"]) def test_summary_parameter_count(mode): """ Test that the summary counts the number of parameters in every submodule. """ model = UnorderedModel() @@ -213,7 +249,7 @@ def test_summary_parameter_count(mode): ] -@pytest.mark.parametrize('mode', [ModelSummary.MODE_FULL, ModelSummary.MODE_TOP]) +@pytest.mark.parametrize('mode', ["full", "top"]) def test_summary_layer_types(mode): """ Test that the summary displays the layer names correctly. 
""" model = UnorderedModel() @@ -227,7 +263,7 @@ def test_summary_layer_types(mode): ] -@pytest.mark.parametrize('mode', [ModelSummary.MODE_FULL, ModelSummary.MODE_TOP]) +@pytest.mark.parametrize('mode', ["full", "top"]) def test_summary_with_scripted_modules(mode): model = PartialScriptModel() summary = model.summarize(mode=mode) @@ -236,7 +272,7 @@ def test_summary_with_scripted_modules(mode): assert summary.out_sizes == [UNKNOWN_SIZE, [2, 2]] -@pytest.mark.parametrize('mode', [ModelSummary.MODE_FULL, ModelSummary.MODE_TOP]) +@pytest.mark.parametrize('mode', ["full", "top"]) @pytest.mark.parametrize(['example_input', 'expected_size'], [ pytest.param([], UNKNOWN_SIZE), pytest.param((1, 2, 3), [UNKNOWN_SIZE] * 3), @@ -270,7 +306,7 @@ def forward(self, *args, **kwargs): assert summary.in_sizes == [expected_size] -@pytest.mark.parametrize('mode', [ModelSummary.MODE_FULL, ModelSummary.MODE_TOP]) +@pytest.mark.parametrize('mode', ["full", "top"]) def test_model_size(mode): """ Test model size is calculated correctly. """ model = PreCalculatedModel() @@ -278,7 +314,7 @@ def test_model_size(mode): assert model.pre_calculated_model_size == summary.model_size -@pytest.mark.parametrize('mode', [ModelSummary.MODE_FULL, ModelSummary.MODE_TOP]) +@pytest.mark.parametrize('mode', ["full", "top"]) def test_empty_model_size(mode): """ Test empty model size is zero. """ model = EmptyModule() @@ -302,3 +338,53 @@ def test_model_size_precision(tmpdir): trainer.fit(model) summary = model.summarize() assert model.pre_calculated_model_size == summary.model_size + + +@RunIf(min_torch="1.8") +def test_lazy_model_summary(): + """ Test that the model summary can work with lazy layers. """ + lazy_model = LazyModel() + summary = ModelSummary(lazy_model) + + with pytest.warns( + UserWarning, + match=r"A layer with UninitializedParameter was found. " + r"Thus, the total number of parameters detected may be inaccurate." + ): + if _TORCH_GREATER_EQUAL_1_9: + assert summary.total_parameters == 0 + assert summary.trainable_parameters == 0 + else: + # bug in 1.8: the bias of a LazyLinear layer is initialized! 
+ # https://github.com/pytorch/pytorch/issues/58350 + assert summary.total_parameters == 7 + assert summary.trainable_parameters == 7 + + +def test_max_depth_equals_mode_interface(): + """Test model.summarize(full/top) interface mapping matches max_depth""" + model = DeepNestedModel() + + summary_top = model.summarize(mode="top") + summary_0 = model.summarize(max_depth=1) + assert str(summary_top) == str(summary_0) + + summary_full = model.summarize(mode="full") + summary_minus1 = model.summarize(max_depth=-1) + assert str(summary_full) == str(summary_minus1) + + +@pytest.mark.parametrize('max_depth', [-1, 0, 1, 3, 999]) +def test_max_depth_param(max_depth): + """Test that only the modules up to the desired depth are shown""" + model = DeepNestedModel() + summary = ModelSummary(model, max_depth=max_depth) + for lname in summary.layer_names: + if max_depth >= 0: + assert lname.count(".") < max_depth + + +@pytest.mark.parametrize('max_depth', [-99, -2, "invalid"]) +def test_raise_invalid_max_depth_value(max_depth): + with pytest.raises(ValueError, match=f"`max_depth` can be -1, 0 or > 0, got {max_depth}"): + DeepNestedModel().summarize(max_depth=max_depth) diff --git a/tests/core/test_metric_result_integration.py b/tests/core/test_metric_result_integration.py index 734b9e7f56152..7471914886a27 100644 --- a/tests/core/test_metric_result_integration.py +++ b/tests/core/test_metric_result_integration.py @@ -11,14 +11,20 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import pickle +from copy import deepcopy +import pytest import torch import torch.distributed as dist import torch.multiprocessing as mp from torchmetrics import Metric import tests.helpers.utils as tutils -from pytorch_lightning.core.step_result import Result +from pytorch_lightning import Trainer +from pytorch_lightning.callbacks import ModelCheckpoint +from pytorch_lightning.trainer.connectors.logger_connector.result import _Sync, MetricSource, ResultCollection +from tests.helpers import BoringModel from tests.helpers.runif import RunIf @@ -52,12 +58,14 @@ def _ddp_test_fn(rank, worldsize): metric_b = DummyMetric() metric_c = DummyMetric() - # dist_sync_on_step is False by default - result = Result() + metric_a = metric_a.to(f"cuda:{rank}") + metric_b = metric_b.to(f"cuda:{rank}") + metric_c = metric_c.to(f"cuda:{rank}") - for epoch in range(3): - cumulative_sum = 0 + result = ResultCollection(True, torch.device(f"cuda:{rank}")) + for _ in range(3): + cumulative_sum = 0 for i in range(5): metric_a(i) metric_b(i) @@ -65,32 +73,25 @@ def _ddp_test_fn(rank, worldsize): cumulative_sum += i - result.log('a', metric_a, on_step=True, on_epoch=True) - result.log('b', metric_b, on_step=False, on_epoch=True) - result.log('c', metric_c, on_step=True, on_epoch=False) + result.log('h', 'a', metric_a, on_step=True, on_epoch=True) + result.log('h', 'b', metric_b, on_step=False, on_epoch=True) + result.log('h', 'c', metric_c, on_step=True, on_epoch=False) - batch_log = result.get_batch_log_metrics() - batch_expected = {"a_step": i, "a": i, "c": i} - assert set(batch_log.keys()) == set(batch_expected.keys()) - for k in batch_expected.keys(): - assert batch_expected[k] == batch_log[k] + batch_log = result.metrics(True)[MetricSource.LOG] + assert batch_log == {"a_step": i, "c": i} - epoch_log = result.get_epoch_log_metrics() + epoch_log = result.metrics(False)[MetricSource.LOG] result.reset() # assert metric state 
reset to default values - assert metric_a.x == metric_a._defaults['x'] + assert metric_a.x == metric_a._defaults['x'], (metric_a.x, metric_a._defaults['x']) assert metric_b.x == metric_b._defaults['x'] assert metric_c.x == metric_c._defaults['x'] - epoch_expected = {"b": cumulative_sum * worldsize, "a_epoch": cumulative_sum * worldsize} + assert epoch_log == {"b": cumulative_sum * worldsize, "a_epoch": cumulative_sum * worldsize} - assert set(epoch_log.keys()) == set(epoch_expected.keys()) - for k in epoch_expected.keys(): - assert epoch_expected[k] == epoch_log[k] - -@RunIf(skip_windows=True) +@RunIf(skip_windows=True, min_gpus=2) def test_result_reduce_ddp(): """Make sure result logging works with DDP""" tutils.set_random_master_port() @@ -104,11 +105,10 @@ def test_result_metric_integration(): metric_b = DummyMetric() metric_c = DummyMetric() - result = Result() + result = ResultCollection(True, torch.device("cpu")) - for epoch in range(3): + for _ in range(3): cumulative_sum = 0 - for i in range(5): metric_a(i) metric_b(i) @@ -116,17 +116,14 @@ def test_result_metric_integration(): cumulative_sum += i - result.log('a', metric_a, on_step=True, on_epoch=True) - result.log('b', metric_b, on_step=False, on_epoch=True) - result.log('c', metric_c, on_step=True, on_epoch=False) + result.log('h', 'a', metric_a, on_step=True, on_epoch=True) + result.log('h', 'b', metric_b, on_step=False, on_epoch=True) + result.log('h', 'c', metric_c, on_step=True, on_epoch=False) - batch_log = result.get_batch_log_metrics() - batch_expected = {"a_step": i, "a": i, "c": i} - assert set(batch_log.keys()) == set(batch_expected.keys()) - for k in batch_expected.keys(): - assert batch_expected[k] == batch_log[k] + batch_log = result.metrics(True)[MetricSource.LOG] + assert batch_log == {"a_step": i, "c": i} - epoch_log = result.get_epoch_log_metrics() + epoch_log = result.metrics(False)[MetricSource.LOG] result.reset() # assert metric state reset to default values @@ -134,8 +131,211 @@ def test_result_metric_integration(): assert metric_b.x == metric_b._defaults['x'] assert metric_c.x == metric_c._defaults['x'] - epoch_expected = {"b": cumulative_sum, "a_epoch": cumulative_sum} + assert epoch_log == {"b": cumulative_sum, "a_epoch": cumulative_sum} + + assert str(result) == ( + "ResultCollection(True, cpu, {" + "'h.a': ResultMetric('a', value=DummyMetric()), " + "'h.b': ResultMetric('b', value=DummyMetric()), " + "'h.c': ResultMetric('c', value=DummyMetric())" + "})" + ) + + +def test_result_collection_simple_loop(): + result = ResultCollection(True, torch.device("cpu")) + current_fx_name = None + batch_idx = None + + def lightning_log(fx, *args, **kwargs): + nonlocal current_fx_name + if current_fx_name != fx and batch_idx in (None, 0): + result.reset(metrics=False, fx=fx) + result.log(fx, *args, **kwargs) + current_fx_name = fx + + lightning_log('a0', 'a', torch.tensor(0.), on_step=True, on_epoch=True) + lightning_log('a1', 'a', torch.tensor(0.), on_step=True, on_epoch=True) + for epoch in range(2): + lightning_log('b0', 'a', torch.tensor(1.) + epoch, on_step=True, on_epoch=True) + lightning_log('b1', 'a', torch.tensor(1.) + epoch, on_step=True, on_epoch=True) + for batch_idx in range(2): + lightning_log('c0', 'a', torch.tensor(2.) + epoch, on_step=True, on_epoch=True) + lightning_log('c1', 'a', torch.tensor(2.) + epoch, on_step=True, on_epoch=True) + lightning_log('c2', 'a', torch.tensor(2.) + epoch, on_step=True, on_epoch=True) + batch_idx = None + lightning_log('d0', 'a', torch.tensor(3.) 
+ epoch, on_step=False, on_epoch=True)
+        lightning_log('d1', 'a', torch.tensor(3.) + epoch, on_step=False, on_epoch=True)
+
+        for k in ('a0.a', 'a1.a'):
+            assert result[k].value == torch.tensor(0.), k
+            assert result[k].cumulated_batch_size == torch.tensor(1.), k
+
+        for k in ('b0.a', 'b1.a'):
+            assert result[k].value == torch.tensor(1.) + epoch, k
+            assert result[k].cumulated_batch_size == torch.tensor(1.), k
+
+        for k in ('c0.a', 'c1.a', 'c2.a'):
+            assert result[k].value == torch.tensor(4.) + epoch * 2, k
+            assert result[k].cumulated_batch_size == torch.tensor(2.), k
+
+        for k in ('d0.a', 'd1.a'):
+            assert result[k].value == torch.tensor(3.) + epoch, k
+            assert result[k].cumulated_batch_size == torch.tensor(1.), k
+
+
+def my_sync_dist(x, *_, **__):
+    return x
+
+
+def test_result_collection_restoration(tmpdir):
+    """
+    This test makes sure metrics are properly reloaded on failure.
+    """
+
+    result = ResultCollection(True, torch.device("cpu"))
+    metric_a = DummyMetric()
+    metric_b = DummyMetric()
+    metric_c = DummyMetric()
+    metric_d = DummyMetric()
+    current_fx_name = None
+    batch_idx = None
+
+    def lightning_log(fx, *args, **kwargs):
+        nonlocal current_fx_name
+        if current_fx_name != fx and batch_idx in (None, 0):
+            result.reset(metrics=False, fx=fx)
+        result.log(fx, *args, **kwargs, sync_dist_fn=my_sync_dist)
+        current_fx_name = fx
+
+    for epoch in range(2):
+
+        cumulative_sum = 0
+
+        for i in range(3):
+
+            a = metric_a(i)
+            b = metric_b(i)
+            c = metric_c(i)
+            metric_d(i)
+
+            cumulative_sum += i
+
+            metric = metric_a if i < 1 else metric_d
+            lightning_log('training_step', 'a', metric, on_step=True, on_epoch=True, metric_attribute="metric")
+            lightning_log('training_step', 'b', metric_b, on_step=False, on_epoch=True, metric_attribute="metric_b")
+            lightning_log('training_step', 'c', metric_c, on_step=True, on_epoch=False, metric_attribute="metric_c")
+            lightning_log('training_step', 'a_1', a, on_step=True, on_epoch=True)
+            lightning_log('training_step', 'b_1', b, on_step=False, on_epoch=True)
+            lightning_log('training_step', 'c_1', {'1': c, '2': c}, on_step=True, on_epoch=False)
+
+            batch_log = result.metrics(on_step=True)[MetricSource.LOG]
+            assert set(batch_log) == {"a_step", "c", "a_1_step", "c_1"}
+            assert set(batch_log['c_1']) == {'1', '2'}
+
+            result_copy = deepcopy(result)
+            new_result = ResultCollection(True, torch.device("cpu"))
+            state_dict = result.state_dict()
+            # check the sync fn was dropped
+            assert 'fn' not in state_dict['items']['training_step.a']['meta']['_sync']
+
+            assert not new_result.result_metrics
+            assert len(result.result_metrics) == 7 + epoch > 0
+
+            new_result.load_state_dict(
+                state_dict, metrics={
+                    "metric": metric,
+                    "metric_b": metric_b,
+                    "metric_c": metric_c
+                }
+            )
+            # should match
+            assert result_copy == new_result
+            # the sync fn has been kept
+            assert result_copy['training_step.a'].meta.sync.fn == new_result['training_step.a'].meta.sync.fn
+
+        epoch_log = result.metrics(on_step=False)[MetricSource.LOG]
+        epoch_log_copy = result_copy.metrics(on_step=False)[MetricSource.LOG]
+        assert epoch_log == epoch_log_copy
+
+        lightning_log('train_epoch_end', 'a', metric_a, on_step=False, on_epoch=True)
+        epoch_log = result.metrics(on_step=False)[MetricSource.LOG]
+        assert epoch_log == {
+            'a_1_epoch': 1,
+            'a_epoch': cumulative_sum,
+            'a': cumulative_sum,
+            'b': cumulative_sum,
+            'b_1': 1
+        }
+
+        # make sure can be pickled
+        pickle.loads(pickle.dumps(result))
+        # make sure can be torch.loaded
+        filepath = str(tmpdir / 'result')
+        torch.save(result, filepath)
+ torch.load(filepath) + + # assert metric state reset to default values + result.reset() + assert metric_a.x == metric_a._defaults['x'] + assert metric_b.x == metric_b._defaults['x'] + assert metric_c.x == metric_c._defaults['x'] + + batch_idx = None + + +@pytest.mark.parametrize('device', ('cpu', pytest.param('cuda', marks=RunIf(min_gpus=1)))) +def test_lightning_module_logging_result_collection(tmpdir, device): + + class LoggingModel(BoringModel): + + def __init__(self): + super().__init__() + self.metric = DummyMetric() + + def validation_step(self, batch, batch_idx): + v = self.metric(batch_idx) + self.log_dict({"v": v, "m": self.metric}) + return super().validation_step(batch, batch_idx) + + def on_save_checkpoint(self, checkpoint) -> None: + results = self.trainer._results + # simplify logic + state_dict = results.state_dict(drop_value=False) + + # check device + assert results['validation_step.v'].value.device.type == device + assert state_dict['items']['validation_step.v']['value'].device.type == device + + # sync fn should be kept + assert results['validation_step.v'].meta.sync.fn == self.trainer.training_type_plugin.reduce + + # sync fn dropped from the state dict + assert 'fn' not in state_dict['items']['validation_step.v']['meta']['_sync'] + results.load_state_dict(state_dict) + + # check device after loading + assert results['validation_step.v'].value.device.type == device + + # sync fn was preserved in the original result + assert results['validation_step.v'].meta.sync.fn == self.trainer.training_type_plugin.reduce + + # default sync fn + new_results = ResultCollection(False, device) + new_results.load_state_dict(state_dict, map_location='cpu') + assert new_results['validation_step.v'].meta.sync.fn == _Sync.no_op + + # check map location + assert new_results['validation_step.v'].value.device.type == 'cpu' - assert set(epoch_log.keys()) == set(epoch_expected.keys()) - for k in epoch_expected.keys(): - assert epoch_expected[k] == epoch_log[k] + model = LoggingModel() + ckpt = ModelCheckpoint(dirpath=tmpdir, save_last=True) + trainer = Trainer( + default_root_dir=tmpdir, + max_epochs=2, + limit_train_batches=2, + limit_val_batches=2, + callbacks=[ckpt], + gpus=1 if device == 'cuda' else 0, + ) + trainer.fit(model) diff --git a/tests/core/test_results.py b/tests/core/test_results.py index 02d30d9f79ee3..e2e3c892cc124 100644 --- a/tests/core/test_results.py +++ b/tests/core/test_results.py @@ -18,11 +18,11 @@ import torch import torch.distributed as dist import torch.multiprocessing as mp -from torch.utils.data import DataLoader import tests.helpers.utils as tutils -from pytorch_lightning import LightningModule, Trainer -from pytorch_lightning.core.step_result import Result +from pytorch_lightning import Trainer +from pytorch_lightning.trainer.connectors.logger_connector.result import _Sync +from pytorch_lightning.utilities.distributed import sync_ddp_if_available from tests.helpers import BoringDataModule, BoringModel from tests.helpers.runif import RunIf @@ -39,7 +39,8 @@ def _setup_ddp(rank, worldsize): def _ddp_test_fn(rank, worldsize): _setup_ddp(rank, worldsize) tensor = torch.tensor([1.0]) - actual = LightningModule._LightningModule__sync(tensor, sync_dist=True, sync_dist_op=torch.distributed.ReduceOp.SUM) + sync = _Sync(sync_ddp_if_available, should=True, op='SUM') + actual = sync(tensor) assert actual.item() == dist.get_world_size(), "Result-Log does not work properly with DDP and Tensors" @@ -51,23 +52,21 @@ def test_result_reduce_ddp(): mp.spawn(_ddp_test_fn, 
args=(worldsize, ), nprocs=worldsize) -@pytest.mark.parametrize( - "test_option,do_train,gpus", [ - pytest.param(0, True, 0, id='full_loop'), - pytest.param(0, False, 0, id='test_only'), - pytest.param( - 1, False, 0, id='test_only_mismatching_tensor', marks=pytest.mark.xfail(raises=ValueError, match="Mism.*") - ), - pytest.param(2, False, 0, id='mix_of_tensor_dims'), - pytest.param(3, False, 0, id='string_list_predictions'), - pytest.param(4, False, 0, id='int_list_predictions'), - pytest.param(5, False, 0, id='nested_list_predictions'), - pytest.param(6, False, 0, id='dict_list_predictions'), - pytest.param(7, True, 0, id='write_dict_predictions'), - pytest.param(0, True, 1, id='full_loop_single_gpu', marks=RunIf(min_gpus=1)) - ] -) -def test_result_obj_predictions(tmpdir, test_option: int, do_train: bool, gpus: int): +@pytest.mark.parametrize(["option", "do_train", "gpus"], [ + pytest.param(0, True, 0, id='full_loop'), + pytest.param(0, False, 0, id='test_only'), + pytest.param( + 1, False, 0, id='test_only_mismatching_tensor', marks=pytest.mark.xfail(raises=ValueError, match="Mism.*") + ), + pytest.param(2, False, 0, id='mix_of_tensor_dims'), + pytest.param(3, False, 0, id='string_list_predictions'), + pytest.param(4, False, 0, id='int_list_predictions'), + pytest.param(5, False, 0, id='nested_list_predictions'), + pytest.param(6, False, 0, id='dict_list_predictions'), + pytest.param(7, True, 0, id='write_dict_predictions'), + pytest.param(0, True, 1, id='full_loop_single_gpu', marks=RunIf(min_gpus=1)) +]) +def test_write_predictions(tmpdir, option: int, do_train: bool, gpus: int): class CustomBoringModel(BoringModel): @@ -82,8 +81,6 @@ def test_step(self, batch, batch_idx, optimizer_idx=None): lst_of_lst = [[x] for x in lst_of_int] lst_of_dict = [{k: v} for k, v in zip(lst_of_str, lst_of_int)] - # This is passed in from pytest via parameterization - option = getattr(self, 'test_option', 0) prediction_file = getattr(self, 'prediction_file', 'predictions.pt') lazy_ids = torch.arange(batch_idx * batch_size, batch_idx * batch_size + batch_size) @@ -127,32 +124,13 @@ def test_step(self, batch, batch_idx, optimizer_idx=None): elif option == 7: self.write_prediction_dict({'idxs': lazy_ids, 'preds': output}, prediction_file) - class CustomBoringDataModule(BoringDataModule): - - def train_dataloader(self): - return DataLoader(self.random_train, batch_size=4) - - def val_dataloader(self): - return DataLoader(self.random_val, batch_size=4) - - def test_dataloader(self): - return DataLoader(self.random_test, batch_size=4) - - tutils.reset_seed() prediction_file = Path(tmpdir) / 'predictions.pt' dm = BoringDataModule() model = CustomBoringModel() - model.test_step_end = None model.test_epoch_end = None - model.test_end = None - - model.test_option = test_option model.prediction_file = prediction_file.as_posix() - if prediction_file.exists(): - prediction_file.unlink() - trainer = Trainer( default_root_dir=tmpdir, max_epochs=3, @@ -175,11 +153,3 @@ def test_dataloader(self): assert prediction_file.exists() predictions = torch.load(prediction_file) assert len(predictions) == len(dm.random_test) - - -def test_result_retrieve_last_logged_item(): - result = Result() - result.log('a', 5., on_step=True, on_epoch=True) - assert result['a_epoch'] == 5. - assert result['a_step'] == 5. - assert result['a'] == 5. 
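As context for the `_Sync(sync_ddp_if_available, should=True, op='SUM')` usage in the reduction test above: the wrapper boils down to a callable that applies a reduction function only when syncing is enabled, and passes the tensor through unchanged otherwise. Below is a rough standalone sketch of that pattern; `SyncSketch` and its fields are illustrative names under that assumption, not the library's actual definition.

    from typing import Any, Callable, Optional

    import torch

    class SyncSketch:

        def __init__(self, fn: Optional[Callable] = None, should: bool = False, op: str = 'SUM'):
            self.fn = fn          # reduction fn, e.g. an all-reduce helper
            self.should = should  # whether syncing is enabled at all
            self.op = op          # reduction op name forwarded to `fn`

        def __call__(self, tensor: torch.Tensor, *args: Any, **kwargs: Any) -> torch.Tensor:
            # Outside a distributed run (or with `should=False`) the value
            # passes through unchanged; otherwise delegate to the reduction fn.
            if not self.should or self.fn is None:
                return tensor
            return self.fn(tensor, *args, reduce_op=self.op, **kwargs)

    # No-op path: behaves like the identity on a single process.
    assert SyncSketch()(torch.tensor([1.0])).item() == 1.0

Under DDP with a SUM op and world size N, the wrapped tensor `[1.0]` reduces to `N`, which is exactly what `_ddp_test_fn` asserts above.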
diff --git a/tests/deprecated_api/test_remove_1-4.py b/tests/deprecated_api/test_remove_1-4.py index 37d8abfdf905d..23df12586d328 100644 --- a/tests/deprecated_api/test_remove_1-4.py +++ b/tests/deprecated_api/test_remove_1-4.py @@ -66,3 +66,16 @@ def training_step(self, batch, batch_idx): with pytest.deprecated_call(match=r"Relying on.*is deprecated in v1.2 and will be removed in v1.4"): trainer.fit(TestModel()) + + +def test_v1_4_0_deprecated_hpc_load(tmpdir): + model = BoringModel() + trainer = Trainer( + default_root_dir=tmpdir, + max_steps=1, + ) + trainer.fit(model) + trainer.checkpoint_connector.hpc_save(tmpdir, trainer.logger) + checkpoint_path = trainer.checkpoint_connector.get_max_ckpt_path_from_folder(str(tmpdir)) + with pytest.deprecated_call(match=r"`CheckpointConnector.hpc_load\(\)` was deprecated in v1.4"): + trainer.checkpoint_connector.hpc_load(checkpoint_path) diff --git a/tests/deprecated_api/test_remove_1-5.py b/tests/deprecated_api/test_remove_1-5.py index d6c9b6d8f8f31..70bcc71d0a2a6 100644 --- a/tests/deprecated_api/test_remove_1-5.py +++ b/tests/deprecated_api/test_remove_1-5.py @@ -25,12 +25,14 @@ from pytorch_lightning.callbacks import ModelCheckpoint from pytorch_lightning.core.decorators import auto_move_data from pytorch_lightning.loggers import WandbLogger +from pytorch_lightning.plugins import DeepSpeedPlugin from pytorch_lightning.profiler import AdvancedProfiler, BaseProfiler, PyTorchProfiler, SimpleProfiler from pytorch_lightning.trainer.callback_hook import warning_cache as callback_warning_cache from pytorch_lightning.utilities import device_parser from pytorch_lightning.utilities.imports import _compare_version from tests.deprecated_api import no_deprecated_call from tests.helpers import BoringDataModule, BoringModel +from tests.helpers.runif import RunIf from tests.helpers.utils import no_warning_call @@ -242,7 +244,7 @@ def on_train_epoch_end(self, outputs): # noqa with pytest.deprecated_call(match="old signature will be removed in v1.5"): trainer.fit(model) - trainer.train_loop.warning_cache.clear() + trainer.fit_loop.epoch_loop._warning_cache.clear() class NewSignature(Callback): @@ -367,10 +369,24 @@ def test_v1_5_0_datamodule_setter(): datamodule = BoringDataModule() with no_deprecated_call(match="The `LightningModule.datamodule`"): model.datamodule = datamodule - with pytest.deprecated_call(match="The `LightningModule.datamodule`"): - _ = model.datamodule + from pytorch_lightning.core.lightning import warning_cache + warning_cache.clear() + _ = model.datamodule + assert any("The `LightningModule.datamodule`" in w for w in warning_cache) def test_v1_5_0_trainer_tbptt_steps(tmpdir): with pytest.deprecated_call(match="is deprecated in v1.3 and will be removed in v1.5"): _ = Trainer(truncated_bptt_steps=1) + + +@RunIf(deepspeed=True) +@pytest.mark.parametrize( + "params", [dict(cpu_offload=True), + dict(cpu_offload_params=True), + dict(cpu_offload_use_pin_memory=True)] +) +def test_v1_5_0_deepspeed_cpu_offload(tmpdir, params): + + with pytest.deprecated_call(match="is deprecated since v1.4 and will be removed in v1.5"): + DeepSpeedPlugin(**params) diff --git a/tests/deprecated_api/test_remove_1-6.py b/tests/deprecated_api/test_remove_1-6.py index 63b1c60fe7c62..ba033a0ebeced 100644 --- a/tests/deprecated_api/test_remove_1-6.py +++ b/tests/deprecated_api/test_remove_1-6.py @@ -12,12 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
""" Test deprecated functionality which will be removed in v1.6.0 """ - import pytest from pytorch_lightning import Trainer +from pytorch_lightning.callbacks.early_stopping import EarlyStopping +from pytorch_lightning.core.memory import ModelSummary from pytorch_lightning.plugins.training_type import DDPPlugin, DDPSpawnPlugin -from tests.helpers import BoringModel +from pytorch_lightning.utilities.distributed import rank_zero_deprecation, rank_zero_warn +from pytorch_lightning.utilities.model_helpers import is_overridden +from tests.helpers import BoringDataModule, BoringModel def test_v1_6_0_trainer_model_hook_mixin(tmpdir): @@ -31,6 +34,28 @@ def test_v1_6_0_trainer_model_hook_mixin(tmpdir): trainer.has_arg("training_step", "batch") +def test_v1_6_0_dataloader_renaming(tmpdir): + model = BoringModel() + trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True) + dl = model.train_dataloader() + + with pytest.deprecated_call(match=r"fit\(train_dataloader\)` is deprecated in v1.4"): + trainer.fit(model, train_dataloader=dl) + + with pytest.deprecated_call(match=r"validate\(val_dataloaders\)` is deprecated in v1.4"): + trainer.validate(model, val_dataloaders=dl) + + with pytest.deprecated_call(match=r"test\(test_dataloaders\)` is deprecated in v1.4"): + trainer.test(model, test_dataloaders=dl) + + with pytest.deprecated_call(match=r"tune\(train_dataloader\)` is deprecated in v1.4"): + trainer.tune(model, train_dataloader=dl) + with pytest.deprecated_call(match=r"tune\(train_dataloader\)` is deprecated in v1.4"): + trainer.tuner.scale_batch_size(model, train_dataloader=dl) + with pytest.deprecated_call(match=r"tune\(train_dataloader\)` is deprecated in v1.4"): + trainer.tuner.lr_find(model, train_dataloader=dl) + + def test_old_transfer_batch_to_device_hook(tmpdir): class OldModel(BoringModel): @@ -54,12 +79,12 @@ def test_v1_6_0_ddp_sync_batchnorm(): def test_v1_6_0_ddp_spawn_num_nodes(): - with pytest.deprecated_call(match="Argument `num_nodes` in `DDPPlugin` is deprecated in v1.4"): + with pytest.deprecated_call(match="Argument `num_nodes` in `DDPSpawnPlugin` is deprecated in v1.4"): DDPSpawnPlugin(num_nodes=1) def test_v1_6_0_ddp_spawn_sync_batchnorm(): - with pytest.deprecated_call(match="Argument `sync_batchnorm` in `DDPPlugin` is deprecated in v1.4"): + with pytest.deprecated_call(match="Argument `sync_batchnorm` in `DDPSpawnPlugin` is deprecated in v1.4"): DDPSpawnPlugin(sync_batchnorm=False) @@ -87,3 +112,166 @@ def training_step(self, *args): trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True) with pytest.deprecated_call(match=r"tbptt_pad_token=...\)` is no longer supported"): trainer.fit(TestModel()) + + +def test_v1_6_0_sync_dist_op(tmpdir): + + class TestModel(BoringModel): + + def training_step(self, *args): + self.log("foo", 1, sync_dist_op='sum') + return super().training_step(*args) + + trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True) + with pytest.deprecated_call(match=r"`self.log\(sync_dist_op='sum'\)` is deprecated"): + trainer.fit(TestModel()) + + +def test_v1_6_0_datamodule_lifecycle_properties(tmpdir): + dm = BoringDataModule() + with pytest.deprecated_call(match=r"DataModule property `has_prepared_data` was deprecated in v1.4"): + dm.has_prepared_data + with pytest.deprecated_call(match=r"DataModule property `has_setup_fit` was deprecated in v1.4"): + dm.has_setup_fit + with pytest.deprecated_call(match=r"DataModule property `has_setup_validate` was deprecated in v1.4"): + dm.has_setup_validate + with pytest.deprecated_call(match=r"DataModule 
property `has_setup_test` was deprecated in v1.4"): + dm.has_setup_test + with pytest.deprecated_call(match=r"DataModule property `has_setup_predict` was deprecated in v1.4"): + dm.has_setup_predict + with pytest.deprecated_call(match=r"DataModule property `has_teardown_fit` was deprecated in v1.4"): + dm.has_teardown_fit + with pytest.deprecated_call(match=r"DataModule property `has_teardown_validate` was deprecated in v1.4"): + dm.has_teardown_validate + with pytest.deprecated_call(match=r"DataModule property `has_teardown_test` was deprecated in v1.4"): + dm.has_teardown_test + with pytest.deprecated_call(match=r"DataModule property `has_teardown_predict` was deprecated in v1.4"): + dm.has_teardown_predict + + +def test_v1_6_0_datamodule_hooks_calls(tmpdir): + """Test that repeated calls to DataHooks' hooks show a warning about the coming API change.""" + + class TestDataModule(BoringDataModule): + setup_calls = [] + teardown_calls = [] + prepare_data_calls = 0 + + def setup(self, stage=None): + super().setup(stage=stage) + self.setup_calls.append(stage) + + def teardown(self, stage=None): + super().teardown(stage=stage) + self.teardown_calls.append(stage) + + def prepare_data(self): + super().prepare_data() + self.prepare_data_calls += 1 + + dm = TestDataModule() + dm.prepare_data() + dm.prepare_data() + dm.setup('fit') + with pytest.deprecated_call( + match=r"DataModule.setup has already been called, so it will not be called again. " + "In v1.6 this behavior will change to always call DataModule.setup" + ): + dm.setup('fit') + dm.setup() + dm.setup() + dm.teardown('validate') + with pytest.deprecated_call( + match=r"DataModule.teardown has already been called, so it will not be called again. " + "In v1.6 this behavior will change to always call DataModule.teardown" + ): + dm.teardown('validate') + + assert dm.prepare_data_calls == 1 + assert dm.setup_calls == ['fit', None] + assert dm.teardown_calls == ['validate'] + + trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=1) + trainer.test(BoringModel(), datamodule=dm) + + # same number of calls + assert dm.prepare_data_calls == 1 + assert dm.setup_calls == ['fit', None] + assert dm.teardown_calls == ['validate', 'test'] + + +def test_v1_6_0_is_overridden_model(): + model = BoringModel() + with pytest.deprecated_call(match="and will be removed in v1.6"): + assert is_overridden("validation_step", model=model) + with pytest.deprecated_call(match="and will be removed in v1.6"): + assert not is_overridden("foo", model=model) + + +def test_v1_6_0_early_stopping_monitor(tmpdir): + with pytest.deprecated_call( + match=r"The `EarlyStopping\(monitor\)` argument will be required starting in v1.6." + " For backward compatibility, setting this to `early_stop_on`." + ): + EarlyStopping() + + +def test_v1_6_0_extras_with_gradients(tmpdir): + + class TestModel(BoringModel): + + def training_step(self, *args): + loss = super().training_step(*args)['loss'] + return {"loss": loss, 'foo': loss} + + trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=1) + model = TestModel() + match = r"\{'foo'\} has a `grad_fn`.*behaviour will change in v1\.6" + with pytest.deprecated_call(match=match): + trainer.fit(model) + + +def test_v1_6_0_train_loop(tmpdir): + trainer = Trainer() + with pytest.deprecated_call( + match=r"`Trainer.train_loop` has been renamed to `Trainer.fit_loop` and will be removed in v1.6." 
+ ): + _ = trainer.train_loop + + +def test_v1_6_0_rank_zero_warnings_moved(): + with pytest.deprecated_call(match='in v1.3.7 and will be removed in v1.6'): + rank_zero_warn('test') + with pytest.deprecated_call(match='in v1.3.7 and will be removed in v1.6'): + rank_zero_deprecation('test') + + +def test_v1_6_0_ddp_plugin_task_idx(): + plugin = DDPPlugin() + with pytest.deprecated_call(match='Use `DDPPlugin.local_rank` instead'): + _ = plugin.task_idx + + +def test_v1_6_0_lightning_module_loaded_optimizer_states_dict(): + from pytorch_lightning.core.lightning import warning_cache + model = BoringModel() + _ = model.loaded_optimizer_states_dict + assert any( + "The `LightningModule.loaded_optimizer_states_dict` property is deprecated in v1.4" in w for w in warning_cache + ) + warning_cache.clear() + + model.loaded_optimizer_states_dict = {} + assert any( + "The `LightningModule.loaded_optimizer_states_dict` property is deprecated in v1.4" in w for w in warning_cache + ) + warning_cache.clear() + + +def test_v1_6_0_deprecated_model_summary_mode(tmpdir): + model = BoringModel() + with pytest.deprecated_call(match="Argument `mode` in `ModelSummary` is deprecated in v1.4"): + ModelSummary(model, mode="top") + + with pytest.deprecated_call(match="Argument `mode` in `LightningModule.summarize` is deprecated in v1.4"): + model.summarize(mode="top") diff --git a/tests/helpers/advanced_models.py b/tests/helpers/advanced_models.py index 2b0146e1ee099..8f3b9663aa2d7 100644 --- a/tests/helpers/advanced_models.py +++ b/tests/helpers/advanced_models.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from collections import OrderedDict import numpy as np import torch @@ -122,13 +121,8 @@ def training_step(self, batch, batch_idx, optimizer_idx=None): # adversarial loss is binary cross-entropy g_loss = self.adversarial_loss(self.discriminator(self.generated_imgs), valid) - tqdm_dict = {'g_loss': g_loss} - output = OrderedDict({ - 'loss': g_loss, - 'progress_bar': tqdm_dict, - 'log': tqdm_dict, - }) - return output + self.log('g_loss', g_loss, prog_bar=True, logger=True) + return g_loss # train discriminator if optimizer_idx == 1: @@ -148,13 +142,8 @@ def training_step(self, batch, batch_idx, optimizer_idx=None): # discriminator loss is the average of these d_loss = (real_loss + fake_loss) / 2 - tqdm_dict = {'d_loss': d_loss} - output = OrderedDict({ - 'loss': d_loss, - 'progress_bar': tqdm_dict, - 'log': tqdm_dict, - }) - return output + self.log('d_loss', d_loss, prog_bar=True, logger=True) + return d_loss def configure_optimizers(self): lr = self.learning_rate diff --git a/tests/helpers/boring_model.py b/tests/helpers/boring_model.py index eb81baeb2c29d..185baac51f41f 100644 --- a/tests/helpers/boring_model.py +++ b/tests/helpers/boring_model.py @@ -34,19 +34,6 @@ def __len__(self): return self.len -class RandomDictStringDataset(Dataset): - - def __init__(self, size, length): - self.len = length - self.data = torch.randn(length, size) - - def __getitem__(self, index): - return {"id": str(index), "x": self.data[index]} - - def __len__(self): - return self.len - - class RandomDataset(Dataset): def __init__(self, size, length): @@ -174,7 +161,7 @@ def __init__(self, data_dir: str = './'): self.checkpoint_state: Optional[str] = None def prepare_data(self): - self.random_full = RandomDataset(32, 192) + self.random_full = RandomDataset(32, 64 * 4) def setup(self, 
stage: Optional[str] = None): if stage == "fit" or stage is None: @@ -182,12 +169,16 @@ def setup(self, stage: Optional[str] = None): self.dims = self.random_train[0].shape if stage in ("fit", "validate") or stage is None: - self.random_val = Subset(self.random_full, indices=range(64, 128)) + self.random_val = Subset(self.random_full, indices=range(64, 64 * 2)) if stage == "test" or stage is None: - self.random_test = Subset(self.random_full, indices=range(128, 192)) + self.random_test = Subset(self.random_full, indices=range(64 * 2, 64 * 3)) self.dims = getattr(self, "dims", self.random_test[0].shape) + if stage == "predict" or stage is None: + self.random_predict = Subset(self.random_full, indices=range(64 * 3, 64 * 4)) + self.dims = getattr(self, "dims", self.random_predict[0].shape) + def train_dataloader(self): return DataLoader(self.random_train) @@ -196,3 +187,6 @@ def val_dataloader(self): def test_dataloader(self): return DataLoader(self.random_test) + + def predict_dataloader(self): + return DataLoader(self.random_predict) diff --git a/tests/helpers/datasets.py b/tests/helpers/datasets.py index 77035796ca3b1..9fadd947ac9c9 100644 --- a/tests/helpers/datasets.py +++ b/tests/helpers/datasets.py @@ -105,7 +105,7 @@ def prepare_data(self, download: bool = True): raise RuntimeError('Dataset not found.') def _download(self, data_folder: str) -> None: - os.makedirs(data_folder) + os.makedirs(data_folder, exist_ok=True) for url in self.RESOURCES: logging.info(f'Downloading {url}') fpath = os.path.join(data_folder, os.path.basename(url)) diff --git a/tests/helpers/pipelines.py b/tests/helpers/pipelines.py index f7a6484f6b27e..961b5cf080396 100644 --- a/tests/helpers/pipelines.py +++ b/tests/helpers/pipelines.py @@ -91,11 +91,12 @@ def run_model_test( trainer.checkpoint_connector.hpc_save(save_dir, logger) # test HPC loading checkpoint_path = trainer.checkpoint_connector.get_max_ckpt_path_from_folder(save_dir) - trainer.checkpoint_connector.hpc_load(checkpoint_path, on_gpu=on_gpu) + trainer.checkpoint_connector.restore(checkpoint_path) @torch.no_grad() def run_prediction_eval_model_template(trained_model, dataloader, min_acc=0.50): + orig_device = trained_model.device # run prediction on 1 batch trained_model.cpu() trained_model.eval() @@ -108,3 +109,4 @@ def run_prediction_eval_model_template(trained_model, dataloader, min_acc=0.50): acc = accuracy(y_hat.cpu(), y.cpu(), top_k=2).item() assert acc >= min_acc, f"This model is expected to get > {min_acc} in test set (it got {acc})" + trained_model.to(orig_device) diff --git a/tests/helpers/runif.py b/tests/helpers/runif.py index 630a341ec2d30..e4a1d20f72872 100644 --- a/tests/helpers/runif.py +++ b/tests/helpers/runif.py @@ -25,10 +25,9 @@ _DEEPSPEED_AVAILABLE, _FAIRSCALE_AVAILABLE, _FAIRSCALE_FULLY_SHARDED_AVAILABLE, - _FAIRSCALE_PIPE_AVAILABLE, _HOROVOD_AVAILABLE, + _IPU_AVAILABLE, _NATIVE_AMP_AVAILABLE, - _RPC_AVAILABLE, _TORCH_QUANTIZE_AVAILABLE, _TPU_AVAILABLE, ) @@ -63,13 +62,12 @@ def __new__( amp_apex: bool = False, amp_native: bool = False, tpu: bool = False, + ipu: bool = False, horovod: bool = False, horovod_nccl: bool = False, skip_windows: bool = False, special: bool = False, - rpc: bool = False, fairscale: bool = False, - fairscale_pipe: bool = False, fairscale_fully_sharded: bool = False, deepspeed: bool = False, **kwargs @@ -85,13 +83,12 @@ def __new__( amp_apex: NVIDIA Apex is installed amp_native: if native PyTorch native AMP is supported tpu: if TPU is available + ipu: if IPU is available horovod: if Horovod is installed 
horovod_nccl: if Horovod is installed with NCCL support skip_windows: skip test for Windows platform (typically fo some limited torch functionality) special: running in special mode, outside pytest suit - rpc: requires Remote Procedure Call (RPC) fairscale: if `fairscale` module is required to run the test - fairscale_pipe: if `fairscale` with pipe module is required to run the test fairscale_fully_sharded: if `fairscale` fully sharded module is required to run the test deepspeed: if `deepspeed` module is required to run the test kwargs: native pytest.mark.skipif keyword arguments @@ -139,6 +136,10 @@ def __new__( conditions.append(not _TPU_AVAILABLE) reasons.append("TPU") + if ipu: + conditions.append(not _IPU_AVAILABLE) + reasons.append("IPU") + if horovod: conditions.append(not _HOROVOD_AVAILABLE) reasons.append("Horovod") @@ -152,18 +153,10 @@ def __new__( conditions.append(env_flag != '1') reasons.append("Special execution") - if rpc: - conditions.append(not _RPC_AVAILABLE) - reasons.append("RPC") - if fairscale: conditions.append(not _FAIRSCALE_AVAILABLE) reasons.append("Fairscale") - if fairscale_pipe: - conditions.append(not _FAIRSCALE_PIPE_AVAILABLE) - reasons.append("Fairscale Pipe") - if fairscale_fully_sharded: conditions.append(not _FAIRSCALE_FULLY_SHARDED_AVAILABLE) reasons.append("Fairscale Fully Sharded") diff --git a/tests/loggers/test_all.py b/tests/loggers/test_all.py index a18b180be1f49..1bad12b1f9a3d 100644 --- a/tests/loggers/test_all.py +++ b/tests/loggers/test_all.py @@ -24,6 +24,7 @@ from pytorch_lightning import Callback, Trainer from pytorch_lightning.loggers import ( CometLogger, + CSVLogger, MLFlowLogger, NeptuneLogger, TensorBoardLogger, @@ -233,6 +234,7 @@ def name(self): "logger_class", [ CometLogger, + CSVLogger, MLFlowLogger, NeptuneLogger, TensorBoardLogger, @@ -325,6 +327,7 @@ def on_train_batch_start(self, trainer, pl_module, batch, batch_idx, dataloader_ @pytest.mark.parametrize( "logger_class", [ CometLogger, + CSVLogger, MLFlowLogger, NeptuneLogger, TensorBoardLogger, diff --git a/tests/loggers/test_base.py b/tests/loggers/test_base.py index c20b6096585cd..9209083148265 100644 --- a/tests/loggers/test_base.py +++ b/tests/loggers/test_base.py @@ -59,6 +59,7 @@ def __init__(self): self.hparams_logged = None self.metrics_logged = {} self.finalized = False + self.after_save_checkpoint_called = False @property def experiment(self): @@ -92,6 +93,9 @@ def name(self): def version(self): return "1" + def after_save_checkpoint(self, checkpoint_callback): + self.after_save_checkpoint_called = True + def test_custom_logger(tmpdir): @@ -115,6 +119,7 @@ def training_step(self, batch, batch_idx): assert trainer.state.finished, f"Training failed with {trainer.state}" assert logger.hparams_logged == model.hparams assert logger.metrics_logged != {} + assert logger.after_save_checkpoint_called assert logger.finalized_status == "success" diff --git a/tests/loggers/test_tensorboard.py b/tests/loggers/test_tensorboard.py index f7fe1c3bfd47e..b8bafae8508e8 100644 --- a/tests/loggers/test_tensorboard.py +++ b/tests/loggers/test_tensorboard.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
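# The `RunIf` changes above all follow one pattern: each requirement appends a boolean
# to `conditions` and a label to `reasons`, and `__new__` resolves to a plain
# `pytest.mark.skipif`. A stripped-down sketch of that pattern with only the new IPU
# check (`_IPU_AVAILABLE` is hardcoded here as a stand-in for the real import):
import pytest

_IPU_AVAILABLE = False  # assumption: normally imported from `pytorch_lightning.utilities.imports`


class RunIfSketch:

    def __new__(cls, *args, ipu: bool = False, **kwargs):
        conditions = []
        reasons = []
        if ipu:
            conditions.append(not _IPU_AVAILABLE)
            reasons.append("IPU")
        # report only the requirements that actually caused the skip
        reasons = [r for cond, r in zip(conditions, reasons) if cond]
        return pytest.mark.skipif(
            *args, condition=any(conditions), reason=f"Requires: [{' + '.join(reasons)}]", **kwargs
        )


@RunIfSketch(ipu=True)
def test_needs_ipu():
    ...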
+import logging
import os
from argparse import Namespace
from unittest import mock
@@ -275,7 +276,7 @@ def __init__(self):

    def training_step(self, *args):
        self.log('foo', 1, on_step=True, on_epoch=True)
-        if not self.trainer.train_loop.should_accumulate():
+        if not self.trainer.fit_loop.should_accumulate():
            if self.trainer.logger_connector.should_update_logs:
                self.indexes.append(self.trainer.global_step)
        return super().training_step(*args)
@@ -340,3 +341,15 @@ def test_tensorboard_with_symlink(log, tmpdir):
        _ = logger.version

    log.warning.assert_not_called()
+
+
+def test_tensorboard_missing_folder_warning(tmpdir, caplog):
+    """Verify that the logger warns when the log directory does not exist"""
+
+    name = "fake_dir"
+    logger = TensorBoardLogger(save_dir=tmpdir, name=name)
+
+    with caplog.at_level(logging.WARNING):
+        assert logger.version == 0
+
+    assert 'Missing logger folder:' in caplog.text
diff --git a/tests/loggers/test_wandb.py b/tests/loggers/test_wandb.py
index 22be315eaabe2..27185b911b6d0 100644
--- a/tests/loggers/test_wandb.py
+++ b/tests/loggers/test_wandb.py
@@ -24,14 +24,8 @@ from tests.helpers import BoringModel


-def get_warnings(recwarn):
-    warnings_text = '\n'.join(str(w.message) for w in recwarn.list)
-    recwarn.clear()
-    return warnings_text
-
-
@mock.patch('pytorch_lightning.loggers.wandb.wandb')
-def test_wandb_logger_init(wandb, recwarn):
+def test_wandb_logger_init(wandb):
    """Verify that basic functionality of wandb logger works.
    Wandb doesn't work well with pytest so we have to mock it out here."""

@@ -51,8 +45,6 @@ def test_wandb_logger_init(wandb, recwarn):
    run = wandb.init()
    logger = WandbLogger(experiment=run)
    assert logger.experiment
-    assert run.dir is not None
-    assert logger.save_dir == run.dir

    # test wandb.init not called if there is a W&B run
    wandb.init().log.reset_mock()
@@ -140,10 +132,8 @@ def test_wandb_logger_dirs_creation(wandb, tmpdir):

    # mock return values of experiment
    wandb.run = None
-    wandb.init().step = 0
    logger.experiment.id = '1'
    logger.experiment.project_name.return_value = 'project'
-    logger.experiment.step = 0

    for _ in range(2):
        _ = logger.experiment
@@ -164,6 +154,71 @@ def test_wandb_logger_dirs_creation(wandb, tmpdir):
    assert trainer.log_dir == logger.save_dir


+@mock.patch('pytorch_lightning.loggers.wandb.wandb')
+def test_wandb_log_model(wandb, tmpdir):
+    """ Test that the logger uploads model checkpoints to W&B as artifacts. 
""" + + wandb.run = None + model = BoringModel() + + # test log_model=True + logger = WandbLogger(log_model=True) + logger.experiment.id = '1' + logger.experiment.project_name.return_value = 'project' + trainer = Trainer(default_root_dir=tmpdir, logger=logger, max_epochs=2, limit_train_batches=3, limit_val_batches=3) + trainer.fit(model) + wandb.init().log_artifact.assert_called_once() + + # test log_model='all' + wandb.init().log_artifact.reset_mock() + wandb.init.reset_mock() + logger = WandbLogger(log_model='all') + logger.experiment.id = '1' + logger.experiment.project_name.return_value = 'project' + trainer = Trainer(default_root_dir=tmpdir, logger=logger, max_epochs=2, limit_train_batches=3, limit_val_batches=3) + trainer.fit(model) + assert wandb.init().log_artifact.call_count == 2 + + # test log_model=False + wandb.init().log_artifact.reset_mock() + wandb.init.reset_mock() + logger = WandbLogger(log_model=False) + logger.experiment.id = '1' + logger.experiment.project_name.return_value = 'project' + trainer = Trainer(default_root_dir=tmpdir, logger=logger, max_epochs=2, limit_train_batches=3, limit_val_batches=3) + trainer.fit(model) + assert not wandb.init().log_artifact.called + + # test correct metadata + import pytorch_lightning.loggers.wandb as pl_wandb + pl_wandb._WANDB_GREATER_EQUAL_0_10_22 = True + wandb.init().log_artifact.reset_mock() + wandb.init.reset_mock() + wandb.Artifact.reset_mock() + logger = pl_wandb.WandbLogger(log_model=True) + logger.experiment.id = '1' + logger.experiment.project_name.return_value = 'project' + trainer = Trainer(default_root_dir=tmpdir, logger=logger, max_epochs=2, limit_train_batches=3, limit_val_batches=3) + trainer.fit(model) + wandb.Artifact.assert_called_once_with( + name='model-1', + type='model', + metadata={ + 'score': None, + 'original_filename': 'epoch=1-step=5-v3.ckpt', + 'ModelCheckpoint': { + 'monitor': None, + 'mode': 'min', + 'save_last': None, + 'save_top_k': None, + 'save_weights_only': False, + '_every_n_train_steps': 0, + '_every_n_val_epochs': 1 + } + } + ) + + def test_wandb_sanitize_callable_params(tmpdir): """ Callback function are not serializiable. Therefore, we get them a chance to return diff --git a/tests/loops/__init__.py b/tests/loops/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/tests/loops/test_loop_state_dict.py b/tests/loops/test_loop_state_dict.py new file mode 100644 index 0000000000000..1930dc46566fd --- /dev/null +++ b/tests/loops/test_loop_state_dict.py @@ -0,0 +1,54 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import pytest
+
+from pytorch_lightning.loops import FitLoop
+from pytorch_lightning.trainer.trainer import Trainer
+from pytorch_lightning.utilities.exceptions import MisconfigurationException
+
+
+def test_loops_state_dict():
+    fit_loop = FitLoop()
+    with pytest.raises(MisconfigurationException, match="Loop FitLoop should be connected to a"):
+        fit_loop.connect(object())  # noqa
+
+    fit_loop.connect(Trainer())
+    state_dict = fit_loop.state_dict()
+    new_fit_loop = FitLoop()
+    new_fit_loop.load_state_dict(state_dict)
+    assert fit_loop.state_dict() == new_fit_loop.state_dict()
+
+
+def test_loops_state_dict_structure():
+    trainer = Trainer()
+    # structure saved by the checkpoint connector
+    state_dict = {
+        "fit_loop": trainer.fit_loop.state_dict(),
+        "validate_loop": trainer.validate_loop.state_dict(),
+        "test_loop": trainer.test_loop.state_dict(),
+        "predict_loop": trainer.predict_loop.state_dict(),
+    }
+    expected = {
+        "fit_loop": {
+            'epoch_loop': {
+                'batch_loop': {},
+                'val_loop': {},
+            }
+        },
+        "validate_loop": {},
+        "test_loop": {},
+        "predict_loop": {},
+    }
+    assert state_dict == expected
diff --git a/tests/loops/test_loops.py b/tests/loops/test_loops.py
new file mode 100644
index 0000000000000..af5801d2b4552
--- /dev/null
+++ b/tests/loops/test_loops.py
@@ -0,0 +1,74 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Dict, Iterator
+
+from pytorch_lightning.loops.base import Loop
+
+
+def test_loop_restore():
+
+    class CustomException(Exception):
+        pass
+
+    class Simple(Loop):
+
+        def __init__(self, dataset: Iterator):
+            super().__init__()
+            self.dataset = dataset
+
+        def restore(self) -> None:
+            self.iter_dataset = iter(self.dataset)
+            for _ in range(self.iteration_count):
+                next(self.iter_dataset)
+            self.iteration_count += 1
+
+        @property
+        def done(self) -> bool:
+            return self.iteration_count > len(self.dataset)
+
+        def reset(self) -> None:
+            self.iter_dataset = iter(self.dataset)
+            self.outputs = []
+
+        def advance(self) -> None:
+            value = next(self.iter_dataset)
+
+            if self.iteration_count == 5:
+                raise CustomException
+
+            self.outputs.append(value)
+
+        def state_dict(self) -> Dict:
+            return {"iteration_count": self.iteration_count, "outputs": self.outputs}
+
+        def load_state_dict(self, state_dict: Dict) -> None:
+            self.iteration_count = state_dict["iteration_count"]
+            self.outputs = state_dict["outputs"]
+
+    data = range(10)
+    loop = Simple(data)
+    try:
+        loop.run()
+        state_dict = {}
+    except CustomException:
+        state_dict = loop.state_dict()
+
+    loop = Simple(data)
+    loop.load_state_dict(state_dict)
+    loop.restarting = True
+    loop.run()
+
+    assert not loop.restarting
+    assert loop.outputs == list(range(10))
diff --git a/tests/metrics/test_metric_lightning.py b/tests/metrics/test_metric_lightning.py
index e52e39cb16488..6be288b00113b 100644
--- a/tests/metrics/test_metric_lightning.py
+++ b/tests/metrics/test_metric_lightning.py
@@ -1,9 +1,26 @@
+# Copyright The PyTorch Lightning team. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest import torch +from torch import nn from torchmetrics import Metric as TMetric from pytorch_lightning import Trainer from pytorch_lightning.metrics import Metric as PLMetric from pytorch_lightning.metrics import MetricCollection +from pytorch_lightning.utilities.exceptions import MisconfigurationException from tests.helpers.boring_model import BoringModel @@ -78,6 +95,7 @@ def __init__(self): self.metric_step = SumMetric() self.metric_epoch = SumMetric() self.sum = 0.0 + self.total_sum = 0.0 def on_epoch_start(self): self.sum = 0.0 @@ -90,7 +108,10 @@ def training_step(self, batch, batch_idx): return {'loss': self.step(x), 'data': x} def training_epoch_end(self, outs): - self.log("sum_epoch", self.metric_epoch(torch.stack([o['data'] for o in outs]).sum())) + total = torch.stack([o['data'] for o in outs]).sum() + self.metric_epoch(total) + self.log("sum_epoch", self.metric_epoch) + self.total_sum = total model = TestModel() model.val_dataloader = None @@ -107,7 +128,7 @@ def training_epoch_end(self, outs): logged = trainer.logged_metrics assert torch.allclose(torch.tensor(logged["sum_step"]), model.sum) - assert torch.allclose(torch.tensor(logged["sum_epoch"]), model.sum) + assert torch.allclose(torch.tensor(logged["sum_epoch"]), model.total_sum) def test_scriptable(tmpdir): @@ -188,3 +209,59 @@ def training_epoch_end(self, outputs): logged = trainer.logged_metrics assert torch.allclose(torch.tensor(logged["SumMetric_epoch"]), model.sum) assert torch.allclose(torch.tensor(logged["DiffMetric_epoch"]), model.diff) + + +def test_log_metric_no_attributes_raises(tmpdir): + + class TestModel(BoringModel): + + def training_step(self, *args): + metric = SumMetric() + self.log("foo", metric) + + trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=1) + model = TestModel() + with pytest.raises(MisconfigurationException, match="Could not find the `LightningModule` attribute"): + trainer.fit(model) + + +def test_log_metric_dict(tmpdir): + + class TestModel(BoringModel): + + def __init__(self): + super().__init__() + self.metrics = nn.ModuleDict({'sum': SumMetric(), 'diff': DiffMetric()}) + self.sum = 0.0 + self.diff = 0.0 + + def training_step(self, batch, batch_idx): + x = batch + self.metrics['sum'](x.sum()) + self.metrics['diff'](x.sum()) + self.sum += x.sum() + self.diff -= x.sum() + self.log_dict({f'{k}_step': v for k, v in self.metrics.items()}) + return self.step(x) + + def training_epoch_end(self, outputs): + self.metrics['sum'].compute() + self.metrics['diff'].compute() + self.log_dict({f'{k}_epoch': v for k, v in self.metrics.items()}) + + model = TestModel() + model.val_dataloader = None + + trainer = Trainer( + default_root_dir=tmpdir, + limit_train_batches=2, + limit_val_batches=2, + max_epochs=1, + log_every_n_steps=1, + weights_summary=None, + ) + trainer.fit(model) + + logged = trainer.logged_metrics + assert torch.allclose(torch.tensor(logged["sum_epoch"]), model.sum) + assert 
torch.allclose(torch.tensor(logged["diff_epoch"]), model.diff) diff --git a/tests/metrics/test_remove_1-5_metrics.py b/tests/metrics/test_remove_1-5_metrics.py index d3703bf3691c9..aa7d4977d1133 100644 --- a/tests/metrics/test_remove_1-5_metrics.py +++ b/tests/metrics/test_remove_1-5_metrics.py @@ -215,7 +215,7 @@ def test_v1_5_metric_classif_mix(): preds = torch.tensor([0, 1, 0, 0]) confusion_matrix._warned = False with pytest.deprecated_call(match='It will be removed in v1.5.0'): - assert torch.equal(confusion_matrix(preds, target, num_classes=2), torch.tensor([[2., 0.], [1., 1.]])) + assert torch.equal(confusion_matrix(preds, target, num_classes=2).float(), torch.tensor([[2., 0.], [1., 1.]])) target = torch.tensor([0, 1, 2, 0, 1, 2]) preds = torch.tensor([0, 2, 1, 0, 0, 1]) diff --git a/tests/metrics/utils.py b/tests/metrics/utils.py index f1f17d0624936..29c530953f99c 100644 --- a/tests/metrics/utils.py +++ b/tests/metrics/utils.py @@ -66,7 +66,7 @@ def _class_test( metric_class: Metric, sk_metric: Callable, dist_sync_on_step: bool, - metric_args: dict = {}, + metric_args: dict = None, check_dist_sync_on_step: bool = True, check_batch: bool = True, atol: float = 1e-8, @@ -89,6 +89,8 @@ def _class_test( check_batch: bool, if true will check if the metric is also correctly calculated across devices for each batch (and not just at the end) """ + if metric_args is None: + metric_args = {} # Instanciate lightning metric metric = metric_class(compute_on_step=True, dist_sync_on_step=dist_sync_on_step, **metric_args) @@ -130,7 +132,7 @@ def _functional_test( target: torch.Tensor, metric_functional: Callable, sk_metric: Callable, - metric_args: dict = {}, + metric_args: dict = None, atol: float = 1e-8, ): """Utility function doing the actual comparison between lightning functional metric @@ -143,6 +145,8 @@ def _functional_test( sk_metric: callable function that is used for comparison metric_args: dict with additional arguments used for class initialization """ + if metric_args is None: + metric_args = {} metric = partial(metric_functional, **metric_args) for i in range(NUM_BATCHES): @@ -185,7 +189,7 @@ def run_functional_metric_test( target: torch.Tensor, metric_functional: Callable, sk_metric: Callable, - metric_args: dict = {}, + metric_args: dict = None, ): """Main method that should be used for testing functions. 
Call this inside testing method @@ -197,6 +201,8 @@ def run_functional_metric_test( sk_metric: callable function that is used for comparison metric_args: dict with additional arguments used for class initialization """ + if metric_args is None: + metric_args = {} _functional_test( preds=preds, target=target, @@ -214,7 +220,7 @@ def run_class_metric_test( metric_class: Metric, sk_metric: Callable, dist_sync_on_step: bool, - metric_args: dict = {}, + metric_args: dict = None, check_dist_sync_on_step: bool = True, check_batch: bool = True, ): @@ -235,6 +241,8 @@ def run_class_metric_test( check_batch: bool, if true will check if the metric is also correctly calculated across devices for each batch (and not just at the end) """ + if metric_args is None: + metric_args = {} if ddp: if sys.platform == "win32": pytest.skip("DDP not supported on windows") diff --git a/tests/models/data/horovod/train_default_model.py b/tests/models/data/horovod/train_default_model.py index ed0d33f5e8c82..c4cbaeb1363c9 100644 --- a/tests/models/data/horovod/train_default_model.py +++ b/tests/models/data/horovod/train_default_model.py @@ -87,7 +87,7 @@ def training_epoch_end(self, outputs) -> None: trainer.checkpoint_connector.hpc_save(ckpt_path, trainer.logger) # test HPC loading checkpoint_path = trainer.checkpoint_connector.get_max_ckpt_path_from_folder(ckpt_path) - trainer.checkpoint_connector.hpc_load(checkpoint_path, on_gpu=on_gpu) + trainer.checkpoint_connector.restore(checkpoint_path) if on_gpu: trainer = Trainer(gpus=1, accelerator='horovod', max_epochs=1) diff --git a/tests/models/test_cpu.py b/tests/models/test_cpu.py index b54e0d091bd16..84721fe8b575c 100644 --- a/tests/models/test_cpu.py +++ b/tests/models/test_cpu.py @@ -13,6 +13,7 @@ # limitations under the License. 
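# The `metric_args: dict = {}` to `metric_args: dict = None` changes above fix Python's
# shared-mutable-default pitfall: a `{}` default is created once at function definition
# and reused by every call, so mutations leak between calls. A minimal demonstration:
def buggy(metric_args: dict = {}):
    metric_args['calls'] = metric_args.get('calls', 0) + 1
    return metric_args


def fixed(metric_args: dict = None):
    if metric_args is None:
        metric_args = {}
    metric_args['calls'] = metric_args.get('calls', 0) + 1
    return metric_args


assert buggy() == {'calls': 1}
assert buggy() == {'calls': 2}  # the same dict leaked across calls
assert fixed() == {'calls': 1}
assert fixed() == {'calls': 1}  # a fresh dict on every call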
import os

+import pytest
import torch

import tests.helpers.pipelines as tpipes
@@ -322,7 +323,8 @@ def test_all_features_cpu_model(tmpdir):
    tpipes.run_model_test(trainer_options, model, on_gpu=False, min_acc=0.01)


-def test_tbptt_cpu_model(tmpdir):
+@pytest.mark.parametrize("n_hidden_states", [1, 2])
+def test_tbptt_cpu_model(tmpdir, n_hidden_states):
    """Test truncated back propagation through time works."""
    truncated_bptt_steps = 2
    sequence_size = 30
@@ -341,15 +343,19 @@ def __len__(self):

    class BpttTestModel(BoringModel):

-        def __init__(self, batch_size, in_features, out_features, *args, **kwargs):
+        def __init__(self, batch_size, in_features, out_features, n_hidden_states, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self.test_hidden = None
            self.batch_size = batch_size
            self.layer = torch.nn.Linear(in_features, out_features)
+            self.n_hidden_states = n_hidden_states

        def training_step(self, batch, batch_idx, hiddens):
            assert hiddens == self.test_hidden, "Hidden state not persistent between tbptt steps"
-            self.test_hidden = torch.rand(1)
+            if self.n_hidden_states == 1:
+                self.test_hidden = torch.rand(1)
+            else:
+                self.test_hidden = tuple([torch.rand(1)] * self.n_hidden_states)

            x_tensor, y_list = batch
            assert x_tensor.shape[1] == truncated_bptt_steps, "tbptt split Tensor failed"
@@ -378,7 +384,12 @@ def train_dataloader(self):
        sampler=None,
    )

-    model = BpttTestModel(batch_size=batch_size, in_features=truncated_bptt_steps, out_features=truncated_bptt_steps)
+    model = BpttTestModel(
+        batch_size=batch_size,
+        in_features=truncated_bptt_steps,
+        out_features=truncated_bptt_steps,
+        n_hidden_states=n_hidden_states
+    )
    model.example_input_array = torch.randn(5, truncated_bptt_steps)

    # fit model
@@ -390,5 +401,4 @@ def train_dataloader(self):
        weights_summary=None,
    )
    trainer.fit(model)
-
-    assert trainer.state.finished, f"Training failed with {trainer.state}"
+    assert trainer.state.finished, f"Training model with `{n_hidden_states}` hidden state(s) failed with {trainer.state}"
diff --git a/tests/models/test_gpu.py b/tests/models/test_gpu.py
index 65a1e093a9e96..cd7c90552ab2e 100644
--- a/tests/models/test_gpu.py
+++ b/tests/models/test_gpu.py
@@ -12,7 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import operator
+import os
from collections import namedtuple
+from unittest import mock
from unittest.mock import patch

import pytest
@@ -21,6 +23,7 @@
import tests.helpers.pipelines as tpipes
import tests.helpers.utils as tutils
from pytorch_lightning import Trainer
+from pytorch_lightning.plugins.environments import TorchElasticEnvironment
from pytorch_lightning.utilities import device_parser
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from pytorch_lightning.utilities.imports import _compare_version
@@ -219,6 +222,29 @@ def test_parse_gpu_returns_none_when_no_devices_are_available(mocked_device_coun
        device_parser.parse_gpu_ids(gpus)


+@mock.patch.dict(
+    os.environ, {
+        "CUDA_VISIBLE_DEVICES": "0",
+        "LOCAL_RANK": "1",
+        "GROUP_RANK": "1",
+        "RANK": "3",
+        "WORLD_SIZE": "4",
+        "LOCAL_WORLD_SIZE": "2",
+    }
+)
+@mock.patch('torch.cuda.device_count', return_value=1)
+@pytest.mark.parametrize("gpus", [[0, 1, 2], 2, '0'])
+def test_torchelastic_gpu_parsing(mocked_device_count, gpus):
+    """
+    Ensure that, when torchelastic is used and `nproc_per_node` is set to the default of one process per GPU device,
+    we omit sanitizing the GPUs, as only one of the GPUs is visible. 
+ """ + trainer = Trainer(gpus=gpus) + assert isinstance(trainer.accelerator_connector.cluster_environment, TorchElasticEnvironment) + assert trainer.accelerator_connector.parallel_device_ids == device_parser.parse_gpu_ids(gpus) + assert trainer.gpus == gpus + + @RunIf(min_gpus=1) def test_single_gpu_batch_parse(): trainer = Trainer(gpus=1) diff --git a/tests/models/test_grad_norm.py b/tests/models/test_grad_norm.py index 0e380e085ce6a..384e643e184fe 100644 --- a/tests/models/test_grad_norm.py +++ b/tests/models/test_grad_norm.py @@ -11,8 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import os -from unittest import mock from unittest.mock import patch import numpy as np @@ -59,15 +57,19 @@ def on_after_backward(self): self.stored_grad_norms.append(out) -@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) @pytest.mark.parametrize("norm_type", [1., 1.25, 2, 3, 5, 10, 'inf']) def test_grad_tracking(tmpdir, norm_type, rtol=5e-3): # rtol=5e-3 respects the 3 decimals rounding in `.grad_norms` and above - reset_seed() - # use a custom grad tracking module and a list logger - model = ModelWithManualGradTracker(norm_type) + class TestModel(ModelWithManualGradTracker): + logged_metrics = [] + + def on_train_batch_end(self, *_) -> None: + # copy so they don't get reduced + self.logged_metrics.append(self.trainer.logged_metrics.copy()) + + model = TestModel(norm_type) trainer = Trainer( default_root_dir=tmpdir, @@ -76,18 +78,13 @@ def test_grad_tracking(tmpdir, norm_type, rtol=5e-3): log_every_n_steps=1, # request grad_norms every batch ) trainer.fit(model) - assert trainer.state.finished, f"Training failed with {trainer.state}" - logged_metrics = trainer.dev_debugger.logged_metrics - assert len(logged_metrics) == len(model.stored_grad_norms) + assert len(model.logged_metrics) == len(model.stored_grad_norms) # compare the logged metrics against tracked norms on `.backward` - for mod, log in zip(model.stored_grad_norms, logged_metrics): - common = mod.keys() & log.keys() - - log, mod = [log[k] for k in common], [mod[k] for k in common] - - assert np.allclose(log, mod, rtol=rtol) + for mod, log in zip(model.stored_grad_norms, model.logged_metrics): + for k in (mod.keys() & log.keys()): + assert np.allclose(mod[k], log[k], rtol=rtol), k @pytest.mark.parametrize("log_every_n_steps", [1, 2, 3]) @@ -111,5 +108,9 @@ def test_grad_tracking_interval(tmpdir, log_every_n_steps): if grad_norm_dict: grad_norm_dicts.append(grad_norm_dict) - assert len(grad_norm_dicts) == expected - assert all(grad_norm_dicts[0].keys() == g.keys() for g in grad_norm_dicts) + # logging on n steps + 1 epochs + assert len(grad_norm_dicts) == expected + 1 + # check all metrics derived from steps have the same keys + assert all(grad_norm_dicts[0].keys() == g.keys() for g in grad_norm_dicts[:-1]) + epoch_end_keys = [k.replace("step", "epoch") for k in grad_norm_dicts[0]] + assert epoch_end_keys == list(grad_norm_dicts[-1]) diff --git a/tests/models/test_hooks.py b/tests/models/test_hooks.py index 913f403a14dd3..9a689fe9d725a 100644 --- a/tests/models/test_hooks.py +++ b/tests/models/test_hooks.py @@ -11,14 +11,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
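# The grad-norm test above replaces the removed dev-debugger by snapshotting
# `trainer.logged_metrics` from `on_train_batch_end`; the `.copy()` matters because
# the trainer mutates that dict in place on later steps. The same snapshot idiom in
# isolation, with a plain dict standing in for the trainer's metrics:
class MetricsRecorder:

    def __init__(self):
        self.snapshots = []

    def on_train_batch_end(self, logged_metrics: dict) -> None:
        # copy so later in-place updates cannot rewrite past snapshots
        self.snapshots.append(logged_metrics.copy())


recorder = MetricsRecorder()
metrics = {}
for step in range(2):
    metrics['grad_2.0_norm_total'] = float(step)  # mutated in place, like the trainer does
    recorder.on_train_batch_end(metrics)

assert recorder.snapshots == [{'grad_2.0_norm_total': 0.0}, {'grad_2.0_norm_total': 1.0}]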
+from functools import partial +from inspect import getmembers, isfunction from unittest import mock -from unittest.mock import PropertyMock +from unittest.mock import ANY, PropertyMock import pytest import torch from torch.utils.data import DataLoader -from pytorch_lightning import Trainer +from pytorch_lightning import __version__, Callback, LightningDataModule, LightningModule, Trainer from tests.helpers import BoringDataModule, BoringModel, RandomDataset from tests.helpers.runif import RunIf @@ -229,215 +231,151 @@ def train_dataloader(self): trainer.fit(model) -class HookedModel(BoringModel): - - def __init__(self): - super().__init__() - self.called = [] - self.train_batch = [ - 'on_train_batch_start', - 'on_before_batch_transfer', - 'transfer_batch_to_device', - 'on_after_batch_transfer', - 'training_step', - 'on_before_zero_grad', - 'optimizer_zero_grad', - 'backward', - 'on_after_backward', - 'optimizer_step', - 'on_train_batch_end', - ] - self.val_batch = [ - 'on_validation_batch_start', - 'on_before_batch_transfer', - 'transfer_batch_to_device', - 'on_after_batch_transfer', - 'on_validation_batch_end', - ] - - def training_step(self, *args, **kwargs): - self.called.append("training_step") - return super().training_step(*args, **kwargs) - - def optimizer_zero_grad(self, *args, **kwargs): - self.called.append("optimizer_zero_grad") - super().optimizer_zero_grad(*args, **kwargs) - - def training_epoch_end(self, *args, **kwargs): - self.called.append("training_epoch_end") - super().training_epoch_end(*args, **kwargs) - - def backward(self, *args, **kwargs): - self.called.append("backward") - super().backward(*args, **kwargs) - - def on_after_backward(self): - self.called.append("on_after_backward") - super().on_after_backward() - - def optimizer_step(self, *args, **kwargs): - super().optimizer_step(*args, **kwargs) - self.called.append("optimizer_step") # append after as closure calls other methods - - def validation_epoch_end(self, *args, **kwargs): - self.called.append("validation_epoch_end") - super().validation_epoch_end(*args, **kwargs) - - def on_before_zero_grad(self, *args, **kwargs): - self.called.append("on_before_zero_grad") - super().on_before_zero_grad(*args, **kwargs) - - def on_epoch_start(self): - self.called.append("on_epoch_start") - super().on_epoch_start() - - def on_epoch_end(self): - self.called.append("on_epoch_end") - super().on_epoch_end() - - def on_fit_start(self): - self.called.append("on_fit_start") - super().on_fit_start() - - def on_fit_end(self): - self.called.append("on_fit_end") - super().on_fit_end() - - def on_hpc_load(self, *args, **kwargs): - self.called.append("on_hpc_load") - super().on_hpc_load(*args, **kwargs) - - def on_hpc_save(self, *args, **kwargs): - self.called.append("on_hpc_save") - super().on_hpc_save(*args, **kwargs) - - def on_load_checkpoint(self, *args, **kwargs): - self.called.append("on_load_checkpoint") - super().on_load_checkpoint(*args, **kwargs) - - def on_save_checkpoint(self, *args, **kwargs): - self.called.append("on_save_checkpoint") - super().on_save_checkpoint(*args, **kwargs) +def get_members(cls): + return {h for h, _ in getmembers(cls, predicate=isfunction) if not h.startswith('_')} - def on_pretrain_routine_start(self): - self.called.append("on_pretrain_routine_start") - super().on_pretrain_routine_start() - def on_pretrain_routine_end(self): - self.called.append("on_pretrain_routine_end") - super().on_pretrain_routine_end() +class HookedCallback(Callback): - def on_train_start(self): - 
self.called.append("on_train_start") - super().on_train_start() + def __init__(self, called): - def on_train_end(self): - self.called.append("on_train_end") - super().on_train_end() + def call(hook, *args, **kwargs): + d = {'name': f'Callback.{hook}'} + if args: + d['args'] = args + if kwargs: + d['kwargs'] = kwargs + called.append(d) - def on_before_batch_transfer(self, *args, **kwargs): - self.called.append("on_before_batch_transfer") - return super().on_before_batch_transfer(*args, **kwargs) + for h in get_members(Callback): + setattr(self, h, partial(call, h)) - def transfer_batch_to_device(self, *args, **kwargs): - self.called.append("transfer_batch_to_device") - return super().transfer_batch_to_device(*args, **kwargs) - def on_after_batch_transfer(self, *args, **kwargs): - self.called.append("on_after_batch_transfer") - return super().on_after_batch_transfer(*args, **kwargs) - - def on_train_batch_start(self, *args, **kwargs): - self.called.append("on_train_batch_start") - super().on_train_batch_start(*args, **kwargs) - - def on_train_batch_end(self, *args, **kwargs): - self.called.append("on_train_batch_end") - super().on_train_batch_end(*args, **kwargs) - - def on_train_epoch_start(self): - self.called.append("on_train_epoch_start") - super().on_train_epoch_start() - - def on_train_epoch_end(self): - self.called.append("on_train_epoch_end") - super().on_train_epoch_end() - - def on_validation_start(self): - self.called.append("on_validation_start") - super().on_validation_start() - - def on_validation_end(self): - self.called.append("on_validation_end") - super().on_validation_end() - - def on_validation_batch_start(self, *args, **kwargs): - self.called.append("on_validation_batch_start") - super().on_validation_batch_start(*args, **kwargs) - - def on_validation_batch_end(self, *args, **kwargs): - self.called.append("on_validation_batch_end") - super().on_validation_batch_end(*args, **kwargs) - - def on_validation_epoch_start(self): - self.called.append("on_validation_epoch_start") - super().on_validation_epoch_start() - - def on_validation_epoch_end(self, *args, **kwargs): - self.called.append("on_validation_epoch_end") - super().on_validation_epoch_end(*args, **kwargs) - - def on_test_start(self): - self.called.append("on_test_start") - super().on_test_start() - - def on_test_batch_start(self, *args, **kwargs): - self.called.append("on_test_batch_start") - super().on_test_batch_start(*args, **kwargs) - - def on_test_batch_end(self, *args, **kwargs): - self.called.append("on_test_batch_end") - super().on_test_batch_end(*args, **kwargs) - - def on_test_epoch_start(self): - self.called.append("on_test_epoch_start") - super().on_test_epoch_start() - - def on_test_epoch_end(self, *args, **kwargs): - self.called.append("on_test_epoch_end") - super().on_test_epoch_end(*args, **kwargs) - - def on_validation_model_eval(self): - self.called.append("on_validation_model_eval") - super().on_validation_model_eval() - - def on_validation_model_train(self): - self.called.append("on_validation_model_train") - super().on_validation_model_train() - - def on_test_model_eval(self): - self.called.append("on_test_model_eval") - super().on_test_model_eval() - - def on_test_model_train(self): - self.called.append("on_test_model_train") - super().on_test_model_train() +class HookedModel(BoringModel): - def on_test_end(self): - self.called.append("on_test_end") - super().on_test_end() + def __init__(self, called): + super().__init__() + pl_module_hooks = get_members(LightningModule) + # remove most 
`nn.Module` hooks + module_hooks = get_members(torch.nn.Module) + pl_module_hooks.difference_update(module_hooks - {'forward', 'zero_grad', 'train'}) + + def call(hook, fn, *args, **kwargs): + out = fn(*args, **kwargs) + d = {'name': hook} + if args: + d['args'] = args + if kwargs: + d['kwargs'] = kwargs + called.append(d) + return out + + for h in pl_module_hooks: + attr = getattr(self, h) + setattr(self, h, partial(call, h, attr)) - def setup(self, stage=None): - self.called.append(f"setup_{stage}") - super().setup(stage=stage) + def validation_epoch_end(self, *args, **kwargs): + # `BoringModel` does not have a return for `validation_step_end` so this would fail + pass + + def test_epoch_end(self, *args, **kwargs): + # `BoringModel` does not have a return for `test_step_end` so this would fail + pass + + @staticmethod + def _train_batch(trainer, model, batches, current_epoch=0): + out = [] + for i in range(batches): + out.extend([ + # TODO: `on_batch_{start,end}` + dict(name='Callback.on_batch_start', args=(trainer, model)), + dict(name='Callback.on_train_batch_start', args=(trainer, model, ANY, i, 0)), + dict(name='on_train_batch_start', args=(ANY, i, 0)), + dict(name='on_before_batch_transfer', args=(ANY, None)), + dict(name='transfer_batch_to_device', args=(ANY, torch.device('cpu'), None)), + dict(name='on_after_batch_transfer', args=(ANY, None)), + dict(name='forward', args=(ANY, )), + dict(name='training_step', args=(ANY, i)), + dict(name='training_step_end', args=(dict(loss=ANY), )), + dict(name='Callback.on_before_zero_grad', args=(trainer, model, ANY)), + dict(name='on_before_zero_grad', args=(ANY, )), + dict(name='optimizer_zero_grad', args=(current_epoch, i, ANY, 0)), + # TODO: `on_before_backward` + dict(name='backward', args=(ANY, ANY, 0)), + dict(name='Callback.on_after_backward', args=(trainer, model)), + dict(name='on_after_backward'), + # TODO: `on_before_optimizer_step` + dict( + name='optimizer_step', + args=(current_epoch, i, ANY, 0, ANY), + kwargs=dict(on_tpu=False, using_lbfgs=False, using_native_amp=False) + ), + dict(name='Callback.on_train_batch_end', args=(trainer, model, dict(loss=ANY), ANY, i, 0)), + dict(name='on_train_batch_end', args=(dict(loss=ANY), ANY, i, 0)), + dict(name='Callback.on_batch_end', args=(trainer, model)), + ]) + return out + + @staticmethod + def _eval_epoch(fn, trainer, model, batches, key): + outputs = {key: ANY} + return [ + dict(name='Callback.on_epoch_start', args=(trainer, model)), + dict(name='on_epoch_start'), + dict(name=f'Callback.on_{fn}_epoch_start', args=(trainer, model)), + dict(name=f'on_{fn}_epoch_start'), + *HookedModel._eval_batch(fn, trainer, model, batches, key), + dict(name=f'{fn}_epoch_end', args=([outputs] * batches, )), + dict(name=f'Callback.on_{fn}_epoch_end', args=(trainer, model)), + dict(name=f'on_{fn}_epoch_end'), + dict(name='Callback.on_epoch_end', args=(trainer, model)), + dict(name='on_epoch_end'), + ] - def teardown(self, stage=None): - self.called.append(f"teardown_{stage}") - super().teardown(stage) + @staticmethod + def _eval_batch(fn, trainer, model, batches, key): + out = [] + outputs = {key: ANY} + for i in range(batches): + out.extend([ + # TODO: `{,Callback}.on_batch_{start,end}` + dict(name=f'Callback.on_{fn}_batch_start', args=(trainer, model, ANY, i, 0)), + dict(name=f'on_{fn}_batch_start', args=(ANY, i, 0)), + dict(name='on_before_batch_transfer', args=(ANY, None)), + dict(name='transfer_batch_to_device', args=(ANY, torch.device('cpu'), None)), + dict(name='on_after_batch_transfer', args=(ANY, 
None)), + dict(name='forward', args=(ANY, )), + dict(name=f'{fn}_step', args=(ANY, i)), + dict(name=f'{fn}_step_end', args=(outputs, )), + dict(name=f'Callback.on_{fn}_batch_end', args=(trainer, model, outputs, ANY, i, 0)), + dict(name=f'on_{fn}_batch_end', args=(outputs, ANY, i, 0)), + ]) + return out + + @staticmethod + def _predict_batch(trainer, model, batches): + out = [] + for i in range(batches): + out.extend([ + # TODO: `{,Callback}.on_batch_{start,end}` + dict(name='Callback.on_predict_batch_start', args=(trainer, model, ANY, i, 0)), + dict(name='on_predict_batch_start', args=(ANY, i, 0)), + dict(name='on_before_batch_transfer', args=(ANY, None)), + dict(name='transfer_batch_to_device', args=(ANY, torch.device('cpu'), None)), + dict(name='on_after_batch_transfer', args=(ANY, None)), + dict(name='forward', args=(ANY, )), + dict(name='predict_step', args=(ANY, i)), + # TODO: `predict_step_end` + dict(name='Callback.on_predict_batch_end', args=(trainer, model, ANY, ANY, i, 0)), + dict(name='on_predict_batch_end', args=(ANY, ANY, i, 0)), + ]) + return out def test_trainer_model_hook_system_fit(tmpdir): - model = HookedModel() + called = [] + model = HookedModel(called) + callback = HookedCallback(called) train_batches = 2 val_batches = 2 trainer = Trainer( @@ -447,143 +385,303 @@ def test_trainer_model_hook_system_fit(tmpdir): limit_val_batches=val_batches, progress_bar_refresh_rate=0, weights_summary=None, + callbacks=[callback] ) - assert model.called == [] + assert called == [ + dict(name='Callback.on_init_start', args=(trainer, )), + dict(name='Callback.on_init_end', args=(trainer, )), + ] trainer.fit(model) + saved_ckpt = { + 'callbacks': ANY, + 'epoch': 1, + 'global_step': train_batches, + 'lr_schedulers': ANY, + 'optimizer_states': ANY, + 'pytorch-lightning_version': __version__, + 'state_dict': ANY, + } expected = [ - 'setup_fit', - 'on_fit_start', - 'on_pretrain_routine_start', - 'on_pretrain_routine_end', - 'on_validation_model_eval', - 'on_validation_start', - 'on_epoch_start', - 'on_validation_epoch_start', - *(model.val_batch * val_batches), - 'validation_epoch_end', - 'on_validation_epoch_end', - 'on_epoch_end', - 'on_validation_end', - 'on_validation_model_train', - 'on_train_start', - 'on_epoch_start', - 'on_train_epoch_start', - *(model.train_batch * train_batches), - 'on_validation_model_eval', - 'on_validation_start', - 'on_epoch_start', - 'on_validation_epoch_start', - *(model.val_batch * val_batches), - 'validation_epoch_end', - 'on_validation_epoch_end', - 'on_epoch_end', - 'on_save_checkpoint', - 'on_validation_end', - 'on_validation_model_train', - 'training_epoch_end', - 'on_train_epoch_end', - 'on_epoch_end', - 'on_train_end', - 'on_fit_end', - 'teardown_fit', + dict(name='Callback.on_init_start', args=(trainer, )), + dict(name='Callback.on_init_end', args=(trainer, )), + dict(name='prepare_data'), + dict(name='configure_callbacks'), + dict(name='Callback.on_before_accelerator_backend_setup', args=(trainer, model)), + dict(name='Callback.setup', args=(trainer, model), kwargs=dict(stage='fit')), + dict(name='setup', kwargs=dict(stage='fit')), + dict(name='configure_sharded_model'), + dict(name='Callback.on_configure_sharded_model', args=(trainer, model)), + dict(name='configure_optimizers'), + dict(name='Callback.on_fit_start', args=(trainer, model)), + dict(name='on_fit_start'), + dict(name='Callback.on_pretrain_routine_start', args=(trainer, model)), + dict(name='on_pretrain_routine_start'), + dict(name='Callback.on_pretrain_routine_end', args=(trainer, 
model)), + dict(name='on_pretrain_routine_end'), + dict(name='Callback.on_sanity_check_start', args=(trainer, model)), + dict(name='on_val_dataloader'), + dict(name='val_dataloader'), + dict(name='train', args=(False, )), + dict(name='on_validation_model_eval'), + dict(name='zero_grad'), + dict(name='Callback.on_validation_start', args=(trainer, model)), + dict(name='on_validation_start'), + *model._eval_epoch('validation', trainer, model, val_batches, 'x'), + dict(name='Callback.on_validation_end', args=(trainer, model)), + dict(name='on_validation_end'), + dict(name='train'), + dict(name='on_validation_model_train'), + dict(name='Callback.on_sanity_check_end', args=(trainer, model)), + # duplicate `train` because `_run_train` calls it again in case validation wasn't run + dict(name='train'), + dict(name='on_train_dataloader'), + dict(name='train_dataloader'), + dict(name='Callback.on_train_start', args=(trainer, model)), + dict(name='on_train_start'), + dict(name='Callback.on_epoch_start', args=(trainer, model)), + dict(name='on_epoch_start'), + dict(name='Callback.on_train_epoch_start', args=(trainer, model)), + dict(name='on_train_epoch_start'), + *model._train_batch(trainer, model, train_batches), + dict(name='train', args=(False, )), + dict(name='on_validation_model_eval'), + dict(name='zero_grad'), + dict(name='Callback.on_validation_start', args=(trainer, model)), + dict(name='on_validation_start'), + *model._eval_epoch('validation', trainer, model, val_batches, 'x'), + dict(name='Callback.on_validation_end', args=(trainer, model)), + # `ModelCheckpoint.save_checkpoint` is called here from `Callback.on_validation_end` + dict(name='Callback.on_save_checkpoint', args=(trainer, model, saved_ckpt)), + dict(name='on_save_checkpoint', args=(saved_ckpt, )), + dict(name='on_validation_end'), + dict(name='train'), + dict(name='on_validation_model_train'), + dict(name='training_epoch_end', args=([dict(loss=ANY)] * train_batches, )), + dict(name='Callback.on_train_epoch_end', args=(trainer, model, [dict(loss=ANY)] * train_batches)), + dict(name='on_train_epoch_end', args=([dict(loss=ANY)] * train_batches, )), + dict(name='Callback.on_epoch_end', args=(trainer, model)), + dict(name='on_epoch_end'), + dict(name='Callback.on_train_end', args=(trainer, model)), + dict(name='on_train_end'), + dict(name='Callback.on_fit_end', args=(trainer, model)), + dict(name='on_fit_end'), + dict(name='Callback.teardown', args=(trainer, model), kwargs=dict(stage='fit')), + dict(name='teardown', kwargs=dict(stage='fit')), ] - assert model.called == expected + assert called == expected -def test_trainer_model_hook_system_fit_no_val(tmpdir): - model = HookedModel() +def test_trainer_model_hook_system_fit_no_val_and_resume(tmpdir): + # initial training to get a checkpoint + model = BoringModel() + trainer = Trainer( + default_root_dir=tmpdir, + max_steps=1, + limit_val_batches=0, + progress_bar_refresh_rate=0, + weights_summary=None, + ) + trainer.fit(model) + best_model_path = trainer.checkpoint_callback.best_model_path + + # resume from checkpoint with HookedModel + called = [] + model = HookedModel(called) + callback = HookedCallback(called) train_batches = 2 trainer = Trainer( default_root_dir=tmpdir, - max_epochs=1, + # already performed 1 step, now resuming to do an additional 2 + max_steps=(1 + train_batches), limit_val_batches=0, - limit_train_batches=train_batches, progress_bar_refresh_rate=0, weights_summary=None, + resume_from_checkpoint=best_model_path, + callbacks=[callback] ) - assert model.called == 
[] + assert called == [ + dict(name='Callback.on_init_start', args=(trainer, )), + dict(name='Callback.on_init_end', args=(trainer, )), + ] trainer.fit(model) + saved_ckpt = { + 'callbacks': ANY, + 'epoch': 2, # TODO: wrong saved epoch + 'global_step': (1 + train_batches), + 'lr_schedulers': ANY, + 'optimizer_states': ANY, + 'pytorch-lightning_version': __version__, + 'state_dict': ANY, + } expected = [ - 'setup_fit', - 'on_fit_start', - 'on_pretrain_routine_start', - 'on_pretrain_routine_end', - 'on_train_start', - 'on_epoch_start', - 'on_train_epoch_start', - *(model.train_batch * train_batches), - 'training_epoch_end', - 'on_train_epoch_end', - 'on_epoch_end', - 'on_save_checkpoint', # from train epoch end - 'on_train_end', - 'on_fit_end', - 'teardown_fit', + dict(name='Callback.on_init_start', args=(trainer, )), + dict(name='Callback.on_init_end', args=(trainer, )), + dict(name='prepare_data'), + dict(name='configure_callbacks'), + dict(name='Callback.on_before_accelerator_backend_setup', args=(trainer, model)), + dict(name='Callback.setup', args=(trainer, model), kwargs=dict(stage='fit')), + dict(name='setup', kwargs=dict(stage='fit')), + dict( + name='on_load_checkpoint', + args=({ + 'callbacks': ANY, + 'epoch': 1, + 'global_step': 1, + 'lr_schedulers': ANY, + 'optimizer_states': ANY, + 'pytorch-lightning_version': __version__, + 'state_dict': ANY, + }, ) + ), + dict(name='configure_sharded_model'), + dict(name='Callback.on_configure_sharded_model', args=(trainer, model)), + dict(name='configure_optimizers'), + dict(name='Callback.on_fit_start', args=(trainer, model)), + dict(name='on_fit_start'), + dict(name='Callback.on_pretrain_routine_start', args=(trainer, model)), + dict(name='on_pretrain_routine_start'), + dict(name='Callback.on_pretrain_routine_end', args=(trainer, model)), + dict(name='on_pretrain_routine_end'), + dict(name='train'), + dict(name='on_train_dataloader'), + dict(name='train_dataloader'), + # even though no validation runs, we initialize the val dataloader for properties like `num_val_batches` + dict(name='on_val_dataloader'), + dict(name='val_dataloader'), + dict(name='Callback.on_train_start', args=(trainer, model)), + dict(name='on_train_start'), + dict(name='Callback.on_epoch_start', args=(trainer, model)), + dict(name='on_epoch_start'), + dict(name='Callback.on_train_epoch_start', args=(trainer, model)), + dict(name='on_train_epoch_start'), + # TODO: wrong current epoch after reload + *model._train_batch(trainer, model, train_batches, current_epoch=1), + dict(name='training_epoch_end', args=([dict(loss=ANY)] * train_batches, )), + dict(name='Callback.on_train_epoch_end', args=( + trainer, + model, + [dict(loss=ANY)] * train_batches, + )), + dict(name='on_train_epoch_end', args=([dict(loss=ANY)] * train_batches, )), + dict(name='Callback.on_epoch_end', args=(trainer, model)), + dict(name='on_epoch_end'), + dict(name='Callback.on_save_checkpoint', args=(trainer, model, saved_ckpt)), + dict(name='on_save_checkpoint', args=(saved_ckpt, )), + dict(name='Callback.on_train_end', args=(trainer, model)), + dict(name='on_train_end'), + dict(name='Callback.on_fit_end', args=(trainer, model)), + dict(name='on_fit_end'), + dict(name='Callback.teardown', args=(trainer, model), kwargs=dict(stage='fit')), + dict(name='teardown', kwargs=dict(stage='fit')), ] - assert model.called == expected - - -def test_trainer_model_hook_system_validate(tmpdir): - model = HookedModel() + assert called == expected + + +@pytest.mark.parametrize('batches', (0, 2)) 
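+# `batches=0` covers the case where the evaluation loop is skipped entirely:
+# only the setup, dataloader, and teardown hooks in `expected` below should run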
+@pytest.mark.parametrize(['verb', 'noun', 'dataloader', 'key'], [ + ('validate', 'validation', 'val', 'x'), + ('test', 'test', 'test', 'y'), +]) +def test_trainer_model_hook_system_eval(tmpdir, batches, verb, noun, dataloader, key): + called = [] + model = HookedModel(called) + callback = HookedCallback(called) trainer = Trainer( default_root_dir=tmpdir, max_epochs=1, - limit_val_batches=1, + limit_val_batches=batches, + limit_test_batches=batches, progress_bar_refresh_rate=0, weights_summary=None, + callbacks=[callback], ) - assert model.called == [] - trainer.validate(model, verbose=False) + assert called == [ + dict(name='Callback.on_init_start', args=(trainer, )), + dict(name='Callback.on_init_end', args=(trainer, )), + ] + fn = getattr(trainer, verb) + fn(model, verbose=False) + hooks = [ + dict(name='train', args=(False, )), + dict(name=f'on_{noun}_model_eval'), + dict(name='zero_grad'), + dict(name=f'Callback.on_{noun}_start', args=(trainer, model)), + dict(name=f'on_{noun}_start'), + *model._eval_epoch(noun, trainer, model, batches, key), + dict(name=f'Callback.on_{noun}_end', args=(trainer, model)), + dict(name=f'on_{noun}_end'), + dict(name='train'), + dict(name=f'on_{noun}_model_train'), + ] expected = [ - 'setup_validate', - 'on_validation_model_eval', - 'on_validation_start', - 'on_epoch_start', - 'on_validation_epoch_start', - 'on_validation_batch_start', - 'on_before_batch_transfer', - 'transfer_batch_to_device', - 'on_after_batch_transfer', - 'on_validation_batch_end', - 'validation_epoch_end', - 'on_validation_epoch_end', - 'on_epoch_end', - 'on_validation_end', - 'on_validation_model_train', - 'teardown_validate', + dict(name='Callback.on_init_start', args=(trainer, )), + dict(name='Callback.on_init_end', args=(trainer, )), + dict(name='prepare_data'), + dict(name='configure_callbacks'), + dict(name='Callback.on_before_accelerator_backend_setup', args=(trainer, model)), + dict(name='Callback.setup', args=(trainer, model), kwargs=dict(stage=verb)), + dict(name='setup', kwargs=dict(stage=verb)), + dict(name='configure_sharded_model'), + dict(name='Callback.on_configure_sharded_model', args=(trainer, model)), + dict(name=f'on_{dataloader}_dataloader'), + dict(name=f'{dataloader}_dataloader'), + *(hooks if batches else []), + dict(name='Callback.teardown', args=(trainer, model), kwargs=dict(stage=verb)), + dict(name='teardown', kwargs=dict(stage=verb)), ] - assert model.called == expected + assert called == expected -def test_trainer_model_hook_system_test(tmpdir): - model = HookedModel() +def test_trainer_model_hook_system_predict(tmpdir): + called = [] + model = HookedModel(called) + callback = HookedCallback(called) + batches = 2 trainer = Trainer( default_root_dir=tmpdir, - max_epochs=1, - limit_test_batches=1, + limit_predict_batches=batches, progress_bar_refresh_rate=0, - weights_summary=None, + callbacks=[callback], ) - assert model.called == [] - trainer.test(model, verbose=False) + assert called == [ + dict(name='Callback.on_init_start', args=(trainer, )), + dict(name='Callback.on_init_end', args=(trainer, )), + ] + trainer.predict(model) expected = [ - 'setup_test', - 'on_test_model_eval', - 'on_test_start', - 'on_epoch_start', - 'on_test_epoch_start', - 'on_test_batch_start', - 'on_before_batch_transfer', - 'transfer_batch_to_device', - 'on_after_batch_transfer', - 'on_test_batch_end', - 'on_test_epoch_end', - 'on_epoch_end', - 'on_test_end', - 'on_test_model_train', - 'teardown_test', + dict(name='Callback.on_init_start', args=(trainer, )), + 
dict(name='Callback.on_init_end', args=(trainer, )), + dict(name='prepare_data'), + dict(name='configure_callbacks'), + dict(name='Callback.on_before_accelerator_backend_setup', args=(trainer, model)), + dict(name='Callback.setup', args=(trainer, model), kwargs=dict(stage='predict')), + dict(name='setup', kwargs=dict(stage='predict')), + dict(name='configure_sharded_model'), + dict(name='Callback.on_configure_sharded_model', args=(trainer, model)), + dict(name='on_predict_dataloader'), + dict(name='predict_dataloader'), + dict(name='train', args=(False, )), + dict(name='on_predict_model_eval'), + dict(name='zero_grad'), + dict(name='Callback.on_predict_start', args=(trainer, model)), + dict(name='on_predict_start'), + # TODO: `{,Callback}.on_epoch_{start,end}` + dict(name='Callback.on_predict_epoch_start', args=(trainer, model)), + dict(name='on_predict_epoch_start'), + *model._predict_batch(trainer, model, batches), + # TODO: `predict_epoch_end` + dict(name='Callback.on_predict_epoch_end', args=(trainer, model, [[ANY] * batches])), + dict(name='on_predict_epoch_end', args=([[ANY] * batches], )), + dict(name='Callback.on_predict_end', args=(trainer, model)), + dict(name='on_predict_end'), + # TODO: `on_predict_model_train` + dict(name='Callback.teardown', args=(trainer, model), kwargs=dict(stage='predict')), + dict(name='teardown', kwargs=dict(stage='predict')), ] - assert model.called == expected + assert called == expected + + +# TODO: add test for tune def test_hooks_with_different_argument_names(tmpdir): @@ -644,107 +742,102 @@ def test_trainer_datamodule_hook_system(tmpdir): class HookedDataModule(BoringDataModule): - def __init__(self): + def __init__(self, called): super().__init__() - self.called = [] - def prepare_data(self): - self.called.append("prepare_data") - super().prepare_data() + def call(hook, fn, *args, **kwargs): + out = fn(*args, **kwargs) + d = {'name': hook} + if args: + d['args'] = args + if kwargs: + d['kwargs'] = kwargs + called.append(d) + return out - def setup(self, stage=None): - self.called.append(f"setup_{stage}") - super().setup(stage=stage) - - def teardown(self, stage=None): - self.called.append(f"teardown_{stage}") - super().teardown(stage=stage) - - def train_dataloader(self): - self.called.append("train_dataloader") - return super().train_dataloader() - - def test_dataloader(self): - self.called.append("test_dataloader") - return super().test_dataloader() - - def val_dataloader(self): - self.called.append("val_dataloader") - return super().val_dataloader() - - def predict_dataloader(self): - self.called.append("predict_dataloader") - - def transfer_batch_to_device(self, *args, **kwargs): - self.called.append("transfer_batch_to_device") - return super().transfer_batch_to_device(*args, **kwargs) - - def on_before_batch_transfer(self, *args, **kwargs): - self.called.append("on_before_batch_transfer") - return super().on_before_batch_transfer(*args, **kwargs) - - def on_after_batch_transfer(self, *args, **kwargs): - self.called.append("on_after_batch_transfer") - return super().on_after_batch_transfer(*args, **kwargs) + for h in get_members(LightningDataModule): + attr = getattr(self, h) + setattr(self, h, partial(call, h, attr)) model = BoringModel() - dm = HookedDataModule() - + batches = 2 trainer = Trainer( default_root_dir=tmpdir, max_epochs=1, - limit_val_batches=1, - limit_train_batches=2, - limit_test_batches=1, + limit_train_batches=batches, + limit_val_batches=batches, + limit_test_batches=batches, + limit_predict_batches=batches, 
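+        # the same batch count is used for every stage so that each loop below
+        # expects exactly `batch_transfer * batches` transfer-hook calls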
progress_bar_refresh_rate=0, weights_summary=None, reload_dataloaders_every_epoch=True, ) + + called = [] + dm = HookedDataModule(called) trainer.fit(model, datamodule=dm) + batch_transfer = [ + dict(name='on_before_batch_transfer', args=(ANY, None)), + dict(name='transfer_batch_to_device', args=(ANY, torch.device('cpu'), None)), + dict(name='on_after_batch_transfer', args=(ANY, None)), + ] expected = [ - 'prepare_data', - 'setup_fit', - 'val_dataloader', - 'on_before_batch_transfer', - 'transfer_batch_to_device', - 'on_after_batch_transfer', - 'train_dataloader', - 'on_before_batch_transfer', - 'transfer_batch_to_device', - 'on_after_batch_transfer', - 'on_before_batch_transfer', - 'transfer_batch_to_device', - 'on_after_batch_transfer', - 'val_dataloader', - 'on_before_batch_transfer', - 'transfer_batch_to_device', - 'on_after_batch_transfer', - 'teardown_fit', + dict(name='prepare_data'), + dict(name='setup', kwargs=dict(stage='fit')), + dict(name='val_dataloader'), + *batch_transfer * batches, + dict(name='train_dataloader'), + *batch_transfer * batches, + dict(name='val_dataloader'), + *batch_transfer * batches, + dict( + name='on_save_checkpoint', + args=({ + 'callbacks': ANY, + 'epoch': 1, + 'global_step': 2, + 'lr_schedulers': ANY, + 'optimizer_states': ANY, + 'pytorch-lightning_version': __version__, + 'state_dict': ANY, + }, ) + ), + dict(name='teardown', kwargs=dict(stage='fit')), ] - assert dm.called == expected + assert called == expected - dm = HookedDataModule() + called = [] + dm = HookedDataModule(called) trainer.validate(model, datamodule=dm, verbose=False) expected = [ - 'prepare_data', - 'setup_validate', - 'val_dataloader', - 'on_before_batch_transfer', - 'transfer_batch_to_device', - 'on_after_batch_transfer', - 'teardown_validate', + dict(name='prepare_data'), + dict(name='setup', kwargs=dict(stage='validate')), + dict(name='val_dataloader'), + *batch_transfer * batches, + dict(name='teardown', kwargs=dict(stage='validate')), ] - assert dm.called == expected + assert called == expected - dm = HookedDataModule() + called = [] + dm = HookedDataModule(called) trainer.test(model, datamodule=dm, verbose=False) expected = [ - 'prepare_data', - 'setup_test', - 'test_dataloader', - 'on_before_batch_transfer', - 'transfer_batch_to_device', - 'on_after_batch_transfer', - 'teardown_test', + dict(name='prepare_data'), + dict(name='setup', kwargs=dict(stage='test')), + dict(name='test_dataloader'), + *batch_transfer * batches, + dict(name='teardown', kwargs=dict(stage='test')), + ] + assert called == expected + + called = [] + dm = HookedDataModule(called) + trainer.predict(model, datamodule=dm) + expected = [ + dict(name='prepare_data'), + dict(name='setup', kwargs=dict(stage='predict')), + dict(name='predict_dataloader'), + *batch_transfer * batches, + dict(name='teardown', kwargs=dict(stage='predict')), ] - assert dm.called == expected + assert called == expected diff --git a/tests/models/test_horovod.py b/tests/models/test_horovod.py index 10f96845a7a48..ab3c3619652e9 100644 --- a/tests/models/test_horovod.py +++ b/tests/models/test_horovod.py @@ -264,7 +264,7 @@ def test_horovod_multi_optimizer(tmpdir): assert hasattr(optimizer, 'synchronize'), 'optimizer has not been wrapped into DistributedOptimizer' def get_model_params(model): - return set([p for p in model.parameters()]) + return set(list(model.parameters())) def get_optimizer_params(optimizer): return set([p for group in optimizer.param_groups for p in group.get('params', [])]) @@ -296,7 +296,7 @@ def 
training_step(self, batch, batch_idx): self.training_step_called = True tensor = torch.tensor([1.0]) - self.log("test_tensor", tensor, sync_dist=True, sync_dist_op='sum', on_step=True, on_epoch=True) + self.log("test_tensor", tensor, sync_dist=True, reduce_fx='sum', on_step=True, on_epoch=True) res = self._results diff --git a/tests/models/test_hparams.py b/tests/models/test_hparams.py index 06477b3572db7..7fa8872036a73 100644 --- a/tests/models/test_hparams.py +++ b/tests/models/test_hparams.py @@ -15,6 +15,7 @@ import os import pickle from argparse import Namespace +from dataclasses import dataclass import cloudpickle import pytest @@ -719,3 +720,21 @@ def test_empty_hparams_container(tmpdir): assert not model.hparams model = HparamsNamespaceContainerModel(Namespace()) assert not model.hparams + + +@dataclass +class DataClassModel(BoringModel): + + mandatory: int + optional: str = "optional" + ignore_me: bool = False + + def __post_init__(self): + super().__init__() + self.save_hyperparameters(ignore=("ignore_me", )) + + +def test_dataclass_lightning_module(tmpdir): + """ Test that save_hyperparameters() works with a LightningModule as a dataclass. """ + model = DataClassModel(33, optional="cocofruit") + assert model.hparams == dict(mandatory=33, optional="cocofruit") diff --git a/tests/models/test_restore.py b/tests/models/test_restore.py index 09ae795297eb5..b1b8e73861ef1 100644 --- a/tests/models/test_restore.py +++ b/tests/models/test_restore.py @@ -143,7 +143,7 @@ def test_try_resume_from_non_existing_checkpoint(tmpdir): class CaptureCallbacksBeforeTraining(Callback): callbacks = [] - def on_train_start(self, trainer, pl_module): + def on_pretrain_routine_end(self, trainer, pl_module): self.callbacks = deepcopy(trainer.callbacks) @@ -156,7 +156,11 @@ def test_callbacks_state_resume_from_checkpoint(tmpdir): def get_trainer_args(): checkpoint = ModelCheckpoint(dirpath=tmpdir, monitor="val_loss", save_last=True) trainer_args = dict( - default_root_dir=tmpdir, max_steps=1, logger=False, callbacks=[checkpoint, callback_capture] + default_root_dir=tmpdir, + max_steps=1, + logger=False, + callbacks=[checkpoint, callback_capture], + limit_val_batches=2 ) assert checkpoint.best_model_path == "" assert checkpoint.best_model_score is None @@ -183,7 +187,13 @@ def test_callbacks_references_resume_from_checkpoint(tmpdir): """ Test that resuming from a checkpoint sets references as expected. 
""" dm = ClassifDataModule() model = ClassificationModel() - args = {'default_root_dir': tmpdir, 'max_steps': 1, 'logger': False} + args = { + 'default_root_dir': tmpdir, + 'max_steps': 1, + 'logger': False, + "limit_val_batches": 2, + "num_sanity_val_steps": 0 + } # initial training checkpoint = ModelCheckpoint(dirpath=tmpdir, monitor="val_loss", save_last=True) @@ -431,10 +441,10 @@ class CustomModel(CustomClassificationModelDP): def __init__(self): super().__init__() - self.on_train_start_called = False + self.on_pretrain_routine_end_called = False # set the epoch start hook so we can predict before the model does the full training - def on_train_start(self): + def on_pretrain_routine_end(self): assert self.trainer.current_epoch == real_global_epoch and self.trainer.current_epoch > 0 # if model and state loaded correctly, predictions will be good even though we @@ -443,14 +453,14 @@ def on_train_start(self): dataloader = self.train_dataloader() tpipes.run_prediction_eval_model_template(self.trainer.lightning_module, dataloader=dataloader) - self.on_train_start_called = True + self.on_pretrain_routine_end_called = True # new model model = CustomModel() # fit new model which should load hpc weights new_trainer.fit(model, datamodule=dm) - assert model.on_train_start_called + assert model.on_pretrain_routine_end_called # test freeze on gpu model.freeze() diff --git a/tests/models/test_tpu.py b/tests/models/test_tpu.py index f7d0aea829ced..2e7db175801b9 100644 --- a/tests/models/test_tpu.py +++ b/tests/models/test_tpu.py @@ -24,8 +24,8 @@ from pytorch_lightning import Trainer from pytorch_lightning.accelerators import TPUAccelerator from pytorch_lightning.callbacks import EarlyStopping -from pytorch_lightning.core.step_result import Result from pytorch_lightning.plugins import TPUSpawnPlugin +from pytorch_lightning.trainer.connectors.logger_connector.result import _Sync from pytorch_lightning.utilities import _TPU_AVAILABLE from pytorch_lightning.utilities.distributed import ReduceOp from pytorch_lightning.utilities.exceptions import MisconfigurationException @@ -424,20 +424,11 @@ def test_if_test_works_with_checkpoint_false(tmpdir): def test_tpu_sync_dist(): """Test tpu spawn sync dist operation """ - def test_sync_dist(rank): - tensor = torch.tensor([1.0]) - training_type_plugin = TPUSpawnPlugin() - - res = Result() - res.log( - "test_tensor", - tensor, - sync_fn=training_type_plugin.reduce, - sync_dist=True, - sync_dist_op=torch.distributed.ReduceOp.SUM - ) - - assert res["test_tensor"].item() == 8, "Result-Log does not work properly with TPU Spawn and Tensors" + def test_sync_dist(_): + sync = _Sync(TPUSpawnPlugin().reduce, should=True, op=torch.distributed.ReduceOp.SUM) + value = torch.tensor([1.0]) + value = sync(value), + assert value.item() == 8 xmp.spawn(test_sync_dist, nprocs=8, start_method='fork') diff --git a/tests/overrides/test_base.py b/tests/overrides/test_base.py new file mode 100644 index 0000000000000..ad0e63fb5f93d --- /dev/null +++ b/tests/overrides/test_base.py @@ -0,0 +1,44 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +import pytest +import torch +from torch.nn import DataParallel + +from pytorch_lightning.overrides.base import ( + _LightningModuleWrapperBase, + _LightningPrecisionModuleWrapperBase, + unwrap_lightning_module, +) +from tests.helpers import BoringModel + + +@pytest.mark.parametrize("wrapper_class", [ + _LightningModuleWrapperBase, + _LightningPrecisionModuleWrapperBase, +]) +def test_wrapper_device_dtype(wrapper_class): + model = BoringModel() + wrapped_model = wrapper_class(model) + + wrapped_model.to(dtype=torch.float16) + assert model.dtype == torch.float16 + + +def test_unwrap_lightning_module(): + model = BoringModel() + wrapped_model = _LightningPrecisionModuleWrapperBase(model) + wrapped_model = _LightningModuleWrapperBase(wrapped_model) + wrapped_model = DataParallel(wrapped_model) + + assert unwrap_lightning_module(wrapped_model) == model diff --git a/tests/overrides/test_distributed.py b/tests/overrides/test_distributed.py index d09ac9c8bad06..c8d982bd733fe 100644 --- a/tests/overrides/test_distributed.py +++ b/tests/overrides/test_distributed.py @@ -11,11 +11,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from collections.abc import Iterable + import pytest from torch.utils.data import BatchSampler, SequentialSampler from pytorch_lightning import seed_everything from pytorch_lightning.overrides.distributed import IndexBatchSamplerWrapper, UnrepeatedDistributedSampler +from pytorch_lightning.utilities.data import has_len @pytest.mark.parametrize("shuffle", [False, True]) @@ -29,7 +32,7 @@ def test_unrepeated_distributed_sampler(shuffle, tmpdir): for rank in range(world_size): samplers.append(UnrepeatedDistributedSampler(dataset, rank=rank, num_replicas=world_size, shuffle=shuffle)) - indices = [[v for v in s] for s in samplers] + indices = [list(s) for s in samplers] assert len(indices[0]) == 26 assert len(indices[1]) == 26 assert len(indices[2]) == 26 @@ -54,3 +57,13 @@ def test_index_batch_sampler(tmpdir): for batch in index_batch_sampler: assert index_batch_sampler.batch_indices == batch + + +def test_index_batch_sampler_methods(): + dataset = range(15) + sampler = SequentialSampler(dataset) + batch_sampler = BatchSampler(sampler, 3, False) + index_batch_sampler = IndexBatchSamplerWrapper(batch_sampler) + + assert isinstance(index_batch_sampler, Iterable) + assert has_len(index_batch_sampler) diff --git a/tests/plugins/test_amp_plugins.py b/tests/plugins/test_amp_plugins.py index 6d0dbed2cf88b..cf58427b071ce 100644 --- a/tests/plugins/test_amp_plugins.py +++ b/tests/plugins/test_amp_plugins.py @@ -99,6 +99,47 @@ def test_amp_gradient_unscale(tmpdir, accum: int): trainer.fit(model) +@RunIf(min_gpus=1, amp_native=True) +def test_amp_skip_optimizer(tmpdir): + """ + Test that optimizers can be skipped when using amp + """ + + class CustomBoringModel(BoringModel): + + def __init__(self): + super().__init__() + self.layer1 = torch.nn.Linear(32, 32) + self.layer2 = torch.nn.Linear(32, 2) + + def forward(self, x: torch.Tensor): + x = self.layer1(x) + x = self.layer2(x) + return x + + def training_step(self, batch, batch_idx, optimizer_idx): + if optimizer_idx == 1: + return None + output = self(batch) + return self.loss(batch, output) + + def configure_optimizers(self): + return [ + torch.optim.SGD(self.layer1.parameters(), lr=0.1), + 
torch.optim.SGD(self.layer2.parameters(), lr=0.1), + ] + + trainer = Trainer( + default_root_dir=tmpdir, + gpus=1, + fast_dev_run=1, + amp_backend='native', + precision=16, + ) + model = CustomBoringModel() + trainer.fit(model) + + @RunIf(min_gpus=2, amp_apex=True, special=True) @pytest.mark.parametrize("amp_level", ['O2']) def test_amp_apex_ddp_fit(amp_level, tmpdir): diff --git a/tests/plugins/test_cluster_integration.py b/tests/plugins/test_cluster_integration.py index f9ca8c23d34d9..9f5eba43cf5a0 100644 --- a/tests/plugins/test_cluster_integration.py +++ b/tests/plugins/test_cluster_integration.py @@ -18,7 +18,7 @@ import torch from pytorch_lightning import Trainer -from pytorch_lightning.plugins import DDP2Plugin, DDPPlugin, DDPShardedPlugin, DeepSpeedPlugin, RPCSequentialPlugin +from pytorch_lightning.plugins import DDP2Plugin, DDPPlugin, DDPShardedPlugin, DeepSpeedPlugin from pytorch_lightning.plugins.environments import LightningEnvironment, SLURMEnvironment, TorchElasticEnvironment from pytorch_lightning.utilities import rank_zero_only from tests.helpers.runif import RunIf @@ -66,7 +66,6 @@ def environment_combinations(): DDPShardedPlugin, DDP2Plugin, pytest.param(DeepSpeedPlugin, marks=RunIf(deepspeed=True)), - pytest.param(RPCSequentialPlugin, marks=RunIf(fairscale_pipe=True)), ], ) def test_ranks_available_manual_plugin_selection(plugin_cls): diff --git a/tests/plugins/test_ddp_plugin.py b/tests/plugins/test_ddp_plugin.py index d236dc145d96c..61c5d70191db2 100644 --- a/tests/plugins/test_ddp_plugin.py +++ b/tests/plugins/test_ddp_plugin.py @@ -11,7 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from unittest import mock + import torch +from torch.nn.parallel import DistributedDataParallel from pytorch_lightning import Trainer from pytorch_lightning.plugins import DDPPlugin @@ -46,3 +49,30 @@ def test_ddp_with_2_gpus(): assert model.device == torch.device("cpu") cuda_memory = torch.cuda.memory_allocated() assert cuda_memory < model.start_cuda_memory + + +class BarrierModel(BoringModel): + + def setup(self, stage=None): + assert not isinstance(self.trainer.accelerator.model, DistributedDataParallel) + self.trainer.accelerator.barrier("barrier before model is wrapped") + + def on_train_start(self): + assert isinstance(self.trainer.accelerator.model, DistributedDataParallel) + self.trainer.accelerator.barrier("barrier after model is wrapped") + + +@RunIf(min_gpus=4, special=True) +@mock.patch("torch.distributed.barrier") +def test_ddp_barrier_non_consecutive_device_ids(barrier_mock, tmpdir): + """ Test correct usage of barriers when device ids do not start at 0 or are not consecutive. 
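+    The barrier is expected to be invoked with ``device_ids`` containing only this process' own GPU index.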
""" + model = BoringModel() + gpus = [1, 3] + trainer = Trainer( + default_root_dir=tmpdir, + max_steps=1, + gpus=gpus, + accelerator="ddp", + ) + trainer.fit(model) + barrier_mock.assert_any_call(device_ids=[gpus[trainer.local_rank]]) diff --git a/tests/plugins/test_ddp_spawn_plugin.py b/tests/plugins/test_ddp_spawn_plugin.py index 8afc30c4692ec..26a7746c41cfe 100644 --- a/tests/plugins/test_ddp_spawn_plugin.py +++ b/tests/plugins/test_ddp_spawn_plugin.py @@ -15,7 +15,7 @@ from pytorch_lightning import Trainer from pytorch_lightning.plugins import DDPSpawnPlugin -from tests.helpers.boring_model import BoringModel +from tests.helpers.boring_model import BoringDataModule, BoringModel from tests.helpers.runif import RunIf @@ -26,6 +26,26 @@ def on_train_start(self) -> None: assert self.device == torch.device("cpu") +class BoringCallbackDDPSpawnModel(BoringModel): + + def __init__(self, name: str, val: float): + super().__init__() + self.name = name + self.val = val + + def validation_step(self, batch, batch_idx): + self.log(self.name, self.val) + return super().validation_step(batch, batch_idx) + + def add_to_queue(self, queue: torch.multiprocessing.SimpleQueue) -> None: + queue.put("test_val") + return super().add_to_queue(queue) + + def get_from_queue(self, queue: torch.multiprocessing.SimpleQueue) -> None: + self.test_val = queue.get() + return super().get_from_queue(queue) + + @RunIf(skip_windows=True) def test_ddp_cpu(): """Tests if device is set correctely when training for DDPSpawnPlugin.""" @@ -40,3 +60,22 @@ def test_ddp_cpu(): model = BoringModelDDPCPU() trainer.fit(model) + + +@RunIf(min_gpus=2) +def test_ddp_spawn_extra_parameters(tmpdir): + """Tests if device is set correctely when training for DDPSpawnPlugin.""" + trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, gpus=2, accelerator="ddp_spawn") + + assert isinstance(trainer.training_type_plugin, DDPSpawnPlugin) + assert trainer.training_type_plugin.on_gpu + assert trainer.training_type_plugin.root_device == torch.device("cuda:0") + + val: float = 1.0 + val_name: str = "val_acc" + model = BoringCallbackDDPSpawnModel(val_name, val) + dm = BoringDataModule() + + trainer.fit(model, datamodule=dm) + assert trainer.callback_metrics[val_name] == torch.tensor(val) + assert model.test_val == "test_val" diff --git a/tests/plugins/test_deepspeed_plugin.py b/tests/plugins/test_deepspeed_plugin.py index 85d069b90288d..efe8da981c9eb 100644 --- a/tests/plugins/test_deepspeed_plugin.py +++ b/tests/plugins/test_deepspeed_plugin.py @@ -24,15 +24,40 @@ class ModelParallelBoringModel(BoringModel): def __init__(self): super().__init__() - self.linear = None + self.layer = None def configure_sharded_model(self) -> None: - self.linear = torch.nn.Linear(32, 2) + self.layer = torch.nn.Linear(32, 2) def on_load_checkpoint(self, checkpoint: Dict[str, Any]) -> None: self.configure_sharded_model() +class ModelParallelBoringModelManualOptim(BoringModel): + + def __init__(self): + super().__init__() + self.layer = None + + def training_step(self, batch, batch_idx): + opt = self.optimizers()[0] + output = self(batch) + loss = self.loss(batch, output) + opt.zero_grad() + self.manual_backward(loss) + opt.step() + + def configure_sharded_model(self) -> None: + self.layer = torch.nn.Linear(32, 2) + + def on_load_checkpoint(self, checkpoint: Dict[str, Any]) -> None: + self.configure_sharded_model() + + @property + def automatic_optimization(self) -> bool: + return False + + def test_deepspeed_lightning_module(tmpdir): """ Test to ensure that a model 
wrapped in `LightningDeepSpeedModule` moves types and device correctly. @@ -483,6 +508,24 @@ def configure_optimizers(self): }] +class ManualModelParallelClassificationModel(ModelParallelClassificationModel): + + @property + def automatic_optimization(self) -> bool: + return False + + def training_step(self, batch, batch_idx): + x, y = batch + logits = self.forward(x) + loss = F.cross_entropy(logits, y) + opt = self.optimizers()[0] + self.log('train_loss', loss, prog_bar=True) + self.log('train_acc', self.train_acc(logits, y), prog_bar=True, sync_dist=True) + opt.zero_grad() + self.manual_backward(loss) + opt.step() + + @RunIf(min_gpus=2, deepspeed=True, special=True) def test_deepspeed_multigpu_stage_3(tmpdir, deepspeed_config): """ @@ -502,9 +545,34 @@ def test_deepspeed_multigpu_stage_3(tmpdir, deepspeed_config): _assert_save_model_is_equal(model, tmpdir, trainer, cls=ModelParallelBoringModel) -def run_checkpoint_test(tmpdir, save_full_weights): +@RunIf(min_gpus=2, deepspeed=True, special=True) +def test_deepspeed_multigpu_stage_3_manual_optimization(tmpdir, deepspeed_config): + """ + Test to ensure ZeRO Stage 3 works with a parallel model. + """ + model = ModelParallelBoringModelManualOptim() + model.training_epoch_end = None + trainer = Trainer( + default_root_dir=tmpdir, + plugins=[DeepSpeedPlugin(stage=3)], + gpus=2, + fast_dev_run=True, + precision=16, + ) + trainer.fit(model) + trainer.test(model) + + _assert_save_model_is_equal(model, tmpdir, trainer, cls=ModelParallelBoringModelManualOptim) + + +def run_checkpoint_test( + tmpdir: str, save_full_weights: bool, automatic_optimization: bool = True, accumulate_grad_batches: int = 2 +): seed_everything(1) - model = ModelParallelClassificationModel() + if automatic_optimization: + model = ModelParallelClassificationModel() + else: + model = ManualModelParallelClassificationModel() dm = ClassifDataModule() ck = ModelCheckpoint(monitor="val_acc", mode="max", save_last=True, save_top_k=-1) trainer = Trainer( @@ -514,7 +582,7 @@ def run_checkpoint_test(tmpdir, save_full_weights): plugins=[DeepSpeedPlugin(stage=3, save_full_weights=save_full_weights)], gpus=2, precision=16, - accumulate_grad_batches=2, + accumulate_grad_batches=accumulate_grad_batches, callbacks=[ck] ) trainer.fit(model, datamodule=dm) @@ -563,12 +631,28 @@ def test_deepspeed_multigpu_stage_3_checkpointing_full_weights(tmpdir): @RunIf(min_gpus=2, deepspeed=True, special=True) -@pytest.mark.parametrize('cpu_offload', [True, False]) -def test_deepspeed_multigpu_stage_2_accumulated_grad_batches(tmpdir, cpu_offload): +def test_deepspeed_multigpu_stage_3_checkpointing_full_weights_manual(tmpdir): + """ + Test to ensure with Stage 3 and multiple GPUs that we can save/load a model resuming from a checkpoint, + where we save the full weights to one file. + """ + run_checkpoint_test(tmpdir, save_full_weights=True, automatic_optimization=False, accumulate_grad_batches=1) + + +@RunIf(min_gpus=2, deepspeed=True, special=True) +def test_deepspeed_multigpu_stage_2_accumulated_grad_batches(tmpdir): + _deepspeed_multigpu_stage_2_accumulated_grad_batches(tmpdir, offload_optimizer=False) + + +@RunIf(min_gpus=2, deepspeed=True, special=True) +def test_deepspeed_multigpu_stage_2_accumulated_grad_batches_offload_optimizer(tmpdir): + _deepspeed_multigpu_stage_2_accumulated_grad_batches(tmpdir, offload_optimizer=True) + + +def _deepspeed_multigpu_stage_2_accumulated_grad_batches(tmpdir, offload_optimizer): """ Test to ensure with Stage 2 and multiple GPUs, accumulated grad batches works. 
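+    Shared implementation for the offloaded and non-offloaded test variants above.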
""" - os.environ['MASTER_PORT'] = "29500" seed_everything(42) class VerificationCallback(Callback): @@ -585,7 +669,7 @@ def on_train_batch_start( default_root_dir=tmpdir, progress_bar_refresh_rate=0, max_epochs=5, - plugins=[DeepSpeedPlugin(stage=2, cpu_offload=cpu_offload)], + plugins=[DeepSpeedPlugin(stage=2, offload_optimizer=offload_optimizer)], gpus=2, limit_val_batches=2, precision=16, diff --git a/tests/plugins/test_double_plugin.py b/tests/plugins/test_double_plugin.py index 96ff2d182b504..302ee985b2379 100644 --- a/tests/plugins/test_double_plugin.py +++ b/tests/plugins/test_double_plugin.py @@ -11,12 +11,18 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import pickle +from unittest.mock import MagicMock + import pytest import torch from torch.utils.data import DataLoader, Dataset from pytorch_lightning import Trainer +from pytorch_lightning.plugins import DoublePrecisionPlugin +from pytorch_lightning.utilities import _TORCH_GREATER_EQUAL_1_7 from tests.helpers.boring_model import BoringModel, RandomDataset +from tests.helpers.runif import RunIf class RandomFloatIntDataset(Dataset): @@ -118,10 +124,30 @@ def predict_dataloader(self): return DataLoader(RandomDataset(32, 64)) -@pytest.mark.parametrize('boring_model', (DoublePrecisionBoringModel, DoublePrecisionBoringModelNoForward)) +class DoublePrecisionBoringModelComplexBuffer(BoringModel): + + def __init__(self): + super().__init__() + + self.register_buffer("complex_buffer", torch.complex(torch.rand(10), torch.rand(10)), False) + + def on_fit_start(self): + assert self.layer.weight.dtype == torch.float64 + assert self.complex_buffer.dtype == torch.complex64 + + +@pytest.mark.parametrize( + 'boring_model', [ + DoublePrecisionBoringModel, + DoublePrecisionBoringModelNoForward, + pytest.param( + DoublePrecisionBoringModelComplexBuffer, + marks=pytest.mark.skipif(not _TORCH_GREATER_EQUAL_1_7, reason="torch.complex not available") + ), + ] +) def test_double_precision(tmpdir, boring_model): model = boring_model() - original_training_step = model.training_step trainer = Trainer( max_epochs=2, @@ -134,4 +160,25 @@ def test_double_precision(tmpdir, boring_model): trainer.test(model) trainer.predict(model) - assert model.training_step == original_training_step + +@RunIf(min_gpus=2) +def test_double_precision_ddp(tmpdir): + model = DoublePrecisionBoringModel() + + trainer = Trainer( + max_epochs=1, + default_root_dir=tmpdir, + accelerator='ddp_spawn', + gpus=2, + fast_dev_run=2, + precision=64, + log_every_n_steps=1, + ) + trainer.fit(model) + + +def test_double_precision_pickle(tmpdir): + model = BoringModel() + plugin = DoublePrecisionPlugin() + model, _, __ = plugin.connect(model, MagicMock(), MagicMock()) + pickle.dumps(model) diff --git a/tests/plugins/test_plugins_registry.py b/tests/plugins/test_plugins_registry.py index 8ccba40013517..d2ca1d46c975f 100644 --- a/tests/plugins/test_plugins_registry.py +++ b/tests/plugins/test_plugins_registry.py @@ -14,7 +14,7 @@ import pytest from pytorch_lightning import Trainer -from pytorch_lightning.plugins import DDPPlugin, DeepSpeedPlugin, TrainingTypePluginsRegistry +from pytorch_lightning.plugins import DDPPlugin, DeepSpeedPlugin, TPUSpawnPlugin, TrainingTypePluginsRegistry from tests.helpers.runif import RunIf @@ -54,14 +54,15 @@ def __init__(self, param1, param2): }), ("deepspeed_stage_2_offload", { "stage": 2, - "cpu_offload": True + "offload_optimizer": 
True }), ("deepspeed_stage_3", { "stage": 3 }), ("deepspeed_stage_3_offload", { "stage": 3, - "cpu_offload": True + "offload_parameters": True, + "offload_optimizer": True }), ], ) @@ -93,3 +94,16 @@ def test_ddp_training_type_plugins_registry_with_trainer(tmpdir): ) assert isinstance(trainer.training_type_plugin, DDPPlugin) + + +def test_tpu_spawn_debug_plugins_registry(tmpdir): + + plugin = "tpu_spawn_debug" + + assert plugin in TrainingTypePluginsRegistry + assert TrainingTypePluginsRegistry[plugin]["init_params"] == {"debug": True} + assert TrainingTypePluginsRegistry[plugin]["plugin"] == TPUSpawnPlugin + + trainer = Trainer(plugins=plugin) + + assert isinstance(trainer.training_type_plugin, TPUSpawnPlugin) diff --git a/tests/plugins/test_rpc_plugin.py b/tests/plugins/test_rpc_plugin.py deleted file mode 100644 index 7abf9fcbd5039..0000000000000 --- a/tests/plugins/test_rpc_plugin.py +++ /dev/null @@ -1,89 +0,0 @@ -import os -from typing import Optional -from unittest import mock - -import pytest - -from pytorch_lightning import Trainer -from pytorch_lightning.callbacks import Callback -from pytorch_lightning.plugins.training_type.rpc_sequential import RPCPlugin -from tests.helpers.boring_model import BoringModel -from tests.helpers.runif import RunIf - - -@mock.patch.dict( - os.environ, - { - "CUDA_VISIBLE_DEVICES": "0,1", - "SLURM_NTASKS": "2", - "SLURM_JOB_NAME": "SOME_NAME", - "SLURM_NODEID": "0", - "LOCAL_RANK": "0", - "SLURM_PROCID": "0", - "SLURM_LOCALID": "0", - }, -) -@mock.patch("torch.cuda.device_count", return_value=2) -@pytest.mark.parametrize( - ["ddp_backend", "gpus", "num_processes"], - [("ddp_cpu", None, 2), ("ddp", 2, 0), ("ddp_spawn", 2, 0)], -) -@RunIf(rpc=True) -def test_rpc_choice(tmpdir, ddp_backend, gpus, num_processes): - - class CB(Callback): - - def on_fit_start(self, trainer, pl_module): - assert isinstance(trainer.training_type_plugin, RPCPlugin) - raise RuntimeError('finished plugin check') - - model = BoringModel() - trainer = Trainer( - default_root_dir=str(tmpdir), - fast_dev_run=True, - gpus=gpus, - num_processes=num_processes, - distributed_backend=ddp_backend, - callbacks=[CB()], - plugins=[RPCPlugin()] - ) - - with pytest.raises(RuntimeError, match='finished plugin check'): - trainer.fit(model) - - -class CustomRPCPlugin(RPCPlugin): - - def __init__(self, **kwargs): - super().__init__(**kwargs) - self.rpc_save_model_count = 0 - self.worker_optimizer_step_count = 0 - - def rpc_save_model(self, *_) -> None: - self.rpc_save_model_count += 1 - - def barrier(self, name: Optional[str] = None) -> None: - return - - -@RunIf(min_gpus=2, special=True, rpc=True) -def test_rpc_function_calls_ddp(tmpdir): - model = BoringModel() - plugin = CustomRPCPlugin() - max_epochs = 2 - limit_train_batches = 2 - trainer = Trainer( - limit_train_batches=limit_train_batches, - limit_val_batches=2, - max_epochs=max_epochs, - gpus=2, - distributed_backend='ddp', - plugins=[plugin], - default_root_dir=tmpdir, - ) - - trainer.fit(model) - if trainer.global_rank == 0: # Main process - assert plugin.rpc_save_model_count == max_epochs - else: # Worker process - assert plugin.rpc_save_model_count == max_epochs diff --git a/tests/plugins/test_rpc_sequential_plugin.py b/tests/plugins/test_rpc_sequential_plugin.py deleted file mode 100644 index 00a6220036c3e..0000000000000 --- a/tests/plugins/test_rpc_sequential_plugin.py +++ /dev/null @@ -1,185 +0,0 @@ -# Copyright The PyTorch Lightning team. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -from unittest import mock - -import pytest -import torch -import torch.distributed as torch_distrib -from torch import nn - -from pytorch_lightning import LightningModule, Trainer -from pytorch_lightning.plugins.training_type.rpc_sequential import RPCSequentialPlugin -from pytorch_lightning.utilities.exceptions import MisconfigurationException -from tests.helpers.boring_model import RandomDataset -from tests.helpers.runif import RunIf - - -@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) -@RunIf(min_gpus=2, special=True, fairscale_pipe=True) -def test_rpc_sequential_plugin_manual(tmpdir): - model = SequentialModelRPCManual() - trainer = Trainer( - max_epochs=2, - limit_train_batches=2, - limit_val_batches=2, - limit_test_batches=2, - gpus=2, - distributed_backend="ddp", - plugins=[RPCSequentialPlugin(balance=[2, 1], rpc_timeout_sec=5 * 60)], - ) - - trainer.fit(model) - - if torch_distrib.is_initialized() and torch_distrib.get_rank() == 0: - assert len(trainer.dev_debugger.pbar_added_metrics) > 0 - - if trainer.accelerator.rpc_enabled: - # Called at the end of trainer to ensure all processes are killed - trainer.accelerator.training_type_plugin.exit_rpc_process() - - -@RunIf(min_gpus=2, special=True, fairscale_pipe=True) -def test_rpc_sequential_plugin_manual_amp(tmpdir): - model = SequentialModelRPCManual() - trainer = Trainer( - max_epochs=2, - limit_train_batches=2, - limit_val_batches=2, - limit_test_batches=2, - gpus=2, - precision=16, - amp_backend="native", - distributed_backend="ddp", - plugins=[RPCSequentialPlugin(balance=[2, 1])], - ) - with pytest.raises( - MisconfigurationException, - match='`RPCSequentialPlugin` is currently not supported in Automatic Mixed Precision' - ): - trainer.fit(model) - - -@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) -@RunIf(min_gpus=2, special=True, fairscale_pipe=True) -def test_rpc_sequential_plugin_automatic(tmpdir): - model = SequentialModelRPCAutomatic() - trainer = Trainer( - max_epochs=2, - limit_train_batches=2, - limit_val_batches=2, - limit_test_batches=2, - gpus=2, - distributed_backend="ddp", - plugins=[RPCSequentialPlugin(balance=[2, 1])], - ) - - trainer.fit(model) - - if torch_distrib.is_initialized() and torch_distrib.get_rank() == 0: - assert len(trainer.dev_debugger.pbar_added_metrics) > 0 - - if trainer.accelerator.rpc_enabled: - # Called at the end of trainer to ensure all processes are killed - trainer.accelerator.training_type_plugin.exit_rpc_process() - - -@RunIf(min_gpus=2, special=True, fairscale_pipe=True) -def test_rpc_sequential_plugin_with_wrong_balance(tmpdir): - model = SequentialModelRPCAutomatic() - trainer = Trainer( - max_epochs=2, - limit_train_batches=2, - limit_val_batches=2, - limit_test_batches=2, - gpus=2, - distributed_backend="ddp", - plugins=[RPCSequentialPlugin(balance=[2, 2])], - ) - - with pytest.raises( - MisconfigurationException, match="The provided balance sum: 4 does not match your Sequential length: 3" - ): - trainer.fit(model) - - 
if trainer.accelerator.rpc_enabled: - # Called at the end of trainer to ensure all processes are killed - trainer.accelerator.training_type_plugin.exit_rpc_process() - - -class SequentialModelRPCManual(LightningModule): - - def __init__(self): - super().__init__() - self.sequential_module = nn.Sequential(torch.nn.Linear(32, 32), nn.ReLU(), nn.Linear(32, 2)) - self.automatic_optimization = False - - def forward(self, x): - return self.sequential_module(x) - - def loss(self, prediction): - # An arbitrary loss to have a loss that updates the model weights during `Trainer.fit` calls - return torch.nn.functional.mse_loss(prediction, torch.ones_like(prediction)) - - def step(self, x): - x = self(x) - out = torch.nn.functional.mse_loss(x, torch.ones_like(x)) - return out - - def training_step(self, batch, batch_idx): - opt = self.optimizers() - output = self.sequential_module(batch) - loss = self.loss(output) - self.log("train_loss", loss, on_epoch=True, prog_bar=True) - self.manual_backward(loss, opt) - assert torch.stack([torch.abs(p.grad).sum() for p in self.parameters()]).sum() > 0 - opt.step() - opt.zero_grad() - assert torch.stack([torch.abs(p.grad).sum() for p in self.parameters()]).sum() == 0 - - def validation_step(self, batch, batch_idx): - output = self.sequential_module(batch) - loss = self.loss(output) - return loss - - def test_step(self, batch, batch_idx): - output = self.sequential_module(batch) - return self.loss(batch, output) - - def configure_optimizers(self): - optimizer = torch.optim.SGD(self.parameters(), lr=0.1) - lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1) - return [optimizer], [lr_scheduler] - - def train_dataloader(self): - return torch.utils.data.DataLoader(RandomDataset(32, 64)) - - def val_dataloader(self): - return torch.utils.data.DataLoader(RandomDataset(32, 64)) - - def test_dataloader(self): - return torch.utils.data.DataLoader(RandomDataset(32, 64)) - - -class SequentialModelRPCAutomatic(SequentialModelRPCManual): - - def __init__(self): - super().__init__() - self.automatic_optimization = True - - def training_step(self, batch, batch_idx): - output = self.sequential_module(batch) - loss = self.loss(output) - self.log("train_loss", loss, on_epoch=True, prog_bar=True) - return loss diff --git a/tests/plugins/test_sharded_plugin.py b/tests/plugins/test_sharded_plugin.py index 7ab49e6826d58..543c3c8ae3382 100644 --- a/tests/plugins/test_sharded_plugin.py +++ b/tests/plugins/test_sharded_plugin.py @@ -297,12 +297,24 @@ def training_step(self, batch, batch_idx): @RunIf(skip_windows=True, special=True, fairscale=True, min_gpus=2) -@pytest.mark.parametrize("accelerator", ["ddp_sharded", "ddp_sharded_spawn"]) -def test_ddp_sharded_plugin_manual_optimization(tmpdir, accelerator): +def test_ddp_sharded_plugin_manual_optimization_spawn(tmpdir): + # todo (sean): this test has been split out as running both tests using parametrize causes "Address in use" model = ManualBoringModel() trainer = Trainer( default_root_dir=tmpdir, - accelerator=accelerator, + accelerator='ddp_sharded_spawn', + fast_dev_run=2, + gpus=2, + ) + trainer.fit(model) + + +@RunIf(skip_windows=True, special=True, fairscale=True, min_gpus=2) +def test_ddp_sharded_plugin_manual_optimization(tmpdir): + model = ManualBoringModel() + trainer = Trainer( + default_root_dir=tmpdir, + accelerator='ddp_sharded', fast_dev_run=2, gpus=2, ) diff --git a/tests/plugins/test_single_device_plugin.py b/tests/plugins/test_single_device_plugin.py index a398d960daf91..2e4834233537e 100644 --- 
a/tests/plugins/test_single_device_plugin.py +++ b/tests/plugins/test_single_device_plugin.py @@ -38,7 +38,7 @@ def on_train_start(self) -> None: @RunIf(skip_windows=True, min_gpus=1) def test_single_gpu(): - """Tests if device is set correctely when training and after teardown for single GPU plugin.""" + """Tests if device is set correctly when training and after teardown for single GPU plugin.""" trainer = Trainer(gpus=1, fast_dev_run=True) # assert training type plugin attributes for device setting assert isinstance(trainer.training_type_plugin, SingleDevicePlugin) diff --git a/tests/plugins/test_tpu_spawn.py b/tests/plugins/test_tpu_spawn.py index 85e1ecb781946..54c65c336fdd3 100644 --- a/tests/plugins/test_tpu_spawn.py +++ b/tests/plugins/test_tpu_spawn.py @@ -49,7 +49,7 @@ def predict_dataloader(self): @pytest.mark.parametrize( - "train_dataloader, val_dataloaders, test_dataloaders, predict_dataloaders", + "train_dataloaders, val_dataloaders, test_dataloaders, predict_dataloaders", [ (_loader_no_len, None, None, None), (None, _loader_no_len, None, None), @@ -60,14 +60,14 @@ def predict_dataloader(self): ) @mock.patch("pytorch_lightning.plugins.training_type.tpu_spawn.xm") def test_error_patched_iterable_dataloaders( - _, tmpdir, train_dataloader, val_dataloaders, test_dataloaders, predict_dataloaders + _, tmpdir, train_dataloaders, val_dataloaders, test_dataloaders, predict_dataloaders ): model = BoringModelNoDataloaders() connector = DataConnector(MagicMock()) connector.attach_dataloaders( model, - train_dataloader=train_dataloader, + train_dataloaders=train_dataloaders, val_dataloaders=val_dataloaders, test_dataloaders=test_dataloaders, predict_dataloaders=predict_dataloaders, diff --git a/tests/profiler/__init__.py b/tests/profiler/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/tests/test_profiler.py b/tests/profiler/test_profiler.py similarity index 99% rename from tests/test_profiler.py rename to tests/profiler/test_profiler.py index acc2bac1c466f..d940d4426b4a6 100644 --- a/tests/test_profiler.py +++ b/tests/profiler/test_profiler.py @@ -331,8 +331,8 @@ def test_pytorch_profiler_trainer_ddp(tmpdir, pytorch_profiler): files = [file for file in files if file.endswith('.json')] assert len(files) == 2, files local_rank = trainer.local_rank - assert any(f'training_step_{local_rank}' in f for f in files) - assert any(f'validation_step_{local_rank}' in f for f in files) + assert any(f'{local_rank}-training_step_and_backward' in f for f in files) + assert any(f'{local_rank}-validation_step' in f for f in files) def test_pytorch_profiler_trainer_test(tmpdir): diff --git a/tests/profiler/test_xla_profiler.py b/tests/profiler/test_xla_profiler.py new file mode 100644 index 0000000000000..35279ddee8deb --- /dev/null +++ b/tests/profiler/test_xla_profiler.py @@ -0,0 +1,72 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
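+# `xp` and `xu` come from `torch_xla`, which can only be imported on a TPU host,
+# hence the guarded import under `_TPU_AVAILABLE` below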
+import os
+from multiprocessing import Event, Process
+
+import pytest
+
+from pytorch_lightning import Trainer
+from pytorch_lightning.profiler import XLAProfiler
+from pytorch_lightning.utilities import _TPU_AVAILABLE
+from tests.helpers import BoringModel
+from tests.helpers.runif import RunIf
+
+if _TPU_AVAILABLE:
+    import torch_xla.debug.profiler as xp
+    import torch_xla.utils.utils as xu
+
+
+@RunIf(tpu=True)
+def test_xla_profiler_instance(tmpdir):
+
+    model = BoringModel()
+    trainer = Trainer(
+        default_root_dir=tmpdir,
+        fast_dev_run=True,
+        profiler="xla",
+        tpu_cores=8,
+    )
+
+    assert isinstance(trainer.profiler, XLAProfiler)
+    trainer.fit(model)
+    assert trainer.state.finished, f"Training failed with {trainer.state}"
+
+
+@pytest.mark.skipif(True, reason="XLA Profiler doesn't support Prog. capture yet")
+def test_xla_profiler_prog_capture(tmpdir):
+
+    port = xu.get_free_tcp_ports()[0]
+    training_started = Event()
+
+    def train_worker():
+        model = BoringModel()
+        trainer = Trainer(
+            default_root_dir=tmpdir,
+            max_epochs=4,
+            profiler="xla",
+            tpu_cores=8,
+        )
+
+        trainer.fit(model)
+
+    p = Process(target=train_worker, daemon=True)
+    p.start()
+    training_started.wait(120)
+
+    logdir = str(tmpdir)
+    xp.trace(f'localhost:{port}', logdir, duration_ms=2000, num_tracing_attempts=5, delay_ms=1000)
+
+    p.terminate()
+
+    assert os.path.isfile(os.path.join(logdir, 'plugins', 'profile', '*', '*.xplane.pb'))
diff --git a/tests/special_tests.sh b/tests/special_tests.sh
index cf81700291b8d..95311fb2df515 100755
--- a/tests/special_tests.sh
+++ b/tests/special_tests.sh
@@ -17,7 +17,7 @@ set -e
 # this environment variable allows special tests to run
 export PL_RUNNING_SPECIAL_TESTS=1
 # python arguments
-defaults='-m coverage run --source pytorch_lightning --append -m pytest --verbose --capture=no'
+defaults='-m coverage run --source pytorch_lightning --append -m pytest --durations=0 --capture=no --disable-warnings'
 # find tests marked as `@RunIf(special=True)`
 grep_output=$(grep --recursive --line-number --word-regexp 'tests' 'benchmarks' --regexp 'special=True')
@@ -68,7 +68,15 @@ for i in "${!files_arr[@]}"; do
     done < <(echo "$test_code")
 done
-nvprof --profile-from-start off -o trace_name.prof -- python ${defaults} tests/test_profiler.py::test_pytorch_profiler_nested_emit_nvtx
+if nvcc --version; then
+    nvprof --profile-from-start off -o trace_name.prof -- python ${defaults} tests/profiler/test_profiler.py::test_pytorch_profiler_nested_emit_nvtx
+fi
+
+# needs to run outside of `pytest`
+python tests/utilities/test_warnings.py
+if [ $? -eq 0 ]; then
+    report+="Ran\ttests/utilities/test_warnings.py\n"
+fi
 # echo test report
 printf '=%.s' {1..80}
diff --git a/tests/trainer/connectors/test_callback_connector.py b/tests/trainer/connectors/test_callback_connector.py
index 34149e2231bf5..501482d77a240 100644
--- a/tests/trainer/connectors/test_callback_connector.py
+++ b/tests/trainer/connectors/test_callback_connector.py
@@ -1,3 +1,16 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import logging
 from unittest.mock import Mock
diff --git a/tests/trainer/connectors/test_checkpoint_connector.py b/tests/trainer/connectors/test_checkpoint_connector.py
new file mode 100644
index 0000000000000..6e152f5944b59
--- /dev/null
+++ b/tests/trainer/connectors/test_checkpoint_connector.py
@@ -0,0 +1,155 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+from unittest.mock import Mock
+
+import torch
+
+from pytorch_lightning import Trainer
+from tests.helpers import BoringModel
+
+
+class HPCHookedModel(BoringModel):
+
+    def __init__(self):
+        super().__init__()
+        self.hpc_save_called = 0
+        self.hpc_load_called = 0
+
+    def on_hpc_save(self, checkpoint):
+        assert "state_dict" in checkpoint
+        self.hpc_save_called += 1
+
+    def on_hpc_load(self, checkpoint):
+        assert "state_dict" in checkpoint
+        self.hpc_load_called += 1
+
+
+def test_hpc_hook_calls(tmpdir):
+    model = HPCHookedModel()
+    trainer = Trainer(
+        default_root_dir=tmpdir,
+        max_steps=1,
+        checkpoint_callback=False,
+        logger=False,
+    )
+    trainer.fit(model)
+    connector = trainer.checkpoint_connector
+    connector.hpc_save(tmpdir, logger=Mock())
+    assert model.hpc_save_called == 1
+    assert model.hpc_load_called == 0
+
+    # new training run, restore from hpc checkpoint file automatically
+    assert set(os.listdir(tmpdir)) == {"hpc_ckpt_1.ckpt"}
+    trainer = Trainer(
+        default_root_dir=tmpdir,
+        max_steps=1,
+        checkpoint_callback=False,
+        logger=False,
+    )
+    trainer.fit(model)
+    assert model.hpc_save_called == 1
+    assert model.hpc_load_called == 1
+
+
+def test_preloaded_checkpoint_lifecycle(tmpdir):
+    """ Tests that the preloaded checkpoint contents get cleared from memory once they are no longer required. """
+    model = BoringModel()
+    trainer = Trainer(
+        default_root_dir=tmpdir,
+        max_steps=1,
+    )
+    trainer.fit(model)
+
+    connector = trainer.checkpoint_connector
+
+    assert not trainer.resume_from_checkpoint
+    assert not connector.resume_checkpoint_path
+    assert not connector._loaded_checkpoint
+
+    connector.resume_start()
+    assert not connector.resume_checkpoint_path
+    assert not connector._loaded_checkpoint
+    connector.resume_end()
+    assert not connector.resume_checkpoint_path
+    assert not connector._loaded_checkpoint
+
+    ckpt_path = trainer.checkpoint_callback.best_model_path
+    trainer = Trainer(default_root_dir=tmpdir, max_steps=2, resume_from_checkpoint=ckpt_path)
+    connector = trainer.checkpoint_connector
+    connector.resume_start()
+    assert connector.resume_checkpoint_path == ckpt_path
+    assert connector._loaded_checkpoint
+    assert isinstance(connector._loaded_checkpoint, dict)
+    connector.resume_end()
+    assert not connector.resume_checkpoint_path
+    assert not connector._loaded_checkpoint
+
+
+def test_hpc_restore_attempt(tmpdir):
+    """ Test that restore() attempts to restore the hpc_ckpt with highest priority. 
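+    An ``hpc_ckpt_*.ckpt`` file in the root dir takes precedence even over an explicit ``resume_from_checkpoint`` path.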
""" + model = BoringModel() + trainer = Trainer( + default_root_dir=tmpdir, + max_steps=1, + checkpoint_callback=False, + logger=False, + ) + trainer.fit(model) + + hpc_ckpt_path = tmpdir / "hpc_ckpt_3.ckpt" + trainer.save_checkpoint(hpc_ckpt_path) + assert os.listdir(tmpdir) == ["hpc_ckpt_3.ckpt"] + + # set weights to zero + for param in model.parameters(): + torch.nn.init.constant_(param, 0) + + # case 1: restore hpc first, no explicit resume path provided + trainer = Trainer( + default_root_dir=tmpdir, + max_steps=2, + checkpoint_callback=False, + logger=False, + ) + trainer.fit(model) + + for param in model.parameters(): + assert param.abs().sum() > 0 + torch.nn.init.constant_(param, 0) + + # case 2: explicit resume path provided, restore hpc anyway + trainer = Trainer(default_root_dir=tmpdir, max_steps=3, resume_from_checkpoint="not existing") + trainer.fit(model) + + for param in model.parameters(): + assert param.abs().sum() > 0 + + +def test_hpc_max_ckpt_version(tmpdir): + """ Test that the CheckpointConnector is able to find the hpc checkpoint file with the highest version. """ + model = BoringModel() + trainer = Trainer( + default_root_dir=tmpdir, + max_steps=1, + ) + trainer.fit(model) + trainer.save_checkpoint(tmpdir / "hpc_ckpt.ckpt") + trainer.save_checkpoint(tmpdir / "hpc_ckpt_0.ckpt") + trainer.save_checkpoint(tmpdir / "hpc_ckpt_3.ckpt") + trainer.save_checkpoint(tmpdir / "hpc_ckpt_33.ckpt") + + assert trainer.checkpoint_connector.hpc_resume_path == str(tmpdir / "hpc_ckpt_33.ckpt") + assert trainer.checkpoint_connector.max_ckpt_version_in_folder(tmpdir) == 33 + assert trainer.checkpoint_connector.max_ckpt_version_in_folder(tmpdir / "not" / "existing") is None diff --git a/tests/trainer/flags/test_fast_dev_run.py b/tests/trainer/flags/test_fast_dev_run.py index 1dffefb092716..8320134058c4e 100644 --- a/tests/trainer/flags/test_fast_dev_run.py +++ b/tests/trainer/flags/test_fast_dev_run.py @@ -95,7 +95,6 @@ def _make_fast_dev_run_assertions(trainer, model): # there should be no logger with fast_dev_run assert isinstance(trainer.logger, DummyLogger) - assert len(trainer.dev_debugger.logged_metrics) == fast_dev_run # checkpoint callback should not have been called with fast_dev_run assert trainer.checkpoint_callback == checkpoint_callback diff --git a/tests/trainer/logging_/test_distributed_logging.py b/tests/trainer/logging_/test_distributed_logging.py index 5832f387cc63d..4094fd90021af 100644 --- a/tests/trainer/logging_/test_distributed_logging.py +++ b/tests/trainer/logging_/test_distributed_logging.py @@ -24,7 +24,7 @@ class TestModel(BoringModel): def on_pretrain_routine_end(self) -> None: with mock.patch('pytorch_lightning.loggers.base.LightningLoggerBase.agg_and_log_metrics') as m: - self.trainer.logger_connector.log_metrics({'a': 2}, {}) + self.trainer.logger_connector.log_metrics({'a': 2}) logged_times = m.call_count expected = int(self.trainer.is_global_zero) msg = f'actual logger called from non-global zero, logged_times: {logged_times}, expected: {expected}' diff --git a/tests/trainer/logging_/test_eval_loop_logging.py b/tests/trainer/logging_/test_eval_loop_logging.py index 331734aa9b412..5a4e335e0c7c4 100644 --- a/tests/trainer/logging_/test_eval_loop_logging.py +++ b/tests/trainer/logging_/test_eval_loop_logging.py @@ -12,11 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
""" -Tests to ensure that the training loop works with a dict (1.0) +Test logging in the evaluation loop """ import collections import itertools -import os from unittest import mock from unittest.mock import call @@ -24,12 +23,9 @@ import pytest import torch -from pytorch_lightning import callbacks, seed_everything, Trainer -from pytorch_lightning.callbacks import ModelCheckpoint -from pytorch_lightning.core.lightning import LightningModule +from pytorch_lightning import callbacks, Trainer from pytorch_lightning.loggers import TensorBoardLogger from tests.helpers import BoringModel, RandomDataset -from tests.helpers.deterministic_model import DeterministicModel def test__validation_step__log(tmpdir): @@ -37,25 +33,18 @@ def test__validation_step__log(tmpdir): Tests that validation_step can log """ - class TestModel(DeterministicModel): + class TestModel(BoringModel): def training_step(self, batch, batch_idx): - acc = self.step(batch, batch_idx) - acc = acc + batch_idx - self.log('a', acc, on_step=True, on_epoch=True) + out = super().training_step(batch, batch_idx) + self.log('a', out['loss'], on_step=True, on_epoch=True) self.log('a2', 2) - - self.training_step_called = True - return acc + return out def validation_step(self, batch, batch_idx): - acc = self.step(batch, batch_idx) - acc = acc + batch_idx - self.log('b', acc, on_step=True, on_epoch=True) - self.training_step_called = True - - def backward(self, loss, optimizer, optimizer_idx): - return LightningModule.backward(self, loss, optimizer, optimizer_idx) + out = super().validation_step(batch, batch_idx) + self.log('b', out['x'], on_step=True, on_epoch=True) + return out model = TestModel() model.validation_step_end = None @@ -71,8 +60,7 @@ def backward(self, loss, optimizer, optimizer_idx): ) trainer.fit(model) - # make sure all the metrics are available for callbacks - expected_logged_metrics = { + assert set(trainer.logged_metrics) == { 'a2', 'a_step', 'a_epoch', @@ -80,49 +68,33 @@ def backward(self, loss, optimizer, optimizer_idx): 'b_epoch', 'epoch', } - logged_metrics = set(trainer.logged_metrics.keys()) - assert expected_logged_metrics == logged_metrics # we don't want to enable val metrics during steps because it is not something that users should do # on purpose DO NOT allow b_step... 
it's silly to monitor val step metrics - callback_metrics = set(trainer.callback_metrics.keys()) - expected_cb_metrics = {'a', 'a2', 'b', 'a_epoch', 'b_epoch', 'a_step'} - assert expected_cb_metrics == callback_metrics + assert set(trainer.callback_metrics) == {'a', 'a2', 'b', 'a_epoch', 'b_epoch', 'a_step'} -def test__validation_step__step_end__epoch_end__log(tmpdir): +def test__validation_step__epoch_end__log(tmpdir): """ - Tests that validation_step can log + Tests that validation_epoch_end can log """ - class TestModel(DeterministicModel): + class TestModel(BoringModel): def training_step(self, batch, batch_idx): - acc = self.step(batch, batch_idx) - acc = acc + batch_idx - self.log('a', acc) - self.log('b', acc, on_step=True, on_epoch=True) - self.training_step_called = True - return acc + out = super().training_step(batch, batch_idx) + self.log('a', out['loss']) + self.log('b', out['loss'], on_step=True, on_epoch=True) + return out def validation_step(self, batch, batch_idx): - acc = self.step(batch, batch_idx) - acc = acc + batch_idx - self.log('c', acc) - self.log('d', acc, on_step=True, on_epoch=True) - self.validation_step_called = True - return acc - - def validation_step_end(self, acc): - self.validation_step_end_called = True - return ['random_thing'] + out = super().validation_step(batch, batch_idx) + self.log('c', out['x']) + self.log('d', out['x'], on_step=True, on_epoch=True) + return out def validation_epoch_end(self, outputs): self.log('g', torch.tensor(2, device=self.device), on_epoch=True) - self.validation_epoch_end_called = True - - def backward(self, loss, optimizer, optimizer_idx): - return LightningModule.backward(self, loss, optimizer, optimizer_idx) model = TestModel() @@ -136,9 +108,8 @@ def backward(self, loss, optimizer, optimizer_idx): ) trainer.fit(model) - # make sure all the metrics are available for callbacks - logged_metrics = set(trainer.logged_metrics.keys()) - expected_logged_metrics = { + # make sure all the metrics are available for loggers + assert set(trainer.logged_metrics) == { 'epoch', 'a', 'b_step', @@ -148,24 +119,15 @@ def backward(self, loss, optimizer, optimizer_idx): 'd_epoch', 'g', } - assert expected_logged_metrics == logged_metrics - progress_bar_metrics = set(trainer.progress_bar_metrics.keys()) - expected_pbar_metrics = set() - assert expected_pbar_metrics == progress_bar_metrics + assert not trainer.progress_bar_metrics # we don't want to enable val metrics during steps because it is not something that users should do - callback_metrics = set(trainer.callback_metrics.keys()) - expected_cb_metrics = {'a', 'b', 'b_epoch', 'c', 'd', 'd_epoch', 'g', 'b_step'} - assert expected_cb_metrics == callback_metrics + assert set(trainer.callback_metrics) == {'a', 'b', 'b_epoch', 'c', 'd', 'd_epoch', 'g', 'b_step'} -@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) @pytest.mark.parametrize(['batches', 'log_interval', 'max_epochs'], [(1, 1, 1), (64, 32, 2)]) def test_eval_epoch_logging(tmpdir, batches, log_interval, max_epochs): - """ - Tests that only training_step can be used - """ class TestModel(BoringModel): @@ -185,35 +147,23 @@ def validation_epoch_end(self, outputs): ) trainer.fit(model) - # make sure all the metrics are available for callbacks - logged_metrics = set(trainer.logged_metrics.keys()) - expected_logged_metrics = { + # assert the loggers received the expected number + logged_metrics = set(trainer.logged_metrics) + assert logged_metrics == { 'c', 'd/e/f', 'epoch', } - assert logged_metrics == expected_logged_metrics - 
pbar_metrics = set(trainer.progress_bar_metrics.keys()) - expected_pbar_metrics = {'c'} - assert pbar_metrics == expected_pbar_metrics + pbar_metrics = set(trainer.progress_bar_metrics) + assert pbar_metrics == {'c'} - callback_metrics = set(trainer.callback_metrics.keys()) - callback_metrics.remove('debug_epoch') - expected_callback_metrics = set() - expected_callback_metrics = expected_callback_metrics.union(logged_metrics) - expected_callback_metrics = expected_callback_metrics.union(pbar_metrics) - expected_callback_metrics.remove('epoch') - assert callback_metrics == expected_callback_metrics - - # assert the loggers received the expected number - assert len(trainer.dev_debugger.logged_metrics) == max_epochs + # make sure all the metrics are available for callbacks + callback_metrics = set(trainer.callback_metrics) + assert callback_metrics == (logged_metrics | pbar_metrics) - {'epoch'} def test_eval_float_logging(tmpdir): - """ - Tests that only training_step can be used - """ class TestModel(BoringModel): @@ -235,45 +185,28 @@ def validation_step(self, batch, batch_idx): ) trainer.fit(model) - # make sure all the metrics are available for callbacks - logged_metrics = set(trainer.logged_metrics.keys()) - expected_logged_metrics = { - 'a', - 'epoch', - } - assert logged_metrics == expected_logged_metrics + assert set(trainer.logged_metrics) == {'a', 'epoch'} -@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) def test_eval_logging_auto_reduce(tmpdir): - """ - Tests that only training_step can be used - """ - seed_everything(1234) class TestModel(BoringModel): - - def on_pretrain_routine_end(self) -> None: - self.seen_vals = [] - self.manual_epoch_end_mean = None - - def on_validation_epoch_start(self) -> None: - self.seen_vals = [] + val_losses = [] + manual_epoch_end_mean = None def validation_step(self, batch, batch_idx): output = self.layer(batch) loss = self.loss(batch, output) - self.seen_vals.append(loss) + self.val_losses.append(loss) self.log('val_loss', loss, on_epoch=True, on_step=True, prog_bar=True) return {"x": loss} def validation_epoch_end(self, outputs) -> None: - for passed_in, manually_tracked in zip(outputs, self.seen_vals): + for passed_in, manually_tracked in zip(outputs, self.val_losses): assert passed_in['x'] == manually_tracked self.manual_epoch_end_mean = torch.stack([x['x'] for x in outputs]).mean() model = TestModel() - trainer = Trainer( default_root_dir=tmpdir, limit_train_batches=3, @@ -281,93 +214,63 @@ def validation_epoch_end(self, outputs) -> None: max_epochs=1, log_every_n_steps=1, weights_summary=None, - callbacks=[ModelCheckpoint(dirpath=tmpdir)], + num_sanity_val_steps=0, ) trainer.fit(model) # make sure all the metrics are available for callbacks - manual_mean = model.manual_epoch_end_mean - callback_metrics = set(trainer.callback_metrics.keys()) - assert callback_metrics == {'debug_epoch', 'val_loss', 'val_loss_epoch'} + assert set(trainer.callback_metrics) == {'val_loss', 'val_loss_epoch'} # make sure values are correct - assert trainer.logged_metrics['val_loss_epoch'] == manual_mean - assert trainer.callback_metrics['val_loss'] == trainer.logged_metrics['val_loss_step'] - - # make sure correct values were logged - logged_val = trainer.dev_debugger.logged_metrics - - # 3 val batches - assert logged_val[0]['val_loss_step'] == model.seen_vals[0] - assert logged_val[1]['val_loss_step'] == model.seen_vals[1] - assert logged_val[2]['val_loss_step'] == model.seen_vals[2] - - # epoch mean - assert logged_val[3]['val_loss_epoch'] == 
model.manual_epoch_end_mean
-
-    # only those logged
-    assert len(logged_val) == 4
+    assert trainer.logged_metrics['val_loss_epoch'] == model.manual_epoch_end_mean
+    assert trainer.callback_metrics['val_loss_epoch'] == model.manual_epoch_end_mean
+    assert trainer.callback_metrics['val_loss'] == model.manual_epoch_end_mean
+    assert trainer.logged_metrics["val_loss_step"] == model.val_losses[-1]
 
 
 @pytest.mark.parametrize(['batches', 'log_interval', 'max_epochs'], [(1, 1, 1), (64, 32, 2)])
 def test_eval_epoch_only_logging(tmpdir, batches, log_interval, max_epochs):
     """
-    Tests that only test_epoch_end can be used to log, and we return them in the results.
+    Tests that test_epoch_end can be used to log, and that the logged values are returned in the results.
     """
 
     class TestModel(BoringModel):
 
         def test_epoch_end(self, outputs):
-            self.log('c', torch.tensor(2), on_epoch=True, prog_bar=True, logger=True)
+            self.log('c', torch.tensor(2))
             self.log('d/e/f', 2)
 
     model = TestModel()
-
     trainer = Trainer(
         default_root_dir=tmpdir,
-        limit_train_batches=batches,
-        limit_val_batches=batches,
         max_epochs=max_epochs,
+        limit_test_batches=batches,
         log_every_n_steps=log_interval,
         weights_summary=None,
     )
-    trainer.fit(model)
     results = trainer.test(model)
 
-    expected_result_metrics = {
-        'c': torch.tensor(2),
-        'd/e/f': 2,
-    }
-    for result in results:
-        assert result == expected_result_metrics
+    assert len(results) == 1
+    assert results[0] == {'c': torch.tensor(2), 'd/e/f': 2}
 
 
-def test_monitor_val_epoch_end(tmpdir):
-    epoch_min_loss_override = 0
-    model = BoringModel()
-    checkpoint_callback = callbacks.ModelCheckpoint(dirpath=tmpdir, save_top_k=1, monitor="avg_val_loss")
-    trainer = Trainer(
-        max_epochs=epoch_min_loss_override + 2,
-        logger=False,
-        callbacks=[checkpoint_callback],
-    )
-    trainer.fit(model)
+@pytest.mark.parametrize('suffix', (False, True))
+def test_multi_dataloaders_add_suffix_properly(tmpdir, suffix):
 
-def test_multi_dataloaders_add_suffix_properly(tmpdir):
     class TestModel(BoringModel):
 
-        def test_step(self, batch, *args):
-            output = self.layer(batch)
-            loss = self.loss(batch, output)
-            self.log("test_loss", loss, on_step=True, on_epoch=True)
+        def test_step(self, batch, batch_idx, dataloader_idx=0):
+            out = super().test_step(batch, batch_idx)
+            self.log("test_loss", out['y'], on_step=True, on_epoch=True)
+            return out
 
         def test_dataloader(self):
-            return [
-                torch.utils.data.DataLoader(RandomDataset(32, 64)),
-                torch.utils.data.DataLoader(RandomDataset(32, 64))
-            ]
+            if suffix:
+                return [
+                    torch.utils.data.DataLoader(RandomDataset(32, 64)),
+                    torch.utils.data.DataLoader(RandomDataset(32, 64))
+                ]
+            return super().test_dataloader()
 
     model = TestModel()
     model.test_epoch_end = None
@@ -383,38 +286,13 @@ def test_dataloader(self):
     )
     results = trainer.test(model)
 
-    assert {"test_loss/dataloader_idx_0", "test_loss_epoch/dataloader_idx_0"} == set(results[0])
-    assert {"test_loss/dataloader_idx_1", "test_loss_epoch/dataloader_idx_1"} == set(results[1])
-
+    for i, r in enumerate(results):
+        expected = {'test_loss', 'test_loss_epoch'}
+        if suffix:
+            expected = {e + f'/dataloader_idx_{i}' for e in expected}
+        assert set(r) == expected
 
-def test_single_dataloader_no_suffix_added(tmpdir):
-
-    class TestModel(BoringModel):
-
-        def test_step(self, batch, *args):
-            output = self.layer(batch)
-            loss = self.loss(batch, output)
-            self.log("test_loss", loss, on_step=True, on_epoch=True)
-
-    model = TestModel()
-    model.test_epoch_end = None
-
-    trainer = Trainer(
-        default_root_dir=tmpdir,
-        limit_train_batches=0,
-        limit_val_batches=0,
- limit_test_batches=5, - max_epochs=1, - log_every_n_steps=1, - weights_summary=None, - ) - results = trainer.test(model) - - assert len(results) == 1 - assert {"test_loss", "test_loss_epoch"} == set(results[0]) - - -@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) def test_log_works_in_val_callback(tmpdir): """ Tests that log can be called within callback @@ -422,200 +300,105 @@ def test_log_works_in_val_callback(tmpdir): class TestCallback(callbacks.Callback): - # helpers - count = 1 + count = 0 choices = [False, True] - # used to compute expected values - callback_funcs_called = collections.defaultdict(list) - funcs_called_count = collections.defaultdict(int) - funcs_attr = {} - def make_logging(self, pl_module, func_name, func_idx, on_steps=[], on_epochs=[], prob_bars=[]): - self.funcs_called_count[func_name] += 1 - product = [on_steps, on_epochs, prob_bars] - for idx, (on_step, on_epoch, prog_bar) in enumerate(list(itertools.product(*product))): - # run logging - custom_func_name = f"{func_idx}_{idx}_{func_name}" - pl_module.log( - custom_func_name, self.count * func_idx, on_step=on_step, on_epoch=on_epoch, prog_bar=prog_bar - ) - # catch information for verification - self.callback_funcs_called[func_name].append([self.count * func_idx]) - self.funcs_attr[custom_func_name] = { - "on_step": on_step, - "on_epoch": on_epoch, - "prog_bar": prog_bar, - "forked": on_step and on_epoch, - "func_name": func_name - } + # used to compute expected values + logged_values = collections.defaultdict(list) + call_counter = collections.Counter() + logged_arguments = {} - if on_step and on_epoch: - self.funcs_attr[f"{custom_func_name}_step"] = { - "on_step": True, - "on_epoch": False, - "prog_bar": prog_bar, - "forked": False, - "func_name": func_name - } + def make_logging(self, pl_module, func_name, on_steps, on_epochs, prob_bars): + self.call_counter.update([func_name]) - self.funcs_attr[f"{custom_func_name}_epoch"] = { - "on_step": False, - "on_epoch": True, - "prog_bar": prog_bar, - "forked": False, - "func_name": func_name - } + for idx, (on_step, on_epoch, prog_bar) in enumerate(itertools.product(on_steps, on_epochs, prob_bars)): + fx = f"{func_name}_{idx}" + pl_module.log(fx, self.count, on_step=on_step, on_epoch=on_epoch, prog_bar=prog_bar) + self.logged_values[fx].append(self.count) + self.logged_arguments[fx] = {"on_step": on_step, "on_epoch": on_epoch, "prog_bar": prog_bar} + self.count += 1 - def on_validation_start(self, trainer, pl_module): + def on_validation_start(self, _, pl_module): self.make_logging( - pl_module, - 'on_validation_start', - 1, - on_steps=self.choices, - on_epochs=self.choices, - prob_bars=self.choices + pl_module, 'on_validation_start', on_steps=[False], on_epochs=[True], prob_bars=self.choices ) def on_epoch_start(self, trainer, pl_module): if trainer.validating: self.make_logging( - pl_module, - 'on_epoch_start', - 2, - on_steps=self.choices, - on_epochs=self.choices, - prob_bars=self.choices + pl_module, 'on_epoch_start', on_steps=[False], on_epochs=[True], prob_bars=self.choices ) - def on_validation_epoch_start(self, trainer, pl_module): + def on_validation_epoch_start(self, _, pl_module): self.make_logging( - pl_module, - 'on_validation_epoch_start', - 3, - on_steps=self.choices, - on_epochs=self.choices, - prob_bars=self.choices - ) - - def on_batch_end(self, trainer, pl_module): - self.make_logging( - pl_module, 'on_batch_end', 6, on_steps=self.choices, on_epochs=self.choices, prob_bars=self.choices + pl_module, 'on_validation_epoch_start', 
on_steps=[False], on_epochs=[True], prob_bars=self.choices ) - def on_validation_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, dataloader_idx): + def on_validation_batch_end(self, _, pl_module, *__): self.make_logging( pl_module, 'on_validation_batch_end', - 7, on_steps=self.choices, on_epochs=self.choices, prob_bars=self.choices ) - # used to make sure aggregation works fine. - # we should obtain func[value * c for c in range(1, max_epochs * limit_validation_batches)]) - # with func = np.mean if on_epoch else func = np.max - self.count += 1 def on_epoch_end(self, trainer, pl_module): if trainer.validating: - self.make_logging( - pl_module, 'on_epoch_end', 8, on_steps=[False], on_epochs=self.choices, prob_bars=self.choices - ) + self.make_logging(pl_module, 'on_epoch_end', on_steps=[False], on_epochs=[True], prob_bars=self.choices) - def on_validation_epoch_end(self, trainer, pl_module): + def on_validation_epoch_end(self, _, pl_module): self.make_logging( - pl_module, - 'on_validation_epoch_end', - 9, - on_steps=[False], - on_epochs=self.choices, - prob_bars=self.choices + pl_module, 'on_validation_epoch_end', on_steps=[False], on_epochs=[True], prob_bars=self.choices ) class TestModel(BoringModel): def validation_step(self, batch, batch_idx): - output = self.layer(batch) - loss = self.loss(batch, output) + loss = super().validation_step(batch, batch_idx)['x'] self.log('val_loss', loss) - max_epochs = 1 model = TestModel() model.validation_epoch_end = None - test_callback = TestCallback() - + cb = TestCallback() trainer = Trainer( default_root_dir=tmpdir, limit_train_batches=1, limit_val_batches=4, - limit_test_batches=0, - val_check_interval=0., num_sanity_val_steps=0, - max_epochs=max_epochs, - callbacks=[test_callback], + max_epochs=1, + callbacks=[cb], ) trainer.fit(model) - assert test_callback.funcs_called_count["on_epoch_start"] == 1 - # assert test_callback.funcs_called_count["on_batch_start"] == 1 - assert test_callback.funcs_called_count["on_batch_end"] == 1 - assert test_callback.funcs_called_count["on_validation_start"] == 1 - assert test_callback.funcs_called_count["on_validation_epoch_start"] == 1 - # assert test_callback.funcs_called_count["on_validation_batch_start"] == 4 - assert test_callback.funcs_called_count["on_epoch_end"] == 1 - assert test_callback.funcs_called_count["on_validation_batch_end"] == 4 - assert test_callback.funcs_called_count["on_validation_epoch_end"] == 1 - - # Make sure the func_name exists within callback_metrics. 
If not, we missed some
-    callback_metrics_keys = [*trainer.callback_metrics.keys()]
-    for func_name in test_callback.callback_funcs_called.keys():
-        is_in = False
-        for callback_metrics_key in callback_metrics_keys:
-            if func_name in callback_metrics_key:
-                is_in = True
-        assert is_in, (func_name, callback_metrics_keys)
-
-    # function used to describe expected return logic
-    def get_expected_output(func_attr, original_values):
+    assert cb.call_counter == {
+        'on_validation_batch_end': 4,
+        'on_validation_start': 1,
+        'on_epoch_start': 1,
+        'on_validation_epoch_start': 1,
+        'on_validation_epoch_end': 1,
+        'on_epoch_end': 1
+    }
 
-        if func_attr["on_epoch"] and not func_attr["on_step"]:
-            # Apply mean on values
-            expected_output = np.mean(original_values)
-        else:
-            # Keep the latest value
-            expected_output = np.max(original_values)
-        return expected_output
+    def get_expected(on_epoch, values):
+        # `on_epoch=True` values are mean-reduced over the epoch; otherwise the last
+        # logged value wins, which equals the max here since `count` only increases
+        reduction = np.mean if on_epoch else np.max
+        return reduction(values)
 
-    # Make sure the func_name output equals the average from all logged values when on_epoch true
-    # pop extra keys
-    trainer.callback_metrics.pop("debug_epoch")
-    trainer.callback_metrics.pop("val_loss")
-    for func_name, output_value in trainer.callback_metrics.items():
-        # not sure how to handle this now
-        if "epoch_0" in func_name:
-            func_name = '/'.join(func_name.split('/')[:-1])
+    for fx, value in trainer.callback_metrics.items():
+        actual = value.item()
+        if fx not in cb.logged_arguments:
             continue
+        on_epoch = cb.logged_arguments[fx]['on_epoch']
+        values = cb.logged_values[fx]
+        expected = get_expected(on_epoch, values)
+        assert actual == expected
 
-        if torch.is_tensor(output_value):
-            output_value = output_value.item()
-        # get creation attr
-        func_attr = test_callback.funcs_attr[func_name]
-
-        # retrived orginal logged values
-        original_values = test_callback.callback_funcs_called[func_attr["func_name"]]
-
-        # compute expected output and compare to actual one
-        expected_output = get_expected_output(func_attr, original_values)
-        assert float(output_value) == float(expected_output)
-
-    for func_name, func_attr in test_callback.funcs_attr.items():
-        if func_attr["prog_bar"] and (func_attr["on_step"] or func_attr["on_epoch"]) and not func_attr["forked"]:
-            assert func_name in trainer.logger_connector.progress_bar_metrics
-        else:
-            assert func_name not in trainer.logger_connector.progress_bar_metrics
+    for fx, attrs in cb.logged_arguments.items():
+        should_include = attrs["prog_bar"] and attrs["on_step"] ^ attrs["on_epoch"]
+        is_included = fx in trainer.logger_connector.progress_bar_metrics
+        assert is_included == should_include
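+# NOTE for both callback-logging tests: a metric shows up in the progress bar under its
+# raw name only when logged with exactly one of `on_step`/`on_epoch`; with both set, it
+# is forked into `<name>_step` and `<name>_epoch` instead, hence the XOR checks.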
 
 
-@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"})
 def test_log_works_in_test_callback(tmpdir):
     """
     Tests that log can be called within callback
@@ -624,7 +407,7 @@ class TestCallback(callbacks.Callback):
 
         # helpers
-        count = 1
+        count = 0
         choices = [False, True]
 
         # used to compute expected values
@@ -632,19 +415,15 @@ class TestCallback(callbacks.Callback):
         funcs_called_count = collections.defaultdict(int)
         funcs_attr = {}
 
-        def make_logging(self, pl_module, func_name, func_idx, on_steps=[], on_epochs=[], prob_bars=[]):
+        def make_logging(self, pl_module, func_name, on_steps, on_epochs, prob_bars):
            original_func_name = func_name[:]
            self.funcs_called_count[original_func_name] += 1
-            product = [on_steps, on_epochs, prob_bars]
-            for idx, t in enumerate(list(itertools.product(*product))):
-                # run logging
+
+            for idx, (on_step, on_epoch, prog_bar) in enumerate(itertools.product(on_steps, on_epochs, prob_bars)):
                func_name = original_func_name[:]
-                on_step, on_epoch, prog_bar = t
-                custom_func_name = f"{func_idx}_{idx}_{func_name}"
+                custom_func_name = f"{idx}_{func_name}"
 
-                pl_module.log(
-                    custom_func_name, self.count * func_idx, on_step=on_step, on_epoch=on_epoch, prog_bar=prog_bar
-                )
+                pl_module.log(custom_func_name, self.count, on_step=on_step, on_epoch=on_epoch, prog_bar=prog_bar)
 
                num_dl_ext = ''
                if pl_module._current_dataloader_idx is not None:
@@ -653,12 +432,11 @@ def make_logging(self, pl_module, func_name, func_idx, on_steps=[], on_epochs=[]
                    func_name += num_dl_ext
 
                # catch information for verification
-                self.callback_funcs_called[func_name].append([self.count * func_idx])
+                self.callback_funcs_called[func_name].append([self.count])
                self.funcs_attr[custom_func_name + num_dl_ext] = {
                    "on_step": on_step,
                    "on_epoch": on_epoch,
                    "prog_bar": prog_bar,
-                    "forked": on_step and on_epoch,
                    "func_name": func_name
                }
+                # logging with both on_step and on_epoch forks the metric into
+                # dedicated `_step`/`_epoch` entries
                if on_step and on_epoch:
@@ -666,7 +444,6 @@ def make_logging(self, pl_module, func_name, func_idx, on_steps=[], on_epochs=[]
                        "on_step": True,
                        "on_epoch": False,
                        "prog_bar": prog_bar,
-                        "forked": False,
                        "func_name": func_name
                    }
 
@@ -674,140 +451,89 @@ def make_logging(self, pl_module, func_name, func_idx, on_steps=[], on_epochs=[]
                        "on_step": False,
                        "on_epoch": True,
                        "prog_bar": prog_bar,
-                        "forked": False,
                        "func_name": func_name
                    }
 
-        def on_test_start(self, trainer, pl_module):
-            self.make_logging(
-                pl_module, 'on_test_start', 1, on_steps=self.choices, on_epochs=self.choices, prob_bars=self.choices
-            )
+        def on_test_start(self, _, pl_module):
+            self.make_logging(pl_module, 'on_test_start', on_steps=[False], on_epochs=[True], prob_bars=self.choices)
 
-        def on_test_epoch_start(self, trainer, pl_module):
+        def on_test_epoch_start(self, _, pl_module):
            self.make_logging(
-                pl_module,
-                'on_test_epoch_start',
-                3,
-                on_steps=self.choices,
-                on_epochs=self.choices,
-                prob_bars=self.choices
+                pl_module, 'on_test_epoch_start', on_steps=[False], on_epochs=[True], prob_bars=self.choices
            )
 
-        def on_test_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, dataloader_idx):
+        def on_test_batch_end(self, _, pl_module, *__):
            self.make_logging(
-                pl_module,
-                'on_test_batch_end',
-                5,
-                on_steps=self.choices,
-                on_epochs=self.choices,
-                prob_bars=self.choices
+                pl_module, 'on_test_batch_end', on_steps=self.choices, on_epochs=self.choices, prob_bars=self.choices
            )
-            # used to make sure aggregation works fine.
- # we should obtain func[value * c for c in range(1, max_epochs * limit_test_batches)]) - # with func = np.mean if on_epoch else func = np.max - self.count += 1 - - def on_test_epoch_end(self, trainer, pl_module): + def on_test_epoch_end(self, _, pl_module): self.make_logging( - pl_module, 'on_test_epoch_end', 7, on_steps=[False], on_epochs=self.choices, prob_bars=self.choices + pl_module, 'on_test_epoch_end', on_steps=[False], on_epochs=[True], prob_bars=self.choices ) - max_epochs = 2 num_dataloaders = 2 class TestModel(BoringModel): - - manual_mean = collections.defaultdict(list) + seen_losses = {i: [] for i in range(num_dataloaders)} def test_step(self, batch, batch_idx, dataloader_idx=None): - output = self.layer(batch) - loss = self.loss(batch, output) + loss = super().test_step(batch, batch_idx)['y'] self.log('test_loss', loss) - self.manual_mean[str(dataloader_idx)].append(loss) + self.seen_losses[dataloader_idx].append(loss) def test_dataloader(self): return [torch.utils.data.DataLoader(RandomDataset(32, 64)) for _ in range(num_dataloaders)] model = TestModel() model.test_epoch_end = None - test_callback = TestCallback() - + cb = TestCallback() trainer = Trainer( default_root_dir=tmpdir, - limit_train_batches=2, - limit_val_batches=0, limit_test_batches=2, - val_check_interval=0., num_sanity_val_steps=0, - max_epochs=max_epochs, - callbacks=[test_callback], + max_epochs=2, + callbacks=[cb], ) trainer.test(model) - assert test_callback.funcs_called_count["on_test_start"] == 1 - assert test_callback.funcs_called_count["on_test_epoch_start"] == 1 - assert test_callback.funcs_called_count["on_test_batch_end"] == 4 - assert test_callback.funcs_called_count["on_test_epoch_end"] == 1 - - # Make sure the func_name exists within callback_metrics. If not, we missed some - callback_metrics_keys = [*trainer.callback_metrics.keys()] + assert cb.funcs_called_count["on_test_start"] == 1 + assert cb.funcs_called_count["on_test_epoch_start"] == 1 + assert cb.funcs_called_count["on_test_batch_end"] == 4 + assert cb.funcs_called_count["on_test_epoch_end"] == 1 - for func_name in test_callback.callback_funcs_called.keys(): + callback_metrics_keys = list(trainer.callback_metrics) + for func_name in cb.callback_funcs_called.keys(): is_in = False for callback_metrics_key in callback_metrics_keys: if func_name in callback_metrics_key: is_in = True assert is_in, (func_name, callback_metrics_keys) - # function used to describe expected return logic - def get_expected_output(func_attr, original_values): - # Apply mean on values - if func_attr["on_epoch"] and not func_attr["on_step"]: - expected_output = np.mean(original_values) - else: - expected_output = np.max(original_values) - return expected_output + def get_expected(on_epoch, values): + reduction = np.mean if on_epoch else np.max + return reduction(values) # Make sure the func_name output equals the average from all logged values when on_epoch true - # pop extra keys - assert "debug_epoch" in trainer.callback_metrics - trainer.callback_metrics.pop("debug_epoch") - for dl_idx in range(num_dataloaders): key = f"test_loss/dataloader_idx_{dl_idx}" assert key in trainer.callback_metrics - assert torch.stack(model.manual_mean[str(dl_idx)]).mean() == trainer.callback_metrics[key] - trainer.callback_metrics.pop(key) + assert torch.stack(model.seen_losses[dl_idx]).mean() == trainer.callback_metrics.pop(key) for func_name, output_value in trainer.callback_metrics.items(): - # not sure how to handle this now - if "epoch_1" in func_name: - func_name = 
'/'.join(func_name.split('/')[:-1])
-            continue
-
-        if torch.is_tensor(output_value):
-            output_value = output_value.item()
+        output_value = output_value.item()
+        func_attr = cb.funcs_attr[func_name]
+        original_values = cb.callback_funcs_called[func_attr["func_name"]]
+        expected_output = get_expected(func_attr['on_epoch'], original_values)
+        assert output_value == expected_output
 
-        # get func attr
-        func_attr = test_callback.funcs_attr[func_name]
-
-        # retrived orginal logged values
-        original_values = test_callback.callback_funcs_called[func_attr["func_name"]]
-
-        # compute expected output and compare to actual one
-        expected_output = get_expected_output(func_attr, original_values)
-        assert float(output_value) == float(expected_output)
-
-    for func_name, func_attr in test_callback.funcs_attr.items():
-        if func_attr["prog_bar"] and (func_attr["on_step"] or func_attr["on_epoch"]) and not func_attr["forked"]:
-            assert func_name in trainer.logger_connector.progress_bar_metrics
-        else:
-            assert func_name not in trainer.logger_connector.progress_bar_metrics
+    for fx, attrs in cb.funcs_attr.items():
+        should_include = attrs["prog_bar"] and attrs["on_step"] ^ attrs["on_epoch"]
+        is_included = fx in trainer.logger_connector.progress_bar_metrics
+        assert is_included == should_include
 
 
 @mock.patch("pytorch_lightning.loggers.TensorBoardLogger.log_metrics")
-@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"})
 def test_validation_step_log_with_tensorboard(mock_log_metrics, tmpdir):
     """
     This test makes sure we properly log_metrics to the loggers
@@ -860,54 +586,42 @@ def test_step(self, batch, batch_idx):
     expected_num_calls = 1 + 2 + 1 + 2 + 1
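+    # seven calls in total: presumably one initial `hp_metric` call from TensorBoard,
+    # then, for each of the two epochs, two step-level `log_metrics` calls followed by
+    # one epoch-level call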
 
     assert len(mock_log_metrics.mock_calls) == expected_num_calls
-    assert mock_log_metrics.mock_calls[0] == call({'hp_metric': -1}, 0)
 
     def get_metrics_at_idx(idx):
         mock_calls = list(mock_log_metrics.mock_calls)
+        # `mock_calls[idx].kwargs` only yields a dict on newer Python versions, hence
+        # the fallback to positional access below
         if isinstance(mock_calls[idx].kwargs, dict):
             return mock_calls[idx].kwargs["metrics"]
-        else:
-            return mock_calls[idx][2]["metrics"]
+        return mock_calls[idx][2]["metrics"]
 
-    expected = ['valid_loss_0_step', 'valid_loss_2', 'global_step']
-    assert sorted(get_metrics_at_idx(1)) == sorted(expected)
-    assert sorted(get_metrics_at_idx(2)) == sorted(expected)
+    expected = {'valid_loss_0_step', 'valid_loss_2'}
+    assert set(get_metrics_at_idx(1)) == expected
+    assert set(get_metrics_at_idx(2)) == expected
 
-    expected = model.val_losses[2]
-    assert get_metrics_at_idx(1)["valid_loss_0_step"] == expected
-    expected = model.val_losses[3]
-    assert get_metrics_at_idx(2)["valid_loss_0_step"] == expected
+    assert get_metrics_at_idx(1)["valid_loss_0_step"] == model.val_losses[2]
+    assert get_metrics_at_idx(2)["valid_loss_0_step"] == model.val_losses[3]
 
-    expected = ['valid_loss_0_epoch', 'valid_loss_1', 'epoch', 'global_step']
-    assert sorted(get_metrics_at_idx(3)) == sorted(expected)
+    assert set(get_metrics_at_idx(3)) == {'valid_loss_0_epoch', 'valid_loss_1', 'epoch'}
 
-    expected = torch.stack(model.val_losses[2:4]).mean()
-    assert get_metrics_at_idx(3)["valid_loss_1"] == expected
-    expected = ['valid_loss_0_step', 'valid_loss_2', 'global_step']
+    assert get_metrics_at_idx(3)["valid_loss_1"] == torch.stack(model.val_losses[2:4]).mean()
 
-    assert sorted(get_metrics_at_idx(4)) == sorted(expected)
-    assert sorted(get_metrics_at_idx(5)) == sorted(expected)
+    expected = {'valid_loss_0_step', 'valid_loss_2'}
+    assert set(get_metrics_at_idx(4)) == expected
+    assert set(get_metrics_at_idx(5)) == expected
 
-    expected = model.val_losses[4]
-    assert get_metrics_at_idx(4)["valid_loss_0_step"] == expected
-    expected = model.val_losses[5]
-    assert get_metrics_at_idx(5)["valid_loss_0_step"] == expected
+    assert get_metrics_at_idx(4)["valid_loss_0_step"] == model.val_losses[4]
+    assert get_metrics_at_idx(5)["valid_loss_0_step"] == model.val_losses[5]
 
-    expected = ['valid_loss_0_epoch', 'valid_loss_1', 'epoch', 'global_step']
-    assert sorted(get_metrics_at_idx(6)) == sorted(expected)
+    assert set(get_metrics_at_idx(6)) == {'valid_loss_0_epoch', 'valid_loss_1', 'epoch'}
 
-    expected = torch.stack(model.val_losses[4:]).mean()
-    assert get_metrics_at_idx(6)["valid_loss_1"] == expected
+    assert get_metrics_at_idx(6)["valid_loss_1"] == torch.stack(model.val_losses[4:]).mean()
 
     results = trainer.test(model)
-    expected_callback_metrics = {
+    assert set(trainer.callback_metrics) == {
         'train_loss',
         'valid_loss_0_epoch',
         'valid_loss_0',
-        'debug_epoch',
         'valid_loss_1',
         'test_loss',
     }
-    assert set(trainer.callback_metrics) == expected_callback_metrics
-    assert set(results[0]) == {'test_loss', 'debug_epoch'}
+    assert set(results[0]) == {'test_loss'}
diff --git a/tests/trainer/logging_/test_logger_connector.py b/tests/trainer/logging_/test_logger_connector.py
index e0e1c3cdf42ec..592fde1569344 100644
--- a/tests/trainer/logging_/test_logger_connector.py
+++ b/tests/trainer/logging_/test_logger_connector.py
@@ -11,12 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""
-Tests to ensure that the training loop works with a dict (1.0)
-"""
-import os
-from copy import deepcopy
-from typing import Any, Callable
 from unittest import mock
 
 import pytest
@@ -26,251 +20,14 @@
 
 from pytorch_lightning import LightningModule
 from pytorch_lightning.callbacks.base import Callback
-from pytorch_lightning.core.step_result import Result
 from pytorch_lightning.trainer import Trainer
 from pytorch_lightning.trainer.connectors.logger_connector.fx_validator import FxValidator
-from pytorch_lightning.trainer.connectors.logger_connector.metrics_holder import MetricsHolder
+from pytorch_lightning.trainer.connectors.logger_connector.result import MetricSource, ResultCollection
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from tests.helpers.boring_model import BoringModel, RandomDataset
 from tests.helpers.runif import RunIf
 
 
-def decorator_with_arguments(fx_name: str = '', hook_fx_name: str = None) -> Callable:
-
-    def decorator(func: Callable) -> Callable:
-
-        def wrapper(self, *args, **kwargs) -> Any:
-            # Set information
-            self._current_fx_name = fx_name
-            self._current_hook_fx_name = hook_fx_name
-            self._results = Result()
-
-            result = func(self, *args, **kwargs)
-
-            # cache metrics
-            self.trainer.logger_connector.cache_logged_metrics()
-            return result
-
-        return wrapper
-
-    return decorator
-
-
-def test__logger_connector__epoch_result_store__train(tmpdir):
-    """
-    Tests that LoggerConnector will properly capture logged information
-    and reduce them
-    """
-
-    class TestModel(BoringModel):
-
-        train_losses = []
-
-        @decorator_with_arguments(fx_name="training_step")
-        def training_step(self, batch, batch_idx):
-            output = self.layer(batch)
-            loss = self.loss(batch, output)
-
-            self.train_losses.append(loss)
-
-            self.log("train_loss", loss, on_step=True, on_epoch=True)
-
-            return {"loss": loss}
-
-        def training_step_end(self, *_):
-            self.train_results = deepcopy(self.trainer.logger_connector.cached_results)
-
-    model 
= TestModel() - model.training_epoch_end = None - model.val_dataloader = None - - trainer = Trainer( - default_root_dir=tmpdir, - limit_train_batches=2, - limit_val_batches=4, - max_epochs=1, - log_every_n_steps=1, - weights_summary=None, - ) - trainer.fit(model) - - train_results = model.train_results - - assert len(train_results(fx_name="training_step", dl_idx=0, opt_idx=0)) == 2 - generated = train_results(fx_name="training_step", dl_idx=0, opt_idx=0, batch_idx=0, split_idx=0)["train_loss"] - assert generated == model.train_losses[0] - generated = train_results(fx_name="training_step", dl_idx=0, opt_idx=0, batch_idx=1, split_idx=0)["train_loss"] - assert generated == model.train_losses[1] - - assert train_results.has_reduced is not True - - train_results.has_batch_loop_finished = True - - assert train_results.has_reduced is True - - generated = train_results(fx_name="training_step", dl_idx=0, opt_idx=0, reduced=True)['train_loss_epoch'].item() - excepted = torch.stack(model.train_losses).mean().item() - assert generated == excepted - - -def test__logger_connector__epoch_result_store__train__tbptt(tmpdir): - """ - Tests that LoggerConnector will properly capture logged information with ttbt - and reduce them - """ - truncated_bptt_steps = 2 - sequence_size = 30 - batch_size = 30 - - x_seq = torch.rand(batch_size, sequence_size, 1) - y_seq_list = torch.rand(batch_size, sequence_size, 1).tolist() - - class MockSeq2SeqDataset(torch.utils.data.Dataset): - - def __getitem__(self, i): - return x_seq, y_seq_list - - def __len__(self): - return 1 - - class TestModel(BoringModel): - - train_losses = [] - - def __init__(self): - super().__init__() - self.test_hidden = None - self.layer = torch.nn.Linear(2, 2) - - @decorator_with_arguments(fx_name="training_step") - def training_step(self, batch, batch_idx, hiddens): - assert hiddens == self.test_hidden, "Hidden state not persistent between tbptt steps" - self.test_hidden = torch.rand(1) - - x_tensor, y_list = batch - assert x_tensor.shape[1] == truncated_bptt_steps, "tbptt split Tensor failed" - - y_tensor = torch.tensor(y_list, dtype=x_tensor.dtype) - assert y_tensor.shape[1] == truncated_bptt_steps, "tbptt split list failed" - - pred = self(x_tensor.view(batch_size, truncated_bptt_steps)) - loss = torch.nn.functional.mse_loss(pred, y_tensor.view(batch_size, truncated_bptt_steps)) - - self.train_losses.append(loss) - - self.log('a', loss, on_epoch=True) - - return {'loss': loss, 'hiddens': self.test_hidden} - - def on_train_epoch_start(self) -> None: - self.test_hidden = None - - def train_dataloader(self): - return torch.utils.data.DataLoader( - dataset=MockSeq2SeqDataset(), - batch_size=batch_size, - shuffle=False, - sampler=None, - ) - - def training_step_end(self, training_step_output): - self.train_results = deepcopy(self.trainer.logger_connector.cached_results) - # must return - return training_step_output - - model = TestModel() - model.training_epoch_end = None - model.example_input_array = torch.randn(5, truncated_bptt_steps) - - trainer = Trainer( - default_root_dir=tmpdir, - limit_train_batches=10, - limit_val_batches=0, - truncated_bptt_steps=truncated_bptt_steps, - max_epochs=1, - log_every_n_steps=1, - weights_summary=None, - ) - trainer.fit(model) - - train_results = model.train_results - - generated = train_results(fx_name="training_step", dl_idx=0, opt_idx=0, batch_idx=0) - assert len(generated) == len(model.train_losses) - - # assert reduction didn't happen yet - assert train_results.has_reduced is False - - # Launch reduction - 
train_results.has_batch_loop_finished = True - - # assert reduction did happen - assert train_results.has_reduced is True - - generated = train_results(fx_name="training_step", dl_idx=0, opt_idx=0, reduced=True)['a_epoch'].item() - assert generated == torch.stack(model.train_losses).mean().item() - - -@pytest.mark.parametrize('num_dataloaders', [1, 2]) -def test__logger_connector__epoch_result_store__test_multi_dataloaders(tmpdir, num_dataloaders): - """ - Tests that LoggerConnector will properly capture logged information in multi dataloaders scenario - """ - - class TestModel(BoringModel): - test_losses = {dl_idx: [] for dl_idx in range(num_dataloaders)} - - @decorator_with_arguments(fx_name="test_step") - def test_step(self, batch, batch_idx, dl_idx=0): - output = self.layer(batch) - loss = self.loss(batch, output) - self.test_losses[dl_idx].append(loss) - self.log("test_loss", loss, on_step=True, on_epoch=True) - return {"test_loss": loss} - - def on_test_batch_end(self, *args, **kwargs): - # save objects as it will be reset at the end of epoch. - self.batch_results = deepcopy(self.trainer.logger_connector.cached_results) - - def on_test_epoch_end(self): - # save objects as it will be reset at the end of epoch. - self.reduce_results = deepcopy(self.trainer.logger_connector.cached_results) - - def test_dataloader(self): - return [super().test_dataloader()] * num_dataloaders - - model = TestModel() - model.test_epoch_end = None - limit_test_batches = 4 - - trainer = Trainer( - default_root_dir=tmpdir, - limit_train_batches=0, - limit_val_batches=0, - limit_test_batches=limit_test_batches, - max_epochs=1, - log_every_n_steps=1, - weights_summary=None, - ) - trainer.test(model) - - test_results = model.batch_results - - generated = test_results(fx_name="test_step") - assert len(generated) == num_dataloaders - - for dl_idx in range(num_dataloaders): - generated = test_results(fx_name="test_step", dl_idx=dl_idx) - assert len(generated) == limit_test_batches - - test_results = model.reduce_results - - for dl_idx in range(num_dataloaders): - expected = torch.stack(model.test_losses[dl_idx]).mean() - generated = test_results(fx_name="test_step", dl_idx=dl_idx, reduced=True)["test_loss_epoch"] - torch.testing.assert_allclose(generated, expected) - - def test_fx_validator(tmpdir): funcs_name = sorted([f for f in dir(Callback) if not f.startswith('_')]) @@ -360,7 +117,8 @@ def test_fx_validator(tmpdir): # This summarizes where and what is currently possible to log using `self.log` is_stage = "train" in func_name or "test" in func_name or "validation" in func_name is_start = "start" in func_name or "batch" in func_name - on_step = is_stage and is_start + is_epoch = "epoch" in func_name + on_step = is_stage and not is_start and not is_epoch on_epoch = True # creating allowed condition allowed = ( @@ -444,56 +202,6 @@ def test_dataloader(self): trainer.test(model, ckpt_path=None) -@pytest.mark.parametrize('to_float', [False, True]) -def test_metrics_holder(to_float, tmpdir): - - device = "cuda" if torch.cuda.is_available() else "cpu" - preds = torch.tensor([[0.9, 0.1]], device=device) - - def is_float(value: Any) -> bool: - return isinstance(value, float) - - excepted_function = is_float if to_float else torch.is_tensor - targets = torch.tensor([1], device=device) - acc = Accuracy().to(device) - metric_holder = MetricsHolder(to_float=to_float) - metric_holder.update({ - "x": 1, - "y": torch.tensor(2), - "z": acc(preds, targets), - }) - metric_holder.convert(device) - metrics = metric_holder.metrics - 
assert excepted_function(metrics["x"]) - assert excepted_function(metrics["y"]) - assert excepted_function(metrics["z"]) - - -def test_metric_holder_raises(tmpdir): - """Check that an error is raised when trying to convert non-scalar tensors""" - - class TestModel(BoringModel): - - def validation_step(self, batch, *args, **kwargs): - output = self(batch) - self.log('test', output) - - def test_step(self, *args, **kwargs): - return self.validation_step(*args, **kwargs) - - model = TestModel() - model.validation_epoch_end = None - model.test_epoch_end = None - - trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True) - - match = "The metric `test` does not contain a single element" - with pytest.raises(MisconfigurationException, match=match): - trainer.validate(model) - with pytest.raises(MisconfigurationException, match=match): - trainer.test(model) - - def test_can_return_tensor_with_more_than_one_element(tmpdir): """Ensure {validation,test}_step return values are not included as callback metrics. #6623""" @@ -562,7 +270,7 @@ def validation_step(self, *args, **kwargs): model = TestModel() model.validation_epoch_end = None - trainer = Trainer(default_root_dir=tmpdir, max_steps=5) + trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=2) trainer.fit(model) logged = trainer.logged_metrics @@ -575,33 +283,6 @@ def validation_step(self, *args, **kwargs): assert 'val_loss_custom_naming_1' in logged -@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) -def test_logged_metrics_steps(tmpdir): - - class TestModel(BoringModel): - - def validation_step(self, batch, batch_idx): - loss_val = torch.randn(1) - self.log('val_loss', loss_val) - return loss_val - - model = TestModel() - model.validation_epoch_end = None - - trainer = Trainer( - default_root_dir=tmpdir, - limit_train_batches=2, - limit_val_batches=2, - max_epochs=2, - log_every_n_steps=1, - weights_summary=None, - ) - trainer.fit(model) - - assert trainer.dev_debugger.logged_metrics[0]['global_step'] == 1 - assert trainer.dev_debugger.logged_metrics[1]['global_step'] == 3 - - def test_metrics_reset(tmpdir): """Tests that metrics are reset correctly after the end of the train/val/test epoch.""" @@ -611,48 +292,63 @@ def __init__(self): super().__init__() self.layer = torch.nn.Linear(32, 1) - for stage in ['train', 'val', 'test']: - acc = Accuracy() - acc.reset = mock.Mock(side_effect=acc.reset) - ap = AveragePrecision(num_classes=1, pos_label=1) - ap.reset = mock.Mock(side_effect=ap.reset) - self.add_module(f"acc_{stage}", acc) - self.add_module(f"ap_{stage}", ap) + def _create_metrics(self): + acc = Accuracy() + acc.reset = mock.Mock(side_effect=acc.reset) + ap = AveragePrecision(num_classes=1, pos_label=1) + ap.reset = mock.Mock(side_effect=ap.reset) + return acc, ap + + def setup(self, stage): + fn = stage + if fn == 'fit': + for stage in ('train', 'validate'): + acc, ap = self._create_metrics() + self.add_module(f"acc_{fn}_{stage}", acc) + self.add_module(f"ap_{fn}_{stage}", ap) + else: + acc, ap = self._create_metrics() + stage = self.trainer.state.stage + self.add_module(f"acc_{fn}_{stage}", acc) + self.add_module(f"ap_{fn}_{stage}", ap) def forward(self, x): return self.layer(x) - def _step(self, stage, batch): - labels = (batch.detach().sum(1) > 0).float() # Fake some targets - logits = self.forward(batch) - loss = torch.nn.functional.binary_cross_entropy_with_logits(logits, labels.unsqueeze(1)) - probs = torch.sigmoid(logits.detach()) - self.log(f"loss/{stage}", loss) + def _step(self, batch): + fn, stage = 
self.trainer.state.fn, self.trainer.state.stage
+
+        logits = self(batch)
+        loss = logits.sum()
+        self.log(f"loss/{fn}_{stage}", loss)
 
-        acc = self._modules[f"acc_{stage}"]
-        ap = self._modules[f"ap_{stage}"]
+        acc = self._modules[f"acc_{fn}_{stage}"]
+        ap = self._modules[f"ap_{fn}_{stage}"]
 
-        labels_int = labels.to(torch.long)
-        acc(probs.flatten(), labels_int)
-        ap(probs.flatten(), labels_int)
+        preds = torch.rand(len(batch))  # Fake preds
+        labels = torch.randint(0, 2, [len(batch)])  # Fake binary targets (randint's upper bound is exclusive)
+        acc(preds, labels)
+        ap(preds, labels)
 
         # Metric.forward calls reset so reset the mocks here
         acc.reset.reset_mock()
         ap.reset.reset_mock()
 
-        self.log(f"{stage}/accuracy", acc)
-        self.log(f"{stage}/ap", ap)
+        self.log(f"acc/{fn}_{stage}", acc)
+        self.log(f"ap/{fn}_{stage}", ap)
 
         return loss
 
     def training_step(self, batch, batch_idx, *args, **kwargs):
-        return self._step('train', batch)
+        return self._step(batch)
 
     def validation_step(self, batch, batch_idx, *args, **kwargs):
-        return self._step('val', batch)
+        if self.trainer.sanity_checking:
+            return
+        return self._step(batch)
 
     def test_step(self, batch, batch_idx, *args, **kwargs):
-        return self._step('test', batch)
+        return self._step(batch)
 
     def configure_optimizers(self):
         optimizer = torch.optim.SGD(self.layer.parameters(), lr=0.1)
@@ -668,31 +364,11 @@ def val_dataloader(self):
     def test_dataloader(self):
         return DataLoader(RandomDataset(32, 64))
 
-    def _assert_epoch_end(self, stage):
-        acc = self._modules[f"acc_{stage}"]
-        ap = self._modules[f"ap_{stage}"]
-
-        acc.reset.asset_not_called()
-        ap.reset.assert_not_called()
-
-    def on_train_epoch_end(self):
-        self._assert_epoch_end('train')
-
-    def on_validation_epoch_end(self):
-        self._assert_epoch_end('val')
-
-    def on_test_epoch_end(self):
-        self._assert_epoch_end('test')
-
-    def _assert_called(model, stage):
-        acc = model._modules[f"acc_{stage}"]
-        ap = model._modules[f"ap_{stage}"]
-
+    def _assert_called(model, fn, stage):
+        acc = model._modules[f"acc_{fn}_{stage}"]
+        ap = model._modules[f"ap_{fn}_{stage}"]
        acc.reset.assert_called_once()
-        acc.reset.reset_mock()
-
        ap.reset.assert_called_once()
-        ap.reset.reset_mock()
 
     model = TestModel()
     trainer = Trainer(
@@ -702,14 +378,126 @@ def _assert_called(model, stage):
         limit_test_batches=2,
         max_epochs=1,
         progress_bar_refresh_rate=0,
+        num_sanity_val_steps=2,
+        checkpoint_callback=False,
     )
     trainer.fit(model)
-    _assert_called(model, 'train')
-    _assert_called(model, 'val')
+    _assert_called(model, 'fit', 'train')
+    _assert_called(model, 'fit', 'validate')
 
     trainer.validate(model)
-    _assert_called(model, 'val')
+    _assert_called(model, 'validate', 'validate')
 
     trainer.test(model)
-    _assert_called(model, 'test')
+    _assert_called(model, 'test', 'test')
+
+
+def test_result_collection_on_tensor_with_mean_reduction():
+    result_collection = ResultCollection(True, torch.device("cpu"))
+    product = [(True, True), (False, True), (True, False), (False, False)]
+    values = torch.arange(1, 10).float()  # need to convert to float() due to precision issues using torch 1.4
+    batches = values * values
+
+    for i, v in enumerate(values):
+        for prog_bar in [False, True]:
+            for logger in [False, True]:
+                for on_step, on_epoch in product:
+                    name = "loss"
+                    if on_step:
+                        name += "_on_step"
+                    if on_epoch:
+                        name += "_on_epoch"
+                    if prog_bar:
+                        name += "_prog_bar"
+                    if logger:
+                        name += "_logger"
+                    result_collection.log(
+                        "training_step",
+                        name,
+                        v,
+                        on_step=on_step,
+                        on_epoch=on_epoch,
+                        batch_size=batches[i],
+                        prog_bar=prog_bar,
+                        logger=logger,
+                    )
+
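+    # sanity check of the weighted-mean bookkeeping: with values v = 1..9 logged with
+    # batch sizes v**2, the accumulated weighted sum is sum(v**3) = 2025 and the
+    # accumulated batch count is sum(v**2) = 285, so the epoch mean below is 2025 / 285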
+    total_value = sum(values * batches)
+    total_batches = sum(batches)
+    assert result_collection["training_step.loss_on_step_on_epoch"].value == total_value
+    assert result_collection["training_step.loss_on_step_on_epoch"].cumulated_batch_size == total_batches
+
+    batch_metrics = result_collection.metrics(True)
+    max_ = max(values)
+    assert batch_metrics[MetricSource.PBAR] == {
+        'loss_on_step_on_epoch_prog_bar_step': max_,
+        'loss_on_step_on_epoch_prog_bar_logger_step': max_,
+        'loss_on_step_prog_bar': max_,
+        'loss_on_step_prog_bar_logger': max_,
+    }
+    assert batch_metrics[MetricSource.LOG] == {
+        'loss_on_step_on_epoch_logger_step': max_,
+        'loss_on_step_logger': max_,
+        'loss_on_step_on_epoch_prog_bar_logger_step': max_,
+        'loss_on_step_prog_bar_logger': max_,
+    }
+    assert batch_metrics[MetricSource.CALLBACK] == {
+        'loss_on_step': max_,
+        'loss_on_step_logger': max_,
+        'loss_on_step_on_epoch': max_,
+        'loss_on_step_on_epoch_logger': max_,
+        'loss_on_step_on_epoch_logger_step': max_,
+        'loss_on_step_on_epoch_prog_bar': max_,
+        'loss_on_step_on_epoch_prog_bar_logger': max_,
+        'loss_on_step_on_epoch_prog_bar_logger_step': max_,
+        'loss_on_step_on_epoch_prog_bar_step': max_,
+        'loss_on_step_on_epoch_step': max_,
+        'loss_on_step_prog_bar': max_,
+        'loss_on_step_prog_bar_logger': max_,
+    }
+
+    epoch_metrics = result_collection.metrics(False)
+    mean = total_value / total_batches
+    assert epoch_metrics[MetricSource.PBAR] == {
+        'loss_on_epoch_prog_bar': mean,
+        'loss_on_epoch_prog_bar_logger': mean,
+        'loss_on_step_on_epoch_prog_bar_epoch': mean,
+        'loss_on_step_on_epoch_prog_bar_logger_epoch': mean,
+    }
+    assert epoch_metrics[MetricSource.LOG] == {
+        'loss_on_epoch_logger': mean,
+        'loss_on_epoch_prog_bar_logger': mean,
+        'loss_on_step_on_epoch_logger_epoch': mean,
+        'loss_on_step_on_epoch_prog_bar_logger_epoch': mean
+    }
+    assert epoch_metrics[MetricSource.CALLBACK] == {
+        'loss_on_epoch': mean,
+        'loss_on_epoch_logger': mean,
+        'loss_on_epoch_prog_bar': mean,
+        'loss_on_epoch_prog_bar_logger': mean,
+        'loss_on_step_on_epoch': mean,
+        'loss_on_step_on_epoch_epoch': mean,
+        'loss_on_step_on_epoch_logger': mean,
+        'loss_on_step_on_epoch_logger_epoch': mean,
+        'loss_on_step_on_epoch_prog_bar': mean,
+        'loss_on_step_on_epoch_prog_bar_epoch': mean,
+        'loss_on_step_on_epoch_prog_bar_logger': mean,
+        'loss_on_step_on_epoch_prog_bar_logger_epoch': mean
+    }
+
+
+def test_logged_metrics_has_logged_epoch_value(tmpdir):
+
+    class TestModel(BoringModel):
+
+        def training_step(self, batch, batch_idx):
+            self.log('epoch', -batch_idx, logger=True)
+            return super().training_step(batch, batch_idx)
+
+    model = TestModel()
+    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=2)
+    trainer.fit(model)
+
+    # the automatically logged `epoch` value must not override a manually logged one
+    assert trainer.logged_metrics == {'epoch': -1}
diff --git a/tests/trainer/logging_/test_train_loop_logging.py b/tests/trainer/logging_/test_train_loop_logging.py
index 546fb9ff8fdac..b26e3fc83d25c 100644
--- a/tests/trainer/logging_/test_train_loop_logging.py
+++ b/tests/trainer/logging_/test_train_loop_logging.py
@@ -12,74 +12,67 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
""" -Tests to ensure that the training loop works with a dict (1.0) +Test logging in the training loop """ import collections import itertools -import os -from unittest import mock +from re import escape import numpy as np import pytest import torch -from torch.utils.data import Dataset +from torchmetrics import Accuracy import pytorch_lightning as pl from pytorch_lightning import callbacks, Trainer from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint -from pytorch_lightning.core.lightning import LightningModule -from tests.helpers.boring_model import BoringModel, RandomDictDataset, RandomDictStringDataset -from tests.helpers.deterministic_model import DeterministicModel +from pytorch_lightning.utilities.exceptions import MisconfigurationException +from tests.helpers.boring_model import BoringModel, RandomDictDataset from tests.helpers.runif import RunIf -@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) def test__training_step__log(tmpdir): """ Tests that only training_step can be used """ - class TestModel(DeterministicModel): + class TestModel(BoringModel): def training_step(self, batch, batch_idx): - acc = self.step(batch, batch_idx) - acc = acc + batch_idx + out = super().training_step(batch, batch_idx) + loss = out['loss'] # ----------- # default # ----------- - self.log('default', acc) + self.log('default', loss) # ----------- # logger # ----------- # on_step T on_epoch F - self.log('l_s', acc, on_step=True, on_epoch=False, prog_bar=False, logger=True) + self.log('l_s', loss, on_step=True, on_epoch=False, prog_bar=False, logger=True) # on_step F on_epoch T - self.log('l_e', acc, on_step=False, on_epoch=True, prog_bar=False, logger=True) + self.log('l_e', loss, on_step=False, on_epoch=True, prog_bar=False, logger=True) # on_step T on_epoch T - self.log('l_se', acc, on_step=True, on_epoch=True, prog_bar=False, logger=True) + self.log('l_se', loss, on_step=True, on_epoch=True, prog_bar=False, logger=True) # ----------- # pbar # ----------- # on_step T on_epoch F - self.log('p_s', acc, on_step=True, on_epoch=False, prog_bar=True, logger=False) + self.log('p_s', loss, on_step=True, on_epoch=False, prog_bar=True, logger=False) # on_step F on_epoch T - self.log('p_e', acc, on_step=False, on_epoch=True, prog_bar=True, logger=False) + self.log('p_e', loss, on_step=False, on_epoch=True, prog_bar=True, logger=False) # on_step T on_epoch T - self.log('p_se', acc, on_step=True, on_epoch=True, prog_bar=True, logger=False) - - self.training_step_called = True - return acc + self.log('p_se', loss, on_step=True, on_epoch=True, prog_bar=True, logger=False) - def backward(self, loss, optimizer, optimizer_idx): - return LightningModule.backward(self, loss, optimizer, optimizer_idx) + return loss model = TestModel() model.val_dataloader = None @@ -95,14 +88,8 @@ def backward(self, loss, optimizer, optimizer_idx): ) trainer.fit(model) - # make sure correct steps were called - assert model.training_step_called - assert not model.training_step_end_called - assert not model.training_epoch_end_called - - # make sure all the metrics are available for callbacks - logged_metrics = set(trainer.logged_metrics.keys()) - expected_logged_metrics = { + logged_metrics = set(trainer.logged_metrics) + assert logged_metrics == { 'epoch', 'default', 'l_e', @@ -110,51 +97,36 @@ def backward(self, loss, optimizer, optimizer_idx): 'l_se_step', 'l_se_epoch', } - assert logged_metrics == expected_logged_metrics - pbar_metrics = set(trainer.progress_bar_metrics.keys()) - expected_pbar_metrics = { + 
pbar_metrics = set(trainer.progress_bar_metrics) + assert pbar_metrics == { 'p_e', 'p_s', 'p_se_step', 'p_se_epoch', } - assert pbar_metrics == expected_pbar_metrics - callback_metrics = set(trainer.callback_metrics.keys()) - callback_metrics.remove('debug_epoch') - expected_callback_metrics = set() - expected_callback_metrics = expected_callback_metrics.union(logged_metrics) - expected_callback_metrics = expected_callback_metrics.union(pbar_metrics) - expected_callback_metrics.update({'p_se', 'l_se'}) - expected_callback_metrics.remove('epoch') - assert callback_metrics == expected_callback_metrics + assert set(trainer.callback_metrics) == (logged_metrics | pbar_metrics | {'p_se', 'l_se'}) - {'epoch'} -@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) def test__training_step__epoch_end__log(tmpdir): """ - Tests that only training_step can be used + Tests that training_epoch_end can log """ - class TestModel(DeterministicModel): + class TestModel(BoringModel): def training_step(self, batch, batch_idx): - self.training_step_called = True - acc = self.step(batch, batch_idx) - acc = acc + batch_idx - self.log('a', acc, on_step=True, on_epoch=True) - self.log_dict({'a1': acc, 'a2': acc}) - return acc + out = super().training_step(batch, batch_idx) + loss = out['loss'] + self.log('a', loss, on_step=True, on_epoch=True) + self.log_dict({'a1': loss, 'a2': loss}) + return out def training_epoch_end(self, outputs): - self.training_epoch_end_called = True self.log('b1', outputs[0]['loss']) self.log('b', outputs[0]['loss'], on_epoch=True, prog_bar=True, logger=True) - def backward(self, loss, optimizer, optimizer_idx): - return LightningModule.backward(self, loss, optimizer, optimizer_idx) - model = TestModel() model.val_dataloader = None @@ -168,52 +140,33 @@ def backward(self, loss, optimizer, optimizer_idx): ) trainer.fit(model) - # make sure correct steps were called - assert model.training_step_called - assert not model.training_step_end_called - assert model.training_epoch_end_called - - # make sure all the metrics are available for callbacks - logged_metrics = set(trainer.logged_metrics.keys()) - expected_logged_metrics = {'epoch', 'a_step', 'a_epoch', 'b', 'b1', 'a1', 'a2'} - assert logged_metrics == expected_logged_metrics + logged_metrics = set(trainer.logged_metrics) + assert logged_metrics == {'epoch', 'a_step', 'a_epoch', 'b', 'b1', 'a1', 'a2'} - pbar_metrics = set(trainer.progress_bar_metrics.keys()) - expected_pbar_metrics = {'b'} - assert pbar_metrics == expected_pbar_metrics + pbar_metrics = set(trainer.progress_bar_metrics) + assert pbar_metrics == {'b'} - callback_metrics = set(trainer.callback_metrics.keys()) - callback_metrics.remove('debug_epoch') - expected_callback_metrics = set() - expected_callback_metrics = expected_callback_metrics.union(logged_metrics) - expected_callback_metrics = expected_callback_metrics.union(pbar_metrics) - expected_callback_metrics.remove('epoch') - expected_callback_metrics.add('a') - assert callback_metrics == expected_callback_metrics + assert set(trainer.callback_metrics) == (logged_metrics | pbar_metrics | {'a'}) - {'epoch'} -@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) @pytest.mark.parametrize(['batches', 'log_interval', 'max_epochs'], [(1, 1, 1), (64, 32, 2)]) def test__training_step__step_end__epoch_end__log(tmpdir, batches, log_interval, max_epochs): """ - Tests that only training_step can be used + Tests that training_step_end and training_epoch_end can log """ class TestModel(BoringModel): def training_step(self, batch, 
batch_idx): - self.training_step_called = True loss = self.step(batch[0]) self.log('a', loss, on_step=True, on_epoch=True) return loss def training_step_end(self, out): - self.training_step_end_called = True self.log('b', out, on_step=True, on_epoch=True, prog_bar=True, logger=True) return out def training_epoch_end(self, outputs): - self.training_epoch_end_called = True self.log('c', outputs[0]['loss'], on_epoch=True, prog_bar=True, logger=True) self.log('d/e/f', 2) @@ -230,34 +183,23 @@ def training_epoch_end(self, outputs): ) trainer.fit(model) - # make sure correct steps were called - assert model.training_step_called - assert model.training_step_end_called - assert model.training_epoch_end_called - # make sure all the metrics are available for callbacks - logged_metrics = set(trainer.logged_metrics.keys()) - expected_logged_metrics = {'a_step', 'a_epoch', 'b_step', 'b_epoch', 'c', 'd/e/f', 'epoch'} - assert logged_metrics == expected_logged_metrics - - pbar_metrics = set(trainer.progress_bar_metrics.keys()) - expected_pbar_metrics = {'c', 'b_epoch', 'b_step'} - assert pbar_metrics == expected_pbar_metrics + logged_metrics = set(trainer.logged_metrics) + assert logged_metrics == {'a_step', 'a_epoch', 'b_step', 'b_epoch', 'c', 'd/e/f', 'epoch'} - callback_metrics = set(trainer.callback_metrics.keys()) - callback_metrics.remove('debug_epoch') - expected_callback_metrics = set() - expected_callback_metrics = expected_callback_metrics.union(logged_metrics) - expected_callback_metrics = expected_callback_metrics.union(pbar_metrics) - expected_callback_metrics.update({'a', 'b'}) - expected_callback_metrics.remove('epoch') - assert callback_metrics == expected_callback_metrics + pbar_metrics = set(trainer.progress_bar_metrics) + assert pbar_metrics == {'c', 'b_epoch', 'b_step'} - # assert the loggers received the expected number - assert len(trainer.dev_debugger.logged_metrics) == ((batches / log_interval) * max_epochs) + max_epochs + assert set(trainer.callback_metrics) == (logged_metrics | pbar_metrics | {'a', 'b'}) - {'epoch'} -@pytest.mark.parametrize(['batches', 'fx', 'result'], [(1, min, 0), (2, max, 1), (11, max, 10)]) +@pytest.mark.parametrize(['batches', 'fx', 'result'], [ + (3, min, 0), + (3, torch.max, 2), + (11, max, 10), + (5, 'avg', 2), + (5, 'SUM', 10), +]) def test__training_step__log_max_reduce_fx(tmpdir, batches, fx, result): """ Tests that log works correctly with different tensor types @@ -267,7 +209,7 @@ class TestModel(BoringModel): def training_step(self, batch, batch_idx): acc = self.step(batch[0]) - self.log('foo', torch.tensor(batch_idx).long(), on_step=False, on_epoch=True, reduce_fx=fx) + self.log('foo', torch.tensor(batch_idx, dtype=torch.long), on_step=False, on_epoch=True, reduce_fx=fx) return acc def validation_step(self, batch, batch_idx): @@ -319,7 +261,9 @@ def __init__(self): def training_step(self, batch, batch_idx, hiddens): assert hiddens == self.test_hidden, "Hidden state not persistent between tbptt steps" - self.test_hidden = torch.rand(1) + if hiddens is not None: + assert hiddens.grad_fn is None + self.test_hidden = torch.tensor(2., requires_grad=True).pow(2) x_tensor, y_list = batch assert x_tensor.shape[1] == truncated_bptt_steps, "tbptt split Tensor failed" @@ -347,7 +291,6 @@ def train_dataloader(self): model = TestModel() model.training_epoch_end = None - model.example_input_array = torch.randn(5, truncated_bptt_steps) trainer = Trainer( default_root_dir=tmpdir, @@ -360,9 +303,7 @@ def train_dataloader(self): ) trainer.fit(model) - generated = 
set(trainer.logged_metrics.keys()) - expected = {'a_step', 'a_epoch', 'epoch'} - assert generated == expected + assert set(trainer.logged_metrics) == {'a_step', 'a_epoch', 'epoch'} def test_different_batch_types_for_sizing(tmpdir): @@ -397,105 +338,13 @@ def val_dataloader(self): limit_val_batches=2, max_epochs=1, weights_summary=None, + fast_dev_run=True, ) trainer.fit(model) - generated = set(trainer.logger_connector.logged_metrics) - expected = {'a_step', 'a_epoch', 'n_step', 'n_epoch', 'epoch'} - - assert generated == expected - - -def test_validation_step_with_string_data_logging(tmpdir): - - class TestModel(BoringModel): - - def on_train_epoch_start(self) -> None: - print("override any method to prove your bug") - - def training_step(self, batch, batch_idx): - output = self.layer(batch["x"]) - loss = self.loss(batch, output) - return {"loss": loss} - - def validation_step(self, batch, batch_idx): - output = self.layer(batch["x"]) - loss = self.loss(batch, output) - self.log("x", loss) - return {"x": loss} - - # fake data - train_data = torch.utils.data.DataLoader(RandomDictStringDataset(32, 64)) - val_data = torch.utils.data.DataLoader(RandomDictStringDataset(32, 64)) - - # model - model = TestModel() - trainer = Trainer( - default_root_dir=tmpdir, - limit_train_batches=1, - limit_val_batches=1, - max_epochs=1, - weights_summary=None, - ) - trainer.fit(model, train_data, val_data) - - -def test_nested_datasouce_batch(tmpdir): - - class NestedDictStringDataset(Dataset): - - def __init__(self, size, length): - self.len = length - self.data = torch.randn(length, size) - - def __getitem__(self, index): - x = { - 'post_text': ['bird is fast', 'big cat'], - 'dense_0': [ - torch.tensor([-0.1000, 0.2000], dtype=torch.float64), - torch.tensor([1, 1], dtype=torch.uint8), - ], - 'post_id': ['115', '116'], - 'label': [torch.tensor([0, 1]), torch.tensor([1, 1], dtype=torch.uint8)] - } - return x - - def __len__(self): - return self.len - - class TestModel(BoringModel): - - def on_train_epoch_start(self) -> None: - print("override any method to prove your bug") - - def training_step(self, batch, batch_idx): - output = self.layer(torch.rand(32)) - loss = self.loss(batch, output) - return {"loss": loss} - - def validation_step(self, batch, batch_idx): - output = self.layer(torch.rand(32)) - loss = self.loss(batch, output) - self.log("x", loss) - return {"x": loss} - - # fake data - train_data = torch.utils.data.DataLoader(NestedDictStringDataset(32, 64)) - val_data = torch.utils.data.DataLoader(NestedDictStringDataset(32, 64)) - - # model - model = TestModel() - trainer = Trainer( - default_root_dir=tmpdir, - limit_train_batches=1, - limit_val_batches=1, - max_epochs=1, - weights_summary=None, - ) - trainer.fit(model, train_data, val_data) + assert set(trainer.logged_metrics) == {'a_step', 'a_epoch', 'n_step', 'n_epoch', 'epoch'} -@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) def test_log_works_in_train_callback(tmpdir): """ Tests that log can be called within callback @@ -503,214 +352,126 @@ def test_log_works_in_train_callback(tmpdir): class TestCallback(callbacks.Callback): - # helpers - count = 1 + count = 0 choices = [False, True] + # used to compute expected values - callback_funcs_called = collections.defaultdict(list) - funcs_called_count = collections.defaultdict(int) - funcs_attr = {} - - def make_logging( - self, pl_module: pl.LightningModule, func_name, func_idx, on_steps=[], on_epochs=[], prob_bars=[] - ): - self.funcs_called_count[func_name] += 1 - iterate = 
list(itertools.product(*[on_steps, on_epochs, prob_bars]))
- for idx, (on_step, on_epoch, prog_bar) in enumerate(iterate):
- # run logging
- custom_func_name = f"{func_idx}_{idx}_{func_name}"
- pl_module.log(
- custom_func_name, self.count * func_idx, on_step=on_step, on_epoch=on_epoch, prog_bar=prog_bar
- )
-
- # catch information for verification
-
- # on on_train_start is outside the main loop. Won't be called
- if func_name == "on_train_start":
- self.callback_funcs_called[func_name].append([self.count * func_idx])
-
- # Saved only values from second epoch, so we can compute its mean or latest.
- if pl_module.trainer.current_epoch == 1:
- self.callback_funcs_called[func_name].append([self.count * func_idx])
-
- forked = on_step and on_epoch
-
- self.funcs_attr[custom_func_name] = {
- "on_step": on_step,
- "on_epoch": on_epoch,
- "prog_bar": prog_bar,
- "forked": forked,
- "func_name": func_name
- }
-
- if on_step and on_epoch:
- self.funcs_attr[f"{custom_func_name}_step"] = {
- "on_step": True,
- "on_epoch": False,
- "prog_bar": prog_bar,
- "forked": False,
- "func_name": func_name
- }
-
- self.funcs_attr[f"{custom_func_name}_epoch"] = {
- "on_step": False,
- "on_epoch": True,
- "prog_bar": prog_bar,
- "forked": False,
- "func_name": func_name
- }
+ logged_values = collections.defaultdict(list)
+ call_counter = collections.Counter()
+ logged_arguments = {}

- def on_train_start(self, trainer, pl_module):
- self.make_logging(
- pl_module, 'on_train_start', 1, on_steps=self.choices, on_epochs=self.choices, prob_bars=self.choices
- )
+ def make_logging(self, pl_module, func_name, on_steps, on_epochs, prob_bars):
+ self.call_counter.update([func_name])

- def on_epoch_start(self, trainer, pl_module):
- self.make_logging(
- pl_module, 'on_epoch_start', 2, on_steps=self.choices, on_epochs=self.choices, prob_bars=self.choices
- )
+ for idx, (on_step, on_epoch, prog_bar) in enumerate(itertools.product(on_steps, on_epochs, prob_bars)):
+ fx = f"{func_name}_{idx}"
+ pl_module.log(fx, self.count, on_step=on_step, on_epoch=on_epoch, prog_bar=prog_bar)
+ self.logged_values[fx].append(self.count)
+ self.logged_arguments[fx] = {"on_step": on_step, "on_epoch": on_epoch, "prog_bar": prog_bar}
+ self.count += 1

- def on_train_epoch_start(self, trainer, pl_module):
+ def on_train_start(self, _, pl_module):
+ self.make_logging(pl_module, 'on_train_start', on_steps=[False], on_epochs=[True], prob_bars=self.choices)
+
+ def on_epoch_start(self, _, pl_module):
 self.make_logging(
- pl_module,
- 'on_train_epoch_start',
- 3,
- on_steps=self.choices,
- on_epochs=self.choices,
- prob_bars=self.choices
+ pl_module, 'on_epoch_start', on_steps=self.choices, on_epochs=[True], prob_bars=self.choices
 )

- def on_batch_end(self, trainer, pl_module):
+ def on_train_epoch_start(self, _, pl_module):
 self.make_logging(
- pl_module, 'on_batch_end', 6, on_steps=self.choices, on_epochs=self.choices, prob_bars=self.choices
+ pl_module, 'on_train_epoch_start', on_steps=self.choices, on_epochs=[True], prob_bars=self.choices
 )

- def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, dataloader_idx):
+ def on_batch_end(self, _, pl_module):
 self.make_logging(
- pl_module,
- 'on_train_batch_end',
- 7,
- on_steps=self.choices,
- on_epochs=self.choices,
- prob_bars=self.choices
+ pl_module, 'on_batch_end', on_steps=self.choices, on_epochs=self.choices, prob_bars=self.choices
 )
- # used to make sure aggregation works fine.
- # we should obtain func[value * c for c in range(1, max_epochs * limit_train_batches)])
- # with func = np.mean if on_epoch else func = np.max
- self.count += 1
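The rewritten `make_logging` above issues one `self.log` call per `(on_step, on_epoch, prog_bar)` combination and keys each metric by the enumeration index, so the final checks can recover which arguments produced it. A standalone illustration of that enumeration, using the same arguments the epoch-level hooks pass (nothing here is Lightning-specific):

    import itertools

    on_steps, on_epochs, prob_bars = [False, True], [True], [False, True]
    for idx, (on_step, on_epoch, prog_bar) in enumerate(itertools.product(on_steps, on_epochs, prob_bars)):
        # yields four keys per hook, e.g. 'on_epoch_start_0' .. 'on_epoch_start_3'
        print(f"on_epoch_start_{idx}", on_step, on_epoch, prog_bar)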
- def on_train_epoch_end(self, trainer, pl_module):
+ def on_train_batch_end(self, _, pl_module, *__):
 self.make_logging(
- pl_module, 'on_train_epoch_end', 8, on_steps=[False], on_epochs=self.choices, prob_bars=self.choices
+ pl_module, 'on_train_batch_end', on_steps=self.choices, on_epochs=self.choices, prob_bars=self.choices
 )

- def on_epoch_end(self, trainer, pl_module):
+ def on_train_epoch_end(self, _, pl_module):
 self.make_logging(
- pl_module, 'on_epoch_end', 9, on_steps=[False], on_epochs=self.choices, prob_bars=self.choices
+ pl_module, 'on_train_epoch_end', on_steps=[False], on_epochs=[True], prob_bars=self.choices
 )

- class TestModel(BoringModel):
+ def on_epoch_end(self, _, pl_module):
+ self.make_logging(pl_module, 'on_epoch_end', on_steps=[False], on_epochs=[True], prob_bars=self.choices)

- manual_loss = []
+ class TestModel(BoringModel):
+ seen_losses = []

 def training_step(self, batch, batch_idx):
- output = self.layer(batch)
- loss = self.loss(batch, output)
- self.manual_loss.append(loss)
- self.log('train_loss', loss)
+ loss = super().training_step(batch, batch_idx)['loss']
+ self.seen_losses.append(loss)
+ self.log('train_loss', loss, prog_bar=True)
 return {"loss": loss}

- max_epochs = 2
- limit_train_batches = 2
 model = TestModel()
- test_callback = TestCallback()
-
+ cb = TestCallback()
 trainer = Trainer(
 default_root_dir=tmpdir,
- limit_train_batches=limit_train_batches,
+ limit_train_batches=2,
 limit_val_batches=0,
- limit_test_batches=0,
- val_check_interval=0.,
 num_sanity_val_steps=0,
- max_epochs=max_epochs,
- callbacks=[test_callback]
+ max_epochs=1,
+ callbacks=[cb]
 )
 trainer.fit(model)

- assert test_callback.funcs_called_count["on_train_start"] == 1
- assert test_callback.funcs_called_count["on_epoch_start"] == 2
- assert test_callback.funcs_called_count["on_train_epoch_start"] == 2
- assert test_callback.funcs_called_count["on_batch_end"] == 4
- assert test_callback.funcs_called_count["on_epoch_end"] == 2
- assert test_callback.funcs_called_count["on_train_batch_end"] == 4
- assert test_callback.funcs_called_count["on_epoch_end"] == 2
- assert test_callback.funcs_called_count["on_train_epoch_end"] == 2
-
- # Make sure the func_name exists within callback_metrics.
If not, we missed some - callback_metrics_keys = [*trainer.callback_metrics.keys()] - for func_name in test_callback.callback_funcs_called.keys(): - is_in = False - for callback_metrics_key in callback_metrics_keys: - if func_name in callback_metrics_key: - is_in = True - assert is_in, (func_name, callback_metrics_keys) - - # function used to describe expected return logic - def get_expected_output(func_attr, original_values): - if func_attr["on_epoch"] and not func_attr["on_step"]: - # Apply mean on values - expected_output = np.mean(original_values) - else: - # Keep the latest value - expected_output = np.max(original_values) - return expected_output - # Make sure the func_name output equals the average from all logged values when on_epoch true - # pop extra keys - trainer.callback_metrics.pop("debug_epoch") - assert trainer.logged_metrics["train_loss"] == model.manual_loss[-1] - assert trainer.callback_metrics["train_loss"] == model.manual_loss[-1] - trainer.callback_metrics.pop("train_loss") + assert trainer.progress_bar_dict["train_loss"] == model.seen_losses[-1] + assert trainer.callback_metrics["train_loss"] == model.seen_losses[-1] - for func_name, output_value in trainer.callback_metrics.items(): - if torch.is_tensor(output_value): - output_value = output_value.item() - # get creation attr - func_attr = test_callback.funcs_attr[func_name] + assert cb.call_counter == { + 'on_train_start': 1, + 'on_epoch_start': 1, + 'on_train_epoch_start': 1, + 'on_train_batch_end': 2, + 'on_batch_end': 2, + 'on_train_epoch_end': 1, + 'on_epoch_end': 1 + } - # retrived orginal logged values - original_values = test_callback.callback_funcs_called[func_attr["func_name"]] + def get_expected(on_epoch, values): + reduction = np.mean if on_epoch else np.max + return reduction(values) - # compute expected output and compare to actual one - expected_output = get_expected_output(func_attr, original_values) - assert float(output_value) == float(expected_output) + for fx, value in trainer.callback_metrics.items(): + actual = value.item() + if fx not in cb.logged_arguments: + continue + on_epoch = cb.logged_arguments[fx]['on_epoch'] + values = cb.logged_values[fx] + expected = get_expected(on_epoch, values) + assert actual == expected - for func_name, func_attr in test_callback.funcs_attr.items(): - if func_attr["prog_bar"] and (func_attr["on_step"] or func_attr["on_epoch"]) and not func_attr["forked"]: - assert func_name in trainer.logger_connector.progress_bar_metrics - else: - assert func_name not in trainer.logger_connector.progress_bar_metrics + for fx, attrs in cb.logged_arguments.items(): + should_include = attrs["prog_bar"] and attrs["on_step"] ^ attrs["on_epoch"] + is_included = fx in trainer.logger_connector.progress_bar_metrics + assert is_included if should_include else not is_included -def test_logging_sync_dist_true_cpu(tmpdir): +@pytest.mark.parametrize('gpus', [None, pytest.param(1, marks=RunIf(min_gpus=1))]) +def test_logging_sync_dist_true(tmpdir, gpus): """ - Tests to ensure that the sync_dist flag works with CPU (should just return the original value) + Tests to ensure that the sync_dist flag works (should just return the original value) """ fake_result = 1 class TestModel(BoringModel): def training_step(self, batch, batch_idx): - acc = self.step(batch[0]) - self.log('foo', torch.tensor(fake_result), on_step=False, on_epoch=True, sync_dist=True, sync_dist_op='sum') - self.log('foo_2', 2, on_step=False, on_epoch=True, sync_dist=True, sync_dist_op='sum') - return acc + self.log('foo', 
fake_result, on_step=False, on_epoch=True, sync_dist=True, reduce_fx='sum') + self.log('foo_2', 2, on_step=False, on_epoch=True, sync_dist=True, reduce_fx='sum') + return super().training_step(batch, batch_idx) def validation_step(self, batch, batch_idx): - output = self.layer(batch) - loss = self.loss(batch, output) - self.log('bar', torch.tensor(fake_result), on_step=False, on_epoch=True, sync_dist=True, sync_dist_op='sum') - return {"x": loss} + self.log('bar', fake_result, on_step=False, on_epoch=True, sync_dist=True, reduce_fx='sum') + return super().validation_step(batch, batch_idx) model = TestModel() trainer = Trainer( @@ -719,6 +480,7 @@ def validation_step(self, batch, batch_idx): limit_val_batches=1, max_epochs=2, weights_summary=None, + gpus=gpus, ) trainer.fit(model) @@ -737,15 +499,14 @@ class TestLoggingSyncDistModel(BoringModel): def training_step(self, batch, batch_idx): acc = self.step(batch[0]) - self.log('foo', 1, on_step=False, on_epoch=True, sync_dist=True, sync_dist_op='SUM') + self.log('foo', 1, on_step=False, on_epoch=True, sync_dist=True, reduce_fx='SUM') self.log('cho', acc, on_step=False, on_epoch=True) return acc def validation_step(self, batch, batch_idx): - self.training_step_called = True output = self.layer(batch) loss = self.loss(batch, output) - self.log('bar', 2, on_step=False, on_epoch=True, sync_dist=True, sync_dist_op='AVG') + self.log('bar', 2, on_step=False, on_epoch=True, sync_dist=True, reduce_fx='AVG') return {"x": loss} model = TestLoggingSyncDistModel() @@ -765,41 +526,6 @@ def validation_step(self, batch, batch_idx): assert trainer.logged_metrics['bar'] == 2 -@RunIf(min_gpus=1) -def test_logging_sync_dist_true_gpu(tmpdir): - """ - Tests to ensure that the sync_dist flag works with GPU (should just return the original value) - """ - fake_result = 1 - - class TestModel(BoringModel): - - def training_step(self, batch, batch_idx): - acc = self.step(batch[0]) - self.log('foo', torch.tensor(fake_result), on_step=False, on_epoch=True, sync_dist=True, sync_dist_op='sum') - return acc - - def validation_step(self, batch, batch_idx): - output = self.layer(batch) - loss = self.loss(batch, output) - self.log('bar', torch.tensor(fake_result), on_step=False, on_epoch=True, sync_dist=True, sync_dist_op='sum') - return {"x": loss} - - model = TestModel() - trainer = Trainer( - default_root_dir=tmpdir, - limit_train_batches=1, - limit_val_batches=1, - max_epochs=2, - gpus=1, - weights_summary=None, - ) - trainer.fit(model) - - assert trainer.logged_metrics['foo'] == fake_result - assert trainer.logged_metrics['bar'] == fake_result - - def test_progress_bar_dict_contains_values_on_train_epoch_end(tmpdir): class TestModel(BoringModel): @@ -809,21 +535,20 @@ def training_step(self, *args): return super().training_step(*args) def on_train_epoch_end(self, *_): - self.on_train_epoch_end_called = True - self.epoch_end_called = True self.log( 'foo_2', torch.tensor(self.current_epoch), prog_bar=True, on_epoch=True, sync_dist=True, - sync_dist_op='sum' + reduce_fx='sum' ) + self.on_train_epoch_end_called = True def on_epoch_end(self): - self.epoch_end_called = True assert self.trainer.progress_bar_dict["foo"] == self.current_epoch assert self.trainer.progress_bar_dict["foo_2"] == self.current_epoch + self.on_epoch_end_called = True trainer = Trainer( default_root_dir=tmpdir, @@ -837,8 +562,8 @@ def on_epoch_end(self): ) model = TestModel() trainer.fit(model) - assert model.epoch_end_called assert model.on_train_epoch_end_called + assert model.on_epoch_end_called def 
test_logging_in_callbacks_with_log_function(tmpdir): @@ -934,3 +659,135 @@ def validation_step(self, batch, batch_idx): assert trainer.callback_metrics["val_acc"] == 8 / 32. assert "train_loss" in trainer.callback_metrics + + +@pytest.mark.parametrize( + 'value', + [None, dict(a=None), + dict(a=dict(b=None)), + dict(a=dict(b=1)), 'foo', [1, 2, 3], (1, 2, 3), [[1, 2], 3]] +) +def test_log_none_raises(tmpdir, value): + + class TestModel(BoringModel): + + def training_step(self, *args): + self.log("foo", value) + + trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=1) + model = TestModel() + match = escape(f"self.log(foo, {value})` was called") + with pytest.raises(ValueError, match=match): + trainer.fit(model) + + +def test_logging_raises(tmpdir): + + class TestModel(BoringModel): + + def training_step(self, batch, batch_idx): + self.log('foo/dataloader_idx_0', -1) + + trainer = Trainer(default_root_dir=tmpdir) + model = TestModel() + with pytest.raises(MisconfigurationException, match='`self.log` with the key `foo/dataloader_idx_0`'): + trainer.fit(model) + + class TestModel(BoringModel): + + def training_step(self, batch, batch_idx): + self.log('foo', Accuracy()) + + trainer = Trainer(default_root_dir=tmpdir) + model = TestModel() + with pytest.raises(MisconfigurationException, match='fix this by setting an attribute for the metric in your'): + trainer.fit(model) + + class TestModel(BoringModel): + + def __init__(self): + super().__init__() + self.bar = Accuracy() + + def training_step(self, batch, batch_idx): + self.log('foo', Accuracy()) + + trainer = Trainer(default_root_dir=tmpdir) + model = TestModel() + with pytest.raises( + MisconfigurationException, + match=r"`self.log\(foo, ..., metric_attribute=name\)` where `name` is one of \['bar'\]" + ): + trainer.fit(model) + + class TestModel(BoringModel): + + def training_step(self, *args): + self.log('foo', -1, prog_bar=False) + self.log('foo', -1, prog_bar=True) + return super().training_step(*args) + + trainer = Trainer(default_root_dir=tmpdir) + model = TestModel() + with pytest.raises(MisconfigurationException, match=r'self.log\(foo, ...\)` twice in `training_step`'): + trainer.fit(model) + + class TestModel(BoringModel): + + def training_step(self, *args): + self.log('foo', -1, reduce_fx=torch.argmax) + return super().training_step(*args) + + trainer = Trainer(default_root_dir=tmpdir) + model = TestModel() + with pytest.raises(MisconfigurationException, match=r'reduce_fx={min,max,mean,sum}\)` are currently supported'): + trainer.fit(model) + + +def test_sanity_metrics_are_reset(tmpdir): + + class TestModel(BoringModel): + + def validation_step(self, batch, batch_idx): + output = super().validation_step(batch, batch_idx) + if self.trainer.sanity_checking: + self.log("val_loss", output["x"], prog_bar=True, logger=True) + return output + + def training_step(self, batch, batch_idx): + loss = super().training_step(batch, batch_idx) + if batch_idx == 0: + assert self.trainer.logger_connector._progress_bar_metrics == {} + assert self.trainer.logger_connector._logged_metrics == {} + assert self.trainer.logger_connector._callback_metrics == {} + self.log("train_loss", loss, prog_bar=True, logger=True) + return loss + + trainer = Trainer( + default_root_dir=tmpdir, + max_epochs=1, + limit_train_batches=1, + limit_val_batches=2, + num_sanity_val_steps=2, + ) + trainer.fit(TestModel()) + + assert "val_loss" not in trainer.progress_bar_metrics + + +@RunIf(min_gpus=2) +def test_log_gpu_memory_without_logging_on_step(tmpdir): + + model = 
BoringModel() + trainer = Trainer( + default_root_dir=tmpdir, + max_epochs=1, + limit_train_batches=1, + limit_val_batches=0, + log_gpu_memory='all', + log_every_n_steps=1, + gpus=[1] + ) + trainer.fit(model) + + assert 'gpu_id: 1/memory.used (MB)' in trainer.logged_metrics diff --git a/tests/trainer/loops/test_evaluation_loop.py b/tests/trainer/loops/test_evaluation_loop.py index 278ed8619d0de..2a0f95a19209b 100644 --- a/tests/trainer/loops/test_evaluation_loop.py +++ b/tests/trainer/loops/test_evaluation_loop.py @@ -13,11 +13,15 @@ # limitations under the License. from unittest import mock +import torch +from torch.utils.data import DataLoader + from pytorch_lightning import Trainer -from tests.helpers.boring_model import BoringModel +from tests.helpers.boring_model import BoringModel, RandomDataset +from tests.helpers.runif import RunIf -@mock.patch("pytorch_lightning.trainer.evaluation_loop.EvaluationLoop.on_evaluation_epoch_end") +@mock.patch("pytorch_lightning.loops.dataloader.evaluation_loop.EvaluationLoop.on_evaluation_epoch_end") def test_on_evaluation_epoch_end(eval_epoch_end_mock, tmpdir): """ Tests that `on_evaluation_epoch_end` is called @@ -43,12 +47,12 @@ def test_on_evaluation_epoch_end(eval_epoch_end_mock, tmpdir): @mock.patch( - "pytorch_lightning.trainer.connectors.logger_connector.logger_connector.LoggerConnector.get_evaluate_epoch_results" + "pytorch_lightning.trainer.connectors.logger_connector.logger_connector.LoggerConnector.update_eval_epoch_metrics" ) -def test_log_epoch_metrics_before_on_evaluation_end(get_evaluate_epoch_results_mock, tmpdir): +def test_log_epoch_metrics_before_on_evaluation_end(update_eval_epoch_metrics_mock, tmpdir): """Test that the epoch metrics are logged before the `on_evalutaion_end` hook is fired""" order = [] - get_evaluate_epoch_results_mock.side_effect = lambda: order.append("log_epoch_metrics") + update_eval_epoch_metrics_mock.side_effect = lambda: order.append("log_epoch_metrics") class LessBoringModel(BoringModel): @@ -65,3 +69,52 @@ def on_validation_end(self): trainer.fit(LessBoringModel()) assert order == ["log_epoch_metrics", "on_validation_end"] + + +@RunIf(min_gpus=1) +def test_memory_consumption_validation(tmpdir): + """Test that the training batch is no longer in GPU memory when running validation""" + + initial_memory = torch.cuda.memory_allocated(0) + + class BoringLargeBatchModel(BoringModel): + + @property + def num_params(self): + return sum(p.numel() for p in self.parameters()) + + def train_dataloader(self): + # batch target memory >= 100x boring_model size + batch_size = self.num_params * 100 // 32 + 1 + return DataLoader(RandomDataset(32, 5000), batch_size=batch_size) + + def val_dataloader(self): + return self.train_dataloader() + + def training_step(self, batch, batch_idx): + # there is a batch and the boring model, but not two batches on gpu, assume 32 bit = 4 bytes + lower = 101 * self.num_params * 4 + upper = 201 * self.num_params * 4 + current = torch.cuda.memory_allocated(0) + assert lower < current + assert current - initial_memory < upper + return super().training_step(batch, batch_idx) + + def validation_step(self, batch, batch_idx): + # there is a batch and the boring model, but not two batches on gpu, assume 32 bit = 4 bytes + lower = 101 * self.num_params * 4 + upper = 201 * self.num_params * 4 + current = torch.cuda.memory_allocated(0) + assert lower < current + assert current - initial_memory < upper + return super().validation_step(batch, batch_idx) + + torch.cuda.empty_cache() + trainer = Trainer( + 
gpus=1, + default_root_dir=tmpdir, + fast_dev_run=2, + move_metrics_to_cpu=True, + weights_summary=None, + ) + trainer.fit(BoringLargeBatchModel()) diff --git a/tests/trainer/loops/test_evaluation_loop_flow.py b/tests/trainer/loops/test_evaluation_loop_flow.py index 67ed756630734..14cb4ce4ae7f8 100644 --- a/tests/trainer/loops/test_evaluation_loop_flow.py +++ b/tests/trainer/loops/test_evaluation_loop_flow.py @@ -19,6 +19,7 @@ from pytorch_lightning import Trainer from pytorch_lightning.core.lightning import LightningModule +from pytorch_lightning.trainer.states import RunningStage from tests.helpers.deterministic_model import DeterministicModel @@ -65,22 +66,20 @@ def backward(self, loss, optimizer, optimizer_idx): assert not model.validation_step_end_called assert not model.validation_epoch_end_called - # make sure training outputs what is expected - for batch_idx, batch in enumerate(model.train_dataloader()): - break - - out = trainer.train_loop.run_training_batch(batch, batch_idx, 0) + # simulate training manually + trainer.state.stage = RunningStage.TRAINING + batch_idx, batch = 0, next(iter(model.train_dataloader())) + out = trainer.fit_loop.epoch_loop.batch_loop.run(batch, batch_idx, 0) assert out.signal == 0 - assert len(out.grad_norm_dict) == 0 and isinstance(out.grad_norm_dict, dict) - train_step_out = out.training_step_output_for_epoch_end + train_step_out = out.training_step_output assert len(train_step_out) == 1 train_step_out = train_step_out[0][0] - assert isinstance(train_step_out['minimize'], torch.Tensor) - assert train_step_out['minimize'].item() == 171 + assert isinstance(train_step_out.minimize, torch.Tensor) + assert train_step_out.minimize.item() == 171 # make sure the optimizer closure returns the correct things - opt_closure_result = trainer.train_loop.training_step_and_backward( + opt_closure_result = trainer.fit_loop.epoch_loop.batch_loop.training_step_and_backward( batch, batch_idx, 0, @@ -138,22 +137,20 @@ def backward(self, loss, optimizer, optimizer_idx): assert model.validation_step_end_called assert not model.validation_epoch_end_called + trainer.state.stage = RunningStage.TRAINING # make sure training outputs what is expected - for batch_idx, batch in enumerate(model.train_dataloader()): - break - - out = trainer.train_loop.run_training_batch(batch, batch_idx, 0) + batch_idx, batch = 0, next(iter(model.train_dataloader())) + out = trainer.fit_loop.epoch_loop.batch_loop.run(batch, batch_idx, 0) assert out.signal == 0 - assert len(out.grad_norm_dict) == 0 and isinstance(out.grad_norm_dict, dict) - train_step_out = out.training_step_output_for_epoch_end + train_step_out = out.training_step_output assert len(train_step_out) == 1 train_step_out = train_step_out[0][0] - assert isinstance(train_step_out['minimize'], torch.Tensor) - assert train_step_out['minimize'].item() == 171 + assert isinstance(train_step_out.minimize, torch.Tensor) + assert train_step_out.minimize.item() == 171 # make sure the optimizer closure returns the correct things - opt_closure_result = trainer.train_loop.training_step_and_backward( + opt_closure_result = trainer.fit_loop.epoch_loop.batch_loop.training_step_and_backward( batch, batch_idx, 0, trainer.optimizers[0], hiddens=None ) assert opt_closure_result['loss'].item() == 171 diff --git a/tests/trainer/loops/test_training_loop.py b/tests/trainer/loops/test_training_loop.py index da4ecbe5a9f05..c0fde2983985d 100644 --- a/tests/trainer/loops/test_training_loop.py +++ b/tests/trainer/loops/test_training_loop.py @@ -11,10 +11,13 @@ # 
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import re
+
 import pytest
 import torch

 from pytorch_lightning import seed_everything, Trainer
+from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from tests.helpers import BoringModel

@@ -105,10 +108,10 @@ def on_train_batch_start(self, batch, batch_idx, dataloader_idx):
 trainer = Trainer(max_epochs=max_epochs, limit_train_batches=10)
 trainer.fit(model)
 if batch_idx_ > trainer.num_training_batches - 1:
- assert trainer.train_loop.batch_idx == trainer.num_training_batches - 1
+ assert trainer.fit_loop.batch_idx == trainer.num_training_batches - 1
 assert trainer.global_step == trainer.num_training_batches * max_epochs
 else:
- assert trainer.train_loop.batch_idx == batch_idx_
+ assert trainer.fit_loop.batch_idx == batch_idx_
 assert trainer.global_step == batch_idx_ * max_epochs

@@ -142,3 +145,43 @@ def validation_step(self, *args):
 assert trainer.current_epoch == 0
 assert trainer.global_step == 5
 assert model.validation_called_at == (0, 4)
+
+
+@pytest.mark.parametrize(['output'], [(5., ), ({'a': 5}, )])
+def test_warning_invalid_trainstep_output(tmpdir, output):
+
+ class InvalidTrainStepModel(BoringModel):
+
+ def training_step(self, batch, batch_idx):
+ return output
+
+ model = InvalidTrainStepModel()
+
+ trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=1)
+ with pytest.raises(
+ MisconfigurationException,
+ match=re.escape(
+ "In automatic optimization, `training_step` must either return a Tensor, "
+ "a dict with key 'loss' or None (where the step will be skipped)."
+ )
+ ):
+ trainer.fit(model)
+
+
+def test_warning_valid_train_step_end(tmpdir):
+
+ class ValidTrainStepEndModel(BoringModel):
+
+ def training_step(self, batch, batch_idx):
+ output = self(batch)
+ return {'output': output, 'batch': batch}
+
+ def training_step_end(self, outputs):
+ loss = self.loss(outputs['batch'], outputs['output'])
+ return loss
+
+ # No error is raised
+ model = ValidTrainStepEndModel()
+ trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=1)
+
+ trainer.fit(model)
diff --git a/tests/trainer/loops/test_training_loop_flow_scalar.py b/tests/trainer/loops/test_training_loop_flow_scalar.py
index 2f503b62f56ee..9b438aea45f87 100644
--- a/tests/trainer/loops/test_training_loop_flow_scalar.py
+++ b/tests/trainer/loops/test_training_loop_flow_scalar.py
@@ -11,10 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
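The new `test_warning_invalid_trainstep_output` above pins down the return contract enforced under automatic optimization. A sketch of the three accepted shapes (`compute_loss` is a hypothetical helper, not part of `BoringModel`):

    def training_step(self, batch, batch_idx):
        loss = self.compute_loss(batch)  # hypothetical helper returning a scalar Tensor
        # under automatic optimization, any one of the following is valid:
        return loss                      # a plain Tensor
        # return {'loss': loss}          # a dict that carries the key 'loss'
        # return None                    # skip the optimization step for this batch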
-""" -Tests to ensure that the training loop works with a dict (1.0) -""" - import pytest import torch from torch.utils.data import DataLoader @@ -22,6 +18,7 @@ from pytorch_lightning import Trainer from pytorch_lightning.core.lightning import LightningModule +from pytorch_lightning.trainer.states import RunningStage from tests.helpers.boring_model import BoringModel, RandomDataset from tests.helpers.deterministic_model import DeterministicModel from tests.helpers.utils import no_warning_call @@ -149,22 +146,20 @@ def backward(self, loss, optimizer, optimizer_idx): assert len(trainer.logger_connector.callback_metrics) == 0 assert len(trainer.logger_connector.progress_bar_metrics) == 0 + trainer.state.stage = RunningStage.TRAINING # make sure training outputs what is expected - for batch_idx, batch in enumerate(model.train_dataloader()): - break - - out = trainer.train_loop.run_training_batch(batch, batch_idx, 0) + batch_idx, batch = 0, next(iter(model.train_dataloader())) + out = trainer.fit_loop.epoch_loop.batch_loop.run(batch, batch_idx, 0) assert out.signal == 0 - assert len(out.grad_norm_dict) == 0 and isinstance(out.grad_norm_dict, dict) - train_step_out = out.training_step_output_for_epoch_end + train_step_out = out.training_step_output assert len(train_step_out) == 1 train_step_out = train_step_out[0][0] - assert isinstance(train_step_out['minimize'], torch.Tensor) - assert train_step_out['minimize'].item() == 171 + assert isinstance(train_step_out.minimize, torch.Tensor) + assert train_step_out.minimize.item() == 171 # make sure the optimizer closure returns the correct things - opt_closure_result = trainer.train_loop.training_step_and_backward( + opt_closure_result = trainer.fit_loop.epoch_loop.batch_loop.training_step_and_backward( batch, batch_idx, 0, @@ -229,22 +224,20 @@ def backward(self, loss, optimizer, optimizer_idx): assert len(trainer.logger_connector.callback_metrics) == 0 assert len(trainer.logger_connector.progress_bar_metrics) == 0 + trainer.state.stage = RunningStage.TRAINING # make sure training outputs what is expected - for batch_idx, batch in enumerate(model.train_dataloader()): - break - - out = trainer.train_loop.run_training_batch(batch, batch_idx, 0) + batch_idx, batch = 0, next(iter(model.train_dataloader())) + out = trainer.fit_loop.epoch_loop.batch_loop.run(batch, batch_idx, 0) assert out.signal == 0 - assert len(out.grad_norm_dict) == 0 and isinstance(out.grad_norm_dict, dict) - train_step_out = out.training_step_output_for_epoch_end + train_step_out = out.training_step_output assert len(train_step_out) == 1 train_step_out = train_step_out[0][0] - assert isinstance(train_step_out['minimize'], torch.Tensor) - assert train_step_out['minimize'].item() == 171 + assert isinstance(train_step_out.minimize, torch.Tensor) + assert train_step_out.minimize.item() == 171 # make sure the optimizer closure returns the correct things - opt_closure_result = trainer.train_loop.training_step_and_backward( + opt_closure_result = trainer.fit_loop.epoch_loop.batch_loop.training_step_and_backward( batch, batch_idx, 0, trainer.optimizers[0], hiddens=None ) assert opt_closure_result['loss'].item() == 171 @@ -316,11 +309,13 @@ def training_step(self, batch, batch_idx): with pytest.warns(UserWarning, match=r'.*training_step returned None.*'): trainer.fit(model) + trainer.state.stage = RunningStage.TRAINING + # manually check a few batches for batch_idx, batch in enumerate(model.train_dataloader()): - out = trainer.train_loop.run_training_batch(batch, batch_idx, 0) + out = 
trainer.fit_loop.epoch_loop.batch_loop.run(batch, batch_idx, 0) if not batch_idx % 2: - assert out.training_step_output_for_epoch_end == [[]] + assert out.training_step_output == [[]] assert out.signal == 0 @@ -359,9 +354,11 @@ def train_dataloader(self): with pytest.warns(UserWarning, match=r'.*train_dataloader yielded None.*'): trainer.fit(model) + trainer.state.stage = RunningStage.TRAINING + # manually check a few batches for batch_idx, batch in enumerate(model.train_dataloader()): - out = trainer.train_loop.run_training_batch(batch, batch_idx, 0) + out = trainer.fit_loop.epoch_loop.batch_loop.run(batch, batch_idx, 0) if not batch_idx % 2: - assert out.training_step_output_for_epoch_end == [[]] + assert out.training_step_output == [[]] assert out.signal == 0 diff --git a/tests/trainer/optimization/test_manual_optimization.py b/tests/trainer/optimization/test_manual_optimization.py index ea8732a3958b2..75a509e07c26b 100644 --- a/tests/trainer/optimization/test_manual_optimization.py +++ b/tests/trainer/optimization/test_manual_optimization.py @@ -28,54 +28,56 @@ from tests.helpers.runif import RunIf -@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) -def test_multiple_optimizers_manual_no_return(tmpdir): - """ - Tests that only training_step can be used - """ +class ManualOptModel(BoringModel): - class TestModel(BoringModel): + def __init__(self): + super().__init__() + self.automatic_optimization = False - def __init__(self): - super().__init__() - self.automatic_optimization = False + def training_step(self, batch, batch_idx): + opt_a, opt_b = self.optimizers() - def training_step(self, batch, batch_idx): - # manual - opt_a, opt_b = self.optimizers() - loss_1 = self.step(batch[0]) + # make sure there are no grads + if batch_idx > 0: + assert torch.all(self.layer.weight.grad == 0) - # make sure there are no grads - if batch_idx > 0: - assert torch.all(self.layer.weight.grad == 0) + loss_1 = self.step(batch[0]) + self.manual_backward(loss_1, opt_a) + opt_a.step() + opt_a.zero_grad() + assert torch.all(self.layer.weight.grad == 0) - self.manual_backward(loss_1, opt_a) - opt_a.step() - opt_a.zero_grad() - assert torch.all(self.layer.weight.grad == 0) + loss_2 = self.step(batch[0]) + # ensure we forward the correct params to the optimizer + # without retain_graph we can't do multiple backward passes + self.manual_backward(loss_2, opt_b, retain_graph=True) + self.manual_backward(loss_2, opt_a) + assert self.layer.weight.grad is not None + opt_b.step() + opt_b.zero_grad() + assert torch.all(self.layer.weight.grad == 0) - # fake discriminator - loss_2 = self.step(batch[0]) + return loss_2 - # ensure we forward the correct params to the optimizer - # without retain_graph we can't do multiple backward passes - self.manual_backward(loss_2, opt_b, retain_graph=True) - self.manual_backward(loss_2, opt_a, retain_graph=True) + def configure_optimizers(self): + optimizer = torch.optim.SGD(self.layer.parameters(), lr=0.1) + optimizer_2 = torch.optim.SGD(self.layer.parameters(), lr=0.1) + return optimizer, optimizer_2 - assert self.layer.weight.grad is not None - opt_b.step() - opt_b.zero_grad() - assert torch.all(self.layer.weight.grad == 0) + +@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) +def test_multiple_optimizers_manual_no_return(tmpdir): + + class TestModel(ManualOptModel): + + def training_step(self, batch, batch_idx): + # avoid returning a value + super().training_step(batch, batch_idx) def training_epoch_end(self, outputs) -> None: # outputs is empty as training_step does not return 
# and it is not automatic optimization - assert len(outputs) == 0 - - def configure_optimizers(self): - optimizer = torch.optim.SGD(self.layer.parameters(), lr=0.1) - optimizer_2 = torch.optim.SGD(self.layer.parameters(), lr=0.1) - return optimizer, optimizer_2 + assert not outputs model = TestModel() model.val_dataloader = None @@ -98,53 +100,16 @@ def configure_optimizers(self): @mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) def test_multiple_optimizers_manual_return(tmpdir): - """ - Tests that only training_step can be used - """ - class TestModel(BoringModel): - - def __init__(self): - super().__init__() - self.automatic_optimization = False + class TestModel(ManualOptModel): def training_step(self, batch, batch_idx): - # manual - opt_a, opt_b = self.optimizers() - loss_1 = self.step(batch[0]) - - # make sure there are no grads - if batch_idx > 0: - assert torch.all(self.layer.weight.grad == 0) - - self.manual_backward(loss_1, opt_a) - opt_a.step() - opt_a.zero_grad() - assert torch.all(self.layer.weight.grad == 0) - - # fake discriminator - loss_2 = self.step(batch[0]) - - # ensure we forward the correct params to the optimizer - # without retain_graph we can't do multiple backward passes - self.manual_backward(loss_2, opt_b, retain_graph=True) - self.manual_backward(loss_2, opt_a, retain_graph=True) - - assert self.layer.weight.grad is not None - opt_b.step() - opt_b.zero_grad() - assert torch.all(self.layer.weight.grad == 0) - + super().training_step(batch, batch_idx) return {'something': 'else'} def training_epoch_end(self, outputs) -> None: # outputs should be an array with an entry per optimizer - assert len(outputs) == 2 - - def configure_optimizers(self): - optimizer = torch.optim.SGD(self.layer.parameters(), lr=0.1) - optimizer_2 = torch.optim.SGD(self.layer.parameters(), lr=0.1) - return optimizer, optimizer_2 + assert outputs == [{'something': 'else'}, {'something': 'else'}] model = TestModel() model.val_dataloader = None @@ -166,55 +131,16 @@ def configure_optimizers(self): @mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) -def test_multiple_optimizers_manual_return_and_log(tmpdir): - """ - Tests that only training_step can be used - """ - - class TestModel(BoringModel): +def test_multiple_optimizers_manual_log(tmpdir): - def __init__(self): - super().__init__() - self.automatic_optimization = False + class TestModel(ManualOptModel): def training_step(self, batch, batch_idx): - # manual - opt_a, opt_b = self.optimizers() - loss_1 = self.step(batch[0]) - - # make sure there are no grads - if batch_idx > 0: - assert torch.all(self.layer.weight.grad == 0) - - self.manual_backward(loss_1, opt_a) - opt_a.step() - opt_a.zero_grad() - assert torch.all(self.layer.weight.grad == 0) - - # fake discriminator - loss_2 = self.step(batch[0]) - - # ensure we forward the correct params to the optimizer - # without retain_graph we can't do multiple backward passes - self.manual_backward(loss_2, opt_b, retain_graph=True) - self.manual_backward(loss_2, opt_a, retain_graph=True) + loss_2 = super().training_step(batch, batch_idx) self.log('a', loss_2, on_epoch=True) - assert self.layer.weight.grad is not None - opt_b.step() - opt_b.zero_grad() - assert torch.all(self.layer.weight.grad == 0) - - return {'something': 'else'} - def training_epoch_end(self, outputs) -> None: - # outputs should be an array with an entry per optimizer - assert len(outputs) == 2 - - def configure_optimizers(self): - optimizer = torch.optim.SGD(self.layer.parameters(), lr=0.1) - optimizer_2 = 
torch.optim.SGD(self.layer.parameters(), lr=0.1) - return optimizer, optimizer_2 + assert not outputs model = TestModel() model.val_dataloader = None @@ -234,62 +160,13 @@ def configure_optimizers(self): num_manual_backward_calls = 3 assert trainer.dev_debugger.count_events('backward_call') == limit_train_batches * num_manual_backward_calls - expected = {'a_step', 'a_epoch', 'epoch'} - logged = set(trainer.logged_metrics.keys()) - assert expected == logged + assert set(trainer.logged_metrics) == {'a_step', 'a_epoch', 'epoch'} @mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) @RunIf(min_gpus=1) def test_multiple_optimizers_manual_native_amp(tmpdir): - """ - Tests that only training_step can be used - """ - - class TestModel(BoringModel): - - def __init__(self): - super().__init__() - self.automatic_optimization = False - - def training_step(self, batch, batch_idx): - # manual - opt_a, opt_b = self.optimizers() - loss_1 = self.step(batch[0]) - - # make sure there are no grads - if batch_idx > 0: - assert torch.all(self.layer.weight.grad == 0) - - self.manual_backward(loss_1, opt_a) - opt_a.step() - opt_a.zero_grad() - assert torch.all(self.layer.weight.grad == 0) - - # fake discriminator - loss_2 = self.step(batch[0]) - - # ensure we forward the correct params to the optimizer - # without retain_graph we can't do multiple backward passes - self.manual_backward(loss_2, opt_b, retain_graph=True) - self.manual_backward(loss_2, opt_a, retain_graph=True) - - assert self.layer.weight.grad is not None - opt_b.step() - opt_b.zero_grad() - assert torch.all(self.layer.weight.grad == 0) - - def training_epoch_end(self, outputs) -> None: - # outputs is empty as training_step does not return - # and it is not automatic optimization - assert len(outputs) == 0 - - def configure_optimizers(self): - optimizer = torch.optim.SGD(self.layer.parameters(), lr=0.1) - optimizer_2 = torch.optim.SGD(self.layer.parameters(), lr=0.1) - return optimizer, optimizer_2 - - model = TestModel() + model = ManualOptModel() model.val_dataloader = None limit_train_batches = 2 @@ -313,57 +190,18 @@ def configure_optimizers(self): @mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) @RunIf(min_gpus=1, amp_apex=True) def test_multiple_optimizers_manual_apex_no_return(tmpdir): - """ - Tests that only training_step can be used - """ - class TestModel(BoringModel): - - def __init__(self): - super().__init__() - self.automatic_optimization = False + class TestModel(ManualOptModel): def training_step(self, batch, batch_idx): - # manual - opt_a, opt_b = self.optimizers() - x = batch[0] - - loss_1 = self(x) - loss_1 = self.loss(loss_1, loss_1) - - # make sure there are no grads - if batch_idx > 0: - assert torch.all(self.layer.weight.grad == 0) - - self.manual_backward(loss_1, opt_a) - opt_a.step() - opt_a.zero_grad() - assert torch.all(self.layer.weight.grad == 0) - - # fake discriminator - loss_2 = self(x) - loss_2 = self.loss(loss_2, loss_2) - - # ensure we forward the correct params to the optimizer - # without retain_graph we can't do multiple backward passes - self.manual_backward(loss_2, retain_graph=True) - self.manual_backward(loss_2) - - assert self.layer.weight.grad is not None - opt_b.step() - opt_b.zero_grad() - assert torch.all(self.layer.weight.grad == 0) + # avoid returning a value + super().training_step(batch, batch_idx) def training_epoch_end(self, outputs) -> None: # outputs is empty as training_step does not return # and it is not automatic optimization assert len(outputs) == 0 - def configure_optimizers(self): - 
optimizer = torch.optim.SGD(self.layer.parameters(), lr=0.1) - optimizer_2 = torch.optim.SGD(self.layer.parameters(), lr=0.1) - return optimizer, optimizer_2 - model = TestModel() model.val_dataloader = None @@ -586,7 +424,6 @@ def on_train_epoch_end(self, *_, **__): limit_val_batches=0, precision=16, amp_backend='native', - accumulate_grad_batches=4, gpus=1, ) trainer.fit(model) @@ -599,14 +436,10 @@ def test_multiple_optimizers_step(tmpdir): Tests that `step` works with several optimizers """ - class TestModel(BoringModel): + class TestModel(ManualOptModel): called = False - def __init__(self): - super().__init__() - self.automatic_optimization = False - def on_after_backward(self): self.called = True norm = torch.nn.utils.clip_grad_norm_(self.parameters(), 2) @@ -641,17 +474,12 @@ def training_step(self, batch, batch_idx): opt_b.step() opt_b.zero_grad() - return {'loss1': loss_1, 'loss2': loss_2} + return {'loss1': loss_1.detach(), 'loss2': loss_2.detach()} def training_epoch_end(self, outputs) -> None: # outputs should be an array with an entry per optimizer assert len(outputs) == 2 - def configure_optimizers(self): - optimizer = torch.optim.SGD(self.layer.parameters(), lr=0.1) - optimizer_2 = torch.optim.SGD(self.layer.parameters(), lr=0.1) - return optimizer, optimizer_2 - model = TestModel() model.val_dataloader = None @@ -730,8 +558,7 @@ def optimizer_closure(): assert not torch.equal(weight_before, weight_after) def configure_optimizers(self): - optimizer = torch.optim.SGD(self.layer.parameters(), lr=0.1) - return optimizer + return torch.optim.SGD(self.layer.parameters(), lr=0.1) model = TestModel() model.val_dataloader = None @@ -784,14 +611,13 @@ def optimizer_closure(): opt.step(closure=optimizer_closure) weight_after = self.layer.weight.clone() - if not self.trainer.train_loop.should_accumulate(): + if not self.trainer.fit_loop.should_accumulate(): assert not torch.equal(weight_before, weight_after) else: assert self.layer.weight.grad is not None def configure_optimizers(self): - optimizer = torch.optim.SGD(self.layer.parameters(), lr=0.1) - return optimizer + return torch.optim.SGD(self.layer.parameters(), lr=0.1) model = TestModel() model.val_dataloader = None @@ -804,14 +630,12 @@ def configure_optimizers(self): limit_val_batches=2, max_epochs=1, log_every_n_steps=1, - accumulate_grad_batches=2, ) trainer.fit(model) assert trainer.dev_debugger.count_events('backward_call') == limit_train_batches * 2 -@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) @patch("torch.optim.SGD.step") def test_step_with_optimizer_closure_and_extra_arguments(step_mock, tmpdir): """ @@ -843,8 +667,7 @@ def optimizer_closure(): opt.zero_grad() def configure_optimizers(self): - optimizer = torch.optim.SGD(self.layer.parameters(), lr=0.1) - return optimizer + return torch.optim.SGD(self.layer.parameters(), lr=0.1) model = TestModel() model.val_dataloader = None @@ -857,11 +680,10 @@ def configure_optimizers(self): limit_val_batches=2, max_epochs=1, log_every_n_steps=1, - accumulate_grad_batches=2, ) trainer.fit(model) - expected_calls = [call(closure=ANY) for s in range(2)] + expected_calls = [call(closure=ANY) for _ in range(2)] step_mock.assert_has_calls(expected_calls) @@ -932,7 +754,6 @@ def configure_optimizers(self): limit_val_batches=2, max_epochs=1, log_every_n_steps=1, - accumulate_grad_batches=2, ) trainer.fit(model) @@ -1042,7 +863,6 @@ def train_manual_optimization(tmpdir, accelerator, model_cls=TesManualOptimizati limit_val_batches=2, max_epochs=1, log_every_n_steps=1, - 
accumulate_grad_batches=2,
 gpus=2,
 accelerator=accelerator,
 callbacks=[TestManualOptimizationDDPCallack()]
@@ -1274,9 +1094,5 @@ def configure_optimizers(self):
 trainer.fit(model)

- expected = {'epoch', 'loss_d', 'loss_g'}
- logged = set(trainer.logged_metrics.keys())
- assert expected == logged
- expected = {'loss_d', 'loss_g'}
- logged = set(trainer.progress_bar_metrics.keys())
- assert expected == logged
+ assert set(trainer.logged_metrics) == {'epoch', 'loss_d', 'loss_g'}
+ assert set(trainer.progress_bar_metrics) == {'loss_d', 'loss_g'}
diff --git a/tests/trainer/optimization/test_multiple_optimizers.py b/tests/trainer/optimization/test_multiple_optimizers.py
index aba3b53248a57..495f51ab8d394 100644
--- a/tests/trainer/optimization/test_multiple_optimizers.py
+++ b/tests/trainer/optimization/test_multiple_optimizers.py
@@ -30,10 +30,7 @@ def configure_optimizers(self):
 def test_unbalanced_logging_with_multiple_optimizers(tmpdir):
- """
- This tests ensures reduction works in unbalanced logging settings,
- even when a Callback also logs.
- """
+ """This test ensures reduction works in unbalanced logging settings"""

 class TestModel(MultiOptModel):

@@ -49,22 +46,12 @@ def training_step(self, batch, batch_idx, optimizer_idx):
 model = TestModel()
 model.training_epoch_end = None

- class TestCallback(pl.Callback):
-
- def on_train_batch_end(self, trainer, pl_module, output, batch, batch_idx, dl_idx):
- # when this is called, the EpochResultStore state has not been reset yet because we are still
- # "INSIDE_BATCH_TRAIN_LOOP" and the LoggerConnector runs its `on_train_batch_end` after the
- # Callback (see `TrainLoop.on_train_batch_end`). For this reason, opt_idx here is the index
- # of the last optimizer updated (the second, index 1). This produced a KeyError as reported in #5459
- pl_module.log("test_train_batch_end", trainer.logger_connector.cached_results._opt_idx)
-
 # Initialize a trainer
 trainer = pl.Trainer(
 default_root_dir=tmpdir,
 max_epochs=1,
 limit_train_batches=5,
 limit_val_batches=5,
- callbacks=[TestCallback()],
 weights_summary=None,
 )
 trainer.fit(model)
@@ -74,8 +61,6 @@ def on_train_batch_end(self, trainer, pl_module, output, batch, batch_idx, dl_id
 # test loss is properly reduced
 torch.testing.assert_allclose(trainer.callback_metrics[f"loss_{k}_epoch"], torch.tensor(v).mean())

- assert trainer.callback_metrics["test_train_batch_end"] == len(model.optimizers()) - 1
-

 def test_multiple_optimizers(tmpdir):

diff --git a/tests/trainer/optimization/test_optimizers.py b/tests/trainer/optimization/test_optimizers.py
index a81e0eecf5c61..6165aa132153b 100644
--- a/tests/trainer/optimization/test_optimizers.py
+++ b/tests/trainer/optimization/test_optimizers.py
@@ -18,6 +18,7 @@
 from torch import optim

 from pytorch_lightning import Callback, Trainer
+from pytorch_lightning.callbacks import ModelCheckpoint
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from tests.base import EvalModelTemplate
 from tests.helpers.boring_model import BoringModel
@@ -620,3 +621,87 @@ def test_lr_scheduler_epoch_step_frequency(mocked_sched, check_val_every_n_epoch
 )
 trainer.fit(model)
 assert mocked_sched.call_count == expected_steps
+
+
+@pytest.mark.parametrize('every_n_train_steps, epoch_interval', [(None, True), (2, False), (2, True)])
+def test_lr_scheduler_state_updated_before_saving(tmpdir, every_n_train_steps, epoch_interval):
+ batches = 2
+ max_epochs = 1
+ lr, gamma = 1, 10
+ trainer = Trainer(
+ default_root_dir=tmpdir,
+ progress_bar_refresh_rate=0,
+ logger=False,
+ max_epochs=max_epochs, + limit_train_batches=batches, + limit_val_batches=1, + callbacks=[ModelCheckpoint(dirpath=tmpdir, every_n_train_steps=every_n_train_steps)] + ) + + class TestModel(BoringModel): + + def configure_optimizers(self): + optimizer = torch.optim.SGD(self.parameters(), lr=lr) + lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=gamma) + lr_dict = {'scheduler': lr_scheduler} + if not epoch_interval: + lr_dict['interval'] = 'step' + return [optimizer], [lr_dict] + + def on_save_checkpoint(self, checkpoint): + lr_dict = checkpoint['lr_schedulers'][0] + # 2 batches ran. since the lr_dict interval is `step`, the step count should be 2 + assert self.trainer.global_step + 1 == batches # the global step hasn't been increased yet + compare_to = max_epochs if epoch_interval else batches + assert lr_dict['_step_count'] - 1 == compare_to # step count starts at 1 + assert lr_dict['_last_lr'] == [lr * gamma**compare_to] + self.on_save_checkpoint_called = True + + model = TestModel() + trainer.fit(model) + assert model.on_save_checkpoint_called + + +def test_plateau_scheduler_lr_step_interval_updated_after_saving(tmpdir): + batches = 4 + trainer = Trainer( + default_root_dir=tmpdir, + progress_bar_refresh_rate=0, + logger=False, + max_epochs=1, + limit_train_batches=batches, + limit_val_batches=1, + callbacks=[ModelCheckpoint(dirpath=tmpdir)] + ) + + class TestModel(BoringModel): + + def training_step(self, batch, batch_idx, optimizer_idx): + self.log("foo", batch_idx) + return super().training_step(batch, batch_idx) + + def configure_optimizers(self): + optimizer_1 = torch.optim.Adam(self.parameters()) + optimizer_2 = torch.optim.Adam(self.parameters()) + + lr_scheduler1 = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer_1) + lr_dict_1 = {'scheduler': lr_scheduler1, 'interval': 'step', 'monitor': 'foo'} + + lr_scheduler2 = torch.optim.lr_scheduler.StepLR(optimizer_2, step_size=1) + lr_dict_2 = {'scheduler': lr_scheduler2, 'interval': 'step'} + return [optimizer_1, optimizer_2], [lr_dict_1, lr_dict_2] + + def on_save_checkpoint(self, checkpoint): + lr_dict_1 = checkpoint['lr_schedulers'][0] + # since plateau schedulers are updated after saving checkpoint, last_epoch should be 3 + assert lr_dict_1['last_epoch'] == batches - 1 # last epoch starts at 0 + + lr_dict_2 = checkpoint['lr_schedulers'][1] + assert lr_dict_2['_step_count'] - 1 == batches # step count starts at 1 + + self.on_save_checkpoint_called = True + + model = TestModel() + model.training_epoch_end = None + trainer.fit(model) + assert model.on_save_checkpoint_called diff --git a/tests/trainer/test_config_validator.py b/tests/trainer/test_config_validator.py index 9fccd9b36440a..6762d65f41bab 100644 --- a/tests/trainer/test_config_validator.py +++ b/tests/trainer/test_config_validator.py @@ -128,17 +128,13 @@ def test_dataloader(self): def predict_dataloader(self): return self._dataloaders - dataloaders = [torch.utils.data.DataLoader(RandomDataset(32, 2)), torch.utils.data.DataLoader(RandomDataset(32, 2))] + data = [torch.utils.data.DataLoader(RandomDataset(32, 2)), torch.utils.data.DataLoader(RandomDataset(32, 2))] + if datamodule: + data = TestLightningDataModule(data) model = TestModel() - trainer = Trainer(default_root_dir=tmpdir) - - if datamodule: - datamodule = TestLightningDataModule(dataloaders) - results = trainer.predict(model, datamodule=datamodule) - else: - results = trainer.predict(model, dataloaders=dataloaders) + results = trainer.predict(model, data) assert len(results) == 2 
assert results[0][0].shape == torch.Size([1, 2]) @@ -147,3 +143,17 @@ def predict_dataloader(self): with pytest.raises(MisconfigurationException, match="Dataloader not found for `Trainer.predict`"): trainer.predict(model) + + +def test_trainer_manual_optimization_config(tmpdir): + """ Test error message when requesting Trainer features unsupported with manual optimization """ + model = BoringModel() + model.automatic_optimization = False + + trainer = Trainer(gradient_clip_val=1.0) + with pytest.raises(MisconfigurationException, match="Automatic gradient clipping is not supported"): + trainer.fit(model) + + trainer = Trainer(accumulate_grad_batches=2) + with pytest.raises(MisconfigurationException, match="Automatic gradient accumulation is not supported"): + trainer.fit(model) diff --git a/tests/trainer/test_data_loading.py b/tests/trainer/test_data_loading.py index 831fc474336b6..5d4da1be7ddbe 100644 --- a/tests/trainer/test_data_loading.py +++ b/tests/trainer/test_data_loading.py @@ -98,9 +98,13 @@ def check_replace_distributed_sampler(tmpdir, save_preds_on_dl_idx, accelerator, @RunIf(min_gpus=2, special=True) -@pytest.mark.parametrize("mode", [1, 2]) -def test_replace_distributed_sampler_custom_dataloader_custom_batch_sampler(tmpdir, mode): - check_replace_distributed_sampler(tmpdir, True, "ddp", 2, 2, mode) +def test_replace_distributed_sampler_custom_dataloader_custom_batch_sampler_0(tmpdir): + check_replace_distributed_sampler(tmpdir, True, "ddp", 2, 2, mode=1) + + +@RunIf(min_gpus=2, special=True) +def test_replace_distributed_sampler_custom_dataloader_custom_batch_sampler_1(tmpdir): + check_replace_distributed_sampler(tmpdir, True, "ddp", 2, 2, mode=2) @pytest.mark.parametrize("num_workers", [0, 1]) diff --git a/tests/trainer/test_dataloaders.py b/tests/trainer/test_dataloaders.py index c2e5e1c24ac78..14f47a2558eff 100644 --- a/tests/trainer/test_dataloaders.py +++ b/tests/trainer/test_dataloaders.py @@ -813,11 +813,11 @@ def test_missing_worker_init_fn(): seed_everything(0) dataloader = DataLoader(dataset, batch_size=2, num_workers=2, shuffle=False) - batches0 = torch.cat([batch for batch in dataloader]) + batches0 = torch.cat(list(dataloader)) seed_everything(0) dataloader = DataLoader(dataset, batch_size=2, num_workers=2, shuffle=False) - batches1 = torch.cat([batch for batch in dataloader]) + batches1 = torch.cat(list(dataloader)) is_duplicated = len(torch.unique(batches1, dim=0)) < len(dataset) is_deterministic = torch.eq(batches0, batches1).all() @@ -895,6 +895,25 @@ def test_auto_add_worker_init_fn_distributed(tmpdir, monkeypatch): trainer.fit(model, train_dataloader=dataloader) +def test_warning_with_small_dataloader_and_logging_interval(tmpdir): + """ Test that a warning message is shown if the dataloader length is too short for the chosen logging interval. 
""" + model = BoringModel() + dataloader = DataLoader(RandomDataset(32, length=10)) + model.train_dataloader = lambda: dataloader + + with pytest.warns(UserWarning, match=r"The number of training samples \(10\) is smaller than the logging interval"): + trainer = Trainer( + default_root_dir=tmpdir, + max_epochs=1, + log_every_n_steps=11, + ) + trainer.fit(model) + + with pytest.warns(UserWarning, match=r"The number of training samples \(1\) is smaller than the logging interval"): + trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, log_every_n_steps=2, limit_train_batches=1) + trainer.fit(model) + + def test_warning_with_iterable_dataset_and_len(tmpdir): """ Tests that a warning message is shown when an IterableDataset defines `__len__`. """ model = BoringModel() diff --git a/tests/trainer/test_progress.py b/tests/trainer/test_progress.py index 7db052218eb39..8c287e8cb37d1 100644 --- a/tests/trainer/test_progress.py +++ b/tests/trainer/test_progress.py @@ -13,7 +13,15 @@ # limitations under the License. import pytest -from pytorch_lightning.trainer.progress import LoopProgress, Progress, Tracker +from pytorch_lightning.trainer.progress import ( + BatchProgress, + EpochLoopProgress, + EpochProgress, + FitLoopProgress, + OptimizerProgress, + Progress, + Tracker, +) def test_progress_geattr_setattr(): @@ -60,51 +68,151 @@ def test_base_progress_from_defaults(): assert actual == expected -def test_loop_progress_increment_epoch(): - p = LoopProgress() +def test_epoch_loop_progress_increment_epoch(): + p = EpochLoopProgress() p.increment_epoch_completed() p.increment_epoch_completed() assert p.epoch.total == Tracker(completed=2) assert p.epoch.current == Tracker() - assert p.batch.current == Tracker() - - -def test_loop_progress_increment_sequence(): - """ Test sequences for incrementing batches reads and epochs. 
""" - p = LoopProgress(batch=Progress(total=Tracker(started=None))) - - p.batch.increment_ready() - assert p.batch.total == Tracker(ready=1, started=None) - assert p.batch.current == Tracker(ready=1) - - p.batch.increment_started() - assert p.batch.total == Tracker(ready=1, started=None) - assert p.batch.current == Tracker(ready=1) - - p.batch.increment_processed() - assert p.batch.total == Tracker(ready=1, started=None, processed=1) - assert p.batch.current == Tracker(ready=1, processed=1) - - p.batch.increment_completed() - assert p.batch.total == Tracker(ready=1, started=None, processed=1, completed=1) - assert p.batch.current == Tracker(ready=1, processed=1, completed=1) - - assert p.epoch.total == Tracker() - assert p.epoch.current == Tracker() - p.increment_epoch_completed() - assert p.batch.total == Tracker(ready=1, started=None, processed=1, completed=1) - assert p.batch.current == Tracker() - assert p.epoch.total == Tracker(completed=1) - assert p.epoch.current == Tracker() - - p.batch.increment_ready() - assert p.batch.total == Tracker(ready=2, started=None, processed=1, completed=1) - assert p.batch.current == Tracker(ready=1) - assert p.epoch.total == Tracker(completed=1) - assert p.epoch.current == Tracker() - - p.reset_on_epoch() - assert p.batch.total == Tracker(ready=2, started=None, processed=1, completed=1) - assert p.batch.current == Tracker() - assert p.epoch.total == Tracker(completed=1) - assert p.epoch.current == Tracker() + assert p.epoch.batch.current == Tracker() + + +def test_epoch_loop_progress_increment_sequence(): + """Test sequences for incrementing batches reads and epochs.""" + batch = BatchProgress(total=Tracker(started=None)) + epoch = EpochProgress(batch=batch) + loop = EpochLoopProgress(epoch=epoch) + + batch.increment_ready() + assert batch.total == Tracker(ready=1, started=None) + assert batch.current == Tracker(ready=1) + + batch.increment_started() + assert batch.total == Tracker(ready=1, started=None) + assert batch.current == Tracker(ready=1) + + batch.increment_processed() + assert batch.total == Tracker(ready=1, started=None, processed=1) + assert batch.current == Tracker(ready=1, processed=1) + + batch.increment_completed() + assert batch.total == Tracker(ready=1, started=None, processed=1, completed=1) + assert batch.current == Tracker(ready=1, processed=1, completed=1) + + assert epoch.total == Tracker() + assert epoch.current == Tracker() + loop.increment_epoch_completed() + assert batch.total == Tracker(ready=1, started=None, processed=1, completed=1) + assert batch.current == Tracker() + assert epoch.total == Tracker(completed=1) + assert epoch.current == Tracker() + + batch.increment_ready() + assert batch.total == Tracker(ready=2, started=None, processed=1, completed=1) + assert batch.current == Tracker(ready=1) + assert epoch.total == Tracker(completed=1) + assert epoch.current == Tracker() + + loop.reset_on_epoch() + assert batch.total == Tracker(ready=2, started=None, processed=1, completed=1) + assert batch.current == Tracker() + assert epoch.total == Tracker(completed=1) + assert epoch.current == Tracker() + + +def test_optimizer_progress_default_factory(): + """ + Ensure that the defaults are created appropiately. If `default_factory` was not used, the default would + be shared between instances. 
+ """ + p1 = OptimizerProgress() + p2 = OptimizerProgress() + p1.step.increment_completed() + assert p1.step.total.completed == p1.step.current.completed + assert p1.step.total.completed == 1 + assert p2.step.total.completed == 0 + + +def test_fit_loop_progress_serialization(): + fit_loop = FitLoopProgress() + state_dict = fit_loop.state_dict() + # yapf: disable + assert state_dict == { + 'epoch': { + # number of epochs across `fit` calls + 'total': {'completed': 0, 'processed': 0, 'ready': 0, 'started': 0}, + # number of epochs this `fit` call + 'current': {'completed': 0, 'processed': 0, 'ready': 0, 'started': 0}, + 'batch': { + # number of batches across `fit` calls + 'total': {'completed': 0, 'processed': 0, 'ready': 0, 'started': 0}, + # number of batches this epoch + 'current': {'completed': 0, 'processed': 0, 'ready': 0, 'started': 0}, + }, + # `fit` optimization progress + 'optim': { + # optimizers progress + 'optimizer': { + 'step': { + # `optimizer.step` calls across `fit` calls + 'total': {'completed': 0, 'processed': None, 'ready': 0, 'started': 0}, + # `optimizer.step` calls this epoch + 'current': {'completed': 0, 'processed': None, 'ready': 0, 'started': 0}, + }, + 'zero_grad': { + # `optimizer.zero_grad` calls across `fit` calls + 'total': {'completed': 0, 'processed': None, 'ready': 0, 'started': 0}, + # `optimizer.zero_grad` calls this epoch + 'current': {'completed': 0, 'processed': None, 'ready': 0, 'started': 0}, + }, + }, + 'scheduler': { + # `scheduler.step` calls across `fit` calls + 'total': {'completed': 0, 'processed': None, 'ready': 0, 'started': None}, + # `scheduler.step` calls this epoch + 'current': {'completed': 0, 'processed': None, 'ready': 0, 'started': None}, + }, + }, + # `fit` validation progress + 'val': { + 'epoch': { + # number of `validation` calls across `fit` calls + 'total': {'completed': 0, 'processed': 0, 'ready': 0, 'started': 0}, + # number of `validation` calls this `fit` call + 'current': {'completed': 0, 'processed': 0, 'ready': 0, 'started': 0}, + 'batch': { + # number of batches across `fit` `validation` calls + 'total': {'completed': 0, 'processed': 0, 'ready': 0, 'started': 0}, + # number of batches this `fit` `validation` call + 'current': {'completed': 0, 'processed': 0, 'ready': 0, 'started': 0}, + }, + } + }, + } + } + # yapf: enable + new_loop = FitLoopProgress.from_state_dict(state_dict) + assert fit_loop == new_loop + + +def test_epoch_loop_progress_serialization(): + loop = EpochLoopProgress() + state_dict = loop.state_dict() + # yapf: disable + assert state_dict == { + 'epoch': { + # number of times `validate` has been called + 'total': {'completed': 0, 'processed': 0, 'ready': 0, 'started': 0}, + # either 0 or 1 as `max_epochs` does not apply to the `validate` loop + 'current': {'completed': 0, 'processed': 0, 'ready': 0, 'started': 0}, + 'batch': { + # number of batches across `validate` calls + 'total': {'completed': 0, 'processed': 0, 'ready': 0, 'started': 0}, + # number of batches this `validate` call + 'current': {'completed': 0, 'processed': 0, 'ready': 0, 'started': 0}, + }, + } + } + # yapf: enable + new_loop = EpochLoopProgress.from_state_dict(state_dict) + assert loop == new_loop diff --git a/tests/trainer/test_states.py b/tests/trainer/test_states.py index c9fb50e8501dd..2614eda6d4634 100644 --- a/tests/trainer/test_states.py +++ b/tests/trainer/test_states.py @@ -37,25 +37,28 @@ class TestModel(BoringModel): def __init__(self, expected_fn, expected_stage): super().__init__() - self.expected_state = expected_fn 
+ self.expected_fn = expected_fn self.expected_stage = expected_stage self.lr = 0.1 - def on_batch_start(self, *_): - assert self.trainer.state == TrainerState( - status=TrainerStatus.RUNNING, fn=self.expected_fn, stage=self.expected_stage - ) - def on_train_batch_start(self, *_): + assert self.trainer.state.status == TrainerStatus.RUNNING + assert self.trainer.state.fn == self.expected_fn assert self.trainer.training def on_sanity_check_start(self, *_): + assert self.trainer.state.status == TrainerStatus.RUNNING + assert self.trainer.state.fn == self.expected_fn assert self.trainer.sanity_checking def on_validation_batch_start(self, *_): + assert self.trainer.state.status == TrainerStatus.RUNNING + assert self.trainer.state.fn == self.expected_fn assert self.trainer.validating or self.trainer.sanity_checking def on_test_batch_start(self, *_): + assert self.trainer.state.status == TrainerStatus.RUNNING + assert self.trainer.state.fn == self.expected_fn assert self.trainer.testing model = TestModel(TrainerFn.TUNING, RunningStage.TRAINING) diff --git a/tests/trainer/test_supporters.py b/tests/trainer/test_supporters.py index 169c8cb80b04d..2e4d6bcc5b833 100644 --- a/tests/trainer/test_supporters.py +++ b/tests/trainer/test_supporters.py @@ -91,7 +91,7 @@ def __iter__(self): dataset = IterDataset() iterator = prefetch_iterator(dataset) - assert [item for item in iterator] == [(1, False), (2, False), (3, True)] + assert list(iterator) == [(1, False), (2, False), (3, True)] class EmptyIterDataset(IterableDataset): @@ -100,7 +100,7 @@ def __iter__(self): dataset = EmptyIterDataset() iterator = prefetch_iterator(dataset) - assert [item for item in iterator] == [] + assert list(iterator) == [] @pytest.mark.parametrize( diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index a8567db70d0a6..0a4cfc42ffd3a 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -39,7 +39,7 @@ from pytorch_lightning.trainer.states import TrainerFn from pytorch_lightning.utilities import DeviceType, DistributedType from pytorch_lightning.utilities.cloud_io import load as pl_load -from pytorch_lightning.utilities.exceptions import MisconfigurationException +from pytorch_lightning.utilities.exceptions import DeadlockDetectedException, MisconfigurationException from pytorch_lightning.utilities.seed import seed_everything from tests.base import EvalModelTemplate from tests.helpers import BoringModel, RandomDataset @@ -232,29 +232,61 @@ def test_trainer_accumulate_grad_batches_zero_grad(tmpdir, accumulate_grad_batch def test_gradient_accumulation_scheduling_last_batch(tmpdir, accumulate_grad_batches, limit_train_batches): """ Verify optimizer.step() applied to last batch while grad accumulation """ - class CurrentModel(BoringModel): + class TestModel(BoringModel): - def on_batch_start(self, *_): - self.on_train_batch_start_state_dict = self.state_dict() + def state_dict(self, *args, **kwargs): + return deepcopy(super().state_dict(*args, **kwargs)) - def on_batch_end(self, outputs, batch, batch_idx, *_): - self.on_train_batch_start_end_dict = self.state_dict() - for key in self.on_train_batch_start_end_dict.keys(): - equal = torch.equal(self.on_train_batch_start_state_dict[key], self.on_train_batch_start_end_dict[key]) - if (batch_idx + 1) == self.trainer.num_training_batches: - assert equal - else: - assert not equal + def check(self, d1, d2, equal=True): + keys = d1.keys() | d2.keys() + values = [torch.equal(d1[k], d2[k]) for k in keys] + return all(values) if equal else 
not any(values) - model = CurrentModel() + def backward(self, *args, **kwargs) -> None: + pre_bwd_state_dict = self.state_dict() + assert self.check(self.start_state_dict, pre_bwd_state_dict) + + out = super().backward(*args, **kwargs) + + # state dict is equal, just the gradients changed + assert self.check(pre_bwd_state_dict, self.state_dict()) + + return out + + def optimizer_step(self, *args, **kwargs): + pre_opt_step_state_dict = self.state_dict() + assert self.check(self.start_state_dict, pre_opt_step_state_dict) + + # this calls `backward` and `on_after_backward` inside the closure + out = super().optimizer_step(*args, **kwargs) + # the state dict changed + assert self.check(pre_opt_step_state_dict, self.state_dict(), equal=False) + + self.opt_step_called = True + return out + + def on_train_batch_start(self, *_): + self.start_state_dict = self.state_dict() + self.opt_step_called = False + + def on_train_batch_end(self, outputs, batch, batch_idx, *_): + end_state_dict = self.state_dict() + is_last_batch = (batch_idx + 1) == self.trainer.num_training_batches + + if is_last_batch or self.opt_step_called: + assert self.check(self.start_state_dict, end_state_dict, equal=False) + else: + assert self.check(self.start_state_dict, end_state_dict) + + model = TestModel() trainer = Trainer( accumulate_grad_batches=accumulate_grad_batches, max_epochs=2, limit_train_batches=limit_train_batches, limit_val_batches=0, - limit_test_batches=0, default_root_dir=tmpdir, + progress_bar_refresh_rate=0, ) trainer.fit(model) @@ -339,9 +371,9 @@ def mock_save_function(filepath, *args): # emulate callback's calls during the training for i, loss in enumerate(losses): - trainer.train_loop.current_epoch = i - trainer.train_loop.global_step = i - trainer.logger_connector.callback_metrics = {"checkpoint_on": torch.tensor(loss)} + trainer.fit_loop.current_epoch = i + trainer.fit_loop.global_step = i + trainer.logger_connector.callback_metrics.update({"checkpoint_on": loss}) checkpoint_callback.on_validation_end(trainer, trainer.lightning_module) file_lists = set(os.listdir(tmpdir)) @@ -391,7 +423,7 @@ def test_model_checkpoint_only_weights(tmpdir): # assert restoring train state fails with pytest.raises(KeyError, match="checkpoint contains only the model"): - trainer.checkpoint_connector.restore_training_state(checkpoint) + trainer.checkpoint_connector.restore(new_weights_path) def test_model_freeze_unfreeze(): @@ -894,21 +926,21 @@ def test_gradient_clipping(tmpdir): default_root_dir=tmpdir, ) - trainer.train_loop.old_training_step_and_backward = trainer.train_loop.training_step_and_backward + old_training_step_and_backward = trainer.fit_loop.epoch_loop.batch_loop.training_step_and_backward def training_step_and_backward(split_batch, batch_idx, opt_idx, optimizer, hiddens): """ wrap the forward step in a closure so second order methods work """ # test that gradient is clipped correctly - ret_val = trainer.train_loop.old_training_step_and_backward(split_batch, batch_idx, opt_idx, optimizer, hiddens) + ret_val = old_training_step_and_backward(split_batch, batch_idx, opt_idx, optimizer, hiddens) parameters = model.parameters() grad_norm = torch.norm(torch.stack([torch.norm(p.grad.detach(), 2) for p in parameters]), 2) assert (grad_norm - 1.0).abs() < 0.01, "Gradient norm != 1.0: {grad_norm}".format(grad_norm=grad_norm) return ret_val - trainer.train_loop.training_step_and_backward = training_step_and_backward + trainer.fit_loop.epoch_loop.batch_loop.training_step_and_backward = training_step_and_backward # for the 
test model.prev_called_batch_idx = 0 @@ -932,14 +964,14 @@ def test_gradient_clipping_by_value(tmpdir): default_root_dir=tmpdir ) - trainer.train_loop.old_training_step_and_backward = trainer.train_loop.training_step_and_backward + old_training_step_and_backward = trainer.fit_loop.epoch_loop.batch_loop.training_step_and_backward def training_step_and_backward(split_batch, batch_idx, opt_idx, optimizer, hiddens): """ wrap the forward step in a closure so second order methods work """ # test that gradient is clipped correctly - ret_val = trainer.train_loop.old_training_step_and_backward(split_batch, batch_idx, opt_idx, optimizer, hiddens) + ret_val = old_training_step_and_backward(split_batch, batch_idx, opt_idx, optimizer, hiddens) parameters = model.parameters() grad_max_list = [torch.max(p.grad.detach().abs()) for p in parameters] grad_max = torch.max(torch.stack(grad_max_list)) @@ -948,7 +980,7 @@ def training_step_and_backward(split_batch, batch_idx, opt_idx, optimizer, hidde return ret_val - trainer.train_loop.training_step_and_backward = training_step_and_backward + trainer.fit_loop.epoch_loop.batch_loop.training_step_and_backward = training_step_and_backward # for the test model.prev_called_batch_idx = 0 @@ -973,21 +1005,21 @@ def test_gradient_clipping_fp16(tmpdir): default_root_dir=tmpdir, ) - trainer.train_loop.old_training_step_and_backward = trainer.train_loop.training_step_and_backward + old_training_step_and_backward = trainer.fit_loop.epoch_loop.batch_loop.training_step_and_backward def training_step_and_backward(split_batch, batch_idx, opt_idx, optimizer, hiddens): """ wrap the forward step in a closure so second order methods work """ # test that gradient is clipped correctly - ret_val = trainer.train_loop.old_training_step_and_backward(split_batch, batch_idx, opt_idx, optimizer, hiddens) + ret_val = old_training_step_and_backward(split_batch, batch_idx, opt_idx, optimizer, hiddens) parameters = model.parameters() grad_norm = torch.norm(torch.stack([torch.norm(p.grad.detach(), 2) for p in parameters]), 2) assert (grad_norm - 1.0).abs() < 0.01, "Gradient norm != 1.0: {grad_norm}".format(grad_norm=grad_norm) return ret_val - trainer.train_loop.training_step_and_backward = training_step_and_backward + trainer.fit_loop.epoch_loop.batch_loop.training_step_and_backward = training_step_and_backward model.prev_called_batch_idx = 0 trainer.fit(model) @@ -1012,14 +1044,14 @@ def test_gradient_clipping_by_value_fp16(tmpdir): default_root_dir=tmpdir, ) - trainer.train_loop.old_training_step_and_backward = trainer.train_loop.training_step_and_backward + old_training_step_and_backward = trainer.fit_loop.epoch_loop.batch_loop.training_step_and_backward def training_step_and_backward(split_batch, batch_idx, opt_idx, optimizer, hiddens): """ wrap the forward step in a closure so second order methods work """ # test that gradient is clipped correctly - ret_val = trainer.train_loop.old_training_step_and_backward(split_batch, batch_idx, opt_idx, optimizer, hiddens) + ret_val = old_training_step_and_backward(split_batch, batch_idx, opt_idx, optimizer, hiddens) parameters = model.parameters() grad_max_list = [torch.max(p.grad.detach().abs()) for p in parameters] grad_max = torch.max(torch.stack(grad_max_list)) @@ -1028,7 +1060,7 @@ def training_step_and_backward(split_batch, batch_idx, opt_idx, optimizer, hidde return ret_val - trainer.train_loop.training_step_and_backward = training_step_and_backward + trainer.fit_loop.epoch_loop.batch_loop.training_step_and_backward = 
training_step_and_backward model.prev_called_batch_idx = 0 trainer.fit(model) @@ -1069,7 +1101,9 @@ def test_num_sanity_val_steps(tmpdir, limit_val_batches): assert trainer.num_sanity_val_steps == num_sanity_val_steps with patch.object( - trainer.evaluation_loop, "evaluation_step", wraps=trainer.evaluation_loop.evaluation_step + trainer.fit_loop.epoch_loop.val_loop.epoch_loop, + "evaluation_step", + wraps=trainer.fit_loop.epoch_loop.val_loop.epoch_loop.evaluation_step ) as mocked: val_dataloaders = model.val_dataloader__multiple_mixed_length() trainer.fit(model, val_dataloaders=val_dataloaders) @@ -1097,7 +1131,9 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches): assert trainer.num_sanity_val_steps == float("inf") with patch.object( - trainer.evaluation_loop, "evaluation_step", wraps=trainer.evaluation_loop.evaluation_step + trainer.fit_loop.epoch_loop.val_loop.epoch_loop, + "evaluation_step", + wraps=trainer.fit_loop.epoch_loop.val_loop.epoch_loop.evaluation_step ) as mocked: val_dataloaders = model.val_dataloader__multiple() trainer.fit(model, val_dataloaders=val_dataloaders) @@ -1733,7 +1769,7 @@ def compare_optimizers(): trainer.fit(model) compare_optimizers() - trainer.train_loop.max_epochs = 2 # simulate multiple fit calls + trainer.fit_loop.max_epochs = 2 # simulate multiple fit calls trainer.fit(model) compare_optimizers() @@ -1901,3 +1937,35 @@ def test_exception_when_lightning_module_is_not_set_on_trainer(): trainer.test() with pytest.raises(MisconfigurationException, match=r"`model` must be provided.*predict"): trainer.predict() + + +@RunIf(min_gpus=2, special=True) +def test_ddp_terminate_when_deadlock_is_detected(tmpdir): + """ Test that DDP kills the remaining processes when only one rank is throwing an exception. """ + + class CustomException(Exception): + pass + + class TestModel(BoringModel): + + def training_step(self, batch, batch_idx): + if batch_idx == 1 and self.trainer.is_global_zero: + # rank 0: raises an exception + # rank 1: continues training but will hang on the next barrier in the training loop + raise CustomException + return super().training_step(batch, batch_idx) + + model = TestModel() + + trainer = Trainer( + default_root_dir=tmpdir, + max_epochs=1, + limit_train_batches=5, + num_sanity_val_steps=0, + gpus=2, + accelerator="ddp", + ) + + # simulate random failure in training_step on rank 0 + with pytest.raises(DeadlockDetectedException, match="CustomException"): + trainer.fit(model) diff --git a/tests/tuner/test_auto_gpu_select.py b/tests/tuner/test_auto_gpu_select.py index 32ec0282c8ce4..2d13855f93239 100644 --- a/tests/tuner/test_auto_gpu_select.py +++ b/tests/tuner/test_auto_gpu_select.py @@ -51,7 +51,7 @@ def test_trainer_with_gpus_options_combination_at_available_gpus_env(auto_select ["nb", "expected_gpu_idxs", "expected_error"], [ (0, [], MisconfigurationException), - (-1, [i for i in range(torch.cuda.device_count())], None), + (-1, list(range(torch.cuda.device_count())), None), (1, [0], None), ], ) diff --git a/tests/utilities/distributed.py b/tests/utilities/distributed.py index 80c0246ce6c57..0a1a6dbc5badd 100644 --- a/tests/utilities/distributed.py +++ b/tests/utilities/distributed.py @@ -20,12 +20,13 @@ import pytorch_lightning -def call_training_script(module_file, cli_args, method, tmpdir, timeout=60): +def call_training_script(module_file, cli_args, method, tmpdir, timeout=60, as_module=False): file = Path(module_file.__file__).absolute() cli_args = cli_args.split(' ') if cli_args else [] cli_args += ['--tmpdir', 
str(tmpdir)] cli_args += ['--trainer_method', method] - command = [sys.executable, str(file)] + cli_args + file_args = ["-m", module_file.__spec__.name] if as_module else [str(file)] + command = [sys.executable] + file_args + cli_args # need to set the PYTHONPATH in case pytorch_lightning was not installed into the environment env = os.environ.copy() diff --git a/tests/utilities/test_apply_func.py b/tests/utilities/test_apply_func.py index a7eea3a749f26..5b8d8c3596e7c 100644 --- a/tests/utilities/test_apply_func.py +++ b/tests/utilities/test_apply_func.py @@ -11,18 +11,32 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import dataclasses import numbers -from collections import namedtuple +from collections import namedtuple, OrderedDict +from typing import List import numpy as np +import pytest import torch -from pytorch_lightning.utilities.apply_func import apply_to_collection +from pytorch_lightning.utilities.apply_func import apply_to_collection, apply_to_collections def test_recursive_application_to_collection(): ntc = namedtuple('Foo', ['bar']) + @dataclasses.dataclass + class Feature: + input_ids: torch.Tensor + segment_ids: np.ndarray + + @dataclasses.dataclass + class ModelExample: + example_ids: List[str] + feature: Feature + label: torch.Tensor + to_reduce = { 'a': torch.tensor([1.]), # Tensor 'b': [torch.tensor([2.])], # list @@ -30,7 +44,13 @@ def test_recursive_application_to_collection(): 'd': ntc(bar=5.), # named tuple 'e': np.array([10.]), # numpy array 'f': 'this_is_a_dummy_str', # string - 'g': 12. # number + 'g': 12., # number + 'h': Feature(input_ids=torch.tensor([1., 2., 3.]), segment_ids=np.array([4., 5., 6.])), # dataclass + 'i': ModelExample( + example_ids=['i-1', 'i-2', 'i-3'], + feature=Feature(input_ids=torch.tensor([1., 2., 3.]), segment_ids=np.array([4., 5., 6.])), + label=torch.tensor([7., 8., 9.]) + ) # nested dataclass } expected_result = { @@ -40,14 +60,20 @@ def test_recursive_application_to_collection(): 'd': ntc(bar=torch.tensor([10.])), 'e': np.array([20.]), 'f': 'this_is_a_dummy_str', - 'g': 24. 
+ 'g': 24., + 'h': Feature(input_ids=torch.tensor([2., 4., 6.]), segment_ids=np.array([8., 10., 12.])), + 'i': ModelExample( + example_ids=['i-1', 'i-2', 'i-3'], + feature=Feature(input_ids=torch.tensor([2., 4., 6.]), segment_ids=np.array([8., 10., 12.])), + label=torch.tensor([14., 16., 18.]) + ) } reduced = apply_to_collection(to_reduce, (torch.Tensor, numbers.Number, np.ndarray), lambda x: x * 2) assert isinstance(reduced, dict), ' Type Consistency of dict not preserved' - assert all([x in reduced for x in to_reduce.keys()]), 'Not all entries of the dict were preserved' - assert all([isinstance(reduced[k], type(expected_result[k])) for k in to_reduce.keys()]), \ + assert all([x in reduced for x in to_reduce]), 'Not all entries of the dict were preserved' + assert all([isinstance(reduced[k], type(expected_result[k])) for k in to_reduce]), \ 'At least one type was not correctly preserved' assert isinstance(reduced['a'], torch.Tensor), 'Reduction Result of a Tensor should be a Tensor' @@ -74,5 +100,112 @@ def test_recursive_application_to_collection(): assert isinstance(reduced['f'], str), 'A string should not be reduced' assert reduced['f'] == expected_result['f'], 'String not preserved during reduction' - assert isinstance(reduced['g'], numbers.Number), 'Reduction of a number should result in a tensor' + assert isinstance(reduced['g'], numbers.Number), 'Reduction of a number should result in a number' assert reduced['g'] == expected_result['g'], 'Reduction of a number did not yield the desired result' + + assert dataclasses.is_dataclass(reduced['h']) and not isinstance(reduced['h'], type), \ + 'Reduction of a dataclass should result in a dataclass' + assert torch.allclose(reduced['h'].input_ids, expected_result['h'].input_ids), \ + 'Reduction of a dataclass did not yield the desired result' + assert np.allclose(reduced['h'].segment_ids, expected_result['h'].segment_ids), \ + 'Reduction of a dataclass did not yield the desired result' + + assert dataclasses.is_dataclass(reduced['i']) and not isinstance(reduced['i'], type), \ + 'Reduction of a dataclass should result in a dataclass' + assert dataclasses.is_dataclass(reduced['i'].feature) and not isinstance(reduced['i'].feature, type), \ + 'Reduction of a nested dataclass should result in a nested dataclass' + assert reduced['i'].example_ids == expected_result['i'].example_ids, \ + 'Reduction of a nested dataclass did not yield the desired result' + assert torch.allclose(reduced['i'].label, expected_result['i'].label), \ + 'Reduction of a nested dataclass did not yield the desired result' + assert torch.allclose(reduced['i'].feature.input_ids, expected_result['i'].feature.input_ids), \ + 'Reduction of a nested dataclass did not yield the desired result' + assert np.allclose(reduced['i'].feature.segment_ids, expected_result['i'].feature.segment_ids), \ + 'Reduction of a nested dataclass did not yield the desired result' + + # mapping support + reduced = apply_to_collection({'a': 1, 'b': 2}, int, lambda x: str(x)) + assert reduced == {'a': '1', 'b': '2'} + reduced = apply_to_collection(OrderedDict([('b', 2), ('a', 1)]), int, lambda x: str(x)) + assert reduced == OrderedDict([('b', '2'), ('a', '1')]) + + # custom mappings + class _CustomCollection(dict): + + def __init__(self, initial_dict): + super().__init__(initial_dict) + + to_reduce = _CustomCollection({'a': 1, 'b': 2, 'c': 3}) + reduced = apply_to_collection(to_reduce, int, lambda x: str(x)) + assert reduced == _CustomCollection({'a': '1', 'b': '2', 'c': '3'}) + + +def 
test_apply_to_collection_include_none(): + to_reduce = [1, 2, 3.4, 5.6, 7] + + def fn(x): + if isinstance(x, float): + return x + + reduced = apply_to_collection(to_reduce, (int, float), fn) + assert reduced == [None, None, 3.4, 5.6, None] + + reduced = apply_to_collection(to_reduce, (int, float), fn, include_none=False) + assert reduced == [3.4, 5.6] + + +def test_apply_to_collections(): + to_reduce_1 = {'a': {'b': [1, 2]}, 'c': 5} + to_reduce_2 = {'a': {'b': [3, 4]}, 'c': 6} + + def fn(a, b): + return a + b + + # basic test + reduced = apply_to_collections(to_reduce_1, to_reduce_2, int, fn) + assert reduced == {'a': {'b': [4, 6]}, 'c': 11} + + with pytest.raises(KeyError): + # strict mode - if a key does not exist in both we fail + apply_to_collections({**to_reduce_2, 'd': 'foo'}, to_reduce_1, float, fn) + + # multiple dtypes + reduced = apply_to_collections(to_reduce_1, to_reduce_2, (list, int), fn) + assert reduced == {'a': {'b': [1, 2, 3, 4]}, 'c': 11} + + # wrong dtype + reduced = apply_to_collections(to_reduce_1, to_reduce_2, (list, int), fn, wrong_dtype=int) + assert reduced == {'a': {'b': [1, 2, 3, 4]}, 'c': 5} + + # list takes precedence because it is the type of data1 + reduced = apply_to_collections([1, 2, 3], [4], (int, list), fn) + assert reduced == [1, 2, 3, 4] + + # different sizes + with pytest.raises(AssertionError, match='Sequence collections have different sizes'): + apply_to_collections([[1, 2], [3]], [4], int, fn) + + def fn(a, b): + return a.keys() | b.keys() + + # base case + reduced = apply_to_collections(to_reduce_1, to_reduce_2, dict, fn) + assert reduced == {'a', 'c'} + + # type conversion + to_reduce = [(1, 2), (3, 4)] + reduced = apply_to_collections(to_reduce, to_reduce, int, lambda *x: sum(x)) + assert reduced == [(2, 4), (6, 8)] + + # named tuple + foo = namedtuple('Foo', ['bar']) + to_reduce = [foo(1), foo(2), foo(3)] + reduced = apply_to_collections(to_reduce, to_reduce, int, lambda *x: sum(x)) + assert reduced == [foo(2), foo(4), foo(6)] + + # passing none + reduced1 = apply_to_collections([1, 2, 3], None, int, lambda x: x * x) + reduced2 = apply_to_collections(None, [1, 2, 3], int, lambda x: x * x) + assert reduced1 == reduced2 == [1, 4, 9] + reduced = apply_to_collections(None, None, int, lambda x: x * x) + assert reduced is None diff --git a/tests/utilities/test_cli.py b/tests/utilities/test_cli.py index 5780a83e75db8..67866219a76bf 100644 --- a/tests/utilities/test_cli.py +++ b/tests/utilities/test_cli.py @@ -20,18 +20,26 @@ from argparse import Namespace from contextlib import redirect_stdout from io import StringIO +from typing import List, Optional from unittest import mock import pytest +import torch import yaml +from packaging import version from pytorch_lightning import LightningDataModule, LightningModule, Trainer from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint from pytorch_lightning.plugins.environments import SLURMEnvironment from pytorch_lightning.utilities import _TPU_AVAILABLE -from pytorch_lightning.utilities.cli import LightningArgumentParser, LightningCLI, SaveConfigCallback +from pytorch_lightning.utilities.cli import instantiate_class, LightningArgumentParser, LightningCLI, SaveConfigCallback +from pytorch_lightning.utilities.imports import _TORCHVISION_AVAILABLE from tests.helpers import BoringDataModule, BoringModel +torchvision_version = version.parse('0') +if _TORCHVISION_AVAILABLE: + torchvision_version = version.parse(__import__('torchvision').__version__) + 
@mock.patch('argparse.ArgumentParser.parse_args') def test_default_args(mock_argparse, tmpdir): @@ -281,6 +289,27 @@ def on_fit_start(self): assert cli.trainer.ran_asserts +def test_lightning_cli_configurable_callbacks(tmpdir): + + class MyLightningCLI(LightningCLI): + + def add_arguments_to_parser(self, parser): + parser.add_lightning_class_args(LearningRateMonitor, 'learning_rate_monitor') + + cli_args = [ + f'--trainer.default_root_dir={tmpdir}', + '--trainer.max_epochs=1', + '--learning_rate_monitor.logging_interval=epoch', + ] + + with mock.patch('sys.argv', ['any.py'] + cli_args): + cli = MyLightningCLI(BoringModel) + + callback = [c for c in cli.trainer.callbacks if isinstance(c, LearningRateMonitor)] + assert len(callback) == 1 + assert callback[0].logging_interval == 'epoch' + + def test_lightning_cli_args_cluster_environments(tmpdir): plugins = [dict(class_path='pytorch_lightning.plugins.environments.SLURMEnvironment')] @@ -320,6 +349,31 @@ def test_lightning_cli_args(tmpdir): assert config['trainer'] == cli.config['trainer'] +def test_lightning_cli_save_config_cases(tmpdir): + + config_path = tmpdir / 'config.yaml' + cli_args = [ + f'--trainer.default_root_dir={tmpdir}', + '--trainer.logger=False', + '--trainer.fast_dev_run=1', + ] + + # With fast_dev_run!=False config should not be saved + with mock.patch('sys.argv', ['any.py'] + cli_args): + LightningCLI(BoringModel) + assert not os.path.isfile(config_path) + + # With fast_dev_run==False config should be saved + cli_args[-1] = '--trainer.max_epochs=1' + with mock.patch('sys.argv', ['any.py'] + cli_args): + LightningCLI(BoringModel) + assert os.path.isfile(config_path) + + # If run again on same directory exception should be raised since config file already exists + with mock.patch('sys.argv', ['any.py'] + cli_args), pytest.raises(RuntimeError): + LightningCLI(BoringModel) + + def test_lightning_cli_config_and_subclass_mode(tmpdir): config = dict( @@ -438,8 +492,245 @@ def __init__( with mock.patch('sys.argv', ['any.py'] + cli_args): cli = LightningCLI(MainModule) - assert cli.config_init['model']['main_param'] == 2 - assert cli.model.submodule1 == cli.config_init['model']['submodule1'] - assert cli.model.submodule2 == cli.config_init['model']['submodule2'] - assert isinstance(cli.config_init['model']['submodule1'], BoringModel) - assert isinstance(cli.config_init['model']['submodule2'], BoringModel) + assert cli.config['model']['main_param'] == 2 + assert isinstance(cli.model.submodule1, BoringModel) + assert isinstance(cli.model.submodule2, BoringModel) + + +@pytest.mark.skipif(torchvision_version < version.parse('0.8.0'), reason='torchvision>=0.8.0 is required') +def test_lightning_cli_torch_modules(tmpdir): + + class TestModule(BoringModel): + + def __init__( + self, + activation: torch.nn.Module = None, + transform: Optional[List[torch.nn.Module]] = None, + ): + super().__init__() + self.activation = activation + self.transform = transform + + config = """model: + activation: + class_path: torch.nn.LeakyReLU + init_args: + negative_slope: 0.2 + transform: + - class_path: torchvision.transforms.Resize + init_args: + size: 64 + - class_path: torchvision.transforms.CenterCrop + init_args: + size: 64 + """ + config_path = tmpdir / 'config.yaml' + with open(config_path, 'w') as f: + f.write(config) + + cli_args = [ + f'--trainer.default_root_dir={tmpdir}', + '--trainer.max_epochs=1', + f'--config={str(config_path)}', + ] + + with mock.patch('sys.argv', ['any.py'] + cli_args): + cli = LightningCLI(TestModule) + + assert 
isinstance(cli.model.activation, torch.nn.LeakyReLU) + assert cli.model.activation.negative_slope == 0.2 + assert len(cli.model.transform) == 2 + assert all(isinstance(v, torch.nn.Module) for v in cli.model.transform) + + +class BoringModelRequiredClasses(BoringModel): + + def __init__( + self, + num_classes: int, + batch_size: int = 8, + ): + super().__init__() + self.num_classes = num_classes + self.batch_size = batch_size + + +class BoringDataModuleBatchSizeAndClasses(BoringDataModule): + + def __init__( + self, + batch_size: int = 8, + ): + super().__init__() + self.batch_size = batch_size + self.num_classes = 5 # only available after instantiation + + +def test_lightning_cli_link_arguments(tmpdir): + + class MyLightningCLI(LightningCLI): + + def add_arguments_to_parser(self, parser): + parser.link_arguments('data.batch_size', 'model.batch_size') + parser.link_arguments('data.num_classes', 'model.num_classes', apply_on='instantiate') + + cli_args = [ + f'--trainer.default_root_dir={tmpdir}', + '--trainer.max_epochs=1', + '--data.batch_size=12', + ] + + with mock.patch('sys.argv', ['any.py'] + cli_args): + cli = MyLightningCLI(BoringModelRequiredClasses, BoringDataModuleBatchSizeAndClasses) + + assert cli.model.batch_size == 12 + assert cli.model.num_classes == 5 + + class MyLightningCLI(LightningCLI): + + def add_arguments_to_parser(self, parser): + parser.link_arguments('data.batch_size', 'model.init_args.batch_size') + parser.link_arguments('data.num_classes', 'model.init_args.num_classes', apply_on='instantiate') + + cli_args[-1] = '--model=tests.utilities.test_cli.BoringModelRequiredClasses' + + with mock.patch('sys.argv', ['any.py'] + cli_args): + cli = MyLightningCLI( + BoringModelRequiredClasses, + BoringDataModuleBatchSizeAndClasses, + subclass_mode_model=True, + ) + + assert cli.model.batch_size == 8 + assert cli.model.num_classes == 5 + + +def test_cli_config_overwrite(tmpdir): + trainer_defaults = {'default_root_dir': str(tmpdir), 'logger': False, 'max_steps': 1, 'max_epochs': 1} + + with mock.patch('sys.argv', ['any.py']): + LightningCLI(BoringModel, trainer_defaults=trainer_defaults) + with mock.patch('sys.argv', ['any.py']), pytest.raises(RuntimeError, match='Aborting to avoid overwriting'): + LightningCLI(BoringModel, trainer_defaults=trainer_defaults) + with mock.patch('sys.argv', ['any.py']): + LightningCLI(BoringModel, save_config_overwrite=True, trainer_defaults=trainer_defaults) + + +def test_lightning_cli_optimizer(tmpdir): + + class MyLightningCLI(LightningCLI): + + def add_arguments_to_parser(self, parser): + parser.add_optimizer_args(torch.optim.Adam) + + cli_args = [ + f'--trainer.default_root_dir={tmpdir}', + '--trainer.max_epochs=1', + ] + + match = ( + 'BoringModel.configure_optimizers` will be overridden by ' + '`MyLightningCLI.add_configure_optimizers_method_to_model`' + ) + with mock.patch('sys.argv', ['any.py'] + cli_args), pytest.warns(UserWarning, match=match): + cli = MyLightningCLI(BoringModel) + + assert cli.model.configure_optimizers is not BoringModel.configure_optimizers + assert len(cli.trainer.optimizers) == 1 + assert isinstance(cli.trainer.optimizers[0], torch.optim.Adam) + assert len(cli.trainer.lr_schedulers) == 0 + + +def test_lightning_cli_optimizer_and_lr_scheduler(tmpdir): + + class MyLightningCLI(LightningCLI): + + def add_arguments_to_parser(self, parser): + parser.add_optimizer_args(torch.optim.Adam) + parser.add_lr_scheduler_args(torch.optim.lr_scheduler.ExponentialLR) + + cli_args = [ + f'--trainer.default_root_dir={tmpdir}', + 
'--trainer.max_epochs=1', + '--lr_scheduler.gamma=0.8', + ] + + with mock.patch('sys.argv', ['any.py'] + cli_args): + cli = MyLightningCLI(BoringModel) + + assert cli.model.configure_optimizers is not BoringModel.configure_optimizers + assert len(cli.trainer.optimizers) == 1 + assert isinstance(cli.trainer.optimizers[0], torch.optim.Adam) + assert len(cli.trainer.lr_schedulers) == 1 + assert isinstance(cli.trainer.lr_schedulers[0]['scheduler'], torch.optim.lr_scheduler.ExponentialLR) + assert cli.trainer.lr_schedulers[0]['scheduler'].gamma == 0.8 + + +def test_lightning_cli_optimizer_and_lr_scheduler_subclasses(tmpdir): + + class MyLightningCLI(LightningCLI): + + def add_arguments_to_parser(self, parser): + parser.add_optimizer_args((torch.optim.SGD, torch.optim.Adam)) + parser.add_lr_scheduler_args((torch.optim.lr_scheduler.StepLR, torch.optim.lr_scheduler.ExponentialLR)) + + optimizer_arg = dict( + class_path='torch.optim.Adam', + init_args=dict(lr=0.01), + ) + lr_scheduler_arg = dict( + class_path='torch.optim.lr_scheduler.StepLR', + init_args=dict(step_size=50), + ) + cli_args = [ + f'--trainer.default_root_dir={tmpdir}', + '--trainer.max_epochs=1', + f'--optimizer={json.dumps(optimizer_arg)}', + f'--lr_scheduler={json.dumps(lr_scheduler_arg)}', + ] + + with mock.patch('sys.argv', ['any.py'] + cli_args): + cli = MyLightningCLI(BoringModel) + + assert len(cli.trainer.optimizers) == 1 + assert isinstance(cli.trainer.optimizers[0], torch.optim.Adam) + assert len(cli.trainer.lr_schedulers) == 1 + assert isinstance(cli.trainer.lr_schedulers[0]['scheduler'], torch.optim.lr_scheduler.StepLR) + assert cli.trainer.lr_schedulers[0]['scheduler'].step_size == 50 + + +def test_lightning_cli_optimizers_and_lr_scheduler_with_link_to(tmpdir): + + class MyLightningCLI(LightningCLI): + + def add_arguments_to_parser(self, parser): + parser.add_optimizer_args(torch.optim.Adam, nested_key='optim1', link_to='model.optim1') + parser.add_optimizer_args((torch.optim.ASGD, torch.optim.SGD), nested_key='optim2', link_to='model.optim2') + parser.add_lr_scheduler_args(torch.optim.lr_scheduler.ExponentialLR, link_to='model.scheduler') + + class TestModel(BoringModel): + + def __init__( + self, + optim1: dict, + optim2: dict, + scheduler: dict, + ): + super().__init__() + self.optim1 = instantiate_class(self.parameters(), optim1) + self.optim2 = instantiate_class(self.parameters(), optim2) + self.scheduler = instantiate_class(self.optim1, scheduler) + + cli_args = [ + f'--trainer.default_root_dir={tmpdir}', + '--trainer.max_epochs=1', + '--optim2={"class_path": "torch.optim.SGD", "init_args": {"lr": 0.01}}', + '--lr_scheduler.gamma=0.2', + ] + + with mock.patch('sys.argv', ['any.py'] + cli_args): + cli = MyLightningCLI(TestModel) + + assert isinstance(cli.model.optim1, torch.optim.Adam) + assert isinstance(cli.model.optim2, torch.optim.SGD) + assert isinstance(cli.model.scheduler, torch.optim.lr_scheduler.ExponentialLR) diff --git a/tests/utilities/test_model_helpers.py b/tests/utilities/test_model_helpers.py new file mode 100644 index 0000000000000..f63d46bdb67b9 --- /dev/null +++ b/tests/utilities/test_model_helpers.py @@ -0,0 +1,67 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from functools import partial +from unittest.mock import Mock + +import pytest + +from pytorch_lightning import LightningDataModule, Trainer +from pytorch_lightning.utilities.model_helpers import is_overridden +from tests.helpers import BoringDataModule, BoringModel + + +def test_is_overridden(): + model = BoringModel() + datamodule = BoringDataModule() + + # edge cases + assert not is_overridden("whatever", None) + with pytest.raises(ValueError, match="Expected a parent"): + is_overridden("whatever", object()) + assert not is_overridden("whatever", model) + assert not is_overridden("whatever", model, parent=LightningDataModule) + + class TestModel(BoringModel): + + def foo(self): + pass + + with pytest.raises(ValueError, match="The parent should define the method"): + is_overridden("foo", TestModel()) + + # normal usage + assert is_overridden("training_step", model) + assert is_overridden("train_dataloader", datamodule) + + # `Mock` support + mock = Mock(spec=BoringModel, wraps=model) + assert is_overridden("training_step", mock) + mock = Mock(spec=BoringDataModule, wraps=datamodule) + assert is_overridden("train_dataloader", mock) + + # `partial` support + model.training_step = partial(model.training_step) + assert is_overridden("training_step", model) + + # `_PatchDataLoader.patch_loader_code` support + class TestModel(BoringModel): + + def on_fit_start(self): + assert is_overridden("train_dataloader", self) + self.on_fit_start_called = True + + model = TestModel() + trainer = Trainer(fast_dev_run=1) + trainer.fit(model, train_dataloader=model.train_dataloader()) + assert model.on_fit_start_called diff --git a/tests/utilities/test_warnings.py b/tests/utilities/test_warnings.py new file mode 100644 index 0000000000000..2e0c372e5c39f --- /dev/null +++ b/tests/utilities/test_warnings.py @@ -0,0 +1,52 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Test that the warnings actually appear and they have the correct `stacklevel` + +Needs to be run outside of `pytest` as it captures all the warnings. 
+""" +import os +from contextlib import redirect_stderr +from io import StringIO + +from pytorch_lightning.utilities.warnings import _warn, rank_zero_deprecation, rank_zero_warn, WarningCache + +running_special = os.getenv("PL_RUNNING_SPECIAL_TESTS", "0") == "1" +if running_special: + + stderr = StringIO() + with redirect_stderr(stderr): + _warn("test1") + _warn("test2", DeprecationWarning) + + rank_zero_warn("test3") + rank_zero_warn("test4", DeprecationWarning) + + rank_zero_deprecation("test5") + + cache = WarningCache() + cache.warn("test6") + cache.deprecation("test7") + + output = stderr.getvalue() + assert "test_warnings.py:30: UserWarning: test1" in output + assert "test_warnings.py:31: DeprecationWarning: test2" in output + + assert "test_warnings.py:33: UserWarning: test3" in output + assert "test_warnings.py:34: DeprecationWarning: test4" in output + + assert "test_warnings.py:36: LightningDeprecationWarning: test5" in output + + assert "test_warnings.py:39: UserWarning: test6" in output + assert "test_warnings.py:40: LightningDeprecationWarning: test7" in output From cbc1136bf28eb9d60b596ccb6bb6dcc97ac5814e Mon Sep 17 00:00:00 2001 From: Carlos Mocholi Date: Sun, 4 Jul 2021 17:37:43 +0200 Subject: [PATCH 05/26] Fix test --- pytorch_lightning/callbacks/early_stopping.py | 1 - tests/callbacks/test_early_stopping.py | 7 ++++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pytorch_lightning/callbacks/early_stopping.py b/pytorch_lightning/callbacks/early_stopping.py index 6f9ea07c0716d..719607b718bb1 100644 --- a/pytorch_lightning/callbacks/early_stopping.py +++ b/pytorch_lightning/callbacks/early_stopping.py @@ -175,7 +175,6 @@ def on_train_epoch_end(self, trainer, pl_module) -> None: def on_validation_end(self, trainer, pl_module) -> None: if self._check_on_train_epoch_end or self._should_skip_check(trainer): return - self._run_early_stopping_check(trainer) def _run_early_stopping_check(self, trainer) -> None: diff --git a/tests/callbacks/test_early_stopping.py b/tests/callbacks/test_early_stopping.py index d7a6f15459912..1582a8ed90c91 100644 --- a/tests/callbacks/test_early_stopping.py +++ b/tests/callbacks/test_early_stopping.py @@ -45,8 +45,8 @@ def on_train_start(self, trainer, pl_module): if self.expected_state: assert self.on_save_checkpoint(trainer, pl_module, {}) == self.expected_state - def on_validation_end(self, trainer, pl_module): - super().on_validation_end(trainer, pl_module) + def on_train_epoch_end(self, trainer, pl_module): + super().on_train_epoch_end(trainer, pl_module) self.saved_states.append(self.on_save_checkpoint(trainer, pl_module, {}).copy()) @@ -69,12 +69,13 @@ def test_resume_early_stopping_from_checkpoint(tmpdir): ) trainer.fit(model, datamodule=dm) + assert len(early_stop_callback.saved_states) == 4 + checkpoint_filepath = checkpoint_callback.kth_best_model_path # ensure state is persisted properly checkpoint = torch.load(checkpoint_filepath) # the checkpoint saves "epoch + 1" early_stop_callback_state = early_stop_callback.saved_states[checkpoint["epoch"] - 1] - assert 4 == len(early_stop_callback.saved_states) assert checkpoint["callbacks"][type(early_stop_callback)] == early_stop_callback_state # ensure state is reloaded properly (assertion in the callback) From 56e9d893a42e4cd85f91976730793d6f13347a6e Mon Sep 17 00:00:00 2001 From: Carlos Mocholi Date: Sun, 4 Jul 2021 17:55:35 +0200 Subject: [PATCH 06/26] Fix test --- tests/trainer/optimization/test_optimizers.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff 
--git a/tests/trainer/optimization/test_optimizers.py b/tests/trainer/optimization/test_optimizers.py index 6165aa132153b..faf5434d6ba5a 100644 --- a/tests/trainer/optimization/test_optimizers.py +++ b/tests/trainer/optimization/test_optimizers.py @@ -662,7 +662,8 @@ def on_save_checkpoint(self, checkpoint): assert model.on_save_checkpoint_called -def test_plateau_scheduler_lr_step_interval_updated_after_saving(tmpdir): +@pytest.mark.parametrize("save_on_train_epoch_end", (False, True)) +def test_plateau_scheduler_lr_step_interval_updated_after_saving(tmpdir, save_on_train_epoch_end): batches = 4 trainer = Trainer( default_root_dir=tmpdir, @@ -671,7 +672,7 @@ def test_plateau_scheduler_lr_step_interval_updated_after_saving(tmpdir): max_epochs=1, limit_train_batches=batches, limit_val_batches=1, - callbacks=[ModelCheckpoint(dirpath=tmpdir)] + callbacks=[ModelCheckpoint(dirpath=tmpdir, save_on_train_epoch_end=save_on_train_epoch_end)] ) class TestModel(BoringModel): @@ -693,8 +694,8 @@ def configure_optimizers(self): def on_save_checkpoint(self, checkpoint): lr_dict_1 = checkpoint['lr_schedulers'][0] - # since plateau schedulers are updated after saving checkpoint, last_epoch should be 3 - assert lr_dict_1['last_epoch'] == batches - 1 # last epoch starts at 0 + last_epoch = lr_dict_1['last_epoch'] + assert last_epoch == batches - (not save_on_train_epoch_end) # last epoch starts at 0 lr_dict_2 = checkpoint['lr_schedulers'][1] assert lr_dict_2['_step_count'] - 1 == batches # step count starts at 1 From bbac98b4dda6466bf9a2c0071a02de61f9e7da8f Mon Sep 17 00:00:00 2001 From: Carlos Mocholi Date: Mon, 5 Jul 2021 00:29:20 +0200 Subject: [PATCH 07/26] Fix tests --- tests/core/test_metric_result_integration.py | 2 +- tests/models/test_hooks.py | 10 +++++----- tests/trainer/test_dataloaders.py | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/core/test_metric_result_integration.py b/tests/core/test_metric_result_integration.py index 7471914886a27..86cfa35746cda 100644 --- a/tests/core/test_metric_result_integration.py +++ b/tests/core/test_metric_result_integration.py @@ -329,7 +329,7 @@ def on_save_checkpoint(self, checkpoint) -> None: assert new_results['validation_step.v'].value.device.type == 'cpu' model = LoggingModel() - ckpt = ModelCheckpoint(dirpath=tmpdir, save_last=True) + ckpt = ModelCheckpoint(dirpath=tmpdir, save_on_train_epoch_end=False) trainer = Trainer( default_root_dir=tmpdir, max_epochs=2, diff --git a/tests/models/test_hooks.py b/tests/models/test_hooks.py index 9a689fe9d725a..630b3f1f8b250 100644 --- a/tests/models/test_hooks.py +++ b/tests/models/test_hooks.py @@ -450,14 +450,14 @@ def test_trainer_model_hook_system_fit(tmpdir): dict(name='on_validation_start'), *model._eval_epoch('validation', trainer, model, val_batches, 'x'), dict(name='Callback.on_validation_end', args=(trainer, model)), - # `ModelCheckpoint.save_checkpoint` is called here from `Callback.on_validation_end` - dict(name='Callback.on_save_checkpoint', args=(trainer, model, saved_ckpt)), - dict(name='on_save_checkpoint', args=(saved_ckpt, )), dict(name='on_validation_end'), dict(name='train'), dict(name='on_validation_model_train'), dict(name='training_epoch_end', args=([dict(loss=ANY)] * train_batches, )), dict(name='Callback.on_train_epoch_end', args=(trainer, model, [dict(loss=ANY)] * train_batches)), + # `ModelCheckpoint.save_checkpoint` is called here from `Callback.on_train_epoch_end` + dict(name='Callback.on_save_checkpoint', args=(trainer, model, saved_ckpt)), + 
From bbac98b4dda6466bf9a2c0071a02de61f9e7da8f Mon Sep 17 00:00:00 2001
From: Carlos Mocholi
Date: Mon, 5 Jul 2021 00:29:20 +0200
Subject: [PATCH 07/26] Fix tests

---
 tests/core/test_metric_result_integration.py |  2 +-
 tests/models/test_hooks.py                   | 10 +++++-----
 tests/trainer/test_dataloaders.py            |  2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/tests/core/test_metric_result_integration.py b/tests/core/test_metric_result_integration.py
index 7471914886a27..86cfa35746cda 100644
--- a/tests/core/test_metric_result_integration.py
+++ b/tests/core/test_metric_result_integration.py
@@ -329,7 +329,7 @@ def on_save_checkpoint(self, checkpoint) -> None:
             assert new_results['validation_step.v'].value.device.type == 'cpu'

     model = LoggingModel()
-    ckpt = ModelCheckpoint(dirpath=tmpdir, save_last=True)
+    ckpt = ModelCheckpoint(dirpath=tmpdir, save_on_train_epoch_end=False)
     trainer = Trainer(
         default_root_dir=tmpdir,
         max_epochs=2,
diff --git a/tests/models/test_hooks.py b/tests/models/test_hooks.py
index 9a689fe9d725a..630b3f1f8b250 100644
--- a/tests/models/test_hooks.py
+++ b/tests/models/test_hooks.py
@@ -450,14 +450,14 @@ def test_trainer_model_hook_system_fit(tmpdir):
         dict(name='on_validation_start'),
         *model._eval_epoch('validation', trainer, model, val_batches, 'x'),
         dict(name='Callback.on_validation_end', args=(trainer, model)),
-        # `ModelCheckpoint.save_checkpoint` is called here from `Callback.on_validation_end`
-        dict(name='Callback.on_save_checkpoint', args=(trainer, model, saved_ckpt)),
-        dict(name='on_save_checkpoint', args=(saved_ckpt, )),
         dict(name='on_validation_end'),
         dict(name='train'),
         dict(name='on_validation_model_train'),
         dict(name='training_epoch_end', args=([dict(loss=ANY)] * train_batches, )),
         dict(name='Callback.on_train_epoch_end', args=(trainer, model, [dict(loss=ANY)] * train_batches)),
+        # `ModelCheckpoint.save_checkpoint` is called here from `Callback.on_train_epoch_end`
+        dict(name='Callback.on_save_checkpoint', args=(trainer, model, saved_ckpt)),
+        dict(name='on_save_checkpoint', args=(saved_ckpt, )),
         dict(name='on_train_epoch_end', args=([dict(loss=ANY)] * train_batches, )),
         dict(name='Callback.on_epoch_end', args=(trainer, model)),
         dict(name='on_epoch_end'),
@@ -562,11 +562,11 @@ def test_trainer_model_hook_system_fit_no_val_and_resume(tmpdir):
             model,
             [dict(loss=ANY)] * train_batches,
         )),
+        dict(name='Callback.on_save_checkpoint', args=(trainer, model, saved_ckpt)),
+        dict(name='on_save_checkpoint', args=(saved_ckpt, )),
         dict(name='on_train_epoch_end', args=([dict(loss=ANY)] * train_batches, )),
         dict(name='Callback.on_epoch_end', args=(trainer, model)),
         dict(name='on_epoch_end'),
-        dict(name='Callback.on_save_checkpoint', args=(trainer, model, saved_ckpt)),
-        dict(name='on_save_checkpoint', args=(saved_ckpt, )),
         dict(name='Callback.on_train_end', args=(trainer, model)),
         dict(name='on_train_end'),
         dict(name='Callback.on_fit_end', args=(trainer, model)),
diff --git a/tests/trainer/test_dataloaders.py b/tests/trainer/test_dataloaders.py
index 14f47a2558eff..ff4d74183d9b1 100644
--- a/tests/trainer/test_dataloaders.py
+++ b/tests/trainer/test_dataloaders.py
@@ -582,7 +582,7 @@ def test_dataloaders_with_fast_dev_run(tmpdir, fast_dev_run):
         assert trainer.max_epochs == 1

         trainer.fit(model)
-        assert not trainer.disable_validation
+        assert trainer.enable_validation
         assert trainer.num_training_batches == fast_dev_run
         assert trainer.num_val_batches == [fast_dev_run] * len(trainer.val_dataloaders)
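The reordered hook expectations above are the observable contract of this PR: ``on_save_checkpoint`` now fires inside the train-epoch-end phase rather than right after validation. A small sketch of how one could observe the order directly — an illustration only, using v1.4-era callback hook names:

    import pytorch_lightning as pl

    class HookRecorder(pl.Callback):
        """Append each relevant hook name as it fires."""

        def __init__(self):
            self.calls = []

        def on_validation_end(self, trainer, pl_module):
            self.calls.append("on_validation_end")

        def on_train_epoch_end(self, trainer, pl_module, *args):
            self.calls.append("on_train_epoch_end")

        def on_save_checkpoint(self, trainer, pl_module, checkpoint):
            self.calls.append("on_save_checkpoint")

    # With save_on_train_epoch_end=True (the new default), the expected order per epoch is:
    #   on_validation_end -> on_train_epoch_end -> on_save_checkpoint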
From a0afd133a24c7fbd2f06759993e07860cc077fb4 Mon Sep 17 00:00:00 2001
From: Carlos Mocholi
Date: Mon, 5 Jul 2021 01:43:12 +0200
Subject: [PATCH 08/26] Fix test

---
 pytorch_lightning/callbacks/pruning.py |  1 +
 tests/callbacks/test_pruning.py        | 23 +++++++++++++++--------
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/pytorch_lightning/callbacks/pruning.py b/pytorch_lightning/callbacks/pruning.py
index ced8d29c14424..a15073ed719f0 100644
--- a/pytorch_lightning/callbacks/pruning.py
+++ b/pytorch_lightning/callbacks/pruning.py
@@ -417,6 +417,7 @@ def on_save_checkpoint(
         rank_zero_debug("`ModelPruning.on_save_checkpoint`. Pruning is made permanent for this checkpoint")
         prev_device = pl_module.device
         # prune a copy so training can continue with the same buffers
+        rank_zero_debug(f"{pl_module.layer.mlp_3.weight.grad_fn=}")
         copy = deepcopy(pl_module.to("cpu"))
         self.make_pruning_permanent(copy)
         checkpoint["state_dict"] = copy.state_dict()
diff --git a/tests/callbacks/test_pruning.py b/tests/callbacks/test_pruning.py
index 1a5ddad64106e..3e14425a036ed 100644
--- a/tests/callbacks/test_pruning.py
+++ b/tests/callbacks/test_pruning.py
@@ -304,22 +304,27 @@ def test_multiple_pruning_callbacks(tmpdir, caplog, make_pruning_permanent: bool
     assert not has_pruning if make_pruning_permanent else has_pruning


-@pytest.mark.parametrize("on_train_epoch_end", (False, True))
-def test_permanent_when_model_is_saved_multiple_times(tmpdir, caplog, on_train_epoch_end):
+@pytest.mark.parametrize("prune_on_train_epoch_end", (False, True))
+@pytest.mark.parametrize("save_on_train_epoch_end", (False, True))
+def test_permanent_when_model_is_saved_multiple_times(
+    tmpdir, caplog, prune_on_train_epoch_end, save_on_train_epoch_end
+):
     """
     When a model is saved multiple times and make_permanent=True, we need to make sure a copy is pruned
     and not the trained model if we want to continue with the same pruning buffers.
     """
+    if prune_on_train_epoch_end and save_on_train_epoch_end:
+        # TODO: is this expected?
+        pytest.xfail("`pytorch_prune.global_unstructured` sets the `grad_fn` so we can't deepcopy on save")

     class TestPruning(ModelPruning):

         def on_save_checkpoint(self, trainer, pl_module, checkpoint):
+            had_buffers = hasattr(pl_module.layer.mlp_3, "weight_orig")
             super().on_save_checkpoint(trainer, pl_module, checkpoint)
-            if not on_train_epoch_end:
-                # these checks only work if pruning on `validation_epoch_end`
-                # because `on_save_checkpoint` is called before `on_train_epoch_end`
-                assert "layer.mlp_3.weight_orig" not in checkpoint["state_dict"]
+            assert "layer.mlp_3.weight_orig" not in checkpoint["state_dict"]
+            if had_buffers:
                 assert hasattr(pl_module.layer.mlp_3, "weight_orig")

     model = TestModel()
@@ -328,9 +333,11 @@ def on_save_checkpoint(self, trainer, pl_module, checkpoint):
         parameters_to_prune=[(model.layer.mlp_3, "weight")],
         verbose=1,
         make_pruning_permanent=True,
-        prune_on_train_epoch_end=on_train_epoch_end,
+        prune_on_train_epoch_end=prune_on_train_epoch_end,
+    )
+    ckpt_callback = ModelCheckpoint(
+        monitor="test", save_top_k=2, save_last=True, save_on_train_epoch_end=save_on_train_epoch_end
     )
-    ckpt_callback = ModelCheckpoint(monitor="test", save_top_k=2, save_last=True)
     trainer = Trainer(callbacks=[pruning_callback, ckpt_callback], max_epochs=3, progress_bar_refresh_rate=0)
     with caplog.at_level(INFO):
         trainer.fit(model)

From 5241864f2cf02ac59351da4f1b1f14b13f21e6e9 Mon Sep 17 00:00:00 2001
From: Carlos Mocholi
Date: Mon, 5 Jul 2021 01:58:22 +0200
Subject: [PATCH 09/26] Fix test

---
 tests/checkpointing/test_checkpoint_callback_frequency.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/checkpointing/test_checkpoint_callback_frequency.py b/tests/checkpointing/test_checkpoint_callback_frequency.py
index 8617a9f8f7050..2b135c0a5b915 100644
--- a/tests/checkpointing/test_checkpoint_callback_frequency.py
+++ b/tests/checkpointing/test_checkpoint_callback_frequency.py
@@ -50,7 +50,7 @@ def test_mc_called(tmpdir):
 @mock.patch('torch.save')
 @pytest.mark.parametrize(
     ['epochs', 'val_check_interval', 'expected'],
-    [(1, 1.0, 1), (2, 1.0, 2), (1, 0.25, 4), (2, 0.3, 7)],
+    [(1, 1.0, 1), (2, 1.0, 2), (1, 0.25, 4), (2, 0.3, 6)],
 )
 def test_default_checkpoint_freq(save_mock, tmpdir, epochs: int, val_check_interval: float, expected: int):

@@ -60,6 +60,7 @@ def test_default_checkpoint_freq(save_mock, tmpdir, epochs: int, val_check_inter
         max_epochs=epochs,
         weights_summary=None,
         val_check_interval=val_check_interval,
+        limit_val_batches=1,
         progress_bar_refresh_rate=0,
     )
     trainer.fit(model)
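The change from 7 to 6 expected saves in PATCH 09 follows from the new scheme: with ``val_check_interval=0.3`` validation runs ``int(1 / 0.3) == 3`` times per epoch, and each run triggers exactly one save, with no extra end-of-epoch duplicate. As a counting sketch (assuming one save per validation run, which is what the test exercises):

    def expected_saves(epochs: int, val_check_interval: float) -> int:
        # validation runs int(1 / interval) times per epoch; one save per run
        val_runs_per_epoch = int(1 / val_check_interval)
        return epochs * val_runs_per_epoch

    assert expected_saves(1, 1.0) == 1
    assert expected_saves(2, 1.0) == 2
    assert expected_saves(1, 0.25) == 4
    assert expected_saves(2, 0.3) == 6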
From 9f5d886c43bcc688bd1bb7597645955d891ff12e Mon Sep 17 00:00:00 2001
From: Carlos Mocholi
Date: Mon, 5 Jul 2021 02:01:25 +0200
Subject: [PATCH 10/26] Remove debug statement

---
 pytorch_lightning/callbacks/pruning.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pytorch_lightning/callbacks/pruning.py b/pytorch_lightning/callbacks/pruning.py
index a15073ed719f0..ced8d29c14424 100644
--- a/pytorch_lightning/callbacks/pruning.py
+++ b/pytorch_lightning/callbacks/pruning.py
@@ -417,7 +417,6 @@ def on_save_checkpoint(
         rank_zero_debug("`ModelPruning.on_save_checkpoint`. Pruning is made permanent for this checkpoint")
         prev_device = pl_module.device
         # prune a copy so training can continue with the same buffers
-        rank_zero_debug(f"{pl_module.layer.mlp_3.weight.grad_fn=}")
         copy = deepcopy(pl_module.to("cpu"))
         self.make_pruning_permanent(copy)
         checkpoint["state_dict"] = copy.state_dict()

From 45156ee89f81e1a6d48d55da84d4482fee2b8f3b Mon Sep 17 00:00:00 2001
From: Carlos Mocholi
Date: Mon, 5 Jul 2021 12:26:53 +0200
Subject: [PATCH 11/26] Fix test

---
 tests/checkpointing/test_checkpoint_callback_frequency.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/checkpointing/test_checkpoint_callback_frequency.py b/tests/checkpointing/test_checkpoint_callback_frequency.py
index 2b135c0a5b915..2d18aacde4489 100644
--- a/tests/checkpointing/test_checkpoint_callback_frequency.py
+++ b/tests/checkpointing/test_checkpoint_callback_frequency.py
@@ -74,7 +74,7 @@ def test_default_checkpoint_freq(save_mock, tmpdir, epochs: int, val_check_inter
     (1, 1, 1.0, 1),
     (2, 2, 1.0, 2),
     (2, 1, 0.25, 4),
-    (2, 2, 0.3, 7),
+    (2, 2, 0.3, 6),
 ])
 def test_top_k(save_mock, tmpdir, k: int, epochs: int, val_check_interval: float, expected: int):
From 76c6be731fe45e01a6833a20c9c4c84dd5343a2c Mon Sep 17 00:00:00 2001
From: Carlos Mocholi
Date: Thu, 8 Jul 2021 01:15:21 +0200
Subject: [PATCH 12/26] Docs and deprecation

---
 pytorch_lightning/callbacks/early_stopping.py   |  2 +-
 .../callbacks/model_checkpoint.py               | 86 +++++++++++--------
 tests/deprecated_api/test_remove_1-6.py         |  6 ++
 tests/loggers/test_wandb.py                     |  1 -
 4 files changed, 56 insertions(+), 39 deletions(-)

diff --git a/pytorch_lightning/callbacks/early_stopping.py b/pytorch_lightning/callbacks/early_stopping.py
index c28e5cec5b982..0015ac47f0d41 100644
--- a/pytorch_lightning/callbacks/early_stopping.py
+++ b/pytorch_lightning/callbacks/early_stopping.py
@@ -61,7 +61,7 @@ class EarlyStopping(Callback):
         stopping_threshold: Stop training immediately once the monitored quantity reaches this threshold.
         divergence_threshold: Stop training as soon as the monitored quantity becomes worse than this threshold.
         check_on_train_epoch_end: whether to run early stopping at the end of the training epoch.
-            If this is ``False``, then the check runs at the end of the validation epoch.
+            If this is ``False``, then the check runs at the end of validation.

     Raises:
         MisconfigurationException:
diff --git a/pytorch_lightning/callbacks/model_checkpoint.py b/pytorch_lightning/callbacks/model_checkpoint.py
index 829fcc0df8dcd..3a1317a4cb724 100644
--- a/pytorch_lightning/callbacks/model_checkpoint.py
+++ b/pytorch_lightning/callbacks/model_checkpoint.py
@@ -104,28 +104,35 @@ class ModelCheckpoint(Callback):
         every_n_train_steps: Number of training steps between checkpoints.
             If ``every_n_train_steps == None or every_n_train_steps == 0``, we skip saving during training.
             To disable, set ``every_n_train_steps = 0``. This value must be ``None`` or non-negative.
-            This must be mutually exclusive with ``train_time_interval`` and ``every_n_val_epochs``.
+            This must be mutually exclusive with ``train_time_interval`` and ``every_n_epochs``.
         train_time_interval: Checkpoints are monitored at the specified time interval.
             For all practical purposes, this cannot be smaller than the amount
             of time it takes to process a single training batch. This is not
            guaranteed to execute at the exact time specified, but should be close.
-            This must be mutually exclusive with ``every_n_train_steps`` and ``every_n_val_epochs``.
-            FIXME
-        every_n_val_epochs: Number of validation epochs between checkpoints.
-            If ``every_n_val_epochs == None or every_n_val_epochs == 0``, we skip saving on validation end.
-            To disable, set ``every_n_val_epochs = 0``. This value must be ``None`` or non-negative.
+            This must be mutually exclusive with ``every_n_train_steps`` and ``every_n_epochs``.
+        every_n_epochs: Number of epochs between checkpoints.
+            If ``every_n_epochs == None or every_n_epochs == 0``, we skip saving when the epoch ends.
+            To disable, set ``every_n_epochs = 0``. This value must be ``None`` or non-negative.
             This must be mutually exclusive with ``every_n_train_steps`` and ``train_time_interval``.
-            Setting both ``ModelCheckpoint(..., every_n_val_epochs=V)`` and
+            Setting both ``ModelCheckpoint(..., every_n_epochs=V, save_on_train_epoch_end=False)`` and
             ``Trainer(max_epochs=N, check_val_every_n_epoch=M)``
             will only save checkpoints at epochs 0 < E <= N
-            where both values for ``every_n_val_epochs`` and ``check_val_every_n_epoch`` evenly divide E.
+            where both values for ``every_n_epochs`` and ``check_val_every_n_epoch`` evenly divide E.
+        save_on_train_epoch_end: Whether to run checkpointing at the end of the training epoch.
+            If this is ``False``, then the check runs at the end of validation.
         period: Interval (number of epochs) between checkpoints.
-        save_on_train_epoch_end: FIXME

             .. warning::
                 This argument has been deprecated in v1.3 and will be removed in v1.5.

-                Use ``every_n_val_epochs`` instead.
+                Use ``every_n_epochs`` instead.
+
+        every_n_val_epochs: Number of epochs between checkpoints.
+
+            .. warning::
+                This argument has been deprecated in v1.4 and will be removed in v1.6.
+
+                Use ``every_n_epochs`` instead.
+
     Note:
         For extra customization, ModelCheckpoint includes the following attributes:
@@ -203,9 +210,10 @@ def __init__(
         auto_insert_metric_name: bool = True,
         every_n_train_steps: Optional[int] = None,
         train_time_interval: Optional[timedelta] = None,
-        every_n_val_epochs: Optional[int] = None,
-        period: Optional[int] = None,
+        every_n_epochs: Optional[int] = None,
         save_on_train_epoch_end: Optional[bool] = None,
+        period: Optional[int] = None,
+        every_n_val_epochs: Optional[int] = None,
     ):
         super().__init__()
         self.monitor = monitor
""" if ( self._should_skip_saving_checkpoint(trainer) or not self._save_on_train_epoch_end - # FIXME: repurpose every_n_val_epochs to work for this hook - or self._every_n_val_epochs < 1 or (trainer.current_epoch + 1) % self._every_n_val_epochs != 0 + or self._every_n_epochs < 1 or (trainer.current_epoch + 1) % self._every_n_epochs != 0 ): return # as we advance one step at end of training, we use `global_step - 1` to avoid saving duplicates @@ -295,8 +309,8 @@ def on_train_epoch_end( def on_validation_end(self, trainer: 'pl.Trainer', pl_module: 'pl.LightningModule') -> None: """ Save a checkpoint at the end of the validation stage. """ if ( - self._should_skip_saving_checkpoint(trainer) or self._save_on_train_epoch_end - or self._every_n_val_epochs < 1 or (trainer.current_epoch + 1) % self._every_n_val_epochs != 0 + self._should_skip_saving_checkpoint(trainer) or self._save_on_train_epoch_end or self._every_n_epochs < 1 + or (trainer.current_epoch + 1) % self._every_n_epochs != 0 ): return self.save_checkpoint(trainer) @@ -391,18 +405,16 @@ def __validate_init_configuration(self) -> None: raise MisconfigurationException( f'Invalid value for every_n_train_steps={self._every_n_train_steps}. Must be >= 0' ) - if self._every_n_val_epochs < 0: - raise MisconfigurationException( - f'Invalid value for every_n_val_epochs={self._every_n_val_epochs}. Must be >= 0' - ) + if self._every_n_epochs < 0: + raise MisconfigurationException(f'Invalid value for every_n_epochs={self._every_n_epochs}. Must be >= 0') every_n_train_steps_triggered = self._every_n_train_steps >= 1 - every_n_val_epochs_triggered = self._every_n_val_epochs >= 1 + every_n_epochs_triggered = self._every_n_epochs >= 1 train_time_interval_triggered = self._train_time_interval is not None - if every_n_train_steps_triggered + every_n_val_epochs_triggered + train_time_interval_triggered > 1: + if every_n_train_steps_triggered + every_n_epochs_triggered + train_time_interval_triggered > 1: raise MisconfigurationException( f"Combination of parameters every_n_train_steps={self._every_n_train_steps}, " - f"every_n_val_epochs={self._every_n_val_epochs} and train_time_interval={self._train_time_interval} " + f"every_n_epochs={self._every_n_epochs} and train_time_interval={self._train_time_interval} " "should be mutually exclusive." ) @@ -451,39 +463,39 @@ def __init_monitor_mode(self, mode: str) -> None: def __init_triggers( self, every_n_train_steps: Optional[int], - every_n_val_epochs: Optional[int], + every_n_epochs: Optional[int], train_time_interval: Optional[timedelta], period: Optional[int], ) -> None: # Default to running once after each validation epoch if neither - # every_n_train_steps nor every_n_val_epochs is set - if every_n_train_steps is None and every_n_val_epochs is None and train_time_interval is None: - every_n_val_epochs = 1 + # every_n_train_steps nor every_n_epochs is set + if every_n_train_steps is None and every_n_epochs is None and train_time_interval is None: + every_n_epochs = 1 every_n_train_steps = 0 - log.debug("Both every_n_train_steps and every_n_val_epochs are not set. Setting every_n_val_epochs=1") + log.debug("Both every_n_train_steps and every_n_epochs are not set. 
@@ -451,39 +463,39 @@ def __init_monitor_mode(self, mode: str) -> None:
     def __init_triggers(
         self,
         every_n_train_steps: Optional[int],
-        every_n_val_epochs: Optional[int],
+        every_n_epochs: Optional[int],
         train_time_interval: Optional[timedelta],
         period: Optional[int],
     ) -> None:
         # Default to running once after each validation epoch if neither
-        # every_n_train_steps nor every_n_val_epochs is set
-        if every_n_train_steps is None and every_n_val_epochs is None and train_time_interval is None:
-            every_n_val_epochs = 1
+        # every_n_train_steps nor every_n_epochs is set
+        if every_n_train_steps is None and every_n_epochs is None and train_time_interval is None:
+            every_n_epochs = 1
             every_n_train_steps = 0
-            log.debug("Both every_n_train_steps and every_n_val_epochs are not set. Setting every_n_val_epochs=1")
+            log.debug("Both every_n_train_steps and every_n_epochs are not set. Setting every_n_epochs=1")
         else:
-            every_n_val_epochs = every_n_val_epochs or 0
+            every_n_epochs = every_n_epochs or 0
             every_n_train_steps = every_n_train_steps or 0

         self._train_time_interval: Optional[timedelta] = train_time_interval
-        self._every_n_val_epochs: int = every_n_val_epochs
+        self._every_n_epochs: int = every_n_epochs
         self._every_n_train_steps: int = every_n_train_steps

-        # period takes precedence over every_n_val_epochs for backwards compatibility
+        # period takes precedence over every_n_epochs for backwards compatibility
         if period is not None:
             rank_zero_deprecation(
                 'Argument `period` in `ModelCheckpoint` is deprecated in v1.3 and will be removed in v1.5.'
-                ' Please use `every_n_val_epochs` instead.'
+                ' Please use `every_n_epochs` instead.'
             )
-            self._every_n_val_epochs = period
-        self._period = self._every_n_val_epochs
+            self._every_n_epochs = period
+        self._period = self._every_n_epochs

     @property
     def period(self) -> Optional[int]:
         rank_zero_deprecation(
             'Property `period` in `ModelCheckpoint` is deprecated in v1.3 and will be removed in v1.5.'
-            ' Please use `every_n_val_epochs` instead.'
+            ' Please use `every_n_epochs` instead.'
         )
         return self._period
@@ -491,7 +503,7 @@ def period(self, value: Optional[int]) -> None:
         rank_zero_deprecation(
             'Property `period` in `ModelCheckpoint` is deprecated in v1.3 and will be removed in v1.5.'
-            ' Please use `every_n_val_epochs` instead.'
+            ' Please use `every_n_epochs` instead.'
         )
         self._period = value
diff --git a/tests/deprecated_api/test_remove_1-6.py b/tests/deprecated_api/test_remove_1-6.py
index 69d2a45530607..ddb551631cb57 100644
--- a/tests/deprecated_api/test_remove_1-6.py
+++ b/tests/deprecated_api/test_remove_1-6.py
@@ -15,6 +15,7 @@
 import pytest

 from pytorch_lightning import Trainer
+from pytorch_lightning.callbacks import ModelCheckpoint
 from pytorch_lightning.callbacks.early_stopping import EarlyStopping
 from pytorch_lightning.core.memory import ModelSummary
 from pytorch_lightning.plugins.training_type import DDPPlugin, DDPSpawnPlugin
@@ -303,3 +304,8 @@ def test_v1_6_0_deprecated_disable_validation():
     trainer = Trainer()
     with pytest.deprecated_call(match="disable_validation` is deprecated in v1.4"):
         _ = trainer.disable_validation
+
+
+def test_v1_6_0_every_n_val_epochs():
+    with pytest.deprecated_call(match="use `every_n_epochs` instead"):
+        _ = ModelCheckpoint(every_n_val_epochs=1)
diff --git a/tests/loggers/test_wandb.py b/tests/loggers/test_wandb.py
index 27b83b75c24b9..4956a08c2fd35 100644
--- a/tests/loggers/test_wandb.py
+++ b/tests/loggers/test_wandb.py
@@ -213,7 +213,6 @@ def test_wandb_log_model(wandb, tmpdir):
             'save_top_k': 1,
             'save_weights_only': False,
             '_every_n_train_steps': 0,
-            '_every_n_val_epochs': 1
         }
     }
 )
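The deprecation path added in PATCH 12 is purely additive: the old keyword still works for one release and is forwarded to the new one. A usage sketch (the values are illustrative; ``_every_n_epochs`` is a private attribute shown only to illustrate the forwarding):

    from pytorch_lightning.callbacks import ModelCheckpoint

    # New spelling: counts epochs, whether the save triggers at train-epoch
    # end or at validation end.
    ckpt = ModelCheckpoint(every_n_epochs=2, save_on_train_epoch_end=False)

    # Old spelling: still accepted in v1.4, but emits a
    # LightningDeprecationWarning and is re-routed to `every_n_epochs`.
    ckpt_old = ModelCheckpoint(every_n_val_epochs=2)
    assert ckpt_old._every_n_epochs == 2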
From f9ee8b81f3d71a2cd6d9347f3b0fe5fdaad8702e Mon Sep 17 00:00:00 2001
From: Carlos Mocholi
Date: Thu, 8 Jul 2021 01:16:00 +0200
Subject: [PATCH 13/26] fix test

---
 tests/checkpointing/test_model_checkpoint.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/checkpointing/test_model_checkpoint.py b/tests/checkpointing/test_model_checkpoint.py
index f0dc9907a26f4..d7aef3414dc8e 100644
--- a/tests/checkpointing/test_model_checkpoint.py
+++ b/tests/checkpointing/test_model_checkpoint.py
@@ -590,7 +590,7 @@ def test_invalid_trigger_combination(tmpdir):
 def test_none_every_n_train_steps_val_epochs(tmpdir):
     checkpoint_callback = ModelCheckpoint(dirpath=tmpdir)
     assert checkpoint_callback.period == 1
-    assert checkpoint_callback._every_n_val_epochs == 1
+    assert checkpoint_callback._every_n_epochs == 1
     assert checkpoint_callback._every_n_train_steps == 0

From 15a8575ac0625cadec0d8a60c646ba1c187fc8d3 Mon Sep 17 00:00:00 2001
From: Carlos Mocholi
Date: Tue, 13 Jul 2021 17:47:11 +0200
Subject: [PATCH 14/26] Docs

---
 pytorch_lightning/callbacks/model_checkpoint.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/pytorch_lightning/callbacks/model_checkpoint.py b/pytorch_lightning/callbacks/model_checkpoint.py
index 78f57f6f5137f..9ee151d571497 100644
--- a/pytorch_lightning/callbacks/model_checkpoint.py
+++ b/pytorch_lightning/callbacks/model_checkpoint.py
@@ -319,11 +319,10 @@ def on_validation_end(self, trainer: 'pl.Trainer', pl_module: 'pl.LightningModul
     def on_train_end(self, trainer: 'pl.Trainer', pl_module: 'pl.LightningModule') -> None:
         """
-        Save a checkpoint at the very end of training.
+        Save a checkpoint when training stops.

-        This will only save a checkpoint if `save_last` is also enabled
-        as the monitor metrics logged during training/validation steps or end of epochs
-        are not guaranteed to be available at this stage.
+        This will only save a checkpoint if `save_last` is also enabled as the monitor metrics logged during
+        training/validation steps or end of epochs are not guaranteed to be available at this stage.
         """

From e14a80db9db89e939563af7e9dda7f5897fcdf55 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Carlos=20Mochol=C3=AD?=
Date: Wed, 14 Jul 2021 12:13:48 +0200
Subject: [PATCH 15/26] Update pytorch_lightning/callbacks/model_checkpoint.py

Co-authored-by: thomas chaton
---
 pytorch_lightning/callbacks/model_checkpoint.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pytorch_lightning/callbacks/model_checkpoint.py b/pytorch_lightning/callbacks/model_checkpoint.py
index 9ee151d571497..1a883f82dff45 100644
--- a/pytorch_lightning/callbacks/model_checkpoint.py
+++ b/pytorch_lightning/callbacks/model_checkpoint.py
@@ -324,9 +324,9 @@ def on_train_end(self, trainer: 'pl.Trainer', pl_module: 'pl.LightningModule') -
         This will only save a checkpoint if `save_last` is also enabled as the monitor metrics logged during
         training/validation steps or end of epochs are not guaranteed to be available at this stage.
         """
-        if self._should_skip_saving_checkpoint(trainer):
+        if self._should_skip_saving_checkpoint(trainer) or not self.save_last:
             return
-        if self.save_last and self.verbose:
+        if self.verbose:
             rank_zero_info("Saving latest checkpoint...")
         # as we advance one step at end of training, we use `global_step - 1` to avoid saving duplicates
         monitor_candidates = self._monitor_candidates(trainer, trainer.current_epoch, trainer.global_step - 1)
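After PATCH 15, the end-of-training save is gated on ``save_last`` up front, so configurations without it skip the hook entirely. A sketch of the resulting behavior — the directory path and epoch count are illustrative, not from the PR:

    from pytorch_lightning import Trainer
    from pytorch_lightning.callbacks import ModelCheckpoint

    # `save_last=True` rewrites `last.ckpt` one final time in `on_train_end`;
    # with `save_last=False` (the default), `on_train_end` now returns immediately.
    ckpt = ModelCheckpoint(dirpath="checkpoints/", save_last=True, verbose=True)
    trainer = Trainer(max_epochs=3, callbacks=[ckpt])
    # after trainer.fit(model):
    #   ckpt.last_model_path -> ".../last.ckpt"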
""" - if self._should_skip_saving_checkpoint(trainer): + if self._should_skip_saving_checkpoint(trainer) or not self.save_last: return - if self.save_last and self.verbose: + if self.verbose: rank_zero_info("Saving latest checkpoint...") # as we advance one step at end of training, we use `global_step - 1` to avoid saving duplicates monitor_candidates = self._monitor_candidates(trainer, trainer.current_epoch, trainer.global_step - 1) From 6a0f13c1d5f10537a4e37d2a642029cdd55bdac9 Mon Sep 17 00:00:00 2001 From: Carlos Mocholi Date: Wed, 14 Jul 2021 12:32:38 +0200 Subject: [PATCH 16/26] Parametrize with save last --- .../checkpointing/test_checkpoint_callback_frequency.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/checkpointing/test_checkpoint_callback_frequency.py b/tests/checkpointing/test_checkpoint_callback_frequency.py index 2d18aacde4489..6e1b4ece6df97 100644 --- a/tests/checkpointing/test_checkpoint_callback_frequency.py +++ b/tests/checkpointing/test_checkpoint_callback_frequency.py @@ -76,7 +76,8 @@ def test_default_checkpoint_freq(save_mock, tmpdir, epochs: int, val_check_inter (2, 1, 0.25, 4), (2, 2, 0.3, 6), ]) -def test_top_k(save_mock, tmpdir, k: int, epochs: int, val_check_interval: float, expected: int): +@pytest.mark.parametrize("save_last", (False, True)) +def test_top_k(save_mock, tmpdir, k: int, epochs: int, val_check_interval: float, expected: int, save_last: bool): class TestModel(BoringModel): @@ -94,7 +95,7 @@ def training_step(self, batch, batch_idx): model = TestModel() trainer = Trainer( - callbacks=[callbacks.ModelCheckpoint(dirpath=tmpdir, monitor='my_loss', save_top_k=k)], + callbacks=[callbacks.ModelCheckpoint(dirpath=tmpdir, monitor='my_loss', save_top_k=k, save_last=save_last)], default_root_dir=tmpdir, max_epochs=epochs, weights_summary=None, @@ -102,7 +103,9 @@ def training_step(self, batch, batch_idx): ) trainer.fit(model) - # make sure types are correct + if save_last: + # last epochs are saved every step (so double the save calls) and once `on_train_end` + expected = expected * 2 + 1 assert save_mock.call_count == expected From 206eefc8da86618e36d852a8d33d397f231fdf3d Mon Sep 17 00:00:00 2001 From: Carlos Mocholi Date: Wed, 14 Jul 2021 19:04:23 +0200 Subject: [PATCH 17/26] Fix ddp test --- tests/checkpointing/test_checkpoint_callback_frequency.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/checkpointing/test_checkpoint_callback_frequency.py b/tests/checkpointing/test_checkpoint_callback_frequency.py index 6e1b4ece6df97..4aae78e622ee7 100644 --- a/tests/checkpointing/test_checkpoint_callback_frequency.py +++ b/tests/checkpointing/test_checkpoint_callback_frequency.py @@ -118,7 +118,7 @@ def test_top_k_ddp_0(save_mock, tmpdir): @mock.patch('torch.save') @RunIf(special=True, min_gpus=2) def test_top_k_ddp_1(save_mock, tmpdir): - _top_k_ddp(save_mock, tmpdir, k=2, epochs=2, val_check_interval=0.3, expected=5) + _top_k_ddp(save_mock, tmpdir, k=2, epochs=2, val_check_interval=0.3, expected=4) def _top_k_ddp(save_mock, tmpdir, k, epochs, val_check_interval, expected): From 238022820ecfb2aad4f342cfd5a4635f54b60d76 Mon Sep 17 00:00:00 2001 From: Carlos Mocholi Date: Wed, 14 Jul 2021 19:16:16 +0200 Subject: [PATCH 18/26] Fix pre-commit --- .../trainer/connectors/accelerator_connector.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 
From b709a8fdac5979ba511315a0ef9618c504490e50 Mon Sep 17 00:00:00 2001
From: Carlos Mocholi
Date: Fri, 16 Jul 2021 03:01:53 +0200
Subject: [PATCH 19/26] Avoid file not found

---
 pl_examples/__init__.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/pl_examples/__init__.py b/pl_examples/__init__.py
index f22cb5b8e4805..093b1fd11650b 100644
--- a/pl_examples/__init__.py
+++ b/pl_examples/__init__.py
@@ -14,7 +14,7 @@
 _PACKAGE_ROOT = os.path.dirname(_EXAMPLES_ROOT)
 _DATASETS_PATH = os.path.join(_PACKAGE_ROOT, 'Datasets')

-_TORCHVISION_MNIST_AVAILABLE = not bool(os.environ.get("PL_USE_MOCKED_MNIST", False))
+_TORCHVISION_MNIST_AVAILABLE = not bool(os.getenv("PL_USE_MOCKED_MNIST", False))
 _DALI_AVAILABLE = _module_available("nvidia.dali")

 if _TORCHVISION_MNIST_AVAILABLE:
@@ -23,6 +23,13 @@
         MNIST(_DATASETS_PATH, download=True)
     except HTTPError:
         _TORCHVISION_MNIST_AVAILABLE = False
+    except RuntimeError as e:
+        # `torchvision` can produce the following error randomly.
+        # File "/usr/local/lib/python3.7/dist-packages/torchvision/datasets/utils.py", line 145, in download_url
+        #   raise RuntimeError("File not found or corrupted.")
+        if "File not found" not in str(e):
+            raise
+        _TORCHVISION_MNIST_AVAILABLE = False

 LIGHTNING_LOGO = """
                     ####

From 5fcd3d7dad0b4f65ae02852875f05b52c7ee9271 Mon Sep 17 00:00:00 2001
From: Carlos Mocholi
Date: Sun, 18 Jul 2021 22:31:30 +0200
Subject: [PATCH 20/26] Debug

---
 .azure-pipelines/gpu-tests.yml | 10 ----------
 tests/special_tests.sh         |  2 +-
 2 files changed, 1 insertion(+), 11 deletions(-)

diff --git a/.azure-pipelines/gpu-tests.yml b/.azure-pipelines/gpu-tests.yml
index 6190d27362c52..5f31d45ab8976 100644
--- a/.azure-pipelines/gpu-tests.yml
+++ b/.azure-pipelines/gpu-tests.yml
@@ -61,16 +61,6 @@ jobs:
       python -c "import torch ; mgpu = torch.cuda.device_count() ; assert mgpu >= 2, f'GPU: {mgpu}'"
     displayName: 'Env details'

-  - bash: |
-      wget https://pl-public-data.s3.amazonaws.com/legacy/checkpoints.zip -P legacy/
-      unzip -o legacy/checkpoints.zip -d legacy/
-      ls -l legacy/checkpoints/
-    displayName: 'Get legacy checkpoints'
-
-  - bash: |
-      python -m coverage run --source pytorch_lightning -m pytest pytorch_lightning tests -v --junitxml=$(Build.StagingDirectory)/test-results.xml --durations=50
-    displayName: 'Testing: standard'
-
   - bash: |
       bash tests/special_tests.sh
     displayName: 'Testing: special'
diff --git a/tests/special_tests.sh b/tests/special_tests.sh
index 96d1e3ba4affb..9ee7bd3d1bcb9 100755
--- a/tests/special_tests.sh
+++ b/tests/special_tests.sh
@@ -62,7 +62,7 @@ for i in "${!files_arr[@]}"; do

     # run the test
     report+="Ran\t$file:$lineno::$test_name\n"
-    python ${defaults} "${file}::${test_name}"
+    NCCL_DEBUG=INFO python ${defaults} "${file}::${test_name}"
     break
   fi
 done < <(echo "$test_code")
"$test_code") From 8d978ccba3d209081de955fb76fd112124d0836a Mon Sep 17 00:00:00 2001 From: Carlos Mocholi Date: Sun, 18 Jul 2021 22:44:26 +0200 Subject: [PATCH 21/26] Increase SHM size --- .azure-pipelines/gpu-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.azure-pipelines/gpu-tests.yml b/.azure-pipelines/gpu-tests.yml index 5f31d45ab8976..c531aa0a8f2ad 100644 --- a/.azure-pipelines/gpu-tests.yml +++ b/.azure-pipelines/gpu-tests.yml @@ -32,7 +32,7 @@ jobs: image: "pytorchlightning/pytorch_lightning:base-cuda-py3.7-torch1.8" # default shm size is 64m. Increase it to avoid: # 'Error while creating shared memory: unhandled system error, NCCL version 2.7.8' - options: "--runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all --shm-size=256m" + options: "--runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all --shm-size=512m" workspace: clean: all From d9118c5aea14504e3a146dff3d0280c76a176924 Mon Sep 17 00:00:00 2001 From: Carlos Mocholi Date: Mon, 19 Jul 2021 02:05:03 +0200 Subject: [PATCH 22/26] Debug --- tests/special_tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/special_tests.sh b/tests/special_tests.sh index 9ee7bd3d1bcb9..977c37c15bac8 100755 --- a/tests/special_tests.sh +++ b/tests/special_tests.sh @@ -62,7 +62,7 @@ for i in "${!files_arr[@]}"; do # run the test report+="Ran\t$file:$lineno::$test_name\n" - NCCL_DEBUG=INFO python ${defaults} "${file}::${test_name}" + #python ${defaults} "${file}::${test_name}" break fi done < <(echo "$test_code") From b3748c45b89a00eab62d6717fb130d022055b53f Mon Sep 17 00:00:00 2001 From: Carlos Mocholi Date: Mon, 19 Jul 2021 02:23:31 +0200 Subject: [PATCH 23/26] Refactor MNIST imports --- .azure-pipelines/gpu-tests.yml | 2 ++ pl_examples/__init__.py | 9 --------- pl_examples/basic_examples/autoencoder.py | 7 ++----- .../basic_examples/backbone_image_classifier.py | 7 ++----- .../basic_examples/dali_image_classifier.py | 7 ++----- pl_examples/basic_examples/mnist_datamodule.py | 16 +++++++++++++--- .../generative_adversarial_net.py | 7 ++----- 7 files changed, 23 insertions(+), 32 deletions(-) diff --git a/.azure-pipelines/gpu-tests.yml b/.azure-pipelines/gpu-tests.yml index c531aa0a8f2ad..d239579e6960e 100644 --- a/.azure-pipelines/gpu-tests.yml +++ b/.azure-pipelines/gpu-tests.yml @@ -63,6 +63,8 @@ jobs: - bash: | bash tests/special_tests.sh + env: + PL_USE_MOCKED_MNIST: "1" displayName: 'Testing: special' - bash: | diff --git a/pl_examples/__init__.py b/pl_examples/__init__.py index f22cb5b8e4805..22d2946db8f8c 100644 --- a/pl_examples/__init__.py +++ b/pl_examples/__init__.py @@ -1,5 +1,4 @@ import os -from urllib.error import HTTPError from six.moves import urllib @@ -14,16 +13,8 @@ _PACKAGE_ROOT = os.path.dirname(_EXAMPLES_ROOT) _DATASETS_PATH = os.path.join(_PACKAGE_ROOT, 'Datasets') -_TORCHVISION_MNIST_AVAILABLE = not bool(os.environ.get("PL_USE_MOCKED_MNIST", False)) _DALI_AVAILABLE = _module_available("nvidia.dali") -if _TORCHVISION_MNIST_AVAILABLE: - try: - from torchvision.datasets.mnist import MNIST - MNIST(_DATASETS_PATH, download=True) - except HTTPError: - _TORCHVISION_MNIST_AVAILABLE = False - LIGHTNING_LOGO = """ #### ########### diff --git a/pl_examples/basic_examples/autoencoder.py b/pl_examples/basic_examples/autoencoder.py index 94e4fbfcf7ae2..8278e695af452 100644 --- a/pl_examples/basic_examples/autoencoder.py +++ b/pl_examples/basic_examples/autoencoder.py @@ -24,16 +24,13 @@ from torch.utils.data import DataLoader, random_split import pytorch_lightning as pl -from pl_examples import 
diff --git a/pl_examples/basic_examples/autoencoder.py b/pl_examples/basic_examples/autoencoder.py
index 94e4fbfcf7ae2..8278e695af452 100644
--- a/pl_examples/basic_examples/autoencoder.py
+++ b/pl_examples/basic_examples/autoencoder.py
@@ -24,16 +24,13 @@
 from torch.utils.data import DataLoader, random_split

 import pytorch_lightning as pl
-from pl_examples import _DATASETS_PATH, _TORCHVISION_MNIST_AVAILABLE, cli_lightning_logo
+from pl_examples import _DATASETS_PATH, cli_lightning_logo
+from pl_examples.basic_examples.mnist_datamodule import MNIST
 from pytorch_lightning.utilities.cli import LightningCLI
 from pytorch_lightning.utilities.imports import _TORCHVISION_AVAILABLE

 if _TORCHVISION_AVAILABLE:
     from torchvision import transforms
-if _TORCHVISION_MNIST_AVAILABLE:
-    from torchvision.datasets import MNIST
-else:
-    from tests.helpers.datasets import MNIST


 class LitAutoEncoder(pl.LightningModule):
diff --git a/pl_examples/basic_examples/backbone_image_classifier.py b/pl_examples/basic_examples/backbone_image_classifier.py
index 381cda088ea9d..c25d27bc4288d 100644
--- a/pl_examples/basic_examples/backbone_image_classifier.py
+++ b/pl_examples/basic_examples/backbone_image_classifier.py
@@ -24,16 +24,13 @@
 from torch.utils.data import DataLoader, random_split

 import pytorch_lightning as pl
-from pl_examples import _DATASETS_PATH, _TORCHVISION_MNIST_AVAILABLE, cli_lightning_logo
+from pl_examples import _DATASETS_PATH, cli_lightning_logo
+from pl_examples.basic_examples.mnist_datamodule import MNIST
 from pytorch_lightning.utilities.cli import LightningCLI
 from pytorch_lightning.utilities.imports import _TORCHVISION_AVAILABLE

 if _TORCHVISION_AVAILABLE:
     from torchvision import transforms
-if _TORCHVISION_MNIST_AVAILABLE:
-    from torchvision.datasets import MNIST
-else:
-    from tests.helpers.datasets import MNIST


 class Backbone(torch.nn.Module):
diff --git a/pl_examples/basic_examples/dali_image_classifier.py b/pl_examples/basic_examples/dali_image_classifier.py
index 572f9a3a0c596..70c63c234c9ca 100644
--- a/pl_examples/basic_examples/dali_image_classifier.py
+++ b/pl_examples/basic_examples/dali_image_classifier.py
@@ -22,16 +22,13 @@
 from torch.utils.data import random_split

 import pytorch_lightning as pl
-from pl_examples import _DALI_AVAILABLE, _DATASETS_PATH, _TORCHVISION_MNIST_AVAILABLE, cli_lightning_logo
+from pl_examples import _DALI_AVAILABLE, _DATASETS_PATH, cli_lightning_logo
+from pl_examples.basic_examples.mnist_datamodule import MNIST
 from pytorch_lightning.utilities.cli import LightningCLI
 from pytorch_lightning.utilities.imports import _TORCHVISION_AVAILABLE

 if _TORCHVISION_AVAILABLE:
     from torchvision import transforms
-if _TORCHVISION_MNIST_AVAILABLE:
-    from torchvision.datasets import MNIST
-else:
-    from tests.helpers.datasets import MNIST

 if _DALI_AVAILABLE:
     from nvidia.dali import __version__ as dali_version
diff --git a/pl_examples/basic_examples/mnist_datamodule.py b/pl_examples/basic_examples/mnist_datamodule.py
index ffb507a9404e6..6da3c768f5fb6 100644
--- a/pl_examples/basic_examples/mnist_datamodule.py
+++ b/pl_examples/basic_examples/mnist_datamodule.py
@@ -11,21 +11,31 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import os
 import platform
 from typing import Optional
+from urllib.error import HTTPError
 from warnings import warn

 from torch.utils.data import DataLoader, random_split

-from pl_examples import _DATASETS_PATH, _TORCHVISION_MNIST_AVAILABLE
+from pl_examples import _DATASETS_PATH
 from pytorch_lightning import LightningDataModule
 from pytorch_lightning.utilities.imports import _TORCHVISION_AVAILABLE

 if _TORCHVISION_AVAILABLE:
     from torchvision import transforms as transform_lib
+
+_TORCHVISION_MNIST_AVAILABLE = not bool(os.getenv("PL_USE_MOCKED_MNIST", False))
 if _TORCHVISION_MNIST_AVAILABLE:
-    from torchvision.datasets import MNIST
-else:
+    try:
+        from torchvision.datasets.mnist import MNIST
+        MNIST(_DATASETS_PATH, download=True)
+    except HTTPError as e:
+        print(f"Error {e} downloading torchvision.MNIST")
+        _TORCHVISION_MNIST_AVAILABLE = False
+if not _TORCHVISION_MNIST_AVAILABLE:
+    print("torchvision MNIST not available. Using our own")
     from tests.helpers.datasets import MNIST
diff --git a/pl_examples/domain_templates/generative_adversarial_net.py b/pl_examples/domain_templates/generative_adversarial_net.py
index 19bce65746f65..70524bab3e845 100644
--- a/pl_examples/domain_templates/generative_adversarial_net.py
+++ b/pl_examples/domain_templates/generative_adversarial_net.py
@@ -28,7 +28,8 @@
 import torch.nn.functional as F  # noqa
 from torch.utils.data import DataLoader

-from pl_examples import _TORCHVISION_MNIST_AVAILABLE, cli_lightning_logo
+from pl_examples import cli_lightning_logo
+from pl_examples.basic_examples.mnist_datamodule import MNIST
 from pytorch_lightning.core import LightningDataModule, LightningModule
 from pytorch_lightning.trainer import Trainer
 from pytorch_lightning.utilities.imports import _TORCHVISION_AVAILABLE
@@ -36,10 +37,6 @@
 if _TORCHVISION_AVAILABLE:
     import torchvision
     from torchvision import transforms
-if _TORCHVISION_MNIST_AVAILABLE:
-    from torchvision.datasets import MNIST
-else:
-    from tests.helpers.datasets import MNIST


 class Generator(nn.Module):
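PATCH 23 centralizes the try-import-or-fall-back dance in one module, so every example just does ``from pl_examples.basic_examples.mnist_datamodule import MNIST``. The general pattern, as a self-contained sketch (the fallback module path comes from the Lightning test suite; the probe root ``"."`` is illustrative):

    import os
    from urllib.error import HTTPError

    _USE_REAL_MNIST = not bool(os.getenv("PL_USE_MOCKED_MNIST", False))
    if _USE_REAL_MNIST:
        try:
            from torchvision.datasets import MNIST
            MNIST(".", download=True)  # probe the download once, at import time
        except HTTPError:
            _USE_REAL_MNIST = False
    if not _USE_REAL_MNIST:
        from tests.helpers.datasets import MNIST  # self-hosted stand-in

    # downstream code imports MNIST from this module and never needs to know which one it got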
From 45b0d5120bd0e8fedec3f9d117eff5b00d2dda5e Mon Sep 17 00:00:00 2001
From: Carlos Mocholi
Date: Mon, 19 Jul 2021 02:31:25 +0200
Subject: [PATCH 24/26] Undo debugging

---
 .azure-pipelines/gpu-tests.yml | 17 +++++++++++++----
 tests/special_tests.sh         |  2 +-
 2 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/.azure-pipelines/gpu-tests.yml b/.azure-pipelines/gpu-tests.yml
index d239579e6960e..dec51b7cf8dd1 100644
--- a/.azure-pipelines/gpu-tests.yml
+++ b/.azure-pipelines/gpu-tests.yml
@@ -61,6 +61,15 @@ jobs:
       python -c "import torch ; mgpu = torch.cuda.device_count() ; assert mgpu >= 2, f'GPU: {mgpu}'"
     displayName: 'Env details'

+  - bash: |
+      wget https://pl-public-data.s3.amazonaws.com/legacy/checkpoints.zip -P legacy/
+      unzip -o legacy/checkpoints.zip -d legacy/
+      ls -l legacy/checkpoints/
+    displayName: 'Get legacy checkpoints'
+  - bash: |
+      python -m coverage run --source pytorch_lightning -m pytest pytorch_lightning tests -v --junitxml=$(Build.StagingDirectory)/test-results.xml --durations=50
+    displayName: 'Testing: standard'
+
   - bash: |
       bash tests/special_tests.sh
     env:
@@ -91,10 +100,6 @@ jobs:
       testRunTitle: '$(Agent.OS) - $(Build.BuildNumber)[$(Agent.JobName)] - Python $(python.version)'
     condition: succeededOrFailed()

-  - bash: |
-      python -m pytest benchmarks -v --maxfail=2 --durations=0
-    displayName: 'Testing: benchmarks'
-
   - script: |
       set -e
       python -m pytest pl_examples -v --maxfail=2 --durations=0
@@ -104,3 +109,7 @@
     env:
       PL_USE_MOCKED_MNIST: "1"
     displayName: 'Testing: examples'
+
+  - bash: |
+      python -m pytest benchmarks -v --maxfail=2 --durations=0
+    displayName: 'Testing: benchmarks'
diff --git a/tests/special_tests.sh b/tests/special_tests.sh
index 977c37c15bac8..96d1e3ba4affb 100755
--- a/tests/special_tests.sh
+++ b/tests/special_tests.sh
@@ -62,7 +62,7 @@ for i in "${!files_arr[@]}"; do

     # run the test
     report+="Ran\t$file:$lineno::$test_name\n"
-    #python ${defaults} "${file}::${test_name}"
+    python ${defaults} "${file}::${test_name}"
     break
   fi
 done < <(echo "$test_code")

From bdae378f2ef7ea9c349eee5c29b101f2c15809ee Mon Sep 17 00:00:00 2001
From: Carlos Mocholi
Date: Mon, 19 Jul 2021 02:45:16 +0200
Subject: [PATCH 25/26] Prints

---
 .azure-pipelines/gpu-tests.yml                 | 1 +
 pl_examples/basic_examples/mnist_datamodule.py | 6 +++---
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/.azure-pipelines/gpu-tests.yml b/.azure-pipelines/gpu-tests.yml
index dec51b7cf8dd1..bdf32cb45adce 100644
--- a/.azure-pipelines/gpu-tests.yml
+++ b/.azure-pipelines/gpu-tests.yml
@@ -66,6 +66,7 @@ jobs:
       unzip -o legacy/checkpoints.zip -d legacy/
       ls -l legacy/checkpoints/
     displayName: 'Get legacy checkpoints'
+
   - bash: |
       python -m coverage run --source pytorch_lightning -m pytest pytorch_lightning tests -v --junitxml=$(Build.StagingDirectory)/test-results.xml --durations=50
     displayName: 'Testing: standard'
diff --git a/pl_examples/basic_examples/mnist_datamodule.py b/pl_examples/basic_examples/mnist_datamodule.py
index 6da3c768f5fb6..90053c04e79ee 100644
--- a/pl_examples/basic_examples/mnist_datamodule.py
+++ b/pl_examples/basic_examples/mnist_datamodule.py
@@ -29,13 +29,13 @@
 _TORCHVISION_MNIST_AVAILABLE = not bool(os.getenv("PL_USE_MOCKED_MNIST", False))
 if _TORCHVISION_MNIST_AVAILABLE:
     try:
-        from torchvision.datasets.mnist import MNIST
+        from torchvision.datasets import MNIST
         MNIST(_DATASETS_PATH, download=True)
     except HTTPError as e:
-        print(f"Error {e} downloading torchvision.MNIST")
+        print(f"Error {e} downloading `torchvision.datasets.MNIST`")
         _TORCHVISION_MNIST_AVAILABLE = False
 if not _TORCHVISION_MNIST_AVAILABLE:
-    print("torchvision MNIST not available. Using our own")
+    print("`torchvision.datasets.MNIST` not available. Using our hosted version")
     from tests.helpers.datasets import MNIST

From 4df2ac2cd0b78aa02ba01c1b4a1c56cc47bf3ae6 Mon Sep 17 00:00:00 2001
From: Carlos Mocholi
Date: Mon, 19 Jul 2021 03:00:33 +0200
Subject: [PATCH 26/26] Revert "Avoid file not found"

This reverts commit b709a8fdac5979ba511315a0ef9618c504490e50.
---
 pl_examples/__init__.py | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/pl_examples/__init__.py b/pl_examples/__init__.py
index 093b1fd11650b..f22cb5b8e4805 100644
--- a/pl_examples/__init__.py
+++ b/pl_examples/__init__.py
@@ -14,7 +14,7 @@
 _PACKAGE_ROOT = os.path.dirname(_EXAMPLES_ROOT)
 _DATASETS_PATH = os.path.join(_PACKAGE_ROOT, 'Datasets')

-_TORCHVISION_MNIST_AVAILABLE = not bool(os.getenv("PL_USE_MOCKED_MNIST", False))
+_TORCHVISION_MNIST_AVAILABLE = not bool(os.environ.get("PL_USE_MOCKED_MNIST", False))
 _DALI_AVAILABLE = _module_available("nvidia.dali")

 if _TORCHVISION_MNIST_AVAILABLE:
@@ -23,13 +23,6 @@
         MNIST(_DATASETS_PATH, download=True)
     except HTTPError:
         _TORCHVISION_MNIST_AVAILABLE = False
-    except RuntimeError as e:
-        # `torchvision` can produce the following error randomly.
-        # File "/usr/local/lib/python3.7/dist-packages/torchvision/datasets/utils.py", line 145, in download_url
-        #   raise RuntimeError("File not found or corrupted.")
-        if "File not found" not in str(e):
-            raise
-        _TORCHVISION_MNIST_AVAILABLE = False

 LIGHTNING_LOGO = """
                     ####