From 51e95912fd4a8201c09d0275f6c636150a13b8a7 Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Tue, 30 Mar 2021 00:06:24 +0200 Subject: [PATCH 01/18] update readme by v1.2.x (#6728) --- README.md | 31 +++++++++---------------------- 1 file changed, 9 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index 9d085e2631d89..eebfd04c06d59 100644 --- a/README.md +++ b/README.md @@ -91,19 +91,6 @@ Lightning is rigorously tested across multiple GPUs, TPUs, CPUs and against major -
- Bleeding edge build status (1.2) - -
- - ![CI base testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20base%20testing/badge.svg?branch=release%2F1.2-dev&event=push) - ![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?branch=release%2F1.2-dev&event=push) - ![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg?branch=release%2F1.2-dev&event=push) - ![TPU tests](https://github.com/PyTorchLightning/pytorch-lightning/workflows/TPU%20tests/badge.svg?branch=release%2F1.2-dev&event=push) - ![Docs check](https://github.com/PyTorchLightning/pytorch-lightning/workflows/Docs%20check/badge.svg?branch=release%2F1.2-dev&event=push) -
-
- --- ## How To Use @@ -132,22 +119,22 @@ pip install pytorch-lightning conda install pytorch-lightning -c conda-forge ``` - #### Install stable - future 1.1.x + #### Install stable 1.2.x - the actual status of 1.1 [stable] is following: + the actual status of 1.2 [stable] is following: - ![CI base testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20base%20testing/badge.svg?branch=release%2F1.1.x&event=push) - ![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?branch=release%2F1.1.x&event=push) - ![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg?branch=release%2F1.1.x&event=push) - ![TPU tests](https://github.com/PyTorchLightning/pytorch-lightning/workflows/TPU%20tests/badge.svg?branch=release%2F1.1.x&event=push) - ![Docs check](https://github.com/PyTorchLightning/pytorch-lightning/workflows/Docs%20check/badge.svg?branch=release%2F1.1.x&event=push) + ![CI base testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20base%20testing/badge.svg?branch=release%2F1.2.x&event=push) + ![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?branch=release%2F1.2.x&event=push) + ![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg?branch=release%2F1.2.x&event=push) + ![TPU tests](https://github.com/PyTorchLightning/pytorch-lightning/workflows/TPU%20tests/badge.svg?branch=release%2F1.2.x&event=push) + ![Docs check](https://github.com/PyTorchLightning/pytorch-lightning/workflows/Docs%20check/badge.svg?branch=release%2F1.2.x&event=push) Install future release from the source ```bash - pip install git+https://github.com/PytorchLightning/pytorch-lightning.git@release/1.1.x --upgrade + pip install git+https://github.com/PytorchLightning/pytorch-lightning.git@release/1.2.x --upgrade ``` - #### Install bleeding-edge - future 1.2 + #### Install bleeding-edge - future 1.3 Install nightly from the source (no guarantees) ```bash From 553c3e5943578fd3a264c8c4b3feb669c28a7323 Mon Sep 17 00:00:00 2001 From: thomas chaton Date: Tue, 30 Mar 2021 16:21:25 +0100 Subject: [PATCH 02/18] [bugfix] Add support for omegaconf and tpu (#6741) * fix_hydra * update changelog Co-authored-by: Your Name --- CHANGELOG.md | 2 ++ pytorch_lightning/plugins/training_type/tpu_spawn.py | 10 ++++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8a20ee5914854..14578579eefbf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,6 +38,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
- Added Autocast in validation, test and predict modes for Native AMP ([#6565](https://github.com/PyTorchLightning/pytorch-lightning/pull/6565)) +- Fixed resolve a bug with omegaconf and xm.save ([#6741](https://github.com/PyTorchLightning/pytorch-lightning/pull/6741)) + ## [1.2.4] - 2021-03-16 ### Changed diff --git a/pytorch_lightning/plugins/training_type/tpu_spawn.py b/pytorch_lightning/plugins/training_type/tpu_spawn.py index 09603f9a22bc2..d6b5df0c1e021 100644 --- a/pytorch_lightning/plugins/training_type/tpu_spawn.py +++ b/pytorch_lightning/plugins/training_type/tpu_spawn.py @@ -9,7 +9,8 @@ from pytorch_lightning.core.lightning import LightningModule from pytorch_lightning.plugins.training_type.ddp_spawn import DDPSpawnPlugin from pytorch_lightning.plugins.training_type.utils import on_colab_kaggle -from pytorch_lightning.utilities import _TPU_AVAILABLE, rank_zero_warn +from pytorch_lightning.utilities import _OMEGACONF_AVAILABLE, _TPU_AVAILABLE, rank_zero_warn +from pytorch_lightning.utilities.apply_func import apply_to_collection from pytorch_lightning.utilities.distributed import rank_zero_only, ReduceOp from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.seed import seed_everything @@ -23,6 +24,9 @@ else: xm, xla_pl, xmp, ParallelLoader, rendezvous = [None] * 5 +if _OMEGACONF_AVAILABLE: + from omegaconf import DictConfig, ListConfig, OmegaConf + class TPUSpawnPlugin(DDPSpawnPlugin): @@ -294,4 +298,6 @@ def save_checkpoint(self, filepath, weights_only: bool = False): # dump states as a checkpoint dictionary object _checkpoint = self.lightning_module.trainer.checkpoint_connector.dump_checkpoint(weights_only) # Todo: TypeError: 'mappingproxy' object does not support item assignment - self.save({k: v for k, v in _checkpoint.items() if k != "callbacks"}, filepath) + if _OMEGACONF_AVAILABLE: + checkpoint = apply_to_collection(checkpoint, (DictConfig, ListConfig), OmegaConf.to_container) + self.save({k: v for k, v in checkpoint.items() if k != "callbacks"}, filepath) From 34e728263554ac7642af5f0ff9a304c1d660680f Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Wed, 31 Mar 2021 02:22:59 +0900 Subject: [PATCH 03/18] [docs] Update Bolts link (#6743) * Update Bolts link * Update Bolts link * formt Co-authored-by: Jirka Borovec --- README.md | 20 +++++++++---------- docs/source/ecosystem/bolts.rst | 4 ++-- docs/source/extensions/callbacks.rst | 10 +++++----- docs/source/index.rst | 16 +++++++-------- notebooks/07-cifar10-baseline.ipynb | 2 +- pl_examples/README.md | 4 ++-- .../basic_examples/conv_sequential_example.py | 2 +- 7 files changed, 29 insertions(+), 29 deletions(-) diff --git a/README.md b/README.md index eebfd04c06d59..d658953cb8014 100644 --- a/README.md +++ b/README.md @@ -343,27 +343,27 @@ class LitAutoEncoder(pl.LightningModule): - [MNIST on TPUs](https://colab.research.google.com/github/PytorchLightning/pytorch-lightning/blob/master/notebooks/06-mnist-tpu-training.ipynb) ###### Contrastive Learning -- [BYOL](https://pytorch-lightning-bolts.readthedocs.io/en/latest/self_supervised_models.html#byol) -- [CPC v2](https://pytorch-lightning-bolts.readthedocs.io/en/latest/self_supervised_models.html#cpc-v2) -- [Moco v2](https://pytorch-lightning-bolts.readthedocs.io/en/latest/self_supervised_models.html#moco-v2) -- [SIMCLR](https://pytorch-lightning-bolts.readthedocs.io/en/latest/self_supervised_models.html#simclr) +- [BYOL](https://lightning-bolts.readthedocs.io/en/latest/self_supervised_models.html#byol) +- [CPC 
v2](https://lightning-bolts.readthedocs.io/en/latest/self_supervised_models.html#cpc-v2) +- [Moco v2](https://lightning-bolts.readthedocs.io/en/latest/self_supervised_models.html#moco-v2) +- [SIMCLR](https://lightning-bolts.readthedocs.io/en/latest/self_supervised_models.html#simclr) ###### NLP - [BERT](https://colab.research.google.com/github/PytorchLightning/pytorch-lightning/blob/master/notebooks/04-transformers-text-classification.ipynb) -- [GPT-2](https://pytorch-lightning-bolts.readthedocs.io/en/latest/convolutional.html#gpt-2) +- [GPT-2](https://lightning-bolts.readthedocs.io/en/latest/convolutional.html#gpt-2) ###### Reinforcement Learning -- [DQN](https://pytorch-lightning-bolts.readthedocs.io/en/latest/reinforce_learn.html#dqn-models) -- [Dueling-DQN](https://pytorch-lightning-bolts.readthedocs.io/en/latest/reinforce_learn.html#dueling-dqn) -- [Reinforce](https://pytorch-lightning-bolts.readthedocs.io/en/latest/reinforce_learn.html#reinforce) +- [DQN](https://lightning-bolts.readthedocs.io/en/latest/reinforce_learn.html#dqn-models) +- [Dueling-DQN](https://lightning-bolts.readthedocs.io/en/latest/reinforce_learn.html#dueling-dqn) +- [Reinforce](https://lightning-bolts.readthedocs.io/en/latest/reinforce_learn.html#reinforce) ###### Vision - [GAN](https://colab.research.google.com/github/PytorchLightning/pytorch-lightning/blob/master/notebooks/03-basic-gan.ipynb) ###### Classic ML -- [Logistic Regression](https://pytorch-lightning-bolts.readthedocs.io/en/latest/classic_ml.html#logistic-regression) -- [Linear Regression](https://pytorch-lightning-bolts.readthedocs.io/en/latest/classic_ml.html#linear-regression) +- [Logistic Regression](https://lightning-bolts.readthedocs.io/en/latest/classic_ml.html#logistic-regression) +- [Linear Regression](https://lightning-bolts.readthedocs.io/en/latest/classic_ml.html#linear-regression) --- diff --git a/docs/source/ecosystem/bolts.rst b/docs/source/ecosystem/bolts.rst index f3a4ab9c858be..c10097fa4bd05 100644 --- a/docs/source/ecosystem/bolts.rst +++ b/docs/source/ecosystem/bolts.rst @@ -1,11 +1,11 @@ Bolts ===== -`PyTorch Lightning Bolts `_, is our official collection +`PyTorch Lightning Bolts `_, is our official collection of prebuilt models across many research domains. .. code-block:: bash - pip install pytorch-lightning-bolts + pip install lightning-bolts In bolts we have: diff --git a/docs/source/extensions/callbacks.rst b/docs/source/extensions/callbacks.rst index 73691c6dd76f5..dd46e910ff541 100644 --- a/docs/source/extensions/callbacks.rst +++ b/docs/source/extensions/callbacks.rst @@ -71,10 +71,10 @@ Examples -------- You can do pretty much anything with callbacks. -- `Add a MLP to fine-tune self-supervised networks `_. -- `Find how to modify an image input to trick the classification result `_. -- `Interpolate the latent space of any variational model `_. -- `Log images to Tensorboard for any model `_. +- `Add a MLP to fine-tune self-supervised networks `_. +- `Find how to modify an image input to trick the classification result `_. +- `Interpolate the latent space of any variational model `_. +- `Log images to Tensorboard for any model `_. -------------- @@ -85,7 +85,7 @@ Lightning has a few built-in callbacks. .. note:: For a richer collection of callbacks, check out our - `bolts library `_. + `bolts library `_. .. 
currentmodule:: pytorch_lightning.callbacks diff --git a/docs/source/index.rst b/docs/source/index.rst index 81011cbf14724..1432badf2038f 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -79,14 +79,14 @@ PyTorch Lightning Documentation ecosystem/pytorch_ecoystem ecosystem/community_examples - Autoencoder - BYOL - DQN - GAN - GPT-2 - Image-GPT - SimCLR - VAE + Autoencoder + BYOL + DQN + GAN + GPT-2 + Image-GPT + SimCLR + VAE .. toctree:: :maxdepth: 1 diff --git a/notebooks/07-cifar10-baseline.ipynb b/notebooks/07-cifar10-baseline.ipynb index 9f3209a8bbc02..8e9394d653846 100644 --- a/notebooks/07-cifar10-baseline.ipynb +++ b/notebooks/07-cifar10-baseline.ipynb @@ -61,7 +61,7 @@ "id": "ziAQCrE-TYWG" }, "source": [ - "! pip install pytorch-lightning pytorch-lightning-bolts -qU" + "! pip install pytorch-lightning lightning-bolts -qU" ], "execution_count": null, "outputs": [] diff --git a/pl_examples/README.md b/pl_examples/README.md index bed553322edf3..30a891f6b9bfc 100644 --- a/pl_examples/README.md +++ b/pl_examples/README.md @@ -1,6 +1,6 @@ # Examples Our most robust examples showing all sorts of implementations -can be found in our sister library [PyTorch-Lightning-Bolts](https://pytorch-lightning-bolts.readthedocs.io/en/latest/convolutional.html#gpt-2). +can be found in our sister library [PyTorch-Lightning-Bolts](https://lightning-bolts.readthedocs.io/en/latest/convolutional.html#gpt-2). --- @@ -15,5 +15,5 @@ In this folder we add 3 simple examples: ## Domain examples This folder contains older examples. You should instead use the examples -in [PyTorch-Lightning-Bolts](https://pytorch-lightning-bolts.readthedocs.io/en/latest/convolutional.html#gpt-2) +in [PyTorch-Lightning-Bolts](https://lightning-bolts.readthedocs.io/en/latest/convolutional.html#gpt-2) for advanced use cases. diff --git a/pl_examples/basic_examples/conv_sequential_example.py b/pl_examples/basic_examples/conv_sequential_example.py index 6cfb6109f04fc..db59f52b103b2 100644 --- a/pl_examples/basic_examples/conv_sequential_example.py +++ b/pl_examples/basic_examples/conv_sequential_example.py @@ -202,7 +202,7 @@ def instantiate_datamodule(args): if __name__ == "__main__": cli_lightning_logo() - assert _BOLTS_AVAILABLE, "Bolts is required for this example, install it via pip install pytorch-lightning-bolts" + assert _BOLTS_AVAILABLE, "Bolts is required for this example, install it via `pip install lightning-bolts`" assert _FAIRSCALE_PIPE_AVAILABLE, "FairScale and PyTorch 1.6 is required for this example." 
parser = ArgumentParser(description="Pipe Example") From 709b6957430db064816fbe48966df6ae6bb71935 Mon Sep 17 00:00:00 2001 From: Kaushik B <45285388+kaushikb11@users.noreply.github.com> Date: Thu, 1 Apr 2021 03:04:33 +0530 Subject: [PATCH 04/18] Update logic for checking TPUs availability (#6767) * Update logic for checking TPUs availability * fix flake8 * add fix --- .../plugins/training_type/tpu_spawn.py | 4 ++++ pytorch_lightning/utilities/xla_device.py | 19 +++---------------- 2 files changed, 7 insertions(+), 16 deletions(-) diff --git a/pytorch_lightning/plugins/training_type/tpu_spawn.py b/pytorch_lightning/plugins/training_type/tpu_spawn.py index d6b5df0c1e021..c322ff1493ae5 100644 --- a/pytorch_lightning/plugins/training_type/tpu_spawn.py +++ b/pytorch_lightning/plugins/training_type/tpu_spawn.py @@ -1,6 +1,7 @@ import io import os import re +import time from typing import Any, Dict, Iterable, List, Optional, Union import torch @@ -106,6 +107,9 @@ def new_process(self, process_idx: int, trainer, mp_queue) -> None: self.__save_end_of_training_weights(self.lightning_module) self.transfer_distrib_spawn_state_on_fit_end(results) + if self.global_rank == 0: + time.sleep(2) + self.barrier("end-process") def __save_end_of_training_weights(self, model: LightningModule) -> None: diff --git a/pytorch_lightning/utilities/xla_device.py b/pytorch_lightning/utilities/xla_device.py index 294d3d2c5ec40..49ec176d4cdbb 100644 --- a/pytorch_lightning/utilities/xla_device.py +++ b/pytorch_lightning/utilities/xla_device.py @@ -17,13 +17,10 @@ import traceback from multiprocessing import Process, Queue -import torch.multiprocessing as mp - from pytorch_lightning.utilities.imports import _XLA_AVAILABLE if _XLA_AVAILABLE: import torch_xla.core.xla_model as xm - import torch_xla.distributed.xla_multiprocessing as xmp #: define waiting time got checking TPU available in sec TPU_CHECK_TIMEOUT = 25 @@ -64,23 +61,13 @@ class XLADeviceUtils: @pl_multi_process def _is_device_tpu() -> bool: """ - Check if device is TPU + Check if TPU devices are available Return: - A boolean value indicating if the xla device is a TPU device or not + A boolean value indicating if TPU devices are available """ - def _fn(_: int, mp_queue): - try: - device = xm.xla_device() - mp_queue.put(device.type == 'xla') - except Exception: - mp_queue.put(False) - - smp = mp.get_context("spawn") - queue = smp.SimpleQueue() - xmp.spawn(_fn, args=(queue, ), nprocs=1) - return queue.get() + return len(xm.get_xla_supported_devices("TPU")) > 0 @staticmethod def xla_available() -> bool: From 8c01064078dc3e57cc1040452d93aed8b1a9da46 Mon Sep 17 00:00:00 2001 From: thomas chaton Date: Thu, 1 Apr 2021 11:43:23 +0100 Subject: [PATCH 05/18] resolve bug (#6781) --- .../plugins/training_type/tpu_spawn.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/pytorch_lightning/plugins/training_type/tpu_spawn.py b/pytorch_lightning/plugins/training_type/tpu_spawn.py index c322ff1493ae5..a40e1e38ed2c1 100644 --- a/pytorch_lightning/plugins/training_type/tpu_spawn.py +++ b/pytorch_lightning/plugins/training_type/tpu_spawn.py @@ -107,11 +107,13 @@ def new_process(self, process_idx: int, trainer, mp_queue) -> None: self.__save_end_of_training_weights(self.lightning_module) self.transfer_distrib_spawn_state_on_fit_end(results) + # https://github.com/pytorch/xla/issues/1801#issuecomment-602799542 + self.barrier("end-process") + + # https://github.com/pytorch/xla/issues/2190#issuecomment-641665358 if self.global_rank == 0: 
time.sleep(2) - self.barrier("end-process") - def __save_end_of_training_weights(self, model: LightningModule) -> None: # when training ends on these platforms dump weights to get out of the main process if on_colab_kaggle(): @@ -145,16 +147,7 @@ def transfer_distrib_spawn_state_on_fit_end(self, results): self.mp_queue.put(results) def save(self, state_dict: Dict, path: str) -> None: - """ - Saving with ``xm.save`` can be unstable and miss the rendez-vous after ``torch.save``. - The rendez-vous doesn't affect directly saving. - We can ignore the ``RuntimeError`` to reduce friction with TPUs. - """ - try: - xm.save(state_dict, path) - except RuntimeError as e: - if "Failed to meet rendezvous" not in str(e): - raise e + xm.save(state_dict, path) def broadcast(self, obj: object, src: int = 0) -> object: buffer = io.BytesIO() From 5c9dbc38cb12267239ec69476582b1094cdcccd5 Mon Sep 17 00:00:00 2001 From: Yuan-Hang Zhang Date: Fri, 2 Apr 2021 16:40:41 +0800 Subject: [PATCH 06/18] Fix validation progress counter with check_val_every_n_epoch > 1 (#5952) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: rohitgr7 Co-authored-by: Carlos Mocholí --- pytorch_lightning/callbacks/progress.py | 5 +- .../flags/test_check_val_every_n_epoch.py | 53 +++++++++++++++++++ 2 files changed, 56 insertions(+), 2 deletions(-) create mode 100644 tests/trainer/flags/test_check_val_every_n_epoch.py diff --git a/pytorch_lightning/callbacks/progress.py b/pytorch_lightning/callbacks/progress.py index 46331e004c1c7..649243f7600ba 100644 --- a/pytorch_lightning/callbacks/progress.py +++ b/pytorch_lightning/callbacks/progress.py @@ -146,9 +146,10 @@ def total_val_batches(self) -> int: validation dataloader is of infinite size. """ total_val_batches = 0 - if not self.trainer.disable_validation: - is_val_epoch = (self.trainer.current_epoch) % self.trainer.check_val_every_n_epoch == 0 + if self.trainer.enable_validation: + is_val_epoch = (self.trainer.current_epoch + 1) % self.trainer.check_val_every_n_epoch == 0 total_val_batches = sum(self.trainer.num_val_batches) if is_val_epoch else 0 + return total_val_batches @property diff --git a/tests/trainer/flags/test_check_val_every_n_epoch.py b/tests/trainer/flags/test_check_val_every_n_epoch.py new file mode 100644 index 0000000000000..f7f1403ecdbfd --- /dev/null +++ b/tests/trainer/flags/test_check_val_every_n_epoch.py @@ -0,0 +1,53 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import pytest + +from pytorch_lightning.trainer import Trainer +from pytorch_lightning.trainer.states import TrainerState +from tests.helpers import BoringModel + + +@pytest.mark.parametrize( + 'max_epochs,expected_val_loop_calls,expected_val_batches', [ + (1, 0, [0]), + (4, 2, [0, 2, 0, 2]), + (5, 2, [0, 2, 0, 2, 0]), + ] +) +def test_check_val_every_n_epoch(tmpdir, max_epochs, expected_val_loop_calls, expected_val_batches): + + class TestModel(BoringModel): + val_epoch_calls = 0 + val_batches = [] + + def on_train_epoch_end(self, *args, **kwargs): + self.val_batches.append(self.trainer.progress_bar_callback.total_val_batches) + + def on_validation_epoch_start(self) -> None: + self.val_epoch_calls += 1 + + model = TestModel() + trainer = Trainer( + default_root_dir=tmpdir, + max_epochs=max_epochs, + num_sanity_val_steps=0, + limit_val_batches=2, + check_val_every_n_epoch=2, + logger=False, + ) + trainer.fit(model) + assert trainer.state == TrainerState.FINISHED, f"Training failed with {trainer.state}" + + assert model.val_epoch_calls == expected_val_loop_calls + assert model.val_batches == expected_val_batches From 591a087096d9debdd648a7cd6f2936f8ce3b7511 Mon Sep 17 00:00:00 2001 From: Elizaveta Logacheva Date: Fri, 2 Apr 2021 21:49:20 +0300 Subject: [PATCH 07/18] Remove extinct parameters from lightning_module.rst (#6801) Fixes #6800 --- docs/source/common/lightning_module.rst | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/docs/source/common/lightning_module.rst b/docs/source/common/lightning_module.rst index 7f0df33a351e4..da8ae5971aea6 100644 --- a/docs/source/common/lightning_module.rst +++ b/docs/source/common/lightning_module.rst @@ -907,30 +907,6 @@ use_amp ~~~~~~~ True if using Automatic Mixed Precision (AMP) ------------- - -use_ddp -~~~~~~~ -True if using ddp - ------------- - -use_ddp2 -~~~~~~~~ -True if using ddp2 - ------------- - -use_dp -~~~~~~ -True if using dp - ------------- - -use_tpu -~~~~~~~ -True if using TPUs - -------------- automatic_optimization From 2ceed8a7c51e0ae518eeb95892583945bca6531b Mon Sep 17 00:00:00 2001 From: Kaushik B <45285388+kaushikb11@users.noreply.github.com> Date: Sun, 4 Apr 2021 00:19:43 +0530 Subject: [PATCH 08/18] Update TPU docs for installation (#6794) --- docs/source/advanced/tpu.rst | 3 +-- docs/source/starter/introduction_guide.rst | 4 +--- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/docs/source/advanced/tpu.rst b/docs/source/advanced/tpu.rst index b9688ce425b5f..09a614f31c854 100644 --- a/docs/source/advanced/tpu.rst +++ b/docs/source/advanced/tpu.rst @@ -64,8 +64,7 @@ To get a TPU on colab, follow these steps: .. code-block:: - !curl https://raw.githubusercontent.com/pytorch/xla/master/contrib/scripts/env-setup.py -o pytorch-xla-env-setup.py - !python pytorch-xla-env-setup.py --version 1.7 --apt-packages libomp5 libopenblas-dev + !pip install cloud-tpu-client==0.10 https://storage.googleapis.com/tpu-pytorch/wheels/torch_xla-1.8-cp37-cp37m-linux_x86_64.whl 5. Once the above is done, install PyTorch Lightning (v 0.7.0+). diff --git a/docs/source/starter/introduction_guide.rst b/docs/source/starter/introduction_guide.rst index 2ee31304299e0..5625140cc12cf 100644 --- a/docs/source/starter/introduction_guide.rst +++ b/docs/source/starter/introduction_guide.rst @@ -572,9 +572,7 @@ Next, install the required xla library (adds support for PyTorch on TPUs) .. 
code-block:: shell - !curl https://raw.githubusercontent.com/pytorch/xla/master/contrib/scripts/env-setup.py -o pytorch-xla-env-setup.py - - !python pytorch-xla-env-setup.py --version nightly --apt-packages libomp5 libopenblas-dev + !pip install cloud-tpu-client==0.10 https://storage.googleapis.com/tpu-pytorch/wheels/torch_xla-1.8-cp37-cp37m-linux_x86_64.whl In distributed training (multiple GPUs and multiple TPU cores) each GPU or TPU core will run a copy of this program. This means that without taking any care you will download the dataset N times which From 17969f317e965a2644b1661904e0d239c553c2df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Mon, 5 Apr 2021 18:47:21 +0200 Subject: [PATCH 09/18] fix boolean check on iterable dataset when len not defined (#6828) * fix iterable dataset len check * update predict and validate * add validate to test * add changelog * add predict --- .../plugins/training_type/tpu_spawn.py | 7 ++--- pytorch_lightning/trainer/trainer.py | 4 +-- tests/trainer/test_dataloaders.py | 26 ++++++++++++++----- 3 files changed, 24 insertions(+), 13 deletions(-) diff --git a/pytorch_lightning/plugins/training_type/tpu_spawn.py b/pytorch_lightning/plugins/training_type/tpu_spawn.py index a40e1e38ed2c1..7dab800e1c453 100644 --- a/pytorch_lightning/plugins/training_type/tpu_spawn.py +++ b/pytorch_lightning/plugins/training_type/tpu_spawn.py @@ -285,15 +285,12 @@ def test_step(self, *args, **kwargs): def predict(self, *args, **kwargs): return self.lightning_module.predict(*args, **kwargs) - def save_checkpoint(self, filepath, weights_only: bool = False): + def save_checkpoint(self, checkpoint: Dict[str, Any], filepath: str) -> None: """Save model/training states as a checkpoint file through state-dump and file-write. - Args: + checkpoint: dict containing model and trainer state filepath: write-target file's path - weights_only: saving model weights only """ - # dump states as a checkpoint dictionary object - _checkpoint = self.lightning_module.trainer.checkpoint_connector.dump_checkpoint(weights_only) # Todo: TypeError: 'mappingproxy' object does not support item assignment if _OMEGACONF_AVAILABLE: checkpoint = apply_to_collection(checkpoint, (DictConfig, ListConfig), OmegaConf.to_container) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 82bb858ef6c53..e90837bf980ed 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -898,7 +898,7 @@ def test( self._set_running_stage(RunningStage.TESTING, model or self.lightning_module) # If you supply a datamodule you can't supply train_dataloader or val_dataloaders - if test_dataloaders and datamodule: + if test_dataloaders is not None and datamodule: raise MisconfigurationException( 'You cannot pass test_dataloaders to trainer.test if you supply a datamodule' ) @@ -1008,7 +1008,7 @@ def predict( self._set_running_stage(RunningStage.PREDICTING, model) - if dataloaders and datamodule: + if dataloaders is not None and datamodule: raise MisconfigurationException( 'You cannot pass dataloaders to trainer.predict if you supply a datamodule.' 
) diff --git a/tests/trainer/test_dataloaders.py b/tests/trainer/test_dataloaders.py index bca8e5dcc531b..a26057da32b4f 100644 --- a/tests/trainer/test_dataloaders.py +++ b/tests/trainer/test_dataloaders.py @@ -703,28 +703,42 @@ def test_warning_with_few_workers_multi_loader(mock, tmpdir, ckpt_path): def test_warning_with_iterable_dataset_and_len(tmpdir): """ Tests that a warning message is shown when an IterableDataset defines `__len__`. """ - model = EvalModelTemplate() + model = BoringModel() original_dataset = model.train_dataloader().dataset - class IterableWithLen(IterableDataset): + class IterableWithoutLen(IterableDataset): def __iter__(self): return iter(original_dataset) + class IterableWithLen(IterableWithoutLen): + def __len__(self): return len(original_dataset) + # with __len__ defined dataloader = DataLoader(IterableWithLen(), batch_size=16) assert has_len(dataloader) assert has_iterable_dataset(dataloader) - trainer = Trainer( - default_root_dir=tmpdir, - max_steps=3, - ) + trainer = Trainer(default_root_dir=tmpdir, max_steps=3) + with pytest.warns(UserWarning, match='Your `IterableDataset` has `__len__` defined.'): + trainer.validate(model, val_dataloaders=[dataloader]) with pytest.warns(UserWarning, match='Your `IterableDataset` has `__len__` defined.'): trainer.fit(model, train_dataloader=dataloader, val_dataloaders=[dataloader]) with pytest.warns(UserWarning, match='Your `IterableDataset` has `__len__` defined.'): trainer.test(model, test_dataloaders=[dataloader]) + with pytest.warns(UserWarning, match='Your `IterableDataset` has `__len__` defined.'): + trainer.predict(model, dataloaders=[dataloader]) + + # without __len__ defined + dataloader = DataLoader(IterableWithoutLen(), batch_size=16) + assert not has_len(dataloader) + assert has_iterable_dataset(dataloader) + trainer = Trainer(default_root_dir=tmpdir, max_steps=3) + trainer.validate(model, val_dataloaders=dataloader) + trainer.fit(model, train_dataloader=dataloader, val_dataloaders=[dataloader]) + trainer.test(model, test_dataloaders=dataloader) + trainer.predict(model, dataloaders=dataloader) @pytest.mark.skipif(torch.cuda.device_count() < 2, reason='Test requires multiple GPUs') From cd997d6b075ba2574a3425724f9b1a503b3b76f5 Mon Sep 17 00:00:00 2001 From: Karthik Prasad Date: Mon, 5 Apr 2021 16:47:59 -0700 Subject: [PATCH 10/18] Sanitize `None` params during pruning (#6836) * sanitize none params during pruning * amend --- pytorch_lightning/callbacks/pruning.py | 4 +++- tests/callbacks/test_pruning.py | 19 +++++++++++-------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/pytorch_lightning/callbacks/pruning.py b/pytorch_lightning/callbacks/pruning.py index 3f82ab3565403..36622af0edaff 100644 --- a/pytorch_lightning/callbacks/pruning.py +++ b/pytorch_lightning/callbacks/pruning.py @@ -422,7 +422,9 @@ def sanitize_parameters_to_prune( current_modules = [m for m in pl_module.modules() if not isinstance(m, _MODULE_CONTAINERS)] if parameters_to_prune is None: - parameters_to_prune = [(m, p) for p in parameters for m in current_modules if hasattr(m, p)] + parameters_to_prune = [ + (m, p) for p in parameters for m in current_modules if getattr(m, p, None) is not None + ] elif ( isinstance(parameters_to_prune, (list, tuple)) and len(parameters_to_prune) > 0 and all(len(p) == 2 for p in parameters_to_prune) diff --git a/tests/callbacks/test_pruning.py b/tests/callbacks/test_pruning.py index 484b09e27bc0d..e995a7c658101 100644 --- a/tests/callbacks/test_pruning.py +++ b/tests/callbacks/test_pruning.py @@ 
-36,7 +36,7 @@ def __init__(self): self.layer = Sequential( OrderedDict([ ("mlp_1", nn.Linear(32, 32)), - ("mlp_2", nn.Linear(32, 32)), + ("mlp_2", nn.Linear(32, 32, bias=False)), ("mlp_3", nn.Linear(32, 2)), ]) ) @@ -85,7 +85,10 @@ def train_with_pruning_callback( if parameters_to_prune: pruning_kwargs["parameters_to_prune"] = [(model.layer.mlp_1, "weight"), (model.layer.mlp_2, "weight")] else: - pruning_kwargs["parameter_names"] = ["weight"] + if isinstance(pruning_fn, str) and pruning_fn.endswith("_structured"): + pruning_kwargs["parameter_names"] = ["weight"] + else: + pruning_kwargs["parameter_names"] = ["weight", "bias"] if isinstance(pruning_fn, str) and pruning_fn.endswith("_structured"): pruning_kwargs["pruning_dim"] = 0 if pruning_fn == "ln_structured": @@ -250,14 +253,14 @@ def test_multiple_pruning_callbacks(tmpdir, caplog, make_pruning_permanent): actual = [m for m in actual if m.startswith("Applied")] assert actual == [ "Applied `L1Unstructured`. Pruned: 0/1122 (0.00%) -> 544/1122 (48.48%)", - "Applied `L1Unstructured` to `Linear(in_features=32, out_features=32, bias=True).weight` with amount=0.5. Pruned: 0 (0.00%) -> 506 (49.41%)", # noqa: E501 - "Applied `L1Unstructured` to `Linear(in_features=32, out_features=2, bias=True).weight` with amount=0.5. Pruned: 0 (0.00%) -> 38 (59.38%)", # noqa: E501 + "Applied `L1Unstructured` to `Linear(in_features=32, out_features=32, bias=True).weight` with amount=0.5. Pruned: 0 (0.00%) -> 500 (48.83%)", # noqa: E501 + "Applied `L1Unstructured` to `Linear(in_features=32, out_features=2, bias=True).weight` with amount=0.5. Pruned: 0 (0.00%) -> 44 (68.75%)", # noqa: E501 "Applied `RandomUnstructured`. Pruned: 544/1122 (48.48%) -> 680/1122 (60.61%)", - "Applied `RandomUnstructured` to `Linear(in_features=32, out_features=32, bias=True).weight` with amount=0.25. Pruned: 506 (49.41%) -> 633 (61.82%)", # noqa: E501 - "Applied `RandomUnstructured` to `Linear(in_features=32, out_features=2, bias=True).weight` with amount=0.25. Pruned: 38 (59.38%) -> 47 (73.44%)", # noqa: E501 + "Applied `RandomUnstructured` to `Linear(in_features=32, out_features=32, bias=True).weight` with amount=0.25. Pruned: 500 (48.83%) -> 635 (62.01%)", # noqa: E501 + "Applied `RandomUnstructured` to `Linear(in_features=32, out_features=2, bias=True).weight` with amount=0.25. Pruned: 44 (68.75%) -> 45 (70.31%)", # noqa: E501 "Applied `L1Unstructured`. Pruned: 680/1122 (60.61%) -> 884/1122 (78.79%)", - "Applied `L1Unstructured` to `Linear(in_features=32, out_features=32, bias=True).weight` with amount=0.5. Pruned: 633 (61.82%) -> 828 (80.86%)", # noqa: E501 - "Applied `L1Unstructured` to `Linear(in_features=32, out_features=2, bias=True).weight` with amount=0.5. Pruned: 47 (73.44%) -> 56 (87.50%)", # noqa: E501 + "Applied `L1Unstructured` to `Linear(in_features=32, out_features=32, bias=True).weight` with amount=0.5. Pruned: 635 (62.01%) -> 830 (81.05%)", # noqa: E501 + "Applied `L1Unstructured` to `Linear(in_features=32, out_features=2, bias=True).weight` with amount=0.5. 
Pruned: 45 (70.31%) -> 54 (84.38%)", # noqa: E501 ] filepath = str(tmpdir / "foo.ckpt") From c92f84aaf3b597195a7eb1f27a9c51f6f3d4d10d Mon Sep 17 00:00:00 2001 From: Sadiq Jaffer Date: Tue, 6 Apr 2021 00:50:42 +0100 Subject: [PATCH 11/18] Fix `unfreeze_and_add_param_group` expects `modules` rather than `module` (#6822) --- pytorch_lightning/callbacks/finetuning.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/callbacks/finetuning.py b/pytorch_lightning/callbacks/finetuning.py index b25e5e06e8b86..f37e3bb31cc5e 100644 --- a/pytorch_lightning/callbacks/finetuning.py +++ b/pytorch_lightning/callbacks/finetuning.py @@ -77,7 +77,7 @@ def finetune_function(self, pl_module, current_epoch, optimizer, optimizer_idx): # When `current_epoch` is 10, feature_extractor will start training. if current_epoch == self._unfreeze_at_epoch: self.unfreeze_and_add_param_group( - module=pl_module.feature_extractor, + modules=pl_module.feature_extractor, optimizer=optimizer, train_bn=True, ) From 31b2d2b9ae3b7c995987942abb01a68b85e968d0 Mon Sep 17 00:00:00 2001 From: Michael Baumgartner Date: Tue, 6 Apr 2021 04:57:33 +0200 Subject: [PATCH 12/18] Enforce an epoch scheduler interval when using SWA (#6588) Co-authored-by: Carlos Mocholi --- pytorch_lightning/callbacks/swa.py | 7 ++++--- tests/callbacks/test_swa.py | 29 +++++++++++++++++++++++++---- 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/pytorch_lightning/callbacks/swa.py b/pytorch_lightning/callbacks/swa.py index c8cf367cb4d5e..cc4bbd516a87c 100644 --- a/pytorch_lightning/callbacks/swa.py +++ b/pytorch_lightning/callbacks/swa.py @@ -189,14 +189,15 @@ def on_train_epoch_start(self, trainer: 'pl.Trainer', pl_module: 'pl.LightningMo anneal_strategy=self._annealing_strategy, last_epoch=trainer.max_epochs if self._annealing_strategy == "cos" else -1 ) + _scheduler_config = _get_default_scheduler_config() + assert _scheduler_config["interval"] == "epoch" and _scheduler_config["frequency"] == 1 + _scheduler_config["scheduler"] = self._swa_scheduler if trainer.lr_schedulers: lr_scheduler = trainer.lr_schedulers[0]["scheduler"] rank_zero_warn(f"Swapping lr_scheduler {lr_scheduler} for {self._swa_scheduler}") - trainer.lr_schedulers[0]["scheduler"] = self._swa_scheduler + trainer.lr_schedulers[0] = _scheduler_config else: - _scheduler_config = _get_default_scheduler_config() - _scheduler_config["scheduler"] = self._swa_scheduler trainer.lr_schedulers.append(_scheduler_config) self.n_averaged = torch.tensor(0, dtype=torch.long, device=pl_module.device) diff --git a/tests/callbacks/test_swa.py b/tests/callbacks/test_swa.py index ea8e368e39542..eb4c8f1536a22 100644 --- a/tests/callbacks/test_swa.py +++ b/tests/callbacks/test_swa.py @@ -24,19 +24,22 @@ from pytorch_lightning.utilities import _TORCH_GREATER_EQUAL_1_6 from pytorch_lightning.utilities.exceptions import MisconfigurationException from tests.helpers import BoringModel, RandomDataset +from tests.helpers.runif import RunIf if _TORCH_GREATER_EQUAL_1_6: from pytorch_lightning.callbacks import StochasticWeightAveraging + from torch.optim.swa_utils import SWALR class SwaTestModel(BoringModel): - def __init__(self, batchnorm: bool = True): + def __init__(self, batchnorm: bool = True, interval: str = "epoch"): super().__init__() layers = [nn.Linear(32, 32)] if batchnorm: layers.append(nn.BatchNorm1d(32)) layers += [nn.ReLU(), nn.Linear(32, 2)] self.layer = nn.Sequential(*layers) + self.interval = interval def training_step(self, batch, batch_idx): output = 
self.forward(batch) @@ -46,6 +49,14 @@ def training_step(self, batch, batch_idx): def train_dataloader(self): return DataLoader(RandomDataset(32, 64), batch_size=2) + def configure_optimizers(self): + optimizer = torch.optim.SGD(self.layer.parameters(), lr=0.1) + return { + "optimizer": optimizer, + "scheduler": torch.optim.lr_scheduler.StepLR(optimizer, step_size=1), + "interval": self.interval, + } + class SwaTestCallback(StochasticWeightAveraging): update_parameters_calls: int = 0 transfer_weights_calls: int = 0 @@ -61,6 +72,10 @@ def transfer_weights(self, *args, **kwargs): def on_train_epoch_start(self, trainer, *args): super().on_train_epoch_start(trainer, *args) assert trainer.train_loop._skip_backward == (trainer.current_epoch > self.swa_end) + if self.swa_start <= trainer.current_epoch: + assert isinstance(trainer.lr_schedulers[0]["scheduler"], SWALR) + assert trainer.lr_schedulers[0]["interval"] == "epoch" + assert trainer.lr_schedulers[0]["frequency"] == 1 def on_train_epoch_end(self, trainer, *args): super().on_train_epoch_end(trainer, *args) @@ -89,8 +104,8 @@ def on_train_end(self, trainer, pl_module): @mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) -def train_with_swa(tmpdir, batchnorm=True, accelerator=None, gpus=None, num_processes=1): - model = SwaTestModel(batchnorm=batchnorm) +def train_with_swa(tmpdir, batchnorm=True, accelerator=None, gpus=None, num_processes=1, interval="epoch"): + model = SwaTestModel(batchnorm=batchnorm, interval=interval) swa_start = 2 max_epochs = 5 swa_callback = SwaTestCallback(swa_epoch_start=swa_start, swa_lrs=0.1) @@ -147,7 +162,13 @@ def test_swa_callback(tmpdir, batchnorm): train_with_swa(tmpdir, batchnorm=batchnorm) -@pytest.mark.skipif(not _TORCH_GREATER_EQUAL_1_6, reason="SWA available from PyTorch 1.6.0") +@RunIf(min_torch="1.6.0") +@pytest.mark.parametrize("interval", ("epoch", "step")) +def test_swa_callback_scheduler_step(tmpdir, interval: bool): + train_with_swa(tmpdir, interval=interval) + + +@RunIf(min_torch="1.6.0") def test_swa_raises(): with pytest.raises(MisconfigurationException, match=">0 integer or a float between 0 and 1"): StochasticWeightAveraging(swa_epoch_start=0, swa_lrs=0.1) From 215a9c9c89df01b7370b55d0269ac94a04aacb55 Mon Sep 17 00:00:00 2001 From: Eugene Khvedchenya Date: Tue, 6 Apr 2021 10:40:29 +0300 Subject: [PATCH 13/18] Fix DPP + SyncBN (#6838) * Fix DPP + SyncBN Ensure that model is already on correct GPU before applying SyncBN conversion * Fix order of SyncBN for ddp_spawn --- pytorch_lightning/plugins/training_type/ddp.py | 6 +++--- pytorch_lightning/plugins/training_type/ddp_spawn.py | 6 +++--- tests/trainer/test_dataloaders.py | 3 --- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/pytorch_lightning/plugins/training_type/ddp.py b/pytorch_lightning/plugins/training_type/ddp.py index 007f898a27cc7..e6ece8c8cffb1 100644 --- a/pytorch_lightning/plugins/training_type/ddp.py +++ b/pytorch_lightning/plugins/training_type/ddp.py @@ -257,12 +257,12 @@ def pre_dispatch(self): self.dist.rank = self.global_rank self.dist.device = self.root_device - if self.sync_batchnorm: - self.model = self.configure_sync_batchnorm(self.model) - # move the model to the correct device self.model_to_device() + if self.sync_batchnorm: + self.model = self.configure_sync_batchnorm(self.model) + self.configure_ddp() self.barrier() diff --git a/pytorch_lightning/plugins/training_type/ddp_spawn.py b/pytorch_lightning/plugins/training_type/ddp_spawn.py index fdb88a3c5cba5..dcd6443b0e6fd 100644 --- 
a/pytorch_lightning/plugins/training_type/ddp_spawn.py +++ b/pytorch_lightning/plugins/training_type/ddp_spawn.py @@ -148,12 +148,12 @@ def new_process(self, process_idx, trainer, mp_queue): self.dist.rank = self.global_rank self.dist.device = self.root_device - if self.sync_batchnorm: - self.model = self.configure_sync_batchnorm(self.model) - # move the model to the correct device self.model_to_device() + if self.sync_batchnorm: + self.model = self.configure_sync_batchnorm(self.model) + self.configure_ddp() self.barrier() diff --git a/tests/trainer/test_dataloaders.py b/tests/trainer/test_dataloaders.py index a26057da32b4f..69d199a76dfff 100644 --- a/tests/trainer/test_dataloaders.py +++ b/tests/trainer/test_dataloaders.py @@ -721,8 +721,6 @@ def __len__(self): assert has_len(dataloader) assert has_iterable_dataset(dataloader) trainer = Trainer(default_root_dir=tmpdir, max_steps=3) - with pytest.warns(UserWarning, match='Your `IterableDataset` has `__len__` defined.'): - trainer.validate(model, val_dataloaders=[dataloader]) with pytest.warns(UserWarning, match='Your `IterableDataset` has `__len__` defined.'): trainer.fit(model, train_dataloader=dataloader, val_dataloaders=[dataloader]) with pytest.warns(UserWarning, match='Your `IterableDataset` has `__len__` defined.'): @@ -735,7 +733,6 @@ def __len__(self): assert not has_len(dataloader) assert has_iterable_dataset(dataloader) trainer = Trainer(default_root_dir=tmpdir, max_steps=3) - trainer.validate(model, val_dataloaders=dataloader) trainer.fit(model, train_dataloader=dataloader, val_dataloaders=[dataloader]) trainer.test(model, test_dataloaders=dataloader) trainer.predict(model, dataloaders=dataloader) From 3643954d3fb1c3b32df3d252718b7c8b9843a33f Mon Sep 17 00:00:00 2001 From: Kaushik B <45285388+kaushikb11@users.noreply.github.com> Date: Tue, 6 Apr 2021 15:02:44 +0530 Subject: [PATCH 14/18] [Fix] TPU Training Type Plugin (#6816) --- .../plugins/training_type/single_tpu.py | 50 +++++----- .../plugins/training_type/tpu_spawn.py | 96 ++----------------- .../connectors/accelerator_connector.py | 21 ++-- pytorch_lightning/utilities/enums.py | 13 +++ tests/models/test_tpu.py | 4 +- 5 files changed, 55 insertions(+), 129 deletions(-) diff --git a/pytorch_lightning/plugins/training_type/single_tpu.py b/pytorch_lightning/plugins/training_type/single_tpu.py index 3ddfd98128787..efc8f921cff41 100644 --- a/pytorch_lightning/plugins/training_type/single_tpu.py +++ b/pytorch_lightning/plugins/training_type/single_tpu.py @@ -1,12 +1,20 @@ -import os -from typing import Optional, Union - +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
import torch -from pytorch_lightning.core.lightning import LightningModule from pytorch_lightning.plugins.training_type.single_device import SingleDevicePlugin -from pytorch_lightning.plugins.training_type.utils import on_colab_kaggle -from pytorch_lightning.utilities import _TPU_AVAILABLE, rank_zero_warn +from pytorch_lightning.utilities import _TPU_AVAILABLE from pytorch_lightning.utilities.apply_func import move_data_to_device if _TPU_AVAILABLE: @@ -15,14 +23,15 @@ class SingleTPUPlugin(SingleDevicePlugin): - def __init__(self, device: Union[torch.device, int]): - if isinstance(device, int): - device = xm.xla_device(device) + def __init__(self, device: int): + + device = xm.xla_device(device) super().__init__(device) self.tpu_local_core_rank = 0 self.tpu_global_core_rank = 0 + @property def on_tpu(self) -> bool: return True @@ -30,6 +39,10 @@ def connect(self, model: torch.nn.Module) -> torch.nn.Module: self._model = model self.model_to_device() return self._model + + @property + def is_distributed(self) -> bool: + return False def model_to_device(self) -> None: self._model.to(self.root_device) @@ -41,21 +54,6 @@ def pre_dispatch(self) -> None: self.tpu_local_core_rank = xm.get_local_ordinal() self.tpu_global_core_rank = xm.get_ordinal() - def post_dispatch(self) -> None: - model = self.lightning_module - - if on_colab_kaggle(): - rank_zero_warn("cleaning up... please do not interrupt") - self.save_spawn_weights(model) - - def save_spawn_weights(self, model: LightningModule) -> Optional[str]: - """ - Dump a temporary checkpoint after ddp ends to get weights out of the process - """ - path = os.path.join(model.trainer.default_root_dir, "__temp_weight_distributed_end.ckpt") - model.trainer.save_checkpoint(path) - return path - def on_save(self, checkpoint: dict) -> dict: """ Move XLA tensors to CPU before saving @@ -63,7 +61,3 @@ def on_save(self, checkpoint: dict) -> dict: https://github.com/pytorch/xla/blob/master/API_GUIDE.md#saving-and-loading-xla-tensors """ return move_data_to_device(checkpoint, torch.device("cpu")) - - @property - def is_distributed(self): - return False diff --git a/pytorch_lightning/plugins/training_type/tpu_spawn.py b/pytorch_lightning/plugins/training_type/tpu_spawn.py index 7dab800e1c453..44422cc49cddc 100644 --- a/pytorch_lightning/plugins/training_type/tpu_spawn.py +++ b/pytorch_lightning/plugins/training_type/tpu_spawn.py @@ -7,9 +7,7 @@ import torch import torch.multiprocessing as mp -from pytorch_lightning.core.lightning import LightningModule from pytorch_lightning.plugins.training_type.ddp_spawn import DDPSpawnPlugin -from pytorch_lightning.plugins.training_type.utils import on_colab_kaggle from pytorch_lightning.utilities import _OMEGACONF_AVAILABLE, _TPU_AVAILABLE, rank_zero_warn from pytorch_lightning.utilities.apply_func import apply_to_collection from pytorch_lightning.utilities.distributed import rank_zero_only, ReduceOp @@ -18,12 +16,11 @@ if _TPU_AVAILABLE: import torch_xla.core.xla_model as xm - import torch_xla.distributed.parallel_loader as xla_pl import torch_xla.distributed.xla_multiprocessing as xmp from torch_xla.core.xla_model import rendezvous - from torch_xla.distributed.parallel_loader import ParallelLoader + from torch_xla.distributed.parallel_loader import MpDeviceLoader else: - xm, xla_pl, xmp, ParallelLoader, rendezvous = [None] * 5 + xm, xmp, MpDeviceLoader, rendezvous = [None] * 4 if _OMEGACONF_AVAILABLE: from omegaconf import DictConfig, ListConfig, OmegaConf @@ -31,15 +28,8 @@ class TPUSpawnPlugin(DDPSpawnPlugin): - def 
__init__( - self, - parallel_devices: Optional[List[torch.device]] = None, - num_nodes: int = 1, - **kwargs: Dict[str, Any] - ) -> None: - super().__init__( - parallel_devices, num_nodes=num_nodes, cluster_environment=None, sync_batchnorm=False, **kwargs - ) + def __init__(self, parallel_devices: Optional[List[int]] = None, **kwargs: Dict[str, Any]) -> None: + super().__init__(parallel_devices, num_nodes=1, cluster_environment=None, sync_batchnorm=False) self.tpu_local_core_rank = 0 self.start_method = None @@ -61,10 +51,9 @@ def distributed_sampler_kwargs(self) -> dict: def is_distributed(self): return self.world_size != 1 - def process_dataloader(self, dataloader: Union[Iterable, torch.utils.data.DataLoader]) -> ParallelLoader: + def process_dataloader(self, dataloader: Union[Iterable, torch.utils.data.DataLoader]) -> MpDeviceLoader: device = xm.xla_device() - dataloader = xla_pl.ParallelLoader(dataloader, [device]) - dataloader = dataloader.per_device_loader(device) + dataloader = MpDeviceLoader(dataloader, device) return dataloader def configure_ddp(self) -> None: @@ -104,7 +93,6 @@ def new_process(self, process_idx: int, trainer, mp_queue) -> None: results = trainer.train_or_test_or_predict() - self.__save_end_of_training_weights(self.lightning_module) self.transfer_distrib_spawn_state_on_fit_end(results) # https://github.com/pytorch/xla/issues/1801#issuecomment-602799542 @@ -114,12 +102,6 @@ def new_process(self, process_idx: int, trainer, mp_queue) -> None: if self.global_rank == 0: time.sleep(2) - def __save_end_of_training_weights(self, model: LightningModule) -> None: - # when training ends on these platforms dump weights to get out of the main process - if on_colab_kaggle(): - rank_zero_warn("cleaning up... please do not interrupt") - self.save_spawn_weights(model) - def model_to_device(self) -> None: self._model.to(xm.xla_device()) @@ -159,37 +141,7 @@ def broadcast(self, obj: object, src: int = 0) -> object: obj = torch.load(buffer) return obj - def load_spawn_weights(self, original_model: LightningModule) -> LightningModule: - """ - Load the temp weights saved in the process - To recover the trained model from the ddp process we load the saved weights - """ - - loaded_model = original_model - - if self.is_global_zero: - # load weights saved in ddp - path = os.path.join(original_model.trainer.default_root_dir, "__temp_weight_distributed_end.ckpt") - loaded_model = original_model.__class__.load_from_checkpoint(path) - - # copy loaded weights to old model - original_model.load_state_dict(loaded_model.state_dict()) - - # remove ddp weights - os.remove(path) - - return loaded_model - - def save_spawn_weights(self, model: LightningModule) -> Optional[str]: - """ - Dump a temporary checkpoint after ddp ends to get weights out of the process - """ - if model.trainer.is_global_zero: - path = os.path.join(model.trainer.default_root_dir, "__temp_weight_distributed_end.ckpt") - model.trainer.save_checkpoint(path) - return path - - def reduce_decision(self, decision: bool) -> bool: + def reduce_boolean_decision(self, decision: bool) -> bool: decision = torch.tensor(int(decision), device=self.device) decision = self.reduce(decision, "sum") decision = bool(decision == self.world_size) @@ -213,40 +165,6 @@ def reduce(self, output, group: Optional[Any] = None, reduce_op: Optional[Union[ return output - def post_dispatch(self) -> None: - # TODO: Check if trainer references can be resolved otherwise - model = self.lightning_module - - # restore main state with best weights - best_path = 
self.mp_queue.get() - last_path = self.mp_queue.get() - self._results = self.mp_queue.get() - - # transfer back the best path to the trainer - if self.lightning_module.trainer.checkpoint_callback is not None: - self.lightning_module.trainer.checkpoint_callback.best_model_path = best_path - # todo, pass also bets score - - # load last weights - if last_path and not self.lightning_module.trainer.testing: - ckpt = torch.load(last_path, map_location=lambda storage, loc: storage) - model.load_state_dict(ckpt) - - self._model = model - - # when training completes, load the weights back in main process - self.__load_weights_on_main_process() - - def __load_weights_on_main_process(self) -> None: - model = self.lightning_module - - # load weights if not interrupted - # TODO: check for trainer reference - if on_colab_kaggle() and not model.trainer.testing: - self.load_spawn_weights(model) - - self._model = model - def _close_logger(self, trainer) -> None: if trainer.logger is not None: trainer.logger.finalize("success") diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 83eddfed6c4dc..c53db011d837a 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -251,7 +251,7 @@ def use_dp(self) -> bool: def use_ddp(self) -> bool: return self._distrib_type in ( DistributedType.DDP, DistributedType.DDP_SPAWN, DistributedType.DDP_SHARDED, - DistributedType.DDP_SHARDED_SPAWN, DistributedType.DEEPSPEED + DistributedType.DDP_SHARDED_SPAWN, DistributedType.DEEPSPEED, DistributedType.TPU_SPAWN ) @property @@ -291,7 +291,8 @@ def parallel_devices(self) -> Union[List[torch.device], int]: elif self.on_tpu: # explicitly don't make a tpu device here! 
# https://github.com/PyTorchLightning/pytorch-lightning/issues/3169 - devices = [i for i in self.parallel_device_ids] + if isinstance(self.tpu_cores, int): + devices = list(range(self.tpu_cores)) else: devices = [torch.device("cpu")] * self.num_processes return devices @@ -369,6 +370,7 @@ def select_training_type_plugin(self) -> TrainingTypePlugin: use_torchelastic_ddp = self.use_ddp and self.is_using_torchelastic use_ddp_spawn = self._distrib_type == DistributedType.DDP_SPAWN use_ddp_cpu_spawn = self.use_ddp and self.on_cpu + use_tpu_spawn = self.on_tpu and self._distrib_type == DistributedType.TPU_SPAWN use_ddp_cpu_torch_elastic = use_ddp_cpu_spawn and self.is_using_torchelastic use_ddp_cpu_slurm = use_ddp_cpu_spawn and self.is_slurm_managing_tasks use_ddp_sharded = self._distrib_type == DistributedType.DDP_SHARDED @@ -379,7 +381,7 @@ def select_training_type_plugin(self) -> TrainingTypePlugin: if os.environ.get("PL_IN_DDP_SUBPROCESS", False): use_torchelastic_ddp = False - if self.on_tpu: + if use_tpu_spawn: ddp_plugin_cls = TPUSpawnPlugin elif use_ddp_sharded: ddp_plugin_cls = DDPShardedPlugin @@ -402,11 +404,8 @@ def select_training_type_plugin(self) -> TrainingTypePlugin: plugin = DataParallelPlugin(parallel_devices=self.parallel_devices) elif self.use_horovod: plugin = HorovodPlugin(parallel_devices=self.parallel_devices) - elif self.on_tpu: - if isinstance(self.tpu_cores, list): - plugin = SingleTPUPlugin(self.tpu_id) - else: - plugin = TPUSpawnPlugin(parallel_devices=list(range(self.tpu_cores))) + elif self.on_tpu and isinstance(self.tpu_cores, list): + plugin = SingleTPUPlugin(self.tpu_id) else: single_gpu_ordinal = device_parser.determine_root_gpu_device(self.parallel_device_ids) plugin = SingleDevicePlugin(device=torch.device(f"cuda:{single_gpu_ordinal}" if self.on_gpu else "cpu")) @@ -507,6 +506,8 @@ def set_distributed_mode(self, distributed_backend: Optional[str] = None): # special case with TPUs elif self.distributed_backend == 'tpu' or self.tpu_cores is not None: self._device_type = DeviceType.TPU + if isinstance(self.tpu_cores, int): + self._distrib_type = DistributedType.TPU_SPAWN elif self.distributed_backend and self._distrib_type is None: self._distrib_type = DistributedType(self.distributed_backend) @@ -515,9 +516,9 @@ def set_distributed_mode(self, distributed_backend: Optional[str] = None): if self.num_gpus > 0 and not _on_cpu: self._device_type = DeviceType.GPU - _distrib_types = (DistributedType.DP, DistributedType.DDP, DistributedType.DDP_SPAWN, DistributedType.DDP2) + _gpu_distrib_types = (DistributedType.DP, DistributedType.DDP, DistributedType.DDP_SPAWN, DistributedType.DDP2) # DP and DDP2 cannot run without GPU - if self.num_gpus == 0 and self._distrib_type in _distrib_types and not _on_cpu: + if self.num_gpus == 0 and self._distrib_type in _gpu_distrib_types and not _on_cpu: rank_zero_warn( 'You requested distributed training on GPUs, but none is available, so we set backend to `ddp_cpu`.' 
) diff --git a/pytorch_lightning/utilities/enums.py b/pytorch_lightning/utilities/enums.py index 3e4add4fb68d1..8c4bbbc23500f 100644 --- a/pytorch_lightning/utilities/enums.py +++ b/pytorch_lightning/utilities/enums.py @@ -58,10 +58,23 @@ class DistributedType(LightningEnum): >>> DistributedType.DDP2 in ('ddp2', ) True """ + + @staticmethod + def interactive_compatible_types() -> List['DistributedType']: + """Returns a list containing interactive compatible DistributeTypes""" + return [ + DistributedType.DP, DistributedType.DDP_SPAWN, DistributedType.DDP_SHARDED_SPAWN, DistributedType.TPU_SPAWN + ] + + def is_interactive_compatible(self) -> bool: + """Returns whether self is interactive compatible""" + return self in DistributedType.interactive_compatible_types() + DP = 'dp' DDP = 'ddp' DDP2 = 'ddp2' DDP_SPAWN = 'ddp_spawn' + TPU_SPAWN = 'tpu_spawn' DEEPSPEED = 'deepspeed' HOROVOD = 'horovod' DDP_SHARDED = 'ddp_sharded' diff --git a/tests/models/test_tpu.py b/tests/models/test_tpu.py index 2befc5bd7dbd2..b64b22c66caa7 100644 --- a/tests/models/test_tpu.py +++ b/tests/models/test_tpu.py @@ -210,8 +210,8 @@ def test_tpu_grad_norm(tmpdir): progress_bar_refresh_rate=0, max_epochs=4, tpu_cores=1, - limit_train_batches=4, - limit_val_batches=4, + limit_train_batches=0.4, + limit_val_batches=0.4, gradient_clip_val=0.5, ) From 716ade5362c354079a0873e1ce4be47b22f4a96f Mon Sep 17 00:00:00 2001 From: Ethan Harris Date: Tue, 6 Apr 2021 10:36:25 +0100 Subject: [PATCH 15/18] Fix support for symlink save_dir in TensorBoardLogger (#6730) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add test for symlink support and initial fix * Respond to comment and add docstring * Update CHANGELOG.md * Simplify * Update pytorch_lightning/utilities/cloud_io.py Co-authored-by: Carlos Mocholí * Make `LightningLocalFileSystem` protected Co-authored-by: Carlos Mocholí --- CHANGELOG.md | 3 +++ pytorch_lightning/utilities/cloud_io.py | 16 +++++++++++++++- tests/loggers/test_tensorboard.py | 19 +++++++++++++++++++ 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 14578579eefbf..001adcac2d487 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,6 +40,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed resolve a bug with omegaconf and xm.save ([#6741](https://github.com/PyTorchLightning/pytorch-lightning/pull/6741)) +- Fixed a bug where `TensorBoardLogger` would give a warning and not log correctly to a symbolic link `save_dir` ([#6730](https://github.com/PyTorchLightning/pytorch-lightning/pull/6730)) + + ## [1.2.4] - 2021-03-16 ### Changed diff --git a/pytorch_lightning/utilities/cloud_io.py b/pytorch_lightning/utilities/cloud_io.py index e94934020107d..c179d0d0d0bf8 100644 --- a/pytorch_lightning/utilities/cloud_io.py +++ b/pytorch_lightning/utilities/cloud_io.py @@ -12,15 +12,29 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os import io from distutils.version import LooseVersion from pathlib import Path from typing import IO, Union import fsspec +from fsspec.implementations.local import LocalFileSystem + import torch +class _LightningLocalFileSystem(LocalFileSystem): + """Extension of ``fsspec.implementations.local.LocalFileSystem`` where ``LightningLocalFileSystem.isdir`` behaves + the same as ``os.isdir``. + + To be removed when https://github.com/intake/filesystem_spec/issues/591 is fixed. 
+ """ + + def isdir(self, path: str) -> bool: + return os.path.isdir(path) # follows symlinks + + def load(path_or_url: Union[str, IO, Path], map_location=None): if not isinstance(path_or_url, (str, Path)): # any sort of BytesIO or similiar @@ -39,7 +53,7 @@ def get_filesystem(path: Union[str, Path]): return fsspec.filesystem(path.split(":", 1)[0]) else: # use local filesystem - return fsspec.filesystem("file") + return _LightningLocalFileSystem() def atomic_save(checkpoint, filepath: str): diff --git a/tests/loggers/test_tensorboard.py b/tests/loggers/test_tensorboard.py index e5e3f231d3ac7..316390e61d9f2 100644 --- a/tests/loggers/test_tensorboard.py +++ b/tests/loggers/test_tensorboard.py @@ -303,3 +303,22 @@ def test_tensorboard_save_hparams_to_yaml_once(tmpdir): hparams_file = "hparams.yaml" assert os.path.isfile(os.path.join(trainer.log_dir, hparams_file)) assert not os.path.isfile(os.path.join(tmpdir, hparams_file)) + + +@mock.patch('pytorch_lightning.loggers.tensorboard.log') +def test_tensorboard_with_symlink(log, tmpdir): + """ + Tests a specific failure case when tensorboard logger is used with empty name, symbolic link ``save_dir``, and + relative paths. + """ + os.chdir(tmpdir) # need to use relative paths + source = os.path.join('.', 'lightning_logs') + dest = os.path.join('.', 'sym_lightning_logs') + + os.makedirs(source, exist_ok=True) + os.symlink(source, dest) + + logger = TensorBoardLogger(save_dir=dest, name='') + _ = logger.version + + log.warning.assert_not_called() From 653232c00ef5c9ea4fd355fd0bfc90efd7aad70e Mon Sep 17 00:00:00 2001 From: Tharindu Hasthika Date: Tue, 6 Apr 2021 15:07:15 +0530 Subject: [PATCH 16/18] Fixed missing arguments in `lr_find` call (#6784) There seem to be 3 arguments missing in the `lr_find` call in the tunining.py file. --- pytorch_lightning/tuner/tuning.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pytorch_lightning/tuner/tuning.py b/pytorch_lightning/tuner/tuning.py index c5256c6ddc65f..d46db5de1ddc8 100644 --- a/pytorch_lightning/tuner/tuning.py +++ b/pytorch_lightning/tuner/tuning.py @@ -60,7 +60,13 @@ def tune(self, model, train_dataloader, val_dataloaders, datamodule): # Run learning rate finder: if self.trainer.auto_lr_find: - self.lr_find(model, update_attr=True) + self.lr_find( + model, + update_attr=True, + train_dataloader=train_dataloader, + val_dataloaders=val_dataloaders, + datamodule=datamodule, + ) def scale_batch_size( self, From c8c5d056326403c6d52e091be32692107efdff02 Mon Sep 17 00:00:00 2001 From: Kaushik Bokka Date: Tue, 6 Apr 2021 17:51:30 +0530 Subject: [PATCH 17/18] Update Changelog & version --- CHANGELOG.md | 17 ++++++++++++----- pytorch_lightning/info.py | 2 +- .../plugins/training_type/single_tpu.py | 2 +- pytorch_lightning/utilities/enums.py | 2 +- 4 files changed, 15 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 001adcac2d487..e4a2d76c1fdd0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,18 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
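Patch 16 forwards the data arguments from `Tuner.tune` into `lr_find`; before this fix, calling `trainer.tune(...)` with `auto_lr_find=True` and explicitly passed dataloaders meant the learning-rate finder never saw them. A hedged usage sketch of the code path being fixed, assuming the pytorch-lightning 1.2.x `Trainer.tune` signature (the tiny model and random data are illustrative only):

```python
import torch
from torch.utils.data import DataLoader, TensorDataset
import pytorch_lightning as pl


class TinyModel(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.layer = torch.nn.Linear(8, 1)
        self.learning_rate = 1e-3  # attribute that auto_lr_find updates in place

    def training_step(self, batch, batch_idx):
        x, y = batch
        return torch.nn.functional.mse_loss(self.layer(x), y)

    def configure_optimizers(self):
        return torch.optim.SGD(self.parameters(), lr=self.learning_rate)


train_loader = DataLoader(TensorDataset(torch.randn(64, 8), torch.randn(64, 1)), batch_size=8)

trainer = pl.Trainer(auto_lr_find=True, max_epochs=1, progress_bar_refresh_rate=0)
# Before #6784 this dataloader never reached `lr_find`; with the patch it does.
trainer.tune(TinyModel(), train_dataloader=train_loader)
```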
+## [1.2.7] - 2021-04-06 + +### Fixed + +- Fixed resolve a bug with omegaconf and xm.save ([#6741](https://github.com/PyTorchLightning/pytorch-lightning/pull/6741)) +- Fixed an issue with IterableDataset when __len__ is not defined ([#6828](https://github.com/PyTorchLightning/pytorch-lightning/pull/6828)) +- Sanitize None params during pruning ([#6836](https://github.com/PyTorchLightning/pytorch-lightning/pull/6836)) +- Enforce an epoch scheduler interval when using SWA ([#6588](https://github.com/PyTorchLightning/pytorch-lightning/pull/6588)) +- Fixed TPU Colab hang issue, post training ([#6816](https://github.com/PyTorchLightning/pytorch-lightning/pull/6816)) +- Fixed a bug where `TensorBoardLogger` would give a warning and not log correctly to a symbolic link `save_dir` ([#6730](https://github.com/PyTorchLightning/pytorch-lightning/pull/6730)) + + ## [1.2.6] - 2021-03-30 ### Changed @@ -38,11 +50,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Added Autocast in validation, test and predict modes for Native AMP ([#6565](https://github.com/PyTorchLightning/pytorch-lightning/pull/6565)) -- Fixed resolve a bug with omegaconf and xm.save ([#6741](https://github.com/PyTorchLightning/pytorch-lightning/pull/6741)) - -- Fixed a bug where `TensorBoardLogger` would give a warning and not log correctly to a symbolic link `save_dir` ([#6730](https://github.com/PyTorchLightning/pytorch-lightning/pull/6730)) - - ## [1.2.4] - 2021-03-16 ### Changed diff --git a/pytorch_lightning/info.py b/pytorch_lightning/info.py index 5b383b78e8a41..fbabb2b0bb231 100644 --- a/pytorch_lightning/info.py +++ b/pytorch_lightning/info.py @@ -1,7 +1,7 @@ import time _this_year = time.strftime("%Y") -__version__ = '1.2.6' +__version__ = '1.2.7' __author__ = 'William Falcon et al.' __author_email__ = 'waf2107@columbia.edu' __license__ = 'Apache-2.0' diff --git a/pytorch_lightning/plugins/training_type/single_tpu.py b/pytorch_lightning/plugins/training_type/single_tpu.py index efc8f921cff41..1ef2c7676ae72 100644 --- a/pytorch_lightning/plugins/training_type/single_tpu.py +++ b/pytorch_lightning/plugins/training_type/single_tpu.py @@ -39,7 +39,7 @@ def connect(self, model: torch.nn.Module) -> torch.nn.Module: self._model = model self.model_to_device() return self._model - + @property def is_distributed(self) -> bool: return False diff --git a/pytorch_lightning/utilities/enums.py b/pytorch_lightning/utilities/enums.py index 8c4bbbc23500f..eb912d1dc3aae 100644 --- a/pytorch_lightning/utilities/enums.py +++ b/pytorch_lightning/utilities/enums.py @@ -13,7 +13,7 @@ # limitations under the License. 
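The `List` import added here supports the `interactive_compatible_types()` helper introduced on `DistributedType` earlier in this series. A short usage sketch, assuming the 1.2.7 enum exactly as shown in these patches:

```python
from pytorch_lightning.utilities.enums import DistributedType

# Spawn-based backends and DP are listed as usable from notebooks / interactive sessions
assert DistributedType.DDP_SPAWN.is_interactive_compatible()
assert DistributedType.TPU_SPAWN.is_interactive_compatible()

# Plain DDP expects to be launched from a script entry point, so it is not in the list
assert not DistributedType.DDP.is_interactive_compatible()
```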
"""Enumerated utilities""" from enum import Enum -from typing import Union +from typing import List, Union class LightningEnum(str, Enum): From 86f6d08ff93b5b6e5ffcf453101cee6f9633255a Mon Sep 17 00:00:00 2001 From: Kaushik Bokka Date: Tue, 6 Apr 2021 19:27:51 +0530 Subject: [PATCH 18/18] Fix TPU tests for checkpoint Skip advanced profiler for torch > 1.8 Skip pytorch profiler for torch > 1.8 Fix save checkpoint logic for TPUs --- pytorch_lightning/plugins/training_type/tpu_spawn.py | 6 ++++-- tests/models/test_tpu.py | 6 +++--- tests/test_profiler.py | 2 ++ tests/trainer/test_trainer.py | 2 ++ 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/pytorch_lightning/plugins/training_type/tpu_spawn.py b/pytorch_lightning/plugins/training_type/tpu_spawn.py index 44422cc49cddc..0f55100bf1ab9 100644 --- a/pytorch_lightning/plugins/training_type/tpu_spawn.py +++ b/pytorch_lightning/plugins/training_type/tpu_spawn.py @@ -203,12 +203,14 @@ def test_step(self, *args, **kwargs): def predict(self, *args, **kwargs): return self.lightning_module.predict(*args, **kwargs) - def save_checkpoint(self, checkpoint: Dict[str, Any], filepath: str) -> None: + def save_checkpoint(self, filepath: str, weights_only: bool = False) -> None: """Save model/training states as a checkpoint file through state-dump and file-write. Args: - checkpoint: dict containing model and trainer state filepath: write-target file's path + weights_only: saving model weights only """ + # dump states as a checkpoint dictionary object + checkpoint = self.lightning_module.trainer.checkpoint_connector.dump_checkpoint(weights_only) # Todo: TypeError: 'mappingproxy' object does not support item assignment if _OMEGACONF_AVAILABLE: checkpoint = apply_to_collection(checkpoint, (DictConfig, ListConfig), OmegaConf.to_container) diff --git a/tests/models/test_tpu.py b/tests/models/test_tpu.py index b64b22c66caa7..24c0b615b95bb 100644 --- a/tests/models/test_tpu.py +++ b/tests/models/test_tpu.py @@ -122,7 +122,7 @@ def test_model_16bit_tpu_cores_1(tmpdir): progress_bar_refresh_rate=0, max_epochs=2, tpu_cores=1, - limit_train_batches=8, + limit_train_batches=0.7, limit_val_batches=2, ) @@ -210,8 +210,8 @@ def test_tpu_grad_norm(tmpdir): progress_bar_refresh_rate=0, max_epochs=4, tpu_cores=1, - limit_train_batches=0.4, - limit_val_batches=0.4, + limit_train_batches=10, + limit_val_batches=10, gradient_clip_val=0.5, ) diff --git a/tests/test_profiler.py b/tests/test_profiler.py index 667e153a9edd4..6abcf17a04893 100644 --- a/tests/test_profiler.py +++ b/tests/test_profiler.py @@ -20,6 +20,7 @@ import pytest from pytorch_lightning.profiler import AdvancedProfiler, SimpleProfiler +from tests.helpers.runif import RunIf PROFILER_OVERHEAD_MAX_TOLERANCE = 0.0005 @@ -165,6 +166,7 @@ def test_advanced_profiler_overhead(advanced_profiler, n_iter=5): assert average_duration < PROFILER_OVERHEAD_MAX_TOLERANCE +@RunIf(max_torch="1.8.1") def test_advanced_profiler_describe(tmpdir, advanced_profiler): """ ensure the profiler won't fail when reporting the summary diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index fd2b48a3fa140..306d38d2d651b 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -42,6 +42,7 @@ from pytorch_lightning.utilities.exceptions import MisconfigurationException from tests.base import EvalModelTemplate from tests.helpers import BoringModel, RandomDataset +from tests.helpers.runif import RunIf @pytest.fixture @@ -1499,6 +1500,7 @@ def test_trainer_predict_ddp_cpu(tmpdir): 
predict(tmpdir, "ddp_cpu", 0, 2) +@RunIf(max_torch="1.8.1") def test_pytorch_profiler_describe(pytorch_profiler): """Ensure the profiler won't fail when reporting the summary.""" with pytorch_profiler.profile("test_step"):
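`RunIf(max_torch="1.8.1")` is the test-suite decorator (from `tests/helpers/runif.py`) used above to skip the profiler tests on newer PyTorch; the commit message only states that these tests should be skipped for torch > 1.8. A rough standalone equivalent with plain pytest, for readers outside the Lightning test suite (the reason string is a placeholder, not taken from the patch):

```python
from distutils.version import LooseVersion

import pytest
import torch

# Skip when the installed torch is newer than the supported maximum.
requires_torch_max_1_8_1 = pytest.mark.skipif(
    LooseVersion(torch.__version__) > LooseVersion("1.8.1"),
    reason="test only validated on torch <= 1.8.1",
)


@requires_torch_max_1_8_1
def test_autograd_profiler_describe():
    with torch.autograd.profiler.profile() as prof:
        torch.ones(2, 2) + torch.ones(2, 2)
    assert prof.key_averages() is not None
```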