From e7b31da89d53ceaa800aa43f5ee539c232c513d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sun, 4 Jul 2021 16:44:35 +0200 Subject: [PATCH 01/24] optimization docs --- pytorch_lightning/core/lightning.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 2478f698e659b..943ae42275c18 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -117,6 +117,17 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: self._metric_attributes: Optional[Dict[int, str]] = None def optimizers(self, use_pl_optimizer: bool = True) -> Union[Optimizer, List[Optimizer], List[LightningOptimizer]]: + """ + Returns the optimizer(s) that are being use during training. Useful for manual optimization. + + Args: + use_pl_optimizer: If ``True``, will wrap the optimizer(s) in a + :class:`~pytorch_lighting.core.optimizer.LightningOptimizer` for automatic handling of precision and + profiling. + + Returns: + A single optimizer, or a list of optimizers in case multiple ones are present. + """ if use_pl_optimizer: opts = list(self.trainer.lightning_optimizers.values()) else: @@ -129,6 +140,13 @@ def optimizers(self, use_pl_optimizer: bool = True) -> Union[Optimizer, List[Opt return opts def lr_schedulers(self) -> Optional[Union[Any, List[Any]]]: + """ + Returns the learning rate scheduler(s) that are being use during training. Useful for manual optimization. + + Returns: + A single scheduler, or a list of schedulers in case multiple ones are present, or ``None`` if no + schedulers were returned in :meth:´configure_optimizers´. + """ if not self.trainer.lr_schedulers: return None From 55bdc3fb4814a332b21e4e27b12d6f4bc50376bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sun, 4 Jul 2021 16:45:05 +0200 Subject: [PATCH 02/24] example input array docs --- pytorch_lightning/core/lightning.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 943ae42275c18..c177e66e1a64e 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -162,8 +162,23 @@ def lr_schedulers(self) -> Optional[Union[Any, List[Any]]]: @property def example_input_array(self) -> Any: + """ + The example input array is a specification of what the module can consume in the :meth:`forward` method. + The return type is interpreted as follows: + + - Single tensor: It is assumed the model takes a single argument, i.e., + ``model.forward(model.example_input_array)`` + - Tuple: The input array should be interpreted as a sequence of positional arguments, i.e., + ``model.forward(*model.example_input_array)`` + - Dict: The input array represents named keyword arguments, i.e., + ``model.forward(**model.example_input_array)`` + """ return self._example_input_array + @example_input_array.setter + def example_input_array(self, example: Any) -> None: + self._example_input_array = example + @property def current_epoch(self) -> int: """The current epoch""" @@ -184,10 +199,6 @@ def local_rank(self) -> int: """ The index of the current process within a single node. 
""" return self.trainer.local_rank if self.trainer else 0 - @example_input_array.setter - def example_input_array(self, example: Any) -> None: - self._example_input_array = example - @property def datamodule(self) -> Any: warning_cache.deprecation( From 0a8d043ba9d974a26211ecd211549d936eb4d782 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sun, 4 Jul 2021 16:52:07 +0200 Subject: [PATCH 03/24] step and rank --- pytorch_lightning/core/lightning.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index c177e66e1a64e..97be00b74a0d3 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -181,22 +181,22 @@ def example_input_array(self, example: Any) -> None: @property def current_epoch(self) -> int: - """The current epoch""" + """The current epoch in the Trainer. If no Trainer is attached, this propery is 0.""" return self.trainer.current_epoch if self.trainer else 0 @property def global_step(self) -> int: - """Total training batches seen across all epochs""" + """Total training batches seen across all epochs. If no Trainer is attached, this propery is 0.""" return self.trainer.global_step if self.trainer else 0 @property def global_rank(self) -> int: - """ The index of the current process across all nodes and devices. """ + """The index of the current process across all nodes and devices.""" return self.trainer.global_rank if self.trainer else 0 @property def local_rank(self) -> int: - """ The index of the current process within a single node. """ + """The index of the current process within a single node.""" return self.trainer.local_rank if self.trainer else 0 @property From 47718fce149da24bb5e8c9bdde321ec20fac7ea9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sun, 4 Jul 2021 16:52:35 +0200 Subject: [PATCH 04/24] on_gpu --- pytorch_lightning/core/lightning.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 97be00b74a0d3..0f1208ca54bc8 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -233,7 +233,7 @@ def datamodule(self, datamodule: Any) -> None: @property def on_gpu(self): """ - True if your model is currently running on GPUs. + Returns ``True`` if this model is currently located on a GPU. Useful to set flags around the LightningModule for different CPU vs GPU behavior. """ return self.device.type == "cuda" From 103c803266eff6cef628b35eff1240dcb766cb7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sun, 4 Jul 2021 16:53:26 +0200 Subject: [PATCH 05/24] auto-opt --- pytorch_lightning/core/lightning.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 0f1208ca54bc8..f0600b069095c 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -241,7 +241,7 @@ def on_gpu(self): @property def automatic_optimization(self) -> bool: """ - If False you are responsible for calling .backward, .step, zero_grad. + If set to ``False`` you are responsible for calling ``.backward()``, ``.step()``, ``.zero_grad()``. 
""" return self._automatic_optimization From 3550683c02d1724635acf418331a3e2e5076936c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sun, 4 Jul 2021 17:07:28 +0200 Subject: [PATCH 06/24] truncated backprop --- pytorch_lightning/core/lightning.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index f0600b069095c..80206baaf9c31 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -252,8 +252,9 @@ def automatic_optimization(self, automatic_optimization: bool) -> None: @property def truncated_bptt_steps(self) -> int: """ - truncated_bptt_steps: Truncated back prop breaks performs backprop every k steps of much a longer sequence. - If this is > 0, the training step is passed ``hiddens``. + Enables `Truncated Backpropagation Through Time` in the Trainer when set to a positive integer. It represents + the number of times :meth:`trainnig_step` gets called before backpropagation. If this is > 0, the + :meth:`trainnig_step` receives an additional argument ``hiddens`` and is expected to return a hidden state. """ return self._truncated_bptt_steps From c4a366686370e3c40cea39ca60dd58811c390d28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sun, 4 Jul 2021 17:31:16 +0200 Subject: [PATCH 07/24] fixes --- pytorch_lightning/core/lightning.py | 30 ++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 80206baaf9c31..e1ee0f96b50af 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -93,16 +93,16 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: self._loaded_optimizer_states_dict = {} - #: Pointer to the trainer object + # pointer to the trainer object self.trainer = None self._distrib_type = None self._device_type = None - #: True if using amp + # true if using amp self.use_amp: bool = False - #: The precision used + # the precision used self.precision: int = 32 # optionally can be set by user @@ -145,7 +145,7 @@ def lr_schedulers(self) -> Optional[Union[Any, List[Any]]]: Returns: A single scheduler, or a list of schedulers in case multiple ones are present, or ``None`` if no - schedulers were returned in :meth:´configure_optimizers´. + schedulers were returned in :meth:`configure_optimizers`. """ if not self.trainer.lr_schedulers: return None @@ -166,12 +166,12 @@ def example_input_array(self) -> Any: The example input array is a specification of what the module can consume in the :meth:`forward` method. 
The return type is interpreted as follows: - - Single tensor: It is assumed the model takes a single argument, i.e., - ``model.forward(model.example_input_array)`` - - Tuple: The input array should be interpreted as a sequence of positional arguments, i.e., - ``model.forward(*model.example_input_array)`` - - Dict: The input array represents named keyword arguments, i.e., - ``model.forward(**model.example_input_array)`` + - Single tensor: It is assumed the model takes a single argument, i.e., + ``model.forward(model.example_input_array)`` + - Tuple: The input array should be interpreted as a sequence of positional arguments, i.e., + ``model.forward(*model.example_input_array)`` + - Dict: The input array represents named keyword arguments, i.e., + ``model.forward(**model.example_input_array)`` """ return self._example_input_array @@ -253,8 +253,8 @@ def automatic_optimization(self, automatic_optimization: bool) -> None: def truncated_bptt_steps(self) -> int: """ Enables `Truncated Backpropagation Through Time` in the Trainer when set to a positive integer. It represents - the number of times :meth:`trainnig_step` gets called before backpropagation. If this is > 0, the - :meth:`trainnig_step` receives an additional argument ``hiddens`` and is expected to return a hidden state. + the number of times :meth:`training_step` gets called before backpropagation. If this is > 0, the + :meth:`training_step` receives an additional argument ``hiddens`` and is expected to return a hidden state. """ return self._truncated_bptt_steps @@ -328,13 +328,13 @@ def log( rank_zero_only: Optional[bool] = None, ) -> None: """ - Log a key, value + Log a key, value pair. Example:: self.log('train_loss', loss) - The default behavior per hook is as follows + The default behavior per hook is as follows: .. csv-table:: ``*`` also applies to the test loop :header: "LightningModule Hook", "on_step", "on_epoch", "prog_bar", "logger" @@ -478,7 +478,7 @@ def log_dict( add_dataloader_idx: bool = True, ) -> None: """ - Log a dictionary of values at once + Log a dictionary of values at once. Example:: From ea3c552516cd6a052ac864aff4620857c3e60078 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sun, 4 Jul 2021 17:41:39 +0200 Subject: [PATCH 08/24] manual backward --- pytorch_lightning/core/lightning.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index e1ee0f96b50af..5ff72814b6052 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -118,7 +118,7 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: def optimizers(self, use_pl_optimizer: bool = True) -> Union[Optimizer, List[Optimizer], List[LightningOptimizer]]: """ - Returns the optimizer(s) that are being use during training. Useful for manual optimization. + Returns the optimizer(s) that are being used during training. Useful for manual optimization. Args: use_pl_optimizer: If ``True``, will wrap the optimizer(s) in a @@ -141,7 +141,7 @@ def optimizers(self, use_pl_optimizer: bool = True) -> Union[Optimizer, List[Opt def lr_schedulers(self) -> Optional[Union[Any, List[Any]]]: """ - Returns the learning rate scheduler(s) that are being use during training. Useful for manual optimization. + Returns the learning rate scheduler(s) that are being used during training. Useful for manual optimization. 
Returns: A single scheduler, or a list of schedulers in case multiple ones are present, or ``None`` if no @@ -1424,10 +1424,8 @@ def configure_optimizers(self): def manual_backward(self, loss: Tensor, optimizer: Optional[Optimizer] = None, *args, **kwargs) -> None: """ - Call this directly from your training_step when doing optimizations manually. - By using this we can ensure that all the proper scaling when using 16-bit etc has been done for you. - - This function forwards all args to the .backward() call as well. + Call this directly from your :meth:`training_step` when doing optimizations manually. + By using this, Lightning can ensure that all the proper scaling gets applied when using mixed precision. See :ref:`manual optimization` for more examples. @@ -1440,6 +1438,12 @@ def training_step(...): # automatically applies scaling, etc... self.manual_backward(loss) opt.step() + + Args: + loss: The tensor on which to compute gradients. Must have a graph attached. + optimizer: This argument is unused and deprecated. It will be removed in v1.4. + *args: Additional positional arguments to be forwarded to :meth:`~torch.Tensor.backward` + **kwargs: Additional keyword arguments to be forwarded to :meth:`~torch.Tensor.backward` """ if optimizer is not None: rank_zero_deprecation( From 21f018314c71023d17a301db8bde5d3096f918b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sun, 4 Jul 2021 17:52:08 +0200 Subject: [PATCH 09/24] backward --- pytorch_lightning/core/lightning.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 5ff72814b6052..d1e46be1fd565 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1460,22 +1460,19 @@ def training_step(...): def backward(self, loss: Tensor, optimizer: Optimizer, optimizer_idx: int, *args, **kwargs) -> None: """ - Override backward with your own implementation if you need to. + Called to perform backward on the loss returned in :meth:`training_step`. + Override this hook with your own implementation if you need to. Args: - loss: Loss is already scaled by accumulated grads + loss: The loss tensor returned by :meth:`training_step`. If gradient accumulation is used, the loss here + holds the normalized value (scaled by 1 / accumulation steps). optimizer: Current optimizer being used optimizer_idx: Index of the current optimizer being used - Called to perform backward step. - Feel free to override as needed. - The loss passed in has already been scaled for accumulated gradients if requested. - Example:: def backward(self, loss, optimizer, optimizer_idx): loss.backward() - """ if self.automatic_optimization or self._running_manual_backward: loss.backward(*args, **kwargs) From e869fc1d309f5f627517296a8ac1f3ea2eb45a4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sun, 4 Jul 2021 18:09:24 +0200 Subject: [PATCH 10/24] all gather --- pytorch_lightning/core/lightning.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index d1e46be1fd565..b19b000671694 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -622,16 +622,14 @@ def all_gather( sync_grads: bool = False, ): r""" - Allows users to call ``self.all_gather()`` from the LightningModule, thus making - the ```all_gather``` operation accelerator agnostic. 
- - ```all_gather``` is a function provided by accelerators to gather a tensor from several - distributed processes + Allows users to call ``self.all_gather()`` from the LightningModule, thus making the ``all_gather`` operation + accelerator agnostic. ``all_gather`` is a function provided by accelerators to gather a tensor from several + distributed processes. Args: - tensor: int, float, tensor of shape (batch, ...), or a (possibly nested) collection thereof. + data: int, float, tensor of shape (batch, ...), or a (possibly nested) collection thereof. group: the process group to gather results from. Defaults to all processes (world) - sync_grads: flag that allows users to synchronize gradients for all_gather op + sync_grads: flag that allows users to synchronize gradients for the all_gather operation Return: A tensor of shape (world_size, batch, ...), or if the input was a collection From 129ad196a09854d9a6041ea3033d718bd0913bb0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sun, 4 Jul 2021 18:15:58 +0200 Subject: [PATCH 11/24] fix configure optimizers html --- pytorch_lightning/core/lightning.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index b19b000671694..53bdf5186f44f 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1265,9 +1265,9 @@ def configure_optimizers(self): - **Single optimizer**. - **List or Tuple** of optimizers. - **Two lists** - The first list has multiple optimizers, and the second has multiple LR schedulers - (or multiple ``lr_dict``). + (or multiple ``lr_dict``). - **Dictionary**, with an ``"optimizer"`` key, and (optionally) a ``"lr_scheduler"`` - key whose value is a single LR scheduler or ``lr_dict``. + key whose value is a single LR scheduler or ``lr_dict``. - **Tuple of dictionaries** as described above, with an optional ``"frequency"`` key. - **None** - Fit will run without any optimizer. From 54f0e8fbab9fbfb1ef5e112358d62666ffebda14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sun, 4 Jul 2021 18:16:05 +0200 Subject: [PATCH 12/24] whitespace --- pytorch_lightning/core/lightning.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 53bdf5186f44f..40adb7e9f6b5e 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -2064,8 +2064,8 @@ def model_size(self) -> float: return size_mb def add_to_queue(self, queue: torch.multiprocessing.SimpleQueue) -> None: - """Appends the :attr:`trainer.callback_metrics` dictionary to the given queue. - + """ + Appends the :attr:`trainer.callback_metrics` dictionary to the given queue. To avoid issues with memory sharing, we cast the data to numpy. Args: @@ -2077,8 +2077,8 @@ def add_to_queue(self, queue: torch.multiprocessing.SimpleQueue) -> None: queue.put(callback_metrics) def get_from_queue(self, queue: torch.multiprocessing.SimpleQueue) -> None: - """Retrieve the :attr:`trainer.callback_metrics` dictionary from the given queue. - + """ + Retrieve the :attr:`trainer.callback_metrics` dictionary from the given queue. To preserve consistency, we cast back the data to ``torch.Tensor``. 
Args: From 8d5360e6e6bfaa4505a154ae36f37d3b7d6b3a2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sun, 4 Jul 2021 18:48:39 +0200 Subject: [PATCH 13/24] toggle, untoggle --- pytorch_lightning/core/lightning.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 40adb7e9f6b5e..4294012adcb4f 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1479,18 +1479,16 @@ def toggle_optimizer(self, optimizer: Optimizer, optimizer_idx: int): """ Makes sure only the gradients of the current optimizer's parameters are calculated in the training step to prevent dangling gradients in multiple-optimizer setup. - - .. note:: Only called when using multiple optimizers - - Override for your own behavior - - It works with ``untoggle_optimizer`` to make sure param_requires_grad_state is properly reset. + It works with :meth:`untoggle_optimizer` to make sure ``param_requires_grad_state`` is properly reset. + Override for your own behavior. Args: - optimizer: Current optimizer used in training_loop - optimizer_idx: Current optimizer idx in training_loop - """ + optimizer: Current optimizer used in the training loop + optimizer_idx: Current optimizer idx in the training loop + Note: + Only called when using multiple optimizers + """ # Iterate over all optimizer parameters to preserve their `requires_grad` information # in case these are pre-defined during `configure_optimizers` param_requires_grad_state = {} @@ -1512,12 +1510,14 @@ def toggle_optimizer(self, optimizer: Optimizer, optimizer_idx: int): def untoggle_optimizer(self, optimizer_idx: int): """ - .. note:: Only called when using multiple optimizers - - Override for your own behavior + Resets the state of required gradients that were toggled with :meth:`toggle_optimizer`. + Override for your own behavior. Args: - optimizer_idx: Current optimizer idx in training_loop + optimizer_idx: Current optimizer idx in the training loop + + Note: + Only called when using multiple optimizers """ for opt_idx, opt in enumerate(self.optimizers(use_pl_optimizer=False)): if optimizer_idx != opt_idx: From 2009e61713c4136171998d466ddfb7cd76d5cb62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sun, 4 Jul 2021 18:49:00 +0200 Subject: [PATCH 14/24] scripting --- pytorch_lightning/core/lightning.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 4294012adcb4f..f757d93067894 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1918,7 +1918,7 @@ def to_onnx( **kwargs, ): """ - Saves the model in ONNX format + Saves the model in ONNX format. Args: file_path: The path of the file the onnx model should be saved to. @@ -1970,8 +1970,8 @@ def to_torchscript( ) -> Union[ScriptModule, Dict[str, ScriptModule]]: """ By default compiles the whole model to a :class:`~torch.jit.ScriptModule`. - If you want to use tracing, please provided the argument `method='trace'` and make sure that either the - example_inputs argument is provided, or the model has self.example_input_array set. + If you want to use tracing, please provided the argument ``method='trace'`` and make sure that either the + `example_inputs` argument is provided, or the model has :attr:`example_input_array` set. 
If you would like to customize the modules that are scripted you should override this method. In case you want to return multiple modules, we recommend using a dictionary. @@ -1979,7 +1979,7 @@ def to_torchscript( file_path: Path where to save the torchscript. Default: None (no file saved). method: Whether to use TorchScript's script or trace method. Default: 'script' example_inputs: An input to be used to do tracing when method is set to 'trace'. - Default: None (Use self.example_input_array) + Default: None (uses :attr:`example_input_array`) **kwargs: Additional arguments that will be passed to the :func:`torch.jit.script` or :func:`torch.jit.trace` function. @@ -2009,7 +2009,7 @@ def to_torchscript( True Return: - This LightningModule as a torchscript, regardless of whether file_path is + This LightningModule as a torchscript, regardless of whether `file_path` is defined or not. """ mode = self.training From 77395b6794f75194e6ff143bd2b7eaa1a10617fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sun, 4 Jul 2021 18:49:14 +0200 Subject: [PATCH 15/24] save hyperparameters --- pytorch_lightning/core/lightning.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index f757d93067894..066da1783a90d 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1830,16 +1830,16 @@ def save_hyperparameters( ignore: Optional[Union[Sequence[str], str]] = None, frame: Optional[types.FrameType] = None ) -> None: - """Save model arguments to ``hparams`` attribute. + """Save model arguments to the ``hparams`` attribute. Args: - args: single object of `dict`, `NameSpace` or `OmegaConf` - or string names or arguments from class ``__init__`` - ignore: an argument name or a list of argument names from - class ``__init__`` to be ignored - frame: a frame object. Default is None + args: single object of type :class:`dict`, :class:`~argparse.Namespace`, `OmegaConf` + or strings representing the argument names in ``__init__``. + ignore: an argument name or a list of argument names in ``__init__`` to be ignored + frame: a frame object. Default is ``None``. Example:: + >>> class ManuallyArgsModel(LightningModule): ... def __init__(self, arg1, arg2, arg3): ... super().__init__() From b8409a932e93735eb1cdda400280b4bc1b0c0966 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sun, 4 Jul 2021 18:49:27 +0200 Subject: [PATCH 16/24] optimizer step --- pytorch_lightning/core/lightning.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 066da1783a90d..c409161bd1cc3 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1549,8 +1549,7 @@ def optimizer_step( Warning: If you are overriding this method, make sure that you pass the ``optimizer_closure`` parameter to ``optimizer.step()`` function as shown in the examples. This ensures that - ``training_step()``, ``optimizer.zero_grad()``, ``backward()`` are called within - :meth:`~pytorch_lightning.loops.training_batch_loop.TrainingBatchLoop.advance`. + ``training_step()``, ``optimizer.zero_grad()``, ``backward()`` are called within the training loop. 
Args: epoch: Current epoch From d482c04595cd5aa44427edaa335ffeb9541ed9ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sun, 4 Jul 2021 18:52:13 +0200 Subject: [PATCH 17/24] step functions --- pytorch_lightning/core/lightning.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index c409161bd1cc3..1b194f2426830 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -784,7 +784,7 @@ def training_step_end(self, training_step_outputs): def training_epoch_end(self, outputs: EPOCH_OUTPUT) -> None: """ Called at the end of the training epoch with the outputs of all training steps. - Use this in case you need to do something with all the outputs for every training_step. + Use this in case you need to do something with all the outputs returned by :meth:`training_step`. .. code-block:: python @@ -844,8 +844,6 @@ def validation_step(self, *args, **kwargs) -> Optional[STEP_OUTPUT]: (only if multiple val dataloaders used) Return: - Any of. - - Any object or value - ``None`` - Validation will skip to the next batch From 12afe112fc45750fa76df5f8cf2c91298d5cf8f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sun, 4 Jul 2021 19:44:55 +0200 Subject: [PATCH 18/24] hparams --- pytorch_lightning/core/lightning.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 1b194f2426830..acc6ad05108ea 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -2040,12 +2040,20 @@ def to_torchscript( @property def hparams(self) -> Union[AttributeDict, dict, Namespace]: + """ + The collection of hyperparameters saved with :meth:`save_hyperparameters`. It is mutable by the user. + For the frozen set of initial hyperparameters, use :attr:`hparams_initial`. + """ if not hasattr(self, "_hparams"): self._hparams = AttributeDict() return self._hparams @property def hparams_initial(self) -> AttributeDict: + """ + The collection of hyperparameters saved with :meth:`save_hyperparameters`. These contents are read-only. + Manual updates to the saved hyperparameters can instead be performed through :attr:`hparams`. + """ if not hasattr(self, "_hparams_initial"): return AttributeDict() # prevent any change From b372a98953e8cf8f7625c767f639f7ef3326db17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sun, 4 Jul 2021 19:45:01 +0200 Subject: [PATCH 19/24] model size --- pytorch_lightning/core/lightning.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index acc6ad05108ea..18ee08565603e 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -2061,6 +2061,10 @@ def hparams_initial(self) -> AttributeDict: @property def model_size(self) -> float: + """ + The model's size in megabytes. The computation includes everything in the + :meth:`~torch.nn.Module.state_dict`, i.e., by default the parameteters and buffers. 
+ """ # todo: think about better way without need to dump model to drive tmp_name = f"{uuid.uuid4().hex}.pt" torch.save(self.state_dict(), tmp_name) From af710855b0e254a696d7151392ac703d5b61fe86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sun, 4 Jul 2021 19:45:07 +0200 Subject: [PATCH 20/24] summarize --- pytorch_lightning/core/lightning.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 18ee08565603e..1e3c6362f5889 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1688,6 +1688,24 @@ def tbptt_split_batch(self, batch, split_size): return splits def summarize(self, mode: Optional[str] = "top", max_depth: Optional[int] = None) -> Optional[ModelSummary]: + """ + Summarize this LightingModule. + + Args: + mode: Can be either ``'top'`` (summarize only direct submodules) or ``'full'`` (summarize all layers). + + .. deprecated:: v1.4 + This parameter was deprecated in v1.4 in favor of `max_depth` and will be removed in v1.6. + + max_depth: The maximum depth of layer nesting that the summary will include. A value of 0 turns the + layer summary off. Default: 1. + + Return: + ModelSummary: The model summary object + + See Also: + - Model summary utility class: :class:`~pytorch_lightning.memory.ModelSummary` + """ model_summary = None # temporary mapping from mode to max_depth From ab7e4cd5b5a3504ac0ed582e66d30a950a7122f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sun, 4 Jul 2021 19:48:10 +0200 Subject: [PATCH 21/24] header --- pytorch_lightning/core/lightning.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 1e3c6362f5889..8228f11af687c 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""nn.Module with additional great features.""" +"""The LightningModule - an nn.Module with many additional features.""" import collections import copy From 3a704130f29868c638c39a4b74bb258f9c7ad4a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sun, 4 Jul 2021 20:02:32 +0200 Subject: [PATCH 22/24] rm model summary --- pytorch_lightning/core/lightning.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 8228f11af687c..4518078c4933a 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1701,10 +1701,7 @@ def summarize(self, mode: Optional[str] = "top", max_depth: Optional[int] = None layer summary off. Default: 1. 
Return: - ModelSummary: The model summary object - - See Also: - - Model summary utility class: :class:`~pytorch_lightning.memory.ModelSummary` + The model summary object """ model_summary = None From 1d9a346830c8424110b27f38f20d51fb7cc214a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sun, 4 Jul 2021 22:51:05 +0200 Subject: [PATCH 23/24] Update pytorch_lightning/core/lightning.py --- pytorch_lightning/core/lightning.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 4518078c4933a..c9ab966586eb8 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1689,7 +1689,7 @@ def tbptt_split_batch(self, batch, split_size): def summarize(self, mode: Optional[str] = "top", max_depth: Optional[int] = None) -> Optional[ModelSummary]: """ - Summarize this LightingModule. + Summarize this LightningModule. Args: mode: Can be either ``'top'`` (summarize only direct submodules) or ``'full'`` (summarize all layers). From 426549699e4ebbc8937f7a6e3c90b9f2f03b1cfe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sun, 4 Jul 2021 22:51:34 +0200 Subject: [PATCH 24/24] Update pytorch_lightning/core/lightning.py --- pytorch_lightning/core/lightning.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index c9ab966586eb8..6697181bb98f3 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -122,7 +122,7 @@ def optimizers(self, use_pl_optimizer: bool = True) -> Union[Optimizer, List[Opt Args: use_pl_optimizer: If ``True``, will wrap the optimizer(s) in a - :class:`~pytorch_lighting.core.optimizer.LightningOptimizer` for automatic handling of precision and + :class:`~pytorch_lightning.core.optimizer.LightningOptimizer` for automatic handling of precision and profiling. Returns:
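
Taken together, the APIs whose docstrings this series touches are easiest to read alongside a small end-to-end example. The sketch below is illustrative only and not part of the patch: it assumes a toy ``nn.Linear`` model, plain SGD and a ``StepLR`` scheduler, and shows how ``automatic_optimization``, ``optimizers()``, ``lr_schedulers()`` and ``manual_backward()`` are meant to be combined in manual optimization::

    import torch
    from torch import nn
    from pytorch_lightning import LightningModule


    class ManualOptModel(LightningModule):
        def __init__(self):
            super().__init__()
            self.layer = nn.Linear(32, 2)
            # we take over .backward(), .step() and .zero_grad() ourselves
            self.automatic_optimization = False

        def training_step(self, batch, batch_idx):
            x, y = batch
            opt = self.optimizers()      # LightningOptimizer wrapper by default
            sch = self.lr_schedulers()   # None if configure_optimizers returned no scheduler

            opt.zero_grad()
            loss = nn.functional.mse_loss(self.layer(x), y)
            self.manual_backward(loss)   # applies precision scaling for us
            opt.step()
            if sch is not None:
                sch.step()               # schedulers are not stepped automatically here
            return loss

        def configure_optimizers(self):
            optimizer = torch.optim.SGD(self.parameters(), lr=0.1)
            scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10)
            return [optimizer], [scheduler]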
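
The ``truncated_bptt_steps`` docstring states the contract only in words: when the property is set to a positive integer, ``training_step`` receives an extra ``hiddens`` argument and must return the new hidden state. A hedged sketch of that contract, assuming a sequence batch of shape ``(batch, time, features)`` and an LSTM that is not part of this patch::

    import torch
    from torch import nn
    from pytorch_lightning import LightningModule


    class TBPTTModel(LightningModule):
        def __init__(self):
            super().__init__()
            self.lstm = nn.LSTM(input_size=10, hidden_size=10, batch_first=True)
            # each batch gets split along the time dimension into chunks of 20 steps
            self.truncated_bptt_steps = 20

        def training_step(self, batch, batch_idx, hiddens):
            # `hiddens` is the state carried over from the previous chunk (None for the first one)
            x, y = batch
            out, hiddens = self.lstm(x, hiddens)
            loss = nn.functional.mse_loss(out, y)
            # the returned "hiddens" entry is what gets passed back in on the next chunk
            return {"loss": loss, "hiddens": hiddens}

        def configure_optimizers(self):
            return torch.optim.Adam(self.parameters(), lr=1e-3)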
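
The ``example_input_array`` property and the export methods (``to_torchscript``, ``to_onnx``) documented in this series also interact: when no explicit input sample is passed, tracing falls back to the example input array, interpreted as described in the property docstring. A minimal sketch with placeholder shapes and file names::

    import torch
    from torch import nn
    from pytorch_lightning import LightningModule


    class ExportModel(LightningModule):
        def __init__(self):
            super().__init__()
            self.layer = nn.Linear(32, 2)
            # a single tensor means forward() is called as model(model.example_input_array)
            self.example_input_array = torch.rand(1, 32)

        def forward(self, x):
            return self.layer(x)


    model = ExportModel()
    # both exports fall back to example_input_array since no input sample is given
    model.to_torchscript("model.pt", method="trace")
    model.to_onnx("model.onnx")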
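
Finally, ``save_hyperparameters`` and the ``hparams`` / ``hparams_initial`` properties belong together: arguments captured in ``__init__`` become available under ``self.hparams`` and are written to checkpoints. A short sketch with made-up argument names::

    import torch
    from torch import nn
    from pytorch_lightning import LightningModule


    class LitClassifier(LightningModule):
        def __init__(self, hidden_dim: int = 64, learning_rate: float = 1e-3):
            super().__init__()
            # stores hidden_dim and learning_rate under self.hparams
            self.save_hyperparameters()
            self.layer = nn.Linear(self.hparams.hidden_dim, 2)

        def configure_optimizers(self):
            # self.hparams is mutable; the frozen initial values stay in self.hparams_initial
            return torch.optim.Adam(self.parameters(), lr=self.hparams.learning_rate)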