
Commit 1d90c35

awaelchli and pre-commit-ci[bot] authored and committed
Use PrecisionType enum instead of checking raw values (#10704)
* use precision type

* [pre-commit.ci] auto fixes from pre-commit.com hooks

  for more information, see https://pre-commit.ci

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent b28ab34 commit 1d90c35
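
For orientation before the diffs: the change swaps raw-value membership checks like precision in (16, "mixed") for named PrecisionType members. The following is a minimal, self-contained sketch of why such a swap can stay backwards compatible, assuming a str-backed enum whose members compare equal to the raw values callers may still pass; PrecisionKind, select_dtype and the member values here are hypothetical stand-ins, not Lightning's actual definitions.

    from enum import Enum


    class PrecisionKind(str, Enum):
        # Hypothetical stand-in for Lightning's PrecisionType; values are assumed.
        HALF = "16"
        FLOAT = "32"
        MIXED = "mixed"

        def __eq__(self, other: object) -> bool:
            # Compare against raw ints/strings so legacy call sites keep working.
            return self.value == str(other)

        def __hash__(self) -> int:
            return hash(self.value)


    def select_dtype(precision) -> str:
        # Mirrors the pattern the diff introduces: named members instead of (16, "mixed").
        if precision in (PrecisionKind.HALF, PrecisionKind.MIXED):
            return "float16"
        return "float32"


    print(select_dtype(16))       # float16 (raw int still accepted)
    print(select_dtype("mixed"))  # float16
    print(select_dtype(32))       # float32

The design point: call sites gain named constants while callers that still pass 16 or "mixed" keep working.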

5 files changed: 99 additions & 97 deletions

docs/source/extensions/logging.rst

Lines changed: 81 additions & 88 deletions
@@ -14,45 +14,78 @@
 Logging
 #######

-Lightning supports the most popular logging frameworks (TensorBoard, Comet, etc...).
+Supported Loggers
+=================
+
+The following are loggers we support:

-By default, Lightning uses `PyTorch TensorBoard <https://pytorch.org/docs/stable/tensorboard.html>`__ logging under the hood, and stores the logs to a directory (by default in ``lightning_logs/``).
+.. note::
+    The following loggers will normally plot an additional chart (**global_step VS epoch**).
+
+.. note::
+    Depending on the loggers you use, there might be some additional charts.
+
+.. currentmodule:: pytorch_lightning.loggers
+
+.. autosummary::
+    :toctree: generated
+    :nosignatures:
+    :template: classtemplate.rst
+
+    CometLogger
+    CSVLogger
+    MLFlowLogger
+    NeptuneLogger
+    TensorBoardLogger
+    TestTubeLogger
+    WandbLogger
+
+
+By default, Lightning uses ``TensorBoard`` logger under the hood, and stores the logs to a directory (by default in ``lightning_logs/``).

 .. testcode::

     from pytorch_lightning import Trainer

-    # Automatically logs to a directory
-    # (by default ``lightning_logs/``)
+    # Automatically logs to a directory (by default lightning_logs/)
     trainer = Trainer()

 To see your logs:

 .. code-block:: bash

+    # Install tensorboard
+    pip install tensorboard
     tensorboard --logdir=lightning_logs/

+To run tensorboard in a jupyter notebook environment, use the following in a jupyter cell:
+
+.. code-block:: bash
+
+    %reload_ext tensorboard
+    %tensorboard --logdir=lightning_logs/
+
 You can also pass a custom Logger to the :class:`~pytorch_lightning.trainer.trainer.Trainer`.

 .. testcode::

     from pytorch_lightning import loggers as pl_loggers

-    tb_logger = pl_loggers.TensorBoardLogger("logs/")
+    tb_logger = pl_loggers.TensorBoardLogger(save_dir="logs/")
     trainer = Trainer(logger=tb_logger)

-Choose from any of the others such as MLflow, Comet, Neptune, WandB, ...
+Choose from any of the others such as MLflow, Comet, Neptune, WandB, etc.

 .. testcode::

     comet_logger = pl_loggers.CometLogger(save_dir="logs/")
     trainer = Trainer(logger=comet_logger)

-To use multiple loggers, simply pass in a ``list`` or ``tuple`` of loggers ...
+To use multiple loggers, simply pass in a ``list`` or ``tuple`` of loggers.

 .. testcode::

-    tb_logger = pl_loggers.TensorBoardLogger("logs/")
+    tb_logger = pl_loggers.TensorBoardLogger(save_dir="logs/")
     comet_logger = pl_loggers.CometLogger(save_dir="logs/")
     trainer = Trainer(logger=[tb_logger, comet_logger])

@@ -62,8 +95,8 @@ To use multiple loggers, simply pass in a ``list`` or ``tuple`` of loggers ...

 .. note::

-    All loggers log by default to `os.getcwd()`. To change the path without creating a logger set
-    `Trainer(default_root_dir='/your/path/to/save/checkpoints')`
+    All loggers log by default to ``os.getcwd()``. To change the path without creating a logger set
+    ``Trainer(default_root_dir='/your/path/to/save/checkpoints')``

 ----------

@@ -75,55 +108,52 @@ Lightning offers automatic log functionalities for logging scalars, or manual lo

 Automatic Logging
 =================
-Use the :func:`~~pytorch_lightning.core.lightning.LightningModule.log`
+Use the :meth:`~pytorch_lightning.core.lightning.LightningModule.log`
 method to log from anywhere in a :doc:`lightning module <../common/lightning_module>` and :doc:`callbacks <../extensions/callbacks>`
-except functions with `batch_start` in their names.
+except functions with ``batch_start`` in their names.
+# TODO: check the hooks that doesn't support logging

 .. code-block:: python

     def training_step(self, batch, batch_idx):
         self.log("my_metric", x)


-    # or a dict
+    # or a dict to get multiple metrics on the same plot of the logger supports it
     def training_step(self, batch, batch_idx):
         self.log("performance", {"acc": acc, "recall": recall})

-Depending on where log is called from, Lightning auto-determines the correct logging mode for you. \
-But of course you can override the default behavior by manually setting the :func:`~~pytorch_lightning.core.lightning.LightningModule.log` parameters.
+Depending on where log is called from, Lightning auto-determines the correct logging mode for you. But of course you can
+override the default behavior by manually setting the :meth:`~pytorch_lightning.core.lightning.LightningModule.log` parameters.

 .. code-block:: python

     def training_step(self, batch, batch_idx):
         self.log("my_loss", loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)

-The :func:`~~pytorch_lightning.core.lightning.LightningModule.log` method has a few options:
-
-* `on_step`: Logs the metric at the current step. Defaults to `True` in :func:`~~pytorch_lightning.core.lightning.LightningModule.training_step`, and :func:`~pytorch_lightning.core.lightning.LightningModule.training_step_end`.
-
-* `on_epoch`: Automatically accumulates and logs at the end of the epoch. Defaults to True anywhere in validation or test loops, and in :func:`~~pytorch_lightning.core.lightning.LightningModule.training_epoch_end`.
-
-* `prog_bar`: Logs to the progress bar.
-
-* `logger`: Logs to the logger like Tensorboard, or any other custom logger passed to the :class:`~pytorch_lightning.trainer.trainer.Trainer`.
+The :meth:`~pytorch_lightning.core.lightning.LightningModule.log` method has a few options:

+* ``on_step``: Logs the metric at the current step.
+* ``on_epoch``: Automatically accumulates and logs at the end of the epoch.
+* ``prog_bar``: Logs to the progress bar.
+* ``logger``: Logs to the logger like ``Tensorboard``, or any other custom logger passed to the :class:`~pytorch_lightning.trainer.trainer.Trainer`.

 .. note::

     - Setting ``on_epoch=True`` will cache all your logged values during the full training epoch and perform a
       reduction in ``on_train_epoch_end``. We recommend using `TorchMetrics <https://torchmetrics.readthedocs.io/>`_, when working with custom reduction.

     - Setting both ``on_step=True`` and ``on_epoch=True`` will create two keys per metric you log with
-      suffix ``_step`` and ``_epoch``, respectively. You can refer to these keys e.g. in the `monitor`
+      suffix ``_step`` and ``_epoch`` respectively. You can refer to these keys e.g. in the `monitor`
       argument of :class:`~pytorch_lightning.callbacks.model_checkpoint.ModelCheckpoint` or in the graphs plotted to the logger of your choice.


-If your work requires to log in an unsupported function, please open an issue with a clear description of why it is blocking you.
+If your work requires to log in an unsupported method, please open an issue with a clear description of why it is blocking you.


-Manual logging
-==============
-If you want to log anything that is not a scalar, like histograms, text, images, etc... you may need to use the logger object directly.
+Manual logging Non-Scalar Artifacts
+===================================
+If you want to log anything that is not a scalar, like histograms, text, images, etc. you may need to use the logger object directly.

 .. code-block:: python

@@ -136,14 +166,6 @@ If you want to log anything that is not a scalar, like histograms, text, images,
         tensorboard.add_figure(...)


-Access your logs
-================
-Once your training starts, you can view the logs by using your favorite logger or booting up the Tensorboard logs:
-
-.. code-block:: bash
-
-    tensorboard --logdir ./lightning_logs
-
 ----------

 ********************
@@ -155,9 +177,8 @@ Use the :func:`~pytorch_lightning.loggers.base.rank_zero_experiment` and :func:`

 .. testcode::

-    from pytorch_lightning.utilities import rank_zero_only
-    from pytorch_lightning.loggers import LightningLoggerBase
-    from pytorch_lightning.loggers.base import rank_zero_experiment
+    from pytorch_lightning.loggers.base import LightningLoggerBase, rank_zero_experiment
+    from pytorch_lightning.utilities.distributed import rank_zero_only


     class MyLogger(LightningLoggerBase):
@@ -217,27 +238,26 @@ Logging frequency
 =================

 It may slow training down to log every single batch. By default, Lightning logs every 50 rows, or 50 training steps.
-To change this behaviour, set the `log_every_n_steps` :class:`~pytorch_lightning.trainer.trainer.Trainer` flag.
+To change this behaviour, set the ``log_every_n_steps`` :class:`~pytorch_lightning.trainer.trainer.Trainer` flag.

 .. testcode::

     k = 10
     trainer = Trainer(log_every_n_steps=k)


-
 Log writing frequency
 =====================

 Writing to a logger can be expensive, so by default Lightning writes logs to disk or to the given logger every 100 training steps.
-To change this behaviour, set the interval at which you wish to flush logs to the filesystem using the `flush_logs_every_n_steps` :class:`~pytorch_lightning.trainer.trainer.Trainer` flag.
+To change this behaviour, set the interval at which you wish to flush logs to the filesystem using the ``flush_logs_every_n_steps`` :class:`~pytorch_lightning.trainer.trainer.Trainer` flag.

 .. testcode::

     k = 100
     trainer = Trainer(flush_logs_every_n_steps=k)

-Unlike the `log_every_n_steps`, this argument does not apply to all loggers.
+Unlike the ``log_every_n_steps``, this argument does not apply to all loggers.
 The example shown here works with :class:`~pytorch_lightning.loggers.tensorboard.TensorBoardLogger`,
 which is the default logger in Lightning.

@@ -246,8 +266,8 @@ which is the default logger in Lightning.
 ************
 Progress Bar
 ************
-You can add any metric to the progress bar using :func:`~~pytorch_lightning.core.lightning.LightningModule.log`
-method, setting `prog_bar=True`.
+You can add any metric to the progress bar using :meth:`~pytorch_lightning.core.lightning.LightningModule.log`
+method, setting ``prog_bar=True``.


 .. code-block:: python
@@ -261,15 +281,19 @@ Modifying the progress bar

 The progress bar by default already includes the training loss and version number of the experiment
 if you are using a logger. These defaults can be customized by overriding the
-:func:`~pytorch_lightning.callbacks.base.ProgressBarBase.get_metrics` hook in your module.
+:meth:`~pytorch_lightning.callbacks.progress.base.ProgressBarBase.get_metrics` hook in your logger.

 .. code-block:: python

-    def get_metrics(self):
-        # don't show the version number
-        items = super().get_metrics()
-        items.pop("v_num", None)
-        return items
+    from pytorch_lightning.callbacks.progress import Tqdm
+
+
+    class CustomProgressBar(Tqdm):
+        def get_metrics(self, *args, **kwargs):
+            # don't show the version number
+            items = super().get_metrics()
+            items.pop("v_num", None)
+            return items


 ----------
@@ -303,16 +327,16 @@ Read more about custom Python logging `here <https://docs.python.org/3/library/l
 Logging hyperparameters
 ***********************

-When training a model, it's useful to know what hyperparams went into that model.
-When Lightning creates a checkpoint, it stores a key "hyper_parameters" with the hyperparams.
+When training a model, it is useful to know what hyperparams went into that model.
+When Lightning creates a checkpoint, it stores a key ``"hyper_parameters"`` with the hyperparams.

 .. code-block:: python

     lightning_checkpoint = torch.load(filepath, map_location=lambda storage, loc: storage)
     hyperparams = lightning_checkpoint["hyper_parameters"]

 Some loggers also allow logging the hyperparams used in the experiment. For instance,
-when using the TestTubeLogger or the TensorBoardLogger, all hyperparams will show
+when using the ``TestTubeLogger`` or the ``TensorBoardLogger``, all hyperparams will show
 in the `hparams tab <https://pytorch.org/docs/stable/tensorboard.html#torch.utils.tensorboard.writer.SummaryWriter.add_hparams>`_.

 .. note::
@@ -334,7 +358,7 @@ in the `hparams tab <https://pytorch.org/docs/stable/tensorboard.html#torch.util
         self.log("hp/metric_1", some_scalar_1)
         self.log("hp/metric_2", some_scalar_2)

-In the example, using `hp/` as a prefix allows for the metrics to be grouped under "hp" in the tensorboard scalar tab where you can collapse them.
+In the example, using ``"hp/"`` as a prefix allows for the metrics to be grouped under "hp" in the tensorboard scalar tab where you can collapse them.

 ----------

@@ -343,7 +367,7 @@ Snapshot code
 *************

 Loggers also allow you to snapshot a copy of the code used in this experiment.
-For example, TestTubeLogger does this with a flag:
+For example, ``TestTubeLogger`` does this with a flag:

 .. code-block:: python

@@ -352,34 +376,3 @@ For example, TestTubeLogger does this with a flag:
     logger = TestTubeLogger(".", create_git_tag=True)

 ----------
-
-*****************
-Supported Loggers
-*****************
-
-The following are loggers we support
-
-.. note::
-    The following loggers will normally plot an additional chart (**global_step VS epoch**).
-
-.. note::
-    postfix ``_step`` and ``_epoch`` will be appended to the name you logged
-    if ``on_step`` and ``on_epoch`` are set to ``True`` in ``self.log()``.
-
-.. note::
-    Depending on the loggers you use, there might be some additional charts.
-
-.. currentmodule:: pytorch_lightning.loggers
-
-.. autosummary::
-    :toctree: generated
-    :nosignatures:
-    :template: classtemplate.rst
-
-    CometLogger
-    CSVLogger
-    MLFlowLogger
-    NeptuneLogger
-    TensorBoardLogger
-    TestTubeLogger
-    WandbLogger
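
The note in the docs diff above recommends TorchMetrics when ``on_epoch=True`` needs a proper reduction. As a hedged illustration of that pattern only: LitClassifier, its layer size and optimizer are made up, and it assumes torch, torchmetrics and pytorch_lightning from roughly this commit's era are installed (where torchmetrics.Accuracy() still takes no required arguments).

    import torch
    import torchmetrics
    from pytorch_lightning import LightningModule


    class LitClassifier(LightningModule):
        def __init__(self, num_classes: int = 10):
            super().__init__()
            self.layer = torch.nn.Linear(32, num_classes)
            # the metric object owns the accumulation, so the epoch value is an
            # exact reduction rather than an average of per-step averages
            self.train_acc = torchmetrics.Accuracy()

        def forward(self, x):
            return self.layer(x)

        def training_step(self, batch, batch_idx):
            x, y = batch
            logits = self(x)
            loss = torch.nn.functional.cross_entropy(logits, y)
            self.train_acc(logits.softmax(dim=-1), y)
            # log the metric object itself; Lightning computes and resets it at epoch end
            self.log("train_acc", self.train_acc, on_step=True, on_epoch=True)
            self.log("train_loss", loss, prog_bar=True)
            return loss

        def configure_optimizers(self):
            return torch.optim.Adam(self.parameters(), lr=1e-3)

Logging the metric object, rather than a value computed from it, is what lets Lightning handle the step-level and epoch-level reduction consistently.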

pytorch_lightning/plugins/training_type/deepspeed.py

Lines changed: 12 additions & 4 deletions
@@ -37,7 +37,7 @@
 from pytorch_lightning.utilities import GradClipAlgorithmType
 from pytorch_lightning.utilities.apply_func import apply_to_collection
 from pytorch_lightning.utilities.distributed import log, rank_zero_info
-from pytorch_lightning.utilities.enums import _StrategyType, AMPType
+from pytorch_lightning.utilities.enums import _StrategyType, AMPType, PrecisionType
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from pytorch_lightning.utilities.imports import _DEEPSPEED_AVAILABLE
 from pytorch_lightning.utilities.model_helpers import is_overridden
@@ -445,7 +445,11 @@ def init_deepspeed(self):

         if self.zero_stage_3 and self.partition_module:
             # Ensure the entire model has been moved to the appropriate device
-            dtype = torch.float16 if self.precision_plugin.precision in (16, "mixed") else torch.float32
+            dtype = (
+                torch.float16
+                if self.precision_plugin.precision in (PrecisionType.HALF, PrecisionType.MIXED)
+                else torch.float32
+            )
             deepspeed.zero.Init(
                 module=model, remote_device=self.remote_device, pin_memory=True, config=self.config, dtype=dtype
             )
@@ -502,7 +506,11 @@ def _initialize_deepspeed_train(self, model):
     def model_sharded_context(self) -> Generator[None, None, None]:
         if self.zero_stage_3:
             assert self._config_initialized
-            dtype = torch.float16 if self.precision_plugin.precision in (16, "mixed") else torch.float32
+            dtype = (
+                torch.float16
+                if self.precision_plugin.precision in (PrecisionType.HALF, PrecisionType.MIXED)
+                else torch.float32
+            )
             model_parallel_context = deepspeed.zero.Init(
                 remote_device=self.remote_device, pin_memory=True, config=self.config, dtype=dtype
             )
@@ -629,7 +637,7 @@ def _auto_select_batch_size(self):
             return batch_size

     def _format_precision_config(self) -> None:
-        if self.precision_plugin.precision in (16, "mixed"):
+        if self.precision_plugin.precision in (PrecisionType.HALF, PrecisionType.MIXED):
             if "fp16" not in self.config and self.precision_plugin.amp_type == AMPType.NATIVE:
                 # FP16 is a DeepSpeed standalone AMP implementation
                 rank_zero_info("Enabling DeepSpeed FP16.")
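
Both deepspeed.py hunks above now share the same dtype-selection expression. Below is a small hedged check of that expression outside the plugin: zero_init_dtype is an illustrative helper, not part of Lightning, and it assumes a pytorch_lightning release where PrecisionType is importable from pytorch_lightning.utilities.enums (as the import hunk does) and compares equal to the raw precision values the Trainer accepts.

    import torch
    from pytorch_lightning.utilities.enums import PrecisionType  # per the import hunk above


    def zero_init_dtype(precision) -> torch.dtype:
        # Same selection logic the two deepspeed.zero.Init call sites now share.
        return torch.float16 if precision in (PrecisionType.HALF, PrecisionType.MIXED) else torch.float32


    assert zero_init_dtype(16) is torch.float16      # raw value from Trainer(precision=16)
    assert zero_init_dtype("mixed") is torch.float16
    assert zero_init_dtype(32) is torch.float32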

pytorch_lightning/plugins/training_type/fully_sharded.py

Lines changed: 2 additions & 2 deletions
@@ -21,7 +21,7 @@
 from pytorch_lightning.plugins.precision import PrecisionPlugin
 from pytorch_lightning.plugins.training_type.ddp import DDPPlugin
 from pytorch_lightning.utilities import _FAIRSCALE_FULLY_SHARDED_AVAILABLE
-from pytorch_lightning.utilities.enums import _StrategyType
+from pytorch_lightning.utilities.enums import _StrategyType, PrecisionType
 from pytorch_lightning.utilities.exceptions import MisconfigurationException

 if _FAIRSCALE_FULLY_SHARDED_AVAILABLE:
@@ -139,7 +139,7 @@ def wrap_policy(*args, **kwargs):
             cpu_offload=self.cpu_offload,
             move_grads_to_cpu=self.move_grads_to_cpu,
             flatten_parameters=self.flatten_parameters,
-            mixed_precision=precision == "mixed",
+            mixed_precision=(precision == PrecisionType.MIXED),
             reshard_after_forward=self.reshard_after_forward,
             fp32_reduce_scatter=self.fp32_reduce_scatter,
             compute_dtype=self.compute_dtype,

0 commit comments
