Lightning-AI
diff --git a/‎CHANGELOG.md‎
Lines changed: 8 additions & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎docs/source/conf.py‎
Lines changed: 5 additions & 1 deletion b/‎docs/source/conf.py‎
Lines changed: 5 additions & 1 deletion
diff --git a/‎docs/source/debugging.rst‎
Lines changed: 6 additions & 1 deletion b/‎docs/source/debugging.rst‎
Lines changed: 6 additions & 1 deletion
diff --git a/‎docs/source/trainer.rst‎
Lines changed: 5 additions & 4 deletions b/‎docs/source/trainer.rst‎
Lines changed: 5 additions & 4 deletions
diff --git a/‎environment.yml‎
Lines changed: 1 addition & 1 deletion b/‎environment.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎pytorch_lightning/callbacks/early_stopping.py‎
Lines changed: 9 additions & 14 deletions b/‎pytorch_lightning/callbacks/early_stopping.py‎
Lines changed: 9 additions & 14 deletions
diff --git a/‎pytorch_lightning/callbacks/gpu_stats_monitor.py‎
Lines changed: 1 addition & 2 deletions b/‎pytorch_lightning/callbacks/gpu_stats_monitor.py‎
Lines changed: 1 addition & 2 deletions
diff --git a/‎pytorch_lightning/callbacks/lr_monitor.py‎
Lines changed: 40 additions & 29 deletions b/‎pytorch_lightning/callbacks/lr_monitor.py‎
Lines changed: 40 additions & 29 deletions
diff --git a/‎pytorch_lightning/callbacks/model_checkpoint.py‎
Lines changed: 6 additions & 5 deletions b/‎pytorch_lightning/callbacks/model_checkpoint.py‎
Lines changed: 6 additions & 5 deletions
diff --git a/‎pytorch_lightning/callbacks/progress.py‎
Lines changed: 1 addition & 2 deletions b/‎pytorch_lightning/callbacks/progress.py‎
Lines changed: 1 addition & 2 deletions
@@ -9,6 +9,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 ### Added
 
+- Added a check for optimizer attached to lr_scheduler ([#5338](https://github.com/PyTorchLightning/pytorch-lightning/pull/5338))
+
+- Added support for `resume_from_checkpoint` to accept a non-existing file path ([#4402](https://github.com/PyTorchLightning/pytorch-lightning/pull/4402))
+
 
 ### Changed
 
@@ -21,6 +25,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 ### Fixed
 
+- Allowed `log_momentum` for adaptive optimizers in `LearningRateMonitor` ([#5333](https://github.com/PyTorchLightning/pytorch-lightning/pull/5333))
+
+- Disabled checkpointing, earlystopping and logger with `fast_dev_run` ([#5277](https://github.com/PyTorchLightning/pytorch-lightning/pull/5277))
+
 
 
 ## [1.1.2] - 2020-12-23
 
@@ -294,10 +294,14 @@ def setup(app):
 # Ignoring Third-party packages
 # https://stackoverflow.com/questions/15889621/sphinx-how-to-exclude-imports-in-automodule
 def package_list_from_file(file):
+    """List package names (without version specifiers or extras) from a package list file.
+    """
     mocked_packages = []
     with open(file, 'r') as fp:
         for ln in fp.readlines():
-            found = [ln.index(ch) for ch in list(',=<>#') if ch in ln]
+            # Example: `tqdm>=4.41.0` => `tqdm`
+            # `[` is for package with extras
+            found = [ln.index(ch) for ch in list(',=<>#[') if ch in ln]
             pkg = ln[:min(found)] if found else ln
             if pkg.rstrip():
                 mocked_packages.append(pkg.rstrip())
 
@@ -28,13 +28,18 @@ The point is to detect any bugs in the training/validation loop without having t
 argument of :class:`~pytorch_lightning.trainer.trainer.Trainer`)
 
 .. testcode::
-    
+
     # runs 1 train, val, test batch and program ends
     trainer = Trainer(fast_dev_run=True)
 
     # runs 7 train, val, test batches and program ends
     trainer = Trainer(fast_dev_run=7)
 
+.. note::
+
+    This argument will disable tuner, checkpoint callbacks, early stopping callbacks,
+    loggers and logger callbacks like ``LearningRateLogger`` and run for only 1 epoch.
+
 ----------------
 
 Inspect gradient norms
 
@@ -666,9 +666,9 @@ Under the hood the pseudocode looks like this when running *fast_dev_run* with a
 .. note::
 
     This argument is a bit different from ``limit_train/val/test_batches``. Setting this argument will
-    disable tuner, logger callbacks like ``LearningRateLogger`` and runs for only 1 epoch. This must be
-    used only for debugging purposes. ``limit_train/val/test_batches`` only limits the number of batches and won't
-    disable anything.
+    disable tuner, checkpoint callbacks, early stopping callbacks, loggers and logger callbacks like
+    ``LearningRateLogger`` and run for only 1 epoch. This must be used only for debugging purposes.
+    ``limit_train/val/test_batches`` only limits the number of batches and won't disable anything.
 
 flush_logs_every_n_steps
 ^^^^^^^^^^^^^^^^^^^^^^^^
@@ -1328,7 +1328,8 @@ resume_from_checkpoint
 
 |
 
-To resume training from a specific checkpoint pass in the path here.
+To resume training from a specific checkpoint pass in the path here. If resuming from a mid-epoch
+checkpoint, training will start from the beginning of the next epoch.
 
 .. testcode::
 
 
@@ -30,7 +30,7 @@ dependencies:
     - future>=0.17.1
     - PyYAML>=5.1
     - tqdm>=4.41.0
-    - fsspec>=0.8.0
+    - fsspec[http]>=0.8.1
     #- tensorboard>=2.2.0  # not needed, already included in pytorch
 
     # Optional
 
@@ -28,7 +28,7 @@
 from pytorch_lightning import _logger as log
 from pytorch_lightning.callbacks.base import Callback
 from pytorch_lightning.metrics.metric import Metric
-from pytorch_lightning.utilities import TPU_AVAILABLE, rank_zero_info, rank_zero_warn
+from pytorch_lightning.utilities import rank_zero_info, rank_zero_warn, TPU_AVAILABLE
 
 
 class EarlyStopping(Callback):
@@ -166,10 +166,10 @@ def on_validation_end(self, trainer, pl_module):
         self._run_early_stopping_check(trainer, pl_module)
 
     def on_validation_epoch_end(self, trainer, pl_module):
-        if trainer.running_sanity_check:
+        if trainer.fast_dev_run or trainer.running_sanity_check:
             return
 
-        if self._validate_condition_metric(trainer.logger_connector.callback_metrics):
+        if self._validate_condition_metric(trainer.callback_metrics):
             # turn off early stopping in on_train_epoch_end
             self.based_on_eval_results = True
 
@@ -178,24 +178,19 @@ def on_train_epoch_end(self, trainer, pl_module, outputs):
         if self.based_on_eval_results:
             return
 
-        # early stopping can also work in the train loop when there is no val loop
-        should_check_early_stop = False
-
-        # fallback to monitor key in result dict
-        if trainer.logger_connector.callback_metrics.get(self.monitor, None) is not None:
-            should_check_early_stop = True
-
-        if should_check_early_stop:
-            self._run_early_stopping_check(trainer, pl_module)
+        self._run_early_stopping_check(trainer, pl_module)
 
     def _run_early_stopping_check(self, trainer, pl_module):
         """
         Checks whether the early stopping condition is met
         and if so tells the trainer to stop the training.
         """
-        logs = trainer.logger_connector.callback_metrics
+        logs = trainer.callback_metrics
 
-        if not self._validate_condition_metric(logs):
+        if (
+            trainer.fast_dev_run  # disable early_stopping with fast_dev_run
+            or not self._validate_condition_metric(logs)  # short circuit if metric not present
+        ):
             return  # short circuit if metric not present
 
         current = logs.get(self.monitor)
 
@@ -24,7 +24,7 @@
 import shutil
 import subprocess
 import time
-from typing import List, Tuple, Dict
+from typing import Dict, List, Tuple
 
 from pytorch_lightning.callbacks.base import Callback
 from pytorch_lightning.utilities import rank_zero_only
@@ -213,5 +213,4 @@ def _should_log(trainer) -> bool:
             or trainer.should_stop
         )
 
-        should_log = should_log and not trainer.fast_dev_run
         return should_log
@@ -33,11 +33,11 @@ class LearningRateMonitor(Callback):
     Automatically monitor and logs learning rate for learning rate schedulers during training.
 
     Args:
-        logging_interval: set to `epoch` or `step` to log `lr` of all optimizers
-            at the same interval, set to `None` to log at individual interval
-            according to the `interval` key of each scheduler. Defaults to ``None``.
+        logging_interval: set to ``'epoch'`` or ``'step'`` to log ``lr`` of all optimizers
+            at the same interval, set to ``None`` to log at individual interval
+            according to the ``interval`` key of each scheduler. Defaults to ``None``.
         log_momentum: option to also log the momentum values of the optimizer, if the optimizer
-            has the `momentum` attribute. Defaults to ``False``.
+            has the ``momentum`` or ``betas`` attribute. Defaults to ``False``.
 
     Example::
 
@@ -47,17 +47,19 @@ class LearningRateMonitor(Callback):
         >>> trainer = Trainer(callbacks=[lr_monitor])
 
     Logging names are automatically determined based on optimizer class name.
-    In case of multiple optimizers of same type, they will be named `Adam`,
-    `Adam-1` etc. If a optimizer has multiple parameter groups they will
-    be named `Adam/pg1`, `Adam/pg2` etc. To control naming, pass in a
-    `name` keyword in the construction of the learning rate schdulers
+    In case of multiple optimizers of same type, they will be named ``Adam``,
+    ``Adam-1`` etc. If an optimizer has multiple parameter groups they will
+    be named ``Adam/pg1``, ``Adam/pg2`` etc. To control naming, pass in a
+    ``name`` keyword in the construction of the learning rate schedulers.
 
     Example::
 
         def configure_optimizer(self):
             optimizer = torch.optim.Adam(...)
-            lr_scheduler = {'scheduler': torch.optim.lr_scheduler.LambdaLR(optimizer, ...)
-                            'name': 'my_logging_name'}
+            lr_scheduler = {
+                'scheduler': torch.optim.lr_scheduler.LambdaLR(optimizer, ...),
+                'name': 'my_logging_name'
+            }
             return [optimizer], [lr_scheduler]
 
     """
@@ -80,16 +82,28 @@ def on_train_start(self, trainer, *args, **kwargs):
         """
         if not trainer.logger:
             raise MisconfigurationException(
-                'Cannot use LearningRateMonitor callback with Trainer that has no logger.'
+                'Cannot use `LearningRateMonitor` callback with `Trainer` that has no logger.'
             )
 
         if not trainer.lr_schedulers:
             rank_zero_warn(
-                'You are using LearningRateMonitor callback with models that'
+                'You are using `LearningRateMonitor` callback with models that'
                 ' have no learning rate schedulers. Please see documentation'
                 ' for `configure_optimizers` method.', RuntimeWarning
             )
 
+        if self.log_momentum:
+            def _check_no_key(key):
+                return any(
+                    key not in sch['scheduler'].optimizer.defaults for sch in trainer.lr_schedulers
+                )
+
+            if _check_no_key('momentum') and _check_no_key('betas'):
+                rank_zero_warn(
+                    "You have set log_momentum=True, but some optimizers do not"
+                    " have momentum. This will log a value 0 for the momentum.", RuntimeWarning
+                )
+
         # Find names for schedulers
         names = self._find_names(trainer.lr_schedulers)
 
@@ -105,35 +119,33 @@ def on_train_batch_start(self, trainer, *args, **kwargs):
             interval = 'step' if self.logging_interval is None else 'any'
             latest_stat = self._extract_stats(trainer, interval)
 
-            if trainer.logger is not None and latest_stat:
+            if latest_stat:
                 trainer.logger.log_metrics(latest_stat, step=trainer.global_step)
 
     def on_train_epoch_start(self, trainer, *args, **kwargs):
         if self.logging_interval != 'step':
             interval = 'epoch' if self.logging_interval is None else 'any'
             latest_stat = self._extract_stats(trainer, interval)
 
-            if trainer.logger is not None and latest_stat:
+            if latest_stat:
                 trainer.logger.log_metrics(latest_stat, step=trainer.global_step)
 
     def _extract_stats(self, trainer, interval: str) -> Dict[str, float]:
         latest_stat = {}
 
         for name, scheduler in zip(self.lr_sch_names, trainer.lr_schedulers):
             if scheduler['interval'] == interval or interval == 'any':
-                param_groups = scheduler['scheduler'].optimizer.param_groups
-                if len(param_groups) != 1:
-                    for i, pg in enumerate(param_groups):
-                        lr = self._extract_lr(param_group=pg, name=f'{name}/pg{i + 1}')
-                        latest_stat.update(lr)
-                        momentum = self._extract_momentum(param_group=pg, name=f'{name}-momentum/pg{i + 1}')
-                        latest_stat.update(momentum)
-
-                else:
-                    pg = param_groups[0]
-                    lr = self._extract_lr(param_group=pg, name=name)
+                opt = scheduler['scheduler'].optimizer
+                param_groups = opt.param_groups
+                use_betas = 'betas' in opt.defaults
+
+                for i, pg in enumerate(param_groups):
+                    suffix = f'/pg{i + 1}' if len(param_groups) > 1 else ''
+                    lr = self._extract_lr(param_group=pg, name=f'{name}{suffix}')
                     latest_stat.update(lr)
-                    momentum = self._extract_momentum(param_group=pg, name=f'{name}-momentum')
+                    momentum = self._extract_momentum(
+                        param_group=pg, name=f'{name}-momentum{suffix}', use_betas=use_betas
+                    )
                     latest_stat.update(momentum)
 
         return latest_stat
@@ -143,11 +155,11 @@ def _extract_lr(self, param_group, name: str) -> Dict[str, float]:
         self.lrs[name].append(lr)
         return {name: lr}
 
-    def _extract_momentum(self, param_group, name: str) -> Dict[str, float]:
+    def _extract_momentum(self, param_group, name: str, use_betas: bool) -> Dict[str, float]:
         if not self.log_momentum:
             return {}
 
-        momentum = param_group.get('momentum')
+        momentum = param_group.get('betas')[0] if use_betas else param_group.get('momentum', 0)
         self.last_momentum_values[name] = momentum
         return {name: momentum}
 
@@ -190,5 +202,4 @@ def _should_log(trainer) -> bool:
             or trainer.should_stop
         )
 
-        should_log = should_log and not trainer.fast_dev_run
         return should_log
@@ -20,11 +20,11 @@
 
 """
 
-from copy import deepcopy
 import numbers
 import os
-from pathlib import Path
 import re
+from copy import deepcopy
+from pathlib import Path
 from typing import Any, Dict, Optional, Union
 
 import numpy as np
@@ -224,7 +224,8 @@ def save_checkpoint(self, trainer, pl_module):
         global_step = trainer.global_step
 
         if (
-            self.save_top_k == 0  # no models are saved
+            trainer.fast_dev_run  # disable checkpointing with fast_dev_run
+            or self.save_top_k == 0  # no models are saved
             or self.period < 1  # no models are saved
             or (epoch + 1) % self.period  # skip epoch
             or trainer.running_sanity_check  # don't save anything during sanity check
@@ -478,14 +479,14 @@ def __resolve_ckpt_dir(self, trainer, pl_module):
             version, name = trainer.accelerator_backend.broadcast((version, trainer.logger.name))
 
             ckpt_path = os.path.join(
-                save_dir, name, version, "checkpoints"
+                save_dir, str(name), version, "checkpoints"
             )
         else:
             ckpt_path = os.path.join(trainer.weights_save_path, "checkpoints")
 
         self.dirpath = ckpt_path
 
-        if trainer.is_global_zero:
+        if not trainer.fast_dev_run and trainer.is_global_zero:
             self._fs.makedirs(self.dirpath, exist_ok=True)
 
     def _add_backward_monitor_support(self, trainer):
 
@@ -22,7 +22,6 @@
 import importlib
 import sys
 
-
 # check if ipywidgets is installed before importing tqdm.auto
 # to ensure it won't fail and a progress bar is displayed
 if importlib.util.find_spec('ipywidgets') is not None:
@@ -323,7 +322,7 @@ def on_epoch_start(self, trainer, pl_module):
         super().on_epoch_start(trainer, pl_module)
         total_train_batches = self.total_train_batches
         total_val_batches = self.total_val_batches
-        if total_train_batches != float('inf') and not trainer.fast_dev_run:
+        if total_train_batches != float('inf'):
             # val can be checked multiple times per epoch
             val_checks_per_epoch = total_train_batches // trainer.val_check_batch
             total_val_batches = total_val_batches * val_checks_per_epoch