
Commit 969c25e

Author: SeanNaren (committed)
Merge branch 'master' into fix/sharded_clip_val
2 parents: 3f1aeb6 + b9cf122

31 files changed: +485 −173 lines

.github/workflows/ci_test-conda.yml

Lines changed: 6 additions & 7 deletions
@@ -30,13 +30,6 @@ jobs:
         pip install --requirement requirements/devel.txt --upgrade-strategy only-if-needed
         pip list

-    - name: Cache datasets
-      # todo this probably does not work with docker images, rather cache dockers
-      uses: actions/cache@v2
-      with:
-        path: Datasets
-        key: pl-dataset
-
     - name: Pull checkpoints from S3
       # todo: consider adding coma caching, but ATM all models have less than 100KB
       run: |
@@ -46,6 +39,12 @@ jobs:
         unzip -o checkpoints.zip
         ls -l checkpoints/

+    # todo: require proper fix in docker image
+    - name: Hotfix dependency
+      run: |
+        pip install torchtext==0.6.0 -U
+      shell: bash
+
     - name: Tests
       run: |
         # NOTE: run coverage on tests does not propagate failure status for Win, https://github.com/nedbat/coveragepy/issues/1003

.github/workflows/ci_test-full.yml

Lines changed: 12 additions & 1 deletion
@@ -112,6 +112,12 @@ jobs:
         pip list
       shell: bash

+    # todo: require proper fix in docker image
+    - name: Hotfix dependency
+      run: |
+        pip install torchtext==0.6.0 -U
+      shell: bash
+
     - name: Reinstall Horovod if necessary
       if: runner.os != 'windows'
       env:
@@ -135,7 +141,12 @@ jobs:
     - name: Tests
       run: |
         # NOTE: do not include coverage report here, see: https://github.com/nedbat/coveragepy/issues/1003
-        coverage run --source pytorch_lightning -m pytest pytorch_lightning tests pl_examples -v --durations=50 --junitxml=junit/test-results-${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.requires }}.xml
+        coverage run --source pytorch_lightning -m pytest pytorch_lightning tests -v --durations=50 --junitxml=junit/test-results-${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.requires }}.xml
+
+    # todo: put this back just when TorchVision can download datasets
+    #- name: Examples
+    #  run: |
+    #    python -m pytest pl_examples -v --durations=10

     - name: Upload pytest test results
       uses: actions/upload-artifact@v2

azure-pipelines.yml

Lines changed: 15 additions & 8 deletions
@@ -71,6 +71,11 @@ jobs:
       python -c "import torch ; mgpu = torch.cuda.device_count() ; assert mgpu >= 2, f'GPU: {mgpu}'"
     displayName: 'Env details'

+  # todo: require proper fix in docker image
+  - bash: |
+      pip install torchtext==0.7 -U
+    displayName: 'HotFix'
+
   - bash: |
       wget https://pl-public-data.s3.amazonaws.com/legacy/checkpoints.zip -P legacy/
       unzip -o legacy/checkpoints.zip -d legacy/
@@ -92,11 +97,13 @@ jobs:
     displayName: 'Statistics'

   - bash: |
-      python -m pytest benchmarks pl_examples -v --maxfail=2 --durations=0
-    displayName: 'Testing: extended'
-
-  - bash: |
-      python setup.py install --user --quiet
-      bash pl_examples/run_ddp-example.sh
-      pip uninstall -y pytorch-lightning
-    displayName: 'Examples'
+      python -m pytest benchmarks -v --maxfail=2 --durations=0
+    displayName: 'Testing: benchmarks'
+
+  # todo: put this back just when TorchVision can download datasets
+  #- bash: |
+  #    python -m pytest pl_examples -v --maxfail=2 --durations=0
+  #    python setup.py install --user --quiet
+  #    bash pl_examples/run_ddp-example.sh
+  #    pip uninstall -y pytorch-lightning
+  #  displayName: 'Examples'

docs/source/advanced/multiple_loaders.rst

Lines changed: 23 additions & 0 deletions
@@ -16,6 +16,8 @@ Lightning supports multiple dataloaders in a few ways.

 ----------

+.. _multiple-training-dataloaders:
+
 Multiple training dataloaders
 -----------------------------
 For training, the usual way to use multiple dataloaders is to create a ``DataLoader`` class
@@ -86,6 +88,27 @@ For more details please have a look at :attr:`~pytorch_lightning.trainer.trainer

         return loaders

+Furthermore, Lightning also supports returning nested lists and dicts (or a combination).
+
+.. testcode::
+
+    class LitModel(LightningModule):
+
+        def train_dataloader(self):
+
+            loader_a = torch.utils.data.DataLoader(range(8), batch_size=4)
+            loader_b = torch.utils.data.DataLoader(range(16), batch_size=4)
+            loader_c = torch.utils.data.DataLoader(range(32), batch_size=4)
+            loader_d = torch.utils.data.DataLoader(range(64), batch_size=4)
+
+            # pass loaders as a nested dict. This will create batches like this:
+            # {'loaders_a_b': {'a': batch from loader_a, 'b': batch from loader_b},
+            #  'loaders_c_d': {'c': batch from loader_c, 'd': batch from loader_d}}
+            loaders = {'loaders_a_b': {'a': loader_a, 'b': loader_b},
+                       'loaders_c_d': {'c': loader_c, 'd': loader_d}}
+            return loaders
+
 ----------

 Test/Val dataloaders
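
For context on the docs addition above: with a nested dict of loaders like the one in the example, each batch handed to ``training_step`` mirrors the same nesting. Below is a minimal sketch of unpacking such a batch; it assumes the key names from the example, and the loss arithmetic is purely illustrative rather than part of the commit.

    import torch
    from pytorch_lightning import LightningModule


    class LitModel(LightningModule):

        def training_step(self, batch, batch_idx):
            # batch mirrors the dict returned by train_dataloader:
            # {'loaders_a_b': {'a': ..., 'b': ...}, 'loaders_c_d': {'c': ..., 'd': ...}}
            batch_a = batch['loaders_a_b']['a']
            batch_b = batch['loaders_a_b']['b']
            batch_c = batch['loaders_c_d']['c']
            batch_d = batch['loaders_c_d']['d']

            # illustrative only: reduce each sub-batch to a scalar and sum
            return (batch_a.float().mean() + batch_b.float().mean()
                    + batch_c.float().mean() + batch_d.float().mean())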

docs/source/governance.rst

Lines changed: 0 additions & 1 deletion
@@ -21,7 +21,6 @@ Core Maintainers
 - Nicki Skafte (`skaftenicki <https://github.com/SkafteNicki>`_)
 - Peter Yu (`yukw777 <https://github.com/yukw777>`_)
 - Rohit Gupta (`rohitgr7 <https://github.com/rohitgr7>`_)
-- Lezwon Castelino (`lezwon <https://github.com/lezwon>`_)
 - Jeff Yang (`ydcjeff <https://github.com/ydcjeff>`_)
 - Roger Shieh (`s-rog <https://github.com/s-rog>`_)
 - Carlos Mocholí (`carmocca <https://github.com/carmocca>`_)

pytorch_lightning/accelerators/accelerator.py

Lines changed: 1 addition & 1 deletion
@@ -379,7 +379,7 @@ def optimizer_state(self, optimizer: Optimizer) -> Dict[str, torch.Tensor]:
         return getattr(self.training_type_plugin, 'optimizer_state', lambda x: x.state_dict())(optimizer)

     def on_save(self, checkpoint: Dict[str, Union[Any, torch.Tensor]]) -> Dict[str, Union[Any, torch.Tensor]]:
-        return checkpoint
+        return self.training_type_plugin.on_save(checkpoint)

     def barrier(self, name: Optional[str] = None) -> None:
         self.training_type_plugin.barrier(name=name)
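
For context on the one-line change above: routing the checkpoint dict through ``training_type_plugin.on_save`` lets a plugin rewrite the checkpoint before it is written, which is what sharded-training plugins need. A minimal sketch of such a hook follows; it assumes only that the plugin's ``on_save`` receives and returns the checkpoint dict (as the delegation implies), and the mixin name and CPU-offload logic are illustrative, not part of this commit.

    import torch


    def _to_cpu(obj):
        # recursively move tensors to CPU; leave all other values untouched
        if isinstance(obj, torch.Tensor):
            return obj.cpu()
        if isinstance(obj, dict):
            return {k: _to_cpu(v) for k, v in obj.items()}
        return obj


    class CPUOffloadOnSaveMixin:
        """Illustrative plugin mixin: offload checkpoint tensors to CPU before writing."""

        def on_save(self, checkpoint):
            # invoked through Accelerator.on_save(), which now delegates to the plugin
            return _to_cpu(checkpoint)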

pytorch_lightning/callbacks/model_checkpoint.py

Lines changed: 6 additions & 7 deletions
@@ -239,7 +239,7 @@ def save_checkpoint(self, trainer, pl_module):
             self._save_top_k_checkpoints(trainer, pl_module, monitor_candidates)

         # Mode 2: save the last checkpoint
-        self._save_last_checkpoint(trainer, pl_module, monitor_candidates)
+        self._save_last_checkpoint(trainer, monitor_candidates)

     def __validate_init_configuration(self):
         if self.save_top_k is not None and self.save_top_k < -1:
@@ -291,8 +291,7 @@ def _del_model(self, filepath: str):
             self._fs.rm(filepath)
             log.debug(f"Removed checkpoint: {filepath}")

-    def _save_model(self, filepath: str, trainer, pl_module):
-        # Todo: required argument `pl_module` is not used
+    def _save_model(self, filepath: str, trainer):
         # in debugging, track when we save checkpoints
         trainer.dev_debugger.track_checkpointing_history(filepath)

@@ -481,7 +480,7 @@ def _monitor_candidates(self, trainer):
         monitor_candidates.update(step=trainer.global_step, epoch=trainer.current_epoch)
         return monitor_candidates

-    def _save_last_checkpoint(self, trainer, pl_module, ckpt_name_metrics):
+    def _save_last_checkpoint(self, trainer, ckpt_name_metrics):
         should_save_last = self.monitor is None or self.save_last
         if not should_save_last:
             return
@@ -505,9 +504,9 @@ def _save_last_checkpoint(self, trainer, pl_module, ckpt_name_metrics):

         if trainer.training_type_plugin.rpc_enabled:
             # RPCPlugin manages saving all model states
-            trainer.training_type_plugin.rpc_save_model(self._save_model, last_filepath, trainer, pl_module)
+            trainer.training_type_plugin.rpc_save_model(self._save_model, last_filepath, trainer)
         else:
-            self._save_model(last_filepath, trainer, pl_module)
+            self._save_model(last_filepath, trainer)
         if (
             self.last_model_path and self.last_model_path != last_filepath
             and (self.save_top_k != -1 or self.save_last) and trainer.is_global_zero
@@ -574,7 +573,7 @@ def _update_best_and_save(
                 f"Epoch {epoch:d}, global step {step:d}: {self.monitor} reached {current:0.5f}"
                 f' (best {self.best_model_score:0.5f}), saving model to "{filepath}" as top {k}'
             )
-            self._save_model(filepath, trainer, pl_module)
+            self._save_model(filepath, trainer)

             if del_filepath is not None and filepath != del_filepath:
                 self._del_model(del_filepath)

pytorch_lightning/core/hooks.py

Lines changed: 32 additions & 3 deletions
@@ -383,12 +383,14 @@ def prepare_data(self):
             model.test_dataloader()
         """

-    def train_dataloader(self) -> DataLoader:
+    def train_dataloader(self) -> Any:
         """
-        Implement a PyTorch DataLoader for training.
+        Implement one or more PyTorch DataLoaders for training.

         Return:
-            Single PyTorch :class:`~torch.utils.data.DataLoader`.
+            Either a single PyTorch :class:`~torch.utils.data.DataLoader` or a collection of these
+            (list, dict, nested lists and dicts). In the case of multiple dataloaders, please see
+            this :ref:`page <multiple-training-dataloaders>`

         The dataloader you return will not be called every epoch unless you set
         :paramref:`~pytorch_lightning.trainer.Trainer.reload_dataloaders_every_epoch` to ``True``.
@@ -414,6 +416,7 @@ def train_dataloader(self) -> DataLoader:

         Example::

+            # single dataloader
             def train_dataloader(self):
                 transform = transforms.Compose([transforms.ToTensor(),
                                                 transforms.Normalize((0.5,), (1.0,))])
@@ -426,6 +429,32 @@ def train_dataloader(self):
                 )
                 return loader

+            # multiple dataloaders, return as list
+            def train_dataloader(self):
+                mnist = MNIST(...)
+                cifar = CIFAR(...)
+                mnist_loader = torch.utils.data.DataLoader(
+                    dataset=mnist, batch_size=self.batch_size, shuffle=True
+                )
+                cifar_loader = torch.utils.data.DataLoader(
+                    dataset=cifar, batch_size=self.batch_size, shuffle=True
+                )
+                # each batch will be a list of tensors: [batch_mnist, batch_cifar]
+                return [mnist_loader, cifar_loader]
+
+            # multiple dataloaders, return as dict
+            def train_dataloader(self):
+                mnist = MNIST(...)
+                cifar = CIFAR(...)
+                mnist_loader = torch.utils.data.DataLoader(
+                    dataset=mnist, batch_size=self.batch_size, shuffle=True
+                )
+                cifar_loader = torch.utils.data.DataLoader(
+                    dataset=cifar, batch_size=self.batch_size, shuffle=True
+                )
+                # each batch will be a dict of tensors: {'mnist': batch_mnist, 'cifar': batch_cifar}
+                return {'mnist': mnist_loader, 'cifar': cifar_loader}
+
         """
         rank_zero_warn("`train_dataloader` must be implemented to be used with the Lightning Trainer")
pytorch_lightning/core/lightning.py

Lines changed: 72 additions & 42 deletions
@@ -19,12 +19,13 @@
 import logging
 import os
 import tempfile
+import types
 import uuid
 from abc import ABC
 from argparse import Namespace
 from functools import partial
 from pathlib import Path
-from typing import Any, Callable, Dict, List, Optional, Tuple, TYPE_CHECKING, Union
+from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, TYPE_CHECKING, Union

 import torch
 from torch import ScriptModule, Tensor
@@ -1591,55 +1592,84 @@ def _auto_collect_arguments(cls, frame=None) -> Tuple[Dict, Dict]:
             parents_arguments.update(args)
         return self_arguments, parents_arguments

-    def save_hyperparameters(self, *args, frame=None) -> None:
-        """Save all model arguments.
+    def save_hyperparameters(
+        self,
+        *args,
+        ignore: Optional[Union[Sequence[str], str]] = None,
+        frame: Optional[types.FrameType] = None
+    ) -> None:
+        """Save model arguments to ``hparams`` attribute.

         Args:
             args: single object of `dict`, `NameSpace` or `OmegaConf`
-                or string names or arguments from class `__init__`
-
-            >>> class ManuallyArgsModel(LightningModule):
-            ...     def __init__(self, arg1, arg2, arg3):
-            ...         super().__init__()
-            ...         # manually assign arguments
-            ...         self.save_hyperparameters('arg1', 'arg3')
-            ...     def forward(self, *args, **kwargs):
-            ...         ...
-            >>> model = ManuallyArgsModel(1, 'abc', 3.14)
-            >>> model.hparams
-            "arg1": 1
-            "arg3": 3.14
-
-            >>> class AutomaticArgsModel(LightningModule):
-            ...     def __init__(self, arg1, arg2, arg3):
-            ...         super().__init__()
-            ...         # equivalent automatic
-            ...         self.save_hyperparameters()
-            ...     def forward(self, *args, **kwargs):
-            ...         ...
-            >>> model = AutomaticArgsModel(1, 'abc', 3.14)
-            >>> model.hparams
-            "arg1": 1
-            "arg2": abc
-            "arg3": 3.14
-
-            >>> class SingleArgModel(LightningModule):
-            ...     def __init__(self, params):
-            ...         super().__init__()
-            ...         # manually assign single argument
-            ...         self.save_hyperparameters(params)
-            ...     def forward(self, *args, **kwargs):
-            ...         ...
-            >>> model = SingleArgModel(Namespace(p1=1, p2='abc', p3=3.14))
-            >>> model.hparams
-            "p1": 1
-            "p2": abc
-            "p3": 3.14
+                or string names or arguments from class ``__init__``
+            ignore: an argument name or a list of argument names from
+                class ``__init__`` to be ignored
+            frame: a frame object. Default is None
+
+        Example::
+            >>> class ManuallyArgsModel(LightningModule):
+            ...     def __init__(self, arg1, arg2, arg3):
+            ...         super().__init__()
+            ...         # manually assign arguments
+            ...         self.save_hyperparameters('arg1', 'arg3')
+            ...     def forward(self, *args, **kwargs):
+            ...         ...
+            >>> model = ManuallyArgsModel(1, 'abc', 3.14)
+            >>> model.hparams
+            "arg1": 1
+            "arg3": 3.14
+
+            >>> class AutomaticArgsModel(LightningModule):
+            ...     def __init__(self, arg1, arg2, arg3):
+            ...         super().__init__()
+            ...         # equivalent automatic
+            ...         self.save_hyperparameters()
+            ...     def forward(self, *args, **kwargs):
+            ...         ...
+            >>> model = AutomaticArgsModel(1, 'abc', 3.14)
+            >>> model.hparams
+            "arg1": 1
+            "arg2": abc
+            "arg3": 3.14
+
+            >>> class SingleArgModel(LightningModule):
+            ...     def __init__(self, params):
+            ...         super().__init__()
+            ...         # manually assign single argument
+            ...         self.save_hyperparameters(params)
+            ...     def forward(self, *args, **kwargs):
+            ...         ...
+            >>> model = SingleArgModel(Namespace(p1=1, p2='abc', p3=3.14))
+            >>> model.hparams
+            "p1": 1
+            "p2": abc
+            "p3": 3.14
+
+            >>> class ManuallyArgsModel(LightningModule):
+            ...     def __init__(self, arg1, arg2, arg3):
+            ...         super().__init__()
+            ...         # pass argument(s) to ignore as a string or in a list
+            ...         self.save_hyperparameters(ignore='arg2')
+            ...     def forward(self, *args, **kwargs):
+            ...         ...
+            >>> model = ManuallyArgsModel(1, 'abc', 3.14)
+            >>> model.hparams
+            "arg1": 1
+            "arg3": 3.14
         """
         if not frame:
             frame = inspect.currentframe().f_back
         init_args = get_init_args(frame)
         assert init_args, "failed to inspect the self init"
+
+        if ignore is not None:
+            if isinstance(ignore, str):
+                ignore = [ignore]
+            if isinstance(ignore, (list, tuple)):
+                ignore = [arg for arg in ignore if isinstance(arg, str)]
+            init_args = {k: v for k, v in init_args.items() if k not in ignore}
+
         if not args:
             # take all arguments
             hp = init_args