Lightning-AI
diff --git a/‎.mergify.yml‎
Lines changed: 56 additions & 56 deletions b/‎.mergify.yml‎
Lines changed: 56 additions & 56 deletions
diff --git a/‎CHANGELOG.md‎
Lines changed: 22 additions & 3 deletions b/‎CHANGELOG.md‎
Lines changed: 22 additions & 3 deletions
diff --git a/‎benchmarks/__init__.py‎
Lines changed: 17 additions & 0 deletions b/‎benchmarks/__init__.py‎
Lines changed: 17 additions & 0 deletions
diff --git a/‎benchmarks/generate_comparison.py‎
Lines changed: 60 additions & 0 deletions b/‎benchmarks/generate_comparison.py‎
Lines changed: 60 additions & 0 deletions
diff --git a/‎benchmarks/test_parity.py‎ renamed to ‎benchmarks/test_basic_parity.py‎
Lines changed: 41 additions & 21 deletions b/‎benchmarks/test_parity.py‎ renamed to ‎benchmarks/test_basic_parity.py‎
Lines changed: 41 additions & 21 deletions
diff --git a/‎benchmarks/test_sharded_parity.py‎
Lines changed: 14 additions & 0 deletions b/‎benchmarks/test_sharded_parity.py‎
Lines changed: 14 additions & 0 deletions
diff --git a/‎dockers/base-xla/Dockerfile‎
Lines changed: 2 additions & 0 deletions b/‎dockers/base-xla/Dockerfile‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎dockers/tpu-tests/Dockerfile‎
Lines changed: 3 additions & 1 deletion b/‎dockers/tpu-tests/Dockerfile‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎docs/source/_images/benchmarks/figure-parity-times.png‎
30.8 KB b/‎docs/source/_images/benchmarks/figure-parity-times.png‎
30.8 KB
@@ -12,59 +12,59 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-pull_request_rules:
-
-  - name: Automatic merge on approval
-    conditions:
-      - base=master
-      # number of review approvals
-      - "#approved-reviews-by>=3"
-      # no waiting or assigned review
-      - "#review-requested=0"
-      # no requested chnages from any reviewer
-      - "#changes-requested-reviews-by=0"
-      # this serves as ALL check has to pass as we have actually around 40 tests in total
-      - "#status-success>=54"
-      # this is just in case since we rely on GPU tests (note: redundand to the above)
-      - status-success=continuous-integration/drone/pr
-      - "status-success=ci/circleci: TPU-tests"
-      # this is patter-like, unofrunatly serves as `any(...)` (note: redundand to the above)
-      #- "status-success~=^ci/circleci:"
-      # no conflict with master branch
-      - -conflict
-      # was not closed yet
-      - -closed
-      # filter-out GH draft PRs
-      - -draft
-    actions:
-      delete_head_branch: {}
-      merge:
-        # https://doc.mergify.io/merge-action.html#strict-merge
-        # (on head branch) $ git merge --no-ff base
-        # (on head branch) # Wait for CI to go green
-        # (on head branch) # Squash all commits
-        # (on base branch) $ git merge --ff head
-        strict: true
-        method: squash
-      comment:
-        message: Great job! =)
-
-  - name: warn on conflicts
-    conditions:
-      - conflict
-      # filter-out GH draft PRs
-      - -draft
-    actions:
-      comment:
-        message: This pull request is now in conflict... :(
-
-  - name: add core reviewer
-    conditions:
-      # filter-out GH draft PRs
-      - -draft
-      # number of review approvals
-      - "#approved-reviews-by<3"
-    actions:
-      request_reviews:
-        teams:
-          - core-contributors
+#pull_request_rules:
+#
+#  - name: Automatic merge on approval
+#    conditions:
+#      - base=master
+#      # number of review approvals
+#      - "#approved-reviews-by>=3"
+#      # no waiting or assigned review
+#      - "#review-requested=0"
+#      # no requested changes from any reviewer
+#      - "#changes-requested-reviews-by=0"
+#      # this serves as "ALL checks have to pass", as we actually have around 40 tests in total
+#      - "#status-success>=54"
+#      # this is just in case since we rely on GPU tests (note: redundant to the above)
+#      - status-success=continuous-integration/drone/pr
+#      - "status-success=ci/circleci: TPU-tests"
+#      # this is pattern-like, unfortunately serves as `any(...)` (note: redundant to the above)
+#      #- "status-success~=^ci/circleci:"
+#      # no conflict with master branch
+#      - -conflict
+#      # was not closed yet
+#      - -closed
+#      # filter-out GH draft PRs
+#      - -draft
+#    actions:
+#      delete_head_branch: {}
+#      merge:
+#        # https://doc.mergify.io/merge-action.html#strict-merge
+#        # (on head branch) $ git merge --no-ff base
+#        # (on head branch) # Wait for CI to go green
+#        # (on head branch) # Squash all commits
+#        # (on base branch) $ git merge --ff head
+#        strict: true
+#        method: squash
+#      comment:
+#        message: Great job! =)
+#
+#  - name: warn on conflicts
+#    conditions:
+#      - conflict
+#      # filter-out GH draft PRs
+#      - -draft
+#    actions:
+#      comment:
+#        message: This pull request is now in conflict... :(
+#
+#  - name: add core reviewer
+#    conditions:
+#      # filter-out GH draft PRs
+#      - -draft
+#      # number of review approvals
+#      - "#approved-reviews-by<3"
+#    actions:
+#      request_reviews:
+#        teams:
+#          - core-contributors
@@ -5,10 +5,12 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 
-## [unreleased.BugFix] - YYYY-MM-DD
+## [1.1.2rc1] - 2020-12-17
 
 ### Added
 
+- Support number for logging with `sync_dist=True` ([#5080](https://github.com/PyTorchLightning/pytorch-lightning/pull/5080))
+
 
 ### Changed
 
@@ -18,9 +20,28 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 ### Removed
 
+- `enable_pl_optimizer=False` by default to temporarily fix AMP issues ([#5163](https://github.com/PyTorchLightning/pytorch-lightning/pull/5163))
+
 
 ### Fixed
 
+- Metric reduction with Logging ([#5150](https://github.com/PyTorchLightning/pytorch-lightning/pull/5150))
+
+
+- Remove nan loss in manual optimization ([#5121](https://github.com/PyTorchLightning/pytorch-lightning/pull/5121))
+
+
+- Un-balanced logging properly supported ([#5119](https://github.com/PyTorchLightning/pytorch-lightning/pull/5119))
+
+
+- Fix hanging in DDP HPC accelerators ([#5157](https://github.com/PyTorchLightning/pytorch-lightning/pull/5157))
+
+
+- Fix saved filename in `ModelCheckpoint` if it already exists ([#4861](https://github.com/PyTorchLightning/pytorch-lightning/pull/4861))
+
+
+- Fix reset `TensorRunningAccum` ([#5106](https://github.com/PyTorchLightning/pytorch-lightning/pull/5106))
+
 
 ## [1.1.1] - 2020-12-15
 
@@ -34,8 +55,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Refactor load in checkpoint connector ([#4593](https://github.com/PyTorchLightning/pytorch-lightning/pull/4593)
 - Fixed the saved filename in `ModelCheckpoint` when it already exists ([#4861](https://github.com/PyTorchLightning/pytorch-lightning/pull/4861))
 
-
-=======
 ### Removed
 
 - Drop duplicate metrics ([#5014](https://github.com/PyTorchLightning/pytorch-lightning/pull/5014)
 
@@ -0,0 +1,17 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+
+BENCHMARK_ROOT = os.path.dirname(__file__)  # directory that contains this module file
+PROJECT_ROOT = os.path.dirname(BENCHMARK_ROOT)  # parent directory of BENCHMARK_ROOT
@@ -0,0 +1,60 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+
+import matplotlib.pylab as plt
+import pandas as pd
+
+from benchmarks.test_basic_parity import lightning_loop, vanilla_loop
+from tests.base.models import ParityModuleMNIST, ParityModuleRNN
+
+NUM_EPOCHS = 20  # passed as `num_epochs` to both benchmark loops
+NUM_RUNS = 50  # passed as `num_runs` to both benchmark loops; also used to scale the timings in `_main`
+MODEL_CLASSES = (ParityModuleRNN, ParityModuleMNIST)  # `_main` benchmarks each class and draws one subplot for it
+PATH_HERE = os.path.dirname(__file__)  # directory of this script; the CSV dumps and the figure are written here
+FIGURE_EXTENSION = '.png'  # suffix appended to the saved figure's file name
+
+
+def _main():  # Collect per-model timings (cached CSV or fresh benchmark) and save a histogram figure.
+    fig, axarr = plt.subplots(nrows=len(MODEL_CLASSES))  # one subplot row per model class
+    # NOTE(review): `axarr[i]` below relies on `subplots` returning an array, i.e. more than one row - confirm
+    for i, cls_model in enumerate(MODEL_CLASSES):
+        path_csv = os.path.join(PATH_HERE, f'dump-times_{cls_model.__name__}.csv')  # per-model timing cache
+        if os.path.isfile(path_csv):  # reuse previously dumped timings instead of re-running the benchmark
+            df_time = pd.read_csv(path_csv, index_col=0)
+        else:
+            vanilla = vanilla_loop(cls_model, num_epochs=NUM_EPOCHS, num_runs=NUM_RUNS)
+            lightning = lightning_loop(cls_model, num_epochs=NUM_EPOCHS, num_runs=NUM_RUNS)
+            # `[1:]` drops the first entry of each duration list - presumably a warm-up run; TODO confirm
+            df_time = pd.DataFrame({'vanilla PT': vanilla['durations'][1:], 'PT Lightning': lightning['durations'][1:]})
+            df_time /= NUM_RUNS  # NOTE(review): each duration already covers a single run - confirm this scaling
+            df_time.to_csv(os.path.join(PATH_HERE, f'dump-times_{cls_model.__name__}.csv'))  # same path as path_csv
+        # TODO: also add relative X-axis ticks to show both relative and absolute time differences
+        df_time.plot.hist(  # histogram of the timing columns, drawn semi-transparent (alpha=0.5)
+            ax=axarr[i],
+            bins=20,
+            alpha=0.5,
+            title=cls_model.__name__,
+            legend=True,
+            grid=True,
+        )
+        axarr[i].set(xlabel='time [seconds]')
+    # the figure is written into the same directory as this script
+    path_fig = os.path.join(PATH_HERE, f'figure-parity-times{FIGURE_EXTENSION}')
+    fig.tight_layout()
+    fig.savefig(path_fig)
+
+
+if __name__ == '__main__':
+    _main()
@@ -1,8 +1,23 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import time
 
 import numpy as np
 import pytest
 import torch
+from tqdm import tqdm
 
 from pytorch_lightning import seed_everything, Trainer
 import tests.base.develop_utils as tutils
@@ -15,34 +30,33 @@
     (ParityModuleMNIST, 0.25),  # todo: lower this thr
 ])
 @pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine")
-def test_pytorch_parity(tmpdir, cls_model, max_diff):
+def test_pytorch_parity(tmpdir, cls_model, max_diff: float, num_epochs: int = 4, num_runs: int = 3):
     """
     Verify that the same  pytorch and lightning models achieve the same results
     """
-    num_epochs = 4
-    num_rums = 3
-    lightning_outs, pl_times = lightning_loop(cls_model, num_rums, num_epochs)
-    manual_outs, pt_times = vanilla_loop(cls_model, num_rums, num_epochs)
+    lightning = lightning_loop(cls_model, num_runs, num_epochs)
+    vanilla = vanilla_loop(cls_model, num_runs, num_epochs)
 
     # make sure the losses match exactly  to 5 decimal places
-    for pl_out, pt_out in zip(lightning_outs, manual_outs):
+    for pl_out, pt_out in zip(lightning['losses'], vanilla['losses']):
         np.testing.assert_almost_equal(pl_out, pt_out, 5)
 
     # the fist run initialize dataset (download & filter)
-    tutils.assert_speed_parity_absolute(pl_times[1:], pt_times[1:],
-                                        nb_epochs=num_epochs, max_diff=max_diff)
+    tutils.assert_speed_parity_absolute(
+        lightning['durations'][1:], vanilla['durations'][1:], nb_epochs=num_epochs, max_diff=max_diff
+    )
 
 
 def vanilla_loop(cls_model, num_runs=10, num_epochs=10):
     """
     Returns an array with the last loss from each epoch for each run
     """
-    device = torch.device('cuda' if torch.cuda.is_available() else "cpu")
-    errors = []
-    times = []
+    hist_losses = []
+    hist_durations = []
 
+    device = torch.device('cuda' if torch.cuda.is_available() else "cpu")
     torch.backends.cudnn.deterministic = True
-    for i in range(num_runs):
+    for i in tqdm(range(num_runs), desc=f'Vanilla PT with {cls_model.__name__}'):
         time_start = time.perf_counter()
 
         # set seed
@@ -74,18 +88,21 @@ def vanilla_loop(cls_model, num_runs=10, num_epochs=10):
             epoch_losses.append(loss.item())
 
         time_end = time.perf_counter()
-        times.append(time_end - time_start)
+        hist_durations.append(time_end - time_start)
 
-        errors.append(epoch_losses[-1])
+        hist_losses.append(epoch_losses[-1])
 
-    return errors, times
+    return {
+        'losses': hist_losses,
+        'durations': hist_durations,
+    }
 
 
 def lightning_loop(cls_model, num_runs=10, num_epochs=10):
-    errors = []
-    times = []
+    hist_losses = []
+    hist_durations = []
 
-    for i in range(num_runs):
+    for i in tqdm(range(num_runs), desc=f'PT Lightning with {cls_model.__name__}'):
         time_start = time.perf_counter()
 
         # set seed
@@ -108,9 +125,12 @@ def lightning_loop(cls_model, num_runs=10, num_epochs=10):
         trainer.fit(model)
 
         final_loss = trainer.train_loop.running_loss.last().item()
-        errors.append(final_loss)
+        hist_losses.append(final_loss)
 
         time_end = time.perf_counter()
-        times.append(time_end - time_start)
+        hist_durations.append(time_end - time_start)
 
-    return errors, times
+    return {
+        'losses': hist_losses,
+        'durations': hist_durations,
+    }
@@ -1,3 +1,17 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import os
 import platform
 import time
 
@@ -97,6 +97,8 @@ RUN \
     python -c "fname = 'requirements.txt' ; lines = [line for line in open(fname).readlines() if not line.startswith('torch')] ; open(fname, 'w').writelines(lines)" && \
     # drop Horovod as it is not needed
     python -c "fname = 'requirements/extra.txt' ; lines = [line for line in open(fname).readlines() if not line.startswith('horovod')] ; open(fname, 'w').writelines(lines)" && \
+    # drop fairscale as it is not needed
+    python -c "fname = 'requirements/extra.txt' ; lines = [line for line in open(fname).readlines() if 'fairscale' not in line] ; open(fname, 'w').writelines(lines)" && \
     # drop TorchVision as it was installed with XLA
     python -c "fname = 'requirements/examples.txt' ; lines = [line for line in open(fname).readlines() if not line.startswith('torchvision')] ; open(fname, 'w').writelines(lines)" && \
     pip install --requirement ./requirements/devel.txt --upgrade-strategy only-if-needed && \
 
@@ -27,8 +27,10 @@ COPY ./ ./pytorch-lightning/
 RUN \
     # Install pytorch-lightning at the current PR, plus dependencies.
     #pip install -r pytorch-lightning/requirements.txt --no-cache-dir && \
-    # drop Horovod
+    # drop Horovod as it is not needed
     python -c "fname = 'pytorch-lightning/requirements/extra.txt' ; lines = [line for line in open(fname).readlines() if not line.startswith('horovod')] ; open(fname, 'w').writelines(lines)" && \
+    # drop fairscale as it is not needed
+    python -c "fname = 'pytorch-lightning/requirements/extra.txt' ; lines = [line for line in open(fname).readlines() if 'fairscale' not in line] ; open(fname, 'w').writelines(lines)" && \
     pip install -r pytorch-lightning/requirements/devel.txt --no-cache-dir --upgrade-strategy only-if-needed
 
 #RUN python -c "import pytorch_lightning as pl; print(pl.__version__)"