From 9b1e4b259df8b9a6ec0970c039a4d4ac7c6f38cb Mon Sep 17 00:00:00 2001 From: Shachar Mirkin Date: Mon, 14 Dec 2020 13:39:29 +0100 Subject: [PATCH 01/37] Add Google Colab badges (#5111) * Add colab badges to notebook Add colab badges to notebook to notebooks 4 & 5 * Add colab badges Co-authored-by: chaton --- notebooks/04-transformers-text-classification.ipynb | 7 +++++++ notebooks/05-trainer-flags-overview.ipynb | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/notebooks/04-transformers-text-classification.ipynb b/notebooks/04-transformers-text-classification.ipynb index 037b24e4ddd9d..d52af84a76d97 100644 --- a/notebooks/04-transformers-text-classification.ipynb +++ b/notebooks/04-transformers-text-classification.ipynb @@ -1,5 +1,12 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\"Open" + ] + }, { "cell_type": "markdown", "metadata": { diff --git a/notebooks/05-trainer-flags-overview.ipynb b/notebooks/05-trainer-flags-overview.ipynb index 6413e8239bb2e..da044a9c9b5c6 100644 --- a/notebooks/05-trainer-flags-overview.ipynb +++ b/notebooks/05-trainer-flags-overview.ipynb @@ -1,5 +1,12 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\"Open" + ] + }, { "cell_type": "markdown", "metadata": { From 76081a729f144e775c21a3a902dd3e739a810f5e Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Mon, 14 Dec 2020 22:46:14 +0100 Subject: [PATCH 02/37] simplify changelog (#5135) --- CHANGELOG.md | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 233d54476889b..8cdeecbb7b409 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -50,6 +50,30 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed distributed setting and `ddp_cpu` only with `num_processes>1` ([#5297](https://github.com/PyTorchLightning/pytorch-lightning/pull/5297)) +## [unreleased.BugFix] - YYYY-MM-DD + +### Added + + +### Changed + + +### Deprecated + + +### Removed + + +### Fixed + +- Fixed trainer by default `None` in `DDPAccelerator` ([#4915](https://github.com/PyTorchLightning/pytorch-lightning/pull/4915)) + + +- Fixed `LightningOptimizer` exposes optimizer attributes ([#5095](https://github.com/PyTorchLightning/pytorch-lightning/pull/5095)) + + +- Do not warn when the `name` key is used in the `lr_scheduler` dict ([#5057](https://github.com/PyTorchLightning/pytorch-lightning/pull/5057)) + ## [1.1.0] - 2020-12-09 From 35401706bf0b89b07bc1748fdc2df612baa2be2a Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Tue, 15 Dec 2020 18:59:13 +0100 Subject: [PATCH 03/37] add copyright to tests (#5143) --- tests/__init__.py | 13 +++++++++++++ tests/collect_env_details.py | 13 +++++++++++++ tests/conftest.py | 14 ++++++++++++++ tests/test_profiler.py | 14 ++++++++++++++ 4 files changed, 54 insertions(+) diff --git a/tests/__init__.py b/tests/__init__.py index 981d685430da9..1bb81c466e6eb 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,3 +1,16 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. import os import numpy as np diff --git a/tests/collect_env_details.py b/tests/collect_env_details.py index 1d443795d2876..2b8c4b3fafeed 100644 --- a/tests/collect_env_details.py +++ b/tests/collect_env_details.py @@ -1,3 +1,16 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Diagnose your system and show basic information This server mainly to get detail info for better bug reporting. diff --git a/tests/conftest.py b/tests/conftest.py index ad4b7169456a8..07188fed4dbed 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,3 +1,17 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import sys import threading from functools import partial, wraps diff --git a/tests/test_profiler.py b/tests/test_profiler.py index 3bce379c1115c..4728b11582dfc 100644 --- a/tests/test_profiler.py +++ b/tests/test_profiler.py @@ -1,3 +1,17 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os import time from pathlib import Path From 81e9d4260e4b056977b623f66cc2444ee214ad02 Mon Sep 17 00:00:00 2001 From: Rohit Gupta Date: Wed, 16 Dec 2020 11:10:33 +0530 Subject: [PATCH 04/37] Fix saved filename in ModelCheckpoint if it already exists (#4861) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * disable version if not required * disable version if not required * pep * chlog * improve test * improve test * parametrize test and update del_list * Update pytorch_lightning/callbacks/model_checkpoint.py Co-authored-by: Carlos Mocholí * try appending version to already saved ckpt_file * Revert "try appending version to already saved ckpt_file" This reverts commit 710e05e01f738d982aabf1f36c09fa59293e5c0c. 
* add more assertions

* use BoringModel

Co-authored-by: Carlos Mocholí
Co-authored-by: chaton
Co-authored-by: Roger Shieh
---
 CHANGELOG.md                                 |  4 ++
 .../callbacks/model_checkpoint.py            | 49 +++++++++++--------
 tests/checkpointing/test_model_checkpoint.py | 39 +++++++++++++++
 3 files changed, 71 insertions(+), 21 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8cdeecbb7b409..4c27a738e3554 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -72,9 +72,13 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

 - Fixed `LightningOptimizer` exposes optimizer attributes ([#5095](https://github.com/PyTorchLightning/pytorch-lightning/pull/5095))

+- Fixed the saved filename in `ModelCheckpoint` when it already exists ([#4861](https://github.com/PyTorchLightning/pytorch-lightning/pull/4861))
+
+
 - Do not warn when the `name` key is used in the `lr_scheduler` dict ([#5057](https://github.com/PyTorchLightning/pytorch-lightning/pull/5057))

+
 ## [1.1.0] - 2020-12-09

 ### Added

diff --git a/pytorch_lightning/callbacks/model_checkpoint.py b/pytorch_lightning/callbacks/model_checkpoint.py
index 2a22c78b51a85..6038a58b77b8d 100644
--- a/pytorch_lightning/callbacks/model_checkpoint.py
+++ b/pytorch_lightning/callbacks/model_checkpoint.py
@@ -230,17 +230,14 @@ def save_checkpoint(self, trainer, pl_module):
         # what can be monitored
         monitor_candidates = self._monitor_candidates(trainer)

-        # ie: path/val_loss=0.5.ckpt
-        filepath = self._get_metric_interpolated_filepath_name(monitor_candidates, epoch, global_step)
-
         # callback supports multiple simultaneous modes
         # here we call each mode sequentially
         # Mode 1: save all checkpoints OR only the top k
         if self.save_top_k:
-            self._save_top_k_checkpoints(monitor_candidates, trainer, pl_module, filepath)
+            self._save_top_k_checkpoints(trainer, pl_module, monitor_candidates)

         # Mode 2: save the last checkpoint
-        self._save_last_checkpoint(trainer, pl_module, monitor_candidates, filepath)
+        self._save_last_checkpoint(trainer, pl_module, monitor_candidates)

     def __validate_init_configuration(self):
         if self.save_top_k is not None and self.save_top_k < -1:
@@ -414,6 +411,7 @@ def format_checkpoint_name(
         )
         if ver is not None:
             filename = self.CHECKPOINT_JOIN_CHAR.join((filename, f"v{ver}"))
+
         ckpt_name = f"{filename}{self.FILE_EXTENSION}"
         return os.path.join(self.dirpath, ckpt_name) if self.dirpath else ckpt_name

@@ -486,13 +484,20 @@ def _validate_monitor_key(self, trainer):
             )
             raise MisconfigurationException(m)

-    def _get_metric_interpolated_filepath_name(self, ckpt_name_metrics: Dict[str, Any], epoch: int, step: int):
+    def _get_metric_interpolated_filepath_name(
+        self,
+        ckpt_name_metrics: Dict[str, Any],
+        epoch: int,
+        step: int,
+        del_filepath: Optional[str] = None
+    ) -> str:
         filepath = self.format_checkpoint_name(epoch, step, ckpt_name_metrics)
+
         version_cnt = 0
-        while self._fs.exists(filepath):
+        while self._fs.exists(filepath) and filepath != del_filepath:
             filepath = self.format_checkpoint_name(epoch, step, ckpt_name_metrics, ver=version_cnt)
-            # this epoch called before
             version_cnt += 1
+
         return filepath

     def _monitor_candidates(self, trainer):
@@ -502,13 +507,11 @@ def _monitor_candidates(self, trainer):
         ckpt_name_metrics.update({"step": trainer.global_step, "epoch": trainer.current_epoch})
         return ckpt_name_metrics

-    def _save_last_checkpoint(self, trainer, pl_module, ckpt_name_metrics, filepath):
+    def _save_last_checkpoint(self, trainer, pl_module, ckpt_name_metrics):
         should_save_last = self.monitor is None or self.save_last
         if not should_save_last:
             return

-        last_filepath = filepath
-
         # when user ALSO asked for the 'last.ckpt' change the name
         if self.save_last:
             last_filepath = self._format_checkpoint_name(
@@ -519,6 +522,10 @@ def _save_last_checkpoint(self, trainer, pl_module, ckpt_name_metrics):
                 prefix=self.prefix
             )
             last_filepath = os.path.join(self.dirpath, f"{last_filepath}{self.FILE_EXTENSION}")
+        else:
+            last_filepath = self._get_metric_interpolated_filepath_name(
+                ckpt_name_metrics, trainer.current_epoch, trainer.global_step
+            )

         accelerator_backend = trainer.accelerator_backend

@@ -539,7 +546,7 @@ def _save_last_checkpoint(self, trainer, pl_module, ckpt_name_metrics):
         if self.monitor is None:
             self.best_model_path = self.last_model_path

-    def _save_top_k_checkpoints(self, metrics, trainer, pl_module, filepath):
+    def _save_top_k_checkpoints(self, trainer, pl_module, metrics):
         current = metrics.get(self.monitor)
         epoch = metrics.get("epoch")
         step = metrics.get("step")
@@ -548,7 +555,7 @@ def _save_top_k_checkpoints(self, trainer, pl_module, metrics):
             current = torch.tensor(current, device=pl_module.device)

         if self.check_monitor_top_k(current):
-            self._update_best_and_save(filepath, current, epoch, step, trainer, pl_module)
+            self._update_best_and_save(current, epoch, step, trainer, pl_module, metrics)
         elif self.verbose:
             rank_zero_info(
                 f"Epoch {epoch:d}, step {step:d}: {self.monitor} was not in top {self.save_top_k}"
@@ -559,25 +566,26 @@ def _is_valid_monitor_key(self, metrics):

     def _update_best_and_save(
         self,
-        filepath: str,
         current: torch.Tensor,
         epoch: int,
         step: int,
         trainer,
         pl_module,
+        ckpt_name_metrics
     ):
         k = len(self.best_k_models) + 1 if self.save_top_k == -1 else self.save_top_k

-        del_list = []
+        del_filepath = None
         if len(self.best_k_models) == k and k > 0:
-            delpath = self.kth_best_model_path
-            self.best_k_models.pop(self.kth_best_model_path)
-            del_list.append(delpath)
+            del_filepath = self.kth_best_model_path
+            self.best_k_models.pop(del_filepath)

         # do not save nan, replace with +/- inf
         if torch.isnan(current):
             current = torch.tensor(float('inf' if self.mode == "min" else '-inf'))

+        filepath = self._get_metric_interpolated_filepath_name(ckpt_name_metrics, epoch, step, del_filepath)
+
         # save the current score
         self.current_score = current
         self.best_k_models[filepath] = current
@@ -601,9 +609,8 @@ def _update_best_and_save(
         )
         self._save_model(filepath, trainer, pl_module)

-        for cur_path in del_list:
-            if cur_path != filepath:
-                self._del_model(cur_path)
+        if del_filepath is not None and filepath != del_filepath:
+            self._del_model(del_filepath)

     def to_yaml(self, filepath: Optional[Union[str, Path]] = None):
         """
diff --git a/tests/checkpointing/test_model_checkpoint.py b/tests/checkpointing/test_model_checkpoint.py
index 27f484c63d87c..1f3e44f58173e 100644
--- a/tests/checkpointing/test_model_checkpoint.py
+++ b/tests/checkpointing/test_model_checkpoint.py
@@ -905,3 +905,42 @@ def __init__(self, hparams):
     else:
         # make sure it's not AttributeDict
         assert type(ckpt[model.CHECKPOINT_HYPER_PARAMS_KEY]) == hparams_type
+
+
+@pytest.mark.parametrize('max_epochs', [3, 4])
+@pytest.mark.parametrize(
+    'save_top_k, expected',
+    [
+        (1, ['curr_epoch.ckpt']),
+        (2, ['curr_epoch.ckpt', 'curr_epoch-v0.ckpt']),
+    ]
+)
+def test_model_checkpoint_file_already_exists(tmpdir, max_epochs, save_top_k, expected):
+    """
+    Test that version is added to filename if required and it already exists in dirpath.
+    """
+    model_checkpoint = ModelCheckpoint(
+        dirpath=tmpdir,
+        filename='curr_epoch',
+        save_top_k=save_top_k,
+        monitor='epoch',
+        mode='max',
+    )
+    trainer = Trainer(
+        default_root_dir=tmpdir,
+        callbacks=[model_checkpoint],
+        max_epochs=max_epochs,
+        limit_train_batches=2,
+        limit_val_batches=2,
+        logger=None,
+        weights_summary=None,
+        progress_bar_refresh_rate=0,
+    )
+
+    model = BoringModel()
+    trainer.fit(model)
+    ckpt_files = os.listdir(tmpdir)
+    assert set(ckpt_files) == set(expected)
+
+    epochs_in_ckpt_files = [pl_load(os.path.join(tmpdir, f))['epoch'] - 1 for f in ckpt_files]
+    assert sorted(epochs_in_ckpt_files) == list(range(max_epochs - save_top_k, max_epochs))
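The patch above makes `ModelCheckpoint` append a version suffix instead of silently overwriting an existing file when a static `filename` collides. A minimal sketch of the resulting behaviour, mirroring the new test (the local `checkpoints` directory is an illustrative assumption, not taken from the patch):

```python
import os

from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint
from tests.base.boring_model import BoringModel

# Static filename, so every epoch resolves to the same path.
checkpoint = ModelCheckpoint(
    dirpath="checkpoints",  # hypothetical directory
    filename="curr_epoch",
    save_top_k=2,
    monitor="epoch",
    mode="max",
)
trainer = Trainer(max_epochs=3, callbacks=[checkpoint], logger=None, weights_summary=None)
trainer.fit(BoringModel())

# After the fix, the second-best checkpoint gets a version suffix instead of
# clobbering the first: ['curr_epoch-v0.ckpt', 'curr_epoch.ckpt']
print(sorted(os.listdir("checkpoints")))
```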
+ """ + model_checkpoint = ModelCheckpoint( + dirpath=tmpdir, + filename='curr_epoch', + save_top_k=save_top_k, + monitor='epoch', + mode='max', + ) + trainer = Trainer( + default_root_dir=tmpdir, + callbacks=[model_checkpoint], + max_epochs=max_epochs, + limit_train_batches=2, + limit_val_batches=2, + logger=None, + weights_summary=None, + progress_bar_refresh_rate=0, + ) + + model = BoringModel() + trainer.fit(model) + ckpt_files = os.listdir(tmpdir) + assert set(ckpt_files) == set(expected) + + epochs_in_ckpt_files = [pl_load(os.path.join(tmpdir, f))['epoch'] - 1 for f in ckpt_files] + assert sorted(epochs_in_ckpt_files) == list(range(max_epochs - save_top_k, max_epochs)) From 151d86e40b69b5239de938f30dcf66bfe24056c2 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Wed, 16 Dec 2020 15:09:26 +0900 Subject: [PATCH 05/37] Update isort config (#5142) * Update isort config * Apply isort with new config * Fix typo in isort config * fix rebase Co-authored-by: Rohit Gupta --- benchmarks/test_parity.py | 2 +- benchmarks/test_sharded_parity.py | 2 +- pyproject.toml | 4 +++- pytorch_lightning/setup_tools.py | 4 ++-- tests/conftest.py | 4 ++-- tests/test_profiler.py | 2 +- 6 files changed, 10 insertions(+), 8 deletions(-) diff --git a/benchmarks/test_parity.py b/benchmarks/test_parity.py index 41bba9533e10d..3508d5a3c28ac 100644 --- a/benchmarks/test_parity.py +++ b/benchmarks/test_parity.py @@ -4,8 +4,8 @@ import pytest import torch +from pytorch_lightning import seed_everything, Trainer import tests.base.develop_utils as tutils -from pytorch_lightning import Trainer, seed_everything from tests.base.models import ParityModuleMNIST, ParityModuleRNN diff --git a/benchmarks/test_sharded_parity.py b/benchmarks/test_sharded_parity.py index 0f58cb882bcf9..5d3c73347052c 100644 --- a/benchmarks/test_sharded_parity.py +++ b/benchmarks/test_sharded_parity.py @@ -6,7 +6,7 @@ import pytest import torch -from pytorch_lightning import Trainer, seed_everything +from pytorch_lightning import seed_everything, Trainer from pytorch_lightning.plugins.ddp_plugin import DDPPlugin from pytorch_lightning.plugins.sharded_plugin import DDPShardedPlugin from pytorch_lightning.utilities import _FAIRSCALE_AVAILABLE, _NATIVE_AMP_AVAILABLE diff --git a/pyproject.toml b/pyproject.toml index 760421a56ece8..01e416aa51d8b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,7 @@ exclude = "(.eggs|.git|.hg|.mypy_cache|.nox|.tox|.venv|.svn|_build|buck-out|buil [tool.isort] known_first_party = [ - "bencharmks", + "benchmarks", "docs", "pl_examples", "pytorch_lightning", @@ -52,3 +52,5 @@ skip_glob = [ ] profile = "black" line_length = 120 +force_sort_within_sections = "True" +order_by_type = "False" diff --git a/pytorch_lightning/setup_tools.py b/pytorch_lightning/setup_tools.py index 26a607a2955b8..e04c4ceec56ef 100644 --- a/pytorch_lightning/setup_tools.py +++ b/pytorch_lightning/setup_tools.py @@ -14,12 +14,12 @@ # limitations under the License. 
From 1d1394360574409d44637869f0ce70eea79dfa5c Mon Sep 17 00:00:00 2001
From: Loi Ly
Date: Wed, 16 Dec 2020 13:44:30 +0700
Subject: [PATCH 06/37] Fix reset TensorRunningAccum (#5106)

* Fix reset TensorRunningAccum

* add test for TensorRunningAccum's reset method

* fix CI failed due to PEP8

Co-authored-by: Rohit Gupta
---
 pytorch_lightning/trainer/supporters.py |  2 +-
 tests/trainer/test_supporters.py        | 25 ++++++++++++++++++++++++-
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/pytorch_lightning/trainer/supporters.py b/pytorch_lightning/trainer/supporters.py
index db51fb8014de0..04fa3f4cc842b 100644
--- a/pytorch_lightning/trainer/supporters.py
+++ b/pytorch_lightning/trainer/supporters.py
@@ -56,7 +56,7 @@ def __init__(self, window_length: int):

     def reset(self) -> None:
         """Empty the accumulator."""
-        self = TensorRunningAccum(self.window_length)
+        self.__init__(self.window_length)

     def last(self):
         """Get the last added element."""
diff --git a/tests/trainer/test_supporters.py b/tests/trainer/test_supporters.py
index 6195d7ddeb0b0..b1b0db749ef8d 100644
--- a/tests/trainer/test_supporters.py
+++ b/tests/trainer/test_supporters.py
@@ -17,10 +17,33 @@
 import torch
 from torch.utils.data import TensorDataset

-from pytorch_lightning.trainer.supporters import CycleIterator, CombinedLoader, CombinedDataset, CombinedLoaderIterator
+from pytorch_lightning.trainer.supporters import (
+    CycleIterator, CombinedLoader, CombinedDataset, CombinedLoaderIterator, TensorRunningAccum)
 from pytorch_lightning.utilities.exceptions import MisconfigurationException


+def test_tensor_running_accum_reset():
+    """ Test that reset would set all attributes to the initialization state """
+
+    window_length = 10
+
+    accum = TensorRunningAccum(window_length=window_length)
+    assert accum.last() is None
+    assert accum.mean() is None
+
+    accum.append(torch.tensor(1.5))
+    assert accum.last() == torch.tensor(1.5)
+    assert accum.mean() == torch.tensor(1.5)
+
+    accum.reset()
+    assert accum.window_length == window_length
+    assert accum.memory is None
+    assert accum.current_idx == 0
+    assert accum.last_idx is None
+    assert not accum.rotated
+
+
 def test_cycle_iterator():
     """Test the cycling function of `CycleIterator`"""
     iterator = CycleIterator(range(100), 1000)
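The one-line fix works because `self = TensorRunningAccum(...)` only rebinds the local name `self`, leaving the instance's state untouched, whereas `self.__init__(...)` reinitialises the existing object in place. A minimal standalone sketch of the difference (illustrative class, not from the patch):

```python
class Accum:
    def __init__(self):
        self.items = []

    def broken_reset(self):
        self = Accum()  # rebinds the local variable only; the caller sees no change

    def reset(self):
        self.__init__()  # reinitialises this instance in place


a = Accum()
a.items.append(1)
a.broken_reset()
assert a.items == [1]  # state survived the "reset"
a.reset()
assert a.items == []   # state actually cleared
```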
From a7fe24e9a1a7b13958e70bca229fb9510a17bc58 Mon Sep 17 00:00:00 2001
From: ananthsub
Date: Wed, 16 Dec 2020 12:07:11 -0800
Subject: [PATCH 07/37] Fix hang in DDP HPC accelerators (#5157)

* Fix hang in DDP HPC accelerators

init_device was never called

* Update CHANGELOG.md
---
 CHANGELOG.md                                              | 1 +
 pytorch_lightning/accelerators/ddp_cpu_hpc_accelerator.py | 3 +++
 pytorch_lightning/accelerators/ddp_hpc_accelerator.py     | 1 +
 3 files changed, 5 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4c27a738e3554..7ac27aacf9e0b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -77,6 +77,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

 - Do not warn when the `name` key is used in the `lr_scheduler` dict ([#5057](https://github.com/PyTorchLightning/pytorch-lightning/pull/5057))

+- Fixed `DDPHPCAccelerator` hangs in DDP construction by calling `init_device` ([#5157](https://github.com/PyTorchLightning/pytorch-lightning/pull/5157))

 ## [1.1.0] - 2020-12-09

diff --git a/pytorch_lightning/accelerators/ddp_cpu_hpc_accelerator.py b/pytorch_lightning/accelerators/ddp_cpu_hpc_accelerator.py
index 4f45b7456cc9c..7db8e3defdb21 100644
--- a/pytorch_lightning/accelerators/ddp_cpu_hpc_accelerator.py
+++ b/pytorch_lightning/accelerators/ddp_cpu_hpc_accelerator.py
@@ -43,3 +43,6 @@ def model_to_device(self, model, process_idx):
     def get_device_ids(self):
         device_ids = None
         return device_ids
+
+    def init_device(self, process_idx):
+        pass
diff --git a/pytorch_lightning/accelerators/ddp_hpc_accelerator.py b/pytorch_lightning/accelerators/ddp_hpc_accelerator.py
index a32e3d6c2f1fe..47c1b736fd8b4 100644
--- a/pytorch_lightning/accelerators/ddp_hpc_accelerator.py
+++ b/pytorch_lightning/accelerators/ddp_hpc_accelerator.py
@@ -121,6 +121,7 @@ def ddp_train(self, process_idx, model):
         """
         # determine which process we are and world size
         self.set_world_ranks(process_idx)
+        self.init_device(process_idx)

         # toggle prog bar
         if (self.trainer.node_rank != 0 or process_idx != 0) and self.trainer.progress_bar_callback is not None:
From 89ff7b49d348ff4af887ff82f5edf2655c713778 Mon Sep 17 00:00:00 2001
From: Sean Naren
Date: Tue, 15 Dec 2020 19:59:35 +0000
Subject: [PATCH 08/37] Update changelog, increment version (#5148)

---
 CHANGELOG.md | 44 ++++++++++++++++++++++++++++----------------
 1 file changed, 28 insertions(+), 16 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7ac27aacf9e0b..1f9e644b333db 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -50,34 +50,46 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Fixed distributed setting and `ddp_cpu` only with `num_processes>1` ([#5297](https://github.com/PyTorchLightning/pytorch-lightning/pull/5297))

-## [unreleased.BugFix] - YYYY-MM-DD
-
-### Added
-
-
-### Changed
-
-
-### Deprecated
-
-
-### Removed
-
-
-### Fixed
-
-- Fixed trainer by default `None` in `DDPAccelerator` ([#4915](https://github.com/PyTorchLightning/pytorch-lightning/pull/4915))
-
-
-- Fixed `LightningOptimizer` exposes optimizer attributes ([#5095](https://github.com/PyTorchLightning/pytorch-lightning/pull/5095))
-
-
-- Fixed the saved filename in `ModelCheckpoint` when it already exists ([#4861](https://github.com/PyTorchLightning/pytorch-lightning/pull/4861))
-
-
-- Fixed `DDPHPCAccelerator` hangs in DDP construction by calling `init_device` ([#5157](https://github.com/PyTorchLightning/pytorch-lightning/pull/5157))
+## [1.1.1] - 2020-12-15
+
+### Added
+
+- Add a notebook example to reach a quick baseline of ~94% accuracy on CIFAR10 using Resnet in Lightning ([#4818](https://github.com/PyTorchLightning/pytorch-lightning/pull/4818))
+- Add Google Colab badges ([#5111](https://github.com/PyTorchLightning/pytorch-lightning/pull/5111))
+
+### Changed
+
+- Update usage of deprecated profiler ([#5010](https://github.com/PyTorchLightning/pytorch-lightning/pull/5010))
+- Update usage of deprecated automatic_optimization ([#5011](https://github.com/PyTorchLightning/pytorch-lightning/pull/5011))
+- Simplify accelerator steps ([#5015](https://github.com/PyTorchLightning/pytorch-lightning/pull/5015))
+- Split tests for deprecated api ([#5071](https://github.com/PyTorchLightning/pytorch-lightning/pull/5071))
+- Improve some tests ([#5049](https://github.com/PyTorchLightning/pytorch-lightning/pull/5049))
+- Refactor load in checkpoint connector ([#4593](https://github.com/PyTorchLightning/pytorch-lightning/pull/4593))
+
+### Removed
+
+- Drop duplicate metrics (#5014) ([#5014](https://github.com/PyTorchLightning/pytorch-lightning/pull/5014))
+- Remove beta arg from F1 class and functional ([#5076](https://github.com/PyTorchLightning/pytorch-lightning/pull/5076))
+- Drop unused test with results API ([#5058](https://github.com/PyTorchLightning/pytorch-lightning/pull/5058))
+
+### Fixed
+
+- Fixed trainer by default `None` in `DDPAccelerator` ([#4915](https://github.com/PyTorchLightning/pytorch-lightning/pull/4915))
+- Fixed `LightningOptimizer` to expose optimizer attributes ([#5095](https://github.com/PyTorchLightning/pytorch-lightning/pull/5095))
+- Do not warn when the `name` key is used in the `lr_scheduler` dict ([#5057](https://github.com/PyTorchLightning/pytorch-lightning/pull/5057))
+- Check if optimizer supports closure ([#4981](https://github.com/PyTorchLightning/pytorch-lightning/pull/4981))
+- Add deprecated metric utility functions back to functional (
+    [#5067](https://github.com/PyTorchLightning/pytorch-lightning/pull/5067),
+    [#5068](https://github.com/PyTorchLightning/pytorch-lightning/pull/5068))
+- Allow any input in to_onnx and to_torchscript ([#4378](https://github.com/PyTorchLightning/pytorch-lightning/pull/4378))
+- Fix hanging metrics tests ([#5134](https://github.com/PyTorchLightning/pytorch-lightning/pull/5134))

 ## [1.1.0] - 2020-12-09

@@ -94,8 +106,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Added logging using `self.log` in train and evaluation for epoch end hooks (
     [#4552](https://github.com/PyTorchLightning/pytorch-lightning/pull/4552),
     [#4495](https://github.com/PyTorchLightning/pytorch-lightning/pull/4495),
-    [#4439](https://github.com/PyTorchLightning/pytorch-lightning/pull/4439))
-    [#4684](https://github.com/PyTorchLightning/pytorch-lightning/pull/4684))
+    [#4439](https://github.com/PyTorchLightning/pytorch-lightning/pull/4439),
+    [#4684](https://github.com/PyTorchLightning/pytorch-lightning/pull/4684),
     [#4913](https://github.com/PyTorchLightning/pytorch-lightning/pull/4913))
 - Added ability for DDP plugin to modify optimizer state saving ([#4675](https://github.com/PyTorchLightning/pytorch-lightning/pull/4675))
 - Added casting to python types for numpy scalars when logging hparams ([#4647](https://github.com/PyTorchLightning/pytorch-lightning/pull/4647))
From 2194d2dbbc04fa6d69f0f9ef2d54b6226cbc1049 Mon Sep 17 00:00:00 2001
From: Sean Naren
Date: Tue, 15 Dec 2020 22:58:28 +0000
Subject: [PATCH 09/37] Prune CHANGELOG.md (#5151)

* Prune CHANGELOG.md

* Update CHANGELOG.md

Co-authored-by: Jirka Borovec

* Update CHANGELOG.md

Co-authored-by: Jirka Borovec

* Update CHANGELOG.md

Co-authored-by: Jirka Borovec

* Update CHANGELOG.md

Co-authored-by: Jirka Borovec

* Update CHANGELOG.md

Co-authored-by: Jirka Borovec

Co-authored-by: Jirka Borovec
---
 CHANGELOG.md | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1f9e644b333db..5f99764e5d460 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -62,22 +62,16 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

 ### Added

 - Add a notebook example to reach a quick baseline of ~94% accuracy on CIFAR10 using Resnet in Lightning ([#4818](https://github.com/PyTorchLightning/pytorch-lightning/pull/4818))
-- Add Google Colab badges ([#5111](https://github.com/PyTorchLightning/pytorch-lightning/pull/5111))

 ### Changed

-- Update usage of deprecated profiler ([#5010](https://github.com/PyTorchLightning/pytorch-lightning/pull/5010))
-- Update usage of deprecated automatic_optimization ([#5011](https://github.com/PyTorchLightning/pytorch-lightning/pull/5011))
 - Simplify accelerator steps ([#5015](https://github.com/PyTorchLightning/pytorch-lightning/pull/5015))
-- Split tests for deprecated api ([#5071](https://github.com/PyTorchLightning/pytorch-lightning/pull/5071))
-- Improve some tests ([#5049](https://github.com/PyTorchLightning/pytorch-lightning/pull/5049))
 - Refactor load in checkpoint connector ([#4593](https://github.com/PyTorchLightning/pytorch-lightning/pull/4593))

 ### Removed

-- Drop duplicate metrics (#5014) ([#5014](https://github.com/PyTorchLightning/pytorch-lightning/pull/5014))
+- Drop duplicate metrics ([#5014](https://github.com/PyTorchLightning/pytorch-lightning/pull/5014))
 - Remove beta arg from F1 class and functional ([#5076](https://github.com/PyTorchLightning/pytorch-lightning/pull/5076))
-- Drop unused test with results API ([#5058](https://github.com/PyTorchLightning/pytorch-lightning/pull/5058))

 ### Fixed

 - Fixed trainer by default `None` in `DDPAccelerator` ([#4915](https://github.com/PyTorchLightning/pytorch-lightning/pull/4915))
 - Fixed `LightningOptimizer` to expose optimizer attributes ([#5095](https://github.com/PyTorchLightning/pytorch-lightning/pull/5095))
 - Do not warn when the `name` key is used in the `lr_scheduler` dict ([#5057](https://github.com/PyTorchLightning/pytorch-lightning/pull/5057))
 - Check if optimizer supports closure ([#4981](https://github.com/PyTorchLightning/pytorch-lightning/pull/4981))
+- Extend LightningOptimizer to expose underlying Optimizer attributes + update doc ([#5095](https://github.com/PyTorchLightning/pytorch-lightning/pull/5095))
 - Add deprecated metric utility functions back to functional (
     [#5067](https://github.com/PyTorchLightning/pytorch-lightning/pull/5067),
     [#5068](https://github.com/PyTorchLightning/pytorch-lightning/pull/5068))
 - Allow any input in `to_onnx` and `to_torchscript` ([#4378](https://github.com/PyTorchLightning/pytorch-lightning/pull/4378))
+- Do not warn when the name key is used in the `lr_scheduler` dict ([#5057](https://github.com/PyTorchLightning/pytorch-lightning/pull/5057))

 ## [1.1.0] - 2020-12-09
From 58a2993766f9634b2d6711c0b43242f1592bd240 Mon Sep 17 00:00:00 2001
From: chaton
Date: Wed, 16 Dec 2020 22:06:54 +0100
Subject: [PATCH 10/37] support number for logging with sync_dist=True (#5080)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* support number

* add two tests

* wip

* add ddp in special test

* remove a test

* move device to bottom

* simplify test

* update test

* Update pytorch_lightning/core/step_result.py

Co-authored-by: Carlos Mocholí

* resolve sync_ddp

Co-authored-by: Carlos Mocholí
---
 pytorch_lightning/core/lightning.py        |  1 +
 pytorch_lightning/core/step_result.py      | 12 ++++--
 pytorch_lightning/utilities/distributed.py | 15 ++++---
 tests/special_tests.sh                     |  2 +-
 .../test_train_loop_logging_1_0.py         | 39 +++++++++++++++++++
 5 files changed, 56 insertions(+), 13 deletions(-)

diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py
index 6a2f75b5b2d02..cbdd86e24d1eb 100644
--- a/pytorch_lightning/core/lightning.py
+++ b/pytorch_lightning/core/lightning.py
@@ -276,6 +276,7 @@ def log(
             sync_dist_group,
             accelerator.sync_tensor,
             self._current_dataloader_idx,
+            self.device,
         )

     def log_dict(
diff --git a/pytorch_lightning/core/step_result.py b/pytorch_lightning/core/step_result.py
index c406fe883db01..d8bbd2096d715 100644
--- a/pytorch_lightning/core/step_result.py
+++ b/pytorch_lightning/core/step_result.py
@@ -15,15 +15,15 @@
 """[Train, Eval]Result for easier logging, checkpointing, early stopping, epoch-wise reduction."""

 import numbers
+import os
 from copy import copy
-from typing import Optional, Dict, Union, Sequence, Callable, MutableMapping, Any, List, Tuple, Iterable
+from typing import Any, Callable, Dict, Iterable, List, MutableMapping, Optional, Sequence, Tuple, Union

 import torch
 from torch import Tensor
-import os

-from pytorch_lightning.utilities.distributed import sync_ddp_if_available
 from pytorch_lightning.metrics import Metric
+from pytorch_lightning.utilities.distributed import sync_ddp_if_available


 class Result(Dict):
@@ -128,6 +128,7 @@ def log(
         sync_dist_group: Optional[Any] = None,
         sync_fn: Callable = None,
         dataloader_idx: Optional[int] = None,
+        device: torch.device = None,
     ):
         # no metrics should be logged with graphs
         if not enable_graph and isinstance(value, torch.Tensor):
@@ -138,7 +139,10 @@ def log(
         if sync_dist and isinstance(value, (torch.Tensor, numbers.Number)):
             is_dist_initialized = torch.distributed.is_available() and torch.distributed.is_initialized()
             # TODO: Find a way to make the reduction only once, so we don't need to clone.
-            value = value.clone() if is_dist_initialized else value
+            if is_dist_initialized and isinstance(value, torch.Tensor):
+                value = value.clone()
+            else:
+                value = torch.tensor(value, device=device, dtype=torch.float)
             value = sync_fn(value, group=sync_dist_group, reduce_op=sync_dist_op)

         if 'meta' not in self:
diff --git a/pytorch_lightning/utilities/distributed.py b/pytorch_lightning/utilities/distributed.py
index 9724f05247c00..c315c6633b6fb 100644
--- a/pytorch_lightning/utilities/distributed.py
+++ b/pytorch_lightning/utilities/distributed.py
@@ -15,14 +15,14 @@
 import os
 import warnings
 from functools import wraps
+from typing import Any, Optional, Union

 import torch
+
 from pytorch_lightning import _logger as log
-from typing import Union, Optional, Any

 if torch.distributed.is_available():
-    from torch.distributed import ReduceOp
-    from torch.distributed import group
+    from torch.distributed import ReduceOp, group
 else:
     class ReduceOp:
         SUM = None
@@ -145,15 +145,14 @@ def sync_ddp(
     if group is None:
         group = torch.distributed.group.WORLD

-    if reduce_op is None:
-        reduce_op = torch.distributed.ReduceOp.SUM
-    elif isinstance(reduce_op, str) and reduce_op in ("avg", "mean"):
-        reduce_op = torch.distributed.ReduceOp.SUM
+    op = reduce_op if isinstance(reduce_op, ReduceOp) else ReduceOp.SUM
+
+    if isinstance(reduce_op, str) and reduce_op.lower() in ("avg", "mean"):
         divide_by_world_size = True

     # sync all processes before reduction
     torch.distributed.barrier(group=group)
-    torch.distributed.all_reduce(result, op=reduce_op, group=group, async_op=False)
+    torch.distributed.all_reduce(result, op=op, group=group, async_op=False)

     if divide_by_world_size:
         result = result / torch.distributed.get_world_size(group)
diff --git a/tests/special_tests.sh b/tests/special_tests.sh
index f7cb581951783..950e3776bbc7f 100644
--- a/tests/special_tests.sh
+++ b/tests/special_tests.sh
@@ -19,4 +19,4 @@ python ${DEFAULTS} tests/plugins/test_rpc_plugin.py::test_rpc_function_calls_ddp
 python ${DEFAULTS} tests/plugins/test_ddp_sequential_plugin.py::test_ddp_sequential_plugin_ddp_rpc_manual
 python ${DEFAULTS} tests/plugins/test_ddp_sequential_plugin.py::test_ddp_sequential_plugin_ddp_rpc_manual_amp
 python ${DEFAULTS} tests/plugins/test_ddp_sequential_plugin.py::test_ddp_sequential_plugin_ddp_rpc_automatic
-# python ${DEFAULTS} tests/plugins/test_ddp_sequential_plugin.py::test_ddp_sequential_plugin_ddp_rpc_with_wrong_balance
+python ${DEFAULTS} tests/trainer/logging_tests/test_train_loop_logging_1_0.py::test_logging_sync_dist_true_ddp
diff --git a/tests/trainer/logging_tests/test_train_loop_logging_1_0.py b/tests/trainer/logging_tests/test_train_loop_logging_1_0.py
index d5a985489a909..8edb2ba8fa1f2 100644
--- a/tests/trainer/logging_tests/test_train_loop_logging_1_0.py
+++ b/tests/trainer/logging_tests/test_train_loop_logging_1_0.py
@@ -18,6 +18,7 @@
 import collections
 import itertools
 import os
+import platform
 from unittest import mock

 import numpy as np
@@ -687,6 +688,7 @@ class TestModel(BoringModel):
     def training_step(self, batch, batch_idx):
         acc = self.step(batch[0])
         self.log('foo', torch.tensor(fake_result), on_step=False, on_epoch=True, sync_dist=True, sync_dist_op='sum')
+        self.log('foo_2', 2, on_step=False, on_epoch=True, sync_dist=True, sync_dist_op='sum')
         return acc

     def validation_step(self, batch, batch_idx):
@@ -706,9 +708,46 @@ def validation_step(self, batch, batch_idx):
     trainer.fit(model)

     assert trainer.logged_metrics['foo'] == fake_result
+    assert trainer.logged_metrics['foo_2'] == 2
     assert trainer.logged_metrics['bar'] == fake_result


+@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
+@pytest.mark.skipif(not os.getenv("PL_RUNNING_SPECIAL_TESTS", '0') == '1',
+                    reason="test should be run outside of pytest")
+def test_logging_sync_dist_true_ddp(tmpdir):
+    """
+    Tests to ensure that the sync_dist flag works with ddp
+    """
+    class TestLoggingSyncDistModel(BoringModel):
+        def training_step(self, batch, batch_idx):
+            acc = self.step(batch[0])
+            self.log('foo', 1, on_step=False, on_epoch=True, sync_dist=True, sync_dist_op='SUM')
+            return acc
+
+        def validation_step(self, batch, batch_idx):
+            self.training_step_called = True
+            output = self.layer(batch)
+            loss = self.loss(batch, output)
+            self.log('bar', 2, on_step=False, on_epoch=True, sync_dist=True, sync_dist_op='AVG')
+            return {"x": loss}
+
+    model = TestLoggingSyncDistModel()
+    trainer = Trainer(
+        default_root_dir=tmpdir,
+        limit_train_batches=1,
+        limit_val_batches=1,
+        max_epochs=2,
+        weights_summary=None,
+        accelerator="ddp",
+        gpus=2,
+    )
+    trainer.fit(model)
+
+    assert trainer.logged_metrics['foo'] == 2
+    assert trainer.logged_metrics['bar'] == 2
+
+
 @pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine")
 def test_logging_sync_dist_true_gpu(tmpdir):
     """
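With this change, `self.log` accepts plain Python numbers together with `sync_dist=True`; the number is wrapped in a float tensor on `self.device` before the cross-process reduction. A minimal sketch of the now-supported call pattern, mirroring the test above (the subclass name is illustrative, not from the patch):

```python
import torch

from tests.base.boring_model import BoringModel


class SyncDistModel(BoringModel):  # hypothetical subclass for illustration
    def training_step(self, batch, batch_idx):
        output = super().training_step(batch, batch_idx)
        # A tensor worked before; a plain number is now handled the same way.
        self.log('foo', torch.tensor(1.0), on_epoch=True, sync_dist=True, sync_dist_op='sum')
        self.log('foo_2', 2, on_epoch=True, sync_dist=True, sync_dist_op='sum')
        return output
```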
From 13bbf4b3f2aed852fc12740b47ab28864a0fee43 Mon Sep 17 00:00:00 2001
From: chaton
Date: Wed, 16 Dec 2020 22:07:17 +0100
Subject: [PATCH 11/37] Un-balanced logging properly supported (#5119)

* resolve bug

* clean code

* resolve comments

* Update tests/trainer/optimization/test_multiple_optimizers.py

Co-authored-by: Rohit Gupta

* resolve another bug

* add comments

* use abs to find diff

* update

* resolve flake8

Co-authored-by: Rohit Gupta
---
 .../logger_connector/epoch_result_store.py   | 26 ++++----
 .../optimization/test_multiple_optimizers.py | 63 +++++++++++++++++++
 2 files changed, 78 insertions(+), 11 deletions(-)
 create mode 100644 tests/trainer/optimization/test_multiple_optimizers.py

diff --git a/pytorch_lightning/trainer/connectors/logger_connector/epoch_result_store.py b/pytorch_lightning/trainer/connectors/logger_connector/epoch_result_store.py
index 28025859814cc..6d206f3dd929e 100644
--- a/pytorch_lightning/trainer/connectors/logger_connector/epoch_result_store.py
+++ b/pytorch_lightning/trainer/connectors/logger_connector/epoch_result_store.py
@@ -91,11 +91,13 @@ def check_dataloader_idx(self, result: Result) -> bool:
         random_key = list(result.keys())[-1]
         return result["meta"][random_key]["dataloader_idx"] is not None

-    def get_latest_from_func_name(self, latest_result, func_name: str, *args, **kwargs) -> Dict:
+    def get_latest_from_func_name(self, latest_result_opt, func_name: str, *args, **kwargs) -> Dict:
         results = {}
-        add_dataloader_idx = self.check_dataloader_idx(latest_result)
-        func = getattr(latest_result, func_name)
-        results.update(func(*args, add_dataloader_idx=add_dataloader_idx, **kwargs))
+        for opt_idx in latest_result_opt:
+            latest_result = latest_result_opt[opt_idx]
+            add_dataloader_idx = self.check_dataloader_idx(latest_result)
+            func = getattr(latest_result, func_name)
+            results.update(func(*args, add_dataloader_idx=add_dataloader_idx, **kwargs))
         return results
def run_latest_batch_metrics_with_func_name(self, func_name, *args, **kwargs) -> List[Dict]: @@ -156,6 +158,7 @@ def append(self, result, dataloader_idx: Optional[int] = None, extra_info: Optio assert isinstance(result, Result) if dataloader_idx is None: dataloader_idx = 0 + if extra_info is None: extra_info = {} @@ -166,6 +169,7 @@ def append(self, result, dataloader_idx: Optional[int] = None, extra_info: Optio if dataloader_idx not in self._internals: self._internals[dataloader_idx] = {} self._internals_reduced[dataloader_idx] = defaultdict(dict) + self._latest_ref[dataloader_idx] = {} # extract infos opt_idx = extra_info["opt_idx"] @@ -173,7 +177,7 @@ def append(self, result, dataloader_idx: Optional[int] = None, extra_info: Optio self._append_to_structure(self._internals[dataloader_idx], opt_idx, batch_idx, result) - self._latest_ref[dataloader_idx] = result + self._latest_ref[dataloader_idx][opt_idx] = result # [dataloader_idx] is a list else: @@ -181,7 +185,11 @@ def append(self, result, dataloader_idx: Optional[int] = None, extra_info: Optio self._internals.setdefault(dataloader_idx, []) self._internals[dataloader_idx].append(result) - self._latest_ref[dataloader_idx] = result + if dataloader_idx not in self._latest_ref: + self._latest_ref[dataloader_idx] = {} + self._latest_ref[dataloader_idx][0] = {} + + self._latest_ref[dataloader_idx][0] = result def auto_reduce_results_on_epoch_end(self) -> None: """ @@ -206,13 +214,9 @@ def auto_reduce_results_on_epoch_end(self) -> None: # TODO: How to start training in middle of epoch opt_outputs = epoch_metrics[opt_idx] - num_batch_idx = len(self._internals[dl_idx][num_opt_idx]) - 1 - assert num_batch_idx >= 0 - batch_indexes = self._internals[dl_idx][num_opt_idx].keys() - # reduce across time first time_reduced_outputs = [] - for batch_idx in batch_indexes: + for batch_idx in opt_outputs.keys(): tbptt_outs = opt_outputs[batch_idx] tbptt_outs = tbptt_outs[0].__class__.reduce_across_time(tbptt_outs) if len(tbptt_outs) > 1: diff --git a/tests/trainer/optimization/test_multiple_optimizers.py b/tests/trainer/optimization/test_multiple_optimizers.py new file mode 100644 index 0000000000000..78b6f8f7ff84a --- /dev/null +++ b/tests/trainer/optimization/test_multiple_optimizers.py @@ -0,0 +1,63 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+""" +Tests to ensure that the behaviours related to multiple optimizers works +""" +import torch + +import pytorch_lightning as pl +from tests.base.boring_model import BoringModel + + +def test_unbalanced_logging_with_multiple_optimizers(tmpdir): + """ + This tests ensures reduction works in un-balanced logging settings + """ + class TestModel(BoringModel): + + loss_1 = [] + loss_2 = [] + + def training_step(self, batch, batch_idx, optimizer_idx): + output = self.layer(batch) + loss = self.loss(batch, output) + if optimizer_idx == 0 and self.trainer.global_step > 10: + self.log("loss_1", loss, on_epoch=True, prog_bar=True) + self.loss_1.append(loss.detach().clone()) + elif optimizer_idx == 1: + self.log("loss_2", loss, on_epoch=True, prog_bar=True) + self.loss_2.append(loss.detach().clone()) + return {"loss": loss} + + def configure_optimizers(self): + optimizer = torch.optim.SGD(self.layer.parameters(), lr=0.001) + optimizer2 = torch.optim.SGD(self.layer.parameters(), lr=0.001) + return [optimizer, optimizer2] + + model = TestModel() + model.training_epoch_end = None + + # Initialize a trainer + trainer = pl.Trainer( + default_root_dir=tmpdir, + max_epochs=1, + ) + + trainer.fit(model) + + assert torch.equal(trainer.callback_metrics["loss_2_step"], model.loss_2[-1]) + assert torch.equal(trainer.callback_metrics["loss_1_step"], model.loss_1[-1]) + # test loss are properly reduced + assert torch.abs(trainer.callback_metrics["loss_2_epoch"] - torch.FloatTensor(model.loss_2).mean()) < 1e-6 + assert torch.abs(trainer.callback_metrics["loss_1_epoch"] - torch.FloatTensor(model.loss_1).mean()) < 1e-6 From 9669c80f293b0ef049d25a6bf8899533a42acaff Mon Sep 17 00:00:00 2001 From: chaton Date: Wed, 16 Dec 2020 22:07:35 +0100 Subject: [PATCH 12/37] [bugfix] remove nan loss in manual optimization (#5121) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * remove nan loss whe missing * Update pytorch_lightning/core/lightning.py Co-authored-by: Carlos Mocholí * Apply suggestions from code review Co-authored-by: Carlos Mocholí Co-authored-by: Rohit Gupta --- pytorch_lightning/core/lightning.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index cbdd86e24d1eb..e8c19ec269366 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1392,12 +1392,15 @@ def get_progress_bar_dict(self): """ # call .item() only once but store elements without graphs running_train_loss = self.trainer.train_loop.running_loss.mean() - avg_training_loss = ( - running_train_loss.cpu().item() - if running_train_loss is not None - else float("NaN") - ) - tqdm_dict = {"loss": "{:.3g}".format(avg_training_loss)} + avg_training_loss = None + if running_train_loss is not None: + avg_training_loss = running_train_loss.cpu().item() + elif self.trainer.train_loop.automatic_optimization: + avg_training_loss = float('NaN') + + tqdm_dict = {} + if avg_training_loss is not None: + tqdm_dict["loss"] = f"{avg_training_loss:.3g}" if self.trainer.truncated_bptt_steps is not None: tqdm_dict["split_idx"] = self.trainer.split_idx From 6b19198aaeaddebd39d54ed44074937ca1eac9c1 Mon Sep 17 00:00:00 2001 From: chaton Date: Wed, 16 Dec 2020 22:08:06 +0100 Subject: [PATCH 13/37] [bug-fix] Metric reduction with Logging (#5150) * add test * resolve bug * udpate test * wrongly copy / paste * update test * resolve a second bug Co-authored-by: Ubuntu --- 
 pytorch_lightning/callbacks/early_stopping.py | 11 +++--
 .../callbacks/model_checkpoint.py             |  9 +++-
 pytorch_lightning/core/step_result.py         |  5 +-
 .../test_train_loop_logging_1_0.py            | 49 ++++++++++++++++++-
 4 files changed, 66 insertions(+), 8 deletions(-)

diff --git a/pytorch_lightning/callbacks/early_stopping.py b/pytorch_lightning/callbacks/early_stopping.py
index 066effc68a03c..670ad1813467a 100644
--- a/pytorch_lightning/callbacks/early_stopping.py
+++ b/pytorch_lightning/callbacks/early_stopping.py
@@ -19,12 +19,14 @@
 Monitor a metric and stop training when it stops improving.

 """
+import numbers

 import numpy as np
 import torch

 from pytorch_lightning.callbacks.base import Callback
-from pytorch_lightning.utilities import rank_zero_info, rank_zero_warn, _TPU_AVAILABLE
+from pytorch_lightning.metrics.metric import Metric
+from pytorch_lightning.utilities import _TPU_AVAILABLE, rank_zero_info, rank_zero_warn


 class EarlyStopping(Callback):
@@ -199,8 +201,11 @@ def _run_early_stopping_check(self, trainer, pl_module):
         # when in dev debugging
         trainer.dev_debugger.track_early_stopping_history(self, current)

-        if not isinstance(current, torch.Tensor):
-            current = torch.tensor(current, device=pl_module.device)
+        if current is not None:
+            if isinstance(current, Metric):
+                current = current.compute()
+            elif isinstance(current, numbers.Number):
+                current = torch.tensor(current, device=pl_module.device, dtype=torch.float)

         if trainer.use_tpu and _TPU_AVAILABLE:
             current = current.cpu()
diff --git a/pytorch_lightning/callbacks/model_checkpoint.py b/pytorch_lightning/callbacks/model_checkpoint.py
index 6038a58b77b8d..cc5252961d757 100644
--- a/pytorch_lightning/callbacks/model_checkpoint.py
+++ b/pytorch_lightning/callbacks/model_checkpoint.py
@@ -20,6 +20,7 @@

 """

+import numbers
 import os
 import re
 from copy import deepcopy
@@ -32,6 +33,7 @@

 from pytorch_lightning import _logger as log
 from pytorch_lightning.callbacks.base import Callback
+from pytorch_lightning.metrics.metric import Metric
 from pytorch_lightning.utilities import rank_zero_info, rank_zero_only, rank_zero_warn
 from pytorch_lightning.utilities.cloud_io import get_filesystem
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
@@ -551,8 +553,11 @@ def _save_top_k_checkpoints(self, trainer, pl_module, metrics):
         epoch = metrics.get("epoch")
         step = metrics.get("step")

-        if not isinstance(current, torch.Tensor) and current is not None:
-            current = torch.tensor(current, device=pl_module.device)
+        if current is not None:
+            if isinstance(current, Metric):
+                current = current.compute()
+            elif isinstance(current, numbers.Number):
+                current = torch.tensor(current, device=pl_module.device, dtype=torch.float)

         if self.check_monitor_top_k(current):
             self._update_best_and_save(current, epoch, step, trainer, pl_module, metrics)
diff --git a/pytorch_lightning/core/step_result.py b/pytorch_lightning/core/step_result.py
index d8bbd2096d715..091f9a789efda 100644
--- a/pytorch_lightning/core/step_result.py
+++ b/pytorch_lightning/core/step_result.py
@@ -371,7 +371,10 @@ def get_forked_metrics(self, add_dataloader_idx=False):
             dl_key = self._add_dataloader_idx(k, options["dataloader_idx"], add_dataloader_idx)

             if options['forked']:
-                result[dl_key] = self[k]
+                if isinstance(self[k], Metric):
+                    result[dl_key] = self[k].compute().detach()
+                else:
+                    result[dl_key] = self[k]

         return result

diff --git a/tests/trainer/logging_tests/test_train_loop_logging_1_0.py b/tests/trainer/logging_tests/test_train_loop_logging_1_0.py
index 8edb2ba8fa1f2..61ed45fa254fa 100644
--- a/tests/trainer/logging_tests/test_train_loop_logging_1_0.py
+++ b/tests/trainer/logging_tests/test_train_loop_logging_1_0.py
@@ -27,8 +27,8 @@
 from torch.utils.data import Dataset

 import pytorch_lightning as pl
-from pytorch_lightning import Trainer, callbacks
-from pytorch_lightning.callbacks import ModelCheckpoint
+from pytorch_lightning import callbacks, Trainer
+from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
 from pytorch_lightning.core.lightning import LightningModule
 from tests.base.boring_model import BoringModel, RandomDictDataset, RandomDictStringDataset
 from tests.base.deterministic_model import DeterministicModel
@@ -857,3 +857,48 @@ def on_train_epoch_end(self, trainer, pl_module, outputs):
                 'on_epoch_end': 5,
                 'on_train_epoch_end': 6}
     assert trainer.callback_metrics == expected
+
+
+@pytest.mark.skipif(not torch.cuda.is_available(), reason="requires GPU machine")
+def test_metric_are_properly_reduced(tmpdir):
+    class TestingModel(BoringModel):
+        def __init__(self, *args, **kwargs):
+            super().__init__()
+            self.train_acc = pl.metrics.Accuracy()
+            self.val_acc = pl.metrics.Accuracy()
+
+        def training_step(self, batch, batch_idx):
+            self.train_acc(torch.rand(1, 3, device=self.device), torch.randint(0, 2, (1,), device=self.device))
+            self.log('train_acc', self.train_acc, on_step=True, on_epoch=True)
+            return super().training_step(batch, batch_idx)
+
+        def validation_step(self, batch, batch_idx):
+            preds = torch.tensor(0, device=self.device)
+            targets = torch.tensor(1, device=self.device)
+            if batch_idx < 8:
+                targets = preds
+            self.val_acc(preds, targets)
+            self.log('val_acc', self.val_acc, on_step=True, on_epoch=True)
+            return super().validation_step(batch, batch_idx)
+
+    early_stop = EarlyStopping(monitor='val_acc', mode='max')
+
+    checkpoint = ModelCheckpoint(
+        monitor='val_acc',
+        save_last=True,
+        save_top_k=2,
+        mode='max',
+    )
+
+    model = TestingModel()
+    trainer = Trainer(
+        default_root_dir=tmpdir,
+        gpus=1,
+        max_epochs=2,
+        limit_train_batches=5,
+        limit_val_batches=32,
+        callbacks=[early_stop, checkpoint])
+    trainer.fit(model)
+
+    assert trainer.callback_metrics["val_acc"] == 8 / 32.
+    assert "train_acc" in trainer.callback_metrics
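With this fix, a `pl.metrics.Metric` instance logged through `self.log` is `compute()`-d before `EarlyStopping` and `ModelCheckpoint` compare it, and plain numbers are converted to float tensors on the module's device. A minimal sketch of the monitoring pattern this enables (the subclass is illustrative, not from the patch):

```python
import torch

import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping
from tests.base.boring_model import BoringModel


class AccuracyModel(BoringModel):  # hypothetical subclass for illustration
    def __init__(self):
        super().__init__()
        self.val_acc = pl.metrics.Accuracy()

    def validation_step(self, batch, batch_idx):
        preds = torch.randint(0, 2, (4,))
        target = torch.randint(0, 2, (4,))
        self.val_acc(preds, target)
        # Logging the Metric object itself is now safe to monitor:
        self.log('val_acc', self.val_acc, on_step=True, on_epoch=True)
        return super().validation_step(batch, batch_idx)


# The callback receives the reduced (computed) value, not the Metric object.
early_stop = EarlyStopping(monitor='val_acc', mode='max')
```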
+ assert "train_acc" in trainer.callback_metrics From 0211f7f9b2c7d7544b26f19d1c91354730ef51a0 Mon Sep 17 00:00:00 2001 From: Sean Naren Date: Thu, 17 Dec 2020 01:08:12 +0000 Subject: [PATCH 14/37] Disable pl optimizer temporarily to fix AMP issues (#5163) * Disable pl optimizer temporarily to fix AMP issues * Add todo and enable pl optimizer in the test --- pytorch_lightning/trainer/trainer.py | 2 +- tests/callbacks/test_callbacks.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 014e0a62679dd..06cdc43674d1b 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -133,7 +133,7 @@ def __init__( distributed_backend: Optional[str] = None, automatic_optimization: Optional[bool] = None, move_metrics_to_cpu: bool = False, - enable_pl_optimizer: bool = True, + enable_pl_optimizer: bool = False, multiple_trainloader_mode: str = 'max_size_cycle', ): r""" diff --git a/tests/callbacks/test_callbacks.py b/tests/callbacks/test_callbacks.py index c00c712bb3b13..070bb4e9f6989 100644 --- a/tests/callbacks/test_callbacks.py +++ b/tests/callbacks/test_callbacks.py @@ -33,6 +33,8 @@ def test_trainer_callback_system(torch_save): limit_train_batches=3, limit_test_batches=2, progress_bar_refresh_rate=0, + # todo: enabled since internally we wrap the model for optimizer step, this should be fixed + enable_pl_optimizer=True ) # no call yet From 5119013c81265beb4ffb6de3de0adcc3414d92c4 Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Thu, 17 Dec 2020 08:27:05 +0100 Subject: [PATCH 15/37] drop install FairScale for TPU (#5113) * drop install FairScale for TPU * typo Co-authored-by: Roger Shieh --- dockers/base-xla/Dockerfile | 2 ++ dockers/tpu-tests/Dockerfile | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/dockers/base-xla/Dockerfile b/dockers/base-xla/Dockerfile index 8eb093295c37b..5dfeac8c9e86e 100644 --- a/dockers/base-xla/Dockerfile +++ b/dockers/base-xla/Dockerfile @@ -97,6 +97,8 @@ RUN \ python -c "fname = 'requirements.txt' ; lines = [line for line in open(fname).readlines() if not line.startswith('torch')] ; open(fname, 'w').writelines(lines)" && \ # drop Horovod as it is not needed python -c "fname = 'requirements/extra.txt' ; lines = [line for line in open(fname).readlines() if not line.startswith('horovod')] ; open(fname, 'w').writelines(lines)" && \ + # drop fairscale as it is not needed + python -c "fname = 'requirements/extra.txt' ; lines = [line for line in open(fname).readlines() if 'fairscale' not in line] ; open(fname, 'w').writelines(lines)" && \ # drop TorchVision as it was installed with XLA python -c "fname = 'requirements/examples.txt' ; lines = [line for line in open(fname).readlines() if not line.startswith('torchvision')] ; open(fname, 'w').writelines(lines)" && \ pip install --requirement ./requirements/devel.txt --upgrade-strategy only-if-needed && \ diff --git a/dockers/tpu-tests/Dockerfile b/dockers/tpu-tests/Dockerfile index a514b1c3d35fe..464f7fd8f309e 100644 --- a/dockers/tpu-tests/Dockerfile +++ b/dockers/tpu-tests/Dockerfile @@ -27,8 +27,10 @@ COPY ./ ./pytorch-lightning/ RUN \ # Install pytorch-lightning at the current PR, plus dependencies. 
     #pip install -r pytorch-lightning/requirements.txt --no-cache-dir && \
-    # drop Horovod
+    # drop Horovod as it is not needed
     python -c "fname = 'pytorch-lightning/requirements/extra.txt' ; lines = [line for line in open(fname).readlines() if not line.startswith('horovod')] ; open(fname, 'w').writelines(lines)" && \
+    # drop fairscale as it is not needed
+    python -c "fname = 'pytorch-lightning/requirements/extra.txt' ; lines = [line for line in open(fname).readlines() if 'fairscale' not in line] ; open(fname, 'w').writelines(lines)" && \
     pip install -r pytorch-lightning/requirements/devel.txt --no-cache-dir --upgrade-strategy only-if-needed

 #RUN python -c "import pytorch_lightning as pl; print(pl.__version__)"
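Both Dockerfiles rely on the same small filtering idiom. As a standalone sketch (the file path here is an assumption), it rewrites a requirements file in place without the lines that mention the dropped package:

```python
# Drop every requirement line mentioning 'fairscale' and rewrite the file.
fname = 'requirements/extra.txt'  # hypothetical path
lines = [line for line in open(fname).readlines() if 'fairscale' not in line]
open(fname, 'w').writelines(lines)
```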
:( - - - name: add core reviewer - conditions: - # filter-out GH draft PRs - - -draft - # number of review approvals - - "#approved-reviews-by<3" - actions: - request_reviews: - teams: - - core-contributors +#pull_request_rules: +# +# - name: Automatic merge on approval +# conditions: +# - base=master +# # number of review approvals +# - "#approved-reviews-by>=3" +# # no waiting or assigned review +# - "#review-requested=0" +# # no requested chnages from any reviewer +# - "#changes-requested-reviews-by=0" +# # this serves as ALL check has to pass as we have actually around 40 tests in total +# - "#status-success>=54" +# # this is just in case since we rely on GPU tests (note: redundand to the above) +# - status-success=continuous-integration/drone/pr +# - "status-success=ci/circleci: TPU-tests" +# # this is patter-like, unofrunatly serves as `any(...)` (note: redundand to the above) +# #- "status-success~=^ci/circleci:" +# # no conflict with master branch +# - -conflict +# # was not closed yet +# - -closed +# # filter-out GH draft PRs +# - -draft +# actions: +# delete_head_branch: {} +# merge: +# # https://doc.mergify.io/merge-action.html#strict-merge +# # (on head branch) $ git merge --no-ff base +# # (on head branch) # Wait for CI to go green +# # (on head branch) # Squash all commits +# # (on base branch) $ git merge --ff head +# strict: true +# method: squash +# comment: +# message: Great job! =) +# +# - name: warn on conflicts +# conditions: +# - conflict +# # filter-out GH draft PRs +# - -draft +# actions: +# comment: +# message: This pull request is now in conflict... :( +# +# - name: add core reviewer +# conditions: +# # filter-out GH draft PRs +# - -draft +# # number of review approvals +# - "#approved-reviews-by<3" +# actions: +# request_reviews: +# teams: +# - core-contributors From 3b836668237c5c088d9eac1db86a6f0293a42070 Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Thu, 17 Dec 2020 10:21:00 +0100 Subject: [PATCH 17/37] prune ecosystem example (#5085) * draft * wip * CI * drop pl geometry * copy * logo --- pl_examples/__init__.py | 37 ++ pl_examples/basic_examples/autoencoder.py | 3 +- .../backbone_image_classifier.py | 3 +- .../basic_examples/conv_sequential_example.py | 2 + .../basic_examples/dali_image_classifier.py | 3 +- .../basic_examples/simple_image_classifier.py | 2 + pl_examples/bug_report_model.py | 3 + .../computer_vision_fine_tuning.py | 15 + .../generative_adversarial_net.py | 15 + pl_examples/domain_templates/imagenet.py | 15 + .../domain_templates/reinforce_learn_Qnet.py | 15 + .../domain_templates/semantic_segmentation.py | 16 + pl_examples/domain_templates/unet.py | 14 + pl_examples/pytorch_ecosystem/__init__.py | 13 + .../pytorch_geometric/README.md | 38 -- .../pytorch_geometric/__init__.py | 0 .../pytorch_geometric/cora_dna.py | 370 ------------------ .../pytorch_geometric/lightning.py | 31 -- .../pytorch_geometric/pyproject.toml | 25 -- pl_examples/test_examples.py | 14 + requirements/examples.txt | 2 +- 21 files changed, 168 insertions(+), 468 deletions(-) create mode 100644 pl_examples/pytorch_ecosystem/__init__.py delete mode 100644 pl_examples/pytorch_ecosystem/pytorch_geometric/README.md delete mode 100644 pl_examples/pytorch_ecosystem/pytorch_geometric/__init__.py delete mode 100644 pl_examples/pytorch_ecosystem/pytorch_geometric/cora_dna.py delete mode 100644 pl_examples/pytorch_ecosystem/pytorch_geometric/lightning.py delete mode 100644 pl_examples/pytorch_ecosystem/pytorch_geometric/pyproject.toml diff --git a/pl_examples/__init__.py 
b/pl_examples/__init__.py index d7cec9fc1bc3a..147fc330ecd59 100644 --- a/pl_examples/__init__.py +++ b/pl_examples/__init__.py @@ -8,3 +8,40 @@ TORCHVISION_AVAILABLE = _module_available("torchvision") DALI_AVAILABLE = _module_available("nvidia.dali") + + +LIGHTNING_LOGO = """ + #### + ########### + #################### + ############################ + ##################################### +############################################## +######################### ################### +####################### ################### +#################### #################### +################## ##################### +################ ###################### +##################### ################# +###################### ################### +##################### ##################### +#################### ####################### +################### ######################### +############################################## + ##################################### + ############################ + #################### + ########## + #### +""" + + +def nice_print(msg, last=False): + print() + print("\033[0;35m" + msg + "\033[0m") + if last: + print() + + +def cli_lightning_logo(): + nice_print(LIGHTNING_LOGO) diff --git a/pl_examples/basic_examples/autoencoder.py b/pl_examples/basic_examples/autoencoder.py index 58a117a648458..72bfcb17c0872 100644 --- a/pl_examples/basic_examples/autoencoder.py +++ b/pl_examples/basic_examples/autoencoder.py @@ -21,7 +21,7 @@ from torch.utils.data import random_split import pytorch_lightning as pl -from pl_examples import TORCHVISION_AVAILABLE +from pl_examples import TORCHVISION_AVAILABLE, cli_lightning_logo if TORCHVISION_AVAILABLE: from torchvision.datasets.mnist import MNIST @@ -105,4 +105,5 @@ def cli_main(): if __name__ == '__main__': + cli_lightning_logo() cli_main() diff --git a/pl_examples/basic_examples/backbone_image_classifier.py b/pl_examples/basic_examples/backbone_image_classifier.py index 91a8481de7fd9..b0ca2efd5d76b 100644 --- a/pl_examples/basic_examples/backbone_image_classifier.py +++ b/pl_examples/basic_examples/backbone_image_classifier.py @@ -19,7 +19,7 @@ from torch.utils.data import DataLoader, random_split import pytorch_lightning as pl -from pl_examples import DATASETS_PATH, TORCHVISION_AVAILABLE +from pl_examples import DATASETS_PATH, TORCHVISION_AVAILABLE, cli_lightning_logo if TORCHVISION_AVAILABLE: from torchvision.datasets.mnist import MNIST @@ -125,4 +125,5 @@ def cli_main(): if __name__ == '__main__': + cli_lightning_logo() cli_main() diff --git a/pl_examples/basic_examples/conv_sequential_example.py b/pl_examples/basic_examples/conv_sequential_example.py index 4c2986701b27c..1d178c32a3ce3 100644 --- a/pl_examples/basic_examples/conv_sequential_example.py +++ b/pl_examples/basic_examples/conv_sequential_example.py @@ -29,6 +29,7 @@ import torchvision import pytorch_lightning as pl +from pl_examples import cli_lightning_logo from pytorch_lightning import Trainer from pytorch_lightning.metrics.functional import accuracy from pytorch_lightning.plugins.ddp_sequential_plugin import DDPSequentialPlugin @@ -190,6 +191,7 @@ def instantiate_datamodule(args): if __name__ == "__main__": + cli_lightning_logo() parser = ArgumentParser(description="Pipe Example") parser.add_argument("--use_ddp_sequential", action="store_true") parser = Trainer.add_argparse_args(parser) diff --git a/pl_examples/basic_examples/dali_image_classifier.py b/pl_examples/basic_examples/dali_image_classifier.py index 291490d6f93e0..b2c9231fe1851 100644 --- 
a/pl_examples/basic_examples/dali_image_classifier.py +++ b/pl_examples/basic_examples/dali_image_classifier.py @@ -22,7 +22,7 @@ from torch.utils.data import random_split import pytorch_lightning as pl -from pl_examples import TORCHVISION_AVAILABLE, DALI_AVAILABLE +from pl_examples import TORCHVISION_AVAILABLE, DALI_AVAILABLE, cli_lightning_logo if TORCHVISION_AVAILABLE: from torchvision.datasets.mnist import MNIST @@ -205,4 +205,5 @@ def cli_main(): if __name__ == "__main__": + cli_lightning_logo() cli_main() diff --git a/pl_examples/basic_examples/simple_image_classifier.py b/pl_examples/basic_examples/simple_image_classifier.py index a341728554d31..6b8457e0e4897 100644 --- a/pl_examples/basic_examples/simple_image_classifier.py +++ b/pl_examples/basic_examples/simple_image_classifier.py @@ -19,6 +19,7 @@ from torch.nn import functional as F import pytorch_lightning as pl +from pl_examples import cli_lightning_logo from pl_examples.basic_examples.mnist_datamodule import MNISTDataModule @@ -103,4 +104,5 @@ def cli_main(): if __name__ == '__main__': + cli_lightning_logo() cli_main() diff --git a/pl_examples/bug_report_model.py b/pl_examples/bug_report_model.py index dbea2013d1110..e2201db12f894 100644 --- a/pl_examples/bug_report_model.py +++ b/pl_examples/bug_report_model.py @@ -22,6 +22,8 @@ import os import torch from torch.utils.data import Dataset + +from pl_examples import cli_lightning_logo from pytorch_lightning import Trainer, LightningModule @@ -137,4 +139,5 @@ def on_train_epoch_start(self) -> None: if __name__ == '__main__': + cli_lightning_logo() run_test() diff --git a/pl_examples/domain_templates/computer_vision_fine_tuning.py b/pl_examples/domain_templates/computer_vision_fine_tuning.py index 21f6644b09a5b..1c60e3aa6d23f 100644 --- a/pl_examples/domain_templates/computer_vision_fine_tuning.py +++ b/pl_examples/domain_templates/computer_vision_fine_tuning.py @@ -1,3 +1,16 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Computer vision example on Transfer Learning. This computer vision example illustrates how one could fine-tune a pre-trained @@ -40,6 +53,7 @@ from torchvision.datasets.utils import download_and_extract_archive import pytorch_lightning as pl +from pl_examples import cli_lightning_logo from pytorch_lightning import _logger as log BN_TYPES = (torch.nn.BatchNorm1d, torch.nn.BatchNorm2d, torch.nn.BatchNorm3d) @@ -451,4 +465,5 @@ def get_args() -> argparse.Namespace: if __name__ == '__main__': + cli_lightning_logo() main(get_args()) diff --git a/pl_examples/domain_templates/generative_adversarial_net.py b/pl_examples/domain_templates/generative_adversarial_net.py index 088b625e31d01..210a80721d9a9 100644 --- a/pl_examples/domain_templates/generative_adversarial_net.py +++ b/pl_examples/domain_templates/generative_adversarial_net.py @@ -1,3 +1,16 @@ +# Copyright The PyTorch Lightning team. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ To run this template just do: python generative_adversarial_net.py @@ -18,6 +31,7 @@ from torch.utils.data import DataLoader from torchvision.datasets import MNIST +from pl_examples import cli_lightning_logo from pytorch_lightning.core import LightningModule, LightningDataModule from pytorch_lightning.trainer import Trainer @@ -211,6 +225,7 @@ def main(args: Namespace) -> None: if __name__ == '__main__': + cli_lightning_logo() parser = ArgumentParser() # Add program level args, if any. diff --git a/pl_examples/domain_templates/imagenet.py b/pl_examples/domain_templates/imagenet.py index b7116547d389b..b1eea307478f9 100644 --- a/pl_examples/domain_templates/imagenet.py +++ b/pl_examples/domain_templates/imagenet.py @@ -1,3 +1,16 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ This example is largely adapted from https://github.com/pytorch/examples/blob/master/imagenet/main.py @@ -32,6 +45,7 @@ import torchvision.transforms as transforms import pytorch_lightning as pl +from pl_examples import cli_lightning_logo from pytorch_lightning.core import LightningModule @@ -246,4 +260,5 @@ def run_cli(): if __name__ == '__main__': + cli_lightning_logo() run_cli() diff --git a/pl_examples/domain_templates/reinforce_learn_Qnet.py b/pl_examples/domain_templates/reinforce_learn_Qnet.py index 4b01f83e36639..a8b9db095f377 100644 --- a/pl_examples/domain_templates/reinforce_learn_Qnet.py +++ b/pl_examples/domain_templates/reinforce_learn_Qnet.py @@ -1,3 +1,16 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
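
Every example script touched by this patch receives the same two-line change at its entry point; the import comes from the pl_examples/__init__.py hunk earlier in this patch. The pattern, shown standalone:

    # Print the ASCII Lightning logo once when the script is run from the
    # command line, then hand off to the example's own entry point.
    from pl_examples import cli_lightning_logo

    def cli_main():
        ...  # placeholder for each example's existing training code

    if __name__ == '__main__':
        cli_lightning_logo()
        cli_main()

The logo itself now lives in pl_examples/__init__.py, replacing the copy that the deleted pytorch_ecosystem/pytorch_geometric/lightning.py carried.
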
""" Deep Reinforcement Learning: Deep Q-network (DQN) @@ -33,6 +46,7 @@ from torch.utils.data.dataset import IterableDataset import pytorch_lightning as pl +from pl_examples import cli_lightning_logo class DQN(nn.Module): @@ -349,6 +363,7 @@ def main(args) -> None: if __name__ == '__main__': + cli_lightning_logo() torch.manual_seed(0) np.random.seed(0) diff --git a/pl_examples/domain_templates/semantic_segmentation.py b/pl_examples/domain_templates/semantic_segmentation.py index 4ca1ebc2aec76..08bdc1140916a 100644 --- a/pl_examples/domain_templates/semantic_segmentation.py +++ b/pl_examples/domain_templates/semantic_segmentation.py @@ -1,3 +1,17 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os import random from argparse import ArgumentParser, Namespace @@ -10,6 +24,7 @@ from torch.utils.data import DataLoader, Dataset import pytorch_lightning as pl +from pl_examples import cli_lightning_logo from pl_examples.domain_templates.unet import UNet from pytorch_lightning.loggers import WandbLogger @@ -225,6 +240,7 @@ def main(hparams: Namespace): if __name__ == '__main__': + cli_lightning_logo() parser = ArgumentParser() parser.add_argument("--data_path", type=str, help="path where dataset is stored") parser.add_argument("--gpus", type=int, default=-1, help="number of available GPUs") diff --git a/pl_examples/domain_templates/unet.py b/pl_examples/domain_templates/unet.py index 6117447e5ed33..20b4bdb2a4bf9 100644 --- a/pl_examples/domain_templates/unet.py +++ b/pl_examples/domain_templates/unet.py @@ -1,3 +1,17 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import torch import torch.nn as nn import torch.nn.functional as F diff --git a/pl_examples/pytorch_ecosystem/__init__.py b/pl_examples/pytorch_ecosystem/__init__.py new file mode 100644 index 0000000000000..d7aa17d7f8468 --- /dev/null +++ b/pl_examples/pytorch_ecosystem/__init__.py @@ -0,0 +1,13 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/pl_examples/pytorch_ecosystem/pytorch_geometric/README.md b/pl_examples/pytorch_ecosystem/pytorch_geometric/README.md deleted file mode 100644 index 5c9a42d5a8942..0000000000000 --- a/pl_examples/pytorch_ecosystem/pytorch_geometric/README.md +++ /dev/null @@ -1,38 +0,0 @@ -# [Pytorch Geometric](https://github.com/rusty1s/pytorch_geometric) examples with Lighting - -### Introduction - -PyTorch Geometric (PyG) is a geometric deep learning extension library for PyTorch. It relies on lower level libraries such as - -* PyTorch Cluster: A package consists of a small extension library of highly optimized graph cluster algorithms in Pytorch -* PyTorch Sparse: A package consists of a small extension library of optimized sparse matrix operations with autograd support in Pytorch -* PyTorch Scatter: A package consists of a small extension library of highly optimized sparse update (scatter and segment) operations for the use in PyTorch - -## Setup - -``` -pyenv install 3.7.8 -pyenv local 3.7.8 -python -m venv -source .venv/bin/activate -poetry install -``` - -Run example - -``` -python cora_dna.py -``` - -## Current example lists - -| `DATASET` | `MODEL` | `TASK` | DATASET DESCRIPTION | MODEL DESCRIPTION | | -| :---: | :---: | :---: | :---: | :---: | :---: | -| Cora | DNA | Node Classification | The citation network datasets "Cora", "CiteSeer" and "PubMed" from the "Revisiting Semi-Supervised Learning with Graph Embeddings" | The dynamic neighborhood aggregation operator from the "Just Jump: Towards Dynamic Neighborhood Aggregation in Graph Neural Networks" - - -## DATASET SIZES - -``` - 16M ./cora -``` diff --git a/pl_examples/pytorch_ecosystem/pytorch_geometric/__init__.py b/pl_examples/pytorch_ecosystem/pytorch_geometric/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/pl_examples/pytorch_ecosystem/pytorch_geometric/cora_dna.py b/pl_examples/pytorch_ecosystem/pytorch_geometric/cora_dna.py deleted file mode 100644 index 84df5e1edf31a..0000000000000 --- a/pl_examples/pytorch_ecosystem/pytorch_geometric/cora_dna.py +++ /dev/null @@ -1,370 +0,0 @@ -"""Graph Convolution Example using Pytorch Geometric - -This example illustrates how one could train a graph convolution model with DNA Conv -on Cora Dataset using pytorch-lightning. This example will also demonstrate how this -model can be easily torch-scripted, thanks to Pytorch Geometric. 
-""" -# python imports -import os.path as osp -from collections import namedtuple -from argparse import ArgumentParser -from typing import List, Optional, NamedTuple - -# thrid parties libraries -from torch import nn -import torch -from torch import Tensor -from torch.optim import Adam -import torch.nn.functional as F - -# Lightning imports -from pytorch_lightning import ( - Trainer, - LightningDataModule, - LightningModule -) -from pytorch_lightning.metrics import Accuracy - -try: - # Pytorch Geometric imports - from torch_geometric.nn import DNAConv, MessagePassing - from torch_geometric.datasets import Planetoid - import torch_geometric.transforms as T - from torch_geometric.data import NeighborSampler - from lightning import lightning_logo, nice_print -except Exception: - HAS_PYTORCH_GEOMETRIC = False -else: - HAS_PYTORCH_GEOMETRIC = True - - -# use to make model jittable -OptTensor = Optional[Tensor] -ListTensor = List[Tensor] - - -class TensorBatch(NamedTuple): - x: Tensor - edge_index: ListTensor - edge_attr: OptTensor - batch: OptTensor - -################################### -# LightningDataModule # -################################### - - -class CoraDataset(LightningDataModule): - - r"""The citation network datasets "Cora", "CiteSeer" and "PubMed" from the - `"Revisiting Semi-Supervised Learning with Graph Embeddings" - `_ paper. - Nodes represent documents and edges represent citation links. - Training, validation and test splits are given by binary masks. - c.f https://github.com/rusty1s/pytorch_geometric/blob/master/torch_geometric/datasets/planetoid.py - """ - - NAME = "cora" - - def __init__(self, - num_workers: int = 1, - batch_size: int = 8, - drop_last: bool = True, - pin_memory: bool = True, - num_layers: int = None): - super().__init__() - - assert num_layers is not None - - self._num_workers = num_workers - self._batch_size = batch_size - self._drop_last = drop_last - self._pin_memory = pin_memory - self._num_layers = num_layers - - self._transform = T.NormalizeFeatures() - - @property - def num_features(self): - return 1433 - - @property - def num_classes(self): - return 7 - - @property - def hyper_parameters(self): - # used to inform the model the dataset specifications - return {"num_features": self.num_features, "num_classes": self.num_classes} - - def prepare_data(self): - path = osp.join( - osp.dirname(osp.realpath(__file__)), "..", "..", "data", self.NAME - ) - self.dataset = Planetoid(path, self.NAME, transform=self._transform) - self.data = self.dataset[0] - - def create_neighbor_sampler(self, batch_size=2, stage=None): - # https://github.com/rusty1s/pytorch_geometric/tree/master/torch_geometric/data/sampler.py#L18 - return NeighborSampler( - self.data.edge_index, - # the nodes that should be considered for sampling. - node_idx=getattr(self.data, f"{stage}_mask"), - # -1 indicates all neighbors will be selected - sizes=[self._num_layers, -1], - num_workers=self._num_workers, - drop_last=self._drop_last, - pin_memory=self._pin_memory, - ) - - def train_dataloader(self): - return self.create_neighbor_sampler(stage="train") - - def validation_dataloader(self): - return self.create_neighbor_sampler(stage="val") - - def test_dataloader(self): - return self.create_neighbor_sampler(stage="test") - - def gather_data_and_convert_to_namedtuple(self, batch, batch_nb): - """ - This function will select features using node_idx - and create a NamedTuple Object. 
- """ - - usual_keys = ["x", "edge_index", "edge_attr", "batch"] - Batch: TensorBatch = namedtuple("Batch", usual_keys) - return ( - Batch( - self.data.x[batch[1]], - [e.edge_index for e in batch[2]], - None, - None, - ), - self.data.y[batch[1]], - ) - - @staticmethod - def add_argparse_args(parser): - parser.add_argument("--num_workers", type=int, default=1) - parser.add_argument("--batch_size", type=int, default=2) - parser.add_argument("--drop_last", default=True) - parser.add_argument("--pin_memory", default=True) - return parser - - -############################### -# LightningModule # -############################### - - -class DNAConvNet(LightningModule): - - r"""The dynamic neighborhood aggregation operator from the `"Just Jump: - Towards Dynamic Neighborhood Aggregation in Graph Neural Networks" - `_ paper - c.f https://github.com/rusty1s/pytorch_geometric/blob/master/torch_geometric/nn/conv/dna_conv.py#L172 - """ - - def __init__(self, - num_layers: int = 2, - hidden_channels: int = 128, - heads: int = 8, - groups: int = 16, - dropout: float = 0.8, - cached: bool = False, - num_features: int = None, - num_classes: int = None, - ): - super().__init__() - - assert num_features is not None - assert num_classes is not None - - # utils from Lightning to save __init__ arguments - self.save_hyperparameters() - hparams = self.hparams - - # Instantiate metrics - self.val_acc = Accuracy(hparams["num_classes"]) - self.test_acc = Accuracy(hparams["num_classes"]) - - # Define DNA graph convolution model - self.hidden_channels = hparams["hidden_channels"] - self.lin1 = nn.Linear(hparams["num_features"], hparams["hidden_channels"]) - - # Create ModuleList to hold all convolutions - self.convs = nn.ModuleList() - - # Iterate through the number of layers - for _ in range(hparams["num_layers"]): - - # Create a DNA Convolution - This graph convolution relies on MultiHead Attention mechanism - # to route information similar to Transformers. - # https://github.com/rusty1s/pytorch_geometric/blob/master/torch_geometric/nn/conv/dna_conv.py#L172 - self.convs.append( - DNAConv( - hparams["hidden_channels"], - hparams["heads"], - hparams["groups"], - dropout=hparams["dropout"], - cached=False, - ) - ) - # classification MLP - self.lin2 = nn.Linear(hparams["hidden_channels"], hparams["num_classes"], bias=False) - - def forward(self, batch: TensorBatch): - # batch needs to be typed for making this model jittable. 
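
The comment above marks the one TorchScript technique this removed example depended on: forward() stays scriptable when its batch argument is a NamedTuple whose fields all carry explicit types. A minimal self-contained sketch of that technique, independent of PyTorch Geometric (the class names here are made up for illustration):

    # TorchScript can compile a forward() that takes a NamedTuple argument,
    # provided every field of the NamedTuple is explicitly typed.
    # (Batch/TinyNet are illustrative names, not from this repository.)
    from typing import NamedTuple, Optional

    import torch
    from torch import Tensor, nn

    class Batch(NamedTuple):
        x: Tensor
        mask: Optional[Tensor]

    class TinyNet(nn.Module):
        def __init__(self):
            super().__init__()
            self.lin = nn.Linear(4, 2)

        def forward(self, batch: Batch) -> Tensor:
            return self.lin(batch.x)

    scripted = torch.jit.script(TinyNet())  # compiles thanks to the typed fields
    out = scripted(Batch(x=torch.randn(3, 4), mask=None))
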
- x = batch.x - x = F.relu(self.lin1(x)) - x = F.dropout(x, p=0.5, training=self.training) - x_all = x.view(-1, 1, self.hidden_channels) - - # iterate over all convolutions - for idx, conv in enumerate(self.convs): - # perform convolution using previously concatenated embedding - # through edge_index - x = F.relu(conv(x_all, batch.edge_index[idx])) - x = x.view(-1, 1, self.hidden_channels) - - # concatenate with previously computed embedding - x_all = torch.cat([x_all, x], dim=1) - - # extra latest layer embedding - x = x_all[:, -1] - - x = F.dropout(x, p=0.5, training=self.training) - - # return logits per nodes - return F.log_softmax(self.lin2(x), -1) - - def step(self, batch, batch_nb): - typed_batch, targets = self.gather_data_and_convert_to_namedtuple(batch, batch_nb) - logits = self(typed_batch) - return logits, targets - - def training_step(self, batch, batch_nb): - logits, targets = self.step(batch, batch_nb) - train_loss = F.nll_loss(logits, targets) - self.log("train_loss", train_loss, on_step=True, on_epoch=True, prog_bar=True) - return train_loss - - def validation_step(self, batch, batch_nb): - logits, targets = self.step(batch, batch_nb) - val_loss = F.nll_loss(logits, targets) - self.log("val_loss", val_loss, on_step=False, on_epoch=True, prog_bar=True) - self.log("val_acc", self.val_acc(logits, targets), on_step=False, on_epoch=True, prog_bar=True) - - def test_step(self, batch, batch_nb): - logits, targets = self.step(batch, batch_nb) - test_loss = F.nll_loss(logits, targets) - self.log("test_loss", test_loss, on_step=False, on_epoch=True, prog_bar=True) - self.log("test_acc", self.test_acc(logits, targets), on_step=False, on_epoch=True, prog_bar=True) - - # Use for jittable demonstration. - - def _convert_to_jittable(self, module): - for key, m in module._modules.items(): - if isinstance(m, MessagePassing) and m.jittable is not None: - # Pytorch Geometric MessagePassing implements a `.jittable` function - # which converts the current module into its jittable version. 
- module._modules[key] = m.jittable() - else: - self._convert_to_jittable(m) - return module - - def jittable(self): - for key, m in self._modules.items(): - self._modules[key] = self._convert_to_jittable(m) - - def configure_optimizers(self): - return Adam(self.parameters(), lr=1e-3) - - @staticmethod - def add_argparse_args(parser): - parser.add_argument("--num_layers", type=int, default=2) - parser.add_argument("--hidden_channels", type=int, default=128) - parser.add_argument("--heads", type=int, default=8) - parser.add_argument("--groups", type=int, default=16) - parser.add_argument("--dropout", type=float, default=0.8) - parser.add_argument("--cached", type=int, default=0) - parser.add_argument("--jit", default=True) - return parser - -################################# -# Instantiate Functions # -################################# - - -def instantiate_datamodule(args): - datamodule = CoraDataset( - num_workers=args.num_workers, - batch_size=args.batch_size, - drop_last=args.drop_last, - pin_memory=args.pin_memory, - num_layers=args.num_layers, - ) - return datamodule - - -def instantiate_model(args, datamodule): - model = DNAConvNet( - num_layers=args.num_layers, - hidden_channels=args.hidden_channels, - heads=args.heads, - groups=args.groups, - dropout=args.dropout, - # provide dataset specific arguments - **datamodule.hyper_parameters, - ) - if args.jit: - model.jittable() - - # Attached datamodule function to model - model.gather_data_and_convert_to_namedtuple = datamodule.gather_data_and_convert_to_namedtuple - return model - - -def get_single_batch(datamodule): - for batch in datamodule.test_dataloader(): - return datamodule.gather_data_and_convert_to_namedtuple(batch, 0) - -####################### -# Trainer Run # -####################### - - -def run(args): - - nice_print("You are about to train a TorchScripted Pytorch Geometric Lightning model !") - nice_print(lightning_logo) - - datamodule: LightningDataModule = instantiate_datamodule(args) - model: LightningModule = instantiate_model(args, datamodule) - trainer = Trainer.from_argparse_args(args) - trainer.fit(model, datamodule) - trainer.test() - - batch = get_single_batch(datamodule) - model.to_torchscript(file_path="model_trace.pt", - method='script', - example_inputs=batch) - - nice_print("Congratulations !") - nice_print("You trained your first TorchScripted Pytorch Geometric Lightning model !", last=True) - - -if __name__ == "__main__": - if not HAS_PYTORCH_GEOMETRIC: - print("Skip training. Pytorch Geometric isn't installed. 
Please, check README.md !") - - else: - parser = ArgumentParser(description="Pytorch Geometric Example") - parser = Trainer.add_argparse_args(parser) - parser = CoraDataset.add_argparse_args(parser) - parser = DNAConvNet.add_argparse_args(parser) - - cmd_line = '--max_epochs 1'.split(' ') - - run(parser.parse_args(cmd_line)) diff --git a/pl_examples/pytorch_ecosystem/pytorch_geometric/lightning.py b/pl_examples/pytorch_ecosystem/pytorch_geometric/lightning.py deleted file mode 100644 index 2c765d1449c57..0000000000000 --- a/pl_examples/pytorch_ecosystem/pytorch_geometric/lightning.py +++ /dev/null @@ -1,31 +0,0 @@ -def nice_print(msg, last=False): - print() - print("\033[0;35m" + msg + "\033[0m") - if last: - print() - - -lightning_logo = """ - #### - ########### - #################### - ############################ - ##################################### -############################################## -######################### ################### -####################### ################### -#################### #################### -################## ##################### -################ ###################### -##################### ################# -###################### ################### -##################### ##################### -#################### ####################### -################### ######################### -############################################## - ##################################### - ############################ - #################### - ########## - #### -""" diff --git a/pl_examples/pytorch_ecosystem/pytorch_geometric/pyproject.toml b/pl_examples/pytorch_ecosystem/pytorch_geometric/pyproject.toml deleted file mode 100644 index 99f516323e976..0000000000000 --- a/pl_examples/pytorch_ecosystem/pytorch_geometric/pyproject.toml +++ /dev/null @@ -1,25 +0,0 @@ -[tool.poetry] -name = "lightning-geometric" -version = "0.1.0" -description = "TorchScripted Pytorch Geometric Examples with Pytorch Lightning" -authors = ["Thomas Chaton "] - -[tool.poetry.dependencies] -python = "3.7.8" -torch = "^1.6.0" -torch-cluster = "^1.5.7" -torch-sparse = "^0.6.7" -torch-scatter = "^2.0.5" -torch-geometric = "^1.6.1" -pytorch-lightning = "^ 1.0.5" -openmesh = "^1.1.4" -torch-spline-conv = "^1.2.0" -tqdm = "^4.50.0" -pytest = "^6.1.0" - -[tool.poetry.dev-dependencies] -black = {version = "^20.8b1", allow-prereleases = true} - -[build-system] -requires = ["poetry>=0.12"] -build-backend = "poetry.masonry.api" diff --git a/pl_examples/test_examples.py b/pl_examples/test_examples.py index da21384190163..91145c5bd0d0b 100644 --- a/pl_examples/test_examples.py +++ b/pl_examples/test_examples.py @@ -1,3 +1,17 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import importlib import platform from unittest import mock diff --git a/requirements/examples.txt b/requirements/examples.txt index 6e48778cb222a..c87d10a39346f 100644 --- a/requirements/examples.txt +++ b/requirements/examples.txt @@ -1,2 +1,2 @@ torchvision>=0.4.1 -gym>=0.17.0 +gym>=0.17.0 \ No newline at end of file From 518d91542234c44617b8c44fd2109f9f478809d0 Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Thu, 17 Dec 2020 11:13:48 +0100 Subject: [PATCH 18/37] add doctests for example 1/n (#5079) * define tests * fix basic * fix gans * unet * test * drop * format * fix * revert Co-authored-by: Justus Schock <12886177+justusschock@users.noreply.github.com> Co-authored-by: Justus Schock <12886177+justusschock@users.noreply.github.com> --- pl_examples/basic_examples/autoencoder.py | 7 + .../backbone_image_classifier.py | 13 ++ .../basic_examples/conv_sequential_example.py | 6 + .../basic_examples/mnist_datamodule.py | 3 + .../basic_examples/simple_image_classifier.py | 7 + pl_examples/bug_report_model.py | 15 +- .../computer_vision_fine_tuning.py | 36 +++-- .../generative_adversarial_net.py | 63 ++++++-- pl_examples/domain_templates/imagenet.py | 20 ++- .../domain_templates/reinforce_learn_Qnet.py | 136 +++++++++++------- .../domain_templates/semantic_segmentation.py | 57 ++++---- pl_examples/domain_templates/unet.py | 52 +++++-- 12 files changed, 288 insertions(+), 127 deletions(-) diff --git a/pl_examples/basic_examples/autoencoder.py b/pl_examples/basic_examples/autoencoder.py index 72bfcb17c0872..91f7ac0a1569d 100644 --- a/pl_examples/basic_examples/autoencoder.py +++ b/pl_examples/basic_examples/autoencoder.py @@ -31,6 +31,13 @@ class LitAutoEncoder(pl.LightningModule): + """ + >>> LitAutoEncoder() # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + LitAutoEncoder( + (encoder): ... + (decoder): ... + ) + """ def __init__(self): super().__init__() diff --git a/pl_examples/basic_examples/backbone_image_classifier.py b/pl_examples/basic_examples/backbone_image_classifier.py index b0ca2efd5d76b..bb1daad301d08 100644 --- a/pl_examples/basic_examples/backbone_image_classifier.py +++ b/pl_examples/basic_examples/backbone_image_classifier.py @@ -29,6 +29,13 @@ class Backbone(torch.nn.Module): + """ + >>> Backbone() # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + Backbone( + (l1): Linear(...) + (l2): Linear(...) + ) + """ def __init__(self, hidden_dim=128): super().__init__() self.l1 = torch.nn.Linear(28 * 28, hidden_dim) @@ -42,6 +49,12 @@ def forward(self, x): class LitClassifier(pl.LightningModule): + """ + >>> LitClassifier(Backbone()) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + LitClassifier( + (backbone): ... + ) + """ def __init__(self, backbone, learning_rate=1e-3): super().__init__() self.save_hyperparameters() diff --git a/pl_examples/basic_examples/conv_sequential_example.py b/pl_examples/basic_examples/conv_sequential_example.py index 1d178c32a3ce3..84efb4bea7670 100644 --- a/pl_examples/basic_examples/conv_sequential_example.py +++ b/pl_examples/basic_examples/conv_sequential_example.py @@ -55,6 +55,12 @@ def forward(self, x): class LitResnet(pl.LightningModule): + """ + >>> LitResnet() # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + LitResnet( + (sequential_module): Sequential(...) 
+ ) + """ def __init__(self, lr=0.05, batch_size=32, manual_optimization=False): super().__init__() diff --git a/pl_examples/basic_examples/mnist_datamodule.py b/pl_examples/basic_examples/mnist_datamodule.py index eb1415cf8b981..95e20d22e1fdd 100644 --- a/pl_examples/basic_examples/mnist_datamodule.py +++ b/pl_examples/basic_examples/mnist_datamodule.py @@ -29,6 +29,9 @@ class MNISTDataModule(LightningDataModule): """ Standard MNIST, train, val, test splits and transforms + + >>> MNISTDataModule() # doctest: +ELLIPSIS + <...mnist_datamodule.MNISTDataModule object at ...> """ name = "mnist" diff --git a/pl_examples/basic_examples/simple_image_classifier.py b/pl_examples/basic_examples/simple_image_classifier.py index 6b8457e0e4897..894eeea619ba9 100644 --- a/pl_examples/basic_examples/simple_image_classifier.py +++ b/pl_examples/basic_examples/simple_image_classifier.py @@ -24,6 +24,13 @@ class LitClassifier(pl.LightningModule): + """ + >>> LitClassifier() # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + LitClassifier( + (l1): Linear(...) + (l2): Linear(...) + ) + """ def __init__(self, hidden_dim=128, learning_rate=1e-3): super().__init__() self.save_hyperparameters() diff --git a/pl_examples/bug_report_model.py b/pl_examples/bug_report_model.py index e2201db12f894..30345122e251f 100644 --- a/pl_examples/bug_report_model.py +++ b/pl_examples/bug_report_model.py @@ -28,6 +28,10 @@ class RandomDataset(Dataset): + """ + >>> RandomDataset(size=10, length=20) # doctest: +ELLIPSIS + <...bug_report_model.RandomDataset object at ...> + """ def __init__(self, size, length): self.len = length self.data = torch.randn(length, size) @@ -40,6 +44,12 @@ def __len__(self): class BoringModel(LightningModule): + """ + >>> BoringModel() # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + BoringModel( + (layer): Linear(...) + ) + """ def __init__(self): """ @@ -113,10 +123,9 @@ def configure_optimizers(self): # parser = ArgumentParser() # args = parser.parse_args(opt) -def run_test(): +def test_run(): class TestModel(BoringModel): - def on_train_epoch_start(self) -> None: print('override any method to prove your bug') @@ -140,4 +149,4 @@ def on_train_epoch_start(self) -> None: if __name__ == '__main__': cli_lightning_logo() - run_test() + test_run() diff --git a/pl_examples/domain_templates/computer_vision_fine_tuning.py b/pl_examples/domain_templates/computer_vision_fine_tuning.py index 1c60e3aa6d23f..4392ac47e837f 100644 --- a/pl_examples/domain_templates/computer_vision_fine_tuning.py +++ b/pl_examples/domain_templates/computer_vision_fine_tuning.py @@ -159,20 +159,30 @@ def _unfreeze_and_add_param_group(module: Module, class TransferLearningModel(pl.LightningModule): """Transfer Learning with pre-trained ResNet50. - Args: - hparams: Model hyperparameters - dl_path: Path where the data will be downloaded + >>> with TemporaryDirectory(dir='.') as tmp_dir: + ... TransferLearningModel(tmp_dir) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + TransferLearningModel( + (feature_extractor): Sequential(...) + (fc): Sequential(...) 
+ ) """ - def __init__(self, - dl_path: Union[str, Path], - backbone: str = 'resnet50', - train_bn: bool = True, - milestones: tuple = (5, 10), - batch_size: int = 8, - lr: float = 1e-2, - lr_scheduler_gamma: float = 1e-1, - num_workers: int = 6, **kwargs) -> None: - super().__init__() + def __init__( + self, + dl_path: Union[str, Path], + backbone: str = 'resnet50', + train_bn: bool = True, + milestones: tuple = (5, 10), + batch_size: int = 8, + lr: float = 1e-2, + lr_scheduler_gamma: float = 1e-1, + num_workers: int = 6, + **kwargs, + ) -> None: + """ + Args: + dl_path: Path where the data will be downloaded + """ + super().__init__(**kwargs) self.dl_path = dl_path self.backbone = backbone self.train_bn = train_bn diff --git a/pl_examples/domain_templates/generative_adversarial_net.py b/pl_examples/domain_templates/generative_adversarial_net.py index 210a80721d9a9..b0c324c193574 100644 --- a/pl_examples/domain_templates/generative_adversarial_net.py +++ b/pl_examples/domain_templates/generative_adversarial_net.py @@ -37,7 +37,13 @@ class Generator(nn.Module): - def __init__(self, latent_dim, img_shape): + """ + >>> Generator(img_shape=(1, 8, 8)) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + Generator( + (model): Sequential(...) + ) + """ + def __init__(self, latent_dim: int = 100, img_shape: tuple = (1, 28, 28)): super().__init__() self.img_shape = img_shape @@ -64,6 +70,12 @@ def forward(self, z): class Discriminator(nn.Module): + """ + >>> Discriminator(img_shape=(1, 28, 28)) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + Discriminator( + (model): Sequential(...) + ) + """ def __init__(self, img_shape): super().__init__() @@ -83,6 +95,37 @@ def forward(self, img): class GAN(LightningModule): + """ + >>> GAN(img_shape=(1, 8, 8)) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + GAN( + (generator): Generator( + (model): Sequential(...) + ) + (discriminator): Discriminator( + (model): Sequential(...) 
+ ) + ) + """ + def __init__( + self, + img_shape: tuple = (1, 28, 28), + lr: float = 0.0002, + b1: float = 0.5, + b2: float = 0.999, + latent_dim: int = 100, + ): + super().__init__() + + self.save_hyperparameters() + + # networks + self.generator = Generator(latent_dim=self.hparams.latent_dim, img_shape=img_shape) + self.discriminator = Discriminator(img_shape=img_shape) + + self.validation_z = torch.randn(8, self.hparams.latent_dim) + + self.example_input_array = torch.zeros(2, self.hparams.latent_dim) + @staticmethod def add_argparse_args(parent_parser: ArgumentParser): parser = ArgumentParser(parents=[parent_parser], add_help=False) @@ -96,20 +139,6 @@ def add_argparse_args(parent_parser: ArgumentParser): return parser - def __init__(self, hparams: Namespace): - super().__init__() - - self.hparams = hparams - - # networks - mnist_shape = (1, 28, 28) - self.generator = Generator(latent_dim=self.hparams.latent_dim, img_shape=mnist_shape) - self.discriminator = Discriminator(img_shape=mnist_shape) - - self.validation_z = torch.randn(8, self.hparams.latent_dim) - - self.example_input_array = torch.zeros(2, self.hparams.latent_dim) - def forward(self, z): return self.generator(z) @@ -180,6 +209,10 @@ def on_epoch_end(self): class MNISTDataModule(LightningDataModule): + """ + >>> MNISTDataModule() # doctest: +ELLIPSIS + <...generative_adversarial_net.MNISTDataModule object at ...> + """ def __init__(self, batch_size: int = 64, data_path: str = os.getcwd(), num_workers: int = 4): super().__init__() self.batch_size = batch_size diff --git a/pl_examples/domain_templates/imagenet.py b/pl_examples/domain_templates/imagenet.py index b1eea307478f9..cc36f3542a1c8 100644 --- a/pl_examples/domain_templates/imagenet.py +++ b/pl_examples/domain_templates/imagenet.py @@ -50,6 +50,12 @@ class ImageNetLightningModel(LightningModule): + """ + >>> ImageNetLightningModel(data_path='missing') # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + ImageNetLightningModel( + (model): ResNet(...) + ) + """ # pull out resnet names from torchvision models MODEL_NAMES = sorted( name for name in models.__dict__ @@ -58,14 +64,14 @@ class ImageNetLightningModel(LightningModule): def __init__( self, - arch: str, - pretrained: bool, - lr: float, - momentum: float, - weight_decay: int, data_path: str, - batch_size: int, - workers: int, + arch: str = 'resnet18', + pretrained: bool = False, + lr: float = 0.1, + momentum: float = 0.9, + weight_decay: float = 1e-4, + batch_size: int = 4, + workers: int = 2, **kwargs, ): super().__init__() diff --git a/pl_examples/domain_templates/reinforce_learn_Qnet.py b/pl_examples/domain_templates/reinforce_learn_Qnet.py index a8b9db095f377..6aee8bb6038c1 100644 --- a/pl_examples/domain_templates/reinforce_learn_Qnet.py +++ b/pl_examples/domain_templates/reinforce_learn_Qnet.py @@ -53,13 +53,19 @@ class DQN(nn.Module): """ Simple MLP network - Args: - obs_size: observation/state size of the environment - n_actions: number of discrete actions available in the environment - hidden_size: size of hidden layers + >>> DQN(10, 5) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + DQN( + (net): Sequential(...) 
+ ) """ def __init__(self, obs_size: int, n_actions: int, hidden_size: int = 128): + """ + Args: + obs_size: observation/state size of the environment + n_actions: number of discrete actions available in the environment + hidden_size: size of hidden layers + """ super(DQN, self).__init__() self.net = nn.Sequential( nn.Linear(obs_size, hidden_size), @@ -81,11 +87,15 @@ class ReplayBuffer: """ Replay Buffer for storing past experiences allowing the agent to learn from them - Args: - capacity: size of the buffer + >>> ReplayBuffer(5) # doctest: +ELLIPSIS + <...reinforce_learn_Qnet.ReplayBuffer object at ...> """ def __init__(self, capacity: int) -> None: + """ + Args: + capacity: size of the buffer + """ self.buffer = deque(maxlen=capacity) def __len__(self) -> int: @@ -113,12 +123,16 @@ class RLDataset(IterableDataset): Iterable Dataset containing the ExperienceBuffer which will be updated with new experiences during training - Args: - buffer: replay buffer - sample_size: number of experiences to sample at a time + >>> RLDataset(ReplayBuffer(5)) # doctest: +ELLIPSIS + <...reinforce_learn_Qnet.RLDataset object at ...> """ def __init__(self, buffer: ReplayBuffer, sample_size: int = 200) -> None: + """ + Args: + buffer: replay buffer + sample_size: number of experiences to sample at a time + """ self.buffer = buffer self.sample_size = sample_size @@ -132,12 +146,18 @@ class Agent: """ Base Agent class handling the interaction with the environment - Args: - env: training environment - replay_buffer: replay buffer storing experiences + >>> env = gym.make("CartPole-v0") + >>> buffer = ReplayBuffer(10) + >>> Agent(env, buffer) # doctest: +ELLIPSIS + <...reinforce_learn_Qnet.Agent object at ...> """ def __init__(self, env: gym.Env, replay_buffer: ReplayBuffer) -> None: + """ + Args: + env: training environment + replay_buffer: replay buffer storing experiences + """ self.env = env self.replay_buffer = replay_buffer self.reset() @@ -204,20 +224,34 @@ def play_step(self, net: nn.Module, epsilon: float = 0.0, device: str = 'cpu') - class DQNLightning(pl.LightningModule): - """ Basic DQN Model """ - - def __init__(self, - replay_size, - warm_start_steps: int, - gamma: float, - eps_start: int, - eps_end: int, - eps_last_frame: int, - sync_rate, - lr: float, - episode_length, - batch_size, **kwargs) -> None: - super().__init__() + """ Basic DQN Model + + >>> DQNLightning(env="CartPole-v0") # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + DQNLightning( + (net): DQN( + (net): Sequential(...) + ) + (target_net): DQN( + (net): Sequential(...) 
+ ) + ) + """ + def __init__( + self, + env: str, + replay_size: int = 200, + warm_start_steps: int = 200, + gamma: float = 0.99, + eps_start: float = 1.0, + eps_end: float = 0.01, + eps_last_frame: int = 200, + sync_rate: int = 10, + lr: float = 1e-2, + episode_length: int = 50, + batch_size: int = 4, + **kwargs, + ) -> None: + super().__init__(**kwargs) self.replay_size = replay_size self.warm_start_steps = warm_start_steps self.gamma = gamma @@ -229,7 +263,7 @@ def __init__(self, self.episode_length = episode_length self.batch_size = batch_size - self.env = gym.make(self.env) + self.env = gym.make(env) obs_size = self.env.observation_space.shape[0] n_actions = self.env.action_space.n @@ -302,8 +336,7 @@ def training_step(self, batch: Tuple[torch.Tensor, torch.Tensor], nb_batch) -> O Training loss and log metrics """ device = self.get_device(batch) - epsilon = max(self.eps_end, self.eps_start - - self.global_step + 1 / self.eps_last_frame) + epsilon = max(self.eps_end, self.eps_start - self.global_step + 1 / self.eps_last_frame) # step through environment with agent reward, done = self.agent.play_step(self.net, epsilon, device) @@ -349,6 +382,30 @@ def get_device(self, batch) -> str: """Retrieve device currently being used by minibatch""" return batch[0].device.index if self.on_gpu else 'cpu' + @staticmethod + def add_model_specific_args(parent_parser): # pragma: no-cover + parser = argparse.ArgumentParser(parents=[parent_parser]) + parser.add_argument("--batch_size", type=int, default=16, help="size of the batches") + parser.add_argument("--lr", type=float, default=1e-2, help="learning rate") + parser.add_argument("--env", type=str, default="CartPole-v0", help="gym environment tag") + parser.add_argument("--gamma", type=float, default=0.99, help="discount factor") + parser.add_argument("--sync_rate", type=int, default=10, + help="how many frames do we update the target network") + parser.add_argument("--replay_size", type=int, default=1000, + help="capacity of the replay buffer") + parser.add_argument("--warm_start_size", type=int, default=1000, + help="how many samples do we use to fill our buffer at the start of training") + parser.add_argument("--eps_last_frame", type=int, default=1000, + help="what frame should epsilon stop decaying") + parser.add_argument("--eps_start", type=float, default=1.0, help="starting value of epsilon") + parser.add_argument("--eps_end", type=float, default=0.01, help="final value of epsilon") + parser.add_argument("--episode_length", type=int, default=200, help="max length of an episode") + parser.add_argument("--max_episode_reward", type=int, default=200, + help="max episode reward in the environment") + parser.add_argument("--warm_start_steps", type=int, default=1000, + help="max episode reward in the environment") + return parser + def main(args) -> None: model = DQNLightning(**vars(args)) @@ -368,26 +425,7 @@ def main(args) -> None: np.random.seed(0) parser = argparse.ArgumentParser() - parser.add_argument("--batch_size", type=int, default=16, help="size of the batches") - parser.add_argument("--lr", type=float, default=1e-2, help="learning rate") - parser.add_argument("--env", type=str, default="CartPole-v0", help="gym environment tag") - parser.add_argument("--gamma", type=float, default=0.99, help="discount factor") - parser.add_argument("--sync_rate", type=int, default=10, - help="how many frames do we update the target network") - parser.add_argument("--replay_size", type=int, default=1000, - help="capacity of the replay buffer") - 
parser.add_argument("--warm_start_size", type=int, default=1000, - help="how many samples do we use to fill our buffer at the start of training") - parser.add_argument("--eps_last_frame", type=int, default=1000, - help="what frame should epsilon stop decaying") - parser.add_argument("--eps_start", type=float, default=1.0, help="starting value of epsilon") - parser.add_argument("--eps_end", type=float, default=0.01, help="final value of epsilon") - parser.add_argument("--episode_length", type=int, default=200, help="max length of an episode") - parser.add_argument("--max_episode_reward", type=int, default=200, - help="max episode reward in the environment") - parser.add_argument("--warm_start_steps", type=int, default=1000, - help="max episode reward in the environment") - + parser = DQNLightning.add_model_specific_args(parser) args = parser.parse_args() main(args) diff --git a/pl_examples/domain_templates/semantic_segmentation.py b/pl_examples/domain_templates/semantic_segmentation.py index 08bdc1140916a..7bcad597a9a68 100644 --- a/pl_examples/domain_templates/semantic_segmentation.py +++ b/pl_examples/domain_templates/semantic_segmentation.py @@ -142,15 +142,17 @@ class SegModel(pl.LightningModule): Adam optimizer is used along with Cosine Annealing learning rate scheduler. """ - - def __init__(self, - data_path: str, - batch_size: int, - lr: float, - num_layers: int, - features_start: int, - bilinear: bool, **kwargs): - super().__init__() + def __init__( + self, + data_path: str, + batch_size: int = 4, + lr: float = 1e-3, + num_layers: int = 3, + features_start: int = 64, + bilinear: bool = False, + **kwargs, + ): + super().__init__(**kwargs) self.data_path = data_path self.batch_size = batch_size self.lr = lr @@ -204,6 +206,18 @@ def train_dataloader(self): def val_dataloader(self): return DataLoader(self.validset, batch_size=self.batch_size, shuffle=False) + @staticmethod + def add_model_specific_args(parent_parser): # pragma: no-cover + parser = ArgumentParser(parents=[parent_parser]) + parser.add_argument("--data_path", type=str, help="path where dataset is stored") + parser.add_argument("--batch_size", type=int, default=16, help="size of the batches") + parser.add_argument("--lr", type=float, default=0.001, help="adam: learning rate") + parser.add_argument("--num_layers", type=int, default=5, help="number of layers on u-net") + parser.add_argument("--features_start", type=float, default=64, help="number of features in first layer") + parser.add_argument("--bilinear", action='store_true', default=False, + help="whether to use bilinear interpolation or transposed") + return parser + def main(hparams: Namespace): # ------------------------ @@ -224,14 +238,7 @@ def main(hparams: Namespace): # ------------------------ # 3 INIT TRAINER # ------------------------ - trainer = pl.Trainer( - gpus=hparams.gpus, - logger=logger, - max_epochs=hparams.epochs, - accumulate_grad_batches=hparams.grad_batches, - accelerator=hparams.accelerator, - precision=16 if hparams.use_amp else 32, - ) + trainer = pl.Trainer.from_argparse_args(hparams) # ------------------------ # 5 START TRAINING @@ -242,21 +249,7 @@ def main(hparams: Namespace): if __name__ == '__main__': cli_lightning_logo() parser = ArgumentParser() - parser.add_argument("--data_path", type=str, help="path where dataset is stored") - parser.add_argument("--gpus", type=int, default=-1, help="number of available GPUs") - parser.add_argument('--distributed-backend', type=str, default='dp', choices=('dp', 'ddp', 'ddp2'), - help='supports three 
options dp, ddp, ddp2') - parser.add_argument('--use_amp', action='store_true', help='if true uses 16 bit precision') - parser.add_argument("--batch_size", type=int, default=4, help="size of the batches") - parser.add_argument("--lr", type=float, default=0.001, help="adam: learning rate") - parser.add_argument("--num_layers", type=int, default=5, help="number of layers on u-net") - parser.add_argument("--features_start", type=float, default=64, help="number of features in first layer") - parser.add_argument("--bilinear", action='store_true', default=False, - help="whether to use bilinear interpolation or transposed") - parser.add_argument("--grad_batches", type=int, default=1, help="number of batches to accumulate") - parser.add_argument("--epochs", type=int, default=20, help="number of epochs to train") - parser.add_argument("--log_wandb", action='store_true', help="log training on Weights & Biases") - + parser = SegModel.add_model_specific_args(parser) hparams = parser.parse_args() main(hparams) diff --git a/pl_examples/domain_templates/unet.py b/pl_examples/domain_templates/unet.py index 20b4bdb2a4bf9..2314e19ddbfc9 100644 --- a/pl_examples/domain_templates/unet.py +++ b/pl_examples/domain_templates/unet.py @@ -22,20 +22,33 @@ class UNet(nn.Module): Architecture based on U-Net: Convolutional Networks for Biomedical Image Segmentation Link - https://arxiv.org/abs/1505.04597 - Parameters: - num_classes: Number of output classes required (default 19 for KITTI dataset) - num_layers: Number of layers in each side of U-net - features_start: Number of features in first layer - bilinear: Whether to use bilinear interpolation or transposed - convolutions for upsampling. + >>> UNet(num_classes=2, num_layers=3) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + UNet( + (layers): ModuleList( + (0): DoubleConv(...) + (1): Down(...) + (2): Down(...) + (3): Up(...) + (4): Up(...) + (5): Conv2d(64, 2, kernel_size=(1, 1), stride=(1, 1)) + ) + ) """ def __init__( - self, num_classes: int = 19, + self, + num_classes: int = 19, num_layers: int = 5, features_start: int = 64, - bilinear: bool = False + bilinear: bool = False, ): + """ + Args: + num_classes: Number of output classes required (default 19 for KITTI dataset) + num_layers: Number of layers in each side of U-net + features_start: Number of features in first layer + bilinear: Whether to use bilinear interpolation or transposed convolutions for upsampling. + """ super().__init__() self.num_layers = num_layers @@ -69,6 +82,11 @@ class DoubleConv(nn.Module): """ Double Convolution and BN and ReLU (3x3 conv -> BN -> ReLU) ** 2 + + >>> DoubleConv(4, 4) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + DoubleConv( + (net): Sequential(...) + ) """ def __init__(self, in_ch: int, out_ch: int): @@ -89,6 +107,16 @@ def forward(self, x): class Down(nn.Module): """ Combination of MaxPool2d and DoubleConv in series + + >>> Down(4, 8) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + Down( + (net): Sequential( + (0): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) + (1): DoubleConv( + (net): Sequential(...) + ) + ) + ) """ def __init__(self, in_ch: int, out_ch: int): @@ -107,6 +135,14 @@ class Up(nn.Module): Upsampling (by either bilinear interpolation or transpose convolutions) followed by concatenation of feature map from contracting path, followed by double 3x3 convolution. 
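
All of the docstring examples added in this patch are runnable doctests. One way to execute them by hand, assuming nothing beyond the standard library doctest module (the project's CI may well invoke them differently):

    # Run the >>> examples embedded in a module's docstrings. The flags
    # mirror the "# doctest: +ELLIPSIS +NORMALIZE_WHITESPACE" directives
    # attached to the individual examples.
    import doctest

    from pl_examples.domain_templates import unet

    doctest.testmod(unet, optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE, verbose=True)
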
+ + >>> Up(8, 4) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + Up( + (upsample): ConvTranspose2d(8, 4, kernel_size=(2, 2), stride=(2, 2)) + (conv): DoubleConv( + (net): Sequential(...) + ) + ) """ def __init__(self, in_ch: int, out_ch: int, bilinear: bool = False): From 3c5dad71003bed5e97c02f0dd7a466fc6955fbb2 Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Thu, 17 Dec 2020 12:03:45 +0100 Subject: [PATCH 19/37] Document speed comparison (#2072) * docs * script * dump * desc * import * import * if * norm * t * finished * isort * typing Co-authored-by: Nicki Skafte * xlabel * pandas * time Co-authored-by: Nicki Skafte --- benchmarks/__init__.py | 17 +++++ benchmarks/generate_comparison.py | 60 +++++++++++++++++ .../{test_parity.py => test_basic_parity.py} | 62 ++++++++++++------ benchmarks/test_sharded_parity.py | 14 ++++ .../benchmarks/figure-parity-times.png | Bin 0 -> 31513 bytes docs/source/benchmarking.rst | 14 ++++ docs/source/index.rst | 1 + requirements/test.txt | 1 + tests/base/datasets.py | 9 ++- 9 files changed, 155 insertions(+), 23 deletions(-) create mode 100644 benchmarks/generate_comparison.py rename benchmarks/{test_parity.py => test_basic_parity.py} (61%) create mode 100644 docs/source/_images/benchmarks/figure-parity-times.png create mode 100644 docs/source/benchmarking.rst diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py index e69de29bb2d1d..734288b07235d 100644 --- a/benchmarks/__init__.py +++ b/benchmarks/__init__.py @@ -0,0 +1,17 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os + +BENCHMARK_ROOT = os.path.dirname(__file__) +PROJECT_ROOT = os.path.dirname(BENCHMARK_ROOT) diff --git a/benchmarks/generate_comparison.py b/benchmarks/generate_comparison.py new file mode 100644 index 0000000000000..69eb47cb7e759 --- /dev/null +++ b/benchmarks/generate_comparison.py @@ -0,0 +1,60 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import os + +import matplotlib.pylab as plt +import pandas as pd + +from benchmarks.test_basic_parity import lightning_loop, vanilla_loop +from tests.base.models import ParityModuleMNIST, ParityModuleRNN + +NUM_EPOCHS = 20 +NUM_RUNS = 50 +MODEL_CLASSES = (ParityModuleRNN, ParityModuleMNIST) +PATH_HERE = os.path.dirname(__file__) +FIGURE_EXTENSION = '.png' + + +def _main(): + fig, axarr = plt.subplots(nrows=len(MODEL_CLASSES)) + + for i, cls_model in enumerate(MODEL_CLASSES): + path_csv = os.path.join(PATH_HERE, f'dump-times_{cls_model.__name__}.csv') + if os.path.isfile(path_csv): + df_time = pd.read_csv(path_csv, index_col=0) + else: + vanilla = vanilla_loop(cls_model, num_epochs=NUM_EPOCHS, num_runs=NUM_RUNS) + lightning = lightning_loop(cls_model, num_epochs=NUM_EPOCHS, num_runs=NUM_RUNS) + + df_time = pd.DataFrame({'vanilla PT': vanilla['durations'][1:], 'PT Lightning': lightning['durations'][1:]}) + df_time /= NUM_RUNS + df_time.to_csv(os.path.join(PATH_HERE, f'dump-times_{cls_model.__name__}.csv')) + # todo: add also relative X-axis ticks to see both: relative and absolute time differences + df_time.plot.hist( + ax=axarr[i], + bins=20, + alpha=0.5, + title=cls_model.__name__, + legend=True, + grid=True, + ) + axarr[i].set(xlabel='time [seconds]') + + path_fig = os.path.join(PATH_HERE, f'figure-parity-times{FIGURE_EXTENSION}') + fig.tight_layout() + fig.savefig(path_fig) + + +if __name__ == '__main__': + _main() diff --git a/benchmarks/test_parity.py b/benchmarks/test_basic_parity.py similarity index 61% rename from benchmarks/test_parity.py rename to benchmarks/test_basic_parity.py index 3508d5a3c28ac..c85984b092b9d 100644 --- a/benchmarks/test_parity.py +++ b/benchmarks/test_basic_parity.py @@ -1,8 +1,23 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import time import numpy as np import pytest import torch +from tqdm import tqdm from pytorch_lightning import seed_everything, Trainer import tests.base.develop_utils as tutils @@ -15,34 +30,33 @@ (ParityModuleMNIST, 0.25), # todo: lower this thr ]) @pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine") -def test_pytorch_parity(tmpdir, cls_model, max_diff): +def test_pytorch_parity(tmpdir, cls_model, max_diff: float, num_epochs: int = 4, num_runs: int = 3): """ Verify that the same pytorch and lightning models achieve the same results """ - num_epochs = 4 - num_rums = 3 - lightning_outs, pl_times = lightning_loop(cls_model, num_rums, num_epochs) - manual_outs, pt_times = vanilla_loop(cls_model, num_rums, num_epochs) + lightning = lightning_loop(cls_model, num_runs, num_epochs) + vanilla = vanilla_loop(cls_model, num_runs, num_epochs) # make sure the losses match exactly to 5 decimal places - for pl_out, pt_out in zip(lightning_outs, manual_outs): + for pl_out, pt_out in zip(lightning['losses'], vanilla['losses']): np.testing.assert_almost_equal(pl_out, pt_out, 5) # the fist run initialize dataset (download & filter) - tutils.assert_speed_parity_absolute(pl_times[1:], pt_times[1:], - nb_epochs=num_epochs, max_diff=max_diff) + tutils.assert_speed_parity_absolute( + lightning['durations'][1:], vanilla['durations'][1:], nb_epochs=num_epochs, max_diff=max_diff + ) def vanilla_loop(cls_model, num_runs=10, num_epochs=10): """ Returns an array with the last loss from each epoch for each run """ - device = torch.device('cuda' if torch.cuda.is_available() else "cpu") - errors = [] - times = [] + hist_losses = [] + hist_durations = [] + device = torch.device('cuda' if torch.cuda.is_available() else "cpu") torch.backends.cudnn.deterministic = True - for i in range(num_runs): + for i in tqdm(range(num_runs), desc=f'Vanilla PT with {cls_model.__name__}'): time_start = time.perf_counter() # set seed @@ -74,18 +88,21 @@ def vanilla_loop(cls_model, num_runs=10, num_epochs=10): epoch_losses.append(loss.item()) time_end = time.perf_counter() - times.append(time_end - time_start) + hist_durations.append(time_end - time_start) - errors.append(epoch_losses[-1]) + hist_losses.append(epoch_losses[-1]) - return errors, times + return { + 'losses': hist_losses, + 'durations': hist_durations, + } def lightning_loop(cls_model, num_runs=10, num_epochs=10): - errors = [] - times = [] + hist_losses = [] + hist_durations = [] - for i in range(num_runs): + for i in tqdm(range(num_runs), desc=f'PT Lightning with {cls_model.__name__}'): time_start = time.perf_counter() # set seed @@ -108,9 +125,12 @@ def lightning_loop(cls_model, num_runs=10, num_epochs=10): trainer.fit(model) final_loss = trainer.train_loop.running_loss.last().item() - errors.append(final_loss) + hist_losses.append(final_loss) time_end = time.perf_counter() - times.append(time_end - time_start) + hist_durations.append(time_end - time_start) - return errors, times + return { + 'losses': hist_losses, + 'durations': hist_durations, + } diff --git a/benchmarks/test_sharded_parity.py b/benchmarks/test_sharded_parity.py index 5d3c73347052c..c3273b6956698 100644 --- a/benchmarks/test_sharded_parity.py +++ b/benchmarks/test_sharded_parity.py @@ -1,3 +1,17 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import os
 import platform
 import time
diff --git a/docs/source/_images/benchmarks/figure-parity-times.png b/docs/source/_images/benchmarks/figure-parity-times.png
new file mode 100644
index 0000000000000000000000000000000000000000..2e8c5899020d999bdca6f066cd10ef2262f22f2c
GIT binary patch
literal 31513
[31513 bytes of base85-encoded binary PNG data omitted]
diff --git a/docs/source/index.rst b/docs/source/index.rst
+   benchmarking
 .. toctree::
diff --git a/requirements/test.txt b/requirements/test.txt
index 3cb538a98d7c8..632f40e0287b4 100644
--- a/requirements/test.txt
+++ b/requirements/test.txt
@@ -17,3 +17,4 @@
 pre-commit>=1.0
 cloudpickle>=1.3
 nltk>=3.3
+pandas # needed in benchmarks
diff --git a/tests/base/datasets.py b/tests/base/datasets.py
index 854d69b54eaf8..067f2717730bc 100644
--- a/tests/base/datasets.py
+++ b/tests/base/datasets.py
@@ -63,8 +63,13 @@ class MNIST(Dataset):
     TEST_FILE_NAME = 'test.pt'
     cache_folder_name = 'complete'

-    def __init__(self, root: str = PATH_DATASETS, train: bool = True,
-                 normalize: tuple = (0.5, 1.0), download: bool = True):
+    def __init__(
+        self,
+        root: str = PATH_DATASETS,
+        train: bool = True,
+        normalize: tuple = (0.5, 1.0),
+        download: bool = True,
+    ):
         super().__init__()
         self.root = root
         self.train = train  # training set or test set

From cb45ab0000ea1c966833f88046b39cfedfb3e0f6 Mon Sep 17 00:00:00 2001
From: Sean Naren
Date: Thu, 17 Dec 2020 13:45:02 +0000
Subject: [PATCH 20/37] Prelease 1.1.2rc (#5171)

* update CHANGELOG.md, increment for RC

* Add missing changelog update

* Added a few more

* Move to added

* Address code review

* Missing space

* Remove unreleased

* Remove lines

* Update CHANGELOG.md

Co-authored-by: Rohit Gupta
---
 CHANGELOG.md | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5f99764e5d460..0c4951bafa6d4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -56,6 +56,43 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Fixed `DDPHPCAccelerator` hangs in DDP construction by calling `init_device` ([#5157](https://github.com/PyTorchLightning/pytorch-lightning/pull/5157)) +## [1.1.2rc1] - 2020-12-17 + +### Added + +- Support number for logging with `sync_dist=True` ([#5080](https://github.com/PyTorchLightning/pytorch-lightning/pull/5080) + + +### Changed + + +### Deprecated + + +### Removed + +- `enable_pl_optimizer=False` by default to temporarily fix AMP issues ([#5163](https://github.com/PyTorchLightning/pytorch-lightning/pull/5163) + + +### Fixed + +- Metric reduction with Logging ([#5150](https://github.com/PyTorchLightning/pytorch-lightning/pull/5150) + + +- Remove nan loss in manual optimization ([#5121](https://github.com/PyTorchLightning/pytorch-lightning/pull/5121) + + +- Un-balanced logging properly supported ([#5119](https://github.com/PyTorchLightning/pytorch-lightning/pull/5119) + + +- Fix hanging in DDP HPC accelerators ([#5157](https://github.com/PyTorchLightning/pytorch-lightning/pull/5157) + + +- Fix saved filename in `ModelCheckpoint` if it already exists ([#4861](https://github.com/PyTorchLightning/pytorch-lightning/pull/4861) + + +- Fix reset `TensorRunningAccum` ([#5106](https://github.com/PyTorchLightning/pytorch-lightning/pull/5106) + ## [1.1.1] - 2020-12-15 From ac996fb0088daaf0c9ca26ed0af5b039fa3a53e0 Mon Sep 17 00:00:00 2001 From: Haswanth Aekula Date: Fri, 18 Dec 2020 19:00:32 +0530 Subject: [PATCH 21/37] Fixed docs for WandbLogger (#5128) Fixed a small bug with the `WandbLogger` docs. Co-authored-by: Jirka Borovec Co-authored-by: Roger Shieh --- pytorch_lightning/loggers/wandb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/loggers/wandb.py b/pytorch_lightning/loggers/wandb.py index 5c09cda5666fb..f0c042724538a 100644 --- a/pytorch_lightning/loggers/wandb.py +++ b/pytorch_lightning/loggers/wandb.py @@ -59,7 +59,7 @@ class WandbLogger(LightningLoggerBase): Example:: - .. code:: + .. code-block:: python from pytorch_lightning.loggers import WandbLogger from pytorch_lightning import Trainer From a5b2392652c476a75888c0cf50c0fdea3fbae0a1 Mon Sep 17 00:00:00 2001 From: Ganesh Anand Date: Fri, 18 Dec 2020 20:12:36 +0530 Subject: [PATCH 22/37] update DALIClassificationLoader to not use deprecated arguments (#4925) * update DALIClassificationLoader to not use deprecated arguments * fix line length * dali version check added and changed args accordingly * versions * checking version using disutils.version.LooseVersion now * . 
* ver * import Co-authored-by: Jirka Borovec Co-authored-by: Jirka Borovec --- .drone.yml | 3 +-- .../basic_examples/dali_image_classifier.py | 23 +++++++++++++++---- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/.drone.yml b/.drone.yml index c87130844c040..1041ebdf872c8 100644 --- a/.drone.yml +++ b/.drone.yml @@ -36,8 +36,7 @@ steps: - pip install -r ./requirements/devel.txt --upgrade-strategy only-if-needed -v --no-cache-dir - pip install git+https://${AUTH_TOKEN}@github.com/PyTorchLightning/lightning-dtrun.git@v0.0.2 -v --no-cache-dir # when Image has defined CUDa version we can switch to this package spec "nvidia-dali-cuda${CUDA_VERSION%%.*}0" - # todo: temprarl fix till https://github.com/PyTorchLightning/pytorch-lightning/pull/4922 is resolved - - pip install --extra-index-url https://developer.download.nvidia.com/compute/redist "nvidia-dali-cuda100<0.27" --upgrade-strategy only-if-needed + - pip install --extra-index-url https://developer.download.nvidia.com/compute/redist nvidia-dali-cuda100 --upgrade-strategy only-if-needed - pip list - python -m coverage run --source pytorch_lightning -m pytest pytorch_lightning tests -v --durations=25 # --flake8 # Running special tests diff --git a/pl_examples/basic_examples/dali_image_classifier.py b/pl_examples/basic_examples/dali_image_classifier.py index b2c9231fe1851..d22ff171c9b20 100644 --- a/pl_examples/basic_examples/dali_image_classifier.py +++ b/pl_examples/basic_examples/dali_image_classifier.py @@ -15,6 +15,7 @@ from argparse import ArgumentParser from random import shuffle from warnings import warn +from distutils.version import LooseVersion import numpy as np import torch @@ -31,12 +32,17 @@ from tests.base.datasets import MNIST if DALI_AVAILABLE: - import nvidia.dali.ops as ops + from nvidia.dali import ops from nvidia.dali.pipeline import Pipeline from nvidia.dali.plugin.pytorch import DALIClassificationIterator + from nvidia.dali import __version__ as dali_version + + NEW_DALI_API = LooseVersion(dali_version) >= LooseVersion('0.28.0') + if NEW_DALI_API: + from nvidia.dali.plugin.base_iterator import LastBatchPolicy else: warn('NVIDIA DALI is not available') - ops, Pipeline, DALIClassificationIterator = ..., ABC, ABC + ops, Pipeline, DALIClassificationIterator, LastBatchPolicy = ..., ABC, ABC, ABC class ExternalMNISTInputIterator(object): @@ -98,11 +104,18 @@ def __init__( dynamic_shape=False, last_batch_padded=False, ): - super().__init__(pipelines, size, reader_name, auto_reset, fill_last_batch, dynamic_shape, last_batch_padded) + if NEW_DALI_API: + last_batch_policy = LastBatchPolicy.FILL if fill_last_batch else LastBatchPolicy.DROP + super().__init__(pipelines, size, reader_name, auto_reset, dynamic_shape, + last_batch_policy=last_batch_policy, last_batch_padded=last_batch_padded) + else: + super().__init__(pipelines, size, reader_name, auto_reset, fill_last_batch, + dynamic_shape, last_batch_padded) + self._fill_last_batch = fill_last_batch def __len__(self): batch_count = self._size // (self._num_gpus * self.batch_size) - last_batch = 1 if self._fill_last_batch else 0 + last_batch = 1 if self._fill_last_batch else 1 return batch_count + last_batch @@ -179,7 +192,7 @@ def cli_main(): eii_test = ExternalMNISTInputIterator(mnist_test, args.batch_size) pipe_train = ExternalSourcePipeline(batch_size=args.batch_size, eii=eii_train, num_threads=2, device_id=0) - train_loader = DALIClassificationLoader(pipe_train, size=len(mnist_train), auto_reset=True, fill_last_batch=False) + train_loader = 
DALIClassificationLoader(pipe_train, size=len(mnist_train), auto_reset=True, fill_last_batch=True) pipe_val = ExternalSourcePipeline(batch_size=args.batch_size, eii=eii_val, num_threads=2, device_id=0) val_loader = DALIClassificationLoader(pipe_val, size=len(mnist_val), auto_reset=True, fill_last_batch=False) From d72ba90e1e8d58647a1ec75b8f08210a536cd5f7 Mon Sep 17 00:00:00 2001 From: Marijan Smetko Date: Sat, 19 Dec 2020 00:20:49 +0100 Subject: [PATCH 23/37] Github Actions deprecation (#5183) * Fix deprecation call * fix Co-authored-by: Jirka Borovec --- .github/workflows/release-docker.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/release-docker.yml b/.github/workflows/release-docker.yml index b8ca5d8723b39..3543891cf7698 100644 --- a/.github/workflows/release-docker.yml +++ b/.github/workflows/release-docker.yml @@ -26,7 +26,7 @@ jobs: - name: Get release version if: startsWith(github.ref, 'refs/tags/') || github.event_name == 'release' id: get_version - run: echo ::set-env name=RELEASE_VERSION::$(echo ${GITHUB_REF##*/}) + run: echo "::set-output name=RELEASE_VERSION::$(echo ${GITHUB_REF##*/})" - name: Publish Releases to Docker # only on releases @@ -37,6 +37,6 @@ jobs: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PASSWORD }} dockerfile: dockers/release/Dockerfile - build_args: PYTHON_VERSION=${{ matrix.python_version }},PYTORCH_VERSION=${{ matrix.pytorch_version }},LIGHTNING_VERSION=${{ env.RELEASE_VERSION }} - tags: "${{ env.RELEASE_VERSION }}-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }},latest-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}" + build_args: PYTHON_VERSION=${{ matrix.python_version }},PYTORCH_VERSION=${{ matrix.pytorch_version }},LIGHTNING_VERSION=${{ steps.get_version.outputs.RELEASE_VERSION }} + tags: "${{ steps.get_version.outputs.RELEASE_VERSION }}-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }},latest-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}" timeout-minutes: 55 From d0b23f784aa0ee37e0cc2eb64696086d7e17dfac Mon Sep 17 00:00:00 2001 From: Gregor Date: Sat, 19 Dec 2020 01:35:46 +0100 Subject: [PATCH 24/37] [bugfix] Correct call to torch.no_grad (#5124) Co-authored-by: Gregor Koporec Co-authored-by: Jirka Borovec Co-authored-by: Sean Naren --- pytorch_lightning/utilities/distributed.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/utilities/distributed.py b/pytorch_lightning/utilities/distributed.py index c315c6633b6fb..be5d781939c04 100644 --- a/pytorch_lightning/utilities/distributed.py +++ b/pytorch_lightning/utilities/distributed.py @@ -206,6 +206,6 @@ def all_gather_ddp_if_available( if sync_grads: return AllGatherGrad.apply(tensor, group) else: - with torch.no_grad: + with torch.no_grad(): return AllGatherGrad.apply(tensor, group) return tensor From dcd29aef0624f988ae80d4bf61faf50402b3f613 Mon Sep 17 00:00:00 2001 From: Boris Dayma Date: Sat, 19 Dec 2020 06:52:11 -0600 Subject: [PATCH 25/37] feat(wandb): offset logging step when resuming (#5050) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(wandb): offset logging step when resuming * feat(wandb): output warnings * fix(wandb): allow step to be None * test(wandb): update tests * feat(wandb): display warning only once * style: fix PEP issues * tests(wandb): fix tests * tests(wandb): improve test * style: fix whitespace * feat: improve warning Co-authored-by: Adrian Wälchli * 
feat(wandb): use variable from class instance Co-authored-by: Jirka Borovec * tests(wandb): check warnings * feat(wandb): use WarningCache * tests(wandb): fix tests * style: fix formatting Co-authored-by: Adrian Wälchli Co-authored-by: Jirka Borovec --- pytorch_lightning/loggers/wandb.py | 11 ++++++++++- tests/loggers/test_all.py | 6 +++++- tests/loggers/test_wandb.py | 28 +++++++++++++++++++++++++++- 3 files changed, 42 insertions(+), 3 deletions(-) diff --git a/pytorch_lightning/loggers/wandb.py b/pytorch_lightning/loggers/wandb.py index f0c042724538a..8e5311b11dcb1 100644 --- a/pytorch_lightning/loggers/wandb.py +++ b/pytorch_lightning/loggers/wandb.py @@ -31,6 +31,7 @@ from pytorch_lightning.loggers.base import LightningLoggerBase, rank_zero_experiment from pytorch_lightning.utilities import rank_zero_only +from pytorch_lightning.utilities.warning_utils import WarningCache class WandbLogger(LightningLoggerBase): @@ -66,6 +67,9 @@ class WandbLogger(LightningLoggerBase): wandb_logger = WandbLogger() trainer = Trainer(logger=wandb_logger) + Note: When logging manually through `wandb.log` or `trainer.logger.experiment.log`, + make sure to use `commit=False` so the logging step does not increase. + See Also: - `Tutorial `__ @@ -103,8 +107,9 @@ def __init__( self._log_model = log_model self._prefix = prefix self._kwargs = kwargs - # logging multiple Trainer on a single W&B run (k-fold, etc) + # logging multiple Trainer on a single W&B run (k-fold, resuming, etc) self._step_offset = 0 + self.warning_cache = WarningCache() def __getstate__(self): state = self.__dict__.copy() @@ -134,6 +139,8 @@ def experiment(self) -> Run: self._experiment = wandb.init( name=self._name, dir=self._save_dir, project=self._project, anonymous=self._anonymous, id=self._id, resume='allow', **self._kwargs) if wandb.run is None else wandb.run + # offset logging step when resuming a run + self._step_offset = self._experiment.step # save checkpoints in wandb dir to upload on W&B servers if self._log_model: self._save_dir = self._experiment.dir @@ -154,6 +161,8 @@ def log_metrics(self, metrics: Dict[str, float], step: Optional[int] = None) -> assert rank_zero_only.rank == 0, 'experiment tried to log from global_rank != 0' metrics = self._add_prefix(metrics) + if step is not None and step + self._step_offset < self.experiment.step: + self.warning_cache.warn('Trying to log at a previous step. 
Use `commit=False` when logging metrics manually.') self.experiment.log(metrics, step=(step + self._step_offset) if step is not None else None) @property diff --git a/tests/loggers/test_all.py b/tests/loggers/test_all.py index ea40814b18861..945d8945a22c2 100644 --- a/tests/loggers/test_all.py +++ b/tests/loggers/test_all.py @@ -74,7 +74,9 @@ def test_loggers_fit_test_all(tmpdir, monkeypatch): with mock.patch('pytorch_lightning.loggers.test_tube.Experiment'): _test_loggers_fit_test(tmpdir, TestTubeLogger) - with mock.patch('pytorch_lightning.loggers.wandb.wandb'): + with mock.patch('pytorch_lightning.loggers.wandb.wandb') as wandb: + wandb.run = None + wandb.init().step = 0 _test_loggers_fit_test(tmpdir, WandbLogger) @@ -368,5 +370,7 @@ def test_logger_with_prefix_all(tmpdir, monkeypatch): # WandB with mock.patch('pytorch_lightning.loggers.wandb.wandb'): logger = _instantiate_logger(WandbLogger, save_idr=tmpdir, prefix=prefix) + wandb.run = None + wandb.init().step = 0 logger.log_metrics({"test": 1.0}, step=0) logger.experiment.log.assert_called_once_with({'tmp-test': 1.0}, step=0) diff --git a/tests/loggers/test_wandb.py b/tests/loggers/test_wandb.py index fa503f5d8eeb1..398ee45ef4aa0 100644 --- a/tests/loggers/test_wandb.py +++ b/tests/loggers/test_wandb.py @@ -22,8 +22,14 @@ from tests.base import EvalModelTemplate +def get_warnings(recwarn): + warnings_text = '\n'.join(str(w.message) for w in recwarn.list) + recwarn.clear() + return warnings_text + + @mock.patch('pytorch_lightning.loggers.wandb.wandb') -def test_wandb_logger_init(wandb): +def test_wandb_logger_init(wandb, recwarn): """Verify that basic functionality of wandb logger works. Wandb doesn't work well with pytest so we have to mock it out here.""" @@ -34,6 +40,9 @@ def test_wandb_logger_init(wandb): wandb.init.assert_called_once() wandb.init().log.assert_called_once_with({'acc': 1.0}, step=None) + # mock wandb step + wandb.init().step = 0 + # test wandb.init not called if there is a W&B run wandb.init().log.reset_mock() wandb.init.reset_mock() @@ -49,15 +58,28 @@ def test_wandb_logger_init(wandb): logger.log_metrics({'acc': 1.0}, step=3) wandb.init().log.assert_called_with({'acc': 1.0}, step=6) + # log hyper parameters logger.log_hyperparams({'test': None, 'nested': {'a': 1}, 'b': [2, 3, 4]}) wandb.init().config.update.assert_called_once_with( {'test': 'None', 'nested/a': 1, 'b': [2, 3, 4]}, allow_val_change=True, ) + # watch a model logger.watch('model', 'log', 10) wandb.init().watch.assert_called_once_with('model', log='log', log_freq=10) + # verify warning for logging at a previous step + assert 'Trying to log at a previous step' not in get_warnings(recwarn) + # current step from wandb should be 6 (last logged step) + logger.experiment.step = 6 + # logging at step 2 should raise a warning (step_offset is still 3) + logger.log_metrics({'acc': 1.0}, step=2) + assert 'Trying to log at a previous step' in get_warnings(recwarn) + # logging again at step 2 should not display again the same warning + logger.log_metrics({'acc': 1.0}, step=2) + assert 'Trying to log at a previous step' not in get_warnings(recwarn) + assert logger.name == wandb.init().project_name() assert logger.version == wandb.init().id @@ -71,6 +93,7 @@ def test_wandb_pickle(wandb, tmpdir): class Experiment: """ """ id = 'the_id' + step = 0 def project_name(self): return 'the_project_name' @@ -108,8 +131,11 @@ def test_wandb_logger_dirs_creation(wandb, tmpdir): assert logger.name is None # mock return values of experiment + wandb.run = None + wandb.init().step 
= 0 logger.experiment.id = '1' logger.experiment.project_name.return_value = 'project' + logger.experiment.step = 0 for _ in range(2): _ = logger.experiment From 3b0197fce5cc1d2d62247b43122de402681561cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sun, 20 Dec 2020 01:32:17 +0100 Subject: [PATCH 26/37] reduce verbosity level in drone ci (#5190) * reduce verbosity level in drone * verbosity --- .drone.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.drone.yml b/.drone.yml index 1041ebdf872c8..b0b6c3df1b699 100644 --- a/.drone.yml +++ b/.drone.yml @@ -33,8 +33,8 @@ steps: - python --version - pip --version - nvidia-smi - - pip install -r ./requirements/devel.txt --upgrade-strategy only-if-needed -v --no-cache-dir - - pip install git+https://${AUTH_TOKEN}@github.com/PyTorchLightning/lightning-dtrun.git@v0.0.2 -v --no-cache-dir + - pip install -r ./requirements/devel.txt --upgrade-strategy only-if-needed --no-cache-dir + - pip install git+https://${AUTH_TOKEN}@github.com/PyTorchLightning/lightning-dtrun.git@v0.0.2 --no-cache-dir # when Image has defined CUDa version we can switch to this package spec "nvidia-dali-cuda${CUDA_VERSION%%.*}0" - pip install --extra-index-url https://developer.download.nvidia.com/compute/redist nvidia-dali-cuda100 --upgrade-strategy only-if-needed - pip list From cd83829aa0f05bd305ee6ae22b41902ce4e6bb7a Mon Sep 17 00:00:00 2001 From: Rohit Gupta Date: Sun, 20 Dec 2020 13:20:45 +0530 Subject: [PATCH 27/37] Remove Sourcerer (#5172) * Remove Sourcerer * trigger Co-authored-by: Jirka Borovec --- README.md | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/README.md b/README.md index a5c6bbb244730..19182098bdb76 100644 --- a/README.md +++ b/README.md @@ -73,19 +73,6 @@ Lightning can automatically export to ONNX or TorchScript for those cases. --- -## Trending contributors - -[![](https://sourcerer.io/fame/williamFalcon/pytorchlightning/pytorch-lightning/images/0)](https://sourcerer.io/fame/williamFalcon/pytorchlightning/pytorch-lightning/links/0) -[![](https://sourcerer.io/fame/williamFalcon/pytorchlightning/pytorch-lightning/images/1)](https://sourcerer.io/fame/williamFalcon/pytorchlightning/pytorch-lightning/links/1) -[![](https://sourcerer.io/fame/williamFalcon/pytorchlightning/pytorch-lightning/images/2)](https://sourcerer.io/fame/williamFalcon/pytorchlightning/pytorch-lightning/links/2) -[![](https://sourcerer.io/fame/williamFalcon/pytorchlightning/pytorch-lightning/images/3)](https://sourcerer.io/fame/williamFalcon/pytorchlightning/pytorch-lightning/links/3) -[![](https://sourcerer.io/fame/williamFalcon/pytorchlightning/pytorch-lightning/images/4)](https://sourcerer.io/fame/williamFalcon/pytorchlightning/pytorch-lightning/links/4) -[![](https://sourcerer.io/fame/williamFalcon/pytorchlightning/pytorch-lightning/images/5)](https://sourcerer.io/fame/williamFalcon/pytorchlightning/pytorch-lightning/links/5) -[![](https://sourcerer.io/fame/williamFalcon/pytorchlightning/pytorch-lightning/images/6)](https://sourcerer.io/fame/williamFalcon/pytorchlightning/pytorch-lightning/links/6) -[![](https://sourcerer.io/fame/williamFalcon/pytorchlightning/pytorch-lightning/images/7)](https://sourcerer.io/fame/williamFalcon/pytorchlightning/pytorch-lightning/links/7) - ---- - ## Continuous Integration
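The WandbLogger change in patch 25 above (#5050) revolves around how W&B advances its global step: every committing `wandb.log` call increments it, so logging manually between Lightning's own calls can push the run ahead of the trainer step and trigger the new "Trying to log at a previous step" warning. Below is a minimal sketch of the `commit=False` pattern that the added docstring note recommends; the project name and metric keys are invented for illustration:

    import wandb

    run = wandb.init(project="demo-project")  # hypothetical project name

    # Stage extra metrics without advancing the W&B global step; they are
    # flushed together with the next committing call.
    run.log({"examples/custom_metric": 0.5}, commit=False)

    # A normal call commits everything staged above and advances the step
    # exactly once, so steps passed in by the trainer never fall behind.
    run.log({"loss": 0.123})
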
From cc14fc16bf127735af23ce4047fcfbdeba614be7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sun, 20 Dec 2020 17:11:58 +0100 Subject: [PATCH 28/37] skip multi-gpu test when running on single-gpu machine (#5186) * skip test * Apply suggestions from code review Co-authored-by: Rohit Gupta Co-authored-by: Nicki Skafte --- tests/models/test_gpu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/models/test_gpu.py b/tests/models/test_gpu.py index cd61da7c008bc..169552ce1bd75 100644 --- a/tests/models/test_gpu.py +++ b/tests/models/test_gpu.py @@ -47,7 +47,7 @@ def test_multi_gpu_none_backend(tmpdir): tpipes.run_model_test(trainer_options, model) -@pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine") +@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine") @pytest.mark.parametrize('gpus', [1, [0], [1]]) def test_single_gpu_model(tmpdir, gpus): """Make sure single GPU works (DP mode).""" From fd5322d3e723cec0bc40cf29b7fe17f7ed632e69 Mon Sep 17 00:00:00 2001 From: Rohit Gupta Date: Mon, 21 Dec 2020 06:30:17 +0530 Subject: [PATCH 29/37] Update warning if ckpt directory is not empty (#5209) --- pytorch_lightning/callbacks/model_checkpoint.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pytorch_lightning/callbacks/model_checkpoint.py b/pytorch_lightning/callbacks/model_checkpoint.py index cc5252961d757..24e518fb7aa0a 100644 --- a/pytorch_lightning/callbacks/model_checkpoint.py +++ b/pytorch_lightning/callbacks/model_checkpoint.py @@ -20,11 +20,11 @@ """ +from copy import deepcopy import numbers import os -import re -from copy import deepcopy from pathlib import Path +import re from typing import Any, Dict, Optional, Union import numpy as np @@ -271,8 +271,7 @@ def __init_ckpt_dir(self, dirpath, filename, save_top_k): and len(self._fs.ls(dirpath)) > 0 ): rank_zero_warn( - f"Checkpoint directory {dirpath} exists and is not empty. With save_top_k={save_top_k}," - " all files in this directory will be deleted when a checkpoint is saved!" + f"Checkpoint directory {dirpath} exists and is not empty." ) if dirpath and self._fs.protocol == 'file': From 12d6437f65651b2fcafaf32bbcd5c548eb725e22 Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Mon, 21 Dec 2020 07:35:01 +0100 Subject: [PATCH 30/37] add make cmd - clean (#5204) Co-authored-by: Roger Shieh --- .update.sh | 17 ----------------- Makefile | 6 +++++- 2 files changed, 5 insertions(+), 18 deletions(-) delete mode 100644 .update.sh diff --git a/.update.sh b/.update.sh deleted file mode 100644 index 40fcc22d6b79b..0000000000000 --- a/.update.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash - -version=$1 - -git commit -am "release v$version" -git tag $version -m "test_tube v$version" -git push --tags origin master - -# push to pypi -rm -rf ./dist/* -python3 setup.py sdist -twine upload dist/* - -# to update docs -# cd to root dir -# mkdocs gh-deploy - diff --git a/Makefile b/Makefile index 76e8bac4e3748..55a95f0b14af2 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: test +.PHONY: test clean test: # install APEX, see https://github.com/NVIDIA/apex#linux @@ -13,3 +13,7 @@ test: # specific file # python -m coverage run --source pytorch_lightning -m py.test --flake8 --durations=0 -v -k + +clean: + # clean all temp runs + rm -rf $(shell find . 
-name "mlruns" ) From 2438d7459b108a4eda127cb6915ec170fd35044a Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Mon, 21 Dec 2020 12:04:39 +0100 Subject: [PATCH 31/37] add doctests for example 2/n segmentation (#5083) * draft * fix * drop folder Co-authored-by: chaton --- .../domain_templates/semantic_segmentation.py | 36 +++++++++++++++++++ pl_examples/pytorch_ecosystem/__init__.py | 13 ------- 2 files changed, 36 insertions(+), 13 deletions(-) delete mode 100644 pl_examples/pytorch_ecosystem/__init__.py diff --git a/pl_examples/domain_templates/semantic_segmentation.py b/pl_examples/domain_templates/semantic_segmentation.py index 7bcad597a9a68..2e718a37ac4b0 100644 --- a/pl_examples/domain_templates/semantic_segmentation.py +++ b/pl_examples/domain_templates/semantic_segmentation.py @@ -32,6 +32,19 @@ DEFAULT_VALID_LABELS = (7, 8, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31, 32, 33) +def _create_synth_kitti_dataset(path_dir: str, image_dims: tuple = (1024, 512)): + """Create synthetic dataset with random images, just to simulate that the dataset have been already downloaded.""" + path_dir_images = os.path.join(path_dir, KITTI.IMAGE_PATH) + path_dir_masks = os.path.join(path_dir, KITTI.MASK_PATH) + for p_dir in (path_dir_images, path_dir_masks): + os.makedirs(p_dir, exist_ok=True) + for i in range(3): + path_img = os.path.join(path_dir_images, f'dummy_kitti_{i}.png') + Image.new('RGB', image_dims).save(path_img) + path_mask = os.path.join(path_dir_masks, f'dummy_kitti_{i}.png') + Image.new('L', image_dims).save(path_mask) + + class KITTI(Dataset): """ Class for KITTI Semantic Segmentation Benchmark dataset @@ -53,6 +66,12 @@ class KITTI(Dataset): In the `get_item` function, images and masks are resized to the given `img_size`, masks are encoded using `encode_segmap`, and given `transform` (if any) are applied to the image only (mask does not usually require transforms, but they can be implemented in a similar way). + + >>> from pl_examples import DATASETS_PATH + >>> dataset_path = os.path.join(DATASETS_PATH, "Kitti") + >>> _create_synth_kitti_dataset(dataset_path, image_dims=(1024, 512)) + >>> KITTI(dataset_path, 'train') # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + <...semantic_segmentation.KITTI object at ...> """ IMAGE_PATH = os.path.join('training', 'image_2') MASK_PATH = os.path.join('training', 'semantic') @@ -141,6 +160,23 @@ class SegModel(pl.LightningModule): It uses the FCN ResNet50 model as an example. Adam optimizer is used along with Cosine Annealing learning rate scheduler. + + >>> from pl_examples import DATASETS_PATH + >>> dataset_path = os.path.join(DATASETS_PATH, "Kitti") + >>> _create_synth_kitti_dataset(dataset_path, image_dims=(1024, 512)) + >>> SegModel(dataset_path) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + SegModel( + (net): UNet( + (layers): ModuleList( + (0): DoubleConv(...) + (1): Down(...) + (2): Down(...) + (3): Up(...) + (4): Up(...) + (5): Conv2d(64, 19, kernel_size=(1, 1), stride=(1, 1)) + ) + ) + ) """ def __init__( self, diff --git a/pl_examples/pytorch_ecosystem/__init__.py b/pl_examples/pytorch_ecosystem/__init__.py deleted file mode 100644 index d7aa17d7f8468..0000000000000 --- a/pl_examples/pytorch_ecosystem/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright The PyTorch Lightning team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. From 64f9b4d113fc078503d09af577197e6382c6147c Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 21 Dec 2020 19:04:48 -0500 Subject: [PATCH 32/37] Update README.md --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index 19182098bdb76..649d3a86dd09c 100644 --- a/README.md +++ b/README.md @@ -42,6 +42,11 @@ Scale your models, not the boilerplate.** --- +## NEWS +[Dec 2020 - Read about how Facebook uses Lightning to standardize across teams for research and production](https://ai.facebook.com/blog/reengineering-facebook-ais-deep-learning-platforms-for-interoperability) + +--- + ## PyTorch Lightning is just organized PyTorch Lightning disentangles PyTorch code to decouple the science from the engineering. ![PT to PL](docs/source/_images/general/pl_quick_start_full_compressed.gif) From 2ddd36bcd74e44914f7ff587696d349c6a5dd850 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 21 Dec 2020 19:05:18 -0500 Subject: [PATCH 33/37] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 649d3a86dd09c..84d9571395519 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,7 @@ Scale your models, not the boilerplate.** --- ## NEWS -[Dec 2020 - Read about how Facebook uses Lightning to standardize across teams for research and production](https://ai.facebook.com/blog/reengineering-facebook-ais-deep-learning-platforms-for-interoperability) +[Dec 2020 - Read about how Facebook uses Lightning to standardize deep learning across research and production teams](https://ai.facebook.com/blog/reengineering-facebook-ais-deep-learning-platforms-for-interoperability) --- From 1c8ad3a94bce2d0b5e62b5e5a4395bb6fb25642d Mon Sep 17 00:00:00 2001 From: Alan Du Date: Wed, 23 Dec 2020 02:05:55 -0500 Subject: [PATCH 34/37] Tighten up mypy config (#5237) --- setup.cfg | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/setup.cfg b/setup.cfg index 4475fb11266d0..7b685fb8dc0e5 100644 --- a/setup.cfg +++ b/setup.cfg @@ -99,6 +99,10 @@ max-line-length = 120 files = pytorch_lightning, pl_examples, benchmarks, tests disallow_untyped_defs = True ignore_missing_imports = True +show_error_codes = True +warn_redundant_casts = True +warn_unused_configs = True +warn_unused_ignores = True # todo: add proper typing to this module... [mypy-pytorch_lightning.callbacks.*] From 365b9b561c6c4a1121129acea3735a47a373bc98 Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Wed, 23 Dec 2020 10:22:11 +0100 Subject: [PATCH 35/37] update for v1.1.2 (#5240) --- CHANGELOG.md | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0c4951bafa6d4..7cf51d80afd5f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -56,12 +56,12 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
- Fixed `DDPHPCAccelerator` hangs in DDP construction by calling `init_device` ([#5157](https://github.com/PyTorchLightning/pytorch-lightning/pull/5157)) -## [1.1.2rc1] - 2020-12-17 +## [1.1.2] - 2020-12-23 ### Added - Support number for logging with `sync_dist=True` ([#5080](https://github.com/PyTorchLightning/pytorch-lightning/pull/5080) - +- Added offset logging step when resuming for Wandb logger ([#5050](https://github.com/PyTorchLightning/pytorch-lightning/pull/5050) ### Changed @@ -73,25 +73,16 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - `enable_pl_optimizer=False` by default to temporarily fix AMP issues ([#5163](https://github.com/PyTorchLightning/pytorch-lightning/pull/5163) - ### Fixed - Metric reduction with Logging ([#5150](https://github.com/PyTorchLightning/pytorch-lightning/pull/5150) - - - Remove nan loss in manual optimization ([#5121](https://github.com/PyTorchLightning/pytorch-lightning/pull/5121) - - - Un-balanced logging properly supported ([#5119](https://github.com/PyTorchLightning/pytorch-lightning/pull/5119) - - - Fix hanging in DDP HPC accelerators ([#5157](https://github.com/PyTorchLightning/pytorch-lightning/pull/5157) - - - Fix saved filename in `ModelCheckpoint` if it already exists ([#4861](https://github.com/PyTorchLightning/pytorch-lightning/pull/4861) - - - Fix reset `TensorRunningAccum` ([#5106](https://github.com/PyTorchLightning/pytorch-lightning/pull/5106) +- Updated `DALIClassificationLoader` to not use deprecated arguments ([#4925](https://github.com/PyTorchLightning/pytorch-lightning/pull/4925) +- Corrected call to `torch.no_grad` ([#5124](https://github.com/PyTorchLightning/pytorch-lightning/pull/5124) ## [1.1.1] - 2020-12-15 From 74d065216451cbce74525d4c996441bf3255b132 Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Thu, 24 Dec 2020 00:11:42 +0100 Subject: [PATCH 36/37] flake8 ++ --- CHANGELOG.md | 6 ------ pl_examples/__init__.py | 10 +++++----- pl_examples/basic_examples/autoencoder.py | 4 ++-- .../basic_examples/backbone_image_classifier.py | 8 ++++---- .../basic_examples/dali_image_classifier.py | 8 ++++---- pl_examples/basic_examples/mnist_datamodule.py | 8 ++++---- .../domain_templates/semantic_segmentation.py | 8 ++++---- pl_examples/test_examples.py | 4 ++-- pytorch_lightning/__init__.py | 4 ++-- pytorch_lightning/loggers/wandb.py | 4 +++- pytorch_lightning/setup_tools.py | 6 +++--- tests/__init__.py | 14 +++++++------- tests/base/datasets.py | 4 ++-- tests/base/develop_utils.py | 4 ++-- tests/loggers/test_all.py | 2 +- tests/models/test_horovod.py | 12 ++++++------ .../logging_tests/test_train_loop_logging_1_0.py | 1 - tests/trainer/test_supporters.py | 1 - 18 files changed, 51 insertions(+), 57 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7cf51d80afd5f..04cc5a71d728f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -63,12 +63,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
- Support number for logging with `sync_dist=True` ([#5080](https://github.com/PyTorchLightning/pytorch-lightning/pull/5080) - Added offset logging step when resuming for Wandb logger ([#5050](https://github.com/PyTorchLightning/pytorch-lightning/pull/5050) -### Changed - - -### Deprecated - - ### Removed - `enable_pl_optimizer=False` by default to temporarily fix AMP issues ([#5163](https://github.com/PyTorchLightning/pytorch-lightning/pull/5163) diff --git a/pl_examples/__init__.py b/pl_examples/__init__.py index 147fc330ecd59..860d8b48d4393 100644 --- a/pl_examples/__init__.py +++ b/pl_examples/__init__.py @@ -2,12 +2,12 @@ from pytorch_lightning.utilities import _module_available -EXAMPLES_ROOT = os.path.dirname(__file__) -PACKAGE_ROOT = os.path.dirname(EXAMPLES_ROOT) -DATASETS_PATH = os.path.join(PACKAGE_ROOT, 'Datasets') +_EXAMPLES_ROOT = os.path.dirname(__file__) +_PACKAGE_ROOT = os.path.dirname(_EXAMPLES_ROOT) +_DATASETS_PATH = os.path.join(_PACKAGE_ROOT, 'Datasets') -TORCHVISION_AVAILABLE = _module_available("torchvision") -DALI_AVAILABLE = _module_available("nvidia.dali") +_TORCHVISION_AVAILABLE = _module_available("torchvision") +_DALI_AVAILABLE = _module_available("nvidia.dali") LIGHTNING_LOGO = """ diff --git a/pl_examples/basic_examples/autoencoder.py b/pl_examples/basic_examples/autoencoder.py index 91f7ac0a1569d..eb540d16bf2cf 100644 --- a/pl_examples/basic_examples/autoencoder.py +++ b/pl_examples/basic_examples/autoencoder.py @@ -21,9 +21,9 @@ from torch.utils.data import random_split import pytorch_lightning as pl -from pl_examples import TORCHVISION_AVAILABLE, cli_lightning_logo +from pl_examples import _TORCHVISION_AVAILABLE, cli_lightning_logo -if TORCHVISION_AVAILABLE: +if _TORCHVISION_AVAILABLE: from torchvision.datasets.mnist import MNIST from torchvision import transforms else: diff --git a/pl_examples/basic_examples/backbone_image_classifier.py b/pl_examples/basic_examples/backbone_image_classifier.py index bb1daad301d08..63517dfc9ed08 100644 --- a/pl_examples/basic_examples/backbone_image_classifier.py +++ b/pl_examples/basic_examples/backbone_image_classifier.py @@ -19,9 +19,9 @@ from torch.utils.data import DataLoader, random_split import pytorch_lightning as pl -from pl_examples import DATASETS_PATH, TORCHVISION_AVAILABLE, cli_lightning_logo +from pl_examples import _DATASETS_PATH, _TORCHVISION_AVAILABLE, cli_lightning_logo -if TORCHVISION_AVAILABLE: +if _TORCHVISION_AVAILABLE: from torchvision.datasets.mnist import MNIST from torchvision import transforms else: @@ -111,8 +111,8 @@ def cli_main(): # ------------ # data # ------------ - dataset = MNIST(DATASETS_PATH, train=True, download=True, transform=transforms.ToTensor()) - mnist_test = MNIST(DATASETS_PATH, train=False, download=True, transform=transforms.ToTensor()) + dataset = MNIST(_DATASETS_PATH, train=True, download=True, transform=transforms.ToTensor()) + mnist_test = MNIST(_DATASETS_PATH, train=False, download=True, transform=transforms.ToTensor()) mnist_train, mnist_val = random_split(dataset, [55000, 5000]) train_loader = DataLoader(mnist_train, batch_size=args.batch_size) diff --git a/pl_examples/basic_examples/dali_image_classifier.py b/pl_examples/basic_examples/dali_image_classifier.py index d22ff171c9b20..e163cb4a6f156 100644 --- a/pl_examples/basic_examples/dali_image_classifier.py +++ b/pl_examples/basic_examples/dali_image_classifier.py @@ -23,15 +23,15 @@ from torch.utils.data import random_split import pytorch_lightning as pl -from pl_examples import TORCHVISION_AVAILABLE, 
DALI_AVAILABLE, cli_lightning_logo +from pl_examples import _TORCHVISION_AVAILABLE, _DALI_AVAILABLE, cli_lightning_logo -if TORCHVISION_AVAILABLE: +if _TORCHVISION_AVAILABLE: from torchvision.datasets.mnist import MNIST from torchvision import transforms else: from tests.base.datasets import MNIST -if DALI_AVAILABLE: +if _DALI_AVAILABLE: from nvidia.dali import ops from nvidia.dali.pipeline import Pipeline from nvidia.dali.plugin.pytorch import DALIClassificationIterator @@ -166,7 +166,7 @@ def add_model_specific_args(parent_parser): def cli_main(): - if not DALI_AVAILABLE: + if not _DALI_AVAILABLE: return pl.seed_everything(1234) diff --git a/pl_examples/basic_examples/mnist_datamodule.py b/pl_examples/basic_examples/mnist_datamodule.py index 95e20d22e1fdd..6c33e4f1b77d9 100644 --- a/pl_examples/basic_examples/mnist_datamodule.py +++ b/pl_examples/basic_examples/mnist_datamodule.py @@ -16,10 +16,10 @@ from torch.utils.data import DataLoader, random_split -from pl_examples import DATASETS_PATH, TORCHVISION_AVAILABLE +from pl_examples import _DATASETS_PATH, _TORCHVISION_AVAILABLE from pytorch_lightning import LightningDataModule -if TORCHVISION_AVAILABLE: +if _TORCHVISION_AVAILABLE: from torchvision import transforms as transform_lib from torchvision.datasets import MNIST else: @@ -38,7 +38,7 @@ class MNISTDataModule(LightningDataModule): def __init__( self, - data_dir: str = DATASETS_PATH, + data_dir: str = _DATASETS_PATH, val_split: int = 5000, num_workers: int = 16, normalize: bool = False, @@ -123,7 +123,7 @@ def test_dataloader(self): @property def default_transforms(self): - if not TORCHVISION_AVAILABLE: + if not _TORCHVISION_AVAILABLE: return None if self.normalize: mnist_transforms = transform_lib.Compose( diff --git a/pl_examples/domain_templates/semantic_segmentation.py b/pl_examples/domain_templates/semantic_segmentation.py index 2e718a37ac4b0..507efc78e0f8d 100644 --- a/pl_examples/domain_templates/semantic_segmentation.py +++ b/pl_examples/domain_templates/semantic_segmentation.py @@ -67,8 +67,8 @@ class KITTI(Dataset): encoded using `encode_segmap`, and given `transform` (if any) are applied to the image only (mask does not usually require transforms, but they can be implemented in a similar way). - >>> from pl_examples import DATASETS_PATH - >>> dataset_path = os.path.join(DATASETS_PATH, "Kitti") + >>> from pl_examples import _DATASETS_PATH + >>> dataset_path = os.path.join(_DATASETS_PATH, "Kitti") >>> _create_synth_kitti_dataset(dataset_path, image_dims=(1024, 512)) >>> KITTI(dataset_path, 'train') # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE <...semantic_segmentation.KITTI object at ...> @@ -161,8 +161,8 @@ class SegModel(pl.LightningModule): Adam optimizer is used along with Cosine Annealing learning rate scheduler. 
- >>> from pl_examples import DATASETS_PATH - >>> dataset_path = os.path.join(DATASETS_PATH, "Kitti") + >>> from pl_examples import _DATASETS_PATH + >>> dataset_path = os.path.join(_DATASETS_PATH, "Kitti") >>> _create_synth_kitti_dataset(dataset_path, image_dims=(1024, 512)) >>> SegModel(dataset_path) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE SegModel( diff --git a/pl_examples/test_examples.py b/pl_examples/test_examples.py index 91145c5bd0d0b..5f92399671b37 100644 --- a/pl_examples/test_examples.py +++ b/pl_examples/test_examples.py @@ -19,7 +19,7 @@ import pytest import torch -from pl_examples import DALI_AVAILABLE +from pl_examples import _DALI_AVAILABLE ARGS_DEFAULT = """ --default_root_dir %(tmpdir)s \ @@ -104,7 +104,7 @@ def test_examples_cpu(tmpdir, import_cli, cli_args): module.cli_main() -@pytest.mark.skipif(not DALI_AVAILABLE, reason="Nvidia DALI required") +@pytest.mark.skipif(not _DALI_AVAILABLE, reason="Nvidia DALI required") @pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine") @pytest.mark.skipif(platform.system() != 'Linux', reason='Only applies to Linux platform.') @pytest.mark.parametrize('cli_args', [ARGS_GPU]) diff --git a/pytorch_lightning/__init__.py b/pytorch_lightning/__init__.py index 595750af0709a..890db586b2084 100644 --- a/pytorch_lightning/__init__.py +++ b/pytorch_lightning/__init__.py @@ -40,8 +40,8 @@ _logger.addHandler(python_logging.StreamHandler()) _logger.setLevel(python_logging.INFO) -PACKAGE_ROOT = os.path.dirname(__file__) -PROJECT_ROOT = os.path.dirname(PACKAGE_ROOT) +_PACKAGE_ROOT = os.path.dirname(__file__) +_PROJECT_ROOT = os.path.dirname(_PACKAGE_ROOT) try: # This variable is injected in the __builtins__ by the build diff --git a/pytorch_lightning/loggers/wandb.py b/pytorch_lightning/loggers/wandb.py index 8e5311b11dcb1..0d147adee5ed4 100644 --- a/pytorch_lightning/loggers/wandb.py +++ b/pytorch_lightning/loggers/wandb.py @@ -162,7 +162,9 @@ def log_metrics(self, metrics: Dict[str, float], step: Optional[int] = None) -> metrics = self._add_prefix(metrics) if step is not None and step + self._step_offset < self.experiment.step: - self.warning_cache.warn('Trying to log at a previous step. Use `commit=False` when logging metrics manually.') + self.warning_cache.warn( + 'Trying to log at a previous step. Use `commit=False` when logging metrics manually.' + ) self.experiment.log(metrics, step=(step + self._step_offset) if step is not None else None) @property diff --git a/pytorch_lightning/setup_tools.py b/pytorch_lightning/setup_tools.py index e04c4ceec56ef..07f5545df8a54 100644 --- a/pytorch_lightning/setup_tools.py +++ b/pytorch_lightning/setup_tools.py @@ -19,7 +19,7 @@ from urllib.request import Request, urlopen import warnings -from pytorch_lightning import __homepage__, __version__, PROJECT_ROOT +from pytorch_lightning import __homepage__, __version__, _PROJECT_ROOT _PATH_BADGES = os.path.join('.', 'docs', 'source', '_images', 'badges') # badge to download @@ -37,7 +37,7 @@ def _load_requirements(path_dir: str , file_name: str = 'requirements.txt', comment_char: str = '#') -> List[str]: """Load requirements from a file - >>> _load_requirements(PROJECT_ROOT) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + >>> _load_requirements(_PROJECT_ROOT) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE ['numpy...', 'torch...', ...] 
""" with open(os.path.join(path_dir, file_name), 'r') as file: @@ -155,7 +155,7 @@ def _download_badge(url_badge: str, badge_name: str, target_dir: str) -> str: def _load_long_description(path_dir: str) -> str: """Load readme as decribtion - >>> _load_long_description(PROJECT_ROOT) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + >>> _load_long_description(_PROJECT_ROOT) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE '
...' """ path_readme = os.path.join(path_dir, "README.md") diff --git a/tests/__init__.py b/tests/__init__.py index 1bb81c466e6eb..e0ec83a2efbca 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -15,17 +15,17 @@ import numpy as np -TEST_ROOT = os.path.dirname(__file__) -PROJECT_ROOT = os.path.dirname(TEST_ROOT) -TEMP_PATH = os.path.join(PROJECT_ROOT, 'test_temp') +_TEST_ROOT = os.path.dirname(__file__) +_PROJECT_ROOT = os.path.dirname(_TEST_ROOT) +_TEMP_PATH = os.path.join(_PROJECT_ROOT, 'test_temp') # todo: this setting `PYTHONPATH` may not be used by other evns like Conda for import packages -if PROJECT_ROOT not in os.getenv('PYTHONPATH', ""): +if _PROJECT_ROOT not in os.getenv('PYTHONPATH', ""): splitter = ":" if os.environ.get("PYTHONPATH", "") else "" - os.environ['PYTHONPATH'] = f'{PROJECT_ROOT}{splitter}{os.environ.get("PYTHONPATH", "")}' + os.environ['PYTHONPATH'] = f'{_PROJECT_ROOT}{splitter}{os.environ.get("PYTHONPATH", "")}' # generate a list of random seeds for each test RANDOM_PORTS = list(np.random.randint(12000, 19000, 1000)) -if not os.path.isdir(TEMP_PATH): - os.mkdir(TEMP_PATH) +if not os.path.isdir(_TEMP_PATH): + os.mkdir(_TEMP_PATH) diff --git a/tests/base/datasets.py b/tests/base/datasets.py index 067f2717730bc..33d3801c432ab 100644 --- a/tests/base/datasets.py +++ b/tests/base/datasets.py @@ -22,10 +22,10 @@ from torch import Tensor from torch.utils.data import Dataset -from tests import PROJECT_ROOT +from tests import _PROJECT_ROOT #: local path to test datasets -PATH_DATASETS = os.path.join(PROJECT_ROOT, 'Datasets') +PATH_DATASETS = os.path.join(_PROJECT_ROOT, 'Datasets') class MNIST(Dataset): diff --git a/tests/base/develop_utils.py b/tests/base/develop_utils.py index 3db8eb022288a..6eb19d3c4b1e4 100644 --- a/tests/base/develop_utils.py +++ b/tests/base/develop_utils.py @@ -19,7 +19,7 @@ from pytorch_lightning import seed_everything from pytorch_lightning.callbacks import ModelCheckpoint from pytorch_lightning.loggers import TensorBoardLogger, TestTubeLogger -from tests import TEMP_PATH, RANDOM_PORTS +from tests import _TEMP_PATH, RANDOM_PORTS from tests.base.model_template import EvalModelTemplate @@ -63,7 +63,7 @@ def get_data_path(expt_logger, path_dir=None): if hasattr(expt_logger, 'save_dir') and expt_logger.save_dir: path_dir = expt_logger.save_dir else: - path_dir = TEMP_PATH + path_dir = _TEMP_PATH path_expt = os.path.join(path_dir, name, 'version_%s' % version) # try if the new sub-folder exists, typical case for test-tube diff --git a/tests/loggers/test_all.py b/tests/loggers/test_all.py index 945d8945a22c2..0aec3b22f74a9 100644 --- a/tests/loggers/test_all.py +++ b/tests/loggers/test_all.py @@ -368,7 +368,7 @@ def test_logger_with_prefix_all(tmpdir, monkeypatch): logger.experiment.log.assert_called_once_with({"tmp-test": 1.0}, global_step=0) # WandB - with mock.patch('pytorch_lightning.loggers.wandb.wandb'): + with mock.patch('pytorch_lightning.loggers.wandb.wandb') as wandb: logger = _instantiate_logger(WandbLogger, save_idr=tmpdir, prefix=prefix) wandb.run = None wandb.init().step = 0 diff --git a/tests/models/test_horovod.py b/tests/models/test_horovod.py index 3a2ae8750443f..2f11c7df5f26f 100644 --- a/tests/models/test_horovod.py +++ b/tests/models/test_horovod.py @@ -44,9 +44,9 @@ from horovod.common.util import nccl_built nccl_built() except (ImportError, ModuleNotFoundError, AttributeError): - HOROVOD_NCCL_AVAILABLE = False + _HOROVOD_NCCL_AVAILABLE = False finally: - HOROVOD_NCCL_AVAILABLE = True + _HOROVOD_NCCL_AVAILABLE = True def 
_run_horovod(trainer_options, on_gpu=False): @@ -105,7 +105,7 @@ def test_horovod_cpu_implicit(enable_pl_optimizer, tmpdir): @pytest.mark.skipif(platform.system() == "Windows", reason="Horovod is not supported on Windows") -@pytest.mark.skipif(not HOROVOD_NCCL_AVAILABLE, reason="test requires Horovod with NCCL support") +@pytest.mark.skipif(not _HOROVOD_NCCL_AVAILABLE, reason="test requires Horovod with NCCL support") @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine") def test_horovod_multi_gpu(tmpdir): """Test Horovod with multi-GPU support.""" @@ -125,7 +125,7 @@ def test_horovod_multi_gpu(tmpdir): @pytest.mark.skipif(platform.system() == "Windows", reason="Horovod is not supported on Windows") -@pytest.mark.skipif(not HOROVOD_NCCL_AVAILABLE, reason="test requires Horovod with NCCL support") +@pytest.mark.skipif(not _HOROVOD_NCCL_AVAILABLE, reason="test requires Horovod with NCCL support") @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine") @pytest.mark.skipif(not _APEX_AVAILABLE, reason="test requires apex") def test_horovod_apex(tmpdir): @@ -149,7 +149,7 @@ def test_horovod_apex(tmpdir): @pytest.mark.skip(reason="Skip till Horovod fixes integration with Native torch.cuda.amp") @pytest.mark.skipif(platform.system() == "Windows", reason="Horovod is not supported on Windows") -@pytest.mark.skipif(not HOROVOD_NCCL_AVAILABLE, reason="test requires Horovod with NCCL support") +@pytest.mark.skipif(not _HOROVOD_NCCL_AVAILABLE, reason="test requires Horovod with NCCL support") @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine") @pytest.mark.skipif(not _NATIVE_AMP_AVAILABLE, reason="test requires torch.cuda.amp") def test_horovod_amp(tmpdir): @@ -172,7 +172,7 @@ def test_horovod_amp(tmpdir): @pytest.mark.skipif(platform.system() == "Windows", reason="Horovod is not supported on Windows") -@pytest.mark.skipif(not HOROVOD_NCCL_AVAILABLE, reason="test requires Horovod with NCCL support") +@pytest.mark.skipif(not _HOROVOD_NCCL_AVAILABLE, reason="test requires Horovod with NCCL support") @pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine") def test_horovod_transfer_batch_to_gpu(tmpdir): class TestTrainingStepModel(EvalModelTemplate): diff --git a/tests/trainer/logging_tests/test_train_loop_logging_1_0.py b/tests/trainer/logging_tests/test_train_loop_logging_1_0.py index 61ed45fa254fa..b7d43886924f6 100644 --- a/tests/trainer/logging_tests/test_train_loop_logging_1_0.py +++ b/tests/trainer/logging_tests/test_train_loop_logging_1_0.py @@ -18,7 +18,6 @@ import collections import itertools import os -import platform from unittest import mock import numpy as np diff --git a/tests/trainer/test_supporters.py b/tests/trainer/test_supporters.py index b1b0db749ef8d..1a1203e8f2dd6 100644 --- a/tests/trainer/test_supporters.py +++ b/tests/trainer/test_supporters.py @@ -22,7 +22,6 @@ from pytorch_lightning.utilities.exceptions import MisconfigurationException - def test_tensor_running_accum_reset(): """ Test that reset would set all attributes to the initialization state """ From dfbb592566cf674ed48eed2a2ec30976ebb469d4 Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Mon, 28 Dec 2020 13:49:17 +0100 Subject: [PATCH 37/37] fix test - reduce metric --- .../logging_tests/test_train_loop_logging_1_0.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/tests/trainer/logging_tests/test_train_loop_logging_1_0.py 
b/tests/trainer/logging_tests/test_train_loop_logging_1_0.py index b7d43886924f6..f418db2bd72a5 100644 --- a/tests/trainer/logging_tests/test_train_loop_logging_1_0.py +++ b/tests/trainer/logging_tests/test_train_loop_logging_1_0.py @@ -863,19 +863,18 @@ def test_metric_are_properly_reduced(tmpdir): class TestingModel(BoringModel): def __init__(self, *args, **kwargs): super().__init__() - self.train_acc = pl.metrics.Accuracy() self.val_acc = pl.metrics.Accuracy() def training_step(self, batch, batch_idx): - self.train_acc(torch.rand(1, 3, device=self.device), torch.randint(0, 2, (1,), device=self.device)) - self.log('train_acc', self.train_acc, on_step=True, on_epoch=True) - return super().training_step(batch, batch_idx) + output = super().training_step(batch, batch_idx) + self.log("train_loss", output["loss"]) + return output def validation_step(self, batch, batch_idx): - preds = torch.tensor(0, device=self.device) - targets = torch.tensor(1, device=self.device) + preds = torch.tensor([[0.9, 0.1]], device=self.device) + targets = torch.tensor([1], device=self.device) if batch_idx < 8: - targets = preds + preds = torch.tensor([[0.1, 0.9]], device=self.device) self.val_acc(preds, targets) self.log('val_acc', self.val_acc, on_step=True, on_epoch=True) return super().validation_step(batch, batch_idx) @@ -900,4 +899,4 @@ def validation_step(self, batch, batch_idx): trainer.fit(model) assert trainer.callback_metrics["val_acc"] == 8 / 32. - assert "train_acc" in trainer.callback_metrics + assert "train_loss" in trainer.callback_metrics
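Note on the final patch: the rewritten `test_metric_are_properly_reduced` is deterministic because every validation step feeds `pl.metrics.Accuracy` one fixed single-sample prediction, and exactly 8 of the 32 batches predict the correct class, so the epoch-level reduction must equal 8/32. A minimal standalone sketch of the accumulate-then-reduce behaviour the assertion relies on (illustrative only, not part of any patch; it assumes the `pytorch_lightning.metrics.Accuracy` API of the 1.1 series):

import torch
import pytorch_lightning as pl  # assumes the 1.1-series pl.metrics API

acc = pl.metrics.Accuracy()

for batch_idx in range(32):
    # Mirror the rewritten test: the first 8 batches predict class 1
    # (matching the target); the remaining 24 predict class 0 and miss.
    preds = torch.tensor([[0.1, 0.9]]) if batch_idx < 8 else torch.tensor([[0.9, 0.1]])
    targets = torch.tensor([1])
    acc(preds, targets)  # forward() accumulates the metric's running state

assert acc.compute() == 8 / 32  # epoch-level reduction over all 32 steps

Since the inputs are fixed rather than random, the reduced value is reproducible across runs, which is also why the patch drops the randomly-fed `train_acc` stream in favour of logging the plain `train_loss`.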