From c22b93e0577137cce458417955e550985040c17a Mon Sep 17 00:00:00 2001 From: TOKUNAGA Hiroyuki Date: Thu, 15 Apr 2021 11:47:22 +0900 Subject: [PATCH 1/7] Fix the condition for calling update_learning_rates --- pytorch_lightning/trainer/training_loop.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/trainer/training_loop.py b/pytorch_lightning/trainer/training_loop.py index f7da8e929e865..0302beec8f79e 100644 --- a/pytorch_lightning/trainer/training_loop.py +++ b/pytorch_lightning/trainer/training_loop.py @@ -563,7 +563,7 @@ def run_training_epoch(self): should_train_only = self.trainer.disable_validation or should_skip_eval # update epoch level lr_schedulers if no val loop outside train loop is triggered - if (val_loop_called and not should_check_val) or should_train_only: + if (not val_loop_called and not should_check_val) or should_train_only: self.trainer.optimizer_connector.update_learning_rates(interval='epoch') if should_train_only: From 82c17afa43b03b563b3f71b9bed80c6a0747a04f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sat, 17 Apr 2021 21:47:38 +0200 Subject: [PATCH 2/7] add simple test --- tests/trainer/optimization/test_optimizers.py | 43 +++++++++++++------ 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/tests/trainer/optimization/test_optimizers.py b/tests/trainer/optimization/test_optimizers.py index f5b2229f8a99e..ecd6aca6d41ed 100644 --- a/tests/trainer/optimization/test_optimizers.py +++ b/tests/trainer/optimization/test_optimizers.py @@ -11,6 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from unittest import mock + import pytest import torch from torch import optim @@ -577,21 +579,20 @@ def configure_optimizers(self): trainer.fit(model) -class TestModel(BoringModel): - - def configure_optimizers(self): - # Adagrad creates state tensors immediately, model is not yet on GPU. - return optim.Adagrad(self.parameters()) - - def on_train_start(self, *args, **kwargs): - opt = self.optimizers() - _, state = next(iter(opt.state.items())) - assert state["sum"].device == torch.device("cuda", self.local_rank) == self.device - - @RunIf(min_gpus=2, special=True) def test_optimizer_state_on_device(tmpdir): """ Test that optimizers that create state initially at instantiation still end up with the state on the GPU. """ + class TestModel(BoringModel): + + def configure_optimizers(self): + # Adagrad creates state tensors immediately, model is not yet on GPU. 
+            return optim.Adagrad(self.parameters())
+
+        def on_train_start(self, *args, **kwargs):
+            opt = self.optimizers()
+            _, state = next(iter(opt.state.items()))
+            assert state["sum"].device == torch.device("cuda", self.local_rank) == self.device
+
     model = TestModel()
     trainer = Trainer(
         default_root_dir=tmpdir,
@@ -600,3 +601,21 @@ def test_optimizer_state_on_device(tmpdir):
         fast_dev_run=True,
     )
     trainer.fit(model)
+
+
+@pytest.mark.parametrize("check_val_every_n_epoch", [1, 2])
+@mock.patch("torch.optim.lr_scheduler.StepLR.step")
+def test_lr_scheduler_update_frequency(mocked_sched, check_val_every_n_epoch, tmpdir):
+    epochs = 4
+    expected_steps = epochs + 1  # every LRScheduler gets called once at init
+
+    model = BoringModel()
+    trainer = Trainer(
+        default_root_dir=tmpdir,
+        limit_train_batches=2,
+        limit_val_batches=2,
+        check_val_every_n_epoch=check_val_every_n_epoch,
+        max_epochs=epochs,
+    )
+    trainer.fit(model)
+    assert mocked_sched.call_count == expected_steps

From de1d885181c202b15a90579d18873f34ee020756 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adrian=20W=C3=A4lchli?=
Date: Sat, 17 Apr 2021 21:49:57 +0200
Subject: [PATCH 3/7] add changelog

---
 CHANGELOG.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 114f1af38f82c..93ba9cbb3f77d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -366,6 +366,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
     * Remove hardcoding of local rank in accelerator connector ([#6878](https://github.com/PyTorchLightning/pytorch-lightning/pull/6878))
 
 
+- Fixed incorrect number of calls to LR scheduler when `check_val_every_n_epoch > 1` ([#7032](https://github.com/PyTorchLightning/pytorch-lightning/pull/7032))
+
+
 ## [1.2.7] - 2021-04-06
 
 ### Fixed

From aab0046886f3fc6a00a16d34bcf9f584b1554bb8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adrian=20W=C3=A4lchli?=
Date: Sat, 17 Apr 2021 21:51:32 +0200
Subject: [PATCH 4/7] rename test

---
 tests/trainer/optimization/test_optimizers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/trainer/optimization/test_optimizers.py b/tests/trainer/optimization/test_optimizers.py
index ecd6aca6d41ed..56337f4be130d 100644
--- a/tests/trainer/optimization/test_optimizers.py
+++ b/tests/trainer/optimization/test_optimizers.py
@@ -605,7 +605,7 @@ def on_train_start(self, *args, **kwargs):
 
 @pytest.mark.parametrize("check_val_every_n_epoch", [1, 2])
 @mock.patch("torch.optim.lr_scheduler.StepLR.step")
-def test_lr_scheduler_update_frequency(mocked_sched, check_val_every_n_epoch, tmpdir):
+def test_lr_scheduler_epoch_step_frequency(mocked_sched, check_val_every_n_epoch, tmpdir):
     epochs = 4
     expected_steps = epochs + 1  # every LRScheduler gets called once at init
 

From de569b4836879bf19a14717469800aba60ad8782 Mon Sep 17 00:00:00 2001
From: TOKUNAGA Hiroyuki
Date: Tue, 20 Apr 2021 23:52:53 +0900
Subject: [PATCH 5/7] fix calling condition for update_learning_rates

---
 pytorch_lightning/trainer/training_loop.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pytorch_lightning/trainer/training_loop.py b/pytorch_lightning/trainer/training_loop.py
index 0302beec8f79e..c61f72631a202 100644
--- a/pytorch_lightning/trainer/training_loop.py
+++ b/pytorch_lightning/trainer/training_loop.py
@@ -563,7 +563,7 @@ def run_training_epoch(self):
         should_train_only = self.trainer.disable_validation or should_skip_eval
 
         # update epoch level lr_schedulers if no val loop outside train loop is triggered
-        if (not val_loop_called and not should_check_val) or should_train_only:
+        if not should_check_val or should_train_only:
             self.trainer.optimizer_connector.update_learning_rates(interval='epoch')
 
         if should_train_only:

From 76cb58de7163be3d3b926ffef74b523c725440b6 Mon Sep 17 00:00:00 2001
From: TOKUNAGA Hiroyuki
Date: Wed, 21 Apr 2021 00:03:26 +0900
Subject: [PATCH 6/7] remove unused variable val_loop_called

---
 pytorch_lightning/trainer/training_loop.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/pytorch_lightning/trainer/training_loop.py b/pytorch_lightning/trainer/training_loop.py
index c61f72631a202..cb02cd9df521e 100644
--- a/pytorch_lightning/trainer/training_loop.py
+++ b/pytorch_lightning/trainer/training_loop.py
@@ -472,7 +472,6 @@ def run_training_epoch(self):
 
         train_dataloader = self.trainer.data_connector.get_profiled_train_dataloader(train_dataloader)
         dataloader_idx = 0
-        val_loop_called = False
 
         batch_idx = None
         is_last_batch = None
@@ -514,7 +513,6 @@ def run_training_epoch(self):
             self.trainer.validating = True
             self.trainer._run_evaluation()
             self.trainer.training = True
-            val_loop_called = True
 
             # -----------------------------------------
             # SAVE LOGGERS (ie: Tensorboard, etc...)

From 6d8fff3445ad25e1f4285d57fc6012b74025c686 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 13 May 2021 19:09:50 +0000
Subject: [PATCH 7/7] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 tests/trainer/optimization/test_optimizers.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/trainer/optimization/test_optimizers.py b/tests/trainer/optimization/test_optimizers.py
index 56337f4be130d..a81e0eecf5c61 100644
--- a/tests/trainer/optimization/test_optimizers.py
+++ b/tests/trainer/optimization/test_optimizers.py
@@ -582,6 +582,7 @@ def configure_optimizers(self):
 @RunIf(min_gpus=2, special=True)
 def test_optimizer_state_on_device(tmpdir):
     """ Test that optimizers that create state initially at instantiation still end up with the state on the GPU. """
+
     class TestModel(BoringModel):
 
         def configure_optimizers(self):
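
Note on the test's `expected_steps = epochs + 1`: torch's _LRScheduler.__init__ invokes step() once when the scheduler is constructed, so a class-level mock of StepLR.step records one call before the first training epoch runs, plus one call per epoch afterwards. The snippet below is a minimal standalone sketch of that init-time call, outside Lightning and not part of the patch series, assuming only torch and the standard library:

from unittest import mock

import torch
from torch import optim

with mock.patch("torch.optim.lr_scheduler.StepLR.step") as mocked_step:
    # A single dummy parameter is enough to construct an optimizer.
    params = [torch.nn.Parameter(torch.zeros(1))]
    optimizer = optim.SGD(params, lr=0.1)
    # _LRScheduler.__init__ calls self.step() once; the lookup resolves to the mock.
    optim.lr_scheduler.StepLR(optimizer, step_size=1)
    assert mocked_step.call_count == 1  # the "+ 1" in expected_steps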