From f5f229d2c7dbec9f146961d9409f7d5dbadefe91 Mon Sep 17 00:00:00 2001 From: tchaton Date: Fri, 11 Dec 2020 17:07:55 +0100 Subject: [PATCH 01/19] resolve urgent bug --- docs/source/optimizers.rst | 19 +++------- pytorch_lightning/__init__.py | 2 +- pytorch_lightning/core/lightning.py | 4 +- pytorch_lightning/core/optimizer.py | 22 +++++++++++ pytorch_lightning/trainer/training_loop.py | 14 +++---- setup.py | 2 +- tests/core/test_lightning_module.py | 43 ++++++++++++++++++++++ tests/core/test_lightning_optimizer.py | 9 ++++- 8 files changed, 87 insertions(+), 28 deletions(-) diff --git a/docs/source/optimizers.rst b/docs/source/optimizers.rst index 6ca72b8069d6d..a26793e004ea2 100644 --- a/docs/source/optimizers.rst +++ b/docs/source/optimizers.rst @@ -193,35 +193,27 @@ For example, here step optimizer A every 2 batches and optimizer B every 4 batch .. testcode:: - def optimizer_step(self, current_epoch, batch_nb, optimizer, optimizer_idx, second_order_closure=None, on_tpu=False, using_native_amp=False, using_lbfgs=False): - optimizer.step() - def optimizer_zero_grad(self, current_epoch, batch_idx, optimizer, opt_idx): optimizer.zero_grad() # Alternating schedule for optimizer steps (ie: GANs) - def optimizer_step(self, current_epoch, batch_nb, optimizer, optimizer_idx, second_order_closure=None, on_tpu=False, using_native_amp=False, using_lbfgs=False): + def optimizer_step(self, current_epoch, batch_nb, optimizer, optimizer_idx, closure, on_tpu=False, using_native_amp=False, using_lbfgs=False): # update generator opt every 2 steps if optimizer_i == 0: if batch_nb % 2 == 0 : - optimizer.step() - optimizer.zero_grad() + optimizer.step(closure=closure) # update discriminator opt every 4 steps if optimizer_i == 1: if batch_nb % 4 == 0 : - optimizer.step() - optimizer.zero_grad() - - # ... - # add as many optimizers as you want + optimizer.step(closure=closure) Here we add a learning-rate warm up .. testcode:: # learning rate warm-up - def optimizer_step(self, current_epoch, batch_nb, optimizer, optimizer_idx, second_order_closure=None, on_tpu=False, using_native_amp=False, using_lbfgs=False): + def optimizer_step(self, current_epoch, batch_nb, optimizer, optimizer_idx, closure, on_tpu=False, using_native_amp=False, using_lbfgs=False): # warm up lr if self.trainer.global_step < 500: lr_scale = min(1., float(self.trainer.global_step + 1) / 500.) @@ -229,8 +221,7 @@ Here we add a learning-rate warm up pg['lr'] = lr_scale * self.hparams.learning_rate # update params - optimizer.step() - optimizer.zero_grad() + optimizer.step(closure=closure) ---------- diff --git a/pytorch_lightning/__init__.py b/pytorch_lightning/__init__.py index d6c5139cb3799..2376a6b717158 100644 --- a/pytorch_lightning/__init__.py +++ b/pytorch_lightning/__init__.py @@ -1,6 +1,6 @@ """Root package info.""" -__version__ = '1.1.0' +__version__ = "20201211" __author__ = 'William Falcon et al.' 
__author_email__ = 'waf2107@columbia.edu' __license__ = 'Apache-2.0' diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index f29e7f75bfbff..ef05ce69c1828 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1170,7 +1170,6 @@ def toggle_optimizer(self, optimizer: Optimizer, optimizer_idx: int): def optimizer_step( self, - *args, epoch: int = None, batch_idx: int = None, optimizer: Optimizer = None, @@ -1179,7 +1178,6 @@ def optimizer_step( on_tpu: bool = None, using_native_amp: bool = None, using_lbfgs: bool = None, - **kwargs, ) -> None: r""" Override this method to adjust the default way the @@ -1254,7 +1252,7 @@ def optimizer_step(self, epoch, batch_idx, optimizer, optimizer_idx, if not isinstance(optimizer, LightningOptimizer): # wraps into LightingOptimizer only for running step optimizer = LightningOptimizer.to_lightning_optimizer(optimizer, self.trainer) - optimizer.step(closure=optimizer_closure, *args, **kwargs) + optimizer.step(closure=optimizer_closure) def optimizer_zero_grad( self, epoch: int, batch_idx: int, optimizer: Optimizer, optimizer_idx: int diff --git a/pytorch_lightning/core/optimizer.py b/pytorch_lightning/core/optimizer.py index e6b973b336e43..64ad5d4a5f2c2 100644 --- a/pytorch_lightning/core/optimizer.py +++ b/pytorch_lightning/core/optimizer.py @@ -49,6 +49,12 @@ def __init__(self, ) self.__dict__ = {k: v for k, v in optimizer.__dict__.items() if k != 'step'} + self.state_dict = optimizer.state_dict + self.load_state_dict = optimizer.load_state_dict + self.zero_grad = optimizer.zero_grad + self.add_param_group = optimizer.add_param_group + self.__setstate__ = optimizer.__setstate__ + self.__getstate__ = optimizer.__getstate__ # For Horovod if hasattr(optimizer, "skip_synchronize"): @@ -64,6 +70,22 @@ def __init__(self, self._support_closure = 'closure' in inspect.signature(optimizer.step).parameters self._optimizer_idx = None + @property + def state(self): + return self._optimizer.state + + @state.setter + def state(self, state): + self._optimizer.state = state + + @property + def param_groups(self): + return self._optimizer.param_groups + + @param_groups.setter + def param_groups(self, param_groups): + self._optimizer.param_groups = param_groups + @property def accumulate_grad_batches(self): return self._accumulate_grad_batches diff --git a/pytorch_lightning/trainer/training_loop.py b/pytorch_lightning/trainer/training_loop.py index 20dfb0f4b380f..68a0f4781c9a9 100644 --- a/pytorch_lightning/trainer/training_loop.py +++ b/pytorch_lightning/trainer/training_loop.py @@ -477,7 +477,7 @@ def _process_result(self, training_step_output, split_batch): return training_step_output_for_epoch_end - def optimizer_step(self, optimizer, opt_idx, batch_idx, train_step_and_backward_closure, *args, **kwargs): + def optimizer_step(self, optimizer, opt_idx, batch_idx, train_step_and_backward_closure): model_ref = self.trainer.get_model() is_lbfgs = isinstance(optimizer, torch.optim.LBFGS) @@ -491,16 +491,14 @@ def optimizer_step(self, optimizer, opt_idx, batch_idx, train_step_and_backward_ # model hook model_ref.optimizer_step( - epoch=self.trainer.current_epoch, - batch_idx=batch_idx, - optimizer=optimizer, - optimizer_idx=opt_idx, - optimizer_closure=train_step_and_backward_closure, + self.trainer.current_epoch, + batch_idx, + optimizer, + opt_idx, + train_step_and_backward_closure, on_tpu=self.trainer.use_tpu and TPU_AVAILABLE, using_native_amp=using_native_amp, using_lbfgs=is_lbfgs, - *args, - 
**kwargs, ) def on_before_zero_grad(self, optimizer): diff --git a/setup.py b/setup.py index c548d508ab434..6b68d5524167d 100755 --- a/setup.py +++ b/setup.py @@ -61,7 +61,7 @@ # the goal of the project is simplicity for researchers, don't want to add too much # engineer specific practices setup( - name="pytorch-lightning", + name="pytorch-lightning-nightly", version=pytorch_lightning.__version__, description=pytorch_lightning.__docs__, author=pytorch_lightning.__author__, diff --git a/tests/core/test_lightning_module.py b/tests/core/test_lightning_module.py index a7054a3a7ef49..9866455659a2f 100644 --- a/tests/core/test_lightning_module.py +++ b/tests/core/test_lightning_module.py @@ -103,3 +103,46 @@ def optimizer_step(self, epoch, batch_idx, optimizer, optimizer_idx, assert sgd_zero_grad.call_count == 4 assert adam_step.call_count == 2 assert adam_zero_grad.call_count == 2 + + +@pytest.mark.parametrize("enable_pl_optimizer", [False, True]) +def test_params_groups_and_state_are_accessible(enable_pl_optimizer, tmpdir): + + with patch("torch.optim.SGD.step") as sgd_step, \ + patch("torch.optim.SGD.zero_grad") as sgd_zero_grad, \ + patch("torch.optim.Adam.step") as adam_step, \ + patch("torch.optim.Adam.zero_grad") as adam_zero_grad: + + class TestModel(BoringModel): + + def training_step(self, batch, batch_idx, optimizer_idx): + output = self.layer(batch) + loss = self.loss(batch, output) + return {"loss": loss} + + def configure_optimizers(self): + optimizer = SGD(self.layer.parameters(), lr=0.1) + optimizer_2 = Adam(self.layer.parameters(), lr=0.1) + return [optimizer, optimizer_2] + + def optimizer_step(self, current_epoch, batch_nb, optimizer, optimizer_idx, closure, on_tpu=False, using_native_amp=False, using_lbfgs=False): + # warm up lr + if self.trainer.global_step < 500: + lr_scale = min(1., float(self.trainer.global_step + 1) / 500.) 
+ for pg in optimizer.param_groups: + pg['lr'] = lr_scale * 0.01 + + optimizer.step(closure=closure) + + model = TestModel() + model.training_epoch_end = None + + trainer = Trainer( + max_epochs=1, + default_root_dir=tmpdir, + limit_train_batches=8, + accumulate_grad_batches=1, + enable_pl_optimizer=enable_pl_optimizer + ) + + trainer.fit(model) \ No newline at end of file diff --git a/tests/core/test_lightning_optimizer.py b/tests/core/test_lightning_optimizer.py index 16963a2af3c0d..a914d4618973f 100644 --- a/tests/core/test_lightning_optimizer.py +++ b/tests/core/test_lightning_optimizer.py @@ -193,12 +193,19 @@ def test_state(tmpdir): model = torch.nn.Linear(3, 4) optimizer = torch.optim.Adam(model.parameters()) lightning_optimizer = LightningOptimizer(optimizer) + assert optimize.state == lightning_optimizer.state + lightning_optimizer.state = optimize.state + assert optimize.state == lightning_optimizer.state + assert optimize.params_group == lightning_optimizer.params_group + lightning_optimizer.params_group = optimize.params_group + assert optimize.params_group == lightning_optimizer.params_group assert isinstance(lightning_optimizer, LightningOptimizer) assert isinstance(lightning_optimizer, Adam) assert isinstance(lightning_optimizer, Optimizer) lightning_dict = {} special_attrs = ["_accumulate_grad_batches", "_optimizer", "_optimizer_idx", "_support_closure", - "_trainer"] + "_trainer", "__getstate__", "__setstate__", "state_dict", "load_state_dict", + "zero_grad", "__setstate__", "add_param_group"] for k, v in lightning_optimizer.__dict__.items(): if k not in special_attrs: lightning_dict[k] = v From 2ee80cd59b2f246ac101f3a9596cbd6232278d7c Mon Sep 17 00:00:00 2001 From: tchaton Date: Fri, 11 Dec 2020 17:12:35 +0100 Subject: [PATCH 02/19] update pr --- tests/core/test_lightning_optimizer.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/core/test_lightning_optimizer.py b/tests/core/test_lightning_optimizer.py index a914d4618973f..035def8b52df0 100644 --- a/tests/core/test_lightning_optimizer.py +++ b/tests/core/test_lightning_optimizer.py @@ -193,18 +193,18 @@ def test_state(tmpdir): model = torch.nn.Linear(3, 4) optimizer = torch.optim.Adam(model.parameters()) lightning_optimizer = LightningOptimizer(optimizer) - assert optimize.state == lightning_optimizer.state - lightning_optimizer.state = optimize.state - assert optimize.state == lightning_optimizer.state - assert optimize.params_group == lightning_optimizer.params_group - lightning_optimizer.params_group = optimize.params_group - assert optimize.params_group == lightning_optimizer.params_group + assert optimizer.state == lightning_optimizer.state + lightning_optimizer.state = optimizer.state + assert optimizer.state == lightning_optimizer.state + assert optimizer.params_group == lightning_optimizer.params_group + lightning_optimizer.params_group = optimizer.params_group + assert optimizer.params_group == lightning_optimizer.params_group assert isinstance(lightning_optimizer, LightningOptimizer) assert isinstance(lightning_optimizer, Adam) assert isinstance(lightning_optimizer, Optimizer) lightning_dict = {} special_attrs = ["_accumulate_grad_batches", "_optimizer", "_optimizer_idx", "_support_closure", - "_trainer", "__getstate__", "__setstate__", "state_dict", "load_state_dict", + "_trainer", "__getstate__", "__setstate__", "state_dict", "load_state_dict", "zero_grad", "__setstate__", "add_param_group"] for k, v in lightning_optimizer.__dict__.items(): if k not in special_attrs: 
From 07dda64d5a53063cf5ba9a83c9fc8ea8ef29df65 Mon Sep 17 00:00:00 2001 From: tchaton Date: Fri, 11 Dec 2020 17:17:08 +0100 Subject: [PATCH 03/19] update doc --- docs/source/optimizers.rst | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/docs/source/optimizers.rst b/docs/source/optimizers.rst index a26793e004ea2..57c343ae92b3b 100644 --- a/docs/source/optimizers.rst +++ b/docs/source/optimizers.rst @@ -191,6 +191,8 @@ override the :meth:`optimizer_step` function. For example, here step optimizer A every 2 batches and optimizer B every 4 batches +.. note:: When using Trainer(enable_pl_optimizer=True), no need to call `.zero_grad`. + .. testcode:: def optimizer_zero_grad(self, current_epoch, batch_idx, optimizer, opt_idx): @@ -208,6 +210,22 @@ For example, here step optimizer A every 2 batches and optimizer B every 4 batch if batch_nb % 4 == 0 : optimizer.step(closure=closure) + +.. testcode:: + + def optimizer_zero_grad(self, current_epoch, batch_idx, optimizer, opt_idx): + optimizer.zero_grad() + + # Alternating schedule for optimizer steps (ie: GANs) + def optimizer_step(self, current_epoch, batch_nb, optimizer, optimizer_idx, closure, on_tpu=False, using_native_amp=False, using_lbfgs=False): + # update generator opt every 2 steps + if optimizer_i == 0: + optimizer.step(closure=closure, make_optimizer_step=batch_nb % 2 == 0) + + # update discriminator opt every 4 steps + if optimizer_i == 1: + optimizer.step(closure=closure, make_optimizer_step=batch_nb % 4 == 0) + Here we add a learning-rate warm up .. testcode:: From ef63aae68638bd0fcd04b0c5f8c47ba42d9e133b Mon Sep 17 00:00:00 2001 From: tchaton Date: Fri, 11 Dec 2020 17:23:50 +0100 Subject: [PATCH 04/19] update --- pytorch_lightning/core/optimizer.py | 4 +++- tests/core/test_lightning_module.py | 5 +++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/pytorch_lightning/core/optimizer.py b/pytorch_lightning/core/optimizer.py index 64ad5d4a5f2c2..e52c6621d7b67 100644 --- a/pytorch_lightning/core/optimizer.py +++ b/pytorch_lightning/core/optimizer.py @@ -49,6 +49,8 @@ def __init__(self, ) self.__dict__ = {k: v for k, v in optimizer.__dict__.items() if k != 'step'} + + # save functions and attributes from optimizer self.state_dict = optimizer.state_dict self.load_state_dict = optimizer.load_state_dict self.zero_grad = optimizer.zero_grad @@ -73,7 +75,7 @@ def __init__(self, @property def state(self): return self._optimizer.state - + @state.setter def state(self, state): self._optimizer.state = state diff --git a/tests/core/test_lightning_module.py b/tests/core/test_lightning_module.py index 9866455659a2f..e3a597063d02e 100644 --- a/tests/core/test_lightning_module.py +++ b/tests/core/test_lightning_module.py @@ -125,7 +125,8 @@ def configure_optimizers(self): optimizer_2 = Adam(self.layer.parameters(), lr=0.1) return [optimizer, optimizer_2] - def optimizer_step(self, current_epoch, batch_nb, optimizer, optimizer_idx, closure, on_tpu=False, using_native_amp=False, using_lbfgs=False): + def optimizer_step(self, current_epoch, batch_nb, optimizer, optimizer_idx, closure, + on_tpu=False, using_native_amp=False, using_lbfgs=False): # warm up lr if self.trainer.global_step < 500: lr_scale = min(1., float(self.trainer.global_step + 1) / 500.) 
@@ -145,4 +146,4 @@ def optimizer_step(self, current_epoch, batch_nb, optimizer, optimizer_idx, clos enable_pl_optimizer=enable_pl_optimizer ) - trainer.fit(model) \ No newline at end of file + trainer.fit(model) From c7b8e1b03e3a02ea955c64f14b6e963e03168788 Mon Sep 17 00:00:00 2001 From: tchaton Date: Fri, 11 Dec 2020 17:27:29 +0100 Subject: [PATCH 05/19] remove typo --- tests/core/test_lightning_optimizer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/core/test_lightning_optimizer.py b/tests/core/test_lightning_optimizer.py index 035def8b52df0..341aec3367b4b 100644 --- a/tests/core/test_lightning_optimizer.py +++ b/tests/core/test_lightning_optimizer.py @@ -196,9 +196,9 @@ def test_state(tmpdir): assert optimizer.state == lightning_optimizer.state lightning_optimizer.state = optimizer.state assert optimizer.state == lightning_optimizer.state - assert optimizer.params_group == lightning_optimizer.params_group - lightning_optimizer.params_group = optimizer.params_group - assert optimizer.params_group == lightning_optimizer.params_group + assert optimizer.param_groups == lightning_optimizer.param_groups + lightning_optimizer.param_groups = optimizer.param_groups + assert optimizer.param_groups == lightning_optimizer.param_groups assert isinstance(lightning_optimizer, LightningOptimizer) assert isinstance(lightning_optimizer, Adam) assert isinstance(lightning_optimizer, Optimizer) From a0028a26894232ac92d81f0b51f7eb3e5609a15b Mon Sep 17 00:00:00 2001 From: tchaton Date: Fri, 11 Dec 2020 17:33:50 +0100 Subject: [PATCH 06/19] add defaults --- pytorch_lightning/core/optimizer.py | 10 +++++++++- tests/core/test_lightning_optimizer.py | 10 ++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/pytorch_lightning/core/optimizer.py b/pytorch_lightning/core/optimizer.py index e52c6621d7b67..660cf8c327baa 100644 --- a/pytorch_lightning/core/optimizer.py +++ b/pytorch_lightning/core/optimizer.py @@ -66,12 +66,20 @@ def __init__(self, else: self.__class__ = type("Lightning" + optimizer.__class__.__name__, (self.__class__, optimizer.__class__), {}) - self._trainer = None self._optimizer = optimizer + self._trainer = None self._accumulate_grad_batches = accumulate_grad_batches self._support_closure = 'closure' in inspect.signature(optimizer.step).parameters self._optimizer_idx = None + @property + def defaults(self): + return self._optimizer.defaults + + @defaults.setter + def defaults(self, defaults): + self._optimizer.defaults = defaults + @property def state(self): return self._optimizer.state diff --git a/tests/core/test_lightning_optimizer.py b/tests/core/test_lightning_optimizer.py index 341aec3367b4b..1c951f0889afe 100644 --- a/tests/core/test_lightning_optimizer.py +++ b/tests/core/test_lightning_optimizer.py @@ -193,12 +193,22 @@ def test_state(tmpdir): model = torch.nn.Linear(3, 4) optimizer = torch.optim.Adam(model.parameters()) lightning_optimizer = LightningOptimizer(optimizer) + + # test state assert optimizer.state == lightning_optimizer.state lightning_optimizer.state = optimizer.state assert optimizer.state == lightning_optimizer.state + + # test param_groups assert optimizer.param_groups == lightning_optimizer.param_groups lightning_optimizer.param_groups = optimizer.param_groups assert optimizer.param_groups == lightning_optimizer.param_groups + + # test defaults + assert optimizer.defaults == lightning_optimizer.defaults + lightning_optimizer.defaults = optimizer.defaults + assert optimizer.defaults == lightning_optimizer.defaults + 
assert isinstance(lightning_optimizer, LightningOptimizer) assert isinstance(lightning_optimizer, Adam) assert isinstance(lightning_optimizer, Optimizer) From b9e237b17d4233859f1600c626b853d5d8f4d164 Mon Sep 17 00:00:00 2001 From: chaton Date: Fri, 11 Dec 2020 17:46:30 +0100 Subject: [PATCH 07/19] Update pytorch_lightning/__init__.py --- pytorch_lightning/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/__init__.py b/pytorch_lightning/__init__.py index 2376a6b717158..d6c5139cb3799 100644 --- a/pytorch_lightning/__init__.py +++ b/pytorch_lightning/__init__.py @@ -1,6 +1,6 @@ """Root package info.""" -__version__ = "20201211" +__version__ = '1.1.0' __author__ = 'William Falcon et al.' __author_email__ = 'waf2107@columbia.edu' __license__ = 'Apache-2.0' From 4eaad2be6a2e677d9b191b7fec318c2fee11d6c2 Mon Sep 17 00:00:00 2001 From: chaton Date: Fri, 11 Dec 2020 17:46:37 +0100 Subject: [PATCH 08/19] Update setup.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 6b68d5524167d..c548d508ab434 100755 --- a/setup.py +++ b/setup.py @@ -61,7 +61,7 @@ # the goal of the project is simplicity for researchers, don't want to add too much # engineer specific practices setup( - name="pytorch-lightning-nightly", + name="pytorch-lightning", version=pytorch_lightning.__version__, description=pytorch_lightning.__docs__, author=pytorch_lightning.__author__, From 54a6aee5934c66f6d1bf68531a76e4e8b8cb8b31 Mon Sep 17 00:00:00 2001 From: tchaton Date: Fri, 11 Dec 2020 17:55:59 +0100 Subject: [PATCH 09/19] update doc --- docs/source/optimizers.rst | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/docs/source/optimizers.rst b/docs/source/optimizers.rst index 57c343ae92b3b..2b79f7f62cac9 100644 --- a/docs/source/optimizers.rst +++ b/docs/source/optimizers.rst @@ -241,6 +241,19 @@ Here we add a learning-rate warm up # update params optimizer.step(closure=closure) +The default ``optimizer_step`` is relying on the internal ``LightningOptimizer`` to properly perform a step. + +.. testcode:: + + from pytorch_lightning.core.optimizer import LightningOptimizer + + # function hook in LightningModule + def optimizer_step(self, current_epoch, batch_nb, optimizer, optimizer_idx, closure, on_tpu=False, using_native_amp=False, using_lbfgs=False): + if not isinstance(optimizer, LightningOptimizer): + # wraps into LightingOptimizer only for running step + optimizer = LightningOptimizer.to_lightning_optimizer(optimizer, self.trainer) + optimizer.step(closure=closure) + ---------- Using the closure functions for optimization From ed73b2e8497676f30ec456fe2211557c92e821f0 Mon Sep 17 00:00:00 2001 From: chaton Date: Fri, 11 Dec 2020 18:24:03 +0100 Subject: [PATCH 10/19] Update docs/source/optimizers.rst Co-authored-by: Jirka Borovec --- docs/source/optimizers.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/optimizers.rst b/docs/source/optimizers.rst index 2b79f7f62cac9..368fd46c8e611 100644 --- a/docs/source/optimizers.rst +++ b/docs/source/optimizers.rst @@ -191,7 +191,7 @@ override the :meth:`optimizer_step` function. For example, here step optimizer A every 2 batches and optimizer B every 4 batches -.. note:: When using Trainer(enable_pl_optimizer=True), no need to call `.zero_grad`. +.. note:: When using ``Trainer(enable_pl_optimizer=True)``, no need to call ``.zero_grad(...)``. .. 
testcode:: From 94698ea89cc79d738d31fc42bad8fb11245b1226 Mon Sep 17 00:00:00 2001 From: tchaton Date: Fri, 11 Dec 2020 18:34:32 +0100 Subject: [PATCH 11/19] update --- docs/source/optimizers.rst | 4 ++- pytorch_lightning/core/optimizer.py | 8 +---- tests/core/test_lightning_optimizer.py | 49 -------------------------- 3 files changed, 4 insertions(+), 57 deletions(-) diff --git a/docs/source/optimizers.rst b/docs/source/optimizers.rst index 368fd46c8e611..eb1e8053e453a 100644 --- a/docs/source/optimizers.rst +++ b/docs/source/optimizers.rst @@ -191,7 +191,7 @@ override the :meth:`optimizer_step` function. For example, here step optimizer A every 2 batches and optimizer B every 4 batches -.. note:: When using ``Trainer(enable_pl_optimizer=True)``, no need to call ``.zero_grad(...)``. +.. note:: When using Trainer(enable_pl_optimizer=True), no need to call `.zero_grad`. .. testcode:: @@ -210,6 +210,8 @@ For example, here step optimizer A every 2 batches and optimizer B every 4 batch if batch_nb % 4 == 0 : optimizer.step(closure=closure) +.. note:: When using ``Trainer(enable_pl_optimizer=True)``, ``.step`` accept a boolean ``make_optimizer_step`` which +can be used as below. .. testcode:: diff --git a/pytorch_lightning/core/optimizer.py b/pytorch_lightning/core/optimizer.py index 660cf8c327baa..21c5bde4c57a8 100644 --- a/pytorch_lightning/core/optimizer.py +++ b/pytorch_lightning/core/optimizer.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import inspect import types from typing import Any, Callable, Optional from weakref import proxy @@ -69,7 +68,6 @@ def __init__(self, self._optimizer = optimizer self._trainer = None self._accumulate_grad_batches = accumulate_grad_batches - self._support_closure = 'closure' in inspect.signature(optimizer.step).parameters self._optimizer_idx = None @property @@ -143,11 +141,7 @@ def __optimizer_step(self, *args, closure: Optional[Callable] = None, profiler_n else: with trainer.profiler.profile(profiler_name): - if self._support_closure: - optimizer.step(closure=closure, *args, **kwargs) - else: - closure() - optimizer.step(*args, **kwargs) + optimizer.step(closure=closure, *args, **kwargs) accelerator_backend = trainer.accelerator_backend if accelerator_backend is not None and accelerator_backend.rpc_enabled: diff --git a/tests/core/test_lightning_optimizer.py b/tests/core/test_lightning_optimizer.py index 1c951f0889afe..a9fcf918cc699 100644 --- a/tests/core/test_lightning_optimizer.py +++ b/tests/core/test_lightning_optimizer.py @@ -224,55 +224,6 @@ def test_state(tmpdir): assert optimizer.state == lightning_optimizer.state -def test_lightning_optimizer_with_wrong_optimizer_interface(tmpdir): - class OptimizerWrapper(object): - def __init__(self, optimizer): - self.optim = optimizer - self.state_dict = self.optim.state_dict - self.load_state_dict = self.optim.load_state_dict - self.zero_grad = self.optim.zero_grad - self.add_param_group = self.optim.add_param_group - self.__setstate__ = self.optim.__setstate__ - self.__getstate__ = self.optim.__getstate__ - self.__repr__ = self.optim.__repr__ - - @property - def __class__(self): - return Optimizer - - @property - def state(self): - return self.optim.state - - @property - def param_groups(self): - return self.optim.param_groups - - @param_groups.setter - def param_groups(self, value): - self.optim.param_groups = value - - def step(self): - # wrongly 
defined step. Should contain closure - self.optim.step(closure=None) - - class TestLightningOptimizerModel(BoringModel): - - def configure_optimizers(self): - optimizer = torch.optim.Adam(self.parameters(), lr=0.1) - optimizer = OptimizerWrapper(optimizer) - return [optimizer] - - model = TestLightningOptimizerModel() - trainer = Trainer( - default_root_dir=tmpdir, - max_epochs=1, - weights_summary=None, - log_every_n_steps=1, - ) - trainer.fit(model) - - def test_lightning_optimizer_automatic_optimization(tmpdir): """ Test lightning optimize works with make_optimizer_step in automatic_optimization From 53f62e49c0281c3cb01c6ede5aae605a6977d863 Mon Sep 17 00:00:00 2001 From: tchaton Date: Fri, 11 Dec 2020 18:38:22 +0100 Subject: [PATCH 12/19] resolve doc --- docs/source/optimizers.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/source/optimizers.rst b/docs/source/optimizers.rst index eb1e8053e453a..7d83c2176bd64 100644 --- a/docs/source/optimizers.rst +++ b/docs/source/optimizers.rst @@ -210,8 +210,7 @@ For example, here step optimizer A every 2 batches and optimizer B every 4 batch if batch_nb % 4 == 0 : optimizer.step(closure=closure) -.. note:: When using ``Trainer(enable_pl_optimizer=True)``, ``.step`` accept a boolean ``make_optimizer_step`` which -can be used as below. +.. note:: When using ``Trainer(enable_pl_optimizer=True)``, ``.step`` accept a boolean ``make_optimizer_step`` which can be used as follow. .. testcode:: From 9e15cd8c7155da3c42cb50584686d8ed2f4d6e47 Mon Sep 17 00:00:00 2001 From: tchaton Date: Fri, 11 Dec 2020 18:45:30 +0100 Subject: [PATCH 13/19] debug test --- tests/trainer/optimization/test_manual_optimization.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/trainer/optimization/test_manual_optimization.py b/tests/trainer/optimization/test_manual_optimization.py index 9e369a874acd0..4f0950d1875b8 100644 --- a/tests/trainer/optimization/test_manual_optimization.py +++ b/tests/trainer/optimization/test_manual_optimization.py @@ -933,9 +933,9 @@ def automatic_optimization(self) -> bool: ) trainer.fit(model) - expected_calls = [call(optim='sgd') for s in range(4)] + expected_calls = [call(closure=ANY, optim='sgd') for s in range(4)] mock_sgd_step.assert_has_calls(expected_calls) - expected_calls = [call() for s in range(2)] + expected_calls = [call(closure=ANY) for s in range(2)] mock_adam_step.assert_has_calls(expected_calls) From c68b6041d945e733341ed54285ddccfa6d063e7a Mon Sep 17 00:00:00 2001 From: tchaton Date: Fri, 11 Dec 2020 18:47:33 +0100 Subject: [PATCH 14/19] update test --- tests/trainer/optimization/test_manual_optimization.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/trainer/optimization/test_manual_optimization.py b/tests/trainer/optimization/test_manual_optimization.py index 4f0950d1875b8..33d14e852b285 100644 --- a/tests/trainer/optimization/test_manual_optimization.py +++ b/tests/trainer/optimization/test_manual_optimization.py @@ -855,7 +855,7 @@ def automatic_optimization(self) -> bool: ) trainer.fit(model) - expected_calls = [call() for s in range(2)] + expected_calls = [call(closure=ANY) for s in range(2)] step_mock.assert_has_calls(expected_calls) From 618c1d2a9f39f9e10f63c1bfef9a386db8dfadd3 Mon Sep 17 00:00:00 2001 From: chaton Date: Fri, 11 Dec 2020 18:48:24 +0100 Subject: [PATCH 15/19] Update docs/source/optimizers.rst MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Adrian Wälchli --- 
docs/source/optimizers.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/optimizers.rst b/docs/source/optimizers.rst index 7d83c2176bd64..035228b5d04b0 100644 --- a/docs/source/optimizers.rst +++ b/docs/source/optimizers.rst @@ -191,7 +191,7 @@ override the :meth:`optimizer_step` function. For example, here step optimizer A every 2 batches and optimizer B every 4 batches -.. note:: When using Trainer(enable_pl_optimizer=True), no need to call `.zero_grad`. +.. note:: When using Trainer(enable_pl_optimizer=True), there is no need to call `.zero_grad()`. .. testcode:: From 34cb86b82b9e6618c8dec16ccbcfe16d5721f5d6 Mon Sep 17 00:00:00 2001 From: chaton Date: Fri, 11 Dec 2020 18:48:38 +0100 Subject: [PATCH 16/19] Update docs/source/optimizers.rst MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Adrian Wälchli --- docs/source/optimizers.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/optimizers.rst b/docs/source/optimizers.rst index 035228b5d04b0..87b283a1bf2aa 100644 --- a/docs/source/optimizers.rst +++ b/docs/source/optimizers.rst @@ -221,7 +221,7 @@ For example, here step optimizer A every 2 batches and optimizer B every 4 batch def optimizer_step(self, current_epoch, batch_nb, optimizer, optimizer_idx, closure, on_tpu=False, using_native_amp=False, using_lbfgs=False): # update generator opt every 2 steps if optimizer_i == 0: - optimizer.step(closure=closure, make_optimizer_step=batch_nb % 2 == 0) + optimizer.step(closure=closure, make_optimizer_step=(batch_nb % 2) == 0) # update discriminator opt every 4 steps if optimizer_i == 1: From 2fb3a361544b1c8c92cf6c2c9bed297e8b6d7b4a Mon Sep 17 00:00:00 2001 From: chaton Date: Fri, 11 Dec 2020 18:48:48 +0100 Subject: [PATCH 17/19] Update docs/source/optimizers.rst MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Adrian Wälchli --- docs/source/optimizers.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/optimizers.rst b/docs/source/optimizers.rst index 87b283a1bf2aa..6bd7fe099fa97 100644 --- a/docs/source/optimizers.rst +++ b/docs/source/optimizers.rst @@ -225,7 +225,7 @@ For example, here step optimizer A every 2 batches and optimizer B every 4 batch # update discriminator opt every 4 steps if optimizer_i == 1: - optimizer.step(closure=closure, make_optimizer_step=batch_nb % 4 == 0) + optimizer.step(closure=closure, make_optimizer_step=(batch_nb % 4) == 0) Here we add a learning-rate warm up From 38bcfe1cfec636de6e333ab52617a1e5a1235b55 Mon Sep 17 00:00:00 2001 From: tchaton Date: Fri, 11 Dec 2020 18:50:49 +0100 Subject: [PATCH 18/19] remove useless import --- pytorch_lightning/core/optimizer.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/pytorch_lightning/core/optimizer.py b/pytorch_lightning/core/optimizer.py index 21c5bde4c57a8..c8e9ff8b80a2f 100644 --- a/pytorch_lightning/core/optimizer.py +++ b/pytorch_lightning/core/optimizer.py @@ -49,14 +49,6 @@ def __init__(self, self.__dict__ = {k: v for k, v in optimizer.__dict__.items() if k != 'step'} - # save functions and attributes from optimizer - self.state_dict = optimizer.state_dict - self.load_state_dict = optimizer.load_state_dict - self.zero_grad = optimizer.zero_grad - self.add_param_group = optimizer.add_param_group - self.__setstate__ = optimizer.__setstate__ - self.__getstate__ = optimizer.__getstate__ - # For Horovod if hasattr(optimizer, "skip_synchronize"): 
self.__class__ = type("Lightning" + optimizer.__class__.__name__, (self.__class__, optimizer.__class__.__bases__[0]), {}) From 51cabe5919d6173f16edbf0b7e208b096007fb84 Mon Sep 17 00:00:00 2001 From: chaton Date: Fri, 11 Dec 2020 18:56:26 +0100 Subject: [PATCH 19/19] Update docs/source/optimizers.rst --- docs/source/optimizers.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/optimizers.rst b/docs/source/optimizers.rst index 6bd7fe099fa97..06e6e9679d29f 100644 --- a/docs/source/optimizers.rst +++ b/docs/source/optimizers.rst @@ -210,7 +210,7 @@ For example, here step optimizer A every 2 batches and optimizer B every 4 batch if batch_nb % 4 == 0 : optimizer.step(closure=closure) -.. note:: When using ``Trainer(enable_pl_optimizer=True)``, ``.step`` accept a boolean ``make_optimizer_step`` which can be used as follow. +.. note:: When using ``Trainer(enable_pl_optimizer=True)``, ``.step`` accepts a boolean ``make_optimizer_step`` which can be used as follow. .. testcode::
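A minimal, self-contained sketch of the alternating-step override that the documentation changes above describe, assuming the hook signature used in the updated ``optimizer_step`` snippets (the snippets reference ``optimizer_i``, which is read here as the ``optimizer_idx`` argument); the module, layer sizes, and learning rates are illustrative only and not part of the patch:

    import torch
    import pytorch_lightning as pl
    from torch.optim import SGD, Adam

    class AlternatingOptimizerModel(pl.LightningModule):
        """Steps optimizer A every 2 batches and optimizer B every 4 batches."""

        def __init__(self):
            super().__init__()
            self.layer = torch.nn.Linear(32, 2)

        def forward(self, x):
            return self.layer(x)

        def training_step(self, batch, batch_idx, optimizer_idx):
            out = self.layer(batch)
            return {"loss": torch.nn.functional.mse_loss(out, torch.zeros_like(out))}

        def configure_optimizers(self):
            # optimizer A (index 0) and optimizer B (index 1)
            return [SGD(self.layer.parameters(), lr=0.1), Adam(self.layer.parameters(), lr=0.1)]

        def optimizer_step(self, current_epoch, batch_nb, optimizer, optimizer_idx, closure,
                           on_tpu=False, using_native_amp=False, using_lbfgs=False):
            # passing the closure lets the wrapped optimizer run the training-step
            # forward/backward itself before updating the parameters
            if optimizer_idx == 0 and batch_nb % 2 == 0:
                optimizer.step(closure=closure)
            if optimizer_idx == 1 and batch_nb % 4 == 0:
                optimizer.step(closure=closure)

    # e.g. trainer = pl.Trainer(enable_pl_optimizer=True, max_epochs=1)

Passing ``closure`` through to ``optimizer.step`` is the point of the change: with ``Trainer(enable_pl_optimizer=True)``, the ``LightningOptimizer`` wrapper runs the closure inside ``step``, so the override no longer calls ``.zero_grad()`` itself, as noted in the patched docs.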