From 2dc8df8fc6af38e9424984aa339564cba3902ea6 Mon Sep 17 00:00:00 2001 From: tchaton Date: Tue, 2 Mar 2021 17:11:47 +0000 Subject: [PATCH 1/5] update --- pytorch_lightning/core/optimizer.py | 2 +- tests/core/test_lightning_optimizer.py | 103 +++++++++++++++++++++++++ 2 files changed, 104 insertions(+), 1 deletion(-) diff --git a/pytorch_lightning/core/optimizer.py b/pytorch_lightning/core/optimizer.py index 8b6548f438756..162e17ca47bf5 100644 --- a/pytorch_lightning/core/optimizer.py +++ b/pytorch_lightning/core/optimizer.py @@ -38,7 +38,7 @@ class LightningOptimizer: def __init__(self, optimizer: Optimizer): - self.__dict__ = {k: v for k, v in optimizer.__dict__.items() if k != 'step'} + self.__dict__ = {k: v for k, v in optimizer.__dict__.items() if k not in ('step', "__del__")} # For Horovod if hasattr(optimizer, "skip_synchronize"): diff --git a/tests/core/test_lightning_optimizer.py b/tests/core/test_lightning_optimizer.py index 4fc6a06157ab0..c8e095c68c170 100644 --- a/tests/core/test_lightning_optimizer.py +++ b/tests/core/test_lightning_optimizer.py @@ -11,12 +11,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import gc from unittest.mock import DEFAULT, patch import torch from torch.optim import Adam, Optimizer, SGD +from torch.optim.optimizer import _RequiredParameter from pytorch_lightning import Trainer +from pytorch_lightning.callbacks import Callback from pytorch_lightning.core.optimizer import LightningOptimizer from tests.helpers.boring_model import BoringModel @@ -303,3 +306,103 @@ def configure_optimizers(self): lbfgs = model.optimizers() max_iter = lbfgs.param_groups[0]["max_iter"] assert zero_grad.call_count == max_iter + + +required = _RequiredParameter() + + +class OptimizerWithHooks(Optimizer): + + def __init__(self, model, lr=required, u0=required): + if lr is not required and lr < 0.0: + raise ValueError("Invalid learning rate: {}".format(lr)) + + defaults = dict(lr=lr) + self.steps = 0 + + self.params = [] + + self._fwd_handles = [] + self._bwd_handles = [] + + self.model = model + + for _, mod in model.named_modules(): # iterates over modules of model + mod_class = mod.__class__.__name__ + if mod_class in ['Linear']: # silently skips other layers + + # save the inputs and gradients for the kfac matrix computation + handle = mod.register_forward_pre_hook(self._save_input) # save the inputs + self._fwd_handles.append(handle) # collect forward-save-input hooks in list + handle = mod.register_backward_hook(self._save_grad_output) # save the gradients + self._bwd_handles.append(handle) # collect backward-save-grad hook in list + + # save the parameters + params = [mod.weight] + if mod.bias is not None: + params.append(mod.bias) + + # save a param_group for each module + d = {'params': params, 'mod': mod, 'layer_type': mod_class} + self.params.append(d) + + super(OptimizerWithHooks, self).__init__(self.params, defaults) + + def _save_input(self, mod, i): + """Saves input of layer""" + if mod.training: + self.state[mod]['x'] = i[0] + + def _save_grad_output(self, mod, grad_input, grad_output): + """ + Saves grad on output of layer to + grad is scaled with batch_size since gradient is spread over samples in mini batch + """ + bs = grad_output[0].shape[0] # batch_size + if mod.training: + self.state[mod]['grad'] = grad_output[0] * bs + + def step(self, closure=None): + closure() + for group in self.param_groups: + _ = 
self.state[group['mod']]['x'] + _ = self.state[group['mod']]['grad'] + return True + + +def test_lightning_optimizer_dont_delete_wrapped_optimizer(tmpdir): + """ + """ + + class TestCB(Callback): + + def __init__(self): + self.count_on_train_batch_start = 0 + self.count_on_train_batch_end = 0 + + def on_train_batch_start(self, trainer, pl_module, batch, batch_idx: int, dataloader_idx: int) -> None: + self.count_on_train_batch_start += 1 + optimizer = pl_module.optimizers(use_pl_optimizer=False) + assert len(optimizer._fwd_handles) == 1 + + def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx: int, dataloader_idx: int) -> None: + self.count_on_train_batch_end += 1 + # delete the lightning_optimizers + pl_module.trainer._lightning_optimizers = None + gc.collect() + + class TestModel(BoringModel): + + def configure_optimizers(self): + return OptimizerWithHooks(self) + + callback = TestCB() + + model = TestModel() + # Initialize a trainer + trainer = Trainer(limit_train_batches=4, limit_val_batches=1, max_steps=1, callbacks=callback) + + # Train the model ⚡ + trainer.fit(model) + assert callback.count_on_train_batch_start == 4 + assert callback.count_on_train_batch_end == 4 From 754a29d57d2de26b36302c7a682f2b09378e8374 Mon Sep 17 00:00:00 2001 From: tchaton Date: Tue, 2 Mar 2021 20:51:57 +0000 Subject: [PATCH 2/5] resolve bug --- tests/core/test_lightning_optimizer.py | 29 ++++++++++++-------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/tests/core/test_lightning_optimizer.py b/tests/core/test_lightning_optimizer.py index c8e095c68c170..0fc9f82141849 100644 --- a/tests/core/test_lightning_optimizer.py +++ b/tests/core/test_lightning_optimizer.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import gc +from typing import Any from unittest.mock import DEFAULT, patch import torch @@ -19,7 +20,6 @@ from torch.optim.optimizer import _RequiredParameter from pytorch_lightning import Trainer -from pytorch_lightning.callbacks import Callback from pytorch_lightning.core.optimizer import LightningOptimizer from tests.helpers.boring_model import BoringModel @@ -374,35 +374,32 @@ def test_lightning_optimizer_dont_delete_wrapped_optimizer(tmpdir): """ """ - class TestCB(Callback): + class TestModel(BoringModel): def __init__(self): + super().__init__() self.count_on_train_batch_start = 0 self.count_on_train_batch_end = 0 - def on_train_batch_start(self, trainer, pl_module, batch, batch_idx: int, dataloader_idx: int) -> None: + def configure_optimizers(self): + return OptimizerWithHooks(self) + + def on_train_batch_start(self, batch: Any, batch_idx: int, dataloader_idx: int) -> None: self.count_on_train_batch_start += 1 - optimizer = pl_module.optimizers(use_pl_optimizer=False) + optimizer = self.optimizers(use_pl_optimizer=False) assert len(optimizer._fwd_handles) == 1 - def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx: int, dataloader_idx: int) -> None: + def on_train_batch_end(self, outputs: Any, batch: Any, batch_idx: int, dataloader_idx: int) -> None: self.count_on_train_batch_end += 1 # delete the lightning_optimizers - pl_module.trainer._lightning_optimizers = None + self.trainer._lightning_optimizers = None gc.collect() - class TestModel(BoringModel): - - def configure_optimizers(self): - return OptimizerWithHooks(self) - - callback = TestCB() - model = TestModel() # Initialize a trainer - trainer = Trainer(limit_train_batches=4, limit_val_batches=1, max_steps=1, callbacks=callback) + trainer = Trainer(limit_train_batches=4, limit_val_batches=1, max_epochs=1) # Train the model ⚡ trainer.fit(model) - assert callback.count_on_train_batch_start == 4 - assert callback.count_on_train_batch_end == 4 + assert model.count_on_train_batch_start == 4 + assert model.count_on_train_batch_end == 4 From 185272f2fe341ba12fd71989fb7efc82f146d167 Mon Sep 17 00:00:00 2001 From: thomas chaton Date: Wed, 3 Mar 2021 13:24:38 +0000 Subject: [PATCH 3/5] Update tests/core/test_lightning_optimizer.py Co-authored-by: Jirka Borovec --- tests/core/test_lightning_optimizer.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/core/test_lightning_optimizer.py b/tests/core/test_lightning_optimizer.py index 0fc9f82141849..d8e9feaeb4fbc 100644 --- a/tests/core/test_lightning_optimizer.py +++ b/tests/core/test_lightning_optimizer.py @@ -371,8 +371,6 @@ def step(self, closure=None): def test_lightning_optimizer_dont_delete_wrapped_optimizer(tmpdir): - """ - """ class TestModel(BoringModel): From d757effbbee293becd9c8a98be761c957567a579 Mon Sep 17 00:00:00 2001 From: thomas chaton Date: Wed, 3 Mar 2021 13:24:46 +0000 Subject: [PATCH 4/5] Update tests/core/test_lightning_optimizer.py Co-authored-by: Jirka Borovec --- tests/core/test_lightning_optimizer.py | 33 +++++++++++++------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/tests/core/test_lightning_optimizer.py b/tests/core/test_lightning_optimizer.py index d8e9feaeb4fbc..8c7d473a76738 100644 --- a/tests/core/test_lightning_optimizer.py +++ b/tests/core/test_lightning_optimizer.py @@ -329,22 +329,23 @@ def __init__(self, model, lr=required, u0=required): for _, mod in model.named_modules(): # iterates over modules of model mod_class = mod.__class__.__name__ - if mod_class in ['Linear']: # silently skips other 
layers - - # save the inputs and gradients for the kfac matrix computation - handle = mod.register_forward_pre_hook(self._save_input) # save the inputs - self._fwd_handles.append(handle) # collect forward-save-input hooks in list - handle = mod.register_backward_hook(self._save_grad_output) # save the gradients - self._bwd_handles.append(handle) # collect backward-save-grad hook in list - - # save the parameters - params = [mod.weight] - if mod.bias is not None: - params.append(mod.bias) - - # save a param_group for each module - d = {'params': params, 'mod': mod, 'layer_type': mod_class} - self.params.append(d) + if mod_class not in ['Linear']: # silently skips other layers + continue + + # save the inputs and gradients for the kfac matrix computation + handle = mod.register_forward_pre_hook(self._save_input) # save the inputs + self._fwd_handles.append(handle) # collect forward-save-input hooks in list + handle = mod.register_backward_hook(self._save_grad_output) # save the gradients + self._bwd_handles.append(handle) # collect backward-save-grad hook in list + + # save the parameters + params = [mod.weight] + if mod.bias is not None: + params.append(mod.bias) + + # save a param_group for each module + d = {'params': params, 'mod': mod, 'layer_type': mod_class} + self.params.append(d) super(OptimizerWithHooks, self).__init__(self.params, defaults) From 44f2c3bc21de2442679bc495c8a48ba739d5e0fb Mon Sep 17 00:00:00 2001 From: Carlos Mocholi Date: Wed, 3 Mar 2021 14:50:19 +0100 Subject: [PATCH 5/5] Cleanup --- tests/core/test_lightning_optimizer.py | 50 ++++++++------------------ 1 file changed, 14 insertions(+), 36 deletions(-) diff --git a/tests/core/test_lightning_optimizer.py b/tests/core/test_lightning_optimizer.py index 8c7d473a76738..3c6e34df8d5e3 100644 --- a/tests/core/test_lightning_optimizer.py +++ b/tests/core/test_lightning_optimizer.py @@ -17,7 +17,6 @@ import torch from torch.optim import Adam, Optimizer, SGD -from torch.optim.optimizer import _RequiredParameter from pytorch_lightning import Trainer from pytorch_lightning.core.optimizer import LightningOptimizer @@ -308,31 +307,17 @@ def configure_optimizers(self): assert zero_grad.call_count == max_iter -required = _RequiredParameter() - - class OptimizerWithHooks(Optimizer): - def __init__(self, model, lr=required, u0=required): - if lr is not required and lr < 0.0: - raise ValueError("Invalid learning rate: {}".format(lr)) - - defaults = dict(lr=lr) - self.steps = 0 - - self.params = [] - + def __init__(self, model): self._fwd_handles = [] self._bwd_handles = [] - - self.model = model - - for _, mod in model.named_modules(): # iterates over modules of model + self.params = [] + for _, mod in model.named_modules(): mod_class = mod.__class__.__name__ - if mod_class not in ['Linear']: # silently skips other layers + if mod_class != 'Linear': continue - # save the inputs and gradients for the kfac matrix computation handle = mod.register_forward_pre_hook(self._save_input) # save the inputs self._fwd_handles.append(handle) # collect forward-save-input hooks in list handle = mod.register_backward_hook(self._save_grad_output) # save the gradients @@ -347,21 +332,21 @@ def __init__(self, model, lr=required, u0=required): d = {'params': params, 'mod': mod, 'layer_type': mod_class} self.params.append(d) - super(OptimizerWithHooks, self).__init__(self.params, defaults) + super(OptimizerWithHooks, self).__init__(self.params, {"lr": 0.01}) def _save_input(self, mod, i): """Saves input of layer""" if mod.training: self.state[mod]['x'] = 
i[0] - def _save_grad_output(self, mod, grad_input, grad_output): + def _save_grad_output(self, mod, _, grad_output): """ Saves grad on output of layer to grad is scaled with batch_size since gradient is spread over samples in mini batch """ - bs = grad_output[0].shape[0] # batch_size + batch_size = grad_output[0].shape[0] if mod.training: - self.state[mod]['grad'] = grad_output[0] * bs + self.state[mod]['grad'] = grad_output[0] * batch_size def step(self, closure=None): closure() @@ -371,14 +356,11 @@ def step(self, closure=None): return True -def test_lightning_optimizer_dont_delete_wrapped_optimizer(tmpdir): +def test_lightning_optimizer_keeps_hooks(tmpdir): class TestModel(BoringModel): - - def __init__(self): - super().__init__() - self.count_on_train_batch_start = 0 - self.count_on_train_batch_end = 0 + count_on_train_batch_start = 0 + count_on_train_batch_end = 0 def configure_optimizers(self): return OptimizerWithHooks(self) @@ -390,15 +372,11 @@ def on_train_batch_start(self, batch: Any, batch_idx: int, dataloader_idx: int) def on_train_batch_end(self, outputs: Any, batch: Any, batch_idx: int, dataloader_idx: int) -> None: self.count_on_train_batch_end += 1 - # delete the lightning_optimizers - self.trainer._lightning_optimizers = None - gc.collect() + del self.trainer._lightning_optimizers + gc.collect() # not necessary, just in case + trainer = Trainer(default_root_dir=tmpdir, limit_train_batches=4, limit_val_batches=1, max_epochs=1) model = TestModel() - # Initialize a trainer - trainer = Trainer(limit_train_batches=4, limit_val_batches=1, max_epochs=1) - - # Train the model ⚡ trainer.fit(model) assert model.count_on_train_batch_start == 4 assert model.count_on_train_batch_end == 4
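
---

Not part of the patches above, but for readers skimming the series: PATCH 1/5 hinges on `LightningOptimizer` rebinding its `__dict__` to a filtered copy of the wrapped optimizer's attribute dict, now skipping both `step` and `__del__`. Below is a hedged, standalone sketch of just that attribute-sharing mechanic; `WrapperSketch` is a made-up stand-in for illustration, not Lightning's actual `LightningOptimizer`, and it makes no claim about the garbage-collection behaviour the new test exercises.

```python
import torch
from torch.optim import SGD


class WrapperSketch:
    """Standalone sketch of the attribute filter from PATCH 1/5 (not the real LightningOptimizer)."""

    def __init__(self, optimizer):
        # Rebind this wrapper's __dict__ to a filtered view of the optimizer's instance
        # attributes, skipping 'step' and '__del__' so the wrapped optimizer keeps both.
        self.__dict__ = {k: v for k, v in optimizer.__dict__.items() if k not in ('step', '__del__')}


model = torch.nn.Linear(4, 4)
opt = SGD(model.parameters(), lr=0.1)
wrapped = WrapperSketch(opt)

# The attribute values are shared rather than copied: both objects reference the
# same param_groups list, which is why the wrapper must not take over teardown logic.
assert wrapped.param_groups is opt.param_groups
```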