
Commit 9b6374f

Fix finetuning so that complex models are correctly unfrozen (#6880)

scart97 authored and SeanNaren committed

Co-authored-by: Carlos Mocholi <[email protected]>
(cherry picked from commit eb15abc)
1 parent 593ae70 commit 9b6374f

File tree

3 files changed: +51 -11 lines

CHANGELOG.md
pytorch_lightning/callbacks/finetuning.py
tests/callbacks/test_finetuning_callback.py

CHANGELOG.md

Lines changed: 10 additions & 5 deletions

@@ -13,9 +13,15 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Added more explicit exception message when trying to execute `trainer.test()` or `trainer.validate()` with `fast_dev_run=True` ([#6667](https://github.com/PyTorchLightning/pytorch-lightning/pull/6667))
 
 
+- Added `LightningCLI` class to provide simple reproducibility with minimum boilerplate training cli. ([#4492](https://github.com/PyTorchLightning/pytorch-lightning/pull/4492))
+
+
 - Trigger warning when non-metric logged value with multi processes hasn't been reduced ([#6417](https://github.com/PyTorchLightning/pytorch-lightning/pull/6417))
 
 
+- Added `gradient_clip_algorithm` argument to Trainer for gradient clipping by value ([#6123](https://github.com/PyTorchLightning/pytorch-lightning/pull/6123)).
+
+
 - Added a way to print to terminal without breaking up the progress bar ([#5470](https://github.com/PyTorchLightning/pytorch-lightning/pull/5470))
 
 
@@ -75,6 +81,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 - Added support for `precision=64`, enabling training with double precision ([#6595](https://github.com/PyTorchLightning/pytorch-lightning/pull/6595))
 
+- Added support for DDP communication hooks ([#6736](https://github.com/PyTorchLightning/pytorch-lightning/issues/6736))
 
 - Added `artifact_location` argument to `MLFlowLogger` which will be passed to the `MlflowClient.create_experiment` call ([#6677](https://github.com/PyTorchLightning/pytorch-lightning/pull/6677))
 
@@ -208,13 +215,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Fixed torch distributed not available in setup hook for DDP ([#6506](https://github.com/PyTorchLightning/pytorch-lightning/pull/6506))
 
 
-- Fixed TPU Colab hang issue, post training ([#6816](https://github.com/PyTorchLightning/pytorch-lightning/pull/6816))
+- Fixed bug where `BaseFinetuning.flatten_modules()` was duplicating leaf node parameters ([#6879](https://github.com/PyTorchLightning/pytorch-lightning/pull/6879))
 
 
-- Enforce an epoch scheduler interval when using SWA ([#6588](https://github.com/PyTorchLightning/pytorch-lightning/pull/6588))
-
-
-- Fixed an issue with `IterableDataset` when `__len__` is not defined ([#6828](https://github.com/PyTorchLightning/pytorch-lightning/pull/6828))
+- Fixed `EarlyStopping` logic when `min_epochs` or `min_steps` requirement is not met ([#6705](https://github.com/PyTorchLightning/pytorch-lightning/pull/6705))
 
 
 ## [1.2.8] - 2021-04-13
@@ -343,6 +347,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Fixed incorrect yield logic for the amp autocast context manager ([#6080](https://github.com/PyTorchLightning/pytorch-lightning/pull/6080))
 - Fixed priority of plugin/accelerator when setting distributed mode ([#6089](https://github.com/PyTorchLightning/pytorch-lightning/pull/6089))
 - Fixed error message for AMP + CPU incompatibility ([#6107](https://github.com/PyTorchLightning/pytorch-lightning/pull/6107))
+- Disabled batch transfer in DP mode ([#6093](https://github.com/PyTorchLightning/pytorch-lightning/pull/6093))
 
 
 ## [1.2.0] - 2021-02-18
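Two of the changelog entries added above name new Trainer arguments. As a quick orientation, here is a minimal sketch of how they are typically passed; this is illustrative usage based on the public Trainer API, not code from this commit, and defaults or accepted values may differ between releases:

from pytorch_lightning import Trainer

# Clip gradients by value instead of by norm (changelog entry #6123),
# and train in double precision (changelog entry #6595).
trainer = Trainer(
    gradient_clip_val=0.5,
    gradient_clip_algorithm="value",
    precision=64,
)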

pytorch_lightning/callbacks/finetuning.py

Lines changed: 2 additions & 6 deletions

@@ -22,7 +22,6 @@
 import torch
 from torch.nn import Module
 from torch.nn.modules.batchnorm import _BatchNorm
-from torch.nn.modules.container import Container, ModuleDict, ModuleList, Sequential
 from torch.optim.optimizer import Optimizer
 
 from pytorch_lightning.callbacks.base import Callback
@@ -102,11 +101,8 @@ def flatten_modules(modules: Union[Module, Iterable[Union[Module, Iterable]]]) -
         else:
             _modules = modules.modules()
 
-        return list(
-            filter(
-                lambda m: not isinstance(m, (Container, Sequential, ModuleDict, ModuleList, LightningModule)), _modules
-            )
-        )
+        # Leaf nodes in the graph have no children, so we use that to filter
+        return [m for m in _modules if not list(m.children())]
 
     @staticmethod
     def filter_params(
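Why the old implementation duplicated parameters: it only filtered out the known container types, so a custom module that wraps children (such as the `ConvBlock` added in the test below) stayed in the flattened list next to its own children, and its parameters were gathered once through the wrapper and once more through each child. The leaf-only check keeps each parameter's owning module exactly once. A standalone sketch of the difference (illustrative only; `old_flatten`, `new_flatten` and `count_params` are hypothetical stand-ins, not library code):

from torch import nn


class ConvBlock(nn.Module):
    # A custom wrapper with children: it is not Sequential, ModuleList,
    # ModuleDict or a LightningModule, so the old type-based filter kept it.
    def __init__(self):
        super().__init__()
        self.conv = nn.Conv2d(3, 8, kernel_size=3)
        self.bn = nn.BatchNorm2d(8)


def old_flatten(root):
    # Old behaviour (simplified): drop only known container types.
    containers = (nn.Sequential, nn.ModuleList, nn.ModuleDict)
    return [m for m in root.modules() if not isinstance(m, containers)]


def new_flatten(root):
    # New behaviour: keep only leaf modules, i.e. modules without children.
    return [m for m in root.modules() if not list(m.children())]


def count_params(flattened):
    # Gathering parameters module-by-module over the flattened list double
    # counts whenever a non-leaf module such as ConvBlock is still present.
    return len([p for m in flattened for p in m.parameters()])


model = nn.Sequential(ConvBlock(), ConvBlock())
print(count_params(old_flatten(model)))  # 16: every weight/bias counted twice
print(count_params(new_flatten(model)))  # 8: conv and bn weights/biases, once each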

tests/callbacks/test_finetuning_callback.py

Lines changed: 39 additions & 0 deletions

@@ -11,6 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from collections import OrderedDict
+
 import pytest
 import torch
 from torch import nn
@@ -244,3 +246,40 @@ def configure_optimizers(self):
 
     trainer = Trainer(default_root_dir=tmpdir, callbacks=[callback], fast_dev_run=True)
     trainer.fit(model)
+
+
+def test_deep_nested_model():
+
+    class ConvBlock(nn.Module):
+
+        def __init__(self, in_channels, out_channels):
+            super().__init__()
+            self.conv = nn.Conv2d(in_channels, out_channels, 3)
+            self.act = nn.ReLU()
+            self.bn = nn.BatchNorm2d(out_channels)
+
+        def forward(self, x):
+            x = self.conv(x)
+            x = self.act(x)
+            return self.bn(x)
+
+    model = nn.Sequential(
+        OrderedDict([
+            ("encoder", nn.Sequential(ConvBlock(3, 64), ConvBlock(64, 128))),
+            ("decoder", ConvBlock(128, 10)),
+        ])
+    )
+
+    # There's 9 leaf layers in that model
+    assert len(BaseFinetuning.flatten_modules(model)) == 9
+
+    BaseFinetuning.freeze(model.encoder, train_bn=True)
+    assert not model.encoder[0].conv.weight.requires_grad
+    assert model.encoder[0].bn.weight.requires_grad
+
+    BaseFinetuning.make_trainable(model)
+    encoder_params = list(BaseFinetuning.filter_params(model.encoder, train_bn=True))
+    # The 8 parameters of the encoder are:
+    # conv0.weight, conv0.bias, bn0.weight, bn0.bias
+    # conv1.weight, conv1.bias, bn1.weight, bn1.bias
+    assert len(encoder_params) == 8
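As a companion to the new test, the user-visible effect of the fix can be seen by checking that `filter_params` now yields each parameter of a nested model exactly once, so its output can safely be turned into an optimizer param group. A small hedged sketch reusing the test's model layout (the uniqueness check is illustrative and not part of the test suite):

from collections import OrderedDict

from torch import nn

from pytorch_lightning.callbacks import BaseFinetuning


class ConvBlock(nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, 3)
        self.act = nn.ReLU()
        self.bn = nn.BatchNorm2d(out_channels)


model = nn.Sequential(OrderedDict([
    ("encoder", nn.Sequential(ConvBlock(3, 64), ConvBlock(64, 128))),
    ("decoder", ConvBlock(128, 10)),
]))

params = list(BaseFinetuning.filter_params(model.encoder, train_bn=True))
# With the leaf-node filter, no parameter object is yielded more than once.
assert len(params) == len({id(p) for p in params}) == 8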
