Skip to content

Commit d26953c

Browse files
carmocca and awaelchli
authored
Add ModelPruning(prune_on_train_epoch_end) to choose when to apply pruning (#7704)
Co-authored-by: Adrian Wälchli <[email protected]>
1 parent b2d77a6 commit d26953c

File tree

3 files changed

+52
-25
lines changed

3 files changed

+52
-25
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
1818
- Added LightningCLI support for config files on object stores ([#7521](https://github.com/PyTorchLightning/pytorch-lightning/pull/7521))
1919

2020

21+
- Added `ModelPruning(prune_on_train_epoch_end=True|False)` to choose when to apply pruning ([#7704](https://github.com/PyTorchLightning/pytorch-lightning/pull/7704))
22+
23+
2124
- Added support for checkpointing based on a provided time interval during training ([#7515](https://github.com/PyTorchLightning/pytorch-lightning/pull/7515))
2225

2326

pytorch_lightning/callbacks/pruning.py

Lines changed: 18 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -71,6 +71,7 @@ def __init__(
7171
pruning_dim: Optional[int] = None,
7272
pruning_norm: Optional[int] = None,
7373
verbose: int = 0,
74+
prune_on_train_epoch_end: bool = True,
7475
) -> None:
7576
"""
7677
Model pruning Callback, using PyTorch's prune utilities.
@@ -141,6 +142,9 @@ def __init__(
141142
142143
verbose: Verbosity level. 0 to disable, 1 to log overall sparsity, 2 to log per-layer sparsity
143144
145+
prune_on_train_epoch_end: whether to apply pruning at the end of the training epoch.
146+
If this is ``False``, then the check runs at the end of the validation epoch.
147+
144148
Raises:
145149
MisconfigurationException:
146150
If ``parameter_names`` is neither ``"weight"`` nor ``"bias"``,
@@ -155,6 +159,7 @@ def __init__(
155159
self._parameters_to_prune = parameters_to_prune
156160
self._use_lottery_ticket_hypothesis = use_lottery_ticket_hypothesis
157161
self._resample_parameters = resample_parameters
162+
self._prune_on_train_epoch_end = prune_on_train_epoch_end
158163
self._parameter_names = parameter_names or self.PARAMETER_NAMES
159164
self._global_kwargs: Dict[str, Any] = {}
160165
self._original_layers: Optional[Dict[int, _LayerRef]] = None
@@ -381,8 +386,7 @@ def on_before_accelerator_backend_setup(self, trainer: 'pl.Trainer', pl_module:
381386
self._original_layers.setdefault(id_, _LayerRef(data=deepcopy(module), names=[]))
382387
self._original_layers[id_]["names"].append((i, name))
383388

384-
def on_train_epoch_end(self, trainer: 'pl.Trainer', pl_module: LightningModule) -> None: # type: ignore
385-
current_epoch = pl_module.current_epoch
389+
def _run_pruning(self, current_epoch: int) -> None:
386390
prune = self._apply_pruning(current_epoch) if callable(self._apply_pruning) else self._apply_pruning
387391
amount = self.amount(current_epoch) if callable(self.amount) else self.amount
388392
if not prune or not amount:
@@ -395,9 +399,19 @@ def on_train_epoch_end(self, trainer: 'pl.Trainer', pl_module: LightningModule)
395399
):
396400
self.apply_lottery_ticket_hypothesis()
397401

402+
def on_train_epoch_end(self, trainer: 'pl.Trainer', pl_module: LightningModule) -> None: # type: ignore
403+
if self._prune_on_train_epoch_end:
404+
rank_zero_debug("`ModelPruning.on_train_epoch_end`. Applying pruning")
405+
self._run_pruning(pl_module.current_epoch)
406+
407+
def on_validation_epoch_end(self, trainer: 'pl.Trainer', pl_module: 'pl.LightningModule') -> None:
408+
if not trainer.sanity_checking and not self._prune_on_train_epoch_end:
409+
rank_zero_debug("`ModelPruning.on_validation_epoch_end`. Applying pruning")
410+
self._run_pruning(pl_module.current_epoch)
411+
398412
def on_train_end(self, trainer: 'pl.Trainer', pl_module: LightningModule) -> None:
399413
if self._make_pruning_permanent:
400-
rank_zero_debug("`ModelPruning.on_train_end`. Pruning is made permanent for this checkpoint.")
414+
rank_zero_debug("`ModelPruning.on_train_end`. Pruning is made permanent for this checkpoint")
401415
self.make_pruning_permanent(pl_module)
402416

403417
def on_save_checkpoint(
@@ -407,7 +421,7 @@ def on_save_checkpoint(
407421
checkpoint: Dict[str, Any],
408422
) -> Dict[str, Any]:
409423
if self._make_pruning_permanent:
410-
rank_zero_debug("`ModelPruning.on_save_checkpoint`. Pruning is made permanent for this checkpoint.")
424+
rank_zero_debug("`ModelPruning.on_save_checkpoint`. Pruning is made permanent for this checkpoint")
411425
prev_device = pl_module.device
412426
# prune a copy so training can continue with the same buffers
413427
copy = deepcopy(pl_module.to("cpu"))

tests/callbacks/test_pruning.py

Lines changed: 31 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14+
import re
1415
from collections import OrderedDict
1516
from logging import INFO
1617
from typing import Union
@@ -21,7 +22,7 @@
2122
from torch import nn
2223
from torch.nn import Sequential
2324

24-
from pytorch_lightning import seed_everything, Trainer
25+
from pytorch_lightning import Trainer
2526
from pytorch_lightning.callbacks import ModelCheckpoint, ModelPruning
2627
from pytorch_lightning.utilities.exceptions import MisconfigurationException
2728
from tests.helpers import BoringModel
@@ -224,7 +225,6 @@ def apply_lottery_ticket_hypothesis(self):
224225

225226
@pytest.mark.parametrize("make_pruning_permanent", (False, True))
226227
def test_multiple_pruning_callbacks(tmpdir, caplog, make_pruning_permanent: bool):
227-
seed_everything(0)
228228
model = TestModel()
229229
pruning_kwargs = {
230230
'parameters_to_prune': [(model.layer.mlp_1, "weight"), (model.layer.mlp_3, "weight")],
@@ -250,17 +250,20 @@ def test_multiple_pruning_callbacks(tmpdir, caplog, make_pruning_permanent: bool
250250

251251
actual = [m.strip() for m in caplog.messages]
252252
actual = [m for m in actual if m.startswith("Applied")]
253-
assert actual == [
254-
"Applied `L1Unstructured`. Pruned: 0/1122 (0.00%) -> 544/1122 (48.48%)",
255-
"Applied `L1Unstructured` to `Linear(in_features=32, out_features=32, bias=True).weight` with amount=0.5. Pruned: 0 (0.00%) -> 503 (49.12%)", # noqa: E501
256-
"Applied `L1Unstructured` to `Linear(in_features=32, out_features=2, bias=True).weight` with amount=0.5. Pruned: 0 (0.00%) -> 41 (64.06%)", # noqa: E501
257-
"Applied `RandomUnstructured`. Pruned: 544/1122 (48.48%) -> 680/1122 (60.61%)",
258-
"Applied `RandomUnstructured` to `Linear(in_features=32, out_features=32, bias=True).weight` with amount=0.25. Pruned: 503 (49.12%) -> 629 (61.43%)", # noqa: E501
259-
"Applied `RandomUnstructured` to `Linear(in_features=32, out_features=2, bias=True).weight` with amount=0.25. Pruned: 41 (64.06%) -> 51 (79.69%)", # noqa: E501
260-
"Applied `L1Unstructured`. Pruned: 680/1122 (60.61%) -> 884/1122 (78.79%)",
261-
"Applied `L1Unstructured` to `Linear(in_features=32, out_features=32, bias=True).weight` with amount=0.5. Pruned: 629 (61.43%) -> 827 (80.76%)", # noqa: E501
262-
"Applied `L1Unstructured` to `Linear(in_features=32, out_features=2, bias=True).weight` with amount=0.5. Pruned: 51 (79.69%) -> 57 (89.06%)", # noqa: E501
253+
percentage = r"\(\d+(?:\.\d+)?%\)"
254+
expected = [
255+
rf"Applied `L1Unstructured`. Pruned: \d+\/1122 {percentage} -> \d+\/1122 {percentage}",
256+
rf"Applied `L1Unstructured` to `Linear\(in_features=32, out_features=32, bias=True\).weight` with amount=0.5. Pruned: 0 \(0.00%\) -> \d+ {percentage}", # noqa: E501
257+
rf"Applied `L1Unstructured` to `Linear\(in_features=32, out_features=2, bias=True\).weight` with amount=0.5. Pruned: 0 \(0.00%\) -> \d+ {percentage}", # noqa: E501
258+
rf"Applied `RandomUnstructured`. Pruned: \d+\/1122 {percentage} -> \d+\/1122 {percentage}",
259+
rf"Applied `RandomUnstructured` to `Linear\(in_features=32, out_features=32, bias=True\).weight` with amount=0.25. Pruned: \d+ {percentage} -> \d+ {percentage}", # noqa: E501
260+
rf"Applied `RandomUnstructured` to `Linear\(in_features=32, out_features=2, bias=True\).weight` with amount=0.25. Pruned: \d+ {percentage} -> \d+ {percentage}", # noqa: E501
261+
rf"Applied `L1Unstructured`. Pruned: \d+\/1122 {percentage} -> \d+\/1122 {percentage}",
262+
rf"Applied `L1Unstructured` to `Linear\(in_features=32, out_features=32, bias=True\).weight` with amount=0.5. Pruned: \d+ {percentage} -> \d+ {percentage}", # noqa: E501
263+
rf"Applied `L1Unstructured` to `Linear\(in_features=32, out_features=2, bias=True\).weight` with amount=0.5. Pruned: \d+ {percentage} -> \d+ {percentage}", # noqa: E501
263264
]
265+
expected = [re.compile(s) for s in expected]
266+
assert all(regex.match(s) for s, regex in zip(actual, expected))
264267

265268
filepath = str(tmpdir / "foo.ckpt")
266269
trainer.save_checkpoint(filepath)
@@ -270,27 +273,31 @@ def test_multiple_pruning_callbacks(tmpdir, caplog, make_pruning_permanent: bool
270273
assert not has_pruning if make_pruning_permanent else has_pruning
271274

272275

273-
def test_permanent_when_model_is_saved_multiple_times(tmpdir, caplog):
276+
@pytest.mark.parametrize("on_train_epoch_end", (False, True))
277+
def test_permanent_when_model_is_saved_multiple_times(tmpdir, caplog, on_train_epoch_end):
274278
"""
275279
When a model is saved multiple times and make_permanent=True, we need to
276280
make sure a copy is pruned and not the trained model if we want to continue
277281
with the same pruning buffers.
278282
"""
279-
seed_everything(0)
280283

281284
class TestPruning(ModelPruning):
282285

283286
def on_save_checkpoint(self, trainer, pl_module, checkpoint):
284287
super().on_save_checkpoint(trainer, pl_module, checkpoint)
285-
assert "layer.mlp_3.weight_orig" not in checkpoint["state_dict"]
286-
assert hasattr(pl_module.layer.mlp_3, "weight_orig")
288+
if not on_train_epoch_end:
289+
# these checks only work if pruning on `validation_epoch_end`
290+
# because `on_save_checkpoint` is called before `on_train_epoch_end`
291+
assert "layer.mlp_3.weight_orig" not in checkpoint["state_dict"]
292+
assert hasattr(pl_module.layer.mlp_3, "weight_orig")
287293

288294
model = TestModel()
289295
pruning_callback = TestPruning(
290296
"random_unstructured",
291297
parameters_to_prune=[(model.layer.mlp_3, "weight")],
292298
verbose=1,
293-
make_pruning_permanent=True
299+
make_pruning_permanent=True,
300+
prune_on_train_epoch_end=on_train_epoch_end,
294301
)
295302
ckpt_callback = ModelCheckpoint(monitor="test", save_top_k=2, save_last=True)
296303
trainer = Trainer(callbacks=[pruning_callback, ckpt_callback], max_epochs=3, progress_bar_refresh_rate=0)
@@ -299,11 +306,14 @@ def on_save_checkpoint(self, trainer, pl_module, checkpoint):
299306

300307
actual = [m.strip() for m in caplog.messages]
301308
actual = [m for m in actual if m.startswith("Applied")]
302-
assert actual == [
303-
"Applied `RandomUnstructured`. Pruned: 0/66 (0.00%) -> 32/66 (48.48%)",
304-
"Applied `RandomUnstructured`. Pruned: 32/66 (48.48%) -> 48/66 (72.73%)",
305-
"Applied `RandomUnstructured`. Pruned: 48/66 (72.73%) -> 56/66 (84.85%)",
309+
percentage = r"\(\d+(?:\.\d+)?%\)"
310+
expected = [
311+
rf"Applied `RandomUnstructured`. Pruned: \d+\/66 {percentage} -> \d+\/66 {percentage}",
312+
rf"Applied `RandomUnstructured`. Pruned: \d+\/66 {percentage} -> \d+\/66 {percentage}",
313+
rf"Applied `RandomUnstructured`. Pruned: \d+\/66 {percentage} -> \d+\/66 {percentage}",
306314
]
315+
expected = [re.compile(s) for s in expected]
316+
assert all(regex.match(s) for s, regex in zip(actual, expected))
307317

308318
# removed on_train_end
309319
assert not hasattr(model.layer.mlp_3, "weight_orig")

0 commit comments

Comments (0)