From 8e207dede173a1453a252d24297644650b48cbf4 Mon Sep 17 00:00:00 2001
From: "Alvin(Xinyao) Sun"
Date: Sat, 15 May 2021 23:25:26 -0600
Subject: [PATCH 1/5] fix: dataloader not reset during tuning process

ref: #7562
---
 CHANGELOG.md                                  |  1 +
 pytorch_lightning/tuner/batch_size_scaling.py | 10 ++++++++--
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index caacf2eb374bb..811dee4a2a2d1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -83,6 +83,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

 ### Fixed

+- Fixed dataloaders are not reset when tuning the model
 - Fixed parsing of multiple training dataloaders ([#7433](https://github.com/PyTorchLightning/pytorch-lightning/pull/7433))

diff --git a/pytorch_lightning/tuner/batch_size_scaling.py b/pytorch_lightning/tuner/batch_size_scaling.py
index 120a95a5084b1..d13f539b9924d 100644
--- a/pytorch_lightning/tuner/batch_size_scaling.py
+++ b/pytorch_lightning/tuner/batch_size_scaling.py
@@ -160,7 +160,10 @@ def _run_power_scaling(
             else:
                 raise  # some other error not memory related

-        if not changed:
+        if changed:
+            # set train dataloader to None so it is reset
+            trainer.train_dataloader = None
+        else:
             break
     return new_size

@@ -192,7 +195,10 @@ def _run_binsearch_scaling(
             else:
                 new_size, changed = _adjust_batch_size(trainer, batch_arg_name, factor=2.0, desc='succeeded')

-            if not changed:
+            if changed:
+                # set train dataloader to None so it is reset
+                trainer.train_dataloader = None
+            else:
                 break

         except RuntimeError as exception:

From d5c4c3f9b2ec809394e8e3df18c75a0f036bf0e2 Mon Sep 17 00:00:00 2001
From: "Alvin(Xinyao) Sun"
Date: Tue, 18 May 2021 12:59:44 -0600
Subject: [PATCH 2/5] chore: update CHANGELOG

---
 CHANGELOG.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 811dee4a2a2d1..45212fa918ab5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -83,7 +83,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

 ### Fixed

-- Fixed dataloaders are not reset when tuning the model
+- Fixed dataloaders are not reset when tuning the model ([#7566](https://github.com/PyTorchLightning/pytorch-lightning/pull/7566))
+
 - Fixed parsing of multiple training dataloaders ([#7433](https://github.com/PyTorchLightning/pytorch-lightning/pull/7433))

From 2e03886e77c077386330393485a35be9fec793b6 Mon Sep 17 00:00:00 2001
From: "Alvin(Xinyao) Sun"
Date: Tue, 18 May 2021 14:09:45 -0600
Subject: [PATCH 3/5] fix: add new test parameter for scale_batch_size

---
 tests/tuner/test_scale_batch_size.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/tuner/test_scale_batch_size.py b/tests/tuner/test_scale_batch_size.py
index 7d4e05000d5da..ca3406027bcf1 100644
--- a/tests/tuner/test_scale_batch_size.py
+++ b/tests/tuner/test_scale_batch_size.py
@@ -54,6 +54,7 @@ def __init__(self, batch_size=None):
         (BatchSizeModel(2), BatchSizeDataModule(2)),
         (BatchSizeModel(2), BatchSizeDataModule(None)),
         (BatchSizeModel(None), BatchSizeDataModule(2)),
+        (BatchSizeModel(16), BatchSizeDataModule(16)),
     ]
 )
 def test_scale_batch_size_method_with_model_or_datamodule(tmpdir, model, datamodule):

From fcffdba559c1958074c0d112c1d480f87f60bf1c Mon Sep 17 00:00:00 2001
From: Carlos Mocholi
Date: Fri, 21 May 2021 15:19:38 +0200
Subject: [PATCH 4/5] Add test. Improve comment

---
 pytorch_lightning/tuner/batch_size_scaling.py |  4 +-
 tests/tuner/test_scale_batch_size.py          | 48 +++++++++++--------
 2 files changed, 30 insertions(+), 22 deletions(-)

diff --git a/pytorch_lightning/tuner/batch_size_scaling.py b/pytorch_lightning/tuner/batch_size_scaling.py
index d13f539b9924d..1777e4618c345 100644
--- a/pytorch_lightning/tuner/batch_size_scaling.py
+++ b/pytorch_lightning/tuner/batch_size_scaling.py
@@ -161,7 +161,7 @@ def _run_power_scaling(
                 raise  # some other error not memory related

         if changed:
-            # set train dataloader to None so it is reset
+            # Force the train dataloader to reset as the batch size has changed
             trainer.train_dataloader = None
         else:
             break
@@ -196,7 +196,7 @@ def _run_binsearch_scaling(
                 new_size, changed = _adjust_batch_size(trainer, batch_arg_name, factor=2.0, desc='succeeded')

             if changed:
-                # set train dataloader to None so it is reset
+                # Force the train dataloader to reset as the batch size has changed
                 trainer.train_dataloader = None
             else:
                 break
diff --git a/tests/tuner/test_scale_batch_size.py b/tests/tuner/test_scale_batch_size.py
index ca3406027bcf1..f9e132662b220 100644
--- a/tests/tuner/test_scale_batch_size.py
+++ b/tests/tuner/test_scale_batch_size.py
@@ -24,14 +24,14 @@
 from pytorch_lightning.utilities import AMPType
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from tests.base import EvalModelTemplate
-from tests.helpers import BoringDataModule, BoringModel
+from tests.helpers import BoringDataModule, BoringModel, RandomDataset
 from tests.helpers.datamodules import MNISTDataModule
 from tests.helpers.runif import RunIf


 class BatchSizeDataModule(BoringDataModule):

-    def __init__(self, batch_size=None):
+    def __init__(self, batch_size):
         super().__init__()
         if batch_size is not None:
             self.batch_size = batch_size
@@ -42,22 +42,23 @@ def train_dataloader(self):


 class BatchSizeModel(BoringModel):

-    def __init__(self, batch_size=None):
+    def __init__(self, batch_size):
         super().__init__()
         if batch_size is not None:
             self.batch_size = batch_size

+    def train_dataloader(self):
+        return DataLoader(RandomDataset(32, 64), batch_size=getattr(self, "batch_size", 1))
+

-@pytest.mark.parametrize(
-    "model,datamodule", [
-        (BatchSizeModel(2), None),
-        (BatchSizeModel(2), BatchSizeDataModule(2)),
-        (BatchSizeModel(2), BatchSizeDataModule(None)),
-        (BatchSizeModel(None), BatchSizeDataModule(2)),
-        (BatchSizeModel(16), BatchSizeDataModule(16)),
-    ]
-)
-def test_scale_batch_size_method_with_model_or_datamodule(tmpdir, model, datamodule):
+@pytest.mark.parametrize(["model_bs", "dm_bs"], [
+    (2, -1),
+    (2, 2),
+    (2, None),
+    (None, 2),
+    (16, 16),
+])
+def test_scale_batch_size_method_with_model_or_datamodule(tmpdir, model_bs, dm_bs):
     """ Test the tuner method `Tuner.scale_batch_size` with a datamodule.
""" trainer = Trainer( default_root_dir=tmpdir, @@ -66,14 +67,21 @@ def test_scale_batch_size_method_with_model_or_datamodule(tmpdir, model, datamod max_epochs=1, ) tuner = Tuner(trainer) - new_batch_size = tuner.scale_batch_size( - model=model, mode="binsearch", init_val=4, max_trials=2, datamodule=datamodule - ) + + model = BatchSizeModel(model_bs) + datamodule = BatchSizeDataModule(dm_bs) if dm_bs != -1 else None + + new_batch_size = tuner.scale_batch_size(model, mode="binsearch", init_val=4, max_trials=2, datamodule=datamodule) assert new_batch_size == 16 - if hasattr(model, "batch_size"): - assert model.batch_size == 16 - if datamodule is not None and hasattr(datamodule, "batch_size"): - assert datamodule.batch_size == 16 + + if model_bs is not None: + assert model.batch_size == new_batch_size + if dm_bs == -1: + # datamodule batch size takes precedence + assert trainer.train_dataloader.loaders.batch_size == new_batch_size + if dm_bs not in (-1, None): + assert datamodule.batch_size == new_batch_size + assert trainer.train_dataloader.loaders.batch_size == new_batch_size def test_model_reset_correctly(tmpdir): From 92d8f3195e49285c62b3f6a1c001c999407273d9 Mon Sep 17 00:00:00 2001 From: "Alvin(Xinyao) Sun" Date: Fri, 21 May 2021 22:46:43 -0600 Subject: [PATCH 5/5] feat: call reset_train_dataloader method instead of setting attribute to None --- pytorch_lightning/tuner/batch_size_scaling.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/tuner/batch_size_scaling.py b/pytorch_lightning/tuner/batch_size_scaling.py index 1777e4618c345..d114c36a60104 100644 --- a/pytorch_lightning/tuner/batch_size_scaling.py +++ b/pytorch_lightning/tuner/batch_size_scaling.py @@ -162,7 +162,7 @@ def _run_power_scaling( if changed: # Force the train dataloader to reset as the batch size has changed - trainer.train_dataloader = None + trainer.reset_train_dataloader(model) else: break return new_size @@ -197,7 +197,7 @@ def _run_binsearch_scaling( if changed: # Force the train dataloader to reset as the batch size has changed - trainer.train_dataloader = None + trainer.reset_train_dataloader(model) else: break