From 3e6d927a9dc91cab449d4f6bbc02957130689194 Mon Sep 17 00:00:00 2001 From: Tim Esler Date: Sun, 30 Jan 2022 19:44:36 +1100 Subject: [PATCH 01/15] Leave current value for cudnn.benchmark unchanged by default --- pytorch_lightning/trainer/trainer.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index ac01227fd00ac..9d044d49616fb 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -176,7 +176,7 @@ def __init__( num_sanity_val_steps: int = 2, resume_from_checkpoint: Optional[Union[Path, str]] = None, profiler: Optional[Union[BaseProfiler, str]] = None, - benchmark: bool = False, + benchmark: Optional[bool] = None, deterministic: bool = False, reload_dataloaders_every_n_epochs: int = 0, auto_lr_find: Union[bool, str] = False, @@ -227,7 +227,8 @@ def __init__( GPUs are configured to be in "exclusive mode", such that only one process at a time can access them. - benchmark: If true enables cudnn.benchmark. + benchmark: The value (``True`` or ``False``) to set ``torch.backends.cudnn.benchmark`` to. If not specified, + the value set in the current session will be unchanged. callbacks: Add a callback or list of callbacks. 
From 040fec070db8025062f8cd1ad41bafe06b5034d1 Mon Sep 17 00:00:00 2001 From: Tim Esler Date: Sun, 30 Jan 2022 19:47:09 +1100 Subject: [PATCH 02/15] Don't change torch.backends.cudnn.benchmark by default --- pytorch_lightning/trainer/connectors/accelerator_connector.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index d476bc5f0ca6e..9ab26f587b88c 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -182,7 +182,8 @@ def __init__( # benchmarking # TODO: should this be moved to GPU accelerator? - torch.backends.cudnn.benchmark = self.benchmark + if self.benchmark is not None: + torch.backends.cudnn.benchmark = self.benchmark self.replace_sampler_ddp = replace_sampler_ddp From 4910444910997830c843f86b89244df5509af605 Mon Sep 17 00:00:00 2001 From: Tim Esler Date: Sun, 30 Jan 2022 19:56:22 +1100 Subject: [PATCH 03/15] Update docs for benchmark arg to Trainer --- docs/source/common/trainer.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/source/common/trainer.rst b/docs/source/common/trainer.rst index 94c6a86bf393d..37402e30201a6 100644 --- a/docs/source/common/trainer.rst +++ b/docs/source/common/trainer.rst @@ -416,7 +416,8 @@ benchmark | -If true enables cudnn.benchmark. +The value (``True`` or ``False``) to set ``torch.backends.cudnn.benchmark`` to. If not specified, the value set in +the current session will be unchanged. This flag is likely to increase the speed of your system if your input sizes don't change. However, if it does, then it will likely make your system slower. 
From 4f4e58664838e7015fe5f3099bba6fda478c1010 Mon Sep 17 00:00:00 2001 From: Tim Esler Date: Sun, 30 Jan 2022 19:59:17 +1100 Subject: [PATCH 04/15] Clarify explanation of benchmark arg --- docs/source/common/trainer.rst | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/docs/source/common/trainer.rst b/docs/source/common/trainer.rst index 37402e30201a6..a379f79d8a561 100644 --- a/docs/source/common/trainer.rst +++ b/docs/source/common/trainer.rst @@ -416,11 +416,10 @@ benchmark | -The value (``True`` or ``False``) to set ``torch.backends.cudnn.benchmark`` to. If not specified, the value set in -the current session will be unchanged. -This flag is likely to increase the speed of your system if your -input sizes don't change. However, if it does, then it will likely -make your system slower. +The value (``True`` or ``False``) to set ``torch.backends.cudnn.benchmark`` to. If not specified, the value +set in the current session will be unchanged. +Setting this flag to ``True`` is likely to increase the speed of your system if your input sizes don't +change. However, if it does, then it will likely make your system slower. 
The speedup comes from allowing the cudnn auto-tuner to find the best algorithm for the hardware `[see discussion here] @@ -428,8 +427,8 @@ algorithm for the hardware `[see discussion here] Example:: - # default used by the Trainer - trainer = Trainer(benchmark=False) + # default used by the Trainer (current setting for torch.backends.cudnn.benchmark is unchanged) + trainer = Trainer(benchmark=None) deterministic ^^^^^^^^^^^^^ From dbe73afe12291dc9dbf8cf40293878a21fa615dc Mon Sep 17 00:00:00 2001 From: Tim Esler Date: Sun, 27 Feb 2022 14:52:12 +1100 Subject: [PATCH 05/15] Update docs/source/common/trainer.rst MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Adrian Wälchli --- docs/source/common/trainer.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/common/trainer.rst b/docs/source/common/trainer.rst index a2f606ff0cecd..d4abff8ac9d4a 100644 --- a/docs/source/common/trainer.rst +++ b/docs/source/common/trainer.rst @@ -417,7 +417,7 @@ benchmark | The value (``True`` or ``False``) to set ``torch.backends.cudnn.benchmark`` to. If not specified, the value -set in the current session will be unchanged. +set in the current session will be used. Setting this flag to ``True`` is likely to increase the speed of your system if your input sizes don't change. However, if it does, then it will likely make your system slower. 
From 30392279e0b3e766c9a258f7ba810870ccdbc32a Mon Sep 17 00:00:00 2001 From: Tim Esler Date: Sun, 27 Feb 2022 14:53:34 +1100 Subject: [PATCH 06/15] Clarify default benchmark behaviour in docs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Adrian Wälchli --- docs/source/common/trainer.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/common/trainer.rst b/docs/source/common/trainer.rst index d4abff8ac9d4a..b69f286047b02 100644 --- a/docs/source/common/trainer.rst +++ b/docs/source/common/trainer.rst @@ -427,7 +427,7 @@ algorithm for the hardware `[see discussion here] Example:: - # default used by the Trainer (current setting for torch.backends.cudnn.benchmark is unchanged) + # default used by the Trainer (will use whatever the current value for torch.backends.cudnn.benchmark is) trainer = Trainer(benchmark=None) deterministic From 1d4d6c4c8cd0f12111972ed30858a4ec409ddecb Mon Sep 17 00:00:00 2001 From: Tim Esler Date: Sun, 27 Feb 2022 15:53:33 +1100 Subject: [PATCH 07/15] Fix reference to non-existent benchmark attribute --- pytorch_lightning/trainer/connectors/accelerator_connector.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 153bcb079d761..395c535839c41 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -151,8 +151,8 @@ def __init__( # Set benchmark to False to ensure determinism benchmark = False # TODO: move to gpu accelerator - if self.benchmark is not None: - torch.backends.cudnn.benchmark = self.benchmark + if benchmark is not None: + torch.backends.cudnn.benchmark = benchmark self.benchmark = torch.backends.cudnn.benchmark self.replace_sampler_ddp = replace_sampler_ddp self.sync_batchnorm = sync_batchnorm From 
75f98c3ab8e54d612c6e1ba13ae1b1724fe361b2 Mon Sep 17 00:00:00 2001 From: Tim Esler Date: Sun, 27 Feb 2022 15:54:28 +1100 Subject: [PATCH 08/15] Remove erroneous deterministic comment --- pytorch_lightning/trainer/connectors/accelerator_connector.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 395c535839c41..a117c1b06b0ba 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -156,7 +156,6 @@ def __init__( self.benchmark = torch.backends.cudnn.benchmark self.replace_sampler_ddp = replace_sampler_ddp self.sync_batchnorm = sync_batchnorm - # Default to deterministic = True self._init_deterministic(deterministic) # 1. Parsing flags From 2413c6b41e82e96cda87248fad3b5f604d4a416b Mon Sep 17 00:00:00 2001 From: Tim Esler Date: Sun, 27 Feb 2022 15:55:59 +1100 Subject: [PATCH 09/15] Update deterministic docstring entry. --- pytorch_lightning/trainer/trainer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 2be0060b3ac15..b747497e18266 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -262,7 +262,8 @@ def __init__( Default: ``False``. deterministic: If ``True``, sets whether PyTorch operations must use deterministic algorithms. - Default: ``False``. + If not set, defaults to ``False``. + Default: ``None``. devices: Will be mapped to either `gpus`, `tpu_cores`, `num_processes` or `ipus`, based on the accelerator type. From caaea75f3fb84e9d39fe8e288f9309e5768640a0 Mon Sep 17 00:00:00 2001 From: Tim Esler Date: Sun, 27 Feb 2022 15:57:27 +1100 Subject: [PATCH 10/15] Clarify benchmark docstring. 
--- pytorch_lightning/trainer/trainer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index b747497e18266..bfa5c524c360c 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -231,8 +231,8 @@ def __init__( benchmark: Sets ``torch.backends.cudnn.benchmark``. The value (``True`` or ``False``) to set ``torch.backends.cudnn.benchmark`` to. If not specified, the value set in the current session will be used. However, if - :paramref:`~pytorch_lightning.trainer.trainer.Trainer.deterministic` is ``True``, this defaults to - ``False`` to ensure determinism. Override to manually set a different value. + :paramref:`~pytorch_lightning.trainer.trainer.Trainer.deterministic` is ``True``, ``benchmark`` defaults + to ``False`` to ensure determinism. Override to manually set a different value. Default: ``None``. callbacks: Add a callback or list of callbacks. From c0db741dcae4b22bb95dd03de787cb6ce55cd49a Mon Sep 17 00:00:00 2001 From: Tim Esler Date: Sun, 27 Feb 2022 16:02:40 +1100 Subject: [PATCH 11/15] Add `benchmark` fix to CHANGELOG. --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 058c92fe20e4d..34c1cb8198d3a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -658,6 +658,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
- Fixed passing `_ddp_params_and_buffers_to_ignore` ([#11949](https://github.com/PyTorchLightning/pytorch-lightning/pull/11949)) + +- Prevent modification of `torch.backends.cudnn.benchmark` when `benchmark` not set on the `Trainer` ([#12020](https://github.com/PyTorchLightning/pytorch-lightning/pull/12020)) + ## [1.5.10] - 2022-02-08 From 55f96ec3ed17d01a99ccfe2584e70e79bec44b7d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 27 Feb 2022 05:03:53 +0000 Subject: [PATCH 12/15] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 34c1cb8198d3a..bb356e0b7c32b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -658,7 +658,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed passing `_ddp_params_and_buffers_to_ignore` ([#11949](https://github.com/PyTorchLightning/pytorch-lightning/pull/11949)) - + - Prevent modification of `torch.backends.cudnn.benchmark` when `benchmark` not set on the `Trainer` ([#12020](https://github.com/PyTorchLightning/pytorch-lightning/pull/12020)) From cf21c54762fac839483f5046157d554a6a6bdd33 Mon Sep 17 00:00:00 2001 From: Tim Esler Date: Sun, 27 Feb 2022 16:07:03 +1100 Subject: [PATCH 13/15] Fix ref to nonexistent attribute --- .../trainer/connectors/accelerator_connector.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index a117c1b06b0ba..91899614761aa 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -216,10 +216,10 @@ def _init_deterministic(self, deterministic: bool) -> None: # Default to False if not set self.deterministic = 
deterministic or False if _TORCH_GREATER_EQUAL_1_8: - torch.use_deterministic_algorithms(deterministic) + torch.use_deterministic_algorithms(self.deterministic) else: - torch.set_deterministic(deterministic) - if deterministic: + torch.set_deterministic(self.deterministic) + if self.deterministic: # fixing non-deterministic part of horovod # https://github.com/PyTorchLightning/pytorch-lightning/pull/1572/files#r420279383 os.environ["HOROVOD_FUSION_THRESHOLD"] = "0" From 5426d1af411a18f59fe7e4b62c01e6df33ff882e Mon Sep 17 00:00:00 2001 From: Tim Esler Date: Sun, 27 Feb 2022 16:56:51 +1100 Subject: [PATCH 14/15] Update benchmark arg test --- tests/trainer/test_trainer.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 194ddd458e5ab..2b6ebebcf8d3e 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -641,12 +641,15 @@ def test_trainer_max_steps_accumulate_batches(tmpdir): @pytest.mark.parametrize( ["benchmark_", "deterministic", "expected"], [ - (None, False, True), + (None, False, None), (None, True, False), + (None, None, None), (True, False, True), (True, True, True), - (False, True, False), + (True, None, True), (False, False, False), + (False, True, False), + (False, None, False), ], ) def test_benchmark_option(benchmark_, deterministic, expected): @@ -659,6 +662,7 @@ def test_benchmark_option(benchmark_, deterministic, expected): trainer = Trainer(benchmark=benchmark_, deterministic=deterministic) else: trainer = Trainer(benchmark=benchmark_, deterministic=deterministic) + expected = original_val if expected is None else expected assert torch.backends.cudnn.benchmark == expected assert trainer._accelerator_connector.benchmark == expected From 22fdded803d051b94e15767d507328301f297c37 Mon Sep 17 00:00:00 2001 From: Tim Esler Date: Wed, 2 Mar 2022 14:43:45 +1100 Subject: [PATCH 15/15] Update docs/source/common/trainer.rst MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Carlos Mocholí --- docs/source/common/trainer.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/common/trainer.rst b/docs/source/common/trainer.rst index 5d4dc376e80ef..2a594ce851933 100644 --- a/docs/source/common/trainer.rst +++ b/docs/source/common/trainer.rst @@ -436,7 +436,7 @@ Example:: # you can overwrite the value trainer = Trainer(benchmark=False) - # defaults to False when deterministic is True + # `benchmark` defaults to False when deterministic is True trainer = Trainer(deterministic=True) deterministic