Merged

23 commits
8bc88a4  Thanks to @carmocca :tada:  (krshrimali, Feb 16, 2022)
ab4067c  [pre-commit.ci] auto fixes from pre-commit.com hooks  (pre-commit-ci[bot], Feb 16, 2022)
4626492  Add entry to the changelog  (krshrimali, Feb 16, 2022)
dc47950  Merge branch 'feature/9128_benchmark_trainer_default' of github.com:k…  (krshrimali, Feb 16, 2022)
ed4db57  Improve docs  (carmocca, Feb 16, 2022)
39b771c  Singular to plural  (carmocca, Feb 16, 2022)
8d3275c  Update tests/trainer/test_trainer.py  (krshrimali, Feb 17, 2022)
0fb524e  Change entry to 'Changed'  (krshrimali, Feb 17, 2022)
8982149  Merge branch 'feature/9128_benchmark_trainer_default' of github.com:k…  (krshrimali, Feb 17, 2022)
815bc58  Resolve merge conflict  (krshrimali, Feb 18, 2022)
58034a5  Minor, default to None  (krshrimali, Feb 18, 2022)
d3dcdee  Update CHANGELOG.md  (krshrimali, Feb 21, 2022)
fca69f3  Merge branch 'master' into feature/9128_benchmark_trainer_default  (krshrimali, Feb 22, 2022)
2fe07fa  Raise a warning when benchmark and deterministic are True  (krshrimali, Feb 23, 2022)
8a16308  Fix merge conflict  (krshrimali, Feb 23, 2022)
324b5bd  Fix merge conflict  (krshrimali, Feb 23, 2022)
7fb9ed6  Apply suggestions from code review  (krshrimali, Feb 23, 2022)
c2cbf0f  Apply suggestions from code review  (rohitgr7, Feb 23, 2022)
f4a96c4  Apply suggestions from code review  (krshrimali, Feb 23, 2022)
30739d4  Update tests/trainer/test_trainer.py  (rohitgr7, Feb 23, 2022)
1d5b928  pre-commit  (rohitgr7, Feb 23, 2022)
22ba634  Merge remote-tracking branch 'upstream/master' into feature/9128_benc…  (krshrimali, Feb 24, 2022)
5425952  Avoid conflict with pytest plugin  (carmocca, Feb 24, 2022)
3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -134,6 +134,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

### Changed

- Make `benchmark` flag optional and set its value based on the deterministic flag ([#11944](https://github.com/PyTorchLightning/pytorch-lightning/pull/11944))


- Implemented a new native and rich format in `_print_results` method of the `EvaluationLoop` ([#11332](https://github.com/PyTorchLightning/pytorch-lightning/pull/11332))


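To make the changelog entry above concrete, here is a small illustrative snippet (not part of the PR) of the behaviour it describes. It assumes a build of PyTorch Lightning that includes this change; constructing a `Trainer` is enough to apply the flag, since the accelerator connector sets `torch.backends.cudnn.benchmark` during `__init__`:

```python
import torch
from pytorch_lightning import Trainer

# `benchmark` left unset (None): the value is derived from `deterministic`
Trainer()                                      # deterministic defaults to False
assert torch.backends.cudnn.benchmark          # -> True

Trainer(deterministic=True)                    # derived as `not deterministic`
assert not torch.backends.cudnn.benchmark      # -> False

# an explicit `benchmark` value always wins over the derived default
Trainer(benchmark=False, deterministic=False)
assert not torch.backends.cudnn.benchmark      # -> False
```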
18 changes: 10 additions & 8 deletions docs/source/common/trainer.rst
@@ -416,18 +416,20 @@ benchmark

|

If true enables cudnn.benchmark.
This flag is likely to increase the speed of your system if your
input sizes don't change. However, if it does, then it will likely
make your system slower.
Defaults to ``True`` if :paramref:`~pytorch_lightning.trainer.Trainer.deterministic` is not set.
This flag sets the ``torch.backends.cudnn.benchmark`` flag. You can read more about its impact
`here <https://pytorch.org/docs/stable/notes/randomness.html#cuda-convolution-benchmarking>`__

The speedup comes from allowing the cudnn auto-tuner to find the best
algorithm for the hardware `[see discussion here]
<https://discuss.pytorch.org/t/what-does-torch-backends-cudnn-benchmark-do/5936>`_.
This is likely to increase the speed of your system if your input sizes don't change. However, if they do, then it
might make your system slower. The CUDNN auto-tuner will try to find the best algorithm for the hardware when a new
input size is encountered. Read more about it `here <https://discuss.pytorch.org/t/what-does-torch-backends-cudnn-benchmark-do/5936>`__.

Example::

# default used by the Trainer
# defaults to True if not deterministic (which is False by default)
trainer = Trainer()

# you can overwrite the value
trainer = Trainer(benchmark=False)

deterministic
10 changes: 8 additions & 2 deletions pytorch_lightning/trainer/connectors/accelerator_connector.py
@@ -99,7 +99,7 @@ def __init__(
amp_type: str = "native",
amp_level: Optional[str] = None,
sync_batchnorm: bool = False,
benchmark: bool = False,
benchmark: Optional[bool] = None,
replace_sampler_ddp: bool = True,
deterministic: bool = False,
num_processes: Optional[int] = None, # deprecated
@@ -142,8 +142,14 @@ def __init__(
B. Strategy > Accelerator/precision/plugins
C. TODO When multiple flag set to the same thing
"""
if benchmark and deterministic:
rank_zero_warn(
"You passed `deterministic=True` and `benchmark=True`. Note that PyTorch ignores"
" torch.backends.cudnn.deterministic=True when torch.backends.cudnn.benchmark=True.",
)
self.benchmark = not deterministic if benchmark is None else benchmark
# TODO: move to gpu accelerator
torch.backends.cudnn.benchmark = benchmark
torch.backends.cudnn.benchmark = self.benchmark
self.replace_sampler_ddp = replace_sampler_ddp
self.sync_batchnorm = sync_batchnorm
self._init_deterministic(deterministic)
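For readers skimming the hunk above, the same decision can be written as a small standalone sketch; `resolve_benchmark` is an illustrative helper, not part of the library's API:

```python
import warnings
from typing import Optional


def resolve_benchmark(benchmark: Optional[bool], deterministic: bool) -> bool:
    """Mimic the connector logic: an explicit `benchmark` wins, otherwise derive it from `deterministic`."""
    if benchmark and deterministic:
        # PyTorch ignores cudnn.deterministic=True while cudnn.benchmark=True, hence the warning
        warnings.warn(
            "You passed `deterministic=True` and `benchmark=True`. Note that PyTorch ignores"
            " torch.backends.cudnn.deterministic=True when torch.backends.cudnn.benchmark=True."
        )
    return not deterministic if benchmark is None else benchmark


assert resolve_benchmark(None, False) is True    # new default: benchmarking on
assert resolve_benchmark(None, True) is False    # deterministic runs switch it off
assert resolve_benchmark(True, True) is True     # explicit value respected, but a warning is raised
```

This is the same truth table that the parametrized test further down exercises.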
7 changes: 4 additions & 3 deletions pytorch_lightning/trainer/trainer.py
@@ -172,7 +172,7 @@ def __init__(
num_sanity_val_steps: int = 2,
resume_from_checkpoint: Optional[Union[Path, str]] = None,
profiler: Optional[Union[BaseProfiler, str]] = None,
benchmark: bool = False,
benchmark: Optional[bool] = None,
deterministic: bool = False,
reload_dataloaders_every_n_epochs: int = 0,
auto_lr_find: Union[bool, str] = False,
@@ -228,8 +228,9 @@
that only one process at a time can access them.
Default: ``False``.

benchmark: If ``True``, enables cudnn.benchmark.
Default: ``False``.
benchmark: Sets ``torch.backends.cudnn.benchmark``.
Defaults to ``True`` if :paramref:`~pytorch_lightning.trainer.trainer.Trainer.deterministic`
is ``False``. Overwrite to manually set a different value. Default: ``None``.

callbacks: Add a callback or list of callbacks.
Default: ``None``.
34 changes: 21 additions & 13 deletions tests/trainer/test_trainer.py
@@ -638,23 +638,31 @@ def test_trainer_max_steps_accumulate_batches(tmpdir):
assert trainer.global_step == trainer.max_steps, "Model did not stop at max_steps"


def test_benchmark_option(tmpdir):
@pytest.mark.parametrize(
["benchmark_", "deterministic", "expected"],
[
(None, False, True),
(None, True, False),
(True, False, True),
(True, True, True),
(False, True, False),
(False, False, False),
],
)
def test_benchmark_option(benchmark_, deterministic, expected):
"""Verify benchmark option."""

model = BoringModel()

# verify torch.backends.cudnn.benchmark is not turned on
assert not torch.backends.cudnn.benchmark
original_val = torch.backends.cudnn.benchmark

# fit model
trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, benchmark=True)
trainer.fit(model)

# verify training completed
assert trainer.state.finished, f"Training failed with {trainer.state}"
if benchmark_ and deterministic:
with pytest.warns(UserWarning, match="You passed `deterministic=True` and `benchmark=True`"):
trainer = Trainer(benchmark=benchmark_, deterministic=deterministic)
else:
trainer = Trainer(benchmark=benchmark_, deterministic=deterministic)
assert torch.backends.cudnn.benchmark == expected
assert trainer._accelerator_connector.benchmark == expected

# verify torch.backends.cudnn.benchmark is not turned off
assert torch.backends.cudnn.benchmark
torch.backends.cudnn.benchmark = original_val


@pytest.mark.parametrize("ckpt_path", (None, "best", "specific"))
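Outside the test suite, the warning added in this PR is raised through Python's standard `warnings` machinery (the test above matches it as a `UserWarning`), so it can be captured or silenced like any other warning. A hypothetical example, again assuming a build that includes this change:

```python
import warnings
from pytorch_lightning import Trainer

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    # conflicting flags: benchmark wins, but the user is told about it
    Trainer(benchmark=True, deterministic=True)

assert any("`deterministic=True` and `benchmark=True`" in str(w.message) for w in caught)
```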