
Commit 4c04a30

Merge branch 'master' into fix/horovod-allgather-plugin
2 parents: c55bdd9 + a3def9d


52 files changed (+251, -189 lines)

CHANGELOG.md

Lines changed: 9 additions & 1 deletion

@@ -29,6 +29,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
     * Reset `current` progress counters when restarting an epoch loop that had already finished ([#9371](https://github.com/PyTorchLightning/pytorch-lightning/pull/9371))
     * Call `reset_on_restart` in the loop's `reset` hook instead of when loading a checkpoint ([#9561](https://github.com/PyTorchLightning/pytorch-lightning/pull/9561))
     * Use `completed` over `processed` in `reset_on_restart` ([#9656](https://github.com/PyTorchLightning/pytorch-lightning/pull/9656))
+    * Rename `reset_on_epoch` to `reset_on_run` ([#9658](https://github.com/PyTorchLightning/pytorch-lightning/pull/9658))


 - Added `batch_size` and `rank_zero_only` arguments for `log_dict` to match `log` ([#8628](https://github.com/PyTorchLightning/pytorch-lightning/pull/8628))

@@ -152,6 +153,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Added `RichModelSummary` callback ([#9546](https://github.com/PyTorchLightning/pytorch-lightning/pull/9546))


+- Added `enable_progress_bar` to Trainer constructor ([#9664](https://github.com/PyTorchLightning/pytorch-lightning/pull/9664))
+
+
 - Added `pl_legacy_patch` load utility for loading old checkpoints that have pickled legacy Lightning attributes ([#9166](https://github.com/PyTorchLightning/pytorch-lightning/pull/9166))


@@ -226,9 +230,13 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - `seed_everything` now fails when an invalid seed value is passed instead of selecting a random seed ([#8787](https://github.com/PyTorchLightning/pytorch-lightning/pull/8787))


+- Use a unique filename to save temp ckpt in tuner ([#9682](https://github.com/PyTorchLightning/pytorch-lightning/pull/9682))
+
+
 - Changed `HorovodPlugin.all_gather` to return a `torch.Tensor` instead of a list ([#9696](https://github.com/PyTorchLightning/pytorch-lightning/pull/9696))


+
 ### Deprecated

 - Deprecated `LightningModule.summarize()` in favor of `pytorch_lightning.utilities.model_summary.summarize()`

@@ -267,7 +275,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Deprecated `LightningLoggerBase.close`, `LoggerCollection.close` in favor of `LightningLoggerBase.finalize`, `LoggerCollection.finalize` ([#9422](https://github.com/PyTorchLightning/pytorch-lightning/pull/9422))


-- Deprecated passing `progress_bar_refresh_rate` to the `Trainer` constructor in favor of adding the `ProgressBar` callback with `refresh_rate` directly to the list of callbacks ([#9616](https://github.com/PyTorchLightning/pytorch-lightning/pull/9616))
+- Deprecated passing `progress_bar_refresh_rate` to the `Trainer` constructor in favor of adding the `ProgressBar` callback with `refresh_rate` directly to the list of callbacks, or passing `enable_progress_bar=False` to disable the progress bar ([#9616](https://github.com/PyTorchLightning/pytorch-lightning/pull/9616))


 ### Removed

benchmarks/test_basic_parity.py

Lines changed: 1 addition & 1 deletion

@@ -157,7 +157,7 @@ def lightning_loop(cls_model, idx, device_type: str = "cuda", num_epochs=10):
     trainer = Trainer(
         # as the first run is skipped, no need to run it long
         max_epochs=num_epochs if idx > 0 else 1,
-        progress_bar_refresh_rate=0,
+        enable_progress_bar=False,
         weights_summary=None,
         gpus=1 if device_type == "cuda" else 0,
         checkpoint_callback=False,

docs/source/common/trainer.rst

Lines changed: 17 additions & 0 deletions

@@ -1281,6 +1281,10 @@ See the :doc:`profiler documentation <../advanced/profiler>`. for more details.

 progress_bar_refresh_rate
 ^^^^^^^^^^^^^^^^^^^^^^^^^
+``progress_bar_refresh_rate`` has been deprecated in v1.5 and will be removed in v1.7.
+Please pass :class:`~pytorch_lightning.callbacks.progress.ProgressBar` with ``refresh_rate``
+directly to the Trainer's ``callbacks`` argument instead. To disable the progress bar,
+pass ``enable_progress_bar = False`` to the Trainer.

 .. raw:: html


@@ -1305,6 +1309,19 @@ Note:
   Lightning will set it to 20 in these environments if the user does not provide a value.
 - This argument is ignored if a custom callback is passed to :paramref:`~Trainer.callbacks`.

+enable_progress_bar
+^^^^^^^^^^^^^^^^^^^
+
+Whether to enable or disable the progress bar. Defaults to True.
+
+.. testcode::
+
+    # default used by the Trainer
+    trainer = Trainer(enable_progress_bar=True)
+
+    # disable progress bar
+    trainer = Trainer(enable_progress_bar=False)
+
 reload_dataloaders_every_n_epochs
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

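For users migrating existing code, the two replacement options described in this docs change can be sketched roughly as follows. This is a minimal example assuming the v1.5 API referenced above; `ProgressBar` is imported from the path the docs cite (`pytorch_lightning.callbacks.progress`):

    from pytorch_lightning import Trainer
    from pytorch_lightning.callbacks.progress import ProgressBar

    # before (deprecated in v1.5, to be removed in v1.7):
    # trainer = Trainer(progress_bar_refresh_rate=20)

    # after: pass the ProgressBar callback with the desired refresh_rate ...
    trainer = Trainer(callbacks=[ProgressBar(refresh_rate=20)])

    # ... or disable the progress bar entirely
    trainer = Trainer(enable_progress_bar=False)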

pl_examples/domain_templates/computer_vision_fine_tuning.py

Lines changed: 0 additions & 1 deletion

@@ -273,7 +273,6 @@ def add_arguments_to_parser(self, parser):
             {
                 "trainer.max_epochs": 15,
                 "trainer.weights_summary": None,
-                "trainer.progress_bar_refresh_rate": 1,
                 "trainer.num_sanity_val_steps": 0,
             }
         )

pytorch_lightning/callbacks/finetuning.py

Lines changed: 4 additions & 1 deletion

@@ -285,7 +285,10 @@ def _store(

     def on_train_epoch_start(self, trainer, pl_module):
         """Called when the epoch begins."""
-        for opt_idx, optimizer in trainer.fit_loop.epoch_loop.batch_loop.get_active_optimizers():
+        # import is here to avoid circular imports
+        from pytorch_lightning.loops.utilities import _get_active_optimizers
+
+        for opt_idx, optimizer in _get_active_optimizers(trainer.optimizers, trainer.optimizer_frequencies):
             num_param_groups = len(optimizer.param_groups)
             self.finetune_function(pl_module, trainer.current_epoch, optimizer, opt_idx)
             current_param_groups = optimizer.param_groups

pytorch_lightning/loops/batch/training_batch_loop.py

Lines changed: 4 additions & 34 deletions

@@ -11,16 +11,15 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Any, List, Optional, Tuple
+from typing import Any, List, Optional

-import numpy as np
 from deprecate import void
 from torch import Tensor
-from torch.optim import Optimizer

 from pytorch_lightning.loops.base import Loop
 from pytorch_lightning.loops.optimization.manual_loop import ManualOptimization
 from pytorch_lightning.loops.optimization.optimizer_loop import OptimizerLoop
+from pytorch_lightning.loops.utilities import _get_active_optimizers
 from pytorch_lightning.trainer.supporters import TensorRunningAccum
 from pytorch_lightning.utilities import AttributeDict
 from pytorch_lightning.utilities.types import STEP_OUTPUT

@@ -41,21 +40,13 @@ def __init__(self) -> None:
         self.manual_loop = ManualOptimization()

         self._warning_cache: WarningCache = WarningCache()
-        self._optimizer_freq_cumsum: Optional[int] = None
         self._remaining_splits: Optional[List[Any]] = None

     @property
     def done(self) -> bool:
         """Returns if all batch splits have been processed already."""
         return len(self._remaining_splits) == 0

-    @property
-    def optimizer_freq_cumsum(self) -> int:
-        """Returns the cumulated sum of optimizer frequencies."""
-        if self._optimizer_freq_cumsum is None:
-            self._optimizer_freq_cumsum = np.cumsum(self.trainer.optimizer_frequencies)
-        return self._optimizer_freq_cumsum
-
     def connect(
         self, optimizer_loop: Optional["Loop"] = None, manual_loop: Optional[ManualOptimization] = None
     ) -> None:

@@ -123,7 +114,8 @@ def advance(self, batch, batch_idx):

         if self.trainer.lightning_module.automatic_optimization:
             # in automatic optimization, hand over execution to the OptimizerLoop
-            batch_outputs = self.optimizer_loop.run(split_batch, self.get_active_optimizers(batch_idx), batch_idx)
+            optimizers = _get_active_optimizers(self.trainer.optimizers, self.trainer.optimizer_frequencies, batch_idx)
+            batch_outputs = self.optimizer_loop.run(split_batch, optimizers, batch_idx)
             # combine outputs from each optimizer
             for k in range(len(batch_outputs)):
                 self.batch_outputs[k].extend(batch_outputs[k])

@@ -142,10 +134,6 @@ def teardown(self) -> None:
         # release memory
         self._remaining_splits = None

-    def num_active_optimizers(self, batch_idx: Optional[int] = None) -> int:
-        """Gets the number of active optimizers based on their frequency."""
-        return len(self.get_active_optimizers(batch_idx))
-
     def _tbptt_split_batch(self, batch: Any) -> List[Any]:
         """Splits a single batch into a list of sequence steps for tbptt.


@@ -175,21 +163,3 @@ def _update_running_loss(self, current_loss: Tensor) -> None:

         # reset for next set of accumulated grads
         self.accumulated_loss.reset()
-
-    def get_active_optimizers(self, batch_idx: Optional[int] = None) -> List[Tuple[int, Optimizer]]:
-        """Returns the currently active optimizers. When multiple optimizers are used with different frequencies,
-        only one of the optimizers is active at a time.
-
-        Returns:
-            A list of tuples (opt_idx, optimizer) of currently active optimizers.
-        """
-        if not self.trainer.optimizer_frequencies:
-            # call training_step once per optimizer
-            return list(enumerate(self.trainer.optimizers))
-
-        optimizers_loop_length = self.optimizer_freq_cumsum[-1]
-        current_place_in_loop = batch_idx % optimizers_loop_length
-
-        # find optimzier index by looking for the first {item > current_place} in the cumsum list
-        opt_idx = np.searchsorted(self.optimizer_freq_cumsum, current_place_in_loop, side="right")
-        return [(opt_idx, self.trainer.optimizers[opt_idx])]
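The logic deleted here is not removed outright: the call sites in this commit now import `_get_active_optimizers` from `pytorch_lightning.loops.utilities`, whose diff is not among the files shown on this page. A minimal sketch of what that shared helper presumably looks like, reconstructed from the deleted method above and the new call sites (the exact signature in loops/utilities.py is an assumption):

    from typing import List, Optional, Tuple

    import numpy as np
    from torch.optim import Optimizer


    def _get_active_optimizers(
        optimizers: List[Optimizer], frequencies: List[int], batch_idx: Optional[int] = None
    ) -> List[Tuple[int, Optimizer]]:
        """Return the currently active optimizers; with per-optimizer
        frequencies, only one optimizer is active for a given batch index."""
        if not frequencies:
            # no frequencies configured: every optimizer is active on every batch
            return list(enumerate(optimizers))

        # cycle through the optimizers according to their cumulative frequencies
        freq_cumsum = np.cumsum(frequencies)
        current_place_in_loop = batch_idx % freq_cumsum[-1]

        # find the optimizer index: first cumsum entry greater than current_place
        opt_idx = int(np.searchsorted(freq_cumsum, current_place_in_loop, side="right"))
        return [(opt_idx, optimizers[opt_idx])]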

pytorch_lightning/loops/dataloader/dataloader_loop.py

Lines changed: 2 additions & 2 deletions

@@ -56,9 +56,9 @@ def done(self) -> bool:
     def reset(self) -> None:
         """Resets the internal state."""
         if not self.restarting:
-            self.dataloader_progress.current.reset()
+            self.dataloader_progress.reset_on_run()
         else:
-            self.dataloader_progress.current.reset_on_restart()
+            self.dataloader_progress.reset_on_restart()

     def on_advance_start(self, *args: Any, **kwargs: Any) -> None:
         self.dataloader_progress.increment_ready()
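Across the loop files in this commit, calls that previously reached into the tracker (`progress.current.reset()`) are replaced by methods on the progress object itself (`progress.reset_on_run()` / `progress.reset_on_restart()`), matching the CHANGELOG entry renaming `reset_on_epoch` to `reset_on_run`. A rough sketch of the wrapper this implies; class and field names are assumptions, since `trainer/progress.py` is not part of the diff shown:

    from dataclasses import dataclass, field


    @dataclass
    class Tracker:
        ready: int = 0
        started: int = 0
        processed: int = 0
        completed: int = 0

        def reset(self) -> None:
            self.ready = self.started = self.processed = self.completed = 0

        def reset_on_restart(self) -> None:
            # roll every counter back to the last fully completed step
            self.ready = self.started = self.processed = self.completed


    @dataclass
    class Progress:
        total: Tracker = field(default_factory=Tracker)
        current: Tracker = field(default_factory=Tracker)

        def reset_on_run(self) -> None:
            # what the loops previously did inline via `progress.current.reset()`
            self.current.reset()

        def reset_on_restart(self) -> None:
            # previously `progress.current.reset_on_restart()`
            self.current.reset_on_restart()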

pytorch_lightning/loops/epoch/evaluation_epoch_loop.py

Lines changed: 2 additions & 2 deletions

@@ -58,9 +58,9 @@ def reset(self) -> None:
         self.outputs = []

         if not self.restarting:
-            self.batch_progress.current.reset()
+            self.batch_progress.reset_on_run()
         else:
-            self.batch_progress.current.reset_on_restart()
+            self.batch_progress.reset_on_restart()

     def on_run_start(
         self, data_fetcher: AbstractDataFetcher, dataloader_idx: int, dl_max_batches: int, num_dataloaders: int

pytorch_lightning/loops/epoch/prediction_epoch_loop.py

Lines changed: 1 addition & 1 deletion

@@ -46,7 +46,7 @@ def reset(self) -> None:
         """Resets the loops internal state."""
         self._all_batch_indices: List[int] = []
         self.predictions: List[Any] = []
-        self.batch_progress.current.reset()
+        self.batch_progress.reset_on_run()

     def on_run_start(
         self,

pytorch_lightning/loops/epoch/training_epoch_loop.py

Lines changed: 14 additions & 7 deletions

@@ -18,7 +18,7 @@
 from pytorch_lightning import loops  # import as loops to avoid circular imports
 from pytorch_lightning.loops.batch import TrainingBatchLoop
 from pytorch_lightning.loops.optimization.closure import OutputResult
-from pytorch_lightning.loops.utilities import _prepare_dataloader_iter
+from pytorch_lightning.loops.utilities import _get_active_optimizers, _prepare_dataloader_iter
 from pytorch_lightning.trainer.connectors.logger_connector.result import ResultCollection
 from pytorch_lightning.trainer.progress import BatchProgress, SchedulerProgress
 from pytorch_lightning.utilities.exceptions import MisconfigurationException

@@ -96,15 +96,15 @@ def reset(self) -> None:
         assert self.batch_loop.optimizer_loop is not None
         if self.restarting:
             self.batch_progress.reset_on_restart()
-            self.scheduler_progress.current.reset_on_restart()
+            self.scheduler_progress.reset_on_restart()
             self.batch_loop.optimizer_loop.optim_progress.reset_on_restart()
         else:
-            self.batch_progress.reset_on_epoch()
-            self.scheduler_progress.reset_on_epoch()
-            self.batch_loop.optimizer_loop.optim_progress.reset_on_epoch()
+            self.batch_progress.reset_on_run()
+            self.scheduler_progress.reset_on_run()
+            self.batch_loop.optimizer_loop.optim_progress.reset_on_run()

         # track epoch output
-        self._epoch_output = [[] for _ in range(self.batch_loop.num_active_optimizers(self.total_batch_idx))]
+        self._epoch_output = [[] for _ in range(self._num_active_optimizers(self.total_batch_idx))]

     def on_run_start(self, dataloader_iter: Iterator, **kwargs: Any) -> None:
         # hook

@@ -340,10 +340,13 @@ def update_lr_schedulers(self, interval: str, update_plateau_schedulers: bool) -
         """updates the lr schedulers based on the given interval."""
         if interval == "step" and self._should_accumulate():
             return
+        active_optimizers = _get_active_optimizers(
+            self.trainer.optimizers, self.trainer.optimizer_frequencies, self.total_batch_idx
+        )
         self.trainer.optimizer_connector.update_learning_rates(
             interval=interval,
             update_plateau_schedulers=update_plateau_schedulers,
-            opt_indices=[opt_idx for opt_idx, _ in self.batch_loop.get_active_optimizers(self.total_batch_idx)],
+            opt_indices=[opt_idx for opt_idx, _ in active_optimizers],
         )

     def _should_check_val_fx(self, batch_idx: int, is_last_batch: bool) -> bool:

@@ -377,3 +380,7 @@ def _save_loggers_on_train_batch_end(self) -> None:
         should_flush_logs = self.trainer.logger_connector.should_flush_logs
         if should_flush_logs and self.trainer.is_global_zero and self.trainer.logger is not None:
             self.trainer.logger.save()
+
+    def _num_active_optimizers(self, batch_idx: Optional[int] = None) -> int:
+        """Gets the number of active optimizers based on their frequency."""
+        return len(_get_active_optimizers(self.trainer.optimizers, self.trainer.optimizer_frequencies, batch_idx))
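The `optimizer_frequencies` consulted throughout these changes come from `configure_optimizers` returning per-optimizer dictionaries with a "frequency" key. A hypothetical module that would exercise the cycling logic above (submodule names are invented for illustration, and `training_step` is omitted):

    import torch
    import torch.nn as nn
    import pytorch_lightning as pl


    class HypotheticalGAN(pl.LightningModule):
        """Illustrative only: shows where `trainer.optimizer_frequencies` come from."""

        def __init__(self):
            super().__init__()
            self.generator = nn.Linear(32, 32)       # stand-in submodules
            self.discriminator = nn.Linear(32, 1)

        def configure_optimizers(self):
            gen_opt = torch.optim.Adam(self.generator.parameters(), lr=1e-4)
            dis_opt = torch.optim.Adam(self.discriminator.parameters(), lr=4e-4)
            # one generator step, then five discriminator steps, repeating;
            # _get_active_optimizers resolves which one is active from batch_idx
            return [
                {"optimizer": gen_opt, "frequency": 1},
                {"optimizer": dis_opt, "frequency": 5},
            ]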
