Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions pytorch_lightning/callbacks/model_checkpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,13 +247,16 @@ def state_key(self) -> str:
save_on_train_epoch_end=self._save_on_train_epoch_end,
)

def on_pretrain_routine_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
"""When pretrain routine starts we resolve the ckpt dir on the fly."""
def setup(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule", stage: Optional[str] = None) -> None:
# NOTE: setting these attributes needs to happen as early as possible BEFORE reloading callback states,
# because the attributes are part of the state_key which needs to be fully defined before reloading.
if self._save_on_train_epoch_end is None:
# if the user runs validation multiple times per training epoch or multiple training epochs without
# validation, then we run after validation instead of on train epoch end
self._save_on_train_epoch_end = trainer.val_check_interval == 1.0 and trainer.check_val_every_n_epoch == 1

def on_pretrain_routine_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
    """When pretrain routine starts we build the ckpt dir on the fly.

    Calls ``__resolve_ckpt_dir(trainer)`` unconditionally, then calls
    ``__warn_if_dir_not_empty(self.dirpath)`` only when ``trainer.is_global_zero`` is truthy.
    Both helpers are defined outside this view.

    Args:
        trainer: Passed to the directory-resolution helper; its ``is_global_zero`` flag gates the warning.
        pl_module: Not used by this hook.
    """
    self.__resolve_ckpt_dir(trainer)
    # Only the global-zero process performs the non-empty-directory check.
    if trainer.is_global_zero:
        self.__warn_if_dir_not_empty(self.dirpath)
Expand Down
5 changes: 3 additions & 2 deletions tests/models/test_restore.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,10 +246,11 @@ def get_trainer_args():
checkpoint = ModelCheckpoint(dirpath=tmpdir, monitor="val_loss", save_last=True)
trainer_args = dict(
default_root_dir=tmpdir,
max_steps=1,
limit_train_batches=1,
limit_val_batches=2,
max_epochs=1,
logger=False,
callbacks=[checkpoint, callback_capture],
limit_val_batches=2,
)
assert checkpoint.best_model_path == ""
assert checkpoint.best_model_score is None
Expand Down