From 564f9c694f0d5468f67d750cefb9211cd8b3653a Mon Sep 17 00:00:00 2001
From: Kaushik B
Date: Fri, 10 Sep 2021 17:05:56 +0530
Subject: [PATCH 1/5] Remove should_rank_save_checkpoint property from Trainer

---
 pytorch_lightning/trainer/trainer.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py
index 7cabcb292622a..50c55ee87f362 100644
--- a/pytorch_lightning/trainer/trainer.py
+++ b/pytorch_lightning/trainer/trainer.py
@@ -1465,10 +1465,6 @@ def world_size(self) -> int:
         # some training types define a world size
         return getattr(self.accelerator.training_type_plugin, "world_size", 1)
 
-    @property
-    def should_rank_save_checkpoint(self) -> bool:
-        return self.accelerator.training_type_plugin.should_rank_save_checkpoint
-
     @property
     def _distrib_type(self) -> DistributedType:
         return self.accelerator_connector._distrib_type

From 6a53896da0ff2fc78e58b9361b659a8a3faad6ba Mon Sep 17 00:00:00 2001
From: Kaushik B
Date: Fri, 10 Sep 2021 17:09:42 +0530
Subject: [PATCH 2/5] Update Changelog

---
 CHANGELOG.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2462ffbdbb773..b9084cffc0e4b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -333,6 +333,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Removed deprecated properties `DeepSpeedPlugin.cpu_offload*` in favor of `offload_optimizer`, `offload_parameters` and `pin_memory` ([#9244](https://github.com/PyTorchLightning/pytorch-lightning/pull/9244))
 
 
+- Removed `should_rank_save_checkpoint` property from Trainer ([#9433](https://github.com/PyTorchLightning/pytorch-lightning/pull/9433))
+
+
 ### Fixed
 
 

From 0f34ad82ca4b9dd0bcf3d63cdc4188fa791968f9 Mon Sep 17 00:00:00 2001
From: Kaushik B
Date: Fri, 10 Sep 2021 17:22:58 +0530
Subject: [PATCH 3/5] Update reference

---
 pytorch_lightning/callbacks/model_checkpoint.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pytorch_lightning/callbacks/model_checkpoint.py b/pytorch_lightning/callbacks/model_checkpoint.py
index 42cd078d2179d..abae6e51cc077 100644
--- a/pytorch_lightning/callbacks/model_checkpoint.py
+++ b/pytorch_lightning/callbacks/model_checkpoint.py
@@ -616,7 +616,7 @@ def __resolve_ckpt_dir(self, trainer: "pl.Trainer") -> None:
         self.dirpath = ckpt_path
 
-        if not trainer.fast_dev_run and trainer.should_rank_save_checkpoint:
+        if not trainer.fast_dev_run and trainer.training_type_plugin.should_rank_save_checkpoint:
             self._fs.makedirs(self.dirpath, exist_ok=True)
 
     def _validate_monitor_key(self, trainer: "pl.Trainer") -> None:
         metrics = trainer.callback_metrics

From 4e93c340d801e1b17fd8e4788d041b62774d13cb Mon Sep 17 00:00:00 2001
From: Kaushik B
Date: Mon, 20 Sep 2021 16:05:52 +0200
Subject: [PATCH 4/5] Remove unnecessary makedirs

---
 pytorch_lightning/callbacks/model_checkpoint.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/pytorch_lightning/callbacks/model_checkpoint.py b/pytorch_lightning/callbacks/model_checkpoint.py
index abae6e51cc077..7798e5ccbd350 100644
--- a/pytorch_lightning/callbacks/model_checkpoint.py
+++ b/pytorch_lightning/callbacks/model_checkpoint.py
@@ -616,9 +616,6 @@ def __resolve_ckpt_dir(self, trainer: "pl.Trainer") -> None:
 
         self.dirpath = ckpt_path
 
-        if not trainer.fast_dev_run and trainer.training_type_plugin.should_rank_save_checkpoint:
-            self._fs.makedirs(self.dirpath, exist_ok=True)
-
     def _validate_monitor_key(self, trainer: "pl.Trainer") -> None:
         metrics = trainer.callback_metrics
 

From 545e90945dd7b170deebf65dccb6bca4fe5b37b3 Mon Sep 17 00:00:00 2001
From: Kaushik B
Date: Wed, 13 Oct 2021 15:04:55 +0530
Subject: [PATCH 5/5] Update

---
 pytorch_lightning/callbacks/model_checkpoint.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pytorch_lightning/callbacks/model_checkpoint.py b/pytorch_lightning/callbacks/model_checkpoint.py
index 4ece63a8ffdbd..ec57147764914 100644
--- a/pytorch_lightning/callbacks/model_checkpoint.py
+++ b/pytorch_lightning/callbacks/model_checkpoint.py
@@ -609,6 +609,9 @@ def __resolve_ckpt_dir(self, trainer: "pl.Trainer") -> None:
 
         self.dirpath = ckpt_path
 
+        if not trainer.fast_dev_run and trainer.training_type_plugin.should_rank_save_checkpoint:
+            self._fs.makedirs(self.dirpath, exist_ok=True)
+
     def __warn_if_dir_not_empty(self, dirpath: _PATH) -> None:
         if self.save_top_k != 0 and self._fs.isdir(dirpath) and len(self._fs.ls(dirpath)) > 0:
             rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
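
Note: taken together, the series moves the `should_rank_save_checkpoint` lookup off the `Trainer` facade and onto the training type plugin that owns the decision, and `__resolve_ckpt_dir` calls the plugin directly (PATCH 5/5). Below is a minimal standalone sketch of that pattern, not the pytorch-lightning source: the single-device default policy, the `resolve_ckpt_dir` free function, and the use of `os.makedirs` in place of the callback's `self._fs.makedirs` are assumptions made for illustration; only the names `training_type_plugin`, `is_global_zero`, and `should_rank_save_checkpoint` mirror the library.

    import os


    class TrainingTypePlugin:
        """Single-device stand-in for a training type plugin (illustrative only)."""

        @property
        def is_global_zero(self) -> bool:
            # With one device there is only one rank, which is global zero.
            return True

        @property
        def should_rank_save_checkpoint(self) -> bool:
            # Assumed default policy: only the global-zero rank writes checkpoints.
            # A strategy subclass could override this property with its own rule.
            return self.is_global_zero


    class Trainer:
        """Facade without its own should_rank_save_checkpoint, as after PATCH 1/5."""

        def __init__(self) -> None:
            self.training_type_plugin = TrainingTypePlugin()
            self.fast_dev_run = False


    def resolve_ckpt_dir(trainer: Trainer, dirpath: str) -> None:
        # Mirrors the call site restored in PATCH 5/5, with os.makedirs
        # standing in for the callback's filesystem abstraction.
        if not trainer.fast_dev_run and trainer.training_type_plugin.should_rank_save_checkpoint:
            os.makedirs(dirpath, exist_ok=True)


    resolve_ckpt_dir(Trainer(), "lightning_ckpts")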