From 13a5f8635799138903e0cf67e93d06637d042640 Mon Sep 17 00:00:00 2001 From: Rafal Jankowski Date: Thu, 4 Nov 2021 21:13:33 +0100 Subject: [PATCH 1/5] Fixed uploading best model checkpoint in Neptune logger --- pytorch_lightning/loggers/neptune.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/pytorch_lightning/loggers/neptune.py b/pytorch_lightning/loggers/neptune.py index f7c611ed787ce..e09a2c6d861ea 100644 --- a/pytorch_lightning/loggers/neptune.py +++ b/pytorch_lightning/loggers/neptune.py @@ -523,6 +523,17 @@ def after_save_checkpoint(self, checkpoint_callback: "ReferenceType[ModelCheckpo file_names.add(model_name) self.experiment[f"{checkpoints_namespace}/{model_name}"].upload(key) + # log best model path + if checkpoint_callback.best_model_path: + self.experiment[ + self._construct_path_with_prefix("model/best_model_path") + ] = checkpoint_callback.best_model_path + + model_name = self._get_full_model_name(checkpoint_callback.best_model_path, checkpoint_callback) + file_names.add(model_name) + self.experiment[f"{checkpoints_namespace}/{model_name}"].upload( + checkpoint_callback.best_model_path) + # remove old models logged to experiment if they are not part of best k models at this point if self.experiment.exists(checkpoints_namespace): exp_structure = self.experiment.get_structure() @@ -531,11 +542,7 @@ def after_save_checkpoint(self, checkpoint_callback: "ReferenceType[ModelCheckpo for file_to_drop in list(uploaded_model_names - file_names): del self.experiment[f"{checkpoints_namespace}/{file_to_drop}"] - # log best model path and best model score - if checkpoint_callback.best_model_path: - self.experiment[ - self._construct_path_with_prefix("model/best_model_path") - ] = checkpoint_callback.best_model_path + # log best model score if checkpoint_callback.best_model_score: self.experiment[self._construct_path_with_prefix("model/best_model_score")] = ( checkpoint_callback.best_model_score.cpu().detach().numpy() From 32b0f9610ff1fbf4f3c35865ebd59bbbb67dbdc0 Mon Sep 17 00:00:00 2001 From: Rafal Jankowski Date: Tue, 9 Nov 2021 13:44:22 +0100 Subject: [PATCH 2/5] Test fixed --- tests/loggers/test_neptune.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/loggers/test_neptune.py b/tests/loggers/test_neptune.py index 6eec5fffcf5b5..ac64ed61293c7 100644 --- a/tests/loggers/test_neptune.py +++ b/tests/loggers/test_neptune.py @@ -292,19 +292,21 @@ def test_after_save_checkpoint(self, neptune): # then: self.assertEqual(run_instance_mock.__setitem__.call_count, 1) - self.assertEqual(run_instance_mock.__getitem__.call_count, 3) - self.assertEqual(run_attr_mock.upload.call_count, 3) + self.assertEqual(run_instance_mock.__getitem__.call_count, 4) + self.assertEqual(run_attr_mock.upload.call_count, 4) run_instance_mock.__setitem__.assert_called_once_with( f"{model_key_prefix}/best_model_path", "path/to/models/best_model" ) run_instance_mock.__getitem__.assert_any_call(f"{model_key_prefix}/checkpoints/last") run_instance_mock.__getitem__.assert_any_call(f"{model_key_prefix}/checkpoints/model1") run_instance_mock.__getitem__.assert_any_call(f"{model_key_prefix}/checkpoints/model2/with/slashes") + run_instance_mock.__getitem__.assert_any_call(f"{model_key_prefix}/checkpoints/best_model") run_attr_mock.upload.assert_has_calls( [ call("path/to/models/last"), call("path/to/models/model1"), call("path/to/models/model2/with/slashes"), + call("path/to/models/best_model"), ] ) From 503a9910755e26b0dfaba269c38135aa57782bba Mon Sep 17 00:00:00 2001 From: Rafal Jankowski Date: Tue, 9 Nov 2021 17:22:56 +0100 Subject: [PATCH 3/5] Comment updated and black suggestions applied --- pytorch_lightning/loggers/neptune.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pytorch_lightning/loggers/neptune.py b/pytorch_lightning/loggers/neptune.py index e09a2c6d861ea..c6472811a8d43 100644 --- a/pytorch_lightning/loggers/neptune.py +++ b/pytorch_lightning/loggers/neptune.py @@ -523,7 +523,7 @@ def after_save_checkpoint(self, checkpoint_callback: "ReferenceType[ModelCheckpo file_names.add(model_name) self.experiment[f"{checkpoints_namespace}/{model_name}"].upload(key) - # log best model path + # log best model path and checkpoint if checkpoint_callback.best_model_path: self.experiment[ self._construct_path_with_prefix("model/best_model_path") @@ -531,8 +531,7 @@ def after_save_checkpoint(self, checkpoint_callback: "ReferenceType[ModelCheckpo model_name = self._get_full_model_name(checkpoint_callback.best_model_path, checkpoint_callback) file_names.add(model_name) - self.experiment[f"{checkpoints_namespace}/{model_name}"].upload( - checkpoint_callback.best_model_path) + self.experiment[f"{checkpoints_namespace}/{model_name}"].upload(checkpoint_callback.best_model_path) # remove old models logged to experiment if they are not part of best k models at this point if self.experiment.exists(checkpoints_namespace): From 3c9b34e6df17edbdc4bef356d710be94312cd53f Mon Sep 17 00:00:00 2001 From: Rafal Jankowski Date: Fri, 19 Nov 2021 16:22:18 +0100 Subject: [PATCH 4/5] Windows path fix --- pytorch_lightning/loggers/neptune.py | 2 +- tests/loggers/test_neptune.py | 29 ++++++++++++++++------------ 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/pytorch_lightning/loggers/neptune.py b/pytorch_lightning/loggers/neptune.py index c6472811a8d43..21fbd4a79ea0e 100644 --- a/pytorch_lightning/loggers/neptune.py +++ b/pytorch_lightning/loggers/neptune.py @@ -550,7 +550,7 @@ def after_save_checkpoint(self, checkpoint_callback: "ReferenceType[ModelCheckpo @staticmethod def _get_full_model_name(model_path: str, checkpoint_callback: "ReferenceType[ModelCheckpoint]") -> str: """Returns model name which is string `modle_path` appended to `checkpoint_callback.dirpath`.""" - expected_model_path = f"{checkpoint_callback.dirpath}/" + expected_model_path = f"{checkpoint_callback.dirpath}{os.path.sep}" if not model_path.startswith(expected_model_path): raise ValueError(f"{model_path} was expected to start with {expected_model_path}.") return model_path[len(expected_model_path) :] diff --git a/tests/loggers/test_neptune.py b/tests/loggers/test_neptune.py index ac64ed61293c7..6238b408c1e6e 100644 --- a/tests/loggers/test_neptune.py +++ b/tests/loggers/test_neptune.py @@ -276,14 +276,15 @@ def test_after_save_checkpoint(self, neptune): logger, run_instance_mock, run_attr_mock = self._get_logger_with_mocks( api_key="test", project="project", **prefix ) + models_root_dir = os.path.join("path", "to", "models") cb_mock = MagicMock( - dirpath="path/to/models", - last_model_path="path/to/models/last", + dirpath=models_root_dir, + last_model_path=os.path.join(models_root_dir, "last"), best_k_models={ - "path/to/models/model1": None, - "path/to/models/model2/with/slashes": None, + f"{os.path.join(models_root_dir, 'model1')}": None, + f"{os.path.join(models_root_dir, 'model2/with/slashes')}": None, }, - best_model_path="path/to/models/best_model", + best_model_path=os.path.join(models_root_dir, "best_model"), best_model_score=None, ) @@ -295,7 +296,7 @@ def test_after_save_checkpoint(self, neptune): self.assertEqual(run_instance_mock.__getitem__.call_count, 4) self.assertEqual(run_attr_mock.upload.call_count, 4) run_instance_mock.__setitem__.assert_called_once_with( - f"{model_key_prefix}/best_model_path", "path/to/models/best_model" + f"{model_key_prefix}/best_model_path", os.path.join(models_root_dir, "best_model") ) run_instance_mock.__getitem__.assert_any_call(f"{model_key_prefix}/checkpoints/last") run_instance_mock.__getitem__.assert_any_call(f"{model_key_prefix}/checkpoints/model1") @@ -303,10 +304,10 @@ def test_after_save_checkpoint(self, neptune): run_instance_mock.__getitem__.assert_any_call(f"{model_key_prefix}/checkpoints/best_model") run_attr_mock.upload.assert_has_calls( [ - call("path/to/models/last"), - call("path/to/models/model1"), - call("path/to/models/model2/with/slashes"), - call("path/to/models/best_model"), + call(os.path.join(models_root_dir, "last")), + call(os.path.join(models_root_dir, "model1")), + call(os.path.join(models_root_dir, "model2/with/slashes")), + call(os.path.join(models_root_dir, "best_model")), ] ) @@ -396,8 +397,12 @@ def test__get_full_model_name(self): # given: SimpleCheckpoint = namedtuple("SimpleCheckpoint", ["dirpath"]) test_input_data = [ - ("key.ext", "foo/bar/key.ext", SimpleCheckpoint(dirpath="foo/bar")), - ("key/in/parts.ext", "foo/bar/key/in/parts.ext", SimpleCheckpoint(dirpath="foo/bar")), + ("key.ext", os.path.join("foo", "bar", "key.ext"), SimpleCheckpoint(dirpath=os.path.join("foo", "bar"))), + ( + "key/in/parts.ext", + os.path.join("foo", "bar", "key/in/parts.ext"), + SimpleCheckpoint(dirpath=os.path.join("foo", "bar")), + ), ] # expect: From 1bb4cd1495e48ecd3111a80fd2bab192142a99cf Mon Sep 17 00:00:00 2001 From: Rafal Jankowski Date: Sat, 27 Nov 2021 11:53:19 +0100 Subject: [PATCH 5/5] Changelog update --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bb5d26a1071a6..e24ffe07c710e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -193,7 +193,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed a consolidation error in Lite when attempting to save the state dict of a sharded optimizer ([#10746](https://github.com/PyTorchLightning/pytorch-lightning/pull/10746)) -- +- Fixed uploading best model checkpoint in NeptuneLogger ([#10369](https://github.com/PyTorchLightning/pytorch-lightning/pull/10369)) -