diff --git a/CHANGELOG.md b/CHANGELOG.md
index 384a218c81305..d1c347c00a3f1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -107,7 +107,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Fixed `ModelPruning(make_pruning_permanent=True)` pruning buffers getting removed when saved during training ([#6073](https://github.com/PyTorchLightning/pytorch-lightning/pull/6073))

-- Fixed `trainer.test` from `best_path` hangs after calling `trainer.fit` ([#6272](https://github.com/PyTorchLightning/pytorch-lightning/pull/6272))
+- Fixed `trainer.test` from `best_path` hangs after calling `trainer.fit` ([#6272](https://github.com/PyTorchLightning/pytorch-lightning/pull/6272))

 - Fixed duplicate logs appearing in console when using the python logging module ([#5509](https://github.com/PyTorchLightning/pytorch-lightning/pull/5509), [#6275](https://github.com/PyTorchLightning/pytorch-lightning/pull/6275))

@@ -134,6 +134,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Fixed an issue where the tuner would not tune the learning rate if also tuning the batch size ([#4688](https://github.com/PyTorchLightning/pytorch-lightning/pull/4688))

+- Fixed logger creating directory structure too early in DDP ([#6380](https://github.com/PyTorchLightning/pytorch-lightning/pull/6380))
+
+
 ## [1.2.2] - 2021-03-02

 ### Added

diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py
index 253d60c285834..ff8be336ee57a 100644
--- a/pytorch_lightning/trainer/trainer.py
+++ b/pytorch_lightning/trainer/trainer.py
@@ -381,21 +381,6 @@ def __init__(
         # Callback system
         self.on_init_end()

-    def setup_trainer(self, model: LightningModule):
-        """
-        Sanity check a few things before starting actual training or testing.
-
-        Args:
-            model: The model to run sanity test on.
-        """
-
-        # log hyper-parameters
-        if self.logger is not None:
-            # save exp to get started (this is where the first experiment logs are written)
-            self.logger.log_hyperparams(model.hparams_initial)
-            self.logger.log_graph(model)
-            self.logger.save()
-
     def fit(
         self,
         model: LightningModule,
@@ -444,7 +429,6 @@ def fit(
         self.call_setup_hook(model)
         self.call_hook("on_before_accelerator_backend_setup", model)
         self.accelerator.setup(self, model)  # note: this sets up self.lightning_module
-        self.setup_trainer(model)

         # ----------------------------
         # INSPECT THE CORE LOOPS
@@ -509,6 +493,13 @@ def fit(
     def pre_dispatch(self):
         self.accelerator.pre_dispatch()

+        # log hyper-parameters
+        if self.logger is not None:
+            # save exp to get started (this is where the first experiment logs are written)
+            self.logger.log_hyperparams(self.lightning_module.hparams_initial)
+            self.logger.log_graph(self.lightning_module)
+            self.logger.save()
+
     def post_dispatch(self):
         self.accelerator.post_dispatch()
         self.accelerator.teardown()
diff --git a/tests/trainer/logging_/test_distributed_logging.py b/tests/trainer/logging_/test_distributed_logging.py
index b8d68693fc393..5832f387cc63d 100644
--- a/tests/trainer/logging_/test_distributed_logging.py
+++ b/tests/trainer/logging_/test_distributed_logging.py
@@ -11,10 +11,11 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+import os
 from unittest import mock
+from unittest.mock import Mock

-from pytorch_lightning import Trainer
+from pytorch_lightning import Callback, Trainer
 from tests.helpers import BoringModel
 from tests.helpers.runif import RunIf

@@ -66,3 +67,39 @@ def test_global_zero_only_logging_ddp_spawn(tmpdir):
         weights_summary=None,
     )
     trainer.fit(model)
+
+
+def test_first_logger_call_in_subprocess(tmpdir):
+    """
+    Test that the Trainer does not call the logger too early. Only when the worker processes are initialized
+    do we have access to the rank and know which one is the main process.
+    """
+
+    class LoggerCallsObserver(Callback):
+
+        def on_fit_start(self, trainer, pl_module):
+            # this hook is executed directly before Trainer.pre_dispatch
+            # logger should not write any logs until this point
+            assert not trainer.logger.method_calls
+            assert not os.listdir(trainer.logger.save_dir)
+
+        def on_train_start(self, trainer, pl_module):
+            assert trainer.logger.method_calls
+            trainer.logger.log_hyperparams.assert_called_once()
+            trainer.logger.log_graph.assert_called_once()
+
+    logger = Mock()
+    logger.version = "0"
+    logger.name = "name"
+    logger.save_dir = tmpdir
+
+    model = BoringModel()
+    trainer = Trainer(
+        default_root_dir=tmpdir,
+        limit_train_batches=1,
+        limit_val_batches=1,
+        max_epochs=1,
+        logger=logger,
+        callbacks=[LoggerCallsObserver()]
+    )
+    trainer.fit(model)
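
A minimal sketch of the ordering this patch enforces, using a hypothetical `TinyTrainer` in place of the real `Trainer`: the first logger writes (`log_hyperparams`, `log_graph`, `save`) are deferred to `pre_dispatch`, which runs after the accelerator and worker processes are set up and the process rank is known, so the experiment directory is no longer created too early under DDP.

# Illustrative sketch only; TinyTrainer is a hypothetical stand-in, not the Lightning API.
from unittest.mock import Mock


class TinyTrainer:
    """Hypothetical stand-in for pytorch_lightning.Trainer."""

    def __init__(self, logger):
        self.logger = logger

    def fit(self, model):
        self.setup_accelerator()  # spawn/assign worker processes; rank is known after this
        self.pre_dispatch(model)  # the first logger writes happen only now
        self.run_training(model)

    def setup_accelerator(self):
        pass  # placeholder for DDP process/device setup

    def pre_dispatch(self, model):
        # mirrors the block moved into Trainer.pre_dispatch by this patch
        if self.logger is not None:
            self.logger.log_hyperparams(model.hparams_initial)
            self.logger.log_graph(model)
            self.logger.save()

    def run_training(self, model):
        pass  # training loop placeholder


logger = Mock()
model = Mock(hparams_initial={"lr": 0.1})
TinyTrainer(logger).fit(model)
logger.log_hyperparams.assert_called_once_with({"lr": 0.1})

With a `Mock` logger, as in the new test above, one can assert that no logger method is called before `pre_dispatch` and that `log_hyperparams` and `log_graph` are each called exactly once afterwards.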