
Commit ecd7d79

Remove rank 0 restrictions from logger
1 parent 0cdf8ae commit ecd7d79

File tree

3 files changed: +53 -16 lines changed


CHANGELOG.md

Lines changed: 5 additions & 1 deletion
@@ -56,6 +56,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 - The accelerator and training type plugin `setup` hooks no longer have a `model` argument ([#8536](https://github.com/PyTorchLightning/pytorch-lightning/pull/8536))
 
+
+- Removed restrictions in the trainer that loggers can only log from rank 0. Existing logger behavior has not changed. ([#8608](https://github.com/PyTorchLightning/pytorch-lightning/pull/8608))
+
+
 ### Deprecated
 
 - Deprecated `LightningModule.summarize()` in favor of `pytorch_lightning.utilities.model_summary.summarize()`
@@ -2590,4 +2594,4 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 ## [0.2.x] - 2019-07-09
 
-## [0.1.x] - 2019-06-DD
+## [0.1.x] - 2019-06-DD

pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py

Lines changed: 2 additions & 3 deletions
@@ -102,9 +102,8 @@ def log_metrics(self, metrics: Dict[str, _METRIC], step: Optional[int] = None) -> None:
             step = self.trainer.global_step
 
         # log actual metrics
-        if self.trainer.is_global_zero:
-            self.trainer.logger.agg_and_log_metrics(scalar_metrics, step=step)
-            self.trainer.logger.save()
+        self.trainer.logger.agg_and_log_metrics(scalar_metrics, step=step)
+        self.trainer.logger.save()
 
         self._logged_metrics.update(scalar_metrics)
 

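Note on the change above: the trainer no longer wraps `agg_and_log_metrics` and `save` in an `is_global_zero` check, so rank filtering is now the logger's responsibility. Built-in loggers such as `TensorBoardLogger` already guard their write methods with `rank_zero_only`, which appears to be why the changelog says existing behavior is unchanged. Below is a minimal sketch, not part of this commit, of a custom logger that keeps the old rank-0-only behavior on its own; it assumes the `LightningLoggerBase` API and the `rank_zero_only` decorator of this era of PyTorch Lightning, and the class name `RankZeroOnlyLogger` is hypothetical.

```python
# Hypothetical example (not from this commit): a custom logger that restricts
# its own writes to global rank 0, mirroring what the trainer used to enforce.
from typing import Any, Dict, Optional, Union

from pytorch_lightning.loggers.base import LightningLoggerBase
from pytorch_lightning.utilities import rank_zero_only


class RankZeroOnlyLogger(LightningLoggerBase):
    """Collects metrics in a dict, but only on global rank 0."""

    def __init__(self):
        super().__init__()
        self.logs = {}

    @property
    def experiment(self) -> Any:
        return None

    @rank_zero_only
    def log_metrics(self, metrics: Dict[str, float], step: Optional[int] = None) -> None:
        # Only the global-zero process executes this body; other ranks no-op.
        self.logs.update(metrics)

    @rank_zero_only
    def log_hyperparams(self, *args, **kwargs) -> None:
        pass

    @property
    def name(self) -> str:
        return "RankZeroOnly"

    @property
    def version(self) -> Union[int, str]:
        return 0
```

Passing `logger=RankZeroOnlyLogger()` to `Trainer` would then reproduce the pre-#8608 behavior, while a logger like the test's `AllRankLogger` below receives calls on every rank.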
tests/trainer/logging_/test_distributed_logging.py

Lines changed: 46 additions & 12 deletions
@@ -12,31 +12,61 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import os
+from typing import Any, Dict, Optional, Union
 from unittest import mock
 from unittest.mock import Mock
 
 from pytorch_lightning import Callback, Trainer
+from pytorch_lightning.loggers.base import LightningLoggerBase
+from pytorch_lightning.loggers import TensorBoardLogger
 from tests.helpers import BoringModel
 from tests.helpers.runif import RunIf
 
 
+class AllRankLogger(LightningLoggerBase):
+    """
+    Logger to test all-rank logging (i.e. not just rank 0).
+    Logs are saved to local variable `logs`.
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.logs = {}
+        self.exp = object()
+
+    def experiment(self) -> Any:
+        return self.exp
+
+    def log_metrics(self, metrics: Dict[str, float], step: Optional[int] = None):
+        self.logs.update(metrics)
+
+    def version(self) -> Union[int, str]:
+        return 1
+
+    def name(self) -> str:
+        return "AllRank"
+
+    def log_hyperparams(self, *args, **kwargs) -> None:
+        pass
+
+
 class TestModel(BoringModel):
-    def on_pretrain_routine_end(self) -> None:
-        with mock.patch("pytorch_lightning.loggers.base.LightningLoggerBase.agg_and_log_metrics") as m:
-            self.trainer.logger_connector.log_metrics({"a": 2})
-        logged_times = m.call_count
-        expected = int(self.trainer.is_global_zero)
-        msg = f"actual logger called from non-global zero, logged_times: {logged_times}, expected: {expected}"
-        assert logged_times == expected, msg
+    log_name = "rank-{rank}"
+
+    def on_train_start(self):
+        self.log(self.log_name.format(rank=self.local_rank), 0)
+
+    def on_train_end(self):
+        assert self.log_name.format(rank=self.local_rank) in self.logger.logs, "Expected rank to be logged"
 
 
 @RunIf(skip_windows=True)
-def test_global_zero_only_logging_ddp_cpu(tmpdir):
+def test_all_rank_logging_ddp_cpu(tmpdir):
     """
-    Makes sure logging only happens from root zero
+    Check that all ranks can be logged from
     """
     model = TestModel()
-    model.training_epoch_end = None
+    all_rank_logger = AllRankLogger()
     trainer = Trainer(
         accelerator="ddp_cpu",
         num_processes=2,
@@ -45,16 +75,19 @@ def test_global_zero_only_logging_ddp_cpu(tmpdir):
         limit_val_batches=1,
         max_epochs=1,
         weights_summary=None,
+        logger=all_rank_logger,
+        log_every_n_steps=1,
     )
     trainer.fit(model)
 
 
 @RunIf(min_gpus=2)
-def test_global_zero_only_logging_ddp_spawn(tmpdir):
+def test_all_rank_logging_ddp_spawn(tmpdir):
     """
-    Makes sure logging only happens from root zero
+    Check that all ranks can be logged from
     """
     model = TestModel()
+    all_rank_logger = AllRankLogger()
     model.training_epoch_end = None
     trainer = Trainer(
         accelerator="ddp_spawn",
@@ -63,6 +96,7 @@ def test_global_zero_only_logging_ddp_spawn(tmpdir):
         limit_train_batches=1,
         limit_val_batches=1,
         max_epochs=1,
+        logger=all_rank_logger,
         weights_summary=None,
     )
     trainer.fit(model)
