From 0d2997fdad829879f31facbaf0f2e9d5ead0bd91 Mon Sep 17 00:00:00 2001 From: krishna Date: Wed, 22 Jun 2022 12:24:03 +0200 Subject: [PATCH 01/18] add warning --- .../trainer/connectors/logger_connector/result.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/pytorch_lightning/trainer/connectors/logger_connector/result.py b/src/pytorch_lightning/trainer/connectors/logger_connector/result.py index b1d8a064e122e..8250d593da67c 100644 --- a/src/pytorch_lightning/trainer/connectors/logger_connector/result.py +++ b/src/pytorch_lightning/trainer/connectors/logger_connector/result.py @@ -516,17 +516,16 @@ def fn(result_metric: _ResultMetric, v: Tensor) -> None: apply_to_collections(self[key], value, _ResultMetric, fn) @staticmethod - def _get_cache(result_metric: _ResultMetric, on_step: bool) -> Optional[Tensor]: + def _get_cache(result_metric: _ResultMetric, on_step: bool, result_key: str) -> Optional[Tensor]: cache = None if on_step and result_metric.meta.on_step: cache = result_metric._forward_cache elif not on_step and result_metric.meta.on_epoch: if result_metric._computed is None: # always reduce on epoch end - should = result_metric.meta.sync.should - result_metric.meta.sync.should = True + warning_cache.warn(f"Please set sync_dist to True {result_key}") result_metric.compute() - result_metric.meta.sync.should = should + cache = result_metric._computed if cache is not None and not result_metric.meta.enable_graph: return cache.detach() @@ -554,10 +553,10 @@ def _forked_name(self, result_metric: _ResultMetric, on_step: bool) -> Tuple[str def metrics(self, on_step: bool) -> _METRICS: metrics = _METRICS(callback={}, log={}, pbar={}) - for _, result_metric in self.valid_items(): - + for result_key, result_metric in self.valid_items(): + # extract forward_cache or computed from the _ResultMetric. ignore when the output is None - value = apply_to_collection(result_metric, _ResultMetric, self._get_cache, on_step, include_none=False) + value = apply_to_collection(result_metric, _ResultMetric, self._get_cache, on_step, result_key, include_none=False) # convert metric collection to dict container. if isinstance(value, _ResultMetricCollection): From f52490d81b120c41a4705e2ff797d6db8aad7ec9 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 22 Jun 2022 10:27:47 +0000 Subject: [PATCH 02/18] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../trainer/connectors/logger_connector/result.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/pytorch_lightning/trainer/connectors/logger_connector/result.py b/src/pytorch_lightning/trainer/connectors/logger_connector/result.py index 8250d593da67c..e9b0d4d6ce1df 100644 --- a/src/pytorch_lightning/trainer/connectors/logger_connector/result.py +++ b/src/pytorch_lightning/trainer/connectors/logger_connector/result.py @@ -554,9 +554,11 @@ def metrics(self, on_step: bool) -> _METRICS: metrics = _METRICS(callback={}, log={}, pbar={}) for result_key, result_metric in self.valid_items(): - + # extract forward_cache or computed from the _ResultMetric. ignore when the output is None - value = apply_to_collection(result_metric, _ResultMetric, self._get_cache, on_step, result_key, include_none=False) + value = apply_to_collection( + result_metric, _ResultMetric, self._get_cache, on_step, result_key, include_none=False + ) # convert metric collection to dict container. 
if isinstance(value, _ResultMetricCollection): From ff7d9248d172ecad2c10b24877888706062ad48b Mon Sep 17 00:00:00 2001 From: Krishna Kalyan Date: Wed, 6 Jul 2022 07:02:00 -0400 Subject: [PATCH 03/18] add test --- .../connectors/logger_connector/result.py | 8 +++++--- .../logging_/test_distributed_logging.py | 17 +++++++++++++++++ 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/src/pytorch_lightning/trainer/connectors/logger_connector/result.py b/src/pytorch_lightning/trainer/connectors/logger_connector/result.py index e9b0d4d6ce1df..1b73b00adb408 100644 --- a/src/pytorch_lightning/trainer/connectors/logger_connector/result.py +++ b/src/pytorch_lightning/trainer/connectors/logger_connector/result.py @@ -516,14 +516,16 @@ def fn(result_metric: _ResultMetric, v: Tensor) -> None: apply_to_collections(self[key], value, _ResultMetric, fn) @staticmethod - def _get_cache(result_metric: _ResultMetric, on_step: bool, result_key: str) -> Optional[Tensor]: + def _get_cache(result_metric: _ResultMetric, result_key: str, on_step: bool) -> Optional[Tensor]: cache = None if on_step and result_metric.meta.on_step: cache = result_metric._forward_cache elif not on_step and result_metric.meta.on_epoch: if result_metric._computed is None: # always reduce on epoch end - warning_cache.warn(f"Please set sync_dist to True {result_key}") + print(f"ABBABAB {result_metric.meta.sync.should}") + if not result_metric.meta.sync.should: + warning_cache.warn(f"Please set sync_dist to True {result_key}") result_metric.compute() cache = result_metric._computed @@ -557,7 +559,7 @@ def metrics(self, on_step: bool) -> _METRICS: # extract forward_cache or computed from the _ResultMetric. ignore when the output is None value = apply_to_collection( - result_metric, _ResultMetric, self._get_cache, on_step, result_key, include_none=False + result_metric, _ResultMetric, self._get_cache, result_key, on_step, include_none=False ) # convert metric collection to dict container. 
diff --git a/tests/tests_pytorch/trainer/logging_/test_distributed_logging.py b/tests/tests_pytorch/trainer/logging_/test_distributed_logging.py index ff950f7a8f679..3fbdd10ee4a3d 100644 --- a/tests/tests_pytorch/trainer/logging_/test_distributed_logging.py +++ b/tests/tests_pytorch/trainer/logging_/test_distributed_logging.py @@ -20,6 +20,7 @@ from pytorch_lightning.demos.boring_classes import BoringModel from pytorch_lightning.loggers.logger import Logger from tests_pytorch.helpers.runif import RunIf +import pytest class AllRankLogger(Logger): @@ -194,3 +195,19 @@ def on_test_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> assert trainer.logger.logs == {"fit": 1, "validate": 1, "test": 1} trainer.predict(model) assert trainer.logger.logs == {"fit": 1, "validate": 1, "test": 1, "predict": 1} + +def test_logger_sync_dist(): + # Suggestions + # Imporve warning + class CustomBoringModel(BoringModel): + def training_epoch_end(self, *args, **kwargs): + super().training_epoch_end(*args, **kwargs) + self.log("global_rank", self.global_rank, sync_dist=False) + + model = CustomBoringModel() + trainer = Trainer(fast_dev_run = 1, accelerator="cpu", strategy="ddp", devices=2) + + with pytest.warns(UserWarning, match="Please set sync_dist to True global_rank"): + trainer.fit(model) + + assert trainer.callback_metrics["global_rank"] == 0 \ No newline at end of file From 19ee6b6b88cd495a77da6d1a6da2023889c55bf5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 6 Jul 2022 11:04:26 +0000 Subject: [PATCH 04/18] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../trainer/connectors/logger_connector/result.py | 2 +- .../trainer/logging_/test_distributed_logging.py | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/pytorch_lightning/trainer/connectors/logger_connector/result.py b/src/pytorch_lightning/trainer/connectors/logger_connector/result.py index 1b73b00adb408..ff404938c317e 100644 --- a/src/pytorch_lightning/trainer/connectors/logger_connector/result.py +++ b/src/pytorch_lightning/trainer/connectors/logger_connector/result.py @@ -516,7 +516,7 @@ def fn(result_metric: _ResultMetric, v: Tensor) -> None: apply_to_collections(self[key], value, _ResultMetric, fn) @staticmethod - def _get_cache(result_metric: _ResultMetric, result_key: str, on_step: bool) -> Optional[Tensor]: + def _get_cache(result_metric: _ResultMetric, result_key: str, on_step: bool) -> Optional[Tensor]: cache = None if on_step and result_metric.meta.on_step: cache = result_metric._forward_cache diff --git a/tests/tests_pytorch/trainer/logging_/test_distributed_logging.py b/tests/tests_pytorch/trainer/logging_/test_distributed_logging.py index 3fbdd10ee4a3d..cfed4a9e4948d 100644 --- a/tests/tests_pytorch/trainer/logging_/test_distributed_logging.py +++ b/tests/tests_pytorch/trainer/logging_/test_distributed_logging.py @@ -15,12 +15,13 @@ from typing import Any, Dict, Optional, Union from unittest.mock import Mock +import pytest + import pytorch_lightning as pl from pytorch_lightning import Callback, Trainer from pytorch_lightning.demos.boring_classes import BoringModel from pytorch_lightning.loggers.logger import Logger from tests_pytorch.helpers.runif import RunIf -import pytest class AllRankLogger(Logger): @@ -196,6 +197,7 @@ def on_test_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> trainer.predict(model) assert trainer.logger.logs == {"fit": 
1, "validate": 1, "test": 1, "predict": 1} + def test_logger_sync_dist(): # Suggestions # Imporve warning @@ -205,9 +207,9 @@ def training_epoch_end(self, *args, **kwargs): self.log("global_rank", self.global_rank, sync_dist=False) model = CustomBoringModel() - trainer = Trainer(fast_dev_run = 1, accelerator="cpu", strategy="ddp", devices=2) + trainer = Trainer(fast_dev_run=1, accelerator="cpu", strategy="ddp", devices=2) with pytest.warns(UserWarning, match="Please set sync_dist to True global_rank"): trainer.fit(model) - assert trainer.callback_metrics["global_rank"] == 0 \ No newline at end of file + assert trainer.callback_metrics["global_rank"] == 0 From 07814f625685050f85e4ec3d97315e6993bcfa88 Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Tue, 12 Jul 2022 17:41:08 +0530 Subject: [PATCH 05/18] fix test --- .../connectors/logger_connector/result.py | 16 ++++++++-------- .../trainer/logging_/test_distributed_logging.py | 4 +--- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/src/pytorch_lightning/trainer/connectors/logger_connector/result.py b/src/pytorch_lightning/trainer/connectors/logger_connector/result.py index ff404938c317e..4a831655a76e1 100644 --- a/src/pytorch_lightning/trainer/connectors/logger_connector/result.py +++ b/src/pytorch_lightning/trainer/connectors/logger_connector/result.py @@ -516,19 +516,21 @@ def fn(result_metric: _ResultMetric, v: Tensor) -> None: apply_to_collections(self[key], value, _ResultMetric, fn) @staticmethod - def _get_cache(result_metric: _ResultMetric, result_key: str, on_step: bool) -> Optional[Tensor]: + def _get_cache(result_metric: _ResultMetric, on_step: bool) -> Optional[Tensor]: cache = None if on_step and result_metric.meta.on_step: cache = result_metric._forward_cache elif not on_step and result_metric.meta.on_epoch: if result_metric._computed is None: - # always reduce on epoch end - print(f"ABBABAB {result_metric.meta.sync.should}") if not result_metric.meta.sync.should: - warning_cache.warn(f"Please set sync_dist to True {result_key}") + warning_cache.warn( + f"It is recommended to use `self.log({result_metric.meta.name}, sync_dist=True)` when logging" + " on epoch level in distributed setting to accumulate the metric across devices." + ) result_metric.compute() cache = result_metric._computed + if cache is not None and not result_metric.meta.enable_graph: return cache.detach() return cache @@ -555,12 +557,10 @@ def _forked_name(self, result_metric: _ResultMetric, on_step: bool) -> Tuple[str def metrics(self, on_step: bool) -> _METRICS: metrics = _METRICS(callback={}, log={}, pbar={}) - for result_key, result_metric in self.valid_items(): + for _, result_metric in self.valid_items(): # extract forward_cache or computed from the _ResultMetric. ignore when the output is None - value = apply_to_collection( - result_metric, _ResultMetric, self._get_cache, result_key, on_step, include_none=False - ) + value = apply_to_collection(result_metric, _ResultMetric, self._get_cache, on_step, include_none=False) # convert metric collection to dict container. 
if isinstance(value, _ResultMetricCollection): diff --git a/tests/tests_pytorch/trainer/logging_/test_distributed_logging.py b/tests/tests_pytorch/trainer/logging_/test_distributed_logging.py index cfed4a9e4948d..6ad828eb0e29b 100644 --- a/tests/tests_pytorch/trainer/logging_/test_distributed_logging.py +++ b/tests/tests_pytorch/trainer/logging_/test_distributed_logging.py @@ -199,8 +199,6 @@ def on_test_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> def test_logger_sync_dist(): - # Suggestions - # Imporve warning class CustomBoringModel(BoringModel): def training_epoch_end(self, *args, **kwargs): super().training_epoch_end(*args, **kwargs) @@ -209,7 +207,7 @@ def training_epoch_end(self, *args, **kwargs): model = CustomBoringModel() trainer = Trainer(fast_dev_run=1, accelerator="cpu", strategy="ddp", devices=2) - with pytest.warns(UserWarning, match="Please set sync_dist to True global_rank"): + with pytest.warns(UserWarning, match="It is recommended to use .* sync_dist=True"): trainer.fit(model) assert trainer.callback_metrics["global_rank"] == 0 From 2ac71cb388ff59928b154914cfabafedbdb2711b Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Tue, 12 Jul 2022 17:44:36 +0530 Subject: [PATCH 06/18] fix --- .../trainer/connectors/logger_connector/result.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pytorch_lightning/trainer/connectors/logger_connector/result.py b/src/pytorch_lightning/trainer/connectors/logger_connector/result.py index 4a831655a76e1..e8dc50a6b5ffd 100644 --- a/src/pytorch_lightning/trainer/connectors/logger_connector/result.py +++ b/src/pytorch_lightning/trainer/connectors/logger_connector/result.py @@ -524,8 +524,8 @@ def _get_cache(result_metric: _ResultMetric, on_step: bool) -> Optional[Tensor]: if result_metric._computed is None: if not result_metric.meta.sync.should: warning_cache.warn( - f"It is recommended to use `self.log({result_metric.meta.name}, sync_dist=True)` when logging" - " on epoch level in distributed setting to accumulate the metric across devices." + f"It is recommended to use `self.log({result_metric.meta.name!r}, sync_dist=True)` when" + " logging on epoch level in distributed setting to accumulate the metric across devices." ) result_metric.compute() From 0e2acc7e513e7dbd6a24bc8bd9d42d18aeb9a064 Mon Sep 17 00:00:00 2001 From: Rohit Gupta Date: Tue, 12 Jul 2022 17:49:30 +0530 Subject: [PATCH 07/18] fix rebase --- .../trainer/connectors/logger_connector/result.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/pytorch_lightning/trainer/connectors/logger_connector/result.py b/src/pytorch_lightning/trainer/connectors/logger_connector/result.py index 8f3dca7c387e1..260c5b7d6c63f 100644 --- a/src/pytorch_lightning/trainer/connectors/logger_connector/result.py +++ b/src/pytorch_lightning/trainer/connectors/logger_connector/result.py @@ -531,8 +531,14 @@ def _get_cache(result_metric: _ResultMetric, on_step: bool) -> Optional[Tensor]: cache = result_metric._computed - if cache is not None and not result_metric.meta.enable_graph: - return cache.detach() + if cache is not None: + if not isinstance(cache, torch.Tensor): + raise ValueError( + f"The `.compute()` return of the metric logged as {result_metric.meta.name!r} must be a tensor." 
+ f" Found {cache}" + ) + if not result_metric.meta.enable_graph: + return cache.detach() return cache From fabae4779afcdc475f8ac7daf0ee3446152c3f45 Mon Sep 17 00:00:00 2001 From: Krishna Kalyan Date: Tue, 12 Jul 2022 08:29:59 -0400 Subject: [PATCH 08/18] update change log --- src/pytorch_lightning/CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pytorch_lightning/CHANGELOG.md b/src/pytorch_lightning/CHANGELOG.md index 0d42f819d9d1d..b8f5ba860c3af 100644 --- a/src/pytorch_lightning/CHANGELOG.md +++ b/src/pytorch_lightning/CHANGELOG.md @@ -139,7 +139,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - The `WandbLogger` will now use the run name in the logs folder if it is provided, and otherwise the project name ([#12604](https://github.com/PyTorchLightning/pytorch-lightning/pull/12604)) -- +- Raised warning instead of forcing `sync_dist=True` on `epoch_end` ([13210](https://github.com/Lightning-AI/lightning/issues/13210)) ### Deprecated From bf87e61466597ce47d08decff8f60aa48025a02d Mon Sep 17 00:00:00 2001 From: Krishna Kalyan Date: Tue, 12 Jul 2022 08:31:38 -0400 Subject: [PATCH 09/18] fix_pr --- src/pytorch_lightning/CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pytorch_lightning/CHANGELOG.md b/src/pytorch_lightning/CHANGELOG.md index b8f5ba860c3af..592888bc73cbc 100644 --- a/src/pytorch_lightning/CHANGELOG.md +++ b/src/pytorch_lightning/CHANGELOG.md @@ -139,7 +139,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - The `WandbLogger` will now use the run name in the logs folder if it is provided, and otherwise the project name ([#12604](https://github.com/PyTorchLightning/pytorch-lightning/pull/12604)) -- Raised warning instead of forcing `sync_dist=True` on `epoch_end` ([13210](https://github.com/Lightning-AI/lightning/issues/13210)) +- Raised warning instead of forcing `sync_dist=True` on `epoch_end` ([13364](https://github.com/Lightning-AI/lightning/pull/13364)) ### Deprecated From f4e059bf8a39ee709b9b4d063ba7102c12e9cc97 Mon Sep 17 00:00:00 2001 From: Rohit Gupta Date: Tue, 12 Jul 2022 18:03:03 +0530 Subject: [PATCH 10/18] nit --- src/pytorch_lightning/CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pytorch_lightning/CHANGELOG.md b/src/pytorch_lightning/CHANGELOG.md index 592888bc73cbc..deb17cca86f78 100644 --- a/src/pytorch_lightning/CHANGELOG.md +++ b/src/pytorch_lightning/CHANGELOG.md @@ -139,7 +139,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
- The `WandbLogger` will now use the run name in the logs folder if it is provided, and otherwise the project name ([#12604](https://github.com/PyTorchLightning/pytorch-lightning/pull/12604)) -- Raised warning instead of forcing `sync_dist=True` on `epoch_end` ([13364](https://github.com/Lightning-AI/lightning/pull/13364)) +- Raised a warning instead of forcing `sync_dist=True` on epoch end ([13364](https://github.com/Lightning-AI/lightning/pull/13364)) ### Deprecated From 352a34112074abd9ab6c064bb55fdd4eb51d941a Mon Sep 17 00:00:00 2001 From: Krishna Kalyan Date: Wed, 13 Jul 2022 13:47:25 +0200 Subject: [PATCH 11/18] Update src/pytorch_lightning/trainer/connectors/logger_connector/result.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Carlos Mocholí --- .../trainer/connectors/logger_connector/result.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pytorch_lightning/trainer/connectors/logger_connector/result.py b/src/pytorch_lightning/trainer/connectors/logger_connector/result.py index 260c5b7d6c63f..2e047bbc08a20 100644 --- a/src/pytorch_lightning/trainer/connectors/logger_connector/result.py +++ b/src/pytorch_lightning/trainer/connectors/logger_connector/result.py @@ -524,7 +524,7 @@ def _get_cache(result_metric: _ResultMetric, on_step: bool) -> Optional[Tensor]: if result_metric._computed is None: if not result_metric.meta.sync.should: warning_cache.warn( - f"It is recommended to use `self.log({result_metric.meta.name!r}, sync_dist=True)` when" + f"It is recommended to use `self.log({result_metric.meta.name!r}, ..., sync_dist=True)` when" " logging on epoch level in distributed setting to accumulate the metric across devices." ) result_metric.compute() From 5140c2bc58cb64a58de764a6f1f7c570dc966fe9 Mon Sep 17 00:00:00 2001 From: Krishna Kalyan Date: Sat, 16 Jul 2022 16:42:12 -0400 Subject: [PATCH 12/18] commit suggestions --- .../trainer/connectors/logger_connector/result.py | 9 +++++---- .../trainer/logging_/test_distributed_logging.py | 6 +++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/pytorch_lightning/trainer/connectors/logger_connector/result.py b/src/pytorch_lightning/trainer/connectors/logger_connector/result.py index 2e047bbc08a20..b4a00b72f3038 100644 --- a/src/pytorch_lightning/trainer/connectors/logger_connector/result.py +++ b/src/pytorch_lightning/trainer/connectors/logger_connector/result.py @@ -24,11 +24,12 @@ from pytorch_lightning.core.mixins import DeviceDtypeModuleMixin from pytorch_lightning.utilities.apply_func import apply_to_collection, apply_to_collections, move_data_to_device from pytorch_lightning.utilities.data import extract_batch_size +from pytorch_lightning.utilities.distributed import distributed_available from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.memory import recursive_detach from pytorch_lightning.utilities.metrics import metrics_to_scalars from pytorch_lightning.utilities.rank_zero import rank_zero_warn -from pytorch_lightning.utilities.warnings import WarningCache +from pytorch_lightning.utilities.warnings import WarningCache, PossibleUserWarning _IN_METRIC = Union[Metric, Tensor] # Do not include scalars as they were converted to tensors _OUT_METRIC = Union[Tensor, Dict[str, Tensor]] @@ -522,11 +523,11 @@ def _get_cache(result_metric: _ResultMetric, on_step: bool) -> Optional[Tensor]: cache = result_metric._forward_cache elif not on_step and result_metric.meta.on_epoch: if
result_metric._computed is None: - if not result_metric.meta.sync.should: + if not result_metric.meta.sync.should and distributed_available(): warning_cache.warn( f"It is recommended to use `self.log({result_metric.meta.name!r}, ..., sync_dist=True)` when" - " logging on epoch level in distributed setting to accumulate the metric across devices." - ) + " logging on epoch level in distributed setting to accumulate the metric across devices.", + category=PossibleUserWarning) result_metric.compute() cache = result_metric._computed diff --git a/tests/tests_pytorch/trainer/logging_/test_distributed_logging.py b/tests/tests_pytorch/trainer/logging_/test_distributed_logging.py index 6ad828eb0e29b..9abfd42f03669 100644 --- a/tests/tests_pytorch/trainer/logging_/test_distributed_logging.py +++ b/tests/tests_pytorch/trainer/logging_/test_distributed_logging.py @@ -21,6 +21,7 @@ from pytorch_lightning import Callback, Trainer from pytorch_lightning.demos.boring_classes import BoringModel from pytorch_lightning.loggers.logger import Logger +from pytorch_lightning.utilities.warnings import PossibleUserWarning from tests_pytorch.helpers.runif import RunIf @@ -200,14 +201,13 @@ def on_test_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> def test_logger_sync_dist(): class CustomBoringModel(BoringModel): - def training_epoch_end(self, *args, **kwargs): - super().training_epoch_end(*args, **kwargs) + def on_train_epoch_end(self): self.log("global_rank", self.global_rank, sync_dist=False) model = CustomBoringModel() trainer = Trainer(fast_dev_run=1, accelerator="cpu", strategy="ddp", devices=2) - with pytest.warns(UserWarning, match="It is recommended to use .* sync_dist=True"): + with pytest.warns(PossibleUserWarning, match="It is recommended to use .* sync_dist=True"): trainer.fit(model) assert trainer.callback_metrics["global_rank"] == 0 From db2d06c4b250f285f45378f5d86664d9535e7004 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 16 Jul 2022 20:46:40 +0000 Subject: [PATCH 13/18] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../trainer/connectors/logger_connector/result.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/pytorch_lightning/trainer/connectors/logger_connector/result.py b/src/pytorch_lightning/trainer/connectors/logger_connector/result.py index b4a00b72f3038..c9c3c897ffdd7 100644 --- a/src/pytorch_lightning/trainer/connectors/logger_connector/result.py +++ b/src/pytorch_lightning/trainer/connectors/logger_connector/result.py @@ -29,7 +29,7 @@ from pytorch_lightning.utilities.memory import recursive_detach from pytorch_lightning.utilities.metrics import metrics_to_scalars from pytorch_lightning.utilities.rank_zero import rank_zero_warn -from pytorch_lightning.utilities.warnings import WarningCache, PossibleUserWarning +from pytorch_lightning.utilities.warnings import PossibleUserWarning, WarningCache _IN_METRIC = Union[Metric, Tensor] # Do not include scalars as they were converted to tensors _OUT_METRIC = Union[Tensor, Dict[str, Tensor]] @@ -527,7 +527,8 @@ def _get_cache(result_metric: _ResultMetric, on_step: bool) -> Optional[Tensor]: warning_cache.warn( f"It is recommended to use `self.log({result_metric.meta.name!r}, ..., sync_dist=True)` when" " logging on epoch level in distributed setting to accumulate the metric across devices.", - category=PossibleUserWarning) + category=PossibleUserWarning, + ) 
result_metric.compute() cache = result_metric._computed From 26acd4590d3afa5749098a2a3a2c7fa5c405942f Mon Sep 17 00:00:00 2001 From: Krishna Kalyan Date: Sat, 16 Jul 2022 18:01:16 -0400 Subject: [PATCH 14/18] improve error message --- .../tests_pytorch/trainer/logging_/test_distributed_logging.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/tests_pytorch/trainer/logging_/test_distributed_logging.py b/tests/tests_pytorch/trainer/logging_/test_distributed_logging.py index 9abfd42f03669..c12669be87c4d 100644 --- a/tests/tests_pytorch/trainer/logging_/test_distributed_logging.py +++ b/tests/tests_pytorch/trainer/logging_/test_distributed_logging.py @@ -207,7 +207,8 @@ def on_train_epoch_end(self): model = CustomBoringModel() trainer = Trainer(fast_dev_run=1, accelerator="cpu", strategy="ddp", devices=2) - with pytest.warns(PossibleUserWarning, match="It is recommended to use .* sync_dist=True"): + with pytest.warns(PossibleUserWarning, match="It is recommended to use `self.log('global_rank', ..., sync_dist=True)` when " + "logging on epoch level in distributed setting to accumulate the metric across devices."): trainer.fit(model) assert trainer.callback_metrics["global_rank"] == 0 From 68735d7b403c052b7726a3fc23a045a637af77be Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 16 Jul 2022 22:02:57 +0000 Subject: [PATCH 15/18] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../trainer/logging_/test_distributed_logging.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/tests_pytorch/trainer/logging_/test_distributed_logging.py b/tests/tests_pytorch/trainer/logging_/test_distributed_logging.py index c12669be87c4d..c074af5038fda 100644 --- a/tests/tests_pytorch/trainer/logging_/test_distributed_logging.py +++ b/tests/tests_pytorch/trainer/logging_/test_distributed_logging.py @@ -207,8 +207,11 @@ def on_train_epoch_end(self): model = CustomBoringModel() trainer = Trainer(fast_dev_run=1, accelerator="cpu", strategy="ddp", devices=2) - with pytest.warns(PossibleUserWarning, match="It is recommended to use `self.log('global_rank', ..., sync_dist=True)` when " - "logging on epoch level in distributed setting to accumulate the metric across devices."): + with pytest.warns( + PossibleUserWarning, + match="It is recommended to use `self.log('global_rank', ..., sync_dist=True)` when " + "logging on epoch level in distributed setting to accumulate the metric across devices.", + ): trainer.fit(model) assert trainer.callback_metrics["global_rank"] == 0 From 677a3a63d055337718d45a93c2f3311111c3e10c Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Mon, 18 Jul 2022 14:16:06 +0530 Subject: [PATCH 16/18] fix test --- .../trainer/logging_/test_distributed_logging.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tests/tests_pytorch/trainer/logging_/test_distributed_logging.py b/tests/tests_pytorch/trainer/logging_/test_distributed_logging.py index c074af5038fda..bfa79dcfc6a24 100644 --- a/tests/tests_pytorch/trainer/logging_/test_distributed_logging.py +++ b/tests/tests_pytorch/trainer/logging_/test_distributed_logging.py @@ -23,6 +23,7 @@ from pytorch_lightning.loggers.logger import Logger from pytorch_lightning.utilities.warnings import PossibleUserWarning from tests_pytorch.helpers.runif import RunIf +from tests_pytorch.helpers.utils import no_warning_call class AllRankLogger(Logger): @@ -206,11 
+207,16 @@ def on_train_epoch_end(self): model = CustomBoringModel() trainer = Trainer(fast_dev_run=1, accelerator="cpu", strategy="ddp", devices=2) - with pytest.warns( - PossibleUserWarning, - match="It is recommended to use `self.log('global_rank', ..., sync_dist=True)` when " - "logging on epoch level in distributed setting to accumulate the metric across devices.", + PossibleUserWarning, match=r"recommended to use `self.log\('global_rank', ..., sync_dist=True\)`" + ): + trainer.fit(model) + + assert trainer.callback_metrics["global_rank"] == 0 + + trainer = Trainer(fast_dev_run=1, accelerator="cpu", devices=1) + with no_warning_call( + PossibleUserWarning, match=r"recommended to use `self.log\('global_rank', ..., sync_dist=True\)`" ): trainer.fit(model) From e497b2ed6385a3add00dbc37aad2239fe9a13e0a Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Mon, 18 Jul 2022 18:54:28 +0530 Subject: [PATCH 17/18] block for ft and use unit test --- .../connectors/logger_connector/result.py | 18 ++++++--- .../core/test_metric_result_integration.py | 4 +- .../logging_/test_distributed_logging.py | 40 ++++++++++--------- 3 files changed, 38 insertions(+), 24 deletions(-) diff --git a/src/pytorch_lightning/trainer/connectors/logger_connector/result.py b/src/pytorch_lightning/trainer/connectors/logger_connector/result.py index c9c3c897ffdd7..05bc62c98f92d 100644 --- a/src/pytorch_lightning/trainer/connectors/logger_connector/result.py +++ b/src/pytorch_lightning/trainer/connectors/logger_connector/result.py @@ -26,6 +26,7 @@ from pytorch_lightning.utilities.data import extract_batch_size from pytorch_lightning.utilities.distributed import distributed_available from pytorch_lightning.utilities.exceptions import MisconfigurationException +from pytorch_lightning.utilities.imports import _fault_tolerant_training from pytorch_lightning.utilities.memory import recursive_detach from pytorch_lightning.utilities.metrics import metrics_to_scalars from pytorch_lightning.utilities.rank_zero import rank_zero_warn @@ -523,13 +524,20 @@ def _get_cache(result_metric: _ResultMetric, on_step: bool) -> Optional[Tensor]: cache = result_metric._forward_cache elif not on_step and result_metric.meta.on_epoch: if result_metric._computed is None: + should = result_metric.meta.sync.should if not result_metric.meta.sync.should and distributed_available(): - warning_cache.warn( - f"It is recommended to use `self.log({result_metric.meta.name!r}, ..., sync_dist=True)` when" - " logging on epoch level in distributed setting to accumulate the metric across devices.", - category=PossibleUserWarning, - ) + if _fault_tolerant_training(): + # make sure to always sync across devices when fault-tolerant is used + result_metric.meta.sync.should = True + else: + warning_cache.warn( + f"It is recommended to use `self.log({result_metric.meta.name!r}, ..., sync_dist=True)`" + " when logging on epoch level in distributed setting to accumulate the metric across" + " devices.", + category=PossibleUserWarning, + ) result_metric.compute() + result_metric.meta.sync.should = should cache = result_metric._computed diff --git a/tests/tests_pytorch/core/test_metric_result_integration.py b/tests/tests_pytorch/core/test_metric_result_integration.py index 12247e27e8c9d..41672d2ff7577 100644 --- a/tests/tests_pytorch/core/test_metric_result_integration.py +++ b/tests/tests_pytorch/core/test_metric_result_integration.py @@ -456,6 +456,8 @@ def on_train_epoch_end(self) -> None: "limit_val_batches": 0, "accelerator": accelerator, "devices": devices, + 
"enable_progress_bar": False, + "enable_model_summary": False, } trainer_kwargs.update(kwargs) trainer = Trainer(**trainer_kwargs) @@ -471,7 +473,7 @@ def on_train_epoch_end(self) -> None: ) ckpt_path = os.path.join(tmpdir, ".pl_auto_save.ckpt") - trainer = Trainer(**trainer_kwargs, enable_progress_bar=False, enable_model_summary=False) + trainer = Trainer(**trainer_kwargs) trainer.fit(model, ckpt_path=ckpt_path) assert model.has_validated_sum diff --git a/tests/tests_pytorch/trainer/logging_/test_distributed_logging.py b/tests/tests_pytorch/trainer/logging_/test_distributed_logging.py index bfa79dcfc6a24..006248c95219c 100644 --- a/tests/tests_pytorch/trainer/logging_/test_distributed_logging.py +++ b/tests/tests_pytorch/trainer/logging_/test_distributed_logging.py @@ -13,14 +13,22 @@ # limitations under the License. import os from typing import Any, Dict, Optional, Union +from unittest import mock from unittest.mock import Mock import pytest +import torch import pytorch_lightning as pl from pytorch_lightning import Callback, Trainer from pytorch_lightning.demos.boring_classes import BoringModel from pytorch_lightning.loggers.logger import Logger +from pytorch_lightning.trainer.connectors.logger_connector.result import ( + _Metadata, + _ResultCollection, + _ResultMetric, + _Sync, +) from pytorch_lightning.utilities.warnings import PossibleUserWarning from tests_pytorch.helpers.runif import RunIf from tests_pytorch.helpers.utils import no_warning_call @@ -200,24 +208,20 @@ def on_test_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> assert trainer.logger.logs == {"fit": 1, "validate": 1, "test": 1, "predict": 1} -def test_logger_sync_dist(): - class CustomBoringModel(BoringModel): - def on_train_epoch_end(self): - self.log("global_rank", self.global_rank, sync_dist=False) +@pytest.mark.parametrize("distributed_env", [True, False]) +def test_logger_sync_dist(distributed_env): + # self.log('bar', 7, ..., sync_dist=False) + meta = _Metadata("foo", "bar") + meta.sync = _Sync(_should=False) + result_metric = _ResultMetric(metadata=meta, is_tensor=True) + result_metric.update(torch.tensor(7.0), 10) - model = CustomBoringModel() - trainer = Trainer(fast_dev_run=1, accelerator="cpu", strategy="ddp", devices=2) - with pytest.warns( - PossibleUserWarning, match=r"recommended to use `self.log\('global_rank', ..., sync_dist=True\)`" - ): - trainer.fit(model) - - assert trainer.callback_metrics["global_rank"] == 0 + warning_ctx = pytest.warns if distributed_env else no_warning_call - trainer = Trainer(fast_dev_run=1, accelerator="cpu", devices=1) - with no_warning_call( - PossibleUserWarning, match=r"recommended to use `self.log\('global_rank', ..., sync_dist=True\)`" + with mock.patch( + "pytorch_lightning.trainer.connectors.logger_connector.result.distributed_available", + return_value=distributed_env, ): - trainer.fit(model) - - assert trainer.callback_metrics["global_rank"] == 0 + with warning_ctx(PossibleUserWarning, match=r"recommended to use `self.log\('bar', ..., sync_dist=True\)`"): + value = _ResultCollection._get_cache(result_metric, on_step=False) + assert value == 7.0 From 0f39c807ea28b1ae7c703c9def361e23fd586281 Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Tue, 19 Jul 2022 13:31:48 +0530 Subject: [PATCH 18/18] move test --- .../connectors/logger_connector/result.py | 5 ++- .../core/test_metric_result_integration.py | 21 +++++++++++++ .../logging_/test_distributed_logging.py | 31 ------------------- 3 files changed, 25 insertions(+), 32 deletions(-) diff --git 
a/src/pytorch_lightning/trainer/connectors/logger_connector/result.py b/src/pytorch_lightning/trainer/connectors/logger_connector/result.py index 05bc62c98f92d..1a8020f91f4f6 100644 --- a/src/pytorch_lightning/trainer/connectors/logger_connector/result.py +++ b/src/pytorch_lightning/trainer/connectors/logger_connector/result.py @@ -526,8 +526,11 @@ def _get_cache(result_metric: _ResultMetric, on_step: bool) -> Optional[Tensor]: if result_metric._computed is None: should = result_metric.meta.sync.should if not result_metric.meta.sync.should and distributed_available(): + # ensure sync happens for FT since during a failure, the metrics are synced and saved to the + # checkpoint, so during restart, metrics on rank 0 are from the accumulated ones from the previous + # run, and on other ranks, they are 0. So we need to make sure they are synced in further training + # to ensure correct calculation. if _fault_tolerant_training(): - # make sure to always sync across devices when fault-tolerant is used result_metric.meta.sync.should = True else: warning_cache.warn( diff --git a/tests/tests_pytorch/core/test_metric_result_integration.py b/tests/tests_pytorch/core/test_metric_result_integration.py index 41672d2ff7577..cb8a51c5bf9ba 100644 --- a/tests/tests_pytorch/core/test_metric_result_integration.py +++ b/tests/tests_pytorch/core/test_metric_result_integration.py @@ -34,7 +34,9 @@ _ResultMetric, _Sync, ) +from pytorch_lightning.utilities.warnings import PossibleUserWarning from tests_pytorch.helpers.runif import RunIf +from tests_pytorch.helpers.utils import no_warning_call class DummyMetric(Metric): @@ -661,3 +663,22 @@ def on_train_start(self): ) with pytest.raises(ValueError, match=r"compute\(\)` return of.*foo' must be a tensor"): trainer.fit(model) + + +@pytest.mark.parametrize("distributed_env", [True, False]) +def test_logger_sync_dist(distributed_env): + # self.log('bar', 7, ..., sync_dist=False) + meta = _Metadata("foo", "bar") + meta.sync = _Sync(_should=False) + result_metric = _ResultMetric(metadata=meta, is_tensor=True) + result_metric.update(torch.tensor(7.0), 10) + + warning_ctx = pytest.warns if distributed_env else no_warning_call + + with mock.patch( + "pytorch_lightning.trainer.connectors.logger_connector.result.distributed_available", + return_value=distributed_env, + ): + with warning_ctx(PossibleUserWarning, match=r"recommended to use `self.log\('bar', ..., sync_dist=True\)`"): + value = _ResultCollection._get_cache(result_metric, on_step=False) + assert value == 7.0 diff --git a/tests/tests_pytorch/trainer/logging_/test_distributed_logging.py b/tests/tests_pytorch/trainer/logging_/test_distributed_logging.py index 006248c95219c..ff950f7a8f679 100644 --- a/tests/tests_pytorch/trainer/logging_/test_distributed_logging.py +++ b/tests/tests_pytorch/trainer/logging_/test_distributed_logging.py @@ -13,25 +13,13 @@ # limitations under the License. 
import os from typing import Any, Dict, Optional, Union -from unittest import mock from unittest.mock import Mock -import pytest -import torch - import pytorch_lightning as pl from pytorch_lightning import Callback, Trainer from pytorch_lightning.demos.boring_classes import BoringModel from pytorch_lightning.loggers.logger import Logger -from pytorch_lightning.trainer.connectors.logger_connector.result import ( - _Metadata, - _ResultCollection, - _ResultMetric, - _Sync, -) -from pytorch_lightning.utilities.warnings import PossibleUserWarning from tests_pytorch.helpers.runif import RunIf -from tests_pytorch.helpers.utils import no_warning_call class AllRankLogger(Logger): @@ -206,22 +194,3 @@ def on_test_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> assert trainer.logger.logs == {"fit": 1, "validate": 1, "test": 1} trainer.predict(model) assert trainer.logger.logs == {"fit": 1, "validate": 1, "test": 1, "predict": 1} - - -@pytest.mark.parametrize("distributed_env", [True, False]) -def test_logger_sync_dist(distributed_env): - # self.log('bar', 7, ..., sync_dist=False) - meta = _Metadata("foo", "bar") - meta.sync = _Sync(_should=False) - result_metric = _ResultMetric(metadata=meta, is_tensor=True) - result_metric.update(torch.tensor(7.0), 10) - - warning_ctx = pytest.warns if distributed_env else no_warning_call - - with mock.patch( - "pytorch_lightning.trainer.connectors.logger_connector.result.distributed_available", - return_value=distributed_env, - ): - with warning_ctx(PossibleUserWarning, match=r"recommended to use `self.log\('bar', ..., sync_dist=True\)`"): - value = _ResultCollection._get_cache(result_metric, on_step=False) - assert value == 7.0
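
Taken together, the series changes `_ResultCollection._get_cache` so that an epoch-level metric logged without `sync_dist=True` in a distributed run emits a `PossibleUserWarning` instead of silently forcing the sync (fault-tolerant training still forces it, per PATCH 17-18). What follows is a minimal sketch of that end-state behavior, not part of the patch series itself: the model class and the "my_metric" key are illustrative only, and it assumes a PyTorch Lightning build with the final patch applied.

import pytorch_lightning as pl
from pytorch_lightning.demos.boring_classes import BoringModel


class SyncWarningModel(BoringModel):
    def on_train_epoch_end(self):
        # Epoch-level log without sync_dist=True: with more than one device this
        # now triggers the PossibleUserWarning added by this series.
        self.log("my_metric", 1.0)


trainer = pl.Trainer(fast_dev_run=1, accelerator="cpu", devices=2, strategy="ddp_spawn")
trainer.fit(SyncWarningModel())
# Expected warning (wording from the final patch):
# It is recommended to use `self.log('my_metric', ..., sync_dist=True)` when logging
# on epoch level in distributed setting to accumulate the metric across devices.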