From 1ffc159b69e6812ceb6a4e10f6d6bc9d12239af6 Mon Sep 17 00:00:00 2001
From: s-rog
Date: Tue, 6 Apr 2021 09:26:40 +0800
Subject: [PATCH 01/11] init test

---
 tests/trainer/test_data_loading.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/tests/trainer/test_data_loading.py b/tests/trainer/test_data_loading.py
index ec7f020faa4c3..3599723f954b2 100644
--- a/tests/trainer/test_data_loading.py
+++ b/tests/trainer/test_data_loading.py
@@ -102,3 +102,14 @@ def check_replace_distrubuted_sampler(tmpdir, save_preds_on_dl_idx, accelerator,
 @pytest.mark.parametrize("mode", [1, 2])
 def test_replace_distrubuted_sampler_custom_dataloader_custom_batch_sampler(tmpdir, mode):
     check_replace_distrubuted_sampler(tmpdir, True, "ddp", 2, 2, mode)
+
+@RunIf(min_gpus=2, special=True)
+def test_dataloader_warnings():
+    trainer = Trainer(accelerator="ddp_spawn")
+    dl = DataLoader(RandomDataset(32, 64), num_workers=1)
+    if hasattr(dl, "persistent_workers"):
+        warn_str = "Consider setting persistent_workers=True"
+    else:
+        warn_str = "Consider setting accelerator=ddp"
+    with pytest.warns(UserWarning, match=warn_str):
+        trainer.test(BoringModel(), test_dataloaders=dl)

From 8bc4683c45e7a573bb074ebc5bb0739a5b7e792d Mon Sep 17 00:00:00 2001
From: Roger Shieh
Date: Wed, 31 Mar 2021 13:50:57 +0800
Subject: [PATCH 02/11] Update data_loading.py

---
 pytorch_lightning/trainer/data_loading.py | 33 ++++++++++++++++-------
 1 file changed, 23 insertions(+), 10 deletions(-)

diff --git a/pytorch_lightning/trainer/data_loading.py b/pytorch_lightning/trainer/data_loading.py
index 59ec40c3df2e8..98e0584601564 100644
--- a/pytorch_lightning/trainer/data_loading.py
+++ b/pytorch_lightning/trainer/data_loading.py
@@ -61,18 +61,31 @@ def _worker_check(self, dataloader: DataLoader, name: str) -> None:
         using_spawn = self.accelerator_connector.distributed_backend == "ddp_spawn"
         if is_dataloader and not on_windows:
             if dataloader.num_workers > 0 and using_spawn:
-                rank_zero_warn(
-                    'Dataloader(num_workers>0) and ddp_spawn do not mix well!'
-                    ' Your performance might suffer dramatically.'
-                    ' Please consider setting accelerator=ddp to use num_workers > 0'
-                    ' (this is a bottleneck of Python .spawn() and PyTorch'
-                )
+                if hasattr(dataloader, "persistent_workers"):
+                    if not dataloader.persistent_workers:
+                        rank_zero_warn(
+                            'num_workers>0, persistent_workers=False, and accelerator=ddp_spawn may result in data loading bottlenecks.'
+                            ' Consider setting persistent_workers=True (this is a limitation of Python .spawn() and PyTorch)'
+                        )
+                else:
+                    rank_zero_warn(
+                        'num_workers>0 and accelerator=ddp_spawn do not mix well and may result in data loading bottlenecks.'
+                        ' Consider setting accelerator=ddp to use num_workers>0 (this is a limitation of Python .spawn() and PyTorch)'
+                    )
             elif dataloader.num_workers == 0 and using_spawn:
-                rank_zero_warn(
-                    'You are using `accelerator=ddp_spawn` with num_workers=0.'
-                    ' For much faster performance, switch to `accelerator=ddp` and set `num_workers>0`'
-                )
+                # checks for the attr persistent_workers not available on pytorch < 1.7
+                if hasattr(dataloader, "persistent_workers"):
+                    if not dataloader.persistent_workers:
+                        rank_zero_warn(
+                            'accelerator=ddp_spawn and num_workers=0 may result in data loading bottlenecks.'
+                            ' Consider setting num_workers>0 and persistent_workers=True'
+                        )
+                else:
+                    rank_zero_warn(
+                        'accelerator=ddp_spawn and num_workers=0 may result in data loading bottlenecks.'
+                        ' Consider setting accelerator=ddp and set num_workers>0'
+                    )
             elif dataloader.num_workers <= 2 and multiprocessing.cpu_count() > 2 and not using_spawn:
                 num_cpus = multiprocessing.cpu_count()

From 97411f2202104614336b71c397e706cad7328d44 Mon Sep 17 00:00:00 2001
From: Roger Shieh
Date: Wed, 31 Mar 2021 13:55:26 +0800
Subject: [PATCH 03/11] fmt

---
 pytorch_lightning/trainer/data_loading.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/pytorch_lightning/trainer/data_loading.py b/pytorch_lightning/trainer/data_loading.py
index 98e0584601564..0480c8023c3f8 100644
--- a/pytorch_lightning/trainer/data_loading.py
+++ b/pytorch_lightning/trainer/data_loading.py
@@ -61,20 +61,25 @@ def _worker_check(self, dataloader: DataLoader, name: str) -> None:
         using_spawn = self.accelerator_connector.distributed_backend == "ddp_spawn"
         if is_dataloader and not on_windows:
             if dataloader.num_workers > 0 and using_spawn:
+                # checks for the attr persistent_workers available in pytorch >= 1.7
                 if hasattr(dataloader, "persistent_workers"):
                     if not dataloader.persistent_workers:
                         rank_zero_warn(
-                            'num_workers>0, persistent_workers=False, and accelerator=ddp_spawn may result in data loading bottlenecks.'
-                            ' Consider setting persistent_workers=True (this is a limitation of Python .spawn() and PyTorch)'
+                            'num_workers>0, persistent_workers=False, and accelerator=ddp_spawn'
+                            ' may result in data loading bottlenecks.'
+                            ' Consider setting persistent_workers=True'
+                            ' (this is a limitation of Python .spawn() and PyTorch)'
                         )
                 else:
                     rank_zero_warn(
-                        'num_workers>0 and accelerator=ddp_spawn do not mix well and may result in data loading bottlenecks.'
-                        ' Consider setting accelerator=ddp to use num_workers>0 (this is a limitation of Python .spawn() and PyTorch)'
+                        'num_workers>0 and accelerator=ddp_spawn do not mix well'
+                        ' and may result in data loading bottlenecks.'
+                        ' Consider setting accelerator=ddp to use num_workers>0'
+                        ' (this is a limitation of Python .spawn() and PyTorch)'
                     )
             elif dataloader.num_workers == 0 and using_spawn:
-                # checks for the attr persistent_workers not available on pytorch < 1.7
+                # checks for the attr persistent_workers available in pytorch >= 1.7
                 if hasattr(dataloader, "persistent_workers"):
                     if not dataloader.persistent_workers:
                         rank_zero_warn(

From e1b1702fdff3828d457a97dcf178ae831ee1f1ff Mon Sep 17 00:00:00 2001
From: s-rog
Date: Tue, 6 Apr 2021 09:38:09 +0800
Subject: [PATCH 04/11] pep8

---
 tests/trainer/test_data_loading.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/trainer/test_data_loading.py b/tests/trainer/test_data_loading.py
index 3599723f954b2..d42e1bb62128e 100644
--- a/tests/trainer/test_data_loading.py
+++ b/tests/trainer/test_data_loading.py
@@ -103,6 +103,7 @@ def check_replace_distrubuted_sampler(tmpdir, save_preds_on_dl_idx, accelerator,
 def test_replace_distrubuted_sampler_custom_dataloader_custom_batch_sampler(tmpdir, mode):
     check_replace_distrubuted_sampler(tmpdir, True, "ddp", 2, 2, mode)
 
+
 @RunIf(min_gpus=2, special=True)
 def test_dataloader_warnings():
     trainer = Trainer(accelerator="ddp_spawn")

From a52e500d40a32c549a2147668affb8a48cec584d Mon Sep 17 00:00:00 2001
From: Roger Shieh
Date: Tue, 6 Apr 2021 10:39:53 +0800
Subject: [PATCH 05/11] remove special

---
 tests/trainer/test_data_loading.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/trainer/test_data_loading.py b/tests/trainer/test_data_loading.py
index d42e1bb62128e..6448ac3eea2b7 100644
--- a/tests/trainer/test_data_loading.py
+++ b/tests/trainer/test_data_loading.py
@@ -104,7 +104,7 @@ def test_replace_distrubuted_sampler_custom_dataloader_custom_batch_sampler(tmpd
     check_replace_distrubuted_sampler(tmpdir, True, "ddp", 2, 2, mode)
 
 
-@RunIf(min_gpus=2, special=True)
+@RunIf(min_gpus=2)
 def test_dataloader_warnings():
     trainer = Trainer(accelerator="ddp_spawn")
     dl = DataLoader(RandomDataset(32, 64), num_workers=1)

From c710c9aca08a25656b90601b2e670d7df80740fc Mon Sep 17 00:00:00 2001
From: s-rog
Date: Tue, 6 Apr 2021 11:13:06 +0800
Subject: [PATCH 06/11] Update chlog

---
 CHANGELOG.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index bab4034d80f6c..3f2dbaee9b754 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -102,6 +102,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Changed `PyTorchProfiler` to use `torch.autograd.profiler.record_function` to record functions ([#6349](https://github.com/PyTorchLightning/pytorch-lightning/pull/6349))
 
 
+- Changed warnings and recommendations for dataloaders in `ddp_spawn` ([#6762](https://github.com/PyTorchLightning/pytorch-lightning/pull/6762/))
+
+
 ### Deprecated
 
 - `period` has been deprecated in favor of `every_n_val_epochs` in the `ModelCheckpoint` callback ([#6146](https://github.com/PyTorchLightning/pytorch-lightning/pull/6146))

From aa96f06cb49c3c59e7f17d44c3349ed10569cdfa Mon Sep 17 00:00:00 2001
From: Roger Shieh
Date: Thu, 8 Apr 2021 09:31:07 +0800
Subject: [PATCH 07/11] mock

---
 tests/trainer/test_data_loading.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/tests/trainer/test_data_loading.py b/tests/trainer/test_data_loading.py
index 6448ac3eea2b7..8e260077b1e13 100644
--- a/tests/trainer/test_data_loading.py
+++ b/tests/trainer/test_data_loading.py
@@ -15,6 +15,7 @@
 import pytest
 from torch.utils.data import DataLoader
 from torch.utils.data.sampler import BatchSampler, SequentialSampler
+from unittest import mock
 
 from pytorch_lightning import Trainer
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
@@ -104,7 +105,9 @@ def test_replace_distrubuted_sampler_custom_dataloader_custom_batch_sampler(tmpd
     check_replace_distrubuted_sampler(tmpdir, True, "ddp", 2, 2, mode)
 
 
-@RunIf(min_gpus=2)
+@mock.patch.dict(os.environ, {"CUDA_VISIBLE_DEVICES": "0,1"})
+@mock.patch('torch.cuda.device_count', return_value=2)
+@mock.patch('torch.cuda.is_available', return_value=True)
 def test_dataloader_warnings():
     trainer = Trainer(accelerator="ddp_spawn")
     dl = DataLoader(RandomDataset(32, 64), num_workers=1)
@@ -112,5 +115,6 @@ def test_dataloader_warnings():
         warn_str = "Consider setting persistent_workers=True"
     else:
         warn_str = "Consider setting accelerator=ddp"
-    with pytest.warns(UserWarning, match=warn_str):
-        trainer.test(BoringModel(), test_dataloaders=dl)
+    with pytest.raises(RuntimeError):
+        with pytest.warns(UserWarning, match=warn_str):
+            trainer.test(BoringModel(), test_dataloaders=dl)

From 80254dbd4d2ef53c5fcb8a25e460503f05ce82e8 Mon Sep 17 00:00:00 2001
From: Roger Shieh
Date: Thu, 8 Apr 2021 09:33:07 +0800
Subject: [PATCH 08/11] add missing import

---
 tests/trainer/test_data_loading.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/trainer/test_data_loading.py b/tests/trainer/test_data_loading.py
index 8e260077b1e13..c44d208ef0417 100644
--- a/tests/trainer/test_data_loading.py
+++ b/tests/trainer/test_data_loading.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import os
 import pytest
 from torch.utils.data import DataLoader
 from torch.utils.data.sampler import BatchSampler, SequentialSampler
 from unittest import mock

From 2aaef2f949cc041793d5eb4038fd6ca02726e487 Mon Sep 17 00:00:00 2001
From: Roger Shieh
Date: Thu, 8 Apr 2021 10:55:15 +0800
Subject: [PATCH 09/11] missing args

---
 tests/trainer/test_data_loading.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/trainer/test_data_loading.py b/tests/trainer/test_data_loading.py
index c44d208ef0417..88a62e2aa5cf5 100644
--- a/tests/trainer/test_data_loading.py
+++ b/tests/trainer/test_data_loading.py
@@ -109,7 +109,7 @@ def test_replace_distrubuted_sampler_custom_dataloader_custom_batch_sampler(tmpd
 @mock.patch.dict(os.environ, {"CUDA_VISIBLE_DEVICES": "0,1"})
 @mock.patch('torch.cuda.device_count', return_value=2)
 @mock.patch('torch.cuda.is_available', return_value=True)
-def test_dataloader_warnings():
+def test_dataloader_warnings(cuda_available_mock, device_count_mock):
     trainer = Trainer(accelerator="ddp_spawn")
     dl = DataLoader(RandomDataset(32, 64), num_workers=1)
     if hasattr(dl, "persistent_workers"):

From 511f962fb85d2a28f07471cb073af1909f0b5078 Mon Sep 17 00:00:00 2001
From: Roger Shieh
Date: Thu, 8 Apr 2021 11:49:01 +0800
Subject: [PATCH 10/11] test

---
 tests/trainer/test_data_loading.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tests/trainer/test_data_loading.py b/tests/trainer/test_data_loading.py
index 88a62e2aa5cf5..12e3fc6e2572f 100644
--- a/tests/trainer/test_data_loading.py
+++ b/tests/trainer/test_data_loading.py
@@ -116,6 +116,5 @@ def test_dataloader_warnings(cuda_available_mock, device_count_mock):
         warn_str = "Consider setting persistent_workers=True"
     else:
         warn_str = "Consider setting accelerator=ddp"
-    with pytest.raises(RuntimeError):
-        with pytest.warns(UserWarning, match=warn_str):
-            trainer.test(BoringModel(), test_dataloaders=dl)
+    with pytest.warns(UserWarning, match=warn_str):
+        trainer.test(BoringModel(), test_dataloaders=dl)

From 90f29fe591b0c128568f1d306c7ad1f935143109 Mon Sep 17 00:00:00 2001
From: Carlos Mocholi
Date: Fri, 9 Apr 2021 02:50:12 +0200
Subject: [PATCH 11/11] Do not require mock. Add num workers parametrize

---
 tests/trainer/test_data_loading.py | 28 +++++++++++++++++-----------
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/tests/trainer/test_data_loading.py b/tests/trainer/test_data_loading.py
index 12e3fc6e2572f..382311c107958 100644
--- a/tests/trainer/test_data_loading.py
+++ b/tests/trainer/test_data_loading.py
@@ -12,11 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import os
 import pytest
 from torch.utils.data import DataLoader
 from torch.utils.data.sampler import BatchSampler, SequentialSampler
-from unittest import mock
 
 from pytorch_lightning import Trainer
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
@@ -106,15 +104,23 @@ def test_replace_distrubuted_sampler_custom_dataloader_custom_batch_sampler(tmpd
     check_replace_distrubuted_sampler(tmpdir, True, "ddp", 2, 2, mode)
 
 
-@mock.patch.dict(os.environ, {"CUDA_VISIBLE_DEVICES": "0,1"})
-@mock.patch('torch.cuda.device_count', return_value=2)
-@mock.patch('torch.cuda.is_available', return_value=True)
-def test_dataloader_warnings(cuda_available_mock, device_count_mock):
-    trainer = Trainer(accelerator="ddp_spawn")
-    dl = DataLoader(RandomDataset(32, 64), num_workers=1)
+@pytest.mark.parametrize("num_workers", [0, 1])
+def test_dataloader_warnings(num_workers):
+
+    class TestModel(BoringModel):
+
+        def on_train_start(self, *_) -> None:
+            raise SystemExit()
+
+    dl = DataLoader(RandomDataset(32, 64), num_workers=num_workers)
     if hasattr(dl, "persistent_workers"):
-        warn_str = "Consider setting persistent_workers=True"
+        if num_workers == 0:
+            warn_str = "Consider setting num_workers>0 and persistent_workers=True"
+        else:
+            warn_str = "Consider setting persistent_workers=True"
     else:
         warn_str = "Consider setting accelerator=ddp"
-    with pytest.warns(UserWarning, match=warn_str):
-        trainer.test(BoringModel(), test_dataloaders=dl)
+
+    trainer = Trainer(accelerator="ddp_spawn")
+    with pytest.warns(UserWarning, match=warn_str), pytest.raises(SystemExit):
+        trainer.fit(TestModel(), dl)
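
Usage note for reviewers: the configuration the reworked warning points users toward corresponds roughly to the sketch below. This is not taken from the patches above; `RandomDataset` and `ToyModel` are illustrative stand-ins for the test helpers, and `persistent_workers` assumes PyTorch >= 1.7.

import torch
from torch.utils.data import DataLoader, Dataset
import pytorch_lightning as pl


class RandomDataset(Dataset):
    # small stand-in dataset, mirroring the RandomDataset helper used in the tests above
    def __init__(self, size, length):
        self.data = torch.randn(length, size)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]


class ToyModel(pl.LightningModule):
    # minimal LightningModule, similar in spirit to the BoringModel test helper
    def __init__(self):
        super().__init__()
        self.layer = torch.nn.Linear(32, 2)

    def training_step(self, batch, batch_idx):
        return self.layer(batch).sum()

    def configure_optimizers(self):
        return torch.optim.SGD(self.parameters(), lr=0.1)


# Re-creating DataLoader workers at every epoch is what makes num_workers>0 slow under the
# spawn start method; persistent_workers=True (PyTorch >= 1.7) keeps the workers alive instead.
train_loader = DataLoader(RandomDataset(32, 64), batch_size=8, num_workers=4, persistent_workers=True)

trainer = pl.Trainer(gpus=2, accelerator="ddp_spawn", max_epochs=2)
trainer.fit(ToyModel(), train_loader)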