From a4457f12208f3a2ddbf9bf5cd63762a0cacf8550 Mon Sep 17 00:00:00 2001
From: Nisheeth Lahoti <nisheeth.lahoti@gmail.com>
Date: Mon, 24 Jul 2023 07:32:16 +0530
Subject: [PATCH 1/8] Use hydra.run.dir (not os.getcwd) for DDP subprocesses'
 run dir

---
 .../fabric/strategies/launchers/subprocess_script.py         | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/lightning/fabric/strategies/launchers/subprocess_script.py b/src/lightning/fabric/strategies/launchers/subprocess_script.py
index 62b3a718b90f9..a442531a497b0 100644
--- a/src/lightning/fabric/strategies/launchers/subprocess_script.py
+++ b/src/lightning/fabric/strategies/launchers/subprocess_script.py
@@ -143,6 +143,7 @@ def _basic_subprocess_cmd() -> Sequence[str]:
 
 def _hydra_subprocess_cmd(local_rank: int) -> Tuple[Sequence[str], str]:
     import __main__  # local import to avoid https://github.com/Lightning-AI/lightning/issues/15218
+    from hydra.core.hydra_config import HydraConfig
     from hydra.utils import get_original_cwd, to_absolute_path
 
     # when user is using hydra find the absolute path
@@ -154,6 +155,6 @@ def _hydra_subprocess_cmd(local_rank: int) -> Tuple[Sequence[str], str]:
     command += sys.argv[1:]
 
     cwd = get_original_cwd()
-    os_cwd = f'"{os.getcwd()}"'
-    command += [f"hydra.run.dir={os_cwd}", f"hydra.job.name=train_ddp_process_{local_rank}"]
+    run_dir = f'"{HydraConfig.get().run.dir}"'
+    command += [f"hydra.run.dir={run_dir}", f"hydra.job.name=train_ddp_process_{local_rank}"]
     return command, cwd

From 65caaa54d32cb7d2ede7115d07573c284c82d97f Mon Sep 17 00:00:00 2001
From: Nisheeth Lahoti <nisheeth.lahoti@gmail.com>
Date: Mon, 24 Jul 2023 08:02:43 +0530
Subject: [PATCH 2/8] Update changelog

---
 src/lightning/fabric/CHANGELOG.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/lightning/fabric/CHANGELOG.md b/src/lightning/fabric/CHANGELOG.md
index 442abcfeec020..a6ecdf2366ed2 100644
--- a/src/lightning/fabric/CHANGELOG.md
+++ b/src/lightning/fabric/CHANGELOG.md
@@ -144,6 +144,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 ### Fixed
 
+- Fixed issue where DDP subprocesses that used Hydra would set hydra's working directory to current directory ([#18145](https://github.com/Lightning-AI/lightning/pull/18145))
+
+
 - Fixed issue where running on TPUs would select the wrong device index ([#17227](https://github.com/Lightning-AI/lightning/pull/17227))
 
 

From 44b1304a0f87a80e7671467a982f25c822c711f9 Mon Sep 17 00:00:00 2001
From: Nisheeth Lahoti <nisheeth.lahoti@gmail.com>
Date: Fri, 28 Jul 2023 07:17:46 +0530
Subject: [PATCH 3/8] Add tests

---
 .../launchers/test_subprocess_script.py          | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/tests/tests_pytorch/strategies/launchers/test_subprocess_script.py b/tests/tests_pytorch/strategies/launchers/test_subprocess_script.py
index 9f2fb371dc967..882669460097e 100644
--- a/tests/tests_pytorch/strategies/launchers/test_subprocess_script.py
+++ b/tests/tests_pytorch/strategies/launchers/test_subprocess_script.py
@@ -58,11 +58,25 @@ def test_ddp_with_hydra_runjob(subdir, tmpdir, monkeypatch):
 
     # Run CLI
     devices = 2
-    cmd = [sys.executable, "temp.py", f"+devices={devices}", '+strategy="ddp"']
+    run_dir = Path(tmpdir) / "hydra_output"
+    cmd = [sys.executable, "temp.py", f"+devices={devices}", '+strategy="ddp"', f"hydra.run.dir={run_dir}"]
     if subdir is not None:
         cmd += [f"hydra.output_subdir={subdir}"]
     run_process(cmd)
 
+    if subdir != "null":  # There's no .hydra if subdir on command line is "null"
+        # Make sure config.yaml was created for additional processes.
+        logs = list(run_dir.glob("**/config.yaml"))
+        assert len(logs) == devices
+
+        # Make sure the parameter was set and used
+        cfg = OmegaConf.load(logs[0])
+        assert cfg.devices == devices
+
+        # Make sure PL spawned a job that is logged by Hydra
+        logs = list(run_dir.glob("**/*.log"))
+        assert len(logs) == 1
+
 
 def test_kill():
     launcher = _SubprocessScriptLauncher(Mock(), 1, 1)

From 1f89360d4b1617af8f61b928606385d349cbef8e Mon Sep 17 00:00:00 2001
From: Nisheeth Lahoti <nisheeth.lahoti@gmail.com>
Date: Fri, 28 Jul 2023 07:22:01 +0530
Subject: [PATCH 4/8] Parts of test file got left uncommitted by mistake.
 Commit

---
 .../strategies/launchers/test_subprocess_script.py            | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/tests_pytorch/strategies/launchers/test_subprocess_script.py b/tests/tests_pytorch/strategies/launchers/test_subprocess_script.py
index 882669460097e..99e843f173c3f 100644
--- a/tests/tests_pytorch/strategies/launchers/test_subprocess_script.py
+++ b/tests/tests_pytorch/strategies/launchers/test_subprocess_script.py
@@ -1,5 +1,6 @@
 import subprocess
 import sys
+from pathlib import Path
 from unittest.mock import Mock
 
 import pytest
@@ -13,6 +14,7 @@
 
 if _HYDRA_WITH_RUN_PROCESS:
     from hydra.test_utils.test_utils import run_process
+    from omegaconf import OmegaConf
 
 
 # Script to run from command line
@@ -48,7 +50,7 @@ def task_fn(cfg):
 
 @RunIf(min_cuda_gpus=2, skip_windows=True, standalone=True)
 @pytest.mark.skipif(not _HYDRA_WITH_RUN_PROCESS, reason=str(_HYDRA_WITH_RUN_PROCESS))
-@pytest.mark.parametrize("subdir", [None, "dksa", ".hello"])
+@pytest.mark.parametrize("subdir", [None, "null", "dksa", ".hello"])
 def test_ddp_with_hydra_runjob(subdir, tmpdir, monkeypatch):
     monkeypatch.chdir(tmpdir)
 

From 8e0f5fa2c5931880aa4026cdfabeda8e14464cc9 Mon Sep 17 00:00:00 2001
From: Nisheeth Lahoti <nisheeth.lahoti@gmail.com>
Date: Fri, 28 Jul 2023 07:25:41 +0530
Subject: [PATCH 5/8] Create separate subdirs for different subprocesses

---
 src/lightning/fabric/strategies/launchers/subprocess_script.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/lightning/fabric/strategies/launchers/subprocess_script.py b/src/lightning/fabric/strategies/launchers/subprocess_script.py
index a442531a497b0..fc161bae2ecb1 100644
--- a/src/lightning/fabric/strategies/launchers/subprocess_script.py
+++ b/src/lightning/fabric/strategies/launchers/subprocess_script.py
@@ -157,4 +157,6 @@ def _hydra_subprocess_cmd(local_rank: int) -> Tuple[Sequence[str], str]:
     cwd = get_original_cwd()
     run_dir = f'"{HydraConfig.get().run.dir}"'
     command += [f"hydra.run.dir={run_dir}", f"hydra.job.name=train_ddp_process_{local_rank}"]
+    subdir = "null" if HydraConfig.get().output_subdir is None else f".pl_ddp_hydra_{local_rank}"
+    command += [f"hydra.output_subdir={subdir}"]
     return command, cwd

From 120813a08cd08535882bf81733f0806169dc90ee Mon Sep 17 00:00:00 2001
From: Nisheeth Lahoti <nisheeth.lahoti@gmail.com>
Date: Fri, 28 Jul 2023 07:33:44 +0530
Subject: [PATCH 6/8] Improve tests

---
 .../launchers/test_subprocess_script.py         | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/tests/tests_pytorch/strategies/launchers/test_subprocess_script.py b/tests/tests_pytorch/strategies/launchers/test_subprocess_script.py
index 99e843f173c3f..50c38762a1d16 100644
--- a/tests/tests_pytorch/strategies/launchers/test_subprocess_script.py
+++ b/tests/tests_pytorch/strategies/launchers/test_subprocess_script.py
@@ -66,18 +66,17 @@ def test_ddp_with_hydra_runjob(subdir, tmpdir, monkeypatch):
         cmd += [f"hydra.output_subdir={subdir}"]
     run_process(cmd)
 
-    if subdir != "null":  # There's no .hydra if subdir on command line is "null"
-        # Make sure config.yaml was created for additional processes.
-        logs = list(run_dir.glob("**/config.yaml"))
-        assert len(logs) == devices
+    # Make sure config.yaml was created for additional processes iff subdir is present.
+    saved_confs = list(run_dir.glob("**/config.yaml"))
+    assert len(saved_confs) == (0 if subdir == "null" else devices)
 
-        # Make sure the parameter was set and used
-        cfg = OmegaConf.load(logs[0])
+    if saved_confs:  # Make sure the parameter was set and used
+        cfg = OmegaConf.load(saved_confs[0])
         assert cfg.devices == devices
 
-        # Make sure PL spawned a job that is logged by Hydra
-        logs = list(run_dir.glob("**/*.log"))
-        assert len(logs) == 1
+    # Make sure PL spawned jobs that are logged by Hydra
+    logs = list(run_dir.glob("**/*.log"))
+    assert len(logs) == devices
 
 
 def test_kill():

From c1f30445ebc8b0f7def84c4a6762e4ba554906b7 Mon Sep 17 00:00:00 2001
From: Nisheeth Lahoti <nisheeth.lahoti@gmail.com>
Date: Sat, 29 Jul 2023 02:37:14 +0530
Subject: [PATCH 7/8] Replace `tmpdir` with `tmp_path` in test

---
 .../strategies/launchers/test_subprocess_script.py         | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/tests/tests_pytorch/strategies/launchers/test_subprocess_script.py b/tests/tests_pytorch/strategies/launchers/test_subprocess_script.py
index 50c38762a1d16..dd4922dc89997 100644
--- a/tests/tests_pytorch/strategies/launchers/test_subprocess_script.py
+++ b/tests/tests_pytorch/strategies/launchers/test_subprocess_script.py
@@ -1,6 +1,5 @@
 import subprocess
 import sys
-from pathlib import Path
 from unittest.mock import Mock
 
 import pytest
@@ -51,8 +50,8 @@ def task_fn(cfg):
 @RunIf(min_cuda_gpus=2, skip_windows=True, standalone=True)
 @pytest.mark.skipif(not _HYDRA_WITH_RUN_PROCESS, reason=str(_HYDRA_WITH_RUN_PROCESS))
 @pytest.mark.parametrize("subdir", [None, "null", "dksa", ".hello"])
-def test_ddp_with_hydra_runjob(subdir, tmpdir, monkeypatch):
-    monkeypatch.chdir(tmpdir)
+def test_ddp_with_hydra_runjob(subdir, tmp_path, monkeypatch):
+    monkeypatch.chdir(tmp_path)
 
     # Save script locally
     with open("temp.py", "w") as fn:
@@ -60,7 +59,7 @@ def test_ddp_with_hydra_runjob(subdir, tmpdir, monkeypatch):
 
     # Run CLI
     devices = 2
-    run_dir = Path(tmpdir) / "hydra_output"
+    run_dir = tmp_path / "hydra_output"
     cmd = [sys.executable, "temp.py", f"+devices={devices}", '+strategy="ddp"', f"hydra.run.dir={run_dir}"]
     if subdir is not None:
         cmd += [f"hydra.output_subdir={subdir}"]

From bd44aa4bb16db9b84388bca74af2fece36668fc8 Mon Sep 17 00:00:00 2001
From: Nisheeth Lahoti <nisheeth.lahoti@gmail.com>
Date: Sun, 30 Jul 2023 13:52:48 +0530
Subject: [PATCH 8/8] Set subdir=null in subprocesses

---
 .../fabric/strategies/launchers/subprocess_script.py       | 7 +++----
 .../strategies/launchers/test_subprocess_script.py         | 5 ++---
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/src/lightning/fabric/strategies/launchers/subprocess_script.py b/src/lightning/fabric/strategies/launchers/subprocess_script.py
index fc161bae2ecb1..fe3815c9588d0 100644
--- a/src/lightning/fabric/strategies/launchers/subprocess_script.py
+++ b/src/lightning/fabric/strategies/launchers/subprocess_script.py
@@ -155,8 +155,7 @@ def _hydra_subprocess_cmd(local_rank: int) -> Tuple[Sequence[str], str]:
     command += sys.argv[1:]
 
     cwd = get_original_cwd()
-    run_dir = f'"{HydraConfig.get().run.dir}"'
-    command += [f"hydra.run.dir={run_dir}", f"hydra.job.name=train_ddp_process_{local_rank}"]
-    subdir = "null" if HydraConfig.get().output_subdir is None else f".pl_ddp_hydra_{local_rank}"
-    command += [f"hydra.output_subdir={subdir}"]
+    rundir = f'"{HydraConfig.get().run.dir}"'
+    # Set output_subdir null since we don't want different subprocesses trying to write to config.yaml
+    command += [f"hydra.run.dir={rundir}", f"hydra.job.name=train_ddp_process_{local_rank}", "hydra.output_subdir=null"]
     return command, cwd
diff --git a/tests/tests_pytorch/strategies/launchers/test_subprocess_script.py b/tests/tests_pytorch/strategies/launchers/test_subprocess_script.py
index dd4922dc89997..8d7d31d212c46 100644
--- a/tests/tests_pytorch/strategies/launchers/test_subprocess_script.py
+++ b/tests/tests_pytorch/strategies/launchers/test_subprocess_script.py
@@ -8,7 +8,6 @@
 from lightning.pytorch.strategies.launchers.subprocess_script import _SubprocessScriptLauncher
 from tests_pytorch.helpers.runif import RunIf
 
-_HYDRA_WITH_RERUN = RequirementCache("hydra-core>=1.2")
 _HYDRA_WITH_RUN_PROCESS = RequirementCache("hydra-core>=1.0.7")
 
 if _HYDRA_WITH_RUN_PROCESS:
@@ -65,9 +64,9 @@ def test_ddp_with_hydra_runjob(subdir, tmp_path, monkeypatch):
         cmd += [f"hydra.output_subdir={subdir}"]
     run_process(cmd)
 
-    # Make sure config.yaml was created for additional processes iff subdir is present.
+    # Make sure no config.yaml was created for additional processes
     saved_confs = list(run_dir.glob("**/config.yaml"))
-    assert len(saved_confs) == (0 if subdir == "null" else devices)
+    assert len(saved_confs) == (0 if subdir == "null" else 1)  # Main process has config.yaml iff subdir!="null"
 
     if saved_confs:  # Make sure the parameter was set and used
         cfg = OmegaConf.load(saved_confs[0])