Skip to content

Commit 2ded6e4

Browse files
committed
Revert "Improving Hydra+DDP support (#11617)"
This reverts commit 45ca781.
1 parent 609b258 commit 2ded6e4

File tree

2 files changed

+36
-171
lines changed

2 files changed

+36
-171
lines changed

src/pytorch_lightning/strategies/launchers/subprocess_script.py

Lines changed: 36 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@
2424
from lightning_lite.strategies.launchers.base import _Launcher
2525
from lightning_lite.strategies.launchers.subprocess_script import _basic_subprocess_cmd, _hydra_subprocess_cmd
2626

27-
_HYDRA_AVAILABLE = RequirementCache("hydra-core")
27+
_HYDRA_AVAILABLE = RequirementCache("hydra")
2828

2929

3030
class _SubprocessScriptLauncher(_Launcher):
@@ -100,6 +100,32 @@ def _call_children_scripts(self) -> None:
100100
# allow the user to pass the node rank
101101
os.environ["NODE_RANK"] = str(self.cluster_environment.node_rank())
102102
os.environ["LOCAL_RANK"] = str(self.cluster_environment.local_rank())
103+
104+
# Check if the current calling command looked like `python a/b/c.py` or `python -m a.b.c`
105+
# See https://docs.python.org/3/reference/import.html#main-spec
106+
if __main__.__spec__ is None: # pragma: no-cover
107+
# Script called as `python a/b/c.py`
108+
if _HYDRA_AVAILABLE:
109+
# when user is using hydra find the absolute path
110+
from hydra.utils import to_absolute_path
111+
112+
to_abs_path = to_absolute_path
113+
else:
114+
to_abs_path = os.path.abspath
115+
116+
# pull out the commands used to run the script and resolve the absolute file path
117+
command = sys.argv
118+
try:
119+
full_path = to_abs_path(command[0])
120+
except Exception:
121+
full_path = os.path.abspath(command[0])
122+
123+
command[0] = full_path
124+
# use the same Python interpreter that is actually running
125+
command = [sys.executable] + command
126+
else: # Script called as `python -m a.b.c`
127+
command = [sys.executable, "-m", __main__.__spec__.name] + sys.argv[1:]
128+
103129
os.environ["WORLD_SIZE"] = f"{self.num_processes * self.num_nodes}"
104130

105131
for local_rank in range(1, self.num_processes):
@@ -110,18 +136,18 @@ def _call_children_scripts(self) -> None:
110136
if os.environ.get("PL_GLOBAL_SEED") is None and "PL_GLOBAL_SEED" in env_copy:
111137
del env_copy["PL_GLOBAL_SEED"]
112138

113-
hydra_in_use = False
139+
# start process
140+
# if hydra is available and initialized, make sure to set the cwd correctly
141+
cwd: Optional[str] = None
114142
if _HYDRA_AVAILABLE:
115143
from hydra.core.hydra_config import HydraConfig
144+
from hydra.utils import get_original_cwd
116145

117-
hydra_in_use = HydraConfig.initialized()
118-
119-
if hydra_in_use:
120-
command = _hydra_subprocess_cmd(local_rank)
121-
else:
122-
command = _basic_subprocess_cmd()
123-
124-
subprocess.Popen(command, env=env_copy)
146+
if HydraConfig.initialized():
147+
cwd = get_original_cwd()
148+
os_cwd = f'"{os.getcwd()}"'
149+
command += [f"hydra.run.dir={os_cwd}", f"hydra.job.name=train_ddp_process_{local_rank}"]
150+
subprocess.Popen(command, env=env_copy, cwd=cwd)
125151

126152
# starting all processes at once can cause issues
127153
# with dataloaders delay between 1-10 seconds

tests/tests_pytorch/strategies/launchers/test_subprocess_script.py

Lines changed: 0 additions & 161 deletions
This file was deleted.

0 commit comments

Comments
 (0)