2424from lightning_lite .strategies .launchers .base import _Launcher
2525from lightning_lite .strategies .launchers .subprocess_script import _basic_subprocess_cmd , _hydra_subprocess_cmd
2626
27- _HYDRA_AVAILABLE = RequirementCache ("hydra-core " )
27+ _HYDRA_AVAILABLE = RequirementCache ("hydra" )
2828
2929
3030class _SubprocessScriptLauncher (_Launcher ):
@@ -100,6 +100,32 @@ def _call_children_scripts(self) -> None:
100100 # allow the user to pass the node rank
101101 os .environ ["NODE_RANK" ] = str (self .cluster_environment .node_rank ())
102102 os .environ ["LOCAL_RANK" ] = str (self .cluster_environment .local_rank ())
103+
104+ # Check if the current calling command looked like `python a/b/c.py` or `python -m a.b.c`
105+ # See https://docs.python.org/3/reference/import.html#main-spec
106+ if __main__ .__spec__ is None : # pragma: no-cover
107+ # Script called as `python a/b/c.py`
108+ if _HYDRA_AVAILABLE :
109+ # when the user is using hydra, resolve the absolute path with hydra's helper
110+ from hydra .utils import to_absolute_path
111+
112+ to_abs_path = to_absolute_path
113+ else :
114+ to_abs_path = os .path .abspath
115+
116+ # pull out the commands used to run the script and resolve the absolute file path
117+ command = sys .argv
118+ try :
119+ full_path = to_abs_path (command [0 ])
120+ except Exception :
121+ full_path = os .path .abspath (command [0 ])
122+
123+ command [0 ] = full_path
124+ # use the same python interpreter that is actually running
125+ command = [sys .executable ] + command
126+ else : # Script called as `python -m a.b.c`
127+ command = [sys .executable , "-m" , __main__ .__spec__ .name ] + sys .argv [1 :]
128+
103129 os .environ ["WORLD_SIZE" ] = f"{ self .num_processes * self .num_nodes } "
104130
105131 for local_rank in range (1 , self .num_processes ):
@@ -110,18 +136,18 @@ def _call_children_scripts(self) -> None:
110136 if os .environ .get ("PL_GLOBAL_SEED" ) is None and "PL_GLOBAL_SEED" in env_copy :
111137 del env_copy ["PL_GLOBAL_SEED" ]
112138
113- hydra_in_use = False
139+ # start process
140+ # if hydra is available and initialized, make sure to set the cwd correctly
141+ cwd : Optional [str ] = None
114142 if _HYDRA_AVAILABLE :
115143 from hydra .core .hydra_config import HydraConfig
144+ from hydra .utils import get_original_cwd
116145
117- hydra_in_use = HydraConfig .initialized ()
118-
119- if hydra_in_use :
120- command = _hydra_subprocess_cmd (local_rank )
121- else :
122- command = _basic_subprocess_cmd ()
123-
124- subprocess .Popen (command , env = env_copy )
146+ if HydraConfig .initialized ():
147+ cwd = get_original_cwd ()
148+ os_cwd = f'"{ os .getcwd ()} "'
149+ command += [f"hydra.run.dir={ os_cwd } " , f"hydra.job.name=train_ddp_process_{ local_rank } " ]
150+ subprocess .Popen (command , env = env_copy , cwd = cwd )
125151
126152 # starting all processes at once can cause issues
127153 # with dataloaders delay between 1-10 seconds
0 commit comments