 from pytorch_lightning.utilities.types import _PATH


-class _SpawnLauncher(_Launcher):
-    r"""Spawns processes that run a given function in parallel, and joins them all at the end.
+class _MultiProcessingLauncher(_Launcher):
+    r"""Launches processes that run a given function in parallel, and joins them all at the end.

     The main process in which this launcher is invoked creates N so-called worker processes (using
     :func:`torch.multiprocessing.start_processes`) that run the given function.
@@ -71,20 +71,20 @@ def is_interactive_compatible(self) -> bool:
         return self._start_method == "fork"

     def launch(self, function: Callable, *args: Any, trainer: Optional["pl.Trainer"] = None, **kwargs: Any) -> Any:
-        """Spawns processes that run the given function in parallel.
+        """Launches processes that run the given function in parallel.

         The function is allowed to have a return value. However, when all processes join, only the return value
         of worker process 0 gets returned from this `launch` method in the main process.

         Arguments:
-            function: The entry point for all spawned processes.
+            function: The entry point for all launched processes.
             *args: Optional positional arguments to be passed to the given function.
             trainer: Optional reference to the :class:`~pytorch_lightning.trainer.trainer.Trainer` for which
                 a selected set of attributes get restored in the main process after processes join.
             **kwargs: Optional keyword arguments to be passed to the given function.
         """
         # The default cluster environment in Lightning chooses a random free port number
-        # This needs to be done in the main process here before spawning to ensure each rank will connect
+        # This needs to be done in the main process here before starting processes to ensure each rank will connect
         # through the same port
         os.environ["MASTER_PORT"] = str(self._strategy.cluster_environment.main_port)
         context = mp.get_context(self._start_method)
@@ -95,12 +95,12 @@ def launch(self, function: Callable, *args: Any, trainer: Optional["pl.Trainer"]
             nprocs=self._strategy.num_processes,
             start_method=self._start_method,
         )
-        spawn_output = return_queue.get()
+        worker_output = return_queue.get()
         if trainer is None:
-            return spawn_output
+            return worker_output

-        self._recover_results_in_main_process(spawn_output, trainer)
-        return spawn_output.trainer_results
+        self._recover_results_in_main_process(worker_output, trainer)
+        return worker_output.trainer_results

     def _wrapping_function(
         self,
@@ -120,25 +120,25 @@ def _wrapping_function(
         if self._strategy.local_rank == 0:
             return_queue.put(move_data_to_device(results, "cpu"))

-    def _recover_results_in_main_process(self, spawn_output: "_SpawnOutput", trainer: "pl.Trainer") -> None:
+    def _recover_results_in_main_process(self, worker_output: "_WorkerOutput", trainer: "pl.Trainer") -> None:
         # transfer back the best path to the trainer
         if trainer.checkpoint_callback and hasattr(trainer.checkpoint_callback, "best_model_path"):
-            trainer.checkpoint_callback.best_model_path = str(spawn_output.best_model_path)
+            trainer.checkpoint_callback.best_model_path = str(worker_output.best_model_path)

         # TODO: pass also best score
         # load last weights
-        if spawn_output.weights_path is not None:
-            ckpt = self._strategy.checkpoint_io.load_checkpoint(spawn_output.weights_path)
+        if worker_output.weights_path is not None:
+            ckpt = self._strategy.checkpoint_io.load_checkpoint(worker_output.weights_path)
             trainer.lightning_module.load_state_dict(ckpt)  # type: ignore[arg-type]
-            self._strategy.checkpoint_io.remove_checkpoint(spawn_output.weights_path)
+            self._strategy.checkpoint_io.remove_checkpoint(worker_output.weights_path)

-        trainer.state = spawn_output.trainer_state
+        trainer.state = worker_output.trainer_state

         # get the `callback_metrics` and set it to the trainer
-        self.get_from_queue(trainer, spawn_output.extra)
+        self.get_from_queue(trainer, worker_output.extra)

-    def _collect_rank_zero_results(self, trainer: "pl.Trainer", results: Any) -> Optional["_SpawnOutput"]:
-        rank_zero_debug("Finalizing the DDP spawn environment.")
+    def _collect_rank_zero_results(self, trainer: "pl.Trainer", results: Any) -> Optional["_WorkerOutput"]:
+        rank_zero_debug("Collecting results from rank 0 process.")
         checkpoint_callback = trainer.checkpoint_callback
         best_model_path = (
             checkpoint_callback.best_model_path
@@ -162,7 +162,7 @@ def _collect_rank_zero_results(self, trainer: "pl.Trainer", results: Any) -> Opt
         extra = _FakeQueue()
         self.add_to_queue(trainer, extra)

-        return _SpawnOutput(best_model_path, weights_path, trainer.state, results, extra)
+        return _WorkerOutput(best_model_path, weights_path, trainer.state, results, extra)

     def add_to_queue(self, trainer: "pl.Trainer", queue: "_FakeQueue") -> None:
         """Appends the :attr:`trainer.callback_metrics` dictionary to the given queue. To avoid issues with memory
@@ -203,7 +203,7 @@ def empty(self) -> bool:
         return len(self) == 0


-class _SpawnOutput(NamedTuple):
+class _WorkerOutput(NamedTuple):
     best_model_path: Optional[_PATH]
     weights_path: Optional[_PATH]
     trainer_state: TrainerState
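
For context only, not part of the change above: a minimal, self-contained sketch of the `torch.multiprocessing.start_processes` pattern the launcher's docstring describes, using hypothetical `_worker` and `return_queue` names. It starts N workers and reads back only rank 0's result through a queue, which is the behavior `launch` relies on.

```python
import torch.multiprocessing as mp


def _worker(process_idx: int, return_queue) -> None:
    # start_processes passes the worker's index as the first argument.
    result = f"hello from rank {process_idx}"
    if process_idx == 0:
        # Only rank 0 sends its result back, mirroring the launcher's behavior.
        return_queue.put(result)


if __name__ == "__main__":
    start_method = "spawn"  # the launcher also supports "fork" where available
    context = mp.get_context(start_method)
    return_queue = context.SimpleQueue()
    mp.start_processes(
        _worker,
        args=(return_queue,),
        nprocs=2,
        start_method=start_method,
    )  # joins all workers before returning (join=True by default)
    print(return_queue.get())  # the return value collected from worker process 0
```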