@@ -128,7 +128,7 @@ def create_lightning_trainer(container: LightningContainer,
128128    effective_num_gpus  =  num_gpus  *  num_nodes 
129129    # Accelerator should be "ddp" when running large models in AzureML (when using DDP_spawn, we run out of GPU memory). 
130130    if  effective_num_gpus  >  1 :
131-         accelerator  =  "ddp" 
131+         accelerator :  Optional [ str ]  =  "ddp" 
132132        # Initialize the DDP plugin with find_unused_parameters=False by default. If True (default), it prints out 
133133        # lengthy warnings about the performance impact of find_unused_parameters 
134134        plugins  =  [DDPPlugin (num_nodes = num_nodes , sync_batchnorm = True ,
@@ -273,10 +273,10 @@ def model_train(checkpoint_path: Optional[Path],
273273    # Per-subject model outputs for regression models are written per rank, and need to be aggregated here. 
274274    # Each thread per rank will come here, and upload its files to the run outputs. Rank 0 will later download them. 
275275    if  is_azureml_run  and  world_size  >  1  and  isinstance (lightning_model , ScalarLightning ):
276-         upload_output_file_as_temp (lightning_model .train_subject_outputs_logger .csv_path ,
277-                                    container .outputs_folder )   # type: ignore 
278-         upload_output_file_as_temp (lightning_model .val_subject_outputs_logger .csv_path ,
279-                                    container .outputs_folder )   # type: ignore 
276+         upload_output_file_as_temp (lightning_model .train_subject_outputs_logger .csv_path ,   # type: ignore 
277+                                    container .outputs_folder )
278+         upload_output_file_as_temp (lightning_model .val_subject_outputs_logger .csv_path ,   # type: ignore 
279+                                    container .outputs_folder )
280280    # DDP will start multiple instances of the runner, one for each GPU. Those should terminate here after training. 
281281    # We can now use the global_rank of the Lightning model, rather than environment variables, because DDP has set 
282282    # all necessary properties. 
0 commit comments