Skip to content

Commit eafec7d

Browse files
authored
Fix DDP + SyncBN (#6838)
* Fix DDP + SyncBN: ensure that the model is already on the correct GPU before applying the SyncBN conversion. * Fix order of SyncBN conversion for ddp_spawn.
1 parent 6dc1078 commit eafec7d

File tree

2 files changed

+6
-6
lines changed

2 files changed

+6
-6
lines changed

pytorch_lightning/plugins/training_type/ddp.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -241,12 +241,12 @@ def init_ddp_connection(self, global_rank: int, world_size: int) -> None:
241241
torch_distrib.init_process_group(self.torch_distributed_backend, rank=global_rank, world_size=world_size)
242242

243243
def pre_dispatch(self):
244-
if self.sync_batchnorm:
245-
self.model = self.configure_sync_batchnorm(self.model)
246-
247244
# move the model to the correct device
248245
self.model_to_device()
249246

247+
if self.sync_batchnorm:
248+
self.model = self.configure_sync_batchnorm(self.model)
249+
250250
self.configure_ddp()
251251

252252
self.barrier()

pytorch_lightning/plugins/training_type/ddp_spawn.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -141,12 +141,12 @@ def new_process(self, process_idx, trainer, mp_queue):
141141
self.dist.rank = self.global_rank
142142
self.dist.device = self.root_device
143143

144-
if self.sync_batchnorm:
145-
self.model = self.configure_sync_batchnorm(self.model)
146-
147144
# move the model to the correct device
148145
self.model_to_device()
149146

147+
if self.sync_batchnorm:
148+
self.model = self.configure_sync_batchnorm(self.model)
149+
150150
self.configure_ddp()
151151

152152
self.barrier()

0 commit comments

Comments
 (0)