From ae2cd9b337df8b890e9a803dd2e62e4ceed1f59c Mon Sep 17 00:00:00 2001 From: ananthsub Date: Wed, 7 Apr 2021 15:40:23 -0700 Subject: [PATCH 1/3] Update accelerator_connector.py --- .../trainer/connectors/accelerator_connector.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 1e00d33cdf05a..0d8f16273f2b4 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -112,12 +112,6 @@ def __init__( self._training_type_plugin: Optional[TrainingTypePlugin] = None self._cluster_environment: Optional[ClusterEnvironment] = None - # init the default rank if exists - # we need to call this here or NVIDIA flags and other messaging in init will show on all ranks - # this way we only show it on rank 0 - if "LOCAL_RANK" in os.environ: - rank_zero_only.rank = int(os.environ["LOCAL_RANK"]) - # for gpus allow int, string and gpu list if auto_select_gpus and isinstance(gpus, int): self.gpus = pick_multiple_gpus(gpus) From 0f0fd67012fcde3308949d6ae35e1a299e8fb5fb Mon Sep 17 00:00:00 2001 From: ananthsub Date: Wed, 7 Apr 2021 15:51:25 -0700 Subject: [PATCH 2/3] Update accelerator_connector.py --- pytorch_lightning/trainer/connectors/accelerator_connector.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 0d8f16273f2b4..aa52ec1c40d82 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -59,7 +59,6 @@ device_parser, DeviceType, DistributedType, - rank_zero_only, ) from pytorch_lightning.utilities.distributed import rank_zero_info, rank_zero_warn from pytorch_lightning.utilities.exceptions import MisconfigurationException From aeb3268ab97dedff0d5684f9625cecd16f2b1712 Mon Sep 17 00:00:00 2001 From: ananthsub Date: Wed, 7 Apr 2021 16:02:53 -0700 Subject: [PATCH 3/3] Update CHANGELOG.md --- CHANGELOG.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d9cab7a63eca6..1d7573af02513 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -177,7 +177,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Fixed -- Set better defaults for `rank_zero_only.rank` when training is launched with SLURM and torchelastic ([#6802](https://github.com/PyTorchLightning/pytorch-lightning/pull/6802/)) +- Set better defaults for `rank_zero_only.rank` when training is launched with SLURM and torchelastic: + * Support SLURM and torchelastic global rank environment variables ([#5715](https://github.com/PyTorchLightning/pytorch-lightning/pull/5715)) + * Remove hardcoding of local rank in accelerator connector ([#6878](https://github.com/PyTorchLightning/pytorch-lightning/pull/6878)) - Sanitize `None` params during pruning ([#6836](https://github.com/PyTorchLightning/pytorch-lightning/pull/6836))