diff --git a/CHANGELOG.md b/CHANGELOG.md index 6453989f72136..749e8813f3551 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -435,6 +435,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Deprecated passing `resume_from_checkpoint` to the `Trainer` constructor in favor of `trainer.fit(ckpt_path=)` ([#10061](https://github.com/PyTorchLightning/pytorch-lightning/pull/10061)) +- Deprecated `ClusterEnvironment.creates_children()` in favor of `ClusterEnvironment.creates_processes_externally` (property) ([#10106](https://github.com/PyTorchLightning/pytorch-lightning/pull/10106)) + + ### Removed - Removed deprecated `metrics` ([#8586](https://github.com/PyTorchLightning/pytorch-lightning/pull/8586/)) diff --git a/docs/source/clouds/cluster.rst b/docs/source/clouds/cluster.rst index a4a1524c05734..6f1c70b48b448 100644 --- a/docs/source/clouds/cluster.rst +++ b/docs/source/clouds/cluster.rst @@ -290,8 +290,9 @@ and node rank (node id). Here is an example of a custom class MyClusterEnvironment(ClusterEnvironment): - def creates_children(self) -> bool: - # return True if the cluster is managed (you don't launch processes yourself) + @property + def creates_processes_externally(self) -> bool: + """Return True if the cluster is managed (you don't launch processes yourself)""" return True def world_size(self) -> int: diff --git a/pytorch_lightning/plugins/environments/cluster_environment.py b/pytorch_lightning/plugins/environments/cluster_environment.py index 251e8ba16f6e7..237842b4ffb59 100644 --- a/pytorch_lightning/plugins/environments/cluster_environment.py +++ b/pytorch_lightning/plugins/environments/cluster_environment.py @@ -13,14 +13,30 @@ # limitations under the License. from abc import ABC, abstractmethod +from pytorch_lightning.utilities import rank_zero_deprecation + class ClusterEnvironment(ABC): """Specification of a cluster environment.""" + @property @abstractmethod - def creates_children(self) -> bool: + def creates_processes_externally(self) -> bool: """Whether the environment creates the subprocesses or not.""" + def creates_children(self) -> bool: + """Whether the environment creates the subprocesses or not. + + .. deprecated:: v1.5 + This method was deprecated in v1.5 and will be removed in v1.6. Use the property + :attr:`creates_processes_externally` instead. + """ + rank_zero_deprecation( + f"`{self.__class__.__name__}.creates_children()` was deprecated in v1.5 and will be removed in v1.6." + " Use the property :attr:`creates_processes_externally` instead." + ) + return self.creates_processes_externally + @abstractmethod def master_address(self) -> str: """The master address through which all processes connect and communicate.""" diff --git a/pytorch_lightning/plugins/environments/kubeflow_environment.py b/pytorch_lightning/plugins/environments/kubeflow_environment.py index 10a020d35a529..c92c10dfad8bd 100644 --- a/pytorch_lightning/plugins/environments/kubeflow_environment.py +++ b/pytorch_lightning/plugins/environments/kubeflow_environment.py @@ -35,7 +35,8 @@ def is_using_kubeflow() -> bool: excluded_env_vars = ("GROUP_RANK", "LOCAL_RANK", "LOCAL_WORLD_SIZE") return all(v in os.environ for v in required_env_vars) and not any(v in os.environ for v in excluded_env_vars) - def creates_children(self) -> bool: + @property + def creates_processes_externally(self) -> bool: return True def master_address(self) -> str: diff --git a/pytorch_lightning/plugins/environments/lightning_environment.py b/pytorch_lightning/plugins/environments/lightning_environment.py index b3558c23d6b94..6919b211cb0ba 100644 --- a/pytorch_lightning/plugins/environments/lightning_environment.py +++ b/pytorch_lightning/plugins/environments/lightning_environment.py @@ -40,7 +40,8 @@ def __init__(self): self._global_rank: int = 0 self._world_size: int = 1 - def creates_children(self) -> bool: + @property + def creates_processes_externally(self) -> bool: """Returns whether the cluster creates the processes or not. If at least :code:`LOCAL_RANK` is available as environment variable, Lightning assumes the user acts as the diff --git a/pytorch_lightning/plugins/environments/lsf_environment.py b/pytorch_lightning/plugins/environments/lsf_environment.py index af6bfbb8163c9..06563c7f017bb 100644 --- a/pytorch_lightning/plugins/environments/lsf_environment.py +++ b/pytorch_lightning/plugins/environments/lsf_environment.py @@ -52,7 +52,8 @@ def is_using_lsf() -> bool: required_env_vars = ("LSB_JOBID", "LSB_HOSTS", "JSM_NAMESPACE_LOCAL_RANK", "JSM_NAMESPACE_SIZE") return all(v in os.environ for v in required_env_vars) - def creates_children(self) -> bool: + @property + def creates_processes_externally(self) -> bool: return True def master_address(self): diff --git a/pytorch_lightning/plugins/environments/slurm_environment.py b/pytorch_lightning/plugins/environments/slurm_environment.py index fb5c3d1e92cbf..d599c513650c2 100644 --- a/pytorch_lightning/plugins/environments/slurm_environment.py +++ b/pytorch_lightning/plugins/environments/slurm_environment.py @@ -24,7 +24,8 @@ class SLURMEnvironment(ClusterEnvironment): """Cluster environment for training on a cluster managed by SLURM.""" - def creates_children(self) -> bool: + @property + def creates_processes_externally(self) -> bool: return True def master_address(self) -> str: diff --git a/pytorch_lightning/plugins/environments/torchelastic_environment.py b/pytorch_lightning/plugins/environments/torchelastic_environment.py index f6c0335ac5cc8..9e4b4a9482bfd 100644 --- a/pytorch_lightning/plugins/environments/torchelastic_environment.py +++ b/pytorch_lightning/plugins/environments/torchelastic_environment.py @@ -31,7 +31,8 @@ def is_using_torchelastic() -> bool: required_env_vars = ("RANK", "GROUP_RANK", "LOCAL_RANK", "LOCAL_WORLD_SIZE") return all(v in os.environ for v in required_env_vars) - def creates_children(self) -> bool: + @property + def creates_processes_externally(self) -> bool: return True def master_address(self) -> str: diff --git a/pytorch_lightning/plugins/training_type/ddp.py b/pytorch_lightning/plugins/training_type/ddp.py index 4499c1d7dfc41..fd8ee49cf15d7 100644 --- a/pytorch_lightning/plugins/training_type/ddp.py +++ b/pytorch_lightning/plugins/training_type/ddp.py @@ -177,7 +177,7 @@ def _is_single_process_single_device(self) -> bool: def setup_environment(self) -> None: # start the other scripts - if not self.cluster_environment.creates_children(): + if not self.cluster_environment.creates_processes_externally: self._call_children_scripts() # set the task idx @@ -277,7 +277,7 @@ def _check_can_spawn_children(self): raise RuntimeError( "Lightning attempted to launch new distributed processes with `local_rank > 0`. This should not happen." " Possible reasons: 1) LOCAL_RANK environment variable was incorrectly modified by the user," - " 2) `ClusterEnvironment.creates_children()` incorrectly implemented." + " 2) `ClusterEnvironment.creates_processes_externally` incorrectly implemented." ) def set_world_ranks(self) -> None: diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py index e6a18d538eb1d..7ad93b167794d 100644 --- a/tests/accelerators/test_accelerator_connector.py +++ b/tests/accelerators/test_accelerator_connector.py @@ -369,7 +369,8 @@ class CustomCluster(LightningEnvironment): def master_address(self): return "asdf" - def creates_children(self) -> bool: + @property + def creates_processes_externally(self) -> bool: return True trainer = Trainer( diff --git a/tests/deprecated_api/test_remove_1-6.py b/tests/deprecated_api/test_remove_1-6.py index 74ab533d39efb..22ce20618f2e8 100644 --- a/tests/deprecated_api/test_remove_1-6.py +++ b/tests/deprecated_api/test_remove_1-6.py @@ -20,6 +20,12 @@ from pytorch_lightning import Trainer from pytorch_lightning.callbacks import ModelCheckpoint from pytorch_lightning.callbacks.early_stopping import EarlyStopping +from pytorch_lightning.plugins.environments import ( + KubeflowEnvironment, + LightningEnvironment, + SLURMEnvironment, + TorchElasticEnvironment, +) from pytorch_lightning.plugins.training_type import DDPPlugin, DDPSpawnPlugin from pytorch_lightning.utilities.distributed import rank_zero_deprecation, rank_zero_warn from pytorch_lightning.utilities.model_helpers import is_overridden @@ -421,3 +427,17 @@ def test_v1_6_0_is_slurm_managing_tasks(): with pytest.deprecated_call(match=r"`AcceleratorConnector.is_slurm_managing_tasks` was deprecated in v1.5"): trainer._accelerator_connector.is_slurm_managing_tasks = False + + +@pytest.mark.parametrize( + "cluster_environment", + [ + KubeflowEnvironment(), + LightningEnvironment(), + SLURMEnvironment(), + TorchElasticEnvironment(), + ], +) +def test_v1_6_0_cluster_environment_creates_children(cluster_environment): + with pytest.deprecated_call(match="was deprecated in v1.5 and will be removed in v1.6"): + cluster_environment.creates_children() diff --git a/tests/plugins/environments/test_kubeflow_environment.py b/tests/plugins/environments/test_kubeflow_environment.py index 29b8bba587922..44fb66fca0d91 100644 --- a/tests/plugins/environments/test_kubeflow_environment.py +++ b/tests/plugins/environments/test_kubeflow_environment.py @@ -24,7 +24,7 @@ def test_default_attributes(): """Test the default attributes when no environment variables are set.""" env = KubeflowEnvironment() - assert env.creates_children() + assert env.creates_processes_externally with pytest.raises(KeyError): # MASTER_ADDR is required diff --git a/tests/plugins/environments/test_lightning_environment.py b/tests/plugins/environments/test_lightning_environment.py index 012aa877ec57b..2de3574a1805f 100644 --- a/tests/plugins/environments/test_lightning_environment.py +++ b/tests/plugins/environments/test_lightning_environment.py @@ -23,7 +23,7 @@ def test_default_attributes(): """Test the default attributes when no environment variables are set.""" env = LightningEnvironment() - assert not env.creates_children() + assert not env.creates_processes_externally assert env.master_address() == "127.0.0.1" assert isinstance(env.master_port(), int) assert env.world_size() == 1 @@ -48,13 +48,13 @@ def test_attributes_from_environment_variables(): @pytest.mark.parametrize( - "environ, creates_children", [({}, False), (dict(LOCAL_RANK="2"), True), (dict(NODE_RANK="1"), False)] + "environ, creates_processes_externally", [({}, False), (dict(LOCAL_RANK="2"), True), (dict(NODE_RANK="1"), False)] ) -def test_manual_user_launch(environ, creates_children): +def test_manual_user_launch(environ, creates_processes_externally): """Test that the environment switches to manual user mode when LOCAL_RANK env variable detected.""" with mock.patch.dict(os.environ, environ): env = LightningEnvironment() - assert env.creates_children() == creates_children + assert env.creates_processes_externally == creates_processes_externally @mock.patch.dict(os.environ, {"GROUP_RANK": "1"}) diff --git a/tests/plugins/environments/test_lsf_environment.py b/tests/plugins/environments/test_lsf_environment.py index fbdb2197bdf51..e3a5a67ba4be2 100644 --- a/tests/plugins/environments/test_lsf_environment.py +++ b/tests/plugins/environments/test_lsf_environment.py @@ -55,7 +55,7 @@ def test_manual_master_port_and_address(): def test_attributes_from_environment_variables(): """Test that the LSF environment takes the attributes from the environment variables.""" env = LSFEnvironment() - assert env.creates_children() + assert env.creates_processes_externally assert env.master_address() == "10.10.10.0" assert env.master_port() == 10234 assert env.world_size() == 4 diff --git a/tests/plugins/environments/test_slurm_environment.py b/tests/plugins/environments/test_slurm_environment.py index 111d317b3a5fa..e2f1df4ecd9ad 100644 --- a/tests/plugins/environments/test_slurm_environment.py +++ b/tests/plugins/environments/test_slurm_environment.py @@ -24,7 +24,7 @@ def test_default_attributes(): """Test the default attributes when no environment variables are set.""" env = SLURMEnvironment() - assert env.creates_children() + assert env.creates_processes_externally assert env.master_address() == "127.0.0.1" assert env.master_port() == 12910 with pytest.raises(KeyError): diff --git a/tests/plugins/environments/test_torchelastic_environment.py b/tests/plugins/environments/test_torchelastic_environment.py index 19e64fba2e65f..d0df262c169cb 100644 --- a/tests/plugins/environments/test_torchelastic_environment.py +++ b/tests/plugins/environments/test_torchelastic_environment.py @@ -24,7 +24,7 @@ def test_default_attributes(): """Test the default attributes when no environment variables are set.""" env = TorchElasticEnvironment() - assert env.creates_children() + assert env.creates_processes_externally assert env.master_address() == "127.0.0.1" assert env.master_port() == 12910 assert env.world_size() is None diff --git a/tests/plugins/test_ddp_plugin.py b/tests/plugins/test_ddp_plugin.py index c7e8d1ffc9224..78ae931330307 100644 --- a/tests/plugins/test_ddp_plugin.py +++ b/tests/plugins/test_ddp_plugin.py @@ -80,7 +80,8 @@ def test_incorrect_ddp_script_spawning(tmpdir): """Test an error message when user accidentally instructs Lightning to spawn children processes on rank > 0.""" class WronglyImplementedEnvironment(LightningEnvironment): - def creates_children(self): + @property + def creates_processes_externally(self): # returning false no matter what means Lightning would spawn also on ranks > 0 new processes return False