Skip to content
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Deprecated passing `resume_from_checkpoint` to the `Trainer` constructor in favor of `trainer.fit(ckpt_path=)` ([#10061](https://github.com/PyTorchLightning/pytorch-lightning/pull/10061))


- Deprecated `ClusterEnvironment.creates_children()` in favor of `ClusterEnvironment.creates_processes_externally` (property) ([#10106](https://github.com/PyTorchLightning/pytorch-lightning/pull/10106))


### Removed

- Removed deprecated `metrics` ([#8586](https://github.com/PyTorchLightning/pytorch-lightning/pull/8586/))
Expand Down
5 changes: 3 additions & 2 deletions docs/source/clouds/cluster.rst
Original file line number Diff line number Diff line change
Expand Up @@ -290,8 +290,9 @@ and node rank (node id). Here is an example of a custom


class MyClusterEnvironment(ClusterEnvironment):
def creates_children(self) -> bool:
# return True if the cluster is managed (you don't launch processes yourself)
@property
def creates_processes_externally(self) -> bool:
"""Return True if the cluster is managed (you don't launch processes yourself)"""
return True

def world_size(self) -> int:
Expand Down
18 changes: 17 additions & 1 deletion pytorch_lightning/plugins/environments/cluster_environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,30 @@
# limitations under the License.
from abc import ABC, abstractmethod

from pytorch_lightning.utilities import rank_zero_deprecation


class ClusterEnvironment(ABC):
"""Specification of a cluster environment."""

@property
@abstractmethod
def creates_children(self) -> bool:
def creates_processes_externally(self) -> bool:
"""Whether the processes are created by the environment rather than by Lightning.

For example, the DDP plugin only calls its child-script launcher when this is ``False``.
"""

def creates_children(self) -> bool:
    """Whether the environment creates the subprocesses or not.

    .. deprecated:: v1.5
        This method was deprecated in v1.5 and will be removed in v1.6. Use the property
        :attr:`creates_processes_externally` instead.

    Returns:
        The value of the :attr:`creates_processes_externally` property.
    """
    # This message is shown to users at runtime, so it uses plain backticks rather than
    # Sphinx roles (``:attr:`` would be printed literally in the warning).
    rank_zero_deprecation(
        f"`{self.__class__.__name__}.creates_children()` was deprecated in v1.5 and will be removed in v1.6."
        f" Use the property `{self.__class__.__name__}.creates_processes_externally` instead."
    )
    return self.creates_processes_externally

@abstractmethod
def master_address(self) -> str:
"""The master address through which all processes connect and communicate."""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ def is_using_kubeflow() -> bool:
excluded_env_vars = ("GROUP_RANK", "LOCAL_RANK", "LOCAL_WORLD_SIZE")
return all(v in os.environ for v in required_env_vars) and not any(v in os.environ for v in excluded_env_vars)

def creates_children(self) -> bool:
@property
def creates_processes_externally(self) -> bool:
"""Always ``True`` for this environment, so the DDP plugin does not launch child scripts itself."""
return True

def master_address(self) -> str:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ def __init__(self):
self._global_rank: int = 0
self._world_size: int = 1

def creates_children(self) -> bool:
@property
def creates_processes_externally(self) -> bool:
"""Returns whether the cluster creates the processes or not.

If at least :code:`LOCAL_RANK` is available as environment variable, Lightning assumes the user acts as the
Expand Down
3 changes: 2 additions & 1 deletion pytorch_lightning/plugins/environments/lsf_environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@ def is_using_lsf() -> bool:
required_env_vars = ("LSB_JOBID", "LSB_HOSTS", "JSM_NAMESPACE_LOCAL_RANK", "JSM_NAMESPACE_SIZE")
return all(v in os.environ for v in required_env_vars)

def creates_children(self) -> bool:
@property
def creates_processes_externally(self) -> bool:
"""Always ``True`` for this environment, so the DDP plugin does not launch child scripts itself."""
return True

def master_address(self):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@
class SLURMEnvironment(ClusterEnvironment):
"""Cluster environment for training on a cluster managed by SLURM."""

def creates_children(self) -> bool:
@property
def creates_processes_externally(self) -> bool:
"""Always ``True`` for this environment, so the DDP plugin does not launch child scripts itself."""
return True

def master_address(self) -> str:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ def is_using_torchelastic() -> bool:
required_env_vars = ("RANK", "GROUP_RANK", "LOCAL_RANK", "LOCAL_WORLD_SIZE")
return all(v in os.environ for v in required_env_vars)

def creates_children(self) -> bool:
@property
def creates_processes_externally(self) -> bool:
"""Always ``True`` for this environment, so the DDP plugin does not launch child scripts itself."""
return True

def master_address(self) -> str:
Expand Down
4 changes: 2 additions & 2 deletions pytorch_lightning/plugins/training_type/ddp.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ def _is_single_process_single_device(self) -> bool:

def setup_environment(self) -> None:
# start the other scripts
if not self.cluster_environment.creates_children():
if not self.cluster_environment.creates_processes_externally:
self._call_children_scripts()

# set the task idx
Expand Down Expand Up @@ -277,7 +277,7 @@ def _check_can_spawn_children(self):
raise RuntimeError(
"Lightning attempted to launch new distributed processes with `local_rank > 0`. This should not happen."
" Possible reasons: 1) LOCAL_RANK environment variable was incorrectly modified by the user,"
" 2) `ClusterEnvironment.creates_children()` incorrectly implemented."
" 2) `ClusterEnvironment.creates_processes_externally` incorrectly implemented."
)

def set_world_ranks(self) -> None:
Expand Down
3 changes: 2 additions & 1 deletion tests/accelerators/test_accelerator_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,8 @@ class CustomCluster(LightningEnvironment):
def master_address(self):
return "asdf"

def creates_children(self) -> bool:
@property
def creates_processes_externally(self) -> bool:
return True

trainer = Trainer(
Expand Down
20 changes: 20 additions & 0 deletions tests/deprecated_api/test_remove_1-6.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.plugins.environments import (
KubeflowEnvironment,
LightningEnvironment,
SLURMEnvironment,
TorchElasticEnvironment,
)
from pytorch_lightning.plugins.training_type import DDPPlugin, DDPSpawnPlugin
from pytorch_lightning.utilities.distributed import rank_zero_deprecation, rank_zero_warn
from pytorch_lightning.utilities.model_helpers import is_overridden
Expand Down Expand Up @@ -421,3 +427,17 @@ def test_v1_6_0_is_slurm_managing_tasks():

with pytest.deprecated_call(match=r"`AcceleratorConnector.is_slurm_managing_tasks` was deprecated in v1.5"):
trainer._accelerator_connector.is_slurm_managing_tasks = False


@pytest.mark.parametrize(
"cluster_environment",
[
KubeflowEnvironment(),
LightningEnvironment(),
SLURMEnvironment(),
TorchElasticEnvironment(),
],
)
def test_v1_6_0_cluster_environment_creates_children(cluster_environment):
"""Check that calling the deprecated ``creates_children()`` emits a deprecation warning.

Runs once for each environment instance listed in the parametrization.
"""
with pytest.deprecated_call(match="was deprecated in v1.5 and will be removed in v1.6"):
cluster_environment.creates_children()
2 changes: 1 addition & 1 deletion tests/plugins/environments/test_kubeflow_environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
def test_default_attributes():
"""Test the default attributes when no environment variables are set."""
env = KubeflowEnvironment()
assert env.creates_children()
assert env.creates_processes_externally

with pytest.raises(KeyError):
# MASTER_ADDR is required
Expand Down
8 changes: 4 additions & 4 deletions tests/plugins/environments/test_lightning_environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
def test_default_attributes():
"""Test the default attributes when no environment variables are set."""
env = LightningEnvironment()
assert not env.creates_children()
assert not env.creates_processes_externally
assert env.master_address() == "127.0.0.1"
assert isinstance(env.master_port(), int)
assert env.world_size() == 1
Expand All @@ -48,13 +48,13 @@ def test_attributes_from_environment_variables():


@pytest.mark.parametrize(
"environ, creates_children", [({}, False), (dict(LOCAL_RANK="2"), True), (dict(NODE_RANK="1"), False)]
"environ, creates_processes_externally", [({}, False), (dict(LOCAL_RANK="2"), True), (dict(NODE_RANK="1"), False)]
)
def test_manual_user_launch(environ, creates_children):
def test_manual_user_launch(environ, creates_processes_externally):
"""Test that the environment switches to manual user mode when LOCAL_RANK env variable detected."""
with mock.patch.dict(os.environ, environ):
env = LightningEnvironment()
assert env.creates_children() == creates_children
assert env.creates_processes_externally == creates_processes_externally


@mock.patch.dict(os.environ, {"GROUP_RANK": "1"})
Expand Down
2 changes: 1 addition & 1 deletion tests/plugins/environments/test_lsf_environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def test_manual_master_port_and_address():
def test_attributes_from_environment_variables():
"""Test that the LSF environment takes the attributes from the environment variables."""
env = LSFEnvironment()
assert env.creates_children()
assert env.creates_processes_externally
assert env.master_address() == "10.10.10.0"
assert env.master_port() == 10234
assert env.world_size() == 4
Expand Down
2 changes: 1 addition & 1 deletion tests/plugins/environments/test_slurm_environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
def test_default_attributes():
"""Test the default attributes when no environment variables are set."""
env = SLURMEnvironment()
assert env.creates_children()
assert env.creates_processes_externally
assert env.master_address() == "127.0.0.1"
assert env.master_port() == 12910
with pytest.raises(KeyError):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
def test_default_attributes():
"""Test the default attributes when no environment variables are set."""
env = TorchElasticEnvironment()
assert env.creates_children()
assert env.creates_processes_externally
assert env.master_address() == "127.0.0.1"
assert env.master_port() == 12910
assert env.world_size() is None
Expand Down
3 changes: 2 additions & 1 deletion tests/plugins/test_ddp_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,8 @@ def test_incorrect_ddp_script_spawning(tmpdir):
"""Test an error message when user accidentally instructs Lightning to spawn children processes on rank > 0."""

class WronglyImplementedEnvironment(LightningEnvironment):
def creates_children(self):
@property
def creates_processes_externally(self):
# always returning False means Lightning would also spawn new processes on ranks > 0
return False

Expand Down