
Commit a270a79

awaelchli, pre-commit-ci[bot], and justusschock authored
Rename "master" methods to "main" in ClusterEnvironment plugins (#10103)
* rename occurrences of master port, master address, maser node, master process
* rename properties
* add property decorators
* occurrences in docs
* update changelog
* update changelog
* [pre-commit.ci] auto fixes from pre-commit.com hooks
  for more information, see https://pre-commit.ci
* add lost method
* create deprecation
* add changelog
* [pre-commit.ci] auto fixes from pre-commit.com hooks
  for more information, see https://pre-commit.ci
* fix typo (but it was already there!!!)
* Apply suggestions from code review
  Co-authored-by: Justus Schock <[email protected]>
* add todo
* update more occurences
* add types
* add missing import

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Justus Schock <[email protected]>
1 parent 5b90253 commit a270a79

26 files changed: +170 -92 lines
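For downstream code, the practical effect is that the old `master_*` accessors become properties with new names. A minimal before/after sketch (the default `LightningEnvironment` is used here only for illustration):

    from pytorch_lightning.plugins.environments import LightningEnvironment

    env = LightningEnvironment()

    # Old spelling (pre-1.6 plugins), now deprecated on subclasses that still define it:
    #     addr = env.master_address()
    #     port = env.master_port()

    # New spelling after this commit -- properties, so no call parentheses:
    addr = env.main_address   # "127.0.0.1" unless MASTER_ADDR is set
    port = env.main_port      # MASTER_PORT if set, otherwise a free port chosen once and cached
    print(addr, port)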

CHANGELOG.md

Lines changed: 1 addition & 1 deletion
@@ -41,7 +41,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 ### Deprecated
 
--
+- Deprecated `ClusterEnvironment.master_{address,port}` in favor of `ClusterEnvironment.main_{address,port}` ([#10103](https://github.com/PyTorchLightning/pytorch-lightning/issues/10103))
 
 
 -

docs/source/clouds/cluster.rst

Lines changed: 4 additions & 4 deletions
@@ -82,7 +82,7 @@ Once the script is setup like described in :ref:`training_script_setup`, you can
 
 Like a custom cluster, you have to ensure that there is network connectivity between the nodes with firewall rules that allow traffic flow on a specified *MASTER_PORT*.
 
-Finally, you'll need to decide which node you'd like to be the master node (*MASTER_ADDR*), and the ranks of each node (*NODE_RANK*).
+Finally, you'll need to decide which node you'd like to be the main node (*MASTER_ADDR*), and the ranks of each node (*NODE_RANK*).
 
 For example:
 
@@ -248,7 +248,7 @@ See also the multi-node examples
     # NCCL is how the nodes talk to each other
     cluster.add_command("export NCCL_DEBUG=INFO")
 
-    # setting a master port here is a good idea.
+    # setting a main port here is a good idea.
     cluster.add_command("export MASTER_PORT=%r" % PORT)
 
     # ************** DON'T FORGET THIS ***************
@@ -307,10 +307,10 @@ and node rank (node id). Here is an example of a custom
     def node_rank(self) -> int:
         return int(os.environ["NODE_RANK"])
 
-    def master_address(self) -> str:
+    def main_address(self) -> str:
         return os.environ["MASTER_ADDRESS"]
 
-    def master_port(self) -> int:
+    def main_port(self) -> int:
         return int(os.environ["MASTER_PORT"])
 
 

docs/source/common/trainer.rst

Lines changed: 4 additions & 4 deletions
@@ -1116,11 +1116,11 @@ To define your own behavior, subclass the relevant class and pass it in. Here's
 
 
     class MyCluster(ClusterEnvironment):
-        def master_address(self):
-            return your_master_address
+        def main_address(self):
+            return your_main_address
 
-        def master_port(self):
-            return your_master_port
+        def main_port(self):
+            return your_main_port
 
         def world_size(self):
             return the_world_size
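For authors of custom environments, the docs snippet above maps onto the new interface roughly as follows. This is a hedged sketch, not part of the commit: the environment variable names are illustrative, and the remaining abstract members of `ClusterEnvironment` (e.g. `global_rank`, `local_rank` and their setters) still need to be implemented before the class can be instantiated.

    import os

    from pytorch_lightning.plugins.environments import ClusterEnvironment


    class MyCluster(ClusterEnvironment):
        """Sketch of a custom environment using the renamed property-based accessors."""

        @property
        def creates_processes_externally(self) -> bool:
            # the scheduler, not Lightning, launches one process per device
            return True

        @property
        def main_address(self) -> str:        # previously: def master_address(self)
            return os.environ["MASTER_ADDR"]

        @property
        def main_port(self) -> int:           # previously: def master_port(self)
            return int(os.environ["MASTER_PORT"])

        def world_size(self) -> int:
            return int(os.environ["WORLD_SIZE"])

        def node_rank(self) -> int:
            return int(os.environ["NODE_RANK"])

        # global_rank(), local_rank() and the corresponding setters are omitted for brevity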

docs/source/guides/speed.rst

Lines changed: 1 addition & 1 deletion
@@ -71,7 +71,7 @@ Prefer DDP over DP
 
 1. Copy model to device.
 2. Copy data to device.
-3. Copy outputs of each device back to master.
+3. Copy outputs of each device back to main device.
 
 Whereas :class:`~pytorch_lightning.plugins.training_type.DDPPlugin` only performs 1 transfer to sync gradients, making DDP MUCH faster than DP.
 

pytorch_lightning/accelerators/accelerator.py

Lines changed: 1 addition & 1 deletion
@@ -479,7 +479,7 @@ def results(self) -> Any:
         This property is deprecated in v1.5 and will be removed in v1.6.
         Please call `training_type_plugin.results` directly.
 
-        In distributed training, we make sure to transfer the results to the appropriate master process.
+        In distributed training, we make sure to transfer the results to the appropriate main process.
         """
         rank_zero_deprecation(
             "`Accelerator.results` is deprecated in v1.5 and will be removed in v1.6. "

pytorch_lightning/plugins/environments/cluster_environment.py

Lines changed: 27 additions & 4 deletions
@@ -12,23 +12,33 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from abc import ABC, abstractmethod
+from typing import Any, Type
+
+from pytorch_lightning.utilities import rank_zero_deprecation
 
 
 class ClusterEnvironment(ABC):
     """Specification of a cluster environment."""
 
+    def __new__(cls, *args: Any, **kwargs: Any) -> "ClusterEnvironment":
+        # TODO: remove in 1.7
+        _check_for_deprecated_methods(cls)
+        return super().__new__(cls, *args, **kwargs)
+
     @property
     @abstractmethod
     def creates_processes_externally(self) -> bool:
         """Whether the environment creates the subprocesses or not."""
 
+    @property
     @abstractmethod
-    def master_address(self) -> str:
-        """The master address through which all processes connect and communicate."""
+    def main_address(self) -> str:
+        """The main address through which all processes connect and communicate."""
 
+    @property
     @abstractmethod
-    def master_port(self) -> int:
-        """An open and configured port in the master node through which all processes communicate."""
+    def main_port(self) -> int:
+        """An open and configured port in the main node through which all processes communicate."""
 
     @abstractmethod
     def world_size(self) -> int:
@@ -57,3 +67,16 @@ def node_rank(self) -> int:
     def teardown(self) -> None:
         """Clean up any state set after execution finishes."""
         pass
+
+
+def _check_for_deprecated_methods(cls: Type[ClusterEnvironment]) -> None:
+    if hasattr(cls, "master_address") and callable(cls.master_address):
+        rank_zero_deprecation(
+            f"`{cls.__name__}.master_address` has been deprecated in v1.6 and will be removed in 1.7."
+            " Implement the property `main_address` instead (do not forget to add the `@property` decorator)."
+        )
+    if hasattr(cls, "master_port") and callable(cls.master_port):
+        rank_zero_deprecation(
+            f"`{cls.__name__}.master_port` has been deprecated in v1.6 and will be removed in 1.7."
+            " Implement the property `main_port` instead (do not forget to add the `@property` decorator)."
+        )
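The `__new__` hook above means that merely constructing an old-style plugin now surfaces the rename. A small sketch of how the deprecation path is expected to trigger (the class and values below are made up for illustration):

    from pytorch_lightning.plugins.environments import LightningEnvironment


    class LegacyEnvironment(LightningEnvironment):
        # still spelled the pre-1.6 way: plain methods named master_*
        def master_address(self) -> str:
            return "10.0.0.1"

        def master_port(self) -> int:
            return 12910


    # Instantiation goes through ClusterEnvironment.__new__, which finds the callable
    # `master_address`/`master_port` attributes on the class and emits the v1.6
    # deprecation warnings asking for `main_address`/`main_port` properties instead.
    env = LegacyEnvironment()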

pytorch_lightning/plugins/environments/kubeflow_environment.py

Lines changed: 4 additions & 2 deletions
@@ -39,10 +39,12 @@ def is_using_kubeflow() -> bool:
     def creates_processes_externally(self) -> bool:
         return True
 
-    def master_address(self) -> str:
+    @property
+    def main_address(self) -> str:
         return os.environ["MASTER_ADDR"]
 
-    def master_port(self) -> int:
+    @property
+    def main_port(self) -> int:
         return int(os.environ["MASTER_PORT"])
 
     def world_size(self) -> int:

pytorch_lightning/plugins/environments/lightning_environment.py

Lines changed: 10 additions & 8 deletions
@@ -29,14 +29,14 @@ class LightningEnvironment(ClusterEnvironment):
     2. The user launches all processes manually or with utilities like :code:`torch.distributed.launch`.
        The appropriate environment variables need to be set, and at minimum :code:`LOCAL_RANK`.
 
-    If the master address and port are not provided, the default environment will choose them
+    If the main address and port are not provided, the default environment will choose them
     automatically. It is recommended to use this default environment for single-node distributed
     training as it provides a convenient way to launch the training script.
     """
 
     def __init__(self):
         super().__init__()
-        self._master_port = None
+        self._main_port = None
         self._global_rank: int = 0
         self._world_size: int = 1
 
@@ -49,13 +49,15 @@ def creates_processes_externally(self) -> bool:
         """
         return "LOCAL_RANK" in os.environ
 
-    def master_address(self) -> str:
+    @property
+    def main_address(self) -> str:
         return os.environ.get("MASTER_ADDR", "127.0.0.1")
 
-    def master_port(self) -> int:
-        if self._master_port is None:
-            self._master_port = os.environ.get("MASTER_PORT", find_free_network_port())
-        return int(self._master_port)
+    @property
+    def main_port(self) -> int:
+        if self._main_port is None:
+            self._main_port = os.environ.get("MASTER_PORT", find_free_network_port())
+        return int(self._main_port)
 
     def world_size(self) -> int:
         return self._world_size
@@ -85,7 +87,7 @@ def teardown(self) -> None:
 def find_free_network_port() -> int:
     """Finds a free port on localhost.
 
-    It is useful in single-node training when we don't want to connect to a real master node but have to set the
+    It is useful in single-node training when we don't want to connect to a real main node but have to set the
     `MASTER_PORT` environment variable.
     """
     s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
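Only the docstring of `find_free_network_port` changes here; its body is not part of this hunk. For context, such a helper is typically implemented by binding to port 0 and letting the OS pick, roughly like the sketch below (the common pattern, not necessarily the exact Lightning implementation):

    import socket


    def _free_port_sketch() -> int:
        # Ask the OS for an unused port by binding to port 0, then release the socket.
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.bind(("", 0))
        port = s.getsockname()[1]
        s.close()
        return port


    # LightningEnvironment.main_port uses MASTER_PORT when it is set and otherwise falls
    # back to such a helper, caching the result in `_main_port` so repeated accesses agree.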

pytorch_lightning/plugins/environments/lsf_environment.py

Lines changed: 19 additions & 17 deletions
@@ -41,10 +41,10 @@ class LSFEnvironment(ClusterEnvironment):
     """
 
     def __init__(self):
-        self._master_address = self._get_master_address()
-        self._master_port = self._get_master_port()
-        log.debug(f"MASTER_ADDR: {self._master_address}")
-        log.debug(f"MASTER_PORT: {self._master_port}")
+        self._main_address = self._get_main_address()
+        self._main_port = self._get_main_port()
+        log.debug(f"MASTER_ADDR: {self._main_address}")
+        log.debug(f"MASTER_PORT: {self._main_port}")
 
     @staticmethod
     def is_using_lsf() -> bool:
@@ -56,13 +56,15 @@ def is_using_lsf() -> bool:
     def creates_processes_externally(self) -> bool:
         return True
 
-    def master_address(self):
-        """The master address is read from a list of hosts contained in the environment variable `LSB_HOSTS`."""
-        return self._master_address
+    @property
+    def main_address(self) -> str:
+        """The main address is read from a list of hosts contained in the environment variable `LSB_HOSTS`."""
+        return self._main_address
 
-    def master_port(self):
-        """THe master port gets calculated from the LSF job ID."""
-        return self._master_port
+    @property
+    def main_port(self) -> int:
+        """The main port gets calculated from the LSF job ID."""
+        return self._main_port
 
     def world_size(self):
         """The world size is read from the environment variable `JSM_NAMESPACE_SIZE`."""
@@ -127,17 +129,17 @@ def _read_hosts():
         )
         return hosts
 
-    def _get_master_address(self):
+    def _get_main_address(self) -> str:
         hosts = self._read_hosts()
         return hosts[1]
 
     @staticmethod
-    def _get_master_port():
-        """A helper function for accessing the master port.
+    def _get_main_port() -> int:
+        """A helper function for accessing the main port.
 
-        Uses the LSF job ID so all ranks can compute the master port.
+        Uses the LSF job ID so all ranks can compute the main port.
         """
-        # check for user-specified master port
+        # check for user-specified main port
         port = os.environ.get("MASTER_PORT")
         if not port:
             jobid = os.environ.get("LSB_JOBID")
@@ -146,7 +148,7 @@ def _get_master_port():
             port = int(jobid)
             # all ports should be in the 10k+ range
             port = int(port) % 1000 + 10000
-            log.debug(f"calculated LSF master port: {port}")
+            log.debug(f"calculated LSF main port: {port}")
         else:
-            log.debug(f"using externally specified master port: {port}")
+            log.debug(f"using externally specified main port: {port}")
         return int(port)
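The port derivation in `_get_main_port` is plain arithmetic on the job ID, which is why every rank arrives at the same port without any coordination. A worked example with a made-up job ID:

    # LSB_JOBID is shared by all ranks of the same LSF job.
    jobid = 123456                     # hypothetical value of os.environ["LSB_JOBID"]
    port = int(jobid) % 1000 + 10000   # 456 + 10000 -> 10456, always within 10000-10999
    # If MASTER_PORT is set explicitly, that value is used instead of the derived one.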

pytorch_lightning/plugins/environments/slurm_environment.py

Lines changed: 4 additions & 2 deletions
@@ -28,7 +28,8 @@ class SLURMEnvironment(ClusterEnvironment):
     def creates_processes_externally(self) -> bool:
         return True
 
-    def master_address(self) -> str:
+    @property
+    def main_address(self) -> str:
         # figure out the root node addr
         slurm_nodelist = os.environ.get("SLURM_NODELIST")
         if slurm_nodelist:
@@ -41,7 +42,8 @@ def master_address(self) -> str:
         log.debug(f"MASTER_ADDR: {os.environ['MASTER_ADDR']}")
         return root_node
 
-    def master_port(self) -> int:
+    @property
+    def main_port(self) -> int:
         # -----------------------
         # SLURM JOB = PORT number
         # -----------------------
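`main_address` here derives the root node from `SLURM_NODELIST` and, as the `log.debug` context line suggests, exports it as `MASTER_ADDR` for `torch.distributed`. The nodelist resolution itself is outside this hunk; below is a deliberately simplified sketch of the idea, which does not handle SLURM's compressed `node[1-5]` notation the way the real plugin does:

    import os

    # e.g. SLURM_NODELIST="node01,node02" -> root node "node01"
    nodelist = os.environ.get("SLURM_NODELIST", "127.0.0.1")
    root_node = nodelist.split(" ")[0].split(",")[0]

    os.environ["MASTER_ADDR"] = root_node  # what main_address makes available to all ranks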
