diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2287e97f7751e..7543af9ce8fdf 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -88,6 +88,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 - Removed the deprecated `terminate_on_nan` argument from the `Trainer` constructor ([#12553](https://github.com/PyTorchLightning/pytorch-lightning/pull/12553))
 
+- Removed the deprecated `XLAStatsMonitor` callback ([#12688](https://github.com/PyTorchLightning/pytorch-lightning/pull/12688))
+
+
 - Remove deprecated `pytorch_lightning.callbacks.progress.progress` ([#12658](https://github.com/PyTorchLightning/pytorch-lightning/pull/12658))
 
 
diff --git a/docs/source/api_references.rst b/docs/source/api_references.rst
index aa0ad662223b0..5d573b31eb486 100644
--- a/docs/source/api_references.rst
+++ b/docs/source/api_references.rst
@@ -98,7 +98,6 @@ Callbacks API
     StochasticWeightAveraging
     Timer
     TQDMProgressBar
-    XLAStatsMonitor
 
 Loggers API
 -----------
diff --git a/docs/source/extensions/callbacks.rst b/docs/source/extensions/callbacks.rst
index 9f1765fe63b8f..f6dccff662e03 100644
--- a/docs/source/extensions/callbacks.rst
+++ b/docs/source/extensions/callbacks.rst
@@ -105,7 +105,6 @@ Lightning has a few built-in callbacks.
     StochasticWeightAveraging
     Timer
     TQDMProgressBar
-    XLAStatsMonitor
 
 ----------
 
diff --git a/pytorch_lightning/callbacks/__init__.py b/pytorch_lightning/callbacks/__init__.py
index 3e0971e8789cd..fc64772e00474 100644
--- a/pytorch_lightning/callbacks/__init__.py
+++ b/pytorch_lightning/callbacks/__init__.py
@@ -27,7 +27,6 @@
 from pytorch_lightning.callbacks.rich_model_summary import RichModelSummary
 from pytorch_lightning.callbacks.stochastic_weight_avg import StochasticWeightAveraging
 from pytorch_lightning.callbacks.timer import Timer
-from pytorch_lightning.callbacks.xla_stats_monitor import XLAStatsMonitor
 
 __all__ = [
     "BackboneFinetuning",
@@ -35,7 +34,6 @@
     "Callback",
     "DeviceStatsMonitor",
     "EarlyStopping",
-    "XLAStatsMonitor",
     "GradientAccumulationScheduler",
     "LambdaCallback",
     "LearningRateMonitor",
diff --git a/pytorch_lightning/callbacks/xla_stats_monitor.py b/pytorch_lightning/callbacks/xla_stats_monitor.py
deleted file mode 100644
index c7fe59a59d515..0000000000000
--- a/pytorch_lightning/callbacks/xla_stats_monitor.py
+++ /dev/null
@@ -1,114 +0,0 @@
-# Copyright The PyTorch Lightning team.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-XLA Stats Monitor
-=================
-
-Monitor and logs XLA stats during training.
-
-"""
-import time
-
-import pytorch_lightning as pl
-from pytorch_lightning.accelerators import TPUAccelerator
-from pytorch_lightning.callbacks.base import Callback
-from pytorch_lightning.utilities import _TPU_AVAILABLE
-from pytorch_lightning.utilities.exceptions import MisconfigurationException
-from pytorch_lightning.utilities.rank_zero import rank_zero_deprecation, rank_zero_info
-
-if _TPU_AVAILABLE:
-    import torch_xla.core.xla_model as xm
-
-
-class XLAStatsMonitor(Callback):
-    r"""
-    .. deprecated:: v1.5
-        The `XLAStatsMonitor` callback was deprecated in v1.5 and will be removed in v1.7.
-        Please use the `DeviceStatsMonitor` callback instead.
-
-    Automatically monitors and logs XLA stats during training stage. ``XLAStatsMonitor`` is a callback and in
-    order to use it you need to assign a logger in the ``Trainer``.
-
-    Args:
-        verbose: Set to ``True`` to print average peak and free memory, and epoch time
-            every epoch.
-
-    Raises:
-        MisconfigurationException:
-            If not running on TPUs, or ``Trainer`` has no logger.
-
-    Example::
-
-        >>> from pytorch_lightning import Trainer
-        >>> from pytorch_lightning.callbacks import XLAStatsMonitor
-        >>> xla_stats = XLAStatsMonitor() # doctest: +SKIP
-        >>> trainer = Trainer(callbacks=[xla_stats]) # doctest: +SKIP
-    """
-
-    def __init__(self, verbose: bool = True) -> None:
-        super().__init__()
-
-        rank_zero_deprecation(
-            "The `XLAStatsMonitor` callback was deprecated in v1.5 and will be removed in v1.7."
-            " Please use the `DeviceStatsMonitor` callback instead."
-        )
-
-        if not _TPU_AVAILABLE:
-            raise MisconfigurationException("Cannot use XLAStatsMonitor with TPUs are not available")
-
-        self._verbose = verbose
-
-    def on_train_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
-        if not trainer.loggers:
-            raise MisconfigurationException("Cannot use XLAStatsMonitor callback with Trainer that has no logger.")
-
-        if not isinstance(trainer.accelerator, TPUAccelerator):
-            raise MisconfigurationException(
-                "You are using XLAStatsMonitor but are not running on TPU."
-                f" The accelerator is set to {trainer.accelerator.__class__.__name__}."
-            )
-
-        device = trainer.strategy.root_device
-        memory_info = xm.get_memory_info(device)
-        total_memory = trainer.strategy.reduce(memory_info["kb_total"]) * 0.001
-        rank_zero_info(f"Average Total memory: {total_memory:.2f} MB")
-
-    def on_train_epoch_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
-        self._start_time = time.time()
-
-    def on_train_epoch_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
-        if not trainer.loggers:
-            raise MisconfigurationException("Cannot use XLAStatsMonitor callback with Trainer that has no logger.")
-
-        device = trainer.strategy.root_device
-        memory_info = xm.get_memory_info(device)
-        epoch_time = time.time() - self._start_time
-
-        free_memory = memory_info["kb_free"]
-        peak_memory = memory_info["kb_total"] - free_memory
-
-        free_memory = trainer.strategy.reduce(free_memory) * 0.001
-        peak_memory = trainer.strategy.reduce(peak_memory) * 0.001
-        epoch_time = trainer.strategy.reduce(epoch_time)
-
-        for logger in trainer.loggers:
-            logger.log_metrics(
-                {"avg. free memory (MB)": float(free_memory), "avg. peak memory (MB)": float(peak_memory)},
-                step=trainer.current_epoch,
-            )
-
-        if self._verbose:
-            rank_zero_info(f"Average Epoch time: {epoch_time:.2f} seconds")
-            rank_zero_info(f"Average Peak memory: {peak_memory:.2f} MB")
-            rank_zero_info(f"Average Free memory: {free_memory:.2f} MB")
diff --git a/tests/callbacks/test_xla_stats_monitor.py b/tests/callbacks/test_xla_stats_monitor.py
deleted file mode 100644
index 59cc2132e3cdb..0000000000000
--- a/tests/callbacks/test_xla_stats_monitor.py
+++ /dev/null
@@ -1,70 +0,0 @@
-# Copyright The PyTorch Lightning team.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import os
-
-import numpy as np
-import pytest
-
-from pytorch_lightning import Trainer
-from pytorch_lightning.callbacks import XLAStatsMonitor
-from pytorch_lightning.loggers import CSVLogger
-from pytorch_lightning.loggers.csv_logs import ExperimentWriter
-from pytorch_lightning.utilities.exceptions import MisconfigurationException
-from tests.helpers import BoringModel
-from tests.helpers.runif import RunIf
-
-
-@RunIf(tpu=True)
-def test_xla_stats_monitor(tmpdir):
-    """Test XLA stats are logged using a logger."""
-
-    model = BoringModel()
-    xla_stats = XLAStatsMonitor()
-    logger = CSVLogger(tmpdir)
-
-    trainer = Trainer(
-        default_root_dir=tmpdir,
-        max_epochs=2,
-        limit_train_batches=5,
-        accelerator="tpu",
-        devices=8,
-        callbacks=[xla_stats],
-        logger=logger,
-    )
-
-    trainer.fit(model)
-    assert trainer.state.finished, f"Training failed with {trainer.state}"
-
-    path_csv = os.path.join(logger.log_dir, ExperimentWriter.NAME_METRICS_FILE)
-    met_data = np.genfromtxt(path_csv, delimiter=",", names=True, deletechars="", replace_space=" ")
-
-    fields = ["avg. free memory (MB)", "avg. peak memory (MB)"]
-
-    for f in fields:
-        assert any(f in h for h in met_data.dtype.names)
-
-
-@RunIf(tpu=True)
-def test_xla_stats_monitor_no_logger(tmpdir):
-    """Test XLAStatsMonitor with no logger in Trainer."""
-
-    model = BoringModel()
-    xla_stats = XLAStatsMonitor()
-
-    trainer = Trainer(
-        default_root_dir=tmpdir, callbacks=[xla_stats], max_epochs=1, accelerator="tpu", devices=[1], logger=False
-    )
-
-    with pytest.raises(MisconfigurationException, match="Trainer that has no logger."):
-        trainer.fit(model)
diff --git a/tests/deprecated_api/test_remove_1-7.py b/tests/deprecated_api/test_remove_1-7.py
index 466a4a5561664..4d89ee9c8b5e2 100644
--- a/tests/deprecated_api/test_remove_1-7.py
+++ b/tests/deprecated_api/test_remove_1-7.py
@@ -20,10 +20,8 @@
 import pytest
 import torch
 
-import pytorch_lightning
 from pytorch_lightning import Callback, LightningDataModule, Trainer
 from pytorch_lightning.callbacks.lr_monitor import LearningRateMonitor
-from pytorch_lightning.callbacks.xla_stats_monitor import XLAStatsMonitor
 from pytorch_lightning.loggers import LoggerCollection, TestTubeLogger
 from pytorch_lightning.overrides.distributed import IndexBatchSamplerWrapper
 from pytorch_lightning.plugins.environments import (
@@ -34,7 +32,6 @@
     TorchElasticEnvironment,
 )
 from pytorch_lightning.strategies import SingleDeviceStrategy
-from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from tests.deprecated_api import _soft_unimport_module
 from tests.helpers import BoringModel
 from tests.helpers.datamodules import MNISTDataModule
@@ -310,12 +307,6 @@ def test_v1_7_0_deprecated_slurm_job_id():
     trainer.slurm_job_id
 
 
-def test_v1_7_0_deprecate_xla_stats_monitor(monkeypatch):
-    monkeypatch.setattr(pytorch_lightning.callbacks.xla_stats_monitor, "_TPU_AVAILABLE", True)
-    with pytest.deprecated_call(match="The `XLAStatsMonitor` callback was deprecated in v1.5"):
-        _ = XLAStatsMonitor()
-
-
 def test_v1_7_0_deprecated_max_steps_none(tmpdir):
     with pytest.deprecated_call(match="`max_steps = None` is deprecated in v1.5"):
         _ = Trainer(max_steps=None)
@@ -428,17 +419,3 @@ def post_dispatch(self, trainer):
 
     with pytest.deprecated_call(match=escape("`CustomPlugin.post_dispatch()` has been deprecated in v1.6")):
         CustomPlugin(torch.device("cpu"))
-
-
-def test_xla_stats_monitor_tpu_not_used(monkeypatch):
-    monkeypatch.setattr(pytorch_lightning.callbacks.xla_stats_monitor, "_TPU_AVAILABLE", True)
-    with pytest.deprecated_call(match="The `XLAStatsMonitor` callback was deprecated in v1.5"):
-        xla_stats = XLAStatsMonitor()
-
-    trainer = Trainer(accelerator="cpu", callbacks=[xla_stats])
-    model = BoringModel()
-    with pytest.raises(
-        MisconfigurationException,
-        match="You are using XLAStatsMonitor but are not running on TPU. The accelerator is set to CPUAccelerator.",
-    ):
-        trainer.fit(model)
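For users migrating off the removed callback, a minimal sketch of the replacement named in the deprecation message, `DeviceStatsMonitor`, follows. It is not part of the patch above, and the Trainer arguments shown are illustrative only; verify against your installed pytorch-lightning version.

# Migration sketch (assumed usage): DeviceStatsMonitor logs device statistics through the
# Trainer's logger, covering the TPU memory stats that XLAStatsMonitor used to report.
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import DeviceStatsMonitor

device_stats = DeviceStatsMonitor()
trainer = Trainer(accelerator="tpu", devices=8, callbacks=[device_stats])  # illustrative arguments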