From 2f6ca7aa97be8fefc363075a12d1147ff39c2cca Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Mon, 31 Jan 2022 17:54:10 +0530 Subject: [PATCH 1/5] sort simple profiler output based on mean time --- pytorch_lightning/profiler/simple.py | 33 ++++++++++++++++++---------- tests/profiler/test_profiler.py | 24 ++++++++++---------- 2 files changed, 33 insertions(+), 24 deletions(-) diff --git a/pytorch_lightning/profiler/simple.py b/pytorch_lightning/profiler/simple.py index 0e375ec325b71..7847a13bc419a 100644 --- a/pytorch_lightning/profiler/simple.py +++ b/pytorch_lightning/profiler/simple.py @@ -17,7 +17,7 @@ import time from collections import defaultdict from pathlib import Path -from typing import Dict, Optional, Tuple, Union +from typing import Dict, List, Optional, Tuple, Union import numpy as np @@ -72,13 +72,21 @@ def stop(self, action_name: str) -> None: duration = end_time - start_time self.recorded_durations[action_name].append(duration) - def _make_report(self) -> Tuple[list, float]: + def _make_report_extended(self) -> Tuple[List, float, float]: total_duration = time.monotonic() - self.start_time - report = [[a, d, 100.0 * np.sum(d) / total_duration] for a, d in self.recorded_durations.items()] - report.sort(key=lambda x: x[2], reverse=True) - total_calls = sum(len(x[1]) for x in report) + report = [ + [a, np.mean(d), len(d), np.sum(d), 100.0 * np.sum(d) / total_duration] + for a, d in self.recorded_durations.items() + ] + report.sort(key=lambda x: x[4], reverse=True) + total_calls = sum(x[2] for x in report) return report, total_calls, total_duration + def _make_report_non_extended(self) -> List[Tuple[str, float, float]]: + report = [(action, np.mean(d), np.sum(d)) for action, d in self.recorded_durations.items()] + report.sort(key=lambda x: x[1], reverse=True) + return report + def summary(self) -> str: sep = os.linesep output_string = "" @@ -100,15 +108,15 @@ def log_row(action, mean, num_calls, total, per): output_string_len = len(header_string.expandtabs()) sep_lines = f"{sep}{'-' * output_string_len}" output_string += sep_lines + header_string + sep_lines - report, total_calls, total_duration = self._make_report() + report, total_calls, total_duration = self._make_report_extended() output_string += log_row("Total", "-", f"{total_calls:}", f"{total_duration:.5}", "100 %") output_string += sep_lines - for action, durations, duration_per in report: + for action, mean_duration, num_calls, total_duration, duration_per in report: output_string += log_row( action, - f"{np.mean(durations):.5}", - f"{len(durations):}", - f"{np.sum(durations):.5}", + f"{mean_duration:.5}", + f"{num_calls}", + f"{total_duration:.5}", f"{duration_per:.5}", ) output_string += sep_lines @@ -122,9 +130,10 @@ def log_row(action, mean, total): output_string_len = len(header_string.expandtabs()) sep_lines = f"{sep}{'-' * output_string_len}" output_string += sep_lines + header_string + sep_lines + report = self._make_report_non_extended() - for action, durations in self.recorded_durations.items(): - output_string += log_row(action, f"{np.mean(durations):.5}", f"{np.sum(durations):.5}") + for action, mean_duration, total_duration in report: + output_string += log_row(action, f"{mean_duration:.5}", f"{total_duration:.5}") output_string += sep_lines output_string += sep return output_string diff --git a/tests/profiler/test_profiler.py b/tests/profiler/test_profiler.py index e4f54fb448333..f27f4ab1c63b0 100644 --- a/tests/profiler/test_profiler.py +++ b/tests/profiler/test_profiler.py @@ -218,11 +218,11 @@ def test_simple_profiler_summary(tmpdir, extended): sep = os.linesep max_action_len = len("on_before_batch_transfer") - for hook in hooks: + for i, hook in enumerate(hooks): with profiler.profile(hook): pass - profiler.recorded_durations[hook] = [sometime] + profiler.recorded_durations[hook] = [sometime + i] if extended: header_string = ( @@ -238,12 +238,12 @@ def test_simple_profiler_summary(tmpdir, extended): f"{sep_lines}" f"{sep}| Total | - | 6 | 7.0 | 100 % |" # noqa: E501 f"{sep_lines}" + f"{sep}| on_fit_start | 5.7734 | 1 | 5.7734 | 82.478 |" # noqa: E501 + f"{sep}| on_before_batch_transfer | 4.7734 | 1 | 4.7734 | 68.192 |" # noqa: E501 + f"{sep}| on_train_epoch_end | 3.7734 | 1 | 3.7734 | 53.906 |" # noqa: E501 + f"{sep}| on_train_epoch_start | 2.7734 | 1 | 2.7734 | 39.62 |" # noqa: E501 + f"{sep}| on_train_end | 1.7734 | 1 | 1.7734 | 25.335 |" # noqa: E501 f"{sep}| on_train_start | 0.77343 | 1 | 0.77343 | 11.049 |" # noqa: E501 - f"{sep}| on_train_end | 0.77343 | 1 | 0.77343 | 11.049 |" # noqa: E501 - f"{sep}| on_train_epoch_start | 0.77343 | 1 | 0.77343 | 11.049 |" # noqa: E501 - f"{sep}| on_train_epoch_end | 0.77343 | 1 | 0.77343 | 11.049 |" # noqa: E501 - f"{sep}| on_before_batch_transfer | 0.77343 | 1 | 0.77343 | 11.049 |" # noqa: E501 - f"{sep}| on_fit_start | 0.77343 | 1 | 0.77343 | 11.049 |" # noqa: E501 f"{sep_lines}{sep}" ) else: @@ -257,12 +257,12 @@ def test_simple_profiler_summary(tmpdir, extended): f"{sep_lines}" f"{sep}| Action | Mean duration (s) | Total time (s) |" f"{sep_lines}" + f"{sep}| on_fit_start | 5.7734 | 5.7734 |" + f"{sep}| on_before_batch_transfer | 4.7734 | 4.7734 |" + f"{sep}| on_train_epoch_end | 3.7734 | 3.7734 |" + f"{sep}| on_train_epoch_start | 2.7734 | 2.7734 |" + f"{sep}| on_train_end | 1.7734 | 1.7734 |" f"{sep}| on_train_start | 0.77343 | 0.77343 |" - f"{sep}| on_train_end | 0.77343 | 0.77343 |" - f"{sep}| on_train_epoch_start | 0.77343 | 0.77343 |" - f"{sep}| on_train_epoch_end | 0.77343 | 0.77343 |" - f"{sep}| on_before_batch_transfer | 0.77343 | 0.77343 |" - f"{sep}| on_fit_start | 0.77343 | 0.77343 |" f"{sep_lines}{sep}" ) From 94b25754c13df72661dca1ddf657073a4f54911b Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Mon, 31 Jan 2022 18:05:26 +0530 Subject: [PATCH 2/5] chlog --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index aa7c4f9b056bc..6e56c9bb8730f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -222,6 +222,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Changed `MisconfigurationException` to `ModuleNotFoundError` when `rich` isn't available ([#11360](https://github.com/PyTorchLightning/pytorch-lightning/pull/11360)) +- Sorted `SimpleProfiler(extended=False)` summary based on mean duration for each hook ([#11671](https://github.com/PyTorchLightning/pytorch-lightning/pull/11671)) + + ### Deprecated - Deprecated `ClusterEnvironment.master_{address,port}` in favor of `ClusterEnvironment.main_{address,port}` ([#10103](https://github.com/PyTorchLightning/pytorch-lightning/pull/10103)) From 2710df3abf67803826a48887dded98d984b4be94 Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Mon, 31 Jan 2022 18:05:34 +0530 Subject: [PATCH 3/5] update docs --- docs/source/advanced/profiler.rst | 32 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/docs/source/advanced/profiler.rst b/docs/source/advanced/profiler.rst index 0bcab5c4e04d0..fceb09cd1f33e 100644 --- a/docs/source/advanced/profiler.rst +++ b/docs/source/advanced/profiler.rst @@ -63,23 +63,21 @@ The profiler's results will be printed at the completion of a training ``trainer ----------------------------------------------------------------------------------------------- | Action | Mean duration (s) | Total time (s) | ----------------------------------------------------------------------------------------------- - | [LightningModule]BoringModel.prepare_data | 2.112e-06 | 2.112e-06 | - | [LightningModule]BoringModel.setup | 1.513e-06 | 1.513e-06 | - | [LightningModule]BoringModel.on_fit_start | 1.542e-06 | 1.542e-06 | - | [LightningModule]BoringModel.on_epoch_start | 1.051e-06 | 1.051e-06 | - | fetch_next_train_batch | 0.0003097 | 0.00061939 | - | get_train_batch | 0.0003287 | 0.00065739 | - | on_train_batch_start | 7.578e-05 | 7.578e-05 | - | [LightningModule]BoringModel.training_step_end | 1.556e-06 | 1.556e-06 | - | model_forward | 0.00028337 | 0.00028337 | - | [LightningModule]BoringModel.optimizer_step | 0.0011853 | 0.0011853 | - | run_training_batch | 0.0016311 | 0.0016311 | - | on_train_batch_end | 7.6117e-05 | 7.6117e-05 | - | run_training_epoch | 0.0036915 | 0.0036915 | - | [LightningModule]BoringModel.on_epoch_end | 1.079e-06 | 1.079e-06 | - | [LightningModule]BoringModel.on_train_end | 1.509e-06 | 1.509e-06 | - | [LightningModule]BoringModel.on_fit_end | 3.127e-06 | 3.127e-06 | - | [LightningModule]BoringModel.teardown | 2.581e-06 | 2.581e-06 | + | run_training_epoch | 6.1558 | 6.1558 | + | run_training_batch | 0.0022506 | 0.015754 | + | [LightningModule]BoringModel.optimizer_step | 0.0017477 | 0.012234 | + | model_forward | 0.00055868 | 0.0039108 | + | [LightningModule]BoringModel.val_dataloader | 0.00024388 | 0.00024388 | + | on_train_batch_start | 0.00014637 | 0.0010246 | + | [LightningModule]BoringModel.teardown | 2.15e-06 | 2.15e-06 | + | [LightningModule]BoringModel.prepare_data | 1.955e-06 | 1.955e-06 | + | [LightningModule]BoringModel.on_epoch_end | 1.8373e-06 | 5.512e-06 | + | [LightningModule]BoringModel.on_train_start | 1.644e-06 | 1.644e-06 | + | [LightningModule]BoringModel.on_train_end | 1.516e-06 | 1.516e-06 | + | [LightningModule]BoringModel.on_fit_end | 1.426e-06 | 1.426e-06 | + | [LightningModule]BoringModel.setup | 1.403e-06 | 1.403e-06 | + | [LightningModule]BoringModel.on_epoch_start | 1.2883e-06 | 3.865e-06 | + | [LightningModule]BoringModel.on_fit_start | 1.226e-06 | 1.226e-06 | ----------------------------------------------------------------------------------------------- .. note:: Note that there are a lot more actions that will be present in the final report along with percentage and call count for each action. From 7f24f6a48e82b21967c3de9759f0b1ded2b35e43 Mon Sep 17 00:00:00 2001 From: Rohit Gupta Date: Mon, 31 Jan 2022 18:27:24 +0530 Subject: [PATCH 4/5] make_report --- pytorch_lightning/profiler/simple.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/profiler/simple.py b/pytorch_lightning/profiler/simple.py index 7847a13bc419a..292fa86786922 100644 --- a/pytorch_lightning/profiler/simple.py +++ b/pytorch_lightning/profiler/simple.py @@ -82,7 +82,7 @@ def _make_report_extended(self) -> Tuple[List, float, float]: total_calls = sum(x[2] for x in report) return report, total_calls, total_duration - def _make_report_non_extended(self) -> List[Tuple[str, float, float]]: + def _make_report(self) -> List[Tuple[str, float, float]]: report = [(action, np.mean(d), np.sum(d)) for action, d in self.recorded_durations.items()] report.sort(key=lambda x: x[1], reverse=True) return report @@ -130,7 +130,7 @@ def log_row(action, mean, total): output_string_len = len(header_string.expandtabs()) sep_lines = f"{sep}{'-' * output_string_len}" output_string += sep_lines + header_string + sep_lines - report = self._make_report_non_extended() + report = self._make_report() for action, mean_duration, total_duration in report: output_string += log_row(action, f"{mean_duration:.5}", f"{total_duration:.5}") From 77dd523d2a8fcb8050e20bec05aa7a9f0e182dca Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Mon, 31 Jan 2022 22:38:26 +0530 Subject: [PATCH 5/5] typing --- pytorch_lightning/profiler/simple.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/pytorch_lightning/profiler/simple.py b/pytorch_lightning/profiler/simple.py index 292fa86786922..c72c50ec865ca 100644 --- a/pytorch_lightning/profiler/simple.py +++ b/pytorch_lightning/profiler/simple.py @@ -25,6 +25,11 @@ log = logging.getLogger(__name__) +_TABLE_ROW_EXTENDED = Tuple[str, float, int, float, float] +_TABLE_DATA_EXTENDED = List[_TABLE_ROW_EXTENDED] +_TABLE_ROW = Tuple[str, float, float] +_TABLE_DATA = List[_TABLE_ROW] + class SimpleProfiler(BaseProfiler): """This profiler simply records the duration of actions (in seconds) and reports the mean duration of each @@ -72,17 +77,17 @@ def stop(self, action_name: str) -> None: duration = end_time - start_time self.recorded_durations[action_name].append(duration) - def _make_report_extended(self) -> Tuple[List, float, float]: + def _make_report_extended(self) -> Tuple[_TABLE_DATA_EXTENDED, float, float]: total_duration = time.monotonic() - self.start_time report = [ - [a, np.mean(d), len(d), np.sum(d), 100.0 * np.sum(d) / total_duration] + (a, np.mean(d), len(d), np.sum(d), 100.0 * np.sum(d) / total_duration) for a, d in self.recorded_durations.items() ] report.sort(key=lambda x: x[4], reverse=True) total_calls = sum(x[2] for x in report) return report, total_calls, total_duration - def _make_report(self) -> List[Tuple[str, float, float]]: + def _make_report(self) -> _TABLE_DATA: report = [(action, np.mean(d), np.sum(d)) for action, d in self.recorded_durations.items()] report.sort(key=lambda x: x[1], reverse=True) return report