
Commit db78422

Authored by kartik4949 with rohitgr7, awaelchli, and mergify[bot]

summarize total size of model params in bytes (#5590)

* simplified model size calc
* fix spaces
* fix newlines
* minor refactor
* Update pytorch_lightning/core/memory.py (Co-authored-by: Rohit Gupta <[email protected]>)
* make model size property
* fix doctest
* Update pytorch_lightning/core/memory.py (Co-authored-by: Adrian Wälchli <[email protected]>)
* remove explicit doctest from file
* better docs
* model precalculate size 1.0 mbs
* better comment
* Update tests/core/test_memory.py (Co-authored-by: Rohit Gupta <[email protected]>)
* Update tests/core/test_memory.py (Co-authored-by: Rohit Gupta <[email protected]>)
* merge _model_size into model_size property itself
* minor comment fix
* add feature to changelog
* added precision test
* isort
* minor def name typo
* remove monkeypatch set env as BoringModel won't need any torch hub cache

Co-authored-by: Rohit Gupta <[email protected]>
Co-authored-by: Adrian Wälchli <[email protected]>
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
1 parent cb58fde commit db78422
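
In short, the added estimate is the parameter count times the bytes per parameter implied by the numeric precision, converted to megabytes. A minimal standalone sketch of that arithmetic (an illustration of the idea, not the code this commit adds to `ModelSummary`):

```python
from torch import nn

def estimated_model_size_mb(model: nn.Module, precision_bits: int = 32) -> float:
    """Estimate the size of the parameters alone: count * bytes-per-param, in MB."""
    total_params = sum(p.numel() for p in model.parameters())
    bytes_per_param = precision_bits / 8.0  # 4 bytes at FP32, 2 at FP16
    return total_params * bytes_per_param * 1e-6  # bytes -> megabytes

# 32 * 1000 + 1000 * 218 = 250,000 weights, mirroring the test model added below
model = nn.Sequential(nn.Linear(32, 1000, bias=False), nn.Linear(1000, 218, bias=False))
print(estimated_model_size_mb(model))      # 1.0 (MB at FP32)
print(estimated_model_size_mb(model, 16))  # 0.5 (MB at FP16)
```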

File tree

3 files changed: +86 -6 lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
@@ -9,6 +9,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 ### Added
 
+- Add support for summarized model total params size in megabytes ([#5590](https://github.com/PyTorchLightning/pytorch-lightning/pull/5590))
+
 - Add Support for multiple train loaders ([#1959](https://github.com/PyTorchLightning/pytorch-lightning/pull/1959))
 
pytorch_lightning/core/memory.py

Lines changed: 25 additions & 6 deletions
@@ -159,6 +159,7 @@ class ModelSummary(object):
     132 K     Trainable params
     0         Non-trainable params
     132 K     Total params
+    0.530     Total estimated model params size (MB)
     >>> ModelSummary(model, mode='full')  # doctest: +NORMALIZE_WHITESPACE
       | Name  | Type        | Params | In sizes  | Out sizes
     --------------------------------------------------------------

@@ -169,6 +170,7 @@ class ModelSummary(object):
     132 K     Trainable params
     0         Non-trainable params
     132 K     Total params
+    0.530     Total estimated model params size (MB)
     """

     MODE_TOP = "top"

@@ -180,6 +182,7 @@ def __init__(self, model, mode: str = MODE_DEFAULT):
         self._model = model
         self._mode = mode
         self._layer_summary = self.summarize()
+        self._precision_megabytes = (self._model.precision / 8.0) * 1e-6  # 1 byte -> 8 bits

     @property
     def named_modules(self) -> List[Tuple[str, nn.Module]]:
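
`_precision_megabytes` caches the per-parameter size in megabytes once at construction, from the module's `precision` attribute (a bit width, 32 unless the trainer sets otherwise). For the two widths exercised by the tests the factor works out to:

```python
>>> (32 / 8.0) * 1e-6  # FP32: 4 bytes per parameter, expressed in MB
4e-06
>>> (16 / 8.0) * 1e-6  # FP16: 2 bytes per parameter, expressed in MB
2e-06
```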
@@ -213,6 +216,18 @@ def out_sizes(self) -> List:
     def param_nums(self) -> List[int]:
         return [layer.num_parameters for layer in self._layer_summary.values()]

+    @property
+    def total_parameters(self) -> int:
+        return sum(p.numel() for p in self._model.parameters())
+
+    @property
+    def trainable_parameters(self) -> int:
+        return sum(p.numel() for p in self._model.parameters() if p.requires_grad)
+
+    @property
+    def model_size(self) -> float:
+        return self.total_parameters * self._precision_megabytes
+
     def summarize(self) -> Dict[str, LayerSummary]:
         summary = OrderedDict((name, LayerSummary(module)) for name, module in self.named_modules)
         if self._model.example_input_array is not None:
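
The three new properties are usable on their own, without printing the table. A hedged sketch (`TinyModel` is made up for illustration, and it assumes the module reports the default 32-bit `precision`):

```python
from torch import nn
from pytorch_lightning import LightningModule
from pytorch_lightning.core.memory import ModelSummary

class TinyModel(LightningModule):  # hypothetical module, for illustration only
    def __init__(self):
        super().__init__()
        self.layer = nn.Linear(32, 1000, bias=False)  # 32,000 weights, no bias

    def forward(self, x):
        return self.layer(x)

summary = ModelSummary(TinyModel(), mode=ModelSummary.MODE_TOP)
print(summary.total_parameters)      # 32000
print(summary.trainable_parameters)  # 32000 (every weight requires grad here)
print(summary.model_size)            # 32000 * 4e-06 ~= 0.128 MB at FP32
```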
@@ -248,7 +263,7 @@ def __str__(self):
         """
         Makes a summary listing with:
-        Layer Name, Layer Type, Number of Parameters, Input Sizes, Output Sizes
+        Layer Name, Layer Type, Number of Parameters, Input Sizes, Output Sizes, Model Size
         """
         arrays = [
             [" ", list(map(str, range(len(self._layer_summary))))],

@@ -259,11 +274,11 @@ def __str__(self):
         if self._model.example_input_array is not None:
             arrays.append(["In sizes", self.in_sizes])
             arrays.append(["Out sizes", self.out_sizes])
+        total_parameters = self.total_parameters
+        trainable_parameters = self.trainable_parameters
+        model_size = self.model_size

-        trainable_parameters = sum(p.numel() for p in self._model.parameters() if p.requires_grad)
-        total_parameters = sum(p.numel() for p in self._model.parameters())
-
-        return _format_summary_table(total_parameters, trainable_parameters, *arrays)
+        return _format_summary_table(total_parameters, trainable_parameters, model_size, *arrays)

     def __repr__(self):
         return str(self)

@@ -280,7 +295,7 @@ def parse_batch_shape(batch: Any) -> Union[str, List]:
     return UNKNOWN_SIZE


-def _format_summary_table(total_parameters: int, trainable_parameters: int, *cols) -> str:
+def _format_summary_table(total_parameters: int, trainable_parameters: int, model_size: float, *cols) -> str:
     """
     Takes in a number of arrays, each specifying a column in
     the summary table, and combines them all into one big

@@ -316,6 +331,8 @@ def _format_summary_table(total_parameters: int, trainable_parameters: int, *cols) -> str:
     summary += "Non-trainable params"
     summary += "\n" + s.format(get_human_readable_count(total_parameters), 10)
     summary += "Total params"
+    summary += "\n" + s.format(get_formatted_model_size(model_size), 10)
+    summary += "Total estimated model params size (MB)"

     return summary

@@ -372,6 +389,8 @@ def get_gpu_memory_map() -> Dict[str, int]:
     }
     return gpu_memory_map

+def get_formatted_model_size(total_model_size: float) -> float:
+    return f"{total_model_size:,.3f}"

 def get_human_readable_count(number: int) -> str:
     """

tests/core/test_memory.py

Lines changed: 59 additions & 0 deletions
@@ -17,7 +17,9 @@
 
 from pytorch_lightning import LightningModule, Trainer
 from pytorch_lightning.core.memory import ModelSummary, UNKNOWN_SIZE
+from pytorch_lightning.utilities import _NATIVE_AMP_AVAILABLE
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
+from tests.base import BoringModel
 from tests.base.models import ParityModuleRNN

@@ -33,6 +35,21 @@ def forward(self, *args, **kwargs):
         return {'loss': self.parameter.sum()}


+class PreCalculatedModel(BoringModel):
+    """ A model with precalculated total params size in MB for FP16 and FP32. """
+
+    def __init__(self, precision: int = 32):
+        super().__init__()
+        self.layer = nn.Linear(32, 1000, bias=False)  # 32K params
+        self.layer1 = nn.Linear(1000, 218, bias=False)  # 218K params
+
+        # calculate model size based on precision.
+        self.pre_calculated_model_size = 1.0 / (32 / precision)
+
+    def forward(self, x):
+        x = self.layer(x)
+        return self.layer1(x)
+
 class UnorderedModel(LightningModule):
     """ A model in which the layers not defined in order of execution """
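
Why `pre_calculated_model_size` equals `1.0 / (32 / precision)`: the two bias-free linears hold 32 × 1000 + 1000 × 218 = 250,000 weights, so at FP32 the parameters occupy 250,000 × 4 bytes = 1.0 MB, and at FP16 exactly half:

```python
>>> 250_000 * (32 / 8.0) * 1e-6  # FP32
1.0
>>> 250_000 * (16 / 8.0) * 1e-6  # FP16, i.e. 1.0 / (32 / 16)
0.5
```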

@@ -247,3 +264,45 @@ def forward(self, *args, **kwargs):
     model.example_input_array = example_input
     summary = model.summarize(mode=mode)
     assert summary.in_sizes == [expected_size]
+
+
+@pytest.mark.parametrize(['mode'], [
+    pytest.param(ModelSummary.MODE_FULL),
+    pytest.param(ModelSummary.MODE_TOP),
+])
+def test_model_size(mode):
+    """ Test model size is calculated correctly. """
+    model = PreCalculatedModel()
+    summary = model.summarize(mode=mode)
+    assert model.pre_calculated_model_size == summary.model_size
+
+
+@pytest.mark.parametrize(['mode'], [
+    pytest.param(ModelSummary.MODE_FULL),
+    pytest.param(ModelSummary.MODE_TOP),
+])
+def test_empty_model_size(mode):
+    """ Test empty model size is zero. """
+    model = EmptyModule()
+    summary = model.summarize(mode=mode)
+    assert 0.0 == summary.model_size
+
+
+@pytest.mark.skipif(not torch.cuda.is_available(), reason="Test requires GPU.")
+@pytest.mark.skipif(not _NATIVE_AMP_AVAILABLE, reason="test requires native AMP.")
+@pytest.mark.parametrize('precision', [16, 32])
+def test_model_size_precision(monkeypatch, tmpdir, precision):
+    """ Test model size for half and full precision. """
+    model = PreCalculatedModel(precision)
+
+    # fit model
+    trainer = Trainer(
+        default_root_dir=tmpdir,
+        gpus=1,
+        max_steps=1,
+        max_epochs=1,
+        precision=precision,
+    )
+    trainer.fit(model)
+    summary = model.summarize()
+    assert model.pre_calculated_model_size == summary.model_size
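
Outside pytest, the same check can be run by hand. A sketch reusing `PreCalculatedModel` from the hunk above (no trainer or GPU needed at FP32, since `summarize` only runs a forward pass when `example_input_array` is set):

```python
model = PreCalculatedModel(precision=32)  # class added in this diff
summary = model.summarize()               # defaults to the "top" mode
assert summary.model_size == model.pre_calculated_model_size == 1.0  # MB
```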
