From a58afc6e337c60d44482693be0e8d7f9df964b25 Mon Sep 17 00:00:00 2001
From: Roshan Rao
Date: Fri, 13 Mar 2020 17:57:14 -0700
Subject: [PATCH 01/11] Set precision=16 when use_amp is passed as True

---
 pytorch_lightning/trainer/trainer.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py
index 7b0568e7b4c65..f690b546efe72 100644
--- a/pytorch_lightning/trainer/trainer.py
+++ b/pytorch_lightning/trainer/trainer.py
@@ -443,6 +443,10 @@ def __init__(
                                        test_percent_check, overfit_pct)
 
         # 16 bit mixed precision training using apex
+        if use_amp:
+            warnings.warn("`use_amp` has been deprecated in favor of `precision` since v0.7.0"
+                          " and will be removed in v0.9.0", DeprecationWarning)
+            precision = 16
         self.amp_level = amp_level
         self.precision = precision
 

From dbdb4eaa6c9ad09250ef971d21926233da543b31 Mon Sep 17 00:00:00 2001
From: Roshan Rao
Date: Fri, 13 Mar 2020 18:04:48 -0700
Subject: [PATCH 02/11] Update CHANGELOG.md

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 795233c6c908f..dca80618456d3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -81,6 +81,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Fixed checkpointing interval ([#1272](https://github.com/PyTorchLightning/pytorch-lightning/pull/1272))
 - Fixed validation and training loops run the partial dataset ([#1192](https://github.com/PyTorchLightning/pytorch-lightning/pull/1192))
 - Fixed running `on_validation_end` only on main process in DDP ([#1125](https://github.com/PyTorchLightning/pytorch-lightning/pull/1125))
+- Fixes `use_amp` issue ([#1145](https://github.com/PyTorchLightning/pytorch-lightning/pull/1145))
 
 ## [0.7.1] - 2020-03-07
 

From 8cdfe2470cc2def4ec6818d2a27a21e5d040f756 Mon Sep 17 00:00:00 2001
From: Roshan Rao
Date: Sat, 14 Mar 2020 13:31:19 -0700
Subject: [PATCH 03/11] add use_amp to deprecated API

---
 CHANGELOG.md                         | 1 +
 pytorch_lightning/trainer/trainer.py | 7 ++++---
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index dca80618456d3..e7a5bbf4fc491 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -82,6 +82,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Fixed validation and training loops run the partial dataset ([#1192](https://github.com/PyTorchLightning/pytorch-lightning/pull/1192))
 - Fixed running `on_validation_end` only on main process in DDP ([#1125](https://github.com/PyTorchLightning/pytorch-lightning/pull/1125))
 - Fixes `use_amp` issue ([#1145](https://github.com/PyTorchLightning/pytorch-lightning/pull/1145))
+- Fixes using deprecated `use_amp` attribute ([#1145](https://github.com/PyTorchLightning/pytorch-lightning/pull/1145))
 
 ## [0.7.1] - 2020-03-07
 
diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py
index f690b546efe72..0349c7a3a2569 100644
--- a/pytorch_lightning/trainer/trainer.py
+++ b/pytorch_lightning/trainer/trainer.py
@@ -443,12 +443,13 @@ def __init__(
                                        test_percent_check, overfit_pct)
 
         # 16 bit mixed precision training using apex
+        self.amp_level = amp_level
+        self.precision = precision
+
         if use_amp:
             warnings.warn("`use_amp` has been deprecated in favor of `precision` since v0.7.0"
                           " and will be removed in v0.9.0", DeprecationWarning)
-            precision = 16
-        self.amp_level = amp_level
-        self.precision = precision
+            self.use_amp = use_amp
 
         assert self.precision in (16, 32), 'only 32 or 16 bit precision supported'
 

From 8c4634fbf12a8826a645726ff8f3804e492fe894 Mon Sep 17 00:00:00 2001
From: Jirka Borovec
Date: Mon, 23 Mar 2020 11:08:20 +0100
Subject: [PATCH 04/11] Update trainer.py

---
 pytorch_lightning/trainer/trainer.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py
index 0349c7a3a2569..1a6768cdb00ee 100644
--- a/pytorch_lightning/trainer/trainer.py
+++ b/pytorch_lightning/trainer/trainer.py
@@ -21,8 +21,7 @@
 from pytorch_lightning.trainer.callback_config import TrainerCallbackConfigMixin
 from pytorch_lightning.trainer.callback_hook import TrainerCallbackHookMixin
 from pytorch_lightning.trainer.data_loading import TrainerDataLoadingMixin
-from pytorch_lightning.trainer.deprecated_api import (TrainerDeprecatedAPITillVer0_8,
-                                                      TrainerDeprecatedAPITillVer0_9)
+from pytorch_lightning.trainer.deprecated_api import TrainerDeprecatedAPITillVer0_8, TrainerDeprecatedAPITillVer0_9
 from pytorch_lightning.trainer.distrib_data_parallel import TrainerDDPMixin
 from pytorch_lightning.trainer.distrib_parts import TrainerDPMixin, parse_gpu_ids, determine_root_gpu_device
 from pytorch_lightning.trainer.evaluation_loop import TrainerEvaluationLoopMixin
@@ -88,7 +87,6 @@ def __init__(
             gpus: Optional[Union[List[int], str, int]] = None,
             num_tpu_cores: Optional[int] = None,
             log_gpu_memory: Optional[str] = None,
-            show_progress_bar=None,  # backward compatible, todo: remove in v0.9.0
             progress_bar_refresh_rate: int = 1,
             overfit_pct: float = 0.0,
             track_grad_norm: int = -1,
@@ -122,7 +120,8 @@ def __init__(
             nb_gpu_nodes=None,  # backward compatible, todo: remove in v0.8.0
             max_nb_epochs=None,  # backward compatible, todo: remove in v0.8.0
             min_nb_epochs=None,  # backward compatible, todo: remove in v0.8.0
-            use_amp=False,  # backward compatible, todo: remove in v0.9.0
+            use_amp=None,  # backward compatible, todo: remove in v0.9.0
+            show_progress_bar=None,  # backward compatible, todo: remove in v0.9.0
             nb_sanity_val_steps=None,  # backward compatible, todo: remove in v0.8.0
             **kwargs
     ):
@@ -446,9 +445,8 @@ def __init__(
         self.amp_level = amp_level
         self.precision = precision
 
-        if use_amp:
-            warnings.warn("`use_amp` has been deprecated in favor of `precision` since v0.7.0"
-                          " and will be removed in v0.9.0", DeprecationWarning)
+        # Backward compatibility, TODO: remove in v0.9.0
+        if use_amp is not None:
             self.use_amp = use_amp
 
         assert self.precision in (16, 32), 'only 32 or 16 bit precision supported'
@@ -469,6 +467,10 @@ def slurm_job_id(self) -> int:
             job_id = None
         return job_id
 
+    @property
+    def use_amp(self):
+        return self.precision == 16
+
     @classmethod
     def default_attributes(cls):
         init_signature = inspect.signature(Trainer)

From 73ecffef56ffd7679b4c0372d650063a9a20bf89 Mon Sep 17 00:00:00 2001
From: William Falcon
Date: Thu, 2 Apr 2020 15:24:38 -0400
Subject: [PATCH 05/11] Update trainer.py

---
 pytorch_lightning/trainer/trainer.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py
index 1a6768cdb00ee..89a2f6109859e 100644
--- a/pytorch_lightning/trainer/trainer.py
+++ b/pytorch_lightning/trainer/trainer.py
@@ -446,6 +446,7 @@ def __init__(
         self.precision = precision
 
         # Backward compatibility, TODO: remove in v0.9.0
+        self.use_amp = False
         if use_amp is not None:
             self.use_amp = use_amp
 

From 44b0dd1d75052204fe4dce6f8a9c3690b8f9a043 Mon Sep 17 00:00:00 2001
From: Roshan Rao
Date: Thu, 2 Apr 2020 14:25:21 -0700
Subject: [PATCH 06/11] move the use_amp attribute to deprecated API

---
 pytorch_lightning/trainer/trainer.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py
index 89a2f6109859e..91c16e22d394d 100644
--- a/pytorch_lightning/trainer/trainer.py
+++ b/pytorch_lightning/trainer/trainer.py
@@ -468,10 +468,6 @@ def slurm_job_id(self) -> int:
             job_id = None
         return job_id
 
-    @property
-    def use_amp(self):
-        return self.precision == 16
-
     @classmethod
     def default_attributes(cls):
         init_signature = inspect.signature(Trainer)

From 95f298ff714e4f3b254088e11fb7e1e6bbbdde74 Mon Sep 17 00:00:00 2001
From: Roshan Rao
Date: Sat, 4 Apr 2020 15:02:59 -0700
Subject: [PATCH 07/11] move use_amp deprecation back to Trainer's __init__

---
 pytorch_lightning/trainer/trainer.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py
index 91c16e22d394d..09afab985f8af 100644
--- a/pytorch_lightning/trainer/trainer.py
+++ b/pytorch_lightning/trainer/trainer.py
@@ -446,9 +446,10 @@ def __init__(
         self.precision = precision
 
         # Backward compatibility, TODO: remove in v0.9.0
-        self.use_amp = False
         if use_amp is not None:
-            self.use_amp = use_amp
+            warnings.warn("`use_amp` has been replaced by `precision` since v0.7.0"
+                          " and this argument will be removed in v0.9.0", DeprecationWarning)
+            self.precision = 16 if use_amp else 32
 
         assert self.precision in (16, 32), 'only 32 or 16 bit precision supported'
 

From 8793928e2ec5e254c77c905186c90e4ac1cc4569 Mon Sep 17 00:00:00 2001
From: "J. Borovec"
Date: Sun, 5 Apr 2020 00:49:47 +0200
Subject: [PATCH 08/11] drop unsed

---
 pytorch_lightning/trainer/training_loop.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pytorch_lightning/trainer/training_loop.py b/pytorch_lightning/trainer/training_loop.py
index ba974dac12966..73333fb7d59cd 100644
--- a/pytorch_lightning/trainer/training_loop.py
+++ b/pytorch_lightning/trainer/training_loop.py
@@ -200,7 +200,6 @@ class TrainerTrainLoopMixin(ABC):
     optimizers: ...
     optimizer_frequencies: ...
     accumulate_grad_batches: int
-    use_amp: bool
    track_grad_norm: ...
     model: LightningModule
     interrupted: bool

From 32aba5382ab609100da49e0b59e4d821b477bbd3 Mon Sep 17 00:00:00 2001
From: "J. Borovec"
Date: Sun, 5 Apr 2020 02:04:34 +0200
Subject: [PATCH 09/11] drop deprecated

---
 pl_examples/basic_examples/gpu_template.py |  2 +-
 pl_examples/domain_templates/imagenet.py   |  2 +-
 .../trainer/auto_mix_precision.py          | 18 ++++++++++--------
 .../trainer/distrib_data_parallel.py       |  6 +++++-
 pytorch_lightning/trainer/distrib_parts.py |  6 +++++-
 5 files changed, 22 insertions(+), 12 deletions(-)

diff --git a/pl_examples/basic_examples/gpu_template.py b/pl_examples/basic_examples/gpu_template.py
index 408b62387fc8c..c5fa94a3cf140 100644
--- a/pl_examples/basic_examples/gpu_template.py
+++ b/pl_examples/basic_examples/gpu_template.py
@@ -32,7 +32,7 @@ def main(hparams):
         max_epochs=hparams.epochs,
         gpus=hparams.gpus,
         distributed_backend=hparams.distributed_backend,
-        use_amp=hparams.use_16bit
+        precision=16 if hparams.use_16bit else 32,
     )
 
     # ------------------------
diff --git a/pl_examples/domain_templates/imagenet.py b/pl_examples/domain_templates/imagenet.py
index 52c5cf0642f0b..95a894d57988b 100644
--- a/pl_examples/domain_templates/imagenet.py
+++ b/pl_examples/domain_templates/imagenet.py
@@ -226,7 +226,7 @@ def main(hparams):
         gpus=hparams.gpus,
         max_epochs=hparams.epochs,
         distributed_backend=hparams.distributed_backend,
-        use_amp=hparams.use_16bit
+        precision=16 if hparams.use_16bit else 32,
     )
     if hparams.evaluate:
         trainer.run_evaluation()
diff --git a/pytorch_lightning/trainer/auto_mix_precision.py b/pytorch_lightning/trainer/auto_mix_precision.py
index 49bed8b8f69f4..41d66c6d299df 100644
--- a/pytorch_lightning/trainer/auto_mix_precision.py
+++ b/pytorch_lightning/trainer/auto_mix_precision.py
@@ -14,19 +14,21 @@ class TrainerAMPMixin(ABC):
 
     # this is just a summary on variables used in this abstract class,
     #  the proper values/initialisation should be done in child class
-    use_amp: bool
+    precision: int
 
     def init_amp(self, use_amp):
-        self.use_amp = use_amp and APEX_AVAILABLE
-        if self.use_amp:
-            log.info('Using 16bit precision.')
-
         if use_amp and not APEX_AVAILABLE:  # pragma: no-cover
-            msg = """
+            raise ModuleNotFoundError("""
             You set `use_amp=True` but do not have apex installed.
             Install apex first using this guide and rerun with use_amp=True:
             https://github.com/NVIDIA/apex#linux
             this run will NOT use 16 bit precision
-            """
-            raise ModuleNotFoundError(msg)
+            """)
+
+        if self.use_amp:
+            log.info('Using 16bit precision.')
+
+    @property
+    def use_amp(self):
+        return self.precision == 16 and APEX_AVAILABLE
 
diff --git a/pytorch_lightning/trainer/distrib_data_parallel.py b/pytorch_lightning/trainer/distrib_data_parallel.py
index 7a7f73bea3d70..50941e7b2fa3f 100644
--- a/pytorch_lightning/trainer/distrib_data_parallel.py
+++ b/pytorch_lightning/trainer/distrib_data_parallel.py
@@ -141,7 +141,6 @@ class TrainerDDPMixin(ABC):
     logger: Union[LightningLoggerBase, bool]
     data_parallel_device_ids: ...
     distributed_backend: str
-    use_amp: bool
     amp_level: str
     use_tpu: bool
     default_save_path: str
@@ -151,6 +150,11 @@ class TrainerDDPMixin(ABC):
     def num_gpus(self) -> int:
         """Warning: this is just empty shell for code implemented in other class."""
 
+    @property
+    @abstractmethod
+    def use_amp(self) -> bool:
+        """Warning: this is just empty shell for code implemented in other class."""
+
     @abstractmethod
     def copy_trainer_model_properties(self, *args):
         """Warning: this is just empty shell for code implemented in other class."""
diff --git a/pytorch_lightning/trainer/distrib_parts.py b/pytorch_lightning/trainer/distrib_parts.py
index 7abf987d5c4f3..084b4a677a8e0 100644
--- a/pytorch_lightning/trainer/distrib_parts.py
+++ b/pytorch_lightning/trainer/distrib_parts.py
@@ -372,7 +372,6 @@ class TrainerDPMixin(ABC):
     use_dp: bool
     use_ddp2: bool
     use_ddp: bool
-    use_amp: bool
     testing: bool
     single_gpu: bool
     root_gpu: ...
@@ -385,6 +384,11 @@ class TrainerDPMixin(ABC):
     use_tpu: bool
     data_parallel_device_ids: ...
 
+    @property
+    @abstractmethod
+    def use_amp(self) -> bool:
+        """Warning: this is just empty shell for code implemented in other class."""
+
     @abstractmethod
     def run_pretrain_routine(self, *args):
         """Warning: this is just empty shell for code implemented in other class."""

From 59e003df4e286bafb4f735ff9e883358c66cb657 Mon Sep 17 00:00:00 2001
From: "J. Borovec"
Date: Sun, 5 Apr 2020 02:19:52 +0200
Subject: [PATCH 10/11] reorder imports

---
 pytorch_lightning/trainer/trainer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py
index 09afab985f8af..d470bcb35a8cd 100644
--- a/pytorch_lightning/trainer/trainer.py
+++ b/pytorch_lightning/trainer/trainer.py
@@ -54,13 +54,13 @@ class Trainer(
     TrainerIOMixin,
     TrainerOptimizersMixin,
+    TrainerAMPMixin,
     TrainerDPMixin,
     TrainerDDPMixin,
     TrainerLoggingMixin,
     TrainerModelHooksMixin,
     TrainerTrainingTricksMixin,
     TrainerDataLoadingMixin,
-    TrainerAMPMixin,
     TrainerEvaluationLoopMixin,
     TrainerTrainLoopMixin,
     TrainerCallbackConfigMixin,

From e85201993b75749521e152ad31683d9ea2971abf Mon Sep 17 00:00:00 2001
From: "J. Borovec"
Date: Sun, 5 Apr 2020 23:25:52 +0200
Subject: [PATCH 11/11] typing

---
 pytorch_lightning/trainer/auto_mix_precision.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pytorch_lightning/trainer/auto_mix_precision.py b/pytorch_lightning/trainer/auto_mix_precision.py
index 41d66c6d299df..135cf83e288c8 100644
--- a/pytorch_lightning/trainer/auto_mix_precision.py
+++ b/pytorch_lightning/trainer/auto_mix_precision.py
@@ -30,5 +30,5 @@ def init_amp(self, use_amp):
         log.info('Using 16bit precision.')
 
     @property
-    def use_amp(self):
+    def use_amp(self) -> bool:
         return self.precision == 16 and APEX_AVAILABLE
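
Taken together, the series converges on a small backward-compatibility pattern: the constructor accepts the deprecated `use_amp` flag only to warn and translate it into `precision`, while `use_amp` survives as a read-only property derived from `precision`. Below is a minimal, self-contained sketch of that pattern; it uses a toy class for illustration only, not the real `Trainer`, and omits the apex availability check.

```python
import warnings
from typing import Optional


class Trainer:
    """Toy sketch of the back-compat pattern in this series (not the real class)."""

    def __init__(self, precision: int = 32, use_amp: Optional[bool] = None):
        self.precision = precision

        # Backward-compatibility shim: accept the deprecated flag, warn once,
        # and translate it into the new `precision` setting.
        if use_amp is not None:
            warnings.warn("`use_amp` has been replaced by `precision`",
                          DeprecationWarning)
            self.precision = 16 if use_amp else 32

        assert self.precision in (16, 32)

    @property
    def use_amp(self) -> bool:
        # Derived, read-only view so legacy call sites that only *read*
        # `use_amp` keep working without a second stored flag.
        return self.precision == 16


with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    legacy = Trainer(use_amp=True)   # old style: still works, but warns
new = Trainer(precision=16)          # new style: silent

assert legacy.precision == new.precision == 16
assert legacy.use_amp and new.use_amp
assert any(issubclass(w.category, DeprecationWarning) for w in caught)
```

Because the legacy flag is never stored, there is no second source of truth that can drift out of sync with `precision`.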
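PATCH 09 also changes how the other trainer mixins see `use_amp`: instead of annotating a `use_amp: bool` attribute, they declare it as an abstract property that the combined class satisfies through `TrainerAMPMixin`. A rough sketch of that mixin wiring, under the same caveat that these are toy classes rather than the real ones:

```python
from abc import ABC, abstractmethod


class AMPMixin(ABC):
    # summary annotation only; the concrete class owns `precision`
    precision: int

    @property
    def use_amp(self) -> bool:
        return self.precision == 16


class DDPMixin(ABC):
    # consumers declare the dependency as an abstract property instead of
    # annotating `use_amp: bool`; some class earlier in the MRO provides it
    @property
    @abstractmethod
    def use_amp(self) -> bool:
        """Empty shell, implemented by AMPMixin on the combined class."""

    def describe_precision(self) -> str:
        return '16-bit' if self.use_amp else '32-bit'


class ToyTrainer(AMPMixin, DDPMixin):
    def __init__(self, precision: int = 32):
        self.precision = precision


assert ToyTrainer(precision=16).describe_precision() == '16-bit'
assert ToyTrainer().describe_precision() == '32-bit'
```

This is also why PATCH 10 moves `TrainerAMPMixin` earlier in the `Trainer` base-class list: the concrete `use_amp` property has to sit before the mixins that merely declare it abstract.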