From 2c2fada546ac2829d5fb9b6b9e8727b75bf740e9 Mon Sep 17 00:00:00 2001 From: Mauricio Villegas <5780272+mauvilsa@users.noreply.github.com> Date: Wed, 30 Apr 2025 07:41:41 +0200 Subject: [PATCH 1/4] Instantiator receives values applied by instantiation links to set in hparams (#20311). --- requirements/pytorch/extra.txt | 2 +- src/lightning/pytorch/CHANGELOG.md | 2 +- src/lightning/pytorch/cli.py | 40 ++++++++++++++-- tests/tests_pytorch/test_cli.py | 77 ++++++++++++++++++++++++++++-- 4 files changed, 112 insertions(+), 9 deletions(-) diff --git a/requirements/pytorch/extra.txt b/requirements/pytorch/extra.txt index e14cb38297caa..8d65c94a65194 100644 --- a/requirements/pytorch/extra.txt +++ b/requirements/pytorch/extra.txt @@ -5,7 +5,7 @@ matplotlib>3.1, <3.9.0 omegaconf >=2.2.3, <2.4.0 hydra-core >=1.2.0, <1.4.0 -jsonargparse[signatures] >=4.27.7, <=4.35.0 +jsonargparse[signatures] >=4.39.0, <4.40.0 rich >=12.3.0, <13.6.0 tensorboardX >=2.2, <2.7.0 # min version is set by torch.onnx missing attribute bitsandbytes >=0.45.2,<0.45.3; platform_system != "Darwin" diff --git a/src/lightning/pytorch/CHANGELOG.md b/src/lightning/pytorch/CHANGELOG.md index 5616defeffc8a..07392561e514e 100644 --- a/src/lightning/pytorch/CHANGELOG.md +++ b/src/lightning/pytorch/CHANGELOG.md @@ -24,7 +24,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Fixed -- +- Fixed `save_hyperparameters` not working correctly with `LightningCLI` when there are parsing links applied on instantiation ([#20777](https://github.com/Lightning-AI/pytorch-lightning/pull/20777)) --- diff --git a/src/lightning/pytorch/cli.py b/src/lightning/pytorch/cli.py index 75a6347c95356..acc28753f9201 100644 --- a/src/lightning/pytorch/cli.py +++ b/src/lightning/pytorch/cli.py @@ -320,6 +320,7 @@ def __init__( args: ArgsType = None, run: bool = True, auto_configure_optimizers: bool = True, + load_from_checkpoint_support: bool = True, ) -> None: """Receives as input pytorch-lightning classes (or callables which return pytorch-lightning classes), which are called / instantiated using a parsed configuration file and / or command line args. @@ -360,6 +361,11 @@ def __init__( ``dict`` or ``jsonargparse.Namespace``. run: Whether subcommands should be added to run a :class:`~lightning.pytorch.trainer.trainer.Trainer` method. If set to ``False``, the trainer and model classes will be instantiated only. + auto_configure_optimizers: Whether to automatically add default optimizer and lr_scheduler arguments. + load_from_checkpoint_support: Whether ``save_hyperparameters`` should save the original parsed + hyperparameters (instead of what ``__init__`` receives), such that it is possible for + ``load_from_checkpoint`` to correctly instantiate classes even when using complex nesting and + dependency injection. """ self.save_config_callback = save_config_callback @@ -389,7 +395,8 @@ def __init__( self._set_seed() - self._add_instantiators() + if load_from_checkpoint_support: + self._add_instantiators() self.before_instantiate_classes() self.instantiate_classes() self.after_instantiate_classes() @@ -537,11 +544,14 @@ def parse_arguments(self, parser: LightningArgumentParser, args: ArgsType) -> No else: self.config = parser.parse_args(args) - def _add_instantiators(self) -> None: + def _dump_config(self) -> None: + if hasattr(self, "config_dump"): + return self.config_dump = yaml.safe_load(self.parser.dump(self.config, skip_link_targets=False, skip_none=False)) if "subcommand" in self.config: self.config_dump = self.config_dump[self.config.subcommand] + def _add_instantiators(self) -> None: self.parser.add_instantiator( _InstantiatorFn(cli=self, key="model"), _get_module_type(self._model_class), @@ -792,12 +802,27 @@ def _get_module_type(value: Union[Callable, type]) -> type: return value +def _set_dict_nested(data: dict, key: str, value: Any) -> None: + keys = key.split(".") + for k in keys[:-1]: + assert k in data, f"Expected key {key} to be in data" + data = data[k] + data[keys[-1]] = value + + class _InstantiatorFn: def __init__(self, cli: LightningCLI, key: str) -> None: self.cli = cli self.key = key - def __call__(self, class_type: type[ModuleType], *args: Any, **kwargs: Any) -> ModuleType: + def __call__( + self, + class_type: type[ModuleType], + *args: Any, + applied_instantiation_links: dict, + **kwargs: Any, + ) -> ModuleType: + self.cli._dump_config() hparams = self.cli.config_dump.get(self.key, {}) if "class_path" in hparams: # To make hparams backwards compatible, and so that it is the same irrespective of subclass_mode, the @@ -808,6 +833,15 @@ def __call__(self, class_type: type[ModuleType], *args: Any, **kwargs: Any) -> M **hparams.get("init_args", {}), **hparams.get("dict_kwargs", {}), } + # get instantiation link target values from kwargs + for key, value in applied_instantiation_links.items(): + if not key.startswith(f"{self.key}."): + continue + key = key[len(f"{self.key}.") :] + if key.startswith("init_args."): + key = key[len("init_args.") :] + _set_dict_nested(hparams, key, value) + with _given_hyperparameters_context( hparams=hparams, instantiator="lightning.pytorch.cli.instantiate_module", diff --git a/tests/tests_pytorch/test_cli.py b/tests/tests_pytorch/test_cli.py index 5c33a8539b693..412dfbf317f97 100644 --- a/tests/tests_pytorch/test_cli.py +++ b/tests/tests_pytorch/test_cli.py @@ -560,6 +560,7 @@ def __init__(self, activation: torch.nn.Module = None, transform: Optional[list[ class BoringModelRequiredClasses(BoringModel): def __init__(self, num_classes: int, batch_size: int = 8): super().__init__() + self.save_hyperparameters() self.num_classes = num_classes self.batch_size = batch_size @@ -577,7 +578,7 @@ def add_arguments_to_parser(self, parser): parser.link_arguments("data.batch_size", "model.batch_size") parser.link_arguments("data.num_classes", "model.num_classes", apply_on="instantiate") - cli_args = ["--data.batch_size=12"] + cli_args = ["--data.batch_size=12", "--trainer.max_epochs=1"] with mock.patch("sys.argv", ["any.py"] + cli_args): cli = MyLightningCLI(BoringModelRequiredClasses, BoringDataModuleBatchSizeAndClasses, run=False) @@ -585,21 +586,89 @@ def add_arguments_to_parser(self, parser): assert cli.model.batch_size == 12 assert cli.model.num_classes == 5 - class MyLightningCLI(LightningCLI): + cli.trainer.fit(cli.model) + hparams_path = Path(cli.trainer.log_dir) / "hparams.yaml" + assert hparams_path.is_file() + hparams = yaml.safe_load(hparams_path.read_text()) + + hparams.pop("_instantiator") + assert hparams == {"batch_size": 12, "num_classes": 5} + + class MyLightningCLI2(LightningCLI): def add_arguments_to_parser(self, parser): parser.link_arguments("data.batch_size", "model.init_args.batch_size") parser.link_arguments("data.num_classes", "model.init_args.num_classes", apply_on="instantiate") - cli_args[-1] = "--model=tests_pytorch.test_cli.BoringModelRequiredClasses" + cli_args[0] = "--model=tests_pytorch.test_cli.BoringModelRequiredClasses" with mock.patch("sys.argv", ["any.py"] + cli_args): - cli = MyLightningCLI( + cli = MyLightningCLI2( BoringModelRequiredClasses, BoringDataModuleBatchSizeAndClasses, subclass_mode_model=True, run=False ) assert cli.model.batch_size == 8 assert cli.model.num_classes == 5 + cli.trainer.fit(cli.model) + hparams_path = Path(cli.trainer.log_dir) / "hparams.yaml" + assert hparams_path.is_file() + hparams = yaml.safe_load(hparams_path.read_text()) + + hparams.pop("_instantiator") + assert hparams == {"batch_size": 8, "num_classes": 5} + + +class CustomAdam(torch.optim.Adam): + def __init__(self, params, num_classes: Optional[int] = None, **kwargs): + super().__init__(params, **kwargs) + + +class DeepLinkTargetModel(BoringModel): + def __init__( + self, + optimizer: OptimizerCallable = torch.optim.Adam, + ): + super().__init__() + self.save_hyperparameters() + self.optimizer = optimizer + + def configure_optimizers(self): + optimizer = self.optimizer(self.parameters()) + return {"optimizer": optimizer} + + +def test_lightning_cli_link_arguments_subcommands_nested_target(cleandir): + class MyLightningCLI(LightningCLI): + def add_arguments_to_parser(self, parser): + parser.link_arguments( + "data.num_classes", + "model.init_args.optimizer.init_args.num_classes", + apply_on="instantiate", + ) + + cli_args = [ + "fit", + "--data.batch_size=12", + "--trainer.max_epochs=1", + "--model=tests_pytorch.test_cli.DeepLinkTargetModel", + "--model.optimizer=tests_pytorch.test_cli.CustomAdam", + ] + + with mock.patch("sys.argv", ["any.py"] + cli_args): + cli = MyLightningCLI( + DeepLinkTargetModel, + BoringDataModuleBatchSizeAndClasses, + subclass_mode_model=True, + auto_configure_optimizers=False, + ) + + hparams_path = Path(cli.trainer.log_dir) / "hparams.yaml" + assert hparams_path.is_file() + hparams = yaml.safe_load(hparams_path.read_text()) + + assert hparams["optimizer"]["class_path"] == "tests_pytorch.test_cli.CustomAdam" + assert hparams["optimizer"]["init_args"]["num_classes"] == 5 + class EarlyExitTestModel(BoringModel): def on_fit_start(self): From 37ac5e4edc23f4f79a35c7c2053cdb2ce5c28678 Mon Sep 17 00:00:00 2001 From: Mauricio Villegas <5780272+mauvilsa@users.noreply.github.com> Date: Wed, 30 Apr 2025 21:02:06 +0200 Subject: [PATCH 2/4] Add cleandir to test_lightning_cli_link_arguments --- tests/tests_pytorch/test_cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tests_pytorch/test_cli.py b/tests/tests_pytorch/test_cli.py index 412dfbf317f97..6619be17357cb 100644 --- a/tests/tests_pytorch/test_cli.py +++ b/tests/tests_pytorch/test_cli.py @@ -572,7 +572,7 @@ def __init__(self, batch_size: int = 8): self.num_classes = 5 # only available after instantiation -def test_lightning_cli_link_arguments(): +def test_lightning_cli_link_arguments(cleandir): class MyLightningCLI(LightningCLI): def add_arguments_to_parser(self, parser): parser.link_arguments("data.batch_size", "model.batch_size") From ce96c0aad3618d66cd855a3016fa6a862ebfc528 Mon Sep 17 00:00:00 2001 From: Jirka B Date: Fri, 6 Jun 2025 11:55:02 +0200 Subject: [PATCH 3/4] --fix-missing --- dockers/base-cuda/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dockers/base-cuda/Dockerfile b/dockers/base-cuda/Dockerfile index 0da0cf9b2de9f..bf493ad47e51a 100644 --- a/dockers/base-cuda/Dockerfile +++ b/dockers/base-cuda/Dockerfile @@ -34,7 +34,7 @@ ENV \ MAKEFLAGS="-j2" RUN \ - apt-get update && apt-get install -y wget && \ + apt-get update --fix-missing && apt-get install -y wget && \ apt-get update -qq --fix-missing && \ NCCL_VER=$(dpkg -s libnccl2 | grep '^Version:' | awk -F ' ' '{print $2}' | awk -F '-' '{print $1}' | grep -ve '^\s*$') && \ CUDA_VERSION_MM=${CUDA_VERSION%.*} && \ From 32bd1c581700a765af4d5c5cfc7cdefb4c74b604 Mon Sep 17 00:00:00 2001 From: Jirka Borovec <6035284+Borda@users.noreply.github.com> Date: Wed, 11 Jun 2025 07:33:33 +0200 Subject: [PATCH 4/4] fix install... --- .azure/gpu-tests-pytorch.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.azure/gpu-tests-pytorch.yml b/.azure/gpu-tests-pytorch.yml index 803460c770c13..eb76cd49e3f94 100644 --- a/.azure/gpu-tests-pytorch.yml +++ b/.azure/gpu-tests-pytorch.yml @@ -117,7 +117,6 @@ jobs: set -e extra=$(python -c "print({'lightning': 'pytorch-'}.get('$(PACKAGE_NAME)', ''))") pip install -e ".[${extra}dev]" pytest-timeout -U --extra-index-url="${TORCH_URL}" - pip install setuptools==75.6.0 jsonargparse==4.35.0 displayName: "Install package & dependencies" - bash: pip uninstall -y lightning