From 0ea501b9c25e81930c2b5d20065d6cba1243adb3 Mon Sep 17 00:00:00 2001 From: Pi Esposito Date: Mon, 5 Sep 2022 12:04:07 -0300 Subject: [PATCH 01/12] add accelerate to load models with smaller memory footprint --- src/diffusers/configuration_utils.py | 18 ++++++++---- src/diffusers/modeling_utils.py | 43 +++++++++++++++++++++------- tests/test_models_unet.py | 7 +++++ 3 files changed, 52 insertions(+), 16 deletions(-) diff --git a/src/diffusers/configuration_utils.py b/src/diffusers/configuration_utils.py index 053ccd6429e0..b90c5c994589 100644 --- a/src/diffusers/configuration_utils.py +++ b/src/diffusers/configuration_utils.py @@ -24,6 +24,7 @@ from huggingface_hub import hf_hub_download from huggingface_hub.utils import EntryNotFoundError, RepositoryNotFoundError, RevisionNotFoundError +import accelerate from requests import HTTPError from . import __version__ @@ -90,11 +91,18 @@ def save_config(self, save_directory: Union[str, os.PathLike], push_to_hub: bool @classmethod def from_config(cls, pretrained_model_name_or_path: Union[str, os.PathLike], return_unused_kwargs=False, **kwargs): - config_dict = cls.get_config_dict(pretrained_model_name_or_path=pretrained_model_name_or_path, **kwargs) - - init_dict, unused_kwargs = cls.extract_init_dict(config_dict, **kwargs) - - model = cls(**init_dict) + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", None) + device_map = kwargs.pop("device_map", None) + if low_cpu_mem_usage: + with accelerate.init_empty_weights(): + config_dict = cls.get_config_dict(pretrained_model_name_or_path=pretrained_model_name_or_path, **kwargs) + init_dict, unused_kwargs = cls.extract_init_dict(config_dict, **kwargs) + model = cls(**init_dict) + + else: + config_dict = cls.get_config_dict(pretrained_model_name_or_path=pretrained_model_name_or_path, **kwargs) + init_dict, unused_kwargs = cls.extract_init_dict(config_dict, **kwargs) + model = cls(**init_dict) if return_unused_kwargs: return model, unused_kwargs diff --git 
a/src/diffusers/modeling_utils.py b/src/diffusers/modeling_utils.py index ec501e2ae1f8..91de97d6b7c0 100644 --- a/src/diffusers/modeling_utils.py +++ b/src/diffusers/modeling_utils.py @@ -22,6 +22,7 @@ from huggingface_hub import hf_hub_download from huggingface_hub.utils import EntryNotFoundError, RepositoryNotFoundError, RevisionNotFoundError +import accelerate from requests import HTTPError from .utils import CONFIG_NAME, DIFFUSERS_CACHE, HUGGINGFACE_CO_RESOLVE_ENDPOINT, logging @@ -317,6 +318,8 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P from_auto_class = kwargs.pop("_from_auto", False) torch_dtype = kwargs.pop("torch_dtype", None) subfolder = kwargs.pop("subfolder", None) + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", None) + device_map = kwargs.pop("device_map", None) user_agent = {"file_type": "model", "framework": "pytorch", "from_auto_class": from_auto_class} @@ -333,6 +336,8 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P use_auth_token=use_auth_token, revision=revision, subfolder=subfolder, + low_cpu_mem_usage=low_cpu_mem_usage, + device_map=device_map, **kwargs, ) @@ -415,25 +420,41 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P ) # restore default dtype - state_dict = load_state_dict(model_file) - model, missing_keys, unexpected_keys, mismatched_keys, error_msgs = cls._load_pretrained_model( - model, - state_dict, - model_file, - pretrained_model_name_or_path, - ignore_mismatched_sizes=ignore_mismatched_sizes, - ) - # Set model in evaluation mode to deactivate DropOut modules by default - model.eval() + if low_cpu_mem_usage: + accelerate.load_checkpoint_and_dispatch( + model, + model_file, + device_map + ) + loading_info = { + "missing_keys": [], + "unexpected_keys": [], + "mismatched_keys": [], + "error_msgs": [], + } + + else: + + state_dict = load_state_dict(model_file) + model, missing_keys, unexpected_keys, mismatched_keys, 
error_msgs = cls._load_pretrained_model( + model, + state_dict, + model_file, + pretrained_model_name_or_path, + ignore_mismatched_sizes=ignore_mismatched_sizes, + ) - if output_loading_info: loading_info = { "missing_keys": missing_keys, "unexpected_keys": unexpected_keys, "mismatched_keys": mismatched_keys, "error_msgs": error_msgs, } + + # Set model in evaluation mode to deactivate DropOut modules by default + model.eval() + if output_loading_info: return model, loading_info return model diff --git a/tests/test_models_unet.py b/tests/test_models_unet.py index c574a0092e3c..2da4d103744f 100644 --- a/tests/test_models_unet.py +++ b/tests/test_models_unet.py @@ -133,6 +133,13 @@ def test_from_pretrained_hub(self): assert image is not None, "Make sure output is not None" + def test_from_pretrained_accelerate(self): + model, _ = UNet2DModel.from_pretrained("fusing/unet-ldm-dummy-update", output_loading_info=True, low_cpu_mem_usage=True, device_map="auto") + model.to(torch_device) + image = model(**self.dummy_input).sample + + assert image is not None, "Make sure output is not None" + def test_output_pretrained(self): model = UNet2DModel.from_pretrained("fusing/unet-ldm-dummy-update") model.eval() From 7631dd68da13f886d87ba3e6a0e21224abc4475d Mon Sep 17 00:00:00 2001 From: Pi Esposito Date: Mon, 12 Sep 2022 11:39:33 -0300 Subject: [PATCH 02/12] remove low_cpu_mem_usage as it is redundant --- setup.py | 1 + src/diffusers/configuration_utils.py | 5 ++--- src/diffusers/modeling_utils.py | 4 +--- tests/test_models_unet.py | 2 +- 4 files changed, 5 insertions(+), 7 deletions(-) diff --git a/setup.py b/setup.py index 7b71bd70d470..a6bf120db47e 100644 --- a/setup.py +++ b/setup.py @@ -97,6 +97,7 @@ "tensorboard", "torch>=1.4", "transformers>=4.21.0", + "accelerate>=0.12.0" ] # this is a lookup table with items like: diff --git a/src/diffusers/configuration_utils.py b/src/diffusers/configuration_utils.py index b90c5c994589..0720ba586939 100644 --- 
a/src/diffusers/configuration_utils.py +++ b/src/diffusers/configuration_utils.py @@ -91,9 +91,8 @@ def save_config(self, save_directory: Union[str, os.PathLike], push_to_hub: bool @classmethod def from_config(cls, pretrained_model_name_or_path: Union[str, os.PathLike], return_unused_kwargs=False, **kwargs): - low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", None) - device_map = kwargs.pop("device_map", None) - if low_cpu_mem_usage: + device_map = kwargs.pop("low_cpu_mem_usage", None) + if device_map is not None: with accelerate.init_empty_weights(): config_dict = cls.get_config_dict(pretrained_model_name_or_path=pretrained_model_name_or_path, **kwargs) init_dict, unused_kwargs = cls.extract_init_dict(config_dict, **kwargs) diff --git a/src/diffusers/modeling_utils.py b/src/diffusers/modeling_utils.py index 91de97d6b7c0..325160bbdc9f 100644 --- a/src/diffusers/modeling_utils.py +++ b/src/diffusers/modeling_utils.py @@ -318,7 +318,6 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P from_auto_class = kwargs.pop("_from_auto", False) torch_dtype = kwargs.pop("torch_dtype", None) subfolder = kwargs.pop("subfolder", None) - low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", None) device_map = kwargs.pop("device_map", None) user_agent = {"file_type": "model", "framework": "pytorch", "from_auto_class": from_auto_class} @@ -336,7 +335,6 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P use_auth_token=use_auth_token, revision=revision, subfolder=subfolder, - low_cpu_mem_usage=low_cpu_mem_usage, device_map=device_map, **kwargs, ) @@ -421,7 +419,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P # restore default dtype - if low_cpu_mem_usage: + if device_map is not None: accelerate.load_checkpoint_and_dispatch( model, model_file, diff --git a/tests/test_models_unet.py b/tests/test_models_unet.py index 2da4d103744f..477307cfb324 100644 --- a/tests/test_models_unet.py +++ 
b/tests/test_models_unet.py @@ -134,7 +134,7 @@ def test_from_pretrained_hub(self): assert image is not None, "Make sure output is not None" def test_from_pretrained_accelerate(self): - model, _ = UNet2DModel.from_pretrained("fusing/unet-ldm-dummy-update", output_loading_info=True, low_cpu_mem_usage=True, device_map="auto") + model, _ = UNet2DModel.from_pretrained("fusing/unet-ldm-dummy-update", output_loading_info=True, device_map="auto") model.to(torch_device) image = model(**self.dummy_input).sample From 8592e23c3a1f50775308be0c49214e5872a8d3ea Mon Sep 17 00:00:00 2001 From: Pi Esposito Date: Fri, 16 Sep 2022 12:26:23 -0300 Subject: [PATCH 03/12] move accelerate init weights context to modelling utils --- src/diffusers/configuration_utils.py | 14 ++------- src/diffusers/modeling_utils.py | 45 +++++++++++++++++++--------- 2 files changed, 34 insertions(+), 25 deletions(-) diff --git a/src/diffusers/configuration_utils.py b/src/diffusers/configuration_utils.py index 4aeacca75edb..0b48fac039e4 100644 --- a/src/diffusers/configuration_utils.py +++ b/src/diffusers/configuration_utils.py @@ -154,17 +154,9 @@ def from_config(cls, pretrained_model_name_or_path: Union[str, os.PathLike], ret """ - device_map = kwargs.pop("low_cpu_mem_usage", None) - if device_map is not None: - with accelerate.init_empty_weights(): - config_dict = cls.get_config_dict(pretrained_model_name_or_path=pretrained_model_name_or_path, **kwargs) - init_dict, unused_kwargs = cls.extract_init_dict(config_dict, **kwargs) - model = cls(**init_dict) - - else: - config_dict = cls.get_config_dict(pretrained_model_name_or_path=pretrained_model_name_or_path, **kwargs) - init_dict, unused_kwargs = cls.extract_init_dict(config_dict, **kwargs) - model = cls(**init_dict) + config_dict = cls.get_config_dict(pretrained_model_name_or_path=pretrained_model_name_or_path, **kwargs) + init_dict, unused_kwargs = cls.extract_init_dict(config_dict, **kwargs) + model = cls(**init_dict) if return_unused_kwargs: return 
model, unused_kwargs diff --git a/src/diffusers/modeling_utils.py b/src/diffusers/modeling_utils.py index 81d73a8be36c..07e3cdf66346 100644 --- a/src/diffusers/modeling_utils.py +++ b/src/diffusers/modeling_utils.py @@ -266,20 +266,37 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P # Load config if we don't provide a configuration config_path = pretrained_model_name_or_path - model, unused_kwargs = cls.from_config( - config_path, - cache_dir=cache_dir, - return_unused_kwargs=True, - force_download=force_download, - resume_download=resume_download, - proxies=proxies, - local_files_only=local_files_only, - use_auth_token=use_auth_token, - revision=revision, - subfolder=subfolder, - device_map=device_map, - **kwargs, - ) + if device_map == "auto": + with accelerate.init_empty_weights(): + model, unused_kwargs = cls.from_config( + config_path, + cache_dir=cache_dir, + return_unused_kwargs=True, + force_download=force_download, + resume_download=resume_download, + proxies=proxies, + local_files_only=local_files_only, + use_auth_token=use_auth_token, + revision=revision, + subfolder=subfolder, + device_map=device_map, + **kwargs, + ) + else: + model, unused_kwargs = cls.from_config( + config_path, + cache_dir=cache_dir, + return_unused_kwargs=True, + force_download=force_download, + resume_download=resume_download, + proxies=proxies, + local_files_only=local_files_only, + use_auth_token=use_auth_token, + revision=revision, + subfolder=subfolder, + device_map=device_map, + **kwargs, + ) if torch_dtype is not None and not isinstance(torch_dtype, torch.dtype): raise ValueError( From 76b8e4a0a21957a275eefa3863ee39215f0f5f6c Mon Sep 17 00:00:00 2001 From: Pi Esposito Date: Fri, 16 Sep 2022 12:44:31 -0300 Subject: [PATCH 04/12] add test to ensure results are the same when loading with accelerate --- tests/test_models_unet.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tests/test_models_unet.py 
b/tests/test_models_unet.py index 8ea25560c80c..13a2bda73d44 100644 --- a/tests/test_models_unet.py +++ b/tests/test_models_unet.py @@ -15,6 +15,7 @@ import math import unittest +import gc import torch @@ -140,6 +141,35 @@ def test_from_pretrained_accelerate(self): assert image is not None, "Make sure output is not None" + def test_from_pretrained_accelerate_wont_change_results(self): + model_accelerate, _ = UNet2DModel.from_pretrained("fusing/unet-ldm-dummy-update", output_loading_info=True, device_map="auto") + model_accelerate.to(torch_device) + model_accelerate.eval() + + noise = torch.randn( + 1, + model_accelerate.config.in_channels, + model_accelerate.config.sample_size, + model_accelerate.config.sample_size, + generator=torch.manual_seed(0), + ) + noise = noise.to(torch_device) + time_step = torch.tensor([10] * noise.shape[0]).to(torch_device) + + arr_accelerate = model_accelerate(noise, time_step)["sample"] + + # two models don't need to stay in the device at the same time + del model_accelerate + torch.cuda.empty_cache() + gc.collect() + + model_normal_load, _ = UNet2DModel.from_pretrained("fusing/unet-ldm-dummy-update", output_loading_info=True) + model_normal_load.to(torch_device) + model_normal_load.eval() + arr_normal_load = model_normal_load(noise, time_step)["sample"] + + assert torch.allclose(arr_accelerate["sample"], arr_normal_load, rtol=1e-3) + def test_output_pretrained(self): model = UNet2DModel.from_pretrained("fusing/unet-ldm-dummy-update") model.eval() From dd7f9b9cc66c0bd46562849a453c7b4b37d56766 Mon Sep 17 00:00:00 2001 From: Pi Esposito Date: Fri, 16 Sep 2022 12:59:01 -0300 Subject: [PATCH 05/12] add tests to ensure ram usage gets lower when using accelerate --- tests/test_models_unet.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tests/test_models_unet.py b/tests/test_models_unet.py index 13a2bda73d44..d12b3017f3af 100644 --- a/tests/test_models_unet.py +++ b/tests/test_models_unet.py @@ -16,6 +16,7 
@@ import math import unittest import gc +import tracemalloc import torch @@ -134,6 +135,7 @@ def test_from_pretrained_hub(self): assert image is not None, "Make sure output is not None" + @unittest.skipIf(torch_device == "cpu", "This test is supposed to run on GPU") def test_from_pretrained_accelerate(self): model, _ = UNet2DModel.from_pretrained("fusing/unet-ldm-dummy-update", output_loading_info=True, device_map="auto") model.to(torch_device) @@ -141,6 +143,7 @@ def test_from_pretrained_accelerate(self): assert image is not None, "Make sure output is not None" + @unittest.skipIf(torch_device == "cpu", "This test is supposed to run on GPU") def test_from_pretrained_accelerate_wont_change_results(self): model_accelerate, _ = UNet2DModel.from_pretrained("fusing/unet-ldm-dummy-update", output_loading_info=True, device_map="auto") model_accelerate.to(torch_device) @@ -170,6 +173,31 @@ def test_from_pretrained_accelerate_wont_change_results(self): assert torch.allclose(arr_accelerate["sample"], arr_normal_load, rtol=1e-3) + @unittest.skipIf(torch_device == "cpu", "This test is supposed to run on GPU") + def test_memory_footprint_gets_reduced(self): + torch.cuda.empty_cache() + gc.collect() + + tracemalloc.start() + model_accelerate, _ = UNet2DModel.from_pretrained("fusing/unet-ldm-dummy-update", output_loading_info=True, device_map="auto") + model_accelerate.to(torch_device) + model_accelerate.eval() + _, peak_accelerate = tracemalloc.get_traced_memory() + + + del model_accelerate + torch.cuda.empty_cache() + gc.collect() + + model_normal_load, _ = UNet2DModel.from_pretrained("fusing/unet-ldm-dummy-update", output_loading_info=True) + model_normal_load.to(torch_device) + model_normal_load.eval() + _, peak_normal = tracemalloc.get_traced_memory() + + tracemalloc.stop() + + assert peak_accelerate < peak_normal + def test_output_pretrained(self): model = UNet2DModel.from_pretrained("fusing/unet-ldm-dummy-update") model.eval() From ec5f7aa7583db1e6f8195bdc3afb2be928e33380 
Mon Sep 17 00:00:00 2001 From: Pi Esposito Date: Fri, 16 Sep 2022 13:07:03 -0300 Subject: [PATCH 06/12] move accelerate logic to single snippet under modelling utils and remove it from configuration utils --- src/diffusers/configuration_utils.py | 1 - src/diffusers/modeling_utils.py | 82 ++++++++++++++-------------- 2 files changed, 41 insertions(+), 42 deletions(-) diff --git a/src/diffusers/configuration_utils.py b/src/diffusers/configuration_utils.py index 0b48fac039e4..c6082a50c2d1 100644 --- a/src/diffusers/configuration_utils.py +++ b/src/diffusers/configuration_utils.py @@ -24,7 +24,6 @@ from huggingface_hub import hf_hub_download from huggingface_hub.utils import EntryNotFoundError, RepositoryNotFoundError, RevisionNotFoundError -import accelerate from requests import HTTPError from . import __version__ diff --git a/src/diffusers/modeling_utils.py b/src/diffusers/modeling_utils.py index 07e3cdf66346..54044670c0cc 100644 --- a/src/diffusers/modeling_utils.py +++ b/src/diffusers/modeling_utils.py @@ -266,46 +266,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P # Load config if we don't provide a configuration config_path = pretrained_model_name_or_path - if device_map == "auto": - with accelerate.init_empty_weights(): - model, unused_kwargs = cls.from_config( - config_path, - cache_dir=cache_dir, - return_unused_kwargs=True, - force_download=force_download, - resume_download=resume_download, - proxies=proxies, - local_files_only=local_files_only, - use_auth_token=use_auth_token, - revision=revision, - subfolder=subfolder, - device_map=device_map, - **kwargs, - ) - else: - model, unused_kwargs = cls.from_config( - config_path, - cache_dir=cache_dir, - return_unused_kwargs=True, - force_download=force_download, - resume_download=resume_download, - proxies=proxies, - local_files_only=local_files_only, - use_auth_token=use_auth_token, - revision=revision, - subfolder=subfolder, - device_map=device_map, - **kwargs, - ) - - if 
torch_dtype is not None and not isinstance(torch_dtype, torch.dtype): - raise ValueError( - f"{torch_dtype} needs to be of type `torch.dtype`, e.g. `torch.float16`, but is {type(torch_dtype)}." - ) - elif torch_dtype is not None: - model = model.to(torch_dtype) - model.register_to_config(_name_or_path=pretrained_model_name_or_path) # This variable will flag if we're loading a sharded checkpoint. In this case the archive file is just the # Load model pretrained_model_name_or_path = str(pretrained_model_name_or_path) @@ -378,20 +339,50 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P # restore default dtype - if device_map is not None: + if device_map == "auto": + with accelerate.init_empty_weights(): + model, unused_kwargs = cls.from_config( + config_path, + cache_dir=cache_dir, + return_unused_kwargs=True, + force_download=force_download, + resume_download=resume_download, + proxies=proxies, + local_files_only=local_files_only, + use_auth_token=use_auth_token, + revision=revision, + subfolder=subfolder, + device_map=device_map, + **kwargs, + ) + accelerate.load_checkpoint_and_dispatch( model, model_file, device_map ) + loading_info = { "missing_keys": [], "unexpected_keys": [], "mismatched_keys": [], "error_msgs": [], } - else: + model, unused_kwargs = cls.from_config( + config_path, + cache_dir=cache_dir, + return_unused_kwargs=True, + force_download=force_download, + resume_download=resume_download, + proxies=proxies, + local_files_only=local_files_only, + use_auth_token=use_auth_token, + revision=revision, + subfolder=subfolder, + device_map=device_map, + **kwargs, + ) state_dict = load_state_dict(model_file) model, missing_keys, unexpected_keys, mismatched_keys, error_msgs = cls._load_pretrained_model( @@ -409,6 +400,15 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P "error_msgs": error_msgs, } + if torch_dtype is not None and not isinstance(torch_dtype, torch.dtype): + raise ValueError( + 
f"{torch_dtype} needs to be of type `torch.dtype`, e.g. `torch.float16`, but is {type(torch_dtype)}." + ) + elif torch_dtype is not None: + model = model.to(torch_dtype) + + model.register_to_config(_name_or_path=pretrained_model_name_or_path) + # Set model in evaluation mode to deactivate DropOut modules by default model.eval() if output_loading_info: From 8392e3ff65e4cfa6f912ef72fe7afcd71c2aeb91 Mon Sep 17 00:00:00 2001 From: Pi Esposito Date: Fri, 16 Sep 2022 15:27:24 -0300 Subject: [PATCH 07/12] format code using to pass quality check --- src/diffusers/modeling_utils.py | 12 ++++-------- tests/test_models_unet.py | 21 +++++++++++++-------- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/src/diffusers/modeling_utils.py b/src/diffusers/modeling_utils.py index c4e5799e11c7..53c0eedcdad0 100644 --- a/src/diffusers/modeling_utils.py +++ b/src/diffusers/modeling_utils.py @@ -359,12 +359,8 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P device_map=device_map, **kwargs, ) - - accelerate.load_checkpoint_and_dispatch( - model, - model_file, - device_map - ) + + accelerate.load_checkpoint_and_dispatch(model, model_file, device_map) loading_info = { "missing_keys": [], @@ -387,7 +383,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P device_map=device_map, **kwargs, ) - + state_dict = load_state_dict(model_file) model, missing_keys, unexpected_keys, mismatched_keys, error_msgs = cls._load_pretrained_model( model, @@ -412,7 +408,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P model = model.to(torch_dtype) model.register_to_config(_name_or_path=pretrained_model_name_or_path) - + # Set model in evaluation mode to deactivate DropOut modules by default model.eval() if output_loading_info: diff --git a/tests/test_models_unet.py b/tests/test_models_unet.py index 012ba126a2ba..82e03630cdcb 100644 --- a/tests/test_models_unet.py +++ 
b/tests/test_models_unet.py @@ -137,7 +137,9 @@ def test_from_pretrained_hub(self): @unittest.skipIf(torch_device == "cpu", "This test is supposed to run on GPU") def test_from_pretrained_accelerate(self): - model, _ = UNet2DModel.from_pretrained("fusing/unet-ldm-dummy-update", output_loading_info=True, device_map="auto") + model, _ = UNet2DModel.from_pretrained( + "fusing/unet-ldm-dummy-update", output_loading_info=True, device_map="auto" + ) model.to(torch_device) image = model(**self.dummy_input).sample @@ -145,7 +147,9 @@ def test_from_pretrained_accelerate(self): @unittest.skipIf(torch_device == "cpu", "This test is supposed to run on GPU") def test_from_pretrained_accelerate_wont_change_results(self): - model_accelerate, _ = UNet2DModel.from_pretrained("fusing/unet-ldm-dummy-update", output_loading_info=True, device_map="auto") + model_accelerate, _ = UNet2DModel.from_pretrained( + "fusing/unet-ldm-dummy-update", output_loading_info=True, device_map="auto" + ) model_accelerate.to(torch_device) model_accelerate.eval() @@ -170,20 +174,21 @@ def test_from_pretrained_accelerate_wont_change_results(self): model_normal_load.to(torch_device) model_normal_load.eval() arr_normal_load = model_normal_load(noise, time_step)["sample"] - + assert torch.allclose(arr_accelerate["sample"], arr_normal_load, rtol=1e-3) @unittest.skipIf(torch_device == "cpu", "This test is supposed to run on GPU") def test_memory_footprint_gets_reduced(self): torch.cuda.empty_cache() gc.collect() - + tracemalloc.start() - model_accelerate, _ = UNet2DModel.from_pretrained("fusing/unet-ldm-dummy-update", output_loading_info=True, device_map="auto") + model_accelerate, _ = UNet2DModel.from_pretrained( + "fusing/unet-ldm-dummy-update", output_loading_info=True, device_map="auto" + ) model_accelerate.to(torch_device) model_accelerate.eval() - _, peak_accelerate = tracemalloc.get_traced_memory() - + _, peak_accelerate = tracemalloc.get_traced_memory() del model_accelerate torch.cuda.empty_cache() @@ 
-192,7 +197,7 @@ def test_memory_footprint_gets_reduced(self): model_normal_load, _ = UNet2DModel.from_pretrained("fusing/unet-ldm-dummy-update", output_loading_info=True) model_normal_load.to(torch_device) model_normal_load.eval() - _, peak_normal = tracemalloc.get_traced_memory() + _, peak_normal = tracemalloc.get_traced_memory() tracemalloc.stop() From 615054affbfb6cf22a13bedd65879908c8421ee0 Mon Sep 17 00:00:00 2001 From: Pi Esposito Date: Fri, 16 Sep 2022 15:30:37 -0300 Subject: [PATCH 08/12] fix imports with isort --- src/diffusers/modeling_utils.py | 2 +- tests/test_models_unet.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/diffusers/modeling_utils.py b/src/diffusers/modeling_utils.py index 53c0eedcdad0..e2f55e5fd1d1 100644 --- a/src/diffusers/modeling_utils.py +++ b/src/diffusers/modeling_utils.py @@ -20,9 +20,9 @@ import torch from torch import Tensor, device +import accelerate from huggingface_hub import hf_hub_download from huggingface_hub.utils import EntryNotFoundError, RepositoryNotFoundError, RevisionNotFoundError -import accelerate from requests import HTTPError from .utils import CONFIG_NAME, DIFFUSERS_CACHE, HUGGINGFACE_CO_RESOLVE_ENDPOINT, logging diff --git a/tests/test_models_unet.py b/tests/test_models_unet.py index 82e03630cdcb..fe49916a7146 100644 --- a/tests/test_models_unet.py +++ b/tests/test_models_unet.py @@ -13,10 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import math -import unittest import gc +import math import tracemalloc +import unittest import torch From 75c08a94625f45b8f5f1b707608eeb799112ca07 Mon Sep 17 00:00:00 2001 From: Pi Esposito Date: Fri, 16 Sep 2022 16:19:46 -0300 Subject: [PATCH 09/12] add accelerate to test extra deps --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 7f4a8034317a..cafc6b0ec909 100644 --- a/setup.py +++ b/setup.py @@ -175,7 +175,7 @@ def run(self): extras["quality"] = ["black==22.8", "isort>=5.5.4", "flake8>=3.8.3", "hf-doc-builder"] extras["docs"] = ["hf-doc-builder"] extras["training"] = ["accelerate", "datasets", "tensorboard", "modelcards"] -extras["test"] = ["datasets", "onnxruntime", "pytest", "pytest-timeout", "pytest-xdist", "scipy", "transformers"] +extras["test"] = ["datasets", "onnxruntime", "pytest", "pytest-timeout", "pytest-xdist", "scipy", "transformers", "accelerate"] extras["torch"] = deps_list("torch") if os.name == "nt": # windows From 6189b86f06574b320c02a330c5f9543f12f3b53e Mon Sep 17 00:00:00 2001 From: Pi Esposito Date: Wed, 21 Sep 2022 11:25:08 -0300 Subject: [PATCH 10/12] only import accelerate if device_map is set to auto --- src/diffusers/modeling_utils.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/diffusers/modeling_utils.py b/src/diffusers/modeling_utils.py index e2f55e5fd1d1..dcc59c90578b 100644 --- a/src/diffusers/modeling_utils.py +++ b/src/diffusers/modeling_utils.py @@ -20,10 +20,10 @@ import torch from torch import Tensor, device -import accelerate from huggingface_hub import hf_hub_download from huggingface_hub.utils import EntryNotFoundError, RepositoryNotFoundError, RevisionNotFoundError from requests import HTTPError +from transformers.utils import is_accelerate_available from .utils import CONFIG_NAME, DIFFUSERS_CACHE, HUGGINGFACE_CO_RESOLVE_ENDPOINT, logging @@ -344,6 +344,11 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, 
os.P # restore default dtype if device_map == "auto": + if is_accelerate_available(): + import accelerate + else: + raise ImportError("Please install accelerate via `pip install accelerate`") + with accelerate.init_empty_weights(): model, unused_kwargs = cls.from_config( config_path, From bc510615c7aa68c26c1fa5050fdc23b8cda6cc9b Mon Sep 17 00:00:00 2001 From: Pi Esposito Date: Thu, 22 Sep 2022 11:08:31 -0300 Subject: [PATCH 11/12] move accelerate availability check to diffusers import utils --- src/diffusers/modeling_utils.py | 2 +- src/diffusers/utils/__init__.py | 1 + src/diffusers/utils/import_utils.py | 9 +++++++++ tests/test_models_unet.py | 2 +- 4 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/diffusers/modeling_utils.py b/src/diffusers/modeling_utils.py index 7cb9ee7f4f0f..bd9c4fe34ab3 100644 --- a/src/diffusers/modeling_utils.py +++ b/src/diffusers/modeling_utils.py @@ -23,7 +23,7 @@ from huggingface_hub import hf_hub_download from huggingface_hub.utils import EntryNotFoundError, RepositoryNotFoundError, RevisionNotFoundError from requests import HTTPError -from transformers.utils import is_accelerate_available +from diffusers.utils import is_accelerate_available from .utils import CONFIG_NAME, DIFFUSERS_CACHE, HUGGINGFACE_CO_RESOLVE_ENDPOINT, WEIGHTS_NAME, logging diff --git a/src/diffusers/utils/__init__.py b/src/diffusers/utils/__init__.py index b63dbd2b285c..32ab54079046 100644 --- a/src/diffusers/utils/__init__.py +++ b/src/diffusers/utils/__init__.py @@ -32,6 +32,7 @@ is_transformers_available, is_unidecode_available, requires_backends, + is_accelerate_available ) from .logging import get_logger from .outputs import BaseOutput diff --git a/src/diffusers/utils/import_utils.py b/src/diffusers/utils/import_utils.py index de344d074da0..f5a08a916eea 100644 --- a/src/diffusers/utils/import_utils.py +++ b/src/diffusers/utils/import_utils.py @@ -159,6 +159,12 @@ except importlib_metadata.PackageNotFoundError: _scipy_available = False 
+_accelerate_available = importlib.util.find_spec("accelerate") is not None +try: + _accelerate_version = importlib_metadata.version("accelerate") + logger.debug(f"Successfully imported accelerate version {_accelerate_version}") +except importlib_metadata.PackageNotFoundError: + _accelerate_available = False def is_torch_available(): return _torch_available @@ -195,6 +201,9 @@ def is_onnx_available(): def is_scipy_available(): return _scipy_available +def is_accelerate_available(): + return _accelerate_available + # docstyle-ignore FLAX_IMPORT_ERROR = """ diff --git a/tests/test_models_unet.py b/tests/test_models_unet.py index cd4eb3fb782d..59af3832b7a0 100644 --- a/tests/test_models_unet.py +++ b/tests/test_models_unet.py @@ -175,7 +175,7 @@ def test_from_pretrained_accelerate_wont_change_results(self): model_normal_load.eval() arr_normal_load = model_normal_load(noise, time_step)["sample"] - assert torch.allclose(arr_accelerate["sample"], arr_normal_load, rtol=1e-3) + assert torch.allclose(arr_accelerate, arr_normal_load, rtol=1e-3) @unittest.skipIf(torch_device == "cpu", "This test is supposed to run on GPU") def test_memory_footprint_gets_reduced(self): From e020d73359ce3765fa83156c4b40f2e8bb3049d6 Mon Sep 17 00:00:00 2001 From: Pi Esposito Date: Thu, 22 Sep 2022 11:17:53 -0300 Subject: [PATCH 12/12] format code --- src/diffusers/modeling_utils.py | 2 +- src/diffusers/utils/__init__.py | 2 +- src/diffusers/utils/import_utils.py | 2 ++ 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/diffusers/modeling_utils.py b/src/diffusers/modeling_utils.py index c7d4cd6f5e9e..4d609043d731 100644 --- a/src/diffusers/modeling_utils.py +++ b/src/diffusers/modeling_utils.py @@ -21,10 +21,10 @@ import torch from torch import Tensor, device +from diffusers.utils import is_accelerate_available from huggingface_hub import hf_hub_download from huggingface_hub.utils import EntryNotFoundError, RepositoryNotFoundError, RevisionNotFoundError from requests import 
HTTPError -from diffusers.utils import is_accelerate_available from .utils import CONFIG_NAME, DIFFUSERS_CACHE, HUGGINGFACE_CO_RESOLVE_ENDPOINT, WEIGHTS_NAME, logging diff --git a/src/diffusers/utils/__init__.py b/src/diffusers/utils/__init__.py index 32ab54079046..9c428b6d7852 100644 --- a/src/diffusers/utils/__init__.py +++ b/src/diffusers/utils/__init__.py @@ -22,6 +22,7 @@ USE_TF, USE_TORCH, DummyObject, + is_accelerate_available, is_flax_available, is_inflect_available, is_modelcards_available, @@ -32,7 +33,6 @@ is_transformers_available, is_unidecode_available, requires_backends, - is_accelerate_available ) from .logging import get_logger from .outputs import BaseOutput diff --git a/src/diffusers/utils/import_utils.py b/src/diffusers/utils/import_utils.py index f5a08a916eea..b2aabee70c92 100644 --- a/src/diffusers/utils/import_utils.py +++ b/src/diffusers/utils/import_utils.py @@ -166,6 +166,7 @@ except importlib_metadata.PackageNotFoundError: _accelerate_available = False + def is_torch_available(): return _torch_available @@ -201,6 +202,7 @@ def is_onnx_available(): def is_scipy_available(): return _scipy_available + def is_accelerate_available(): return _accelerate_available