From 52f83d44211fe98c05429df8f44fe3252faffdaf Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Sun, 5 Feb 2023 18:06:37 -0800 Subject: [PATCH 1/3] =?UTF-8?q?lint:=20=F0=9F=9A=AE=20typos=20and=20unused?= =?UTF-8?q?=20imports=20and=20type=20hints?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ldm/generate.py | 2 +- ldm/invoke/generator/diffusers_pipeline.py | 4 ++-- ldm/invoke/model_manager.py | 16 +++++++--------- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/ldm/generate.py b/ldm/generate.py index 002ba47a97b..e253c8ab5e4 100644 --- a/ldm/generate.py +++ b/ldm/generate.py @@ -211,7 +211,7 @@ def __init__( print('>> xformers memory-efficient attention is available but disabled') else: print('>> xformers not installed') - + # model caching system for fast switching self.model_manager = ModelManager(mconfig,self.device,self.precision,max_loaded_models=max_loaded_models) # don't accept invalid models diff --git a/ldm/invoke/generator/diffusers_pipeline.py b/ldm/invoke/generator/diffusers_pipeline.py index f8efe03762c..4be05e22bbd 100644 --- a/ldm/invoke/generator/diffusers_pipeline.py +++ b/ldm/invoke/generator/diffusers_pipeline.py @@ -28,7 +28,7 @@ from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img import StableDiffusionImg2ImgPipeline from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker from diffusers.schedulers.scheduling_utils import SchedulerMixin, SchedulerOutput -from diffusers.schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler +from diffusers.schedulers import KarrasDiffusionSchedulers from diffusers.utils.outputs import BaseOutput from torchvision.transforms.functional import resize as tv_resize from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer @@ -271,7 +271,7 @@ def __init__( text_encoder: CLIPTextModel, tokenizer: CLIPTokenizer, unet: UNet2DConditionModel, - scheduler: Union[DDIMScheduler, PNDMScheduler, LMSDiscreteScheduler], + scheduler: KarrasDiffusionSchedulers, safety_checker: Optional[StableDiffusionSafetyChecker], feature_extractor: Optional[CLIPFeatureExtractor], requires_safety_checker: bool = False, diff --git a/ldm/invoke/model_manager.py b/ldm/invoke/model_manager.py index 43854d7938f..d2c915eb1c4 100644 --- a/ldm/invoke/model_manager.py +++ b/ldm/invoke/model_manager.py @@ -15,24 +15,24 @@ import textwrap import time import warnings -import safetensors.torch from pathlib import Path from shutil import move, rmtree from typing import Any, Optional, Union -from huggingface_hub import scan_cache_dir -from ldm.util import download_with_progress_bar -import torch import safetensors +import safetensors.torch +import torch import transformers from diffusers import AutoencoderKL, logging as dlogging from diffusers.utils.logging import get_verbosity, set_verbosity, set_verbosity_error +from huggingface_hub import scan_cache_dir from omegaconf import OmegaConf from omegaconf.dictconfig import DictConfig from picklescan.scanner import scan_file_path from ldm.invoke.generator.diffusers_pipeline import StableDiffusionGeneratorPipeline from ldm.invoke.globals import Globals, global_models_dir, global_autoscan_dir, global_cache_dir +from ldm.util import download_with_progress_bar from ldm.util import instantiate_from_config, ask_user DEFAULT_MAX_MODELS=2 @@ -43,7 +43,7 @@ class ModelManager(object): def __init__(self, config:OmegaConf, - device_type:str='cpu', + 
device_type:str | torch.device='cpu', precision:str='float16', max_loaded_models=DEFAULT_MAX_MODELS): ''' @@ -697,7 +697,6 @@ def convert_and_import(self, ''' new_config = None from ldm.invoke.ckpt_to_diffuser import convert_ckpt_to_diffuser - import transformers if diffusers_path.exists(): print(f'ERROR: The path {str(diffusers_path)} already exists. Please move or remove it and try again.') return @@ -706,7 +705,7 @@ def convert_and_import(self, model_description = model_description or 'Optimized version of {model_name}' print(f'>> Optimizing {model_name} (30-60s)') try: - # By passing the specified VAE too the conversion function, the autoencoder + # By passing the specified VAE to the conversion function, the autoencoder # will be built into the model rather than tacked on afterward via the config file vae_model = self._load_vae(vae) if vae else None convert_ckpt_to_diffuser( @@ -753,7 +752,6 @@ def search_models(self, search_folder): return search_folder, found_models def _choose_diffusers_vae(self, model_name:str, vae:str=None)->Union[dict,str]: - # In the event that the original entry is using a custom ckpt VAE, we try to # map that VAE onto a diffuser VAE using a hard-coded dictionary. # I would prefer to do this differently: We load the ckpt model into memory, swap the @@ -1066,7 +1064,7 @@ def _delete_model_from_cache(repo_id): strategy.execute() @staticmethod - def _abs_path(path:Union(str,Path))->Path: + def _abs_path(path: str | Path)->Path: if path is None or Path(path).is_absolute(): return path return Path(Globals.root,path).resolve() From 802ad1de7ee6bf6a65256991b904931779a16d1e Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Sun, 5 Feb 2023 18:09:58 -0800 Subject: [PATCH 2/3] fix: enable `free_gpu_mem` for diffusers --- ldm/generate.py | 7 ++--- ldm/invoke/generator/diffusers_pipeline.py | 31 ++++++++++++++++++---- ldm/invoke/model_manager.py | 19 ++++++++++--- 3 files changed, 46 insertions(+), 11 deletions(-) diff --git a/ldm/generate.py b/ldm/generate.py index e253c8ab5e4..cf128ffd820 100644 --- a/ldm/generate.py +++ b/ldm/generate.py @@ -213,7 +213,9 @@ def __init__( print('>> xformers not installed') # model caching system for fast switching - self.model_manager = ModelManager(mconfig,self.device,self.precision,max_loaded_models=max_loaded_models) + self.model_manager = ModelManager(mconfig, self.device, self.precision, + max_loaded_models=max_loaded_models, + free_gpu_mem=self.free_gpu_mem) # don't accept invalid models fallback = self.model_manager.default_model() or FALLBACK_MODEL_NAME model = model or fallback @@ -478,8 +480,7 @@ def process_image(image,seed): self.model.cond_stage_model.device = self.model.device self.model.cond_stage_model.to(self.model.device) except AttributeError: - print(">> Warning: '--free_gpu_mem' is not yet supported when generating image using model based on HuggingFace Diffuser.") - pass + pass # free_gpu_mem is handled by model_manager for diffusers try: uc, c, extra_conditioning_info = get_uc_and_c_and_ec( diff --git a/ldm/invoke/generator/diffusers_pipeline.py b/ldm/invoke/generator/diffusers_pipeline.py index 4be05e22bbd..d67a528fac5 100644 --- a/ldm/invoke/generator/diffusers_pipeline.py +++ b/ldm/invoke/generator/diffusers_pipeline.py @@ -360,7 +360,7 @@ def latents_from_embeddings(self, latents: torch.Tensor, num_inference_steps: in callback: Callable[[PipelineIntermediateState], None] = None ) -> tuple[torch.Tensor, Optional[AttentionMapSaver]]: if timesteps is None: - 
self.scheduler.set_timesteps(num_inference_steps, device=self.unet.device) + self.scheduler.set_timesteps(num_inference_steps, device=self.device) timesteps = self.scheduler.timesteps infer_latents_from_embeddings = GeneratorToCallbackinator(self.generate_latents_from_embeddings, PipelineIntermediateState) result: PipelineIntermediateState = infer_latents_from_embeddings( @@ -379,6 +379,8 @@ def generate_latents_from_embeddings(self, latents: torch.Tensor, timesteps, additional_guidance: List[Callable] = None): if run_id is None: run_id = secrets.token_urlsafe(self.ID_LENGTH) + assert not latents.is_meta + assert not noise.is_meta if additional_guidance is None: additional_guidance = [] extra_conditioning_info = conditioning_data.extra @@ -391,7 +393,7 @@ def generate_latents_from_embeddings(self, latents: torch.Tensor, timesteps, batch_size = latents.shape[0] batched_t = torch.full((batch_size,), timesteps[0], - dtype=timesteps.dtype, device=self.unet.device) + dtype=timesteps.dtype, device=self.device) latents = self.scheduler.add_noise(latents, noise, batched_t) attention_map_saver: Optional[AttentionMapSaver] = None @@ -488,7 +490,7 @@ def img2img_from_embeddings(self, init_image = einops.rearrange(init_image, 'c h w -> 1 c h w') # 6. Prepare latent variables - device = self.unet.device + device = self.device latents_dtype = self.unet.dtype initial_latents = self.non_noised_latents_from_image(init_image, device=device, dtype=latents_dtype) noise = noise_func(initial_latents) @@ -503,7 +505,7 @@ def img2img_from_latents_and_embeddings(self, initial_latents, num_inference_ste strength, noise: torch.Tensor, run_id=None, callback=None ) -> InvokeAIStableDiffusionPipelineOutput: - timesteps, _ = self.get_img2img_timesteps(num_inference_steps, strength, self.unet.device) + timesteps, _ = self.get_img2img_timesteps(num_inference_steps, strength, self.device) result_latents, result_attention_maps = self.latents_from_embeddings( initial_latents, num_inference_steps, conditioning_data, timesteps=timesteps, @@ -542,7 +544,7 @@ def inpaint_from_embeddings( run_id=None, noise_func=None, ) -> InvokeAIStableDiffusionPipelineOutput: - device = self.unet.device + device = self.device latents_dtype = self.unet.dtype if isinstance(init_image, PIL.Image.Image): @@ -657,6 +659,25 @@ def channels(self) -> int: """Compatible with DiffusionWrapper""" return self.unet.in_channels + @property + def device(self) -> torch.device: + maybe_device = super().device + if maybe_device.type != 'meta': + return maybe_device + # copied from StableDiffusionPipeline._execution_device: + # Returns the device on which the pipeline's models will be executed. After calling + # `pipeline.enable_sequential_cpu_offload()` the execution device can only be inferred + # from Accelerate's module hooks. 
+ # FIXME: poking around in implementation details of accelerate hooks is Bad News + for module in self.unet.modules(): + if ( + hasattr(module, "_hf_hook") + and hasattr(module._hf_hook, "execution_device") + and module._hf_hook.execution_device is not None + ): + return torch.device(module._hf_hook.execution_device) + return maybe_device + def debug_latents(self, latents, msg): with torch.inference_mode(): from ldm.util import debug_image diff --git a/ldm/invoke/model_manager.py b/ldm/invoke/model_manager.py index d2c915eb1c4..c81c82ea11c 100644 --- a/ldm/invoke/model_manager.py +++ b/ldm/invoke/model_manager.py @@ -23,7 +23,7 @@ import safetensors.torch import torch import transformers -from diffusers import AutoencoderKL, logging as dlogging +from diffusers import AutoencoderKL, logging as dlogging, DiffusionPipeline from diffusers.utils.logging import get_verbosity, set_verbosity, set_verbosity_error from huggingface_hub import scan_cache_dir from omegaconf import OmegaConf @@ -45,7 +45,8 @@ def __init__(self, config:OmegaConf, device_type:str | torch.device='cpu', precision:str='float16', - max_loaded_models=DEFAULT_MAX_MODELS): + max_loaded_models=DEFAULT_MAX_MODELS, + free_gpu_mem=False): ''' Initialize with the path to the models.yaml config file, the torch device type, and precision. The optional @@ -58,6 +59,7 @@ def __init__(self, self.config = config self.precision = precision self.device = torch.device(device_type) + self.free_gpu_mem = free_gpu_mem self.max_loaded_models = max_loaded_models self.models = {} self.stack = [] # this is an LRU FIFO @@ -497,6 +499,9 @@ def _load_diffusers_model(self, mconfig): pipeline.to(self.device) + if self.free_gpu_mem: + pipeline.enable_sequential_cpu_offload() + model_hash = self._diffuser_sha256(name_or_path) # square images??? 
@@ -899,10 +904,14 @@ def _invalidate_cached_model(self,model_name:str) -> None: self.stack.remove(model_name) self.models.pop(model_name,None) - def _model_to_cpu(self,model): + def _model_to_cpu(self, model): if self.device == 'cpu': return model + if isinstance(model, DiffusionPipeline) and self.free_gpu_mem: + # diffusers CPU offloading is being handled by accelerate + return model + # diffusers really really doesn't like us moving a float16 model onto CPU verbosity = get_verbosity() set_verbosity_error() @@ -921,6 +930,10 @@ def _model_from_cpu(self,model): if self.device == 'cpu': return model + if isinstance(model, DiffusionPipeline) and self.free_gpu_mem: + # diffusers CPU offloading is being handled by accelerate + return model + model.to(self.device) model.cond_stage_model.device = self.device From ea88b1040297ca93c7037d7515a4624a47e84520 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Sun, 5 Feb 2023 20:32:36 -0800 Subject: [PATCH 3/3] fix: fix device used by prompt-to-embeddings --- ldm/modules/prompt_to_embeddings_converter.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/ldm/modules/prompt_to_embeddings_converter.py b/ldm/modules/prompt_to_embeddings_converter.py index dea15d61b43..cb19b4b3f8f 100644 --- a/ldm/modules/prompt_to_embeddings_converter.py +++ b/ldm/modules/prompt_to_embeddings_converter.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import math import torch @@ -7,7 +9,7 @@ from ldm.modules.textual_inversion_manager import TextualInversionManager -class WeightedPromptFragmentsToEmbeddingsConverter(): +class WeightedPromptFragmentsToEmbeddingsConverter: def __init__(self, tokenizer: CLIPTokenizer, # converts strings to lists of int token ids @@ -159,7 +161,7 @@ def apply_embedding_weights(self, embeddings: torch.Tensor, per_embedding_weight # lerped embeddings has shape (77, 768) - def get_token_ids_and_expand_weights(self, fragments: list[str], weights: list[float], device: str) -> (torch.Tensor, torch.Tensor): + def get_token_ids_and_expand_weights(self, fragments: list[str], weights: list[float], device: str | torch.device) -> (torch.Tensor, torch.Tensor): ''' Given a list of text fragments and corresponding weights: tokenize each fragment, append the token sequences together and return a padded token sequence starting with the bos marker, ending with the eos marker, and padded @@ -208,7 +210,7 @@ def get_token_ids_and_expand_weights(self, fragments: list[str], weights: list[f per_token_weights += [1.0] * pad_length all_token_ids_tensor = torch.tensor(all_token_ids, dtype=torch.long, device=device) - per_token_weights_tensor = torch.tensor(per_token_weights, dtype=torch_dtype(self.text_encoder.device), device=device) + per_token_weights_tensor = torch.tensor(per_token_weights, dtype=torch_dtype(device), device=device) #print(f"assembled all_token_ids_tensor with shape {all_token_ids_tensor.shape}") return all_token_ids_tensor, per_token_weights_tensor
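
Note on the `device` property added in PATCH 2/3: once `enable_sequential_cpu_offload()` hands the pipeline's submodules to accelerate, their weights sit on the `meta` device until each forward pass, so `DiffusionPipeline.device` can no longer report where inference will actually run; the property therefore falls back to the execution device recorded in accelerate's module hooks. The standalone sketch below (not part of the patches; the model id in the usage comment is an assumption for illustration) shows the same lookup in isolation:

    import torch
    from diffusers import DiffusionPipeline

    def execution_device(pipe: DiffusionPipeline) -> torch.device:
        # If offloading has not swapped the weights out, the reported device is the real one.
        if pipe.device.type != "meta":
            return pipe.device
        # Otherwise accelerate's module hooks record where the forward pass will actually run.
        for module in pipe.unet.modules():
            hook = getattr(module, "_hf_hook", None)
            if hook is not None and getattr(hook, "execution_device", None) is not None:
                return torch.device(hook.execution_device)
        return pipe.device

    # Usage sketch (model id assumed for illustration):
    # pipe = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
    # pipe.enable_sequential_cpu_offload()
    # print(execution_device(pipe))  # e.g. cuda:0, even though pipe.device now reports meta

The same reasoning is behind the `free_gpu_mem` guards added to `_model_to_cpu()` and `_model_from_cpu()`: when offloading is enabled, accelerate already owns device placement for diffusers pipelines, and an explicit `.to()` would fight the offload hooks.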