From a747d7c3fd24c4945418f0a329eab96af44d306f Mon Sep 17 00:00:00 2001
From: Benjamin Lefaudeux
Date: Wed, 30 Nov 2022 08:28:30 +0000
Subject: [PATCH 1/2] Moving the mem efficient attention activation to the top + recursive

---
 examples/community/lpw_stable_diffusion.py    | 18 ----------
 examples/community/sd_text2img_k_diffusion.py | 18 ----------
 examples/community/text_inpainting.py         | 18 ----------
 src/diffusers/models/attention.py             | 16 ++-------
 src/diffusers/models/unet_2d_blocks.py        | 12 -------
 src/diffusers/models/unet_2d_condition.py     | 11 ------
 src/diffusers/pipeline_utils.py               | 35 +++++++++++++++++++
 .../alt_diffusion/pipeline_alt_diffusion.py   | 18 ----------
 .../pipeline_alt_diffusion_img2img.py         | 18 ----------
 .../pipeline_cycle_diffusion.py               | 19 ----------
 .../pipeline_stable_diffusion.py              | 18 ----------
 ...peline_stable_diffusion_image_variation.py | 20 -----------
 .../pipeline_stable_diffusion_img2img.py      | 20 -----------
 .../pipeline_stable_diffusion_inpaint.py      | 20 -----------
 ...ipeline_stable_diffusion_inpaint_legacy.py | 20 -----------
 .../pipeline_stable_diffusion_upscale.py      | 19 ----------
 .../pipeline_stable_diffusion_safe.py         | 18 ----------
 .../versatile_diffusion/modeling_text_unet.py | 23 ------------
 ...ipeline_versatile_diffusion_dual_guided.py | 20 -----------
 ...ine_versatile_diffusion_image_variation.py | 20 -----------
 ...eline_versatile_diffusion_text_to_image.py | 20 -----------
 21 files changed, 37 insertions(+), 364 deletions(-)

diff --git a/examples/community/lpw_stable_diffusion.py b/examples/community/lpw_stable_diffusion.py
index 0e7dc9e1ed11..bad811a1f0a7 100644
--- a/examples/community/lpw_stable_diffusion.py
+++ b/examples/community/lpw_stable_diffusion.py
@@ -488,24 +488,6 @@ def __init__(
             feature_extractor=feature_extractor,
         )
 
-    def enable_xformers_memory_efficient_attention(self):
-        r"""
-        Enable memory efficient attention as implemented in xformers.
-
-        When this option is enabled, you should observe lower GPU memory usage and a potential speed up at inference
-        time. Speed up at training time is not guaranteed.
-
-        Warning: When Memory Efficient Attention and Sliced attention are both enabled, the Memory Efficient Attention
-        is used.
-        """
-        self.unet.set_use_memory_efficient_attention_xformers(True)
-
-    def disable_xformers_memory_efficient_attention(self):
-        r"""
-        Disable memory efficient attention as implemented in xformers.
-        """
-        self.unet.set_use_memory_efficient_attention_xformers(False)
-
     def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"):
         r"""
         Enable sliced attention computation.
diff --git a/examples/community/sd_text2img_k_diffusion.py b/examples/community/sd_text2img_k_diffusion.py
index 9592f7879ff0..beb7103c15ce 100755
--- a/examples/community/sd_text2img_k_diffusion.py
+++ b/examples/community/sd_text2img_k_diffusion.py
@@ -106,24 +106,6 @@ def set_sampler(self, scheduler_type: str):
         sampling = getattr(library, "sampling")
         self.sampler = getattr(sampling, scheduler_type)
 
-    def enable_xformers_memory_efficient_attention(self):
-        r"""
-        Enable memory efficient attention as implemented in xformers.
-
-        When this option is enabled, you should observe lower GPU memory usage and a potential speed up at inference
-        time. Speed up at training time is not guaranteed.
-
-        Warning: When Memory Efficient Attention and Sliced attention are both enabled, the Memory Efficient Attention
-        is used.
- """ - self.unet.set_use_memory_efficient_attention_xformers(True) - - def disable_xformers_memory_efficient_attention(self): - r""" - Disable memory efficient attention as implemented in xformers. - """ - self.unet.set_use_memory_efficient_attention_xformers(False) - def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"): r""" Enable sliced attention computation. diff --git a/examples/community/text_inpainting.py b/examples/community/text_inpainting.py index a4368f8b4365..f02d449fbd1d 100644 --- a/examples/community/text_inpainting.py +++ b/examples/community/text_inpainting.py @@ -183,24 +183,6 @@ def _execution_device(self): return torch.device(module._hf_hook.execution_device) return self.device - def enable_xformers_memory_efficient_attention(self): - r""" - Enable memory efficient attention as implemented in xformers. - - When this option is enabled, you should observe lower GPU memory usage and a potential speed up at inference - time. Speed up at training time is not guaranteed. - - Warning: When Memory Efficient Attention and Sliced attention are both enabled, the Memory Efficient Attention - is used. - """ - self.unet.set_use_memory_efficient_attention_xformers(True) - - def disable_xformers_memory_efficient_attention(self): - r""" - Disable memory efficient attention as implemented in xformers. - """ - self.unet.set_use_memory_efficient_attention_xformers(False) - @torch.no_grad() def __call__( self, diff --git a/src/diffusers/models/attention.py b/src/diffusers/models/attention.py index e9454a467af1..34a2ad3c9919 100644 --- a/src/diffusers/models/attention.py +++ b/src/diffusers/models/attention.py @@ -246,10 +246,6 @@ def forward(self, hidden_states, encoder_hidden_states=None, timestep=None, retu return Transformer2DModelOutput(sample=output) - def _set_use_memory_efficient_attention_xformers(self, use_memory_efficient_attention_xformers: bool): - for block in self.transformer_blocks: - block._set_use_memory_efficient_attention_xformers(use_memory_efficient_attention_xformers) - class AttentionBlock(nn.Module): """ @@ -428,7 +424,7 @@ def __init__( # if xformers is installed try to use memory_efficient_attention by default if is_xformers_available(): try: - self._set_use_memory_efficient_attention_xformers(True) + self.set_use_memory_efficient_attention_xformers(True) except Exception as e: warnings.warn( "Could not enable memory efficient attention. 
Make sure xformers is installed" @@ -439,7 +435,7 @@ def _set_attention_slice(self, slice_size): self.attn1._slice_size = slice_size self.attn2._slice_size = slice_size - def _set_use_memory_efficient_attention_xformers(self, use_memory_efficient_attention_xformers: bool): + def set_use_memory_efficient_attention_xformers(self, use_memory_efficient_attention_xformers: bool): if not is_xformers_available(): print("Here is how to install it") raise ModuleNotFoundError( @@ -849,11 +845,3 @@ def forward(self, hidden_states, encoder_hidden_states, timestep=None, return_di return (output_states,) return Transformer2DModelOutput(sample=output_states) - - def _set_attention_slice(self, slice_size): - for transformer in self.transformers: - transformer._set_attention_slice(slice_size) - - def _set_use_memory_efficient_attention_xformers(self, use_memory_efficient_attention_xformers: bool): - for transformer in self.transformers: - transformer._set_use_memory_efficient_attention_xformers(use_memory_efficient_attention_xformers) diff --git a/src/diffusers/models/unet_2d_blocks.py b/src/diffusers/models/unet_2d_blocks.py index cce7e7fd5a90..d78804b18e75 100644 --- a/src/diffusers/models/unet_2d_blocks.py +++ b/src/diffusers/models/unet_2d_blocks.py @@ -418,10 +418,6 @@ def set_attention_slice(self, slice_size): for attn in self.attentions: attn._set_attention_slice(slice_size) - def set_use_memory_efficient_attention_xformers(self, use_memory_efficient_attention_xformers: bool): - for attn in self.attentions: - attn._set_use_memory_efficient_attention_xformers(use_memory_efficient_attention_xformers) - def forward(self, hidden_states, temb=None, encoder_hidden_states=None): hidden_states = self.resnets[0](hidden_states, temb) for attn, resnet in zip(self.attentions, self.resnets[1:]): @@ -616,10 +612,6 @@ def set_attention_slice(self, slice_size): for attn in self.attentions: attn._set_attention_slice(slice_size) - def set_use_memory_efficient_attention_xformers(self, use_memory_efficient_attention_xformers: bool): - for attn in self.attentions: - attn._set_use_memory_efficient_attention_xformers(use_memory_efficient_attention_xformers) - def forward(self, hidden_states, temb=None, encoder_hidden_states=None): output_states = () @@ -1217,10 +1209,6 @@ def set_attention_slice(self, slice_size): self.gradient_checkpointing = False - def set_use_memory_efficient_attention_xformers(self, use_memory_efficient_attention_xformers: bool): - for attn in self.attentions: - attn._set_use_memory_efficient_attention_xformers(use_memory_efficient_attention_xformers) - def forward( self, hidden_states, diff --git a/src/diffusers/models/unet_2d_condition.py b/src/diffusers/models/unet_2d_condition.py index 1b43f960d997..43f032729ba6 100644 --- a/src/diffusers/models/unet_2d_condition.py +++ b/src/diffusers/models/unet_2d_condition.py @@ -252,17 +252,6 @@ def set_attention_slice(self, slice_size): if hasattr(block, "attentions") and block.attentions is not None: block.set_attention_slice(slice_size) - def set_use_memory_efficient_attention_xformers(self, use_memory_efficient_attention_xformers: bool): - for block in self.down_blocks: - if hasattr(block, "attentions") and block.attentions is not None: - block.set_use_memory_efficient_attention_xformers(use_memory_efficient_attention_xformers) - - self.mid_block.set_use_memory_efficient_attention_xformers(use_memory_efficient_attention_xformers) - - for block in self.up_blocks: - if hasattr(block, "attentions") and block.attentions is not None: - 
block.set_use_memory_efficient_attention_xformers(use_memory_efficient_attention_xformers) - def _set_gradient_checkpointing(self, module, value=False): if isinstance(module, (CrossAttnDownBlock2D, DownBlock2D, CrossAttnUpBlock2D, UpBlock2D)): module.gradient_checkpointing = value diff --git a/src/diffusers/pipeline_utils.py b/src/diffusers/pipeline_utils.py index 01bcc6a33803..c8ac46250268 100644 --- a/src/diffusers/pipeline_utils.py +++ b/src/diffusers/pipeline_utils.py @@ -784,3 +784,38 @@ def progress_bar(self, iterable=None, total=None): def set_progress_bar_config(self, **kwargs): self._progress_bar_config = kwargs + + def enable_xformers_memory_efficient_attention(self): + r""" + Enable memory efficient attention as implemented in xformers. + + When this option is enabled, you should observe lower GPU memory usage and a potential speed up at inference + time. Speed up at training time is not guaranteed. + + Warning: When Memory Efficient Attention and Sliced attention are both enabled, the Memory Efficient Attention + is used. + """ + self.set_use_memory_efficient_attention_xformers(True) + + def disable_xformers_memory_efficient_attention(self): + r""" + Disable memory efficient attention as implemented in xformers. + """ + self.set_use_memory_efficient_attention_xformers(False) + + def set_use_memory_efficient_attention_xformers(self, valid: bool) -> None: + # Recursively walk through all the children. + # Any children which exposes the set_use_memory_efficient_attention_xformers method + # gets the message + def fn_recursive_set_mem_eff(module: torch.nn.Module): + if hasattr(module, "set_use_memory_efficient_attention_xformers"): + module.set_use_memory_efficient_attention_xformers(valid) + + for child in module.children(): + fn_recursive_set_mem_eff(child) + + module_names, _, _ = self.extract_init_dict(dict(self.config)) + for module_name in module_names: + module = getattr(self, module_name) + if isinstance(module, torch.nn.Module): + fn_recursive_set_mem_eff(module) diff --git a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py index 9146d45bd3c5..7e55c1c1140b 100644 --- a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py +++ b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py @@ -166,24 +166,6 @@ def __init__( self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) self.register_to_config(requires_safety_checker=requires_safety_checker) - def enable_xformers_memory_efficient_attention(self): - r""" - Enable memory efficient attention as implemented in xformers. - - When this option is enabled, you should observe lower GPU memory usage and a potential speed up at inference - time. Speed up at training time is not guaranteed. - - Warning: When Memory Efficient Attention and Sliced attention are both enabled, the Memory Efficient Attention - is used. - """ - self.unet.set_use_memory_efficient_attention_xformers(True) - - def disable_xformers_memory_efficient_attention(self): - r""" - Disable memory efficient attention as implemented in xformers. - """ - self.unet.set_use_memory_efficient_attention_xformers(False) - def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"): r""" Enable sliced attention computation. 
diff --git a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py
index 16dbd626cded..b0657c30c89f 100644
--- a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py
+++ b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py
@@ -251,24 +251,6 @@ def _execution_device(self):
                 return torch.device(module._hf_hook.execution_device)
         return self.device
 
-    def enable_xformers_memory_efficient_attention(self):
-        r"""
-        Enable memory efficient attention as implemented in xformers.
-
-        When this option is enabled, you should observe lower GPU memory usage and a potential speed up at inference
-        time. Speed up at training time is not guaranteed.
-
-        Warning: When Memory Efficient Attention and Sliced attention are both enabled, the Memory Efficient Attention
-        is used.
-        """
-        self.unet.set_use_memory_efficient_attention_xformers(True)
-
-    def disable_xformers_memory_efficient_attention(self):
-        r"""
-        Disable memory efficient attention as implemented in xformers.
-        """
-        self.unet.set_use_memory_efficient_attention_xformers(False)
-
     def _encode_prompt(self, prompt, device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt):
         r"""
         Encodes the prompt into text encoder hidden states.
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py
index 9ebbc249f6d2..227ebca146e5 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py
@@ -285,25 +285,6 @@ def _execution_device(self):
                 return torch.device(module._hf_hook.execution_device)
         return self.device
 
-    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_xformers_memory_efficient_attention
-    def enable_xformers_memory_efficient_attention(self):
-        r"""
-        Enable memory efficient attention as implemented in xformers.
-
-        When this option is enabled, you should observe lower GPU memory usage and a potential speed up at inference
-        time. Speed up at training time is not guaranteed.
-
-        Warning: When Memory Efficient Attention and Sliced attention are both enabled, the Memory Efficient Attention
-        is used.
-        """
-        self.unet.set_use_memory_efficient_attention_xformers(True)
-
-    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_xformers_memory_efficient_attention
-    def disable_xformers_memory_efficient_attention(self):
-        r"""
-        Disable memory efficient attention as implemented in xformers.
- """ - self.unet.set_use_memory_efficient_attention_xformers(False) # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt def _encode_prompt(self, prompt, device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt): diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py index afaef6f481fb..1d616622aa42 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py @@ -165,24 +165,6 @@ def __init__( self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) self.register_to_config(requires_safety_checker=requires_safety_checker) - def enable_xformers_memory_efficient_attention(self): - r""" - Enable memory efficient attention as implemented in xformers. - - When this option is enabled, you should observe lower GPU memory usage and a potential speed up at inference - time. Speed up at training time is not guaranteed. - - Warning: When Memory Efficient Attention and Sliced attention are both enabled, the Memory Efficient Attention - is used. - """ - self.unet.set_use_memory_efficient_attention_xformers(True) - - def disable_xformers_memory_efficient_attention(self): - r""" - Disable memory efficient attention as implemented in xformers. - """ - self.unet.set_use_memory_efficient_attention_xformers(False) - def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"): r""" Enable sliced attention computation. diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py index e64a572a87af..19dad28bfa9b 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py @@ -134,26 +134,6 @@ def __init__( self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) self.register_to_config(requires_safety_checker=requires_safety_checker) - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_xformers_memory_efficient_attention - def enable_xformers_memory_efficient_attention(self): - r""" - Enable memory efficient attention as implemented in xformers. - - When this option is enabled, you should observe lower GPU memory usage and a potential speed up at inference - time. Speed up at training time is not guaranteed. - - Warning: When Memory Efficient Attention and Sliced attention are both enabled, the Memory Efficient Attention - is used. - """ - self.unet.set_use_memory_efficient_attention_xformers(True) - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_xformers_memory_efficient_attention - def disable_xformers_memory_efficient_attention(self): - r""" - Disable memory efficient attention as implemented in xformers. 
- """ - self.unet.set_use_memory_efficient_attention_xformers(False) - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_attention_slicing def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"): r""" diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py index a25acc0bd1a7..51385efeefe6 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py @@ -254,26 +254,6 @@ def _execution_device(self): return torch.device(module._hf_hook.execution_device) return self.device - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_xformers_memory_efficient_attention - def enable_xformers_memory_efficient_attention(self): - r""" - Enable memory efficient attention as implemented in xformers. - - When this option is enabled, you should observe lower GPU memory usage and a potential speed up at inference - time. Speed up at training time is not guaranteed. - - Warning: When Memory Efficient Attention and Sliced attention are both enabled, the Memory Efficient Attention - is used. - """ - self.unet.set_use_memory_efficient_attention_xformers(True) - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_xformers_memory_efficient_attention - def disable_xformers_memory_efficient_attention(self): - r""" - Disable memory efficient attention as implemented in xformers. - """ - self.unet.set_use_memory_efficient_attention_xformers(False) - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt def _encode_prompt(self, prompt, device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt): r""" diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py index 6cb2766bc22e..a61d0e3619c4 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py @@ -300,26 +300,6 @@ def enable_sequential_cpu_offload(self, gpu_id=0): # fix by only offloading self.safety_checker for now cpu_offload(self.safety_checker.vision_model, device) - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_xformers_memory_efficient_attention - def enable_xformers_memory_efficient_attention(self): - r""" - Enable memory efficient attention as implemented in xformers. - - When this option is enabled, you should observe lower GPU memory usage and a potential speed up at inference - time. Speed up at training time is not guaranteed. - - Warning: When Memory Efficient Attention and Sliced attention are both enabled, the Memory Efficient Attention - is used. - """ - self.unet.set_use_memory_efficient_attention_xformers(True) - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_xformers_memory_efficient_attention - def disable_xformers_memory_efficient_attention(self): - r""" - Disable memory efficient attention as implemented in xformers. 
- """ - self.unet.set_use_memory_efficient_attention_xformers(False) - @property # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._execution_device def _execution_device(self): diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py index 2440b6d5ad73..525bae55afc5 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py @@ -248,26 +248,6 @@ def enable_sequential_cpu_offload(self, gpu_id=0): # fix by only offloading self.safety_checker for now cpu_offload(self.safety_checker.vision_model, device) - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_xformers_memory_efficient_attention - def enable_xformers_memory_efficient_attention(self): - r""" - Enable memory efficient attention as implemented in xformers. - - When this option is enabled, you should observe lower GPU memory usage and a potential speed up at inference - time. Speed up at training time is not guaranteed. - - Warning: When Memory Efficient Attention and Sliced attention are both enabled, the Memory Efficient Attention - is used. - """ - self.unet.set_use_memory_efficient_attention_xformers(True) - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_xformers_memory_efficient_attention - def disable_xformers_memory_efficient_attention(self): - r""" - Disable memory efficient attention as implemented in xformers. - """ - self.unet.set_use_memory_efficient_attention_xformers(False) - @property # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._execution_device def _execution_device(self): diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py index c9c238ce9a1f..9f95c7e9e824 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py @@ -143,25 +143,6 @@ def enable_sequential_cpu_offload(self, gpu_id=0): if cpu_offloaded_model is not None: cpu_offload(cpu_offloaded_model, device) - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_xformers_memory_efficient_attention - def enable_xformers_memory_efficient_attention(self): - r""" - Enable memory efficient attention as implemented in xformers. - - When this option is enabled, you should observe lower GPU memory usage and a potential speed up at inference - time. Speed up at training time is not guaranteed. - - Warning: When Memory Efficient Attention and Sliced attention are both enabled, the Memory Efficient Attention - is used. - """ - self.unet.set_use_memory_efficient_attention_xformers(True) - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_xformers_memory_efficient_attention - def disable_xformers_memory_efficient_attention(self): - r""" - Disable memory efficient attention as implemented in xformers. 
- """ - self.unet.set_use_memory_efficient_attention_xformers(False) @property # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._execution_device diff --git a/src/diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py b/src/diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py index 7f08e4010373..d83bcf510f84 100644 --- a/src/diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +++ b/src/diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py @@ -182,24 +182,6 @@ def safety_concept(self, concept): """ self._safety_text_concept = concept - def enable_xformers_memory_efficient_attention(self): - r""" - Enable memory efficient attention as implemented in xformers. - - When this option is enabled, you should observe lower GPU memory usage and a potential speed up at inference - time. Speed up at training time is not guaranteed. - - Warning: When Memory Efficient Attention and Sliced attention are both enabled, the Memory Efficient Attention - is used. - """ - self.unet.set_use_memory_efficient_attention_xformers(True) - - def disable_xformers_memory_efficient_attention(self): - r""" - Disable memory efficient attention as implemented in xformers. - """ - self.unet.set_use_memory_efficient_attention_xformers(False) - def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"): r""" Enable sliced attention computation. diff --git a/src/diffusers/pipelines/versatile_diffusion/modeling_text_unet.py b/src/diffusers/pipelines/versatile_diffusion/modeling_text_unet.py index 37a79b5c1be7..e782274d82f7 100644 --- a/src/diffusers/pipelines/versatile_diffusion/modeling_text_unet.py +++ b/src/diffusers/pipelines/versatile_diffusion/modeling_text_unet.py @@ -330,17 +330,6 @@ def set_attention_slice(self, slice_size): if hasattr(block, "attentions") and block.attentions is not None: block.set_attention_slice(slice_size) - def set_use_memory_efficient_attention_xformers(self, use_memory_efficient_attention_xformers: bool): - for block in self.down_blocks: - if hasattr(block, "attentions") and block.attentions is not None: - block.set_use_memory_efficient_attention_xformers(use_memory_efficient_attention_xformers) - - self.mid_block.set_use_memory_efficient_attention_xformers(use_memory_efficient_attention_xformers) - - for block in self.up_blocks: - if hasattr(block, "attentions") and block.attentions is not None: - block.set_use_memory_efficient_attention_xformers(use_memory_efficient_attention_xformers) - def _set_gradient_checkpointing(self, module, value=False): if isinstance(module, (CrossAttnDownBlockFlat, DownBlockFlat, CrossAttnUpBlockFlat, UpBlockFlat)): module.gradient_checkpointing = value @@ -761,10 +750,6 @@ def set_attention_slice(self, slice_size): for attn in self.attentions: attn._set_attention_slice(slice_size) - def set_use_memory_efficient_attention_xformers(self, use_memory_efficient_attention_xformers: bool): - for attn in self.attentions: - attn._set_use_memory_efficient_attention_xformers(use_memory_efficient_attention_xformers) - def forward(self, hidden_states, temb=None, encoder_hidden_states=None): output_states = () @@ -976,10 +961,6 @@ def set_attention_slice(self, slice_size): self.gradient_checkpointing = False - def set_use_memory_efficient_attention_xformers(self, use_memory_efficient_attention_xformers: bool): - for attn in self.attentions: - 
attn._set_use_memory_efficient_attention_xformers(use_memory_efficient_attention_xformers) - def forward( self, hidden_states, @@ -1122,10 +1103,6 @@ def set_attention_slice(self, slice_size): for attn in self.attentions: attn._set_attention_slice(slice_size) - def set_use_memory_efficient_attention_xformers(self, use_memory_efficient_attention_xformers: bool): - for attn in self.attentions: - attn._set_use_memory_efficient_attention_xformers(use_memory_efficient_attention_xformers) - def forward(self, hidden_states, temb=None, encoder_hidden_states=None): hidden_states = self.resnets[0](hidden_states, temb) for attn, resnet in zip(self.attentions, self.resnets[1:]): diff --git a/src/diffusers/pipelines/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py b/src/diffusers/pipelines/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py index fa1754a4f062..3a90ae2c7620 100644 --- a/src/diffusers/pipelines/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +++ b/src/diffusers/pipelines/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py @@ -147,26 +147,6 @@ def _revert_dual_attention(self): self.image_unet.register_to_config(dual_cross_attention=False) - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_xformers_memory_efficient_attention with unet->image_unet - def enable_xformers_memory_efficient_attention(self): - r""" - Enable memory efficient attention as implemented in xformers. - - When this option is enabled, you should observe lower GPU memory usage and a potential speed up at inference - time. Speed up at training time is not guaranteed. - - Warning: When Memory Efficient Attention and Sliced attention are both enabled, the Memory Efficient Attention - is used. - """ - self.image_unet.set_use_memory_efficient_attention_xformers(True) - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_xformers_memory_efficient_attention with unet->image_unet - def disable_xformers_memory_efficient_attention(self): - r""" - Disable memory efficient attention as implemented in xformers. - """ - self.image_unet.set_use_memory_efficient_attention_xformers(False) - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_attention_slicing with unet->image_unet def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"): r""" diff --git a/src/diffusers/pipelines/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py b/src/diffusers/pipelines/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py index 3e51ce6371f0..b68dd244ce47 100644 --- a/src/diffusers/pipelines/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +++ b/src/diffusers/pipelines/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py @@ -73,26 +73,6 @@ def __init__( ) self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_xformers_memory_efficient_attention with unet->image_unet - def enable_xformers_memory_efficient_attention(self): - r""" - Enable memory efficient attention as implemented in xformers. - - When this option is enabled, you should observe lower GPU memory usage and a potential speed up at inference - time. Speed up at training time is not guaranteed. 
-
-        Warning: When Memory Efficient Attention and Sliced attention are both enabled, the Memory Efficient Attention
-        is used.
-        """
-        self.image_unet.set_use_memory_efficient_attention_xformers(True)
-
-    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_xformers_memory_efficient_attention with unet->image_unet
-    def disable_xformers_memory_efficient_attention(self):
-        r"""
-        Disable memory efficient attention as implemented in xformers.
-        """
-        self.image_unet.set_use_memory_efficient_attention_xformers(False)
-
     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_attention_slicing with unet->image_unet
     def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"):
         r"""
diff --git a/src/diffusers/pipelines/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py b/src/diffusers/pipelines/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py
index e77f5a2f22e4..c9c4bb7dc40e 100644
--- a/src/diffusers/pipelines/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py
+++ b/src/diffusers/pipelines/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py
@@ -98,26 +98,6 @@ def _swap_unet_attention_blocks(self):
 
     def remove_unused_weights(self):
         self.register_modules(text_unet=None)
 
-    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_xformers_memory_efficient_attention with unet->image_unet
-    def enable_xformers_memory_efficient_attention(self):
-        r"""
-        Enable memory efficient attention as implemented in xformers.
-
-        When this option is enabled, you should observe lower GPU memory usage and a potential speed up at inference
-        time. Speed up at training time is not guaranteed.
-
-        Warning: When Memory Efficient Attention and Sliced attention are both enabled, the Memory Efficient Attention
-        is used.
-        """
-        self.image_unet.set_use_memory_efficient_attention_xformers(True)
-
-    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_xformers_memory_efficient_attention with unet->image_unet
-    def disable_xformers_memory_efficient_attention(self):
-        r"""
-        Disable memory efficient attention as implemented in xformers.
-        """
-        self.image_unet.set_use_memory_efficient_attention_xformers(False)
-
     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_attention_slicing with unet->image_unet
     def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"):
         r"""

From 40b3b1e2ab66dd248065f3f2087966e1b638f509 Mon Sep 17 00:00:00 2001
From: Benjamin Lefaudeux
Date: Wed, 30 Nov 2022 16:07:57 +0000
Subject: [PATCH 2/2] black, too bad there's no pre-commit?

---
 .../pipelines/stable_diffusion/pipeline_cycle_diffusion.py | 1 -
 .../stable_diffusion/pipeline_stable_diffusion_upscale.py  | 1 -
 2 files changed, 2 deletions(-)

diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py
index 227ebca146e5..f9a5b7a05c1a 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py
@@ -285,7 +285,6 @@ def _execution_device(self):
                 return torch.device(module._hf_hook.execution_device)
         return self.device
 
-
     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt
     def _encode_prompt(self, prompt, device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt):
         r"""
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py
index 9f95c7e9e824..d3c30597f01a 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py
@@ -143,7 +143,6 @@ def enable_sequential_cpu_offload(self, gpu_id=0):
             if cpu_offloaded_model is not None:
                 cpu_offload(cpu_offloaded_model, device)
 
-
     @property
     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._execution_device
     def _execution_device(self):
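
Note on the resulting API: with this series the xformers toggle lives on DiffusionPipeline itself and is applied recursively, so one call covers the unet, the vae and any other registered torch.nn.Module, and the per-pipeline copies become unnecessary. A minimal usage sketch follows; the checkpoint id and prompt are illustrative only and not part of this patch:

    import torch
    from diffusers import StableDiffusionPipeline

    # Any Stable Diffusion checkpoint works; this id is just an example.
    pipe = StableDiffusionPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
    ).to("cuda")

    # A single call now walks every registered torch.nn.Module and flips the
    # flag on each submodule that exposes set_use_memory_efficient_attention_xformers.
    pipe.enable_xformers_memory_efficient_attention()

    image = pipe("an astronaut riding a horse").images[0]

Because the walk is driven by hasattr rather than by type, new blocks pick up the toggle automatically as long as they expose the setter, which is what makes the per-model plumbing removed above redundant.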