2 changes: 1 addition & 1 deletion src/diffusers/__init__.py
@@ -31,11 +31,11 @@
 from .pipeline_utils import DiffusionPipeline
 from .pipelines import DDIMPipeline, DDPMPipeline, KarrasVePipeline, LDMPipeline, PNDMPipeline, ScoreSdeVePipeline
 from .schedulers import (
+    BaseScheduler,
     DDIMScheduler,
     DDPMScheduler,
     KarrasVeScheduler,
     PNDMScheduler,
-    SchedulerMixin,
     ScoreSdeVeScheduler,
 )
 from .training_utils import EMAModel
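The rename ripples through every public import surface. A minimal sketch of what downstream code looks like once this change lands (hypothetical usage, assuming the re-exports shown above):

```python
# BaseScheduler replaces SchedulerMixin as the common scheduler parent
# exported from the package root.
from diffusers import BaseScheduler, DDIMScheduler, DDPMScheduler

# Type checks that previously referenced SchedulerMixin now target BaseScheduler.
assert issubclass(DDIMScheduler, BaseScheduler)
assert issubclass(DDPMScheduler, BaseScheduler)
```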
4 changes: 2 additions & 2 deletions src/diffusers/pipeline_flax_utils.py
@@ -30,7 +30,7 @@

 from .configuration_utils import ConfigMixin
 from .modeling_flax_utils import FLAX_WEIGHTS_NAME, FlaxModelMixin
-from .schedulers.scheduling_utils import SCHEDULER_CONFIG_NAME, SchedulerMixin
+from .schedulers.scheduling_utils import SCHEDULER_CONFIG_NAME, BaseScheduler
 from .utils import CONFIG_NAME, DIFFUSERS_CACHE, BaseOutput, is_transformers_available, logging


@@ -436,7 +436,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P
             else:
                 loaded_sub_model, loaded_params = load_method(loadable_folder, _do_init=False)
                 params[name] = loaded_params
-            elif issubclass(class_obj, SchedulerMixin):
+            elif issubclass(class_obj, BaseScheduler):
                 loaded_sub_model, scheduler_state = load_method(loadable_folder)
                 params[name] = scheduler_state
             else:
35 changes: 12 additions & 23 deletions src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
@@ -42,6 +42,14 @@ class StableDiffusionPipeline(DiffusionPipeline):
             Model that extracts features from generated images to be used as inputs for the `safety_checker`.
     """
 
+    vae: AutoencoderKL
+    text_encoder: CLIPTextModel
+    tokenizer: CLIPTokenizer
+    unet: UNet2DConditionModel
+    scheduler: Union[DDIMScheduler, PNDMScheduler, LMSDiscreteScheduler]
+    safety_checker: StableDiffusionSafetyChecker
+    feature_extractor: CLIPFeatureExtractor
+
     def __init__(
         self,
         vae: AutoencoderKL,
@@ -231,30 +239,15 @@ def __call__(
         if latents.shape != latents_shape:
             raise ValueError(f"Unexpected latents shape, got {latents.shape}, expected {latents_shape}")
         latents = latents.to(self.device)
+        latents = self.scheduler.scale_initial_noise(latents)
 
         # set timesteps
         self.scheduler.set_timesteps(num_inference_steps)
 
-        # if we use LMSDiscreteScheduler, let's make sure latents are multiplied by sigmas
-        if isinstance(self.scheduler, LMSDiscreteScheduler):
-            latents = latents * self.scheduler.sigmas[0]
-
-        # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
-        # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
-        # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
-        # and should be between [0, 1]
-        accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
-        extra_step_kwargs = {}
-        if accepts_eta:
-            extra_step_kwargs["eta"] = eta
-
-        for i, t in enumerate(self.progress_bar(self.scheduler.timesteps)):
+        for t in self.progress_bar(self.scheduler.timesteps):
             # expand the latents if we are doing classifier free guidance
             latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
-            if isinstance(self.scheduler, LMSDiscreteScheduler):
-                sigma = self.scheduler.sigmas[i]
-                # the model input needs to be scaled to match the continuous ODE formulation in K-LMS
-                latent_model_input = latent_model_input / ((sigma**2 + 1) ** 0.5)
+            latent_model_input, t = self.scheduler.scale_model_inputs(latent_model_input, t)
 
             # predict the noise residual
             noise_pred = self.unet(latent_model_input, t, encoder_hidden_states=text_embeddings).sample
@@ -264,11 +257,7 @@ def __call__(
                 noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
                 noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
 
-            # compute the previous noisy sample x_t -> x_t-1
-            if isinstance(self.scheduler, LMSDiscreteScheduler):
-                latents = self.scheduler.step(noise_pred, i, latents, **extra_step_kwargs).prev_sample
-            else:
-                latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs).prev_sample
+            latents = self.scheduler.step(noise_pred, t, latents, eta=eta).prev_sample
 
         # scale and decode the image latents with vae
         latents = 1 / 0.18215 * latents
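The net effect of these hunks is that `__call__` no longer special-cases `LMSDiscreteScheduler`: initial-noise scaling, per-step input scaling, and `eta` handling all move behind the scheduler interface. A hedged sketch of the scheduler-agnostic loop this enables (`scale_initial_noise` and `scale_model_inputs` are the new hooks this PR relies on; classifier-free guidance is omitted for brevity, and `unet`/`text_embeddings` stand in for the real pipeline state):

```python
import torch

def denoising_loop(scheduler, unet, latents, text_embeddings,
                   num_inference_steps=50, eta=0.0):
    # Scheduler-specific initial noise scaling is hidden behind a uniform hook.
    latents = scheduler.scale_initial_noise(latents)
    scheduler.set_timesteps(num_inference_steps)
    for t in scheduler.timesteps:
        # Per-step input scaling (e.g. the K-LMS sigma division) now lives
        # inside the scheduler as well.
        model_input, t = scheduler.scale_model_inputs(latents, t)
        noise_pred = unet(model_input, t, encoder_hidden_states=text_embeddings).sample
        # eta is passed unconditionally; the diff implies every scheduler's
        # step() now accepts it, rather than being probed via inspect.signature.
        latents = scheduler.step(noise_pred, t, latents, eta=eta).prev_sample
    return latents
```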
2 changes: 1 addition & 1 deletion src/diffusers/schedulers/__init__.py
@@ -23,7 +23,7 @@
     from .scheduling_pndm import PNDMScheduler
     from .scheduling_sde_ve import ScoreSdeVeScheduler
     from .scheduling_sde_vp import ScoreSdeVpScheduler
-    from .scheduling_utils import SchedulerMixin
+    from .scheduling_utils import BaseScheduler
 else:
     from ..utils.dummy_pt_objects import *  # noqa F403

4 changes: 2 additions & 2 deletions src/diffusers/schedulers/scheduling_ddim.py
Expand Up @@ -23,7 +23,7 @@
 import torch
 
 from ..configuration_utils import ConfigMixin, register_to_config
-from .scheduling_utils import SchedulerMixin, SchedulerOutput
+from .scheduling_utils import BaseScheduler, SchedulerOutput
 
 
 def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999):
@@ -55,7 +55,7 @@ def alpha_bar(time_step):
     return np.array(betas, dtype=np.float32)
 
 
-class DDIMScheduler(SchedulerMixin, ConfigMixin):
+class DDIMScheduler(BaseScheduler, ConfigMixin):
     """
     Denoising diffusion implicit models is a scheduler that extends the denoising procedure introduced in denoising
     diffusion probabilistic models (DDPMs) with non-Markovian guidance.
4 changes: 2 additions & 2 deletions src/diffusers/schedulers/scheduling_ddim_flax.py
Expand Up @@ -23,7 +23,7 @@
 import jax.numpy as jnp
 
 from ..configuration_utils import ConfigMixin, register_to_config
-from .scheduling_utils import SchedulerMixin, SchedulerOutput
+from .scheduling_utils import BaseScheduler, SchedulerOutput
 
 
 def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999) -> jnp.ndarray:
@@ -72,7 +72,7 @@ class FlaxSchedulerOutput(SchedulerOutput):
     state: DDIMSchedulerState
 
 
-class FlaxDDIMScheduler(SchedulerMixin, ConfigMixin):
+class FlaxDDIMScheduler(BaseScheduler, ConfigMixin):
     """
     Denoising diffusion implicit models is a scheduler that extends the denoising procedure introduced in denoising
     diffusion probabilistic models (DDPMs) with non-Markovian guidance.
68 changes: 5 additions & 63 deletions src/diffusers/schedulers/scheduling_ddpm.py
@@ -20,40 +20,10 @@
 import numpy as np
 import torch
 
-from ..configuration_utils import ConfigMixin, register_to_config
-from .scheduling_utils import SchedulerMixin, SchedulerOutput
+from .scheduling_utils import BaseScheduler, SchedulerOutput
 
 
-def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999):
-    """
-    Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
-    (1-beta) over time from t = [0,1].
-
-    Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
-    to that part of the diffusion process.
-
-
-    Args:
-        num_diffusion_timesteps (`int`): the number of betas to produce.
-        max_beta (`float`): the maximum beta to use; use values lower than 1 to
-            prevent singularities.
-
-    Returns:
-        betas (`np.ndarray`): the betas used by the scheduler to step the model outputs
-    """
-
-    def alpha_bar(time_step):
-        return math.cos((time_step + 0.008) / 1.008 * math.pi / 2) ** 2
-
-    betas = []
-    for i in range(num_diffusion_timesteps):
-        t1 = i / num_diffusion_timesteps
-        t2 = (i + 1) / num_diffusion_timesteps
-        betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta))
-    return np.array(betas, dtype=np.float32)
-
-
-class DDPMScheduler(SchedulerMixin, ConfigMixin):
+class DDPMScheduler(BaseScheduler):
     """
     Denoising diffusion probabilistic models (DDPMs) explores the connections between denoising score matching and
     Langevin dynamics sampling.
@@ -83,43 +53,15 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):

"""

@register_to_config
def __init__(
self,
num_train_timesteps: int = 1000,
beta_start: float = 0.0001,
beta_end: float = 0.02,
beta_schedule: str = "linear",
trained_betas: Optional[np.ndarray] = None,
variance_type: str = "fixed_small",
clip_sample: bool = True,
tensor_format: str = "pt",
**kwargs,
):
if trained_betas is not None:
self.betas = np.asarray(trained_betas)
elif beta_schedule == "linear":
self.betas = np.linspace(beta_start, beta_end, num_train_timesteps, dtype=np.float32)
elif beta_schedule == "scaled_linear":
# this schedule is very specific to the latent diffusion model.
self.betas = np.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=np.float32) ** 2
elif beta_schedule == "squaredcos_cap_v2":
# Glide cosine schedule
self.betas = betas_for_alpha_bar(num_train_timesteps)
else:
raise NotImplementedError(f"{beta_schedule} does is not implemented for {self.__class__}")

self.alphas = 1.0 - self.betas
self.alphas_cumprod = np.cumprod(self.alphas, axis=0)
self.one = np.array(1.0)

# setable values
self.num_inference_steps = None
self.timesteps = np.arange(0, num_train_timesteps)[::-1].copy()

self.tensor_format = tensor_format
self.set_format(tensor_format=tensor_format)

super().__init__(**kwargs)
self.variance_type = variance_type
self.clip_sample = clip_sample

def set_timesteps(self, num_inference_steps: int):
"""
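The beta-schedule construction deleted above has to live somewhere: `DDPMScheduler` now forwards `**kwargs` to `super().__init__`, so presumably `BaseScheduler` (not shown in this diff) absorbs it. A hedged reconstruction of what the base constructor would minimally need to do, mirroring the removed code:

```python
import numpy as np

class BaseScheduler:
    """Hypothetical sketch -- the real class lives in schedulers/scheduling_utils.py."""

    def __init__(self, num_train_timesteps=1000, beta_start=0.0001, beta_end=0.02,
                 beta_schedule="linear", trained_betas=None, tensor_format="pt"):
        # Mirrors the schedule construction removed from DDPMScheduler.__init__.
        if trained_betas is not None:
            self.betas = np.asarray(trained_betas)
        elif beta_schedule == "linear":
            self.betas = np.linspace(beta_start, beta_end, num_train_timesteps, dtype=np.float32)
        else:
            raise NotImplementedError(f"{beta_schedule} is not implemented for {self.__class__}")
        self.alphas = 1.0 - self.betas
        self.alphas_cumprod = np.cumprod(self.alphas, axis=0)
        # Setable values, reconfigured by set_timesteps() before inference.
        self.num_inference_steps = None
        self.timesteps = np.arange(0, num_train_timesteps)[::-1].copy()
```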
4 changes: 2 additions & 2 deletions src/diffusers/schedulers/scheduling_ddpm_flax.py
Expand Up @@ -23,7 +23,7 @@
 from jax import random
 
 from ..configuration_utils import ConfigMixin, register_to_config
-from .scheduling_utils import SchedulerMixin, SchedulerOutput
+from .scheduling_utils import BaseScheduler, SchedulerOutput
 
 
 def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999) -> jnp.ndarray:
@@ -71,7 +71,7 @@ class FlaxSchedulerOutput(SchedulerOutput):
     state: DDPMSchedulerState
 
 
-class FlaxDDPMScheduler(SchedulerMixin, ConfigMixin):
+class FlaxDDPMScheduler(BaseScheduler, ConfigMixin):
     """
     Denoising diffusion probabilistic models (DDPMs) explores the connections between denoising score matching and
     Langevin dynamics sampling.
35 changes: 22 additions & 13 deletions src/diffusers/schedulers/scheduling_karras_ve.py
@@ -21,7 +21,7 @@

 from ..configuration_utils import ConfigMixin, register_to_config
 from ..utils import BaseOutput
-from .scheduling_utils import SchedulerMixin
+from .scheduling_utils import BaseScheduler
 
 
 @dataclass
@@ -41,7 +41,7 @@ class KarrasVeOutput(BaseOutput):
     derivative: torch.FloatTensor
 
 
-class KarrasVeScheduler(SchedulerMixin, ConfigMixin):
+class KarrasVeScheduler(BaseScheduler, ConfigMixin):
     """
     Stochastic sampling from Karras et al. [1] tailored to the Variance-Expanding (VE) models [2]. Use Algorithm 2 and
     the VE column of Table 1 from [1] for reference.
@@ -74,16 +74,15 @@ class KarrasVeScheduler(SchedulerMixin, ConfigMixin):

"""

@register_to_config
def __init__(
self,
sigma_min: float = 0.02,
sigma_max: float = 100,
s_noise: float = 1.007,
s_churn: float = 80,
s_min: float = 0.05,
s_max: float = 50,
tensor_format: str = "pt",
def __old_init__(
self,
sigma_min: float = 0.02,
sigma_max: float = 100,
s_noise: float = 1.007,
s_churn: float = 80,
s_min: float = 0.05,
s_max: float = 50,
tensor_format: str = "pt",
):
# setable values
self.num_inference_steps = None
@@ -93,7 +92,17 @@ def __init__(
         self.tensor_format = tensor_format
         self.set_format(tensor_format=tensor_format)
 
-    def set_timesteps(self, num_inference_steps: int):
+    def __init__(
+        self,
+        s_noise: float = 1.007,
+        s_churn: float = 80,
+        s_min: float = 0.05,
+        s_max: float = 50,
+        **kwargs,
+    ):
+        super().__init__(s_noise=s_noise, s_churn=s_churn, s_min=s_min, s_max=s_max, **kwargs)
+
+    def set_schedule(self, num_inference_steps: int):
         """
         Sets the continuous timesteps used for the diffusion chain. Supporting function to be run before inference.
 
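Construction and schedule setup are decoupled here: the sigma bounds leave the public constructor (the old signature is kept as `__old_init__`, apparently for reference during the migration), the churn parameters are forwarded to the base class, and `set_timesteps` becomes `set_schedule`. A hedged usage sketch under those assumptions:

```python
from diffusers import KarrasVeScheduler

# Churn/noise parameters go through the constructor and on to BaseScheduler.
scheduler = KarrasVeScheduler(s_churn=80, s_noise=1.007)

# The sigma schedule is configured per run, via the renamed hook.
scheduler.set_schedule(num_inference_steps=50)
```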
4 changes: 2 additions & 2 deletions src/diffusers/schedulers/scheduling_karras_ve_flax.py
Expand Up @@ -22,7 +22,7 @@

 from ..configuration_utils import ConfigMixin, register_to_config
 from ..utils import BaseOutput
-from .scheduling_utils import SchedulerMixin
+from .scheduling_utils import BaseScheduler
 
 
 @flax.struct.dataclass
@@ -56,7 +56,7 @@ class FlaxKarrasVeOutput(BaseOutput):
     state: KarrasVeSchedulerState
 
 
-class FlaxKarrasVeScheduler(SchedulerMixin, ConfigMixin):
+class FlaxKarrasVeScheduler(BaseScheduler, ConfigMixin):
     """
     Stochastic sampling from Karras et al. [1] tailored to the Variance-Expanding (VE) models [2]. Use Algorithm 2 and
     the VE column of Table 1 from [1] for reference.