@@ -6,6 +6,7 @@

from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer

from ...configuration_utils import FrozenDict
from ...models import AutoencoderKL, UNet2DConditionModel
from ...pipeline_utils import DiffusionPipeline
from ...schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler
@@ -53,6 +54,21 @@ def __init__(
):
super().__init__()
scheduler = scheduler.set_format("pt")

if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1:
Contributor:
@jonatanklosko - Currently the slow tests are broken because we haven't (and can't yet) update the configs online. So for now I think we need to emit a deprecation warning here - ok for you?

Contributor Author:
Perfect!

Out of curiosity, is there an issue with updating the configs now? I assume an additional property in the config doesn't hurt, even if it's ignored by previous versions?

Contributor:
Most users are on the 0.3.0 version and therefore will not make use of this code, but they will nevertheless use the newest config file. Given the high usage of stable diffusion: https://huggingface.co/CompVis/stable-diffusion-v1-4 (>500k downloads per month), I think it would be better not to burden users with an extra config parameter that would trigger a warning in 0.3.0.

So, it would indeed not break anything, but an unused config parameter would trigger a warning (I haven't checked, though, whether our code actually handles this correctly), which would probably scare users.

Does that make sense?

Contributor Author:
Ah totally, I didn't realise the extra option would cause a warning :)
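For context on the warning concern: older releases typically flag configuration keys they do not recognise. Below is a minimal sketch of that general pattern with a hypothetical loader and key set — an assumption for illustration, not the actual diffusers 0.3.0 code.

```python
# Hypothetical config loader, for illustration only (not the diffusers implementation).
# An older release that does not know about `steps_offset` would treat it as an
# unexpected key and warn, which is the user-facing noise discussed above.
import warnings

EXPECTED_KEYS = {"num_train_timesteps", "beta_start", "beta_end", "beta_schedule", "set_alpha_to_one"}


def load_scheduler_config(config: dict) -> dict:
    unexpected = set(config) - EXPECTED_KEYS
    if unexpected:
        warnings.warn(f"The config attributes {sorted(unexpected)} are not expected and will be ignored.")
    return {key: value for key, value in config.items() if key in EXPECTED_KEYS}


# A config updated for the new release triggers the warning on the old one:
load_scheduler_config({"num_train_timesteps": 1000, "beta_start": 0.00085, "beta_end": 0.012, "steps_offset": 1})
```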

warnings.warn(
f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`"
f" should be set to 1 istead of {scheduler.config.steps_offset}. Please make sure "
"to update the config accordingly as leaving `steps_offset` might led to incorrect results"
" in future versions. If you have downloaded this checkpoint from the Hugging Face Hub,"
" it would be very nice if you could open a Pull request for the `scheduler/scheduler_config.json`"
" file",
DeprecationWarning,
)
new_config = dict(scheduler.config)
new_config["steps_offset"] = 1
scheduler._internal_dict = FrozenDict(new_config)

self.register_modules(
vae=vae,
text_encoder=text_encoder,
@@ -217,12 +233,7 @@ def __call__(
latents = latents.to(self.device)

# set timesteps
accepts_offset = "offset" in set(inspect.signature(self.scheduler.set_timesteps).parameters.keys())
extra_set_kwargs = {}
if accepts_offset:
extra_set_kwargs["offset"] = 1

self.scheduler.set_timesteps(num_inference_steps, **extra_set_kwargs)
self.scheduler.set_timesteps(num_inference_steps)
Comment on lines 235 to +236
Contributor Author:
Should we try to override the configuration here, or are you going to update the configuration in the relevant hf repositories?


# if we use LMSDiscreteScheduler, let's make sure latents are multiplied by sigmas
if isinstance(self.scheduler, LMSDiscreteScheduler):
@@ -1,4 +1,5 @@
import inspect
import warnings
from typing import List, Optional, Union

import numpy as np
@@ -7,6 +8,7 @@
import PIL
from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer

from ...configuration_utils import FrozenDict
from ...models import AutoencoderKL, UNet2DConditionModel
from ...pipeline_utils import DiffusionPipeline
from ...schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler
@@ -64,6 +66,21 @@ def __init__(
):
super().__init__()
scheduler = scheduler.set_format("pt")

if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1:
warnings.warn(
f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`"
f" should be set to 1 istead of {scheduler.config.steps_offset}. Please make sure "
"to update the config accordingly as leaving `steps_offset` might led to incorrect results"
" in future versions. If you have downloaded this checkpoint from the Hugging Face Hub,"
" it would be very nice if you could open a Pull request for the `scheduler/scheduler_config.json`"
" file",
DeprecationWarning,
)
new_config = dict(scheduler.config)
new_config["steps_offset"] = 1
scheduler._internal_dict = FrozenDict(new_config)

self.register_modules(
vae=vae,
text_encoder=text_encoder,
@@ -169,14 +186,7 @@ def __call__(
raise ValueError(f"The value of strength should in [0.0, 1.0] but is {strength}")

# set timesteps
accepts_offset = "offset" in set(inspect.signature(self.scheduler.set_timesteps).parameters.keys())
extra_set_kwargs = {}
offset = 0
if accepts_offset:
offset = 1
extra_set_kwargs["offset"] = 1

self.scheduler.set_timesteps(num_inference_steps, **extra_set_kwargs)
self.scheduler.set_timesteps(num_inference_steps)

if isinstance(init_image, PIL.Image.Image):
init_image = preprocess(init_image)
@@ -190,6 +200,7 @@ def __call__(
init_latents = torch.cat([init_latents] * batch_size)

# get the original timestep using init_timestep
offset = self.scheduler.config.get("steps_offset", 0)
Contributor:
Nice!

init_timestep = int(num_inference_steps * strength) + offset
init_timestep = min(init_timestep, num_inference_steps)
if isinstance(self.scheduler, LMSDiscreteScheduler):
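To make the new img2img flow concrete, here is a small worked example of how `steps_offset` feeds into the starting timestep. The numbers are chosen for illustration and are not taken from this PR or its tests.

```python
# Illustration only: how the img2img pipeline derives its starting step once
# `steps_offset` lives in the scheduler config. All values below are assumptions.
num_inference_steps = 50
strength = 0.75
steps_offset = 1  # what self.scheduler.config.get("steps_offset", 0) would return

init_timestep = int(num_inference_steps * strength) + steps_offset  # 37 + 1 = 38
init_timestep = min(init_timestep, num_inference_steps)             # still 38

# The pipeline then starts denoising at scheduler.timesteps[-init_timestep],
# i.e. it skips the first num_inference_steps - init_timestep = 12 steps.
print(init_timestep)
```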
@@ -1,4 +1,5 @@
import inspect
import warnings
from typing import List, Optional, Union

import numpy as np
@@ -8,6 +9,7 @@
from tqdm.auto import tqdm
from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer

from ...configuration_utils import FrozenDict
from ...models import AutoencoderKL, UNet2DConditionModel
from ...pipeline_utils import DiffusionPipeline
from ...schedulers import DDIMScheduler, PNDMScheduler
@@ -83,6 +85,21 @@ def __init__(
super().__init__()
scheduler = scheduler.set_format("pt")
logger.info("`StableDiffusionInpaintPipeline` is experimental and will very likely change in the future.")

if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1:
warnings.warn(
f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`"
f" should be set to 1 istead of {scheduler.config.steps_offset}. Please make sure "
"to update the config accordingly as leaving `steps_offset` might led to incorrect results"
" in future versions. If you have downloaded this checkpoint from the Hugging Face Hub,"
" it would be very nice if you could open a Pull request for the `scheduler/scheduler_config.json`"
" file",
DeprecationWarning,
)
new_config = dict(scheduler.config)
new_config["steps_offset"] = 1
scheduler._internal_dict = FrozenDict(new_config)

self.register_modules(
vae=vae,
text_encoder=text_encoder,
@@ -193,19 +210,12 @@ def __call__(
raise ValueError(f"The value of strength should in [0.0, 1.0] but is {strength}")

# set timesteps
accepts_offset = "offset" in set(inspect.signature(self.scheduler.set_timesteps).parameters.keys())
extra_set_kwargs = {}
offset = 0
if accepts_offset:
offset = 1
extra_set_kwargs["offset"] = 1

self.scheduler.set_timesteps(num_inference_steps, **extra_set_kwargs)
self.scheduler.set_timesteps(num_inference_steps)

# preprocess image
if not isinstance(init_image, torch.FloatTensor):
init_image = preprocess_image(init_image)
init_image.to(self.device)
init_image = init_image.to(self.device)

# encode the init image into latents and scale the latents
init_latent_dist = self.vae.encode(init_image).latent_dist
@@ -220,14 +230,15 @@ def __call__(
# preprocess mask
if not isinstance(mask_image, torch.FloatTensor):
mask_image = preprocess_mask(mask_image)
mask_image.to(self.device)
mask_image = mask_image.to(self.device)
mask = torch.cat([mask_image] * batch_size)

# check sizes
if not mask.shape == init_latents.shape:
raise ValueError("The mask and init_image should be the same size!")

# get the original timestep using init_timestep
offset = self.scheduler.config.get("steps_offset", 0)
init_timestep = int(num_inference_steps * strength) + offset
init_timestep = min(init_timestep, num_inference_steps)
timesteps = self.scheduler.timesteps[-init_timestep]
@@ -100,12 +100,7 @@ def __call__(
raise ValueError(f"Unexpected latents shape, got {latents.shape}, expected {latents_shape}")

# set timesteps
accepts_offset = "offset" in set(inspect.signature(self.scheduler.set_timesteps).parameters.keys())
extra_set_kwargs = {}
if accepts_offset:
extra_set_kwargs["offset"] = 1

self.scheduler.set_timesteps(num_inference_steps, **extra_set_kwargs)
self.scheduler.set_timesteps(num_inference_steps)

# if we use LMSDiscreteScheduler, let's make sure latents are multiplied by sigmas
if isinstance(self.scheduler, LMSDiscreteScheduler):
26 changes: 22 additions & 4 deletions src/diffusers/schedulers/scheduling_ddim.py
@@ -16,6 +16,7 @@
# and https://github.com/hojonathanho/diffusion

import math
import warnings
from typing import Optional, Tuple, Union

import numpy as np
@@ -78,7 +79,13 @@ class DDIMScheduler(SchedulerMixin, ConfigMixin):
clip_sample (`bool`, default `True`):
option to clip predicted sample between -1 and 1 for numerical stability.
set_alpha_to_one (`bool`, default `True`):
if alpha for final step is 1 or the final alpha of the "non-previous" one.
each diffusion step uses the value of alphas product at that step and at the previous one. For the final
step there is no previous alpha. When this option is `True` the previous alpha product is fixed to `1`,
otherwise it uses the value of alpha at step 0.
steps_offset (`int`, default `0`):
an offset added to the inference steps. You can use a combination of `steps_offset=1` and
`set_alpha_to_one=False` to make the last step use step 0 for the previous alpha product, as done in
stable diffusion.
tensor_format (`str`): whether the scheduler expects pytorch or numpy arrays.

"""
@@ -93,6 +100,7 @@ def __init__(
trained_betas: Optional[np.ndarray] = None,
clip_sample: bool = True,
set_alpha_to_one: bool = True,
steps_offset: int = 0,
tensor_format: str = "pt",
):
if trained_betas is not None:
@@ -134,16 +142,26 @@ def _get_variance(self, timestep, prev_timestep):

return variance

def set_timesteps(self, num_inference_steps: int, offset: int = 0):
def set_timesteps(self, num_inference_steps: int, **kwargs):
"""
Sets the discrete timesteps used for the diffusion chain. Supporting function to be run before inference.

Args:
num_inference_steps (`int`):
the number of diffusion steps used when generating samples with a pre-trained model.
offset (`int`):
optional value to shift timestep values up by. A value of 1 is used in stable diffusion for inference.
"""

offset = self.config.steps_offset

if "offset" in kwargs:
warnings.warn(
"`offset` is deprecated as an input argument to `set_timesteps` and will be removed in v0.4.0."
" Please pass `steps_offset` to `__init__` instead.",
DeprecationWarning,
)

offset = kwargs["offset"]

self.num_inference_steps = num_inference_steps
step_ratio = self.config.num_train_timesteps // self.num_inference_steps
# creates integer timesteps by multiplying by ratio
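For downstream callers, the migration implied by this deprecation looks roughly as follows. This is a hedged usage sketch: the parameter values mirror common stable diffusion settings and are assumptions, not something specified by this PR.

```python
# Rough usage sketch: the offset moves from set_timesteps() into the scheduler config.
from diffusers import DDIMScheduler

# Before (deprecated above, slated for removal in v0.4.0):
#   scheduler.set_timesteps(50, offset=1)

scheduler = DDIMScheduler(
    beta_start=0.00085,
    beta_end=0.012,
    beta_schedule="scaled_linear",
    clip_sample=False,
    set_alpha_to_one=False,
    steps_offset=1,  # new config field introduced by this change
)
scheduler.set_timesteps(50)  # no per-call offset any more
```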
48 changes: 34 additions & 14 deletions src/diffusers/schedulers/scheduling_pndm.py
@@ -15,6 +15,7 @@
# DISCLAIMER: This file is strongly influenced by https://github.com/ermongroup/ddim

import math
import warnings
from typing import Optional, Tuple, Union

import numpy as np
@@ -74,10 +75,18 @@ class PNDMScheduler(SchedulerMixin, ConfigMixin):
`linear`, `scaled_linear`, or `squaredcos_cap_v2`.
trained_betas (`np.ndarray`, optional):
option to pass an array of betas directly to the constructor to bypass `beta_start`, `beta_end` etc.
tensor_format (`str`): whether the scheduler expects pytorch or numpy arrays
skip_prk_steps (`bool`):
allows the scheduler to skip the Runge-Kutta steps that are defined in the original paper as being required
before plms steps; defaults to `False`.
set_alpha_to_one (`bool`, default `False`):
each diffusion step uses the value of alphas product at that step and at the previous one. For the final
step there is no previous alpha. When this option is `True` the previous alpha product is fixed to `1`,
otherwise it uses the value of alpha at step 0.
steps_offset (`int`, default `0`):
an offset added to the inference steps. You can use a combination of `steps_offset=1` and
`set_alpha_to_one=False` to make the last step use step 0 for the previous alpha product, as done in
stable diffusion.
tensor_format (`str`): whether the scheduler expects pytorch or numpy arrays

"""

@@ -89,8 +98,10 @@ def __init__(
beta_end: float = 0.02,
beta_schedule: str = "linear",
trained_betas: Optional[np.ndarray] = None,
tensor_format: str = "pt",
skip_prk_steps: bool = False,
set_alpha_to_one: bool = False,
steps_offset: int = 0,
tensor_format: str = "pt",
):
if trained_betas is not None:
self.betas = np.asarray(trained_betas)
@@ -108,6 +119,8 @@ def __init__(
self.alphas = 1.0 - self.betas
self.alphas_cumprod = np.cumprod(self.alphas, axis=0)

self.final_alpha_cumprod = np.array(1.0) if set_alpha_to_one else self.alphas_cumprod[0]

# For now we only support F-PNDM, i.e. the runge-kutta method
# For more information on the algorithm please take a look at the paper: https://arxiv.org/pdf/2202.09778.pdf
# mainly at formula (9), (12), (13) and the Algorithm 2.
@@ -122,31 +135,38 @@ def __init__(
# setable values
self.num_inference_steps = None
self._timesteps = np.arange(0, num_train_timesteps)[::-1].copy()
self._offset = 0
self.prk_timesteps = None
self.plms_timesteps = None
self.timesteps = None

self.tensor_format = tensor_format
self.set_format(tensor_format=tensor_format)

def set_timesteps(self, num_inference_steps: int, offset: int = 0) -> torch.FloatTensor:
def set_timesteps(self, num_inference_steps: int, **kwargs) -> torch.FloatTensor:
"""
Sets the discrete timesteps used for the diffusion chain. Supporting function to be run before inference.

Args:
num_inference_steps (`int`):
the number of diffusion steps used when generating samples with a pre-trained model.
offset (`int`):
optional value to shift timestep values up by. A value of 1 is used in stable diffusion for inference.
"""

offset = self.config.steps_offset

if "offset" in kwargs:
warnings.warn(
"`offset` is deprecated as an input argument to `set_timesteps` and will be removed in v0.4.0."
" Please pass `steps_offset` to `__init__` instead."
)

offset = kwargs["offset"]

self.num_inference_steps = num_inference_steps
step_ratio = self.config.num_train_timesteps // self.num_inference_steps
# creates integer timesteps by multiplying by ratio
# casting to int to avoid issues when num_inference_step is power of 3
self._timesteps = (np.arange(0, num_inference_steps) * step_ratio).round().tolist()
self._offset = offset
self._timesteps = np.array([t + self._offset for t in self._timesteps])
self._timesteps = (np.arange(0, num_inference_steps) * step_ratio).round()
self._timesteps += offset

if self.config.skip_prk_steps:
# for some models like stable diffusion the prk steps can/should be skipped to
@@ -231,7 +251,7 @@ def step_prk(
)

diff_to_prev = 0 if self.counter % 2 else self.config.num_train_timesteps // self.num_inference_steps // 2
prev_timestep = max(timestep - diff_to_prev, self.prk_timesteps[-1])
prev_timestep = timestep - diff_to_prev
timestep = self.prk_timesteps[self.counter // 4 * 4]

if self.counter % 4 == 0:
Expand Down Expand Up @@ -293,7 +313,7 @@ def step_plms(
"for more information."
)

prev_timestep = max(timestep - self.config.num_train_timesteps // self.num_inference_steps, 0)
prev_timestep = timestep - self.config.num_train_timesteps // self.num_inference_steps

if self.counter != 1:
self.ets.append(model_output)
@@ -323,7 +343,7 @@ def step_plms(

return SchedulerOutput(prev_sample=prev_sample)

def _get_prev_sample(self, sample, timestep, timestep_prev, model_output):
def _get_prev_sample(self, sample, timestep, prev_timestep, model_output):
# See formula (9) of PNDM paper https://arxiv.org/pdf/2202.09778.pdf
# this function computes x_(t−δ) using the formula of (9)
# Note that x_t needs to be added to both sides of the equation
Expand All @@ -336,8 +356,8 @@ def _get_prev_sample(self, sample, timestep, timestep_prev, model_output):
# sample -> x_t
# model_output -> e_θ(x_t, t)
# prev_sample -> x_(t−δ)
alpha_prod_t = self.alphas_cumprod[timestep + 1 - self._offset]
alpha_prod_t_prev = self.alphas_cumprod[timestep_prev + 1 - self._offset]
Comment on lines -339 to -340
Contributor Author:
@patrickvonplaten this change breaks this test:

def test_full_loop_no_noise(self):
    scheduler_class = self.scheduler_classes[0]
    scheduler_config = self.get_scheduler_config()
    scheduler = scheduler_class(**scheduler_config)

    num_inference_steps = 10
    model = self.dummy_model()
    sample = self.dummy_sample_deter

    scheduler.set_timesteps(num_inference_steps)

    for i, t in enumerate(scheduler.prk_timesteps):
        residual = model(sample, t)
        sample = scheduler.step_prk(residual, i, sample).prev_sample

    for i, t in enumerate(scheduler.plms_timesteps):
        residual = model(sample, t)
        sample = scheduler.step_plms(residual, i, sample).prev_sample

    result_sum = torch.sum(torch.abs(sample))
    result_mean = torch.mean(torch.abs(sample))

    assert abs(result_sum.item() - 428.8788) < 1e-2
    assert abs(result_mean.item() - 0.5584) < 1e-3

However, I think the test itself must be wrong, because it uses the default offset=0, which means we are using self.alphas_cumprod[1] for timestep 0, which doesn't make sense. I assume the value in the test was computed with the current implementation, rather than being a reference value from elsewhere?

Contributor:
Hmm, I think the test breaks because set_alpha_to_one defaults to True, no?

Contributor Author (@jonatanklosko, Sep 13, 2022):
Actually no, adding + 1 - self.config.steps_offset back fixes the tests, but to me the failure is expected.

Contributor Author:
I kept the change and adjusted the test value.
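To spell out the off-by-one under discussion, here is a small illustration contrasting the old and new alpha lookups. The beta schedule and step counts are assumed values, not those of the failing test.

```python
# Illustration of the indexing change in _get_prev_sample (assumed values).
import numpy as np

num_train_timesteps, num_inference_steps = 1000, 50
alphas_cumprod = np.cumprod(1.0 - np.linspace(0.0001, 0.02, num_train_timesteps))
final_alpha_cumprod = alphas_cumprod[0]  # PNDM's set_alpha_to_one defaults to False

timestep, offset = 0, 0  # the old default when no offset was passed

# Old lookup: timestep 0 reads alphas_cumprod[1], the inconsistency noted above.
old_alpha_prod_t = alphas_cumprod[timestep + 1 - offset]

# New lookup: index directly, and fall back to final_alpha_cumprod when the
# previous timestep falls off the start of the schedule.
new_alpha_prod_t = alphas_cumprod[timestep]
prev_timestep = timestep - num_train_timesteps // num_inference_steps  # -20
alpha_prod_t_prev = (
    alphas_cumprod[prev_timestep] if prev_timestep >= 0 else final_alpha_cumprod
)
```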

alpha_prod_t = self.alphas_cumprod[timestep]
alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod
beta_prod_t = 1 - alpha_prod_t
beta_prod_t_prev = 1 - alpha_prod_t_prev
