@@ -27,6 +27,8 @@ |
27 | 27 | AutoencoderKL, |
28 | 28 | DDIMInverseScheduler, |
29 | 29 | DDIMScheduler, |
| 30 | + DPMSolverMultistepInverseScheduler, |
| 31 | + DPMSolverMultistepScheduler, |
30 | 32 | StableDiffusionDiffEditPipeline, |
31 | 33 | UNet2DConditionModel, |
32 | 34 | ) |
@@ -74,6 +76,13 @@ def get_dummy_components(self): |
74 | 76 | clip_sample=False, |
75 | 77 | set_alpha_to_zero=False, |
76 | 78 | ) |
| 79 | + inverse_scheduler = DPMSolverMultistepInverseScheduler( |
| 80 | + beta_start=0.00085, |
| 81 | + beta_end=0.012, |
| 82 | + beta_schedule="scaled_linear", |
| 83 | + ) |
77 | 86 | torch.manual_seed(0) |
78 | 87 | vae = AutoencoderKL( |
79 | 88 | block_out_channels=[32, 64], |
@@ -249,6 +258,30 @@ def test_inversion(self): |
249 | 258 | max_diff = np.abs(image_slice.flatten() - expected_slice).max() |
250 | 259 | self.assertLessEqual(max_diff, 1e-3) |
251 | 260 |
|
| 261 | + def test_inversion_dpm(self): |
| 262 | + device = "cpu" |
| 263 | + |
| 264 | + components = self.get_dummy_components() |
| 265 | + components["scheduler"] = DPMSolverMultistepScheduler( |
| 266 | + beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear" |
| 267 | + ) |
| 268 | + components["inverse_scheduler"] = DPMSolverMultistepInverseScheduler( |
| 269 | + beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear" |
| 270 | + ) |
| 271 | + pipe = self.pipeline_class(**components) |
| 272 | + pipe.to(device) |
| 273 | + pipe.set_progress_bar_config(disable=None) |
| 274 | + |
| 275 | + inputs = self.get_dummy_inversion_inputs(device) |
| 276 | + image = pipe.invert(**inputs).images |
| 277 | + image_slice = image[0, -1, -3:, -3:] |
| 278 | + |
| 279 | + self.assertEqual(image.shape, (2, 32, 32, 3)) |
| 280 | + expected_slice = np.array( |
| 281 | + [0.5150, 0.5134, 0.5043, 0.5376, 0.4694, 0.5105, 0.5015, 0.4407, 0.4799], |
| 282 | + ) |
| 283 | + max_diff = np.abs(image_slice.flatten() - expected_slice).max() |
| 284 | + self.assertLessEqual(max_diff, 1e-3) |
252 | 285 |
|
253 | 286 | @require_torch_gpu |
254 | 287 | @slow |
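For orientation, a minimal sketch (not part of the diff) of the scheduler pairing that test_inversion_dpm exercises: a DPMSolverMultistepScheduler for the forward denoising pass and the new DPMSolverMultistepInverseScheduler for pipe.invert(), built from the same Stable Diffusion beta schedule. The variable names are illustrative only.

```python
# Sketch only: build a matched forward/inverse DPM-Solver++ scheduler pair with the
# beta schedule used by the dummy components above (values copied from the test).
from diffusers import DPMSolverMultistepInverseScheduler, DPMSolverMultistepScheduler

scheduler_args = {"beta_start": 0.00085, "beta_end": 0.012, "beta_schedule": "scaled_linear"}

scheduler = DPMSolverMultistepScheduler(**scheduler_args)                 # drives the denoising loop
inverse_scheduler = DPMSolverMultistepInverseScheduler(**scheduler_args)  # drives pipe.invert()
```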
@@ -313,3 +346,54 @@ def test_stable_diffusion_diffedit_full(self): |
313 | 346 | / 255 |
314 | 347 | ) |
315 | 348 | assert np.abs((expected_image - image).max()) < 5e-1 |
| 349 | + |
| 350 | + def test_stable_diffusion_diffedit_dpm(self): |
| 351 | + generator = torch.manual_seed(0) |
| 352 | + |
| 353 | + pipe = StableDiffusionDiffEditPipeline.from_pretrained( |
| 354 | + "stabilityai/stable-diffusion-2-1", safety_checker=None, torch_dtype=torch.float16 |
| 355 | + ) |
| 356 | + pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config) |
| 357 | + pipe.inverse_scheduler = DPMSolverMultistepInverseScheduler.from_config(pipe.scheduler.config) |
| 358 | + pipe.enable_model_cpu_offload() |
| 359 | + pipe.set_progress_bar_config(disable=None) |
| 360 | + |
| 361 | + source_prompt = "a bowl of fruit" |
| 362 | + target_prompt = "a bowl of pears" |
| 363 | + |
| 364 | + mask_image = pipe.generate_mask( |
| 365 | + image=self.raw_image, |
| 366 | + source_prompt=source_prompt, |
| 367 | + target_prompt=target_prompt, |
| 368 | + generator=generator, |
| 369 | + ) |
| 370 | + |
| 371 | + inv_latents = pipe.invert( |
| 372 | + prompt=source_prompt, |
| 373 | + image=self.raw_image, |
| 374 | + inpaint_strength=0.7, |
| 375 | + generator=generator, |
| 376 | + num_inference_steps=25, |
| 377 | + ).latents |
| 378 | + |
| 379 | + image = pipe( |
| 380 | + prompt=target_prompt, |
| 381 | + mask_image=mask_image, |
| 382 | + image_latents=inv_latents, |
| 383 | + generator=generator, |
| 384 | + negative_prompt=source_prompt, |
| 385 | + inpaint_strength=0.7, |
| 386 | + num_inference_steps=25, |
| 387 | + output_type="numpy", |
| 388 | + ).images[0] |
| 389 | + |
| 390 | + expected_image = ( |
| 391 | + np.array( |
| 392 | + load_image( |
| 393 | + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" |
| 394 | + "/diffedit/pears.png" |
| 395 | + ).resize((768, 768)) |
| 396 | + ) |
| 397 | + / 255 |
| 398 | + ) |
| 399 | + assert np.abs((expected_image - image).max()) < 5e-1 |
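
The slow test above follows the user-facing DiffEdit flow. A hedged sketch of that flow outside the test harness is shown below; the input image path and prompts are placeholders, and the scheduler swap mirrors the corresponding lines in the test.

```python
# Sketch, assuming the diffusers version from this PR (which adds DPMSolverMultistepInverseScheduler)
# plus accelerate for CPU offload. Checkpoint, prompts, and input image are placeholders.
import torch
from PIL import Image
from diffusers import (
    DPMSolverMultistepInverseScheduler,
    DPMSolverMultistepScheduler,
    StableDiffusionDiffEditPipeline,
)

pipe = StableDiffusionDiffEditPipeline.from_pretrained(
    "stabilityai/stable-diffusion-2-1", safety_checker=None, torch_dtype=torch.float16
)
# Forward scheduler for denoising, inverse scheduler for inverting the source image into latents.
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
pipe.inverse_scheduler = DPMSolverMultistepInverseScheduler.from_config(pipe.scheduler.config)
pipe.enable_model_cpu_offload()

raw_image = Image.open("fruit_bowl.png").convert("RGB").resize((768, 768))  # placeholder input image
source_prompt = "a bowl of fruit"
target_prompt = "a bowl of pears"
generator = torch.manual_seed(0)

# 1) Mask the region that differs between the source and target prompts.
mask_image = pipe.generate_mask(
    image=raw_image, source_prompt=source_prompt, target_prompt=target_prompt, generator=generator
)
# 2) Invert the source image into partially noised latents with the inverse scheduler.
inv_latents = pipe.invert(
    prompt=source_prompt, image=raw_image, inpaint_strength=0.7, generator=generator, num_inference_steps=25
).latents
# 3) Denoise with the target prompt, editing only inside the masked region.
image = pipe(
    prompt=target_prompt,
    mask_image=mask_image,
    image_latents=inv_latents,
    negative_prompt=source_prompt,
    inpaint_strength=0.7,
    num_inference_steps=25,
    generator=generator,
).images[0]
```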