     UNet2DModel,
     VQModel,
 )
-from diffusers.utils import floats_tensor, load_image, slow, torch_device
+from diffusers.utils import floats_tensor, load_image, load_numpy, slow, torch_device
 from diffusers.utils.testing_utils import require_torch_gpu
 from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer
 
@@ -156,7 +156,7 @@ def to(self, device):
 
         return extract
 
-    def test_stable_diffusion_img2img(self):
+    def test_stable_diffusion_img2img_default_case(self):
         device = "cpu"  # ensure determinism for the device-dependent torch.Generator
         unet = self.dummy_cond_unet
         scheduler = PNDMScheduler(skip_prk_steps=True)
@@ -208,8 +208,8 @@ def test_stable_diffusion_img2img(self):
 
         assert image.shape == (1, 32, 32, 3)
         expected_slice = np.array([0.4492, 0.3865, 0.4222, 0.5854, 0.5139, 0.4379, 0.4193, 0.48, 0.4218])
-        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
-        assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
+        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3
+        assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-3
 
     def test_stable_diffusion_img2img_negative_prompt(self):
         device = "cpu"  # ensure determinism for the device-dependent torch.Generator
@@ -251,7 +251,7 @@ def test_stable_diffusion_img2img_negative_prompt(self):
 
         assert image.shape == (1, 32, 32, 3)
         expected_slice = np.array([0.4065, 0.3783, 0.4050, 0.5266, 0.4781, 0.4252, 0.4203, 0.4692, 0.4365])
-        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
+        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3
 
     def test_stable_diffusion_img2img_multiple_init_images(self):
         device = "cpu"  # ensure determinism for the device-dependent torch.Generator
@@ -293,7 +293,7 @@ def test_stable_diffusion_img2img_multiple_init_images(self):
 
         assert image.shape == (2, 32, 32, 3)
         expected_slice = np.array([0.5144, 0.4447, 0.4735, 0.6676, 0.5526, 0.5454, 0.645, 0.5149, 0.4689])
-        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
+        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3
 
     def test_stable_diffusion_img2img_k_lms(self):
         device = "cpu"  # ensure determinism for the device-dependent torch.Generator
@@ -348,8 +348,8 @@ def test_stable_diffusion_img2img_k_lms(self):
 
         assert image.shape == (1, 32, 32, 3)
         expected_slice = np.array([0.4367, 0.4986, 0.4372, 0.6706, 0.5665, 0.444, 0.5864, 0.6019, 0.5203])
-        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
-        assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
+        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3
+        assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-3
 
     def test_stable_diffusion_img2img_num_images_per_prompt(self):
         device = "cpu"
@@ -472,17 +472,15 @@ def tearDown(self):
         gc.collect()
         torch.cuda.empty_cache()
 
-    def test_stable_diffusion_img2img_pipeline(self):
+    def test_stable_diffusion_img2img_pipeline_default(self):
         init_image = load_image(
             "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
             "/img2img/sketch-mountains-input.jpg"
         )
-        expected_image = load_image(
-            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
-            "/img2img/fantasy_landscape.png"
-        )
         init_image = init_image.resize((768, 512))
-        expected_image = np.array(expected_image, dtype=np.float32) / 255.0
+        expected_image = load_numpy(
+            "https://huggingface.co/datasets/lewington/expected-images/resolve/main/fantasy_landscape.npy"
+        )
 
         model_id = "CompVis/stable-diffusion-v1-4"
         pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
@@ -508,19 +506,17 @@ def test_stable_diffusion_img2img_pipeline(self):
 
         assert image.shape == (512, 768, 3)
         # img2img is flaky across GPUs even in fp32, so using MAE here
-        assert np.abs(expected_image - image).mean() < 1e-2
+        assert np.abs(expected_image - image).mean() < 1e-3
 
     def test_stable_diffusion_img2img_pipeline_k_lms(self):
         init_image = load_image(
             "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
             "/img2img/sketch-mountains-input.jpg"
         )
-        expected_image = load_image(
-            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
-            "/img2img/fantasy_landscape_k_lms.png"
-        )
         init_image = init_image.resize((768, 512))
-        expected_image = np.array(expected_image, dtype=np.float32) / 255.0
+        expected_image = load_numpy(
+            "https://huggingface.co/datasets/lewington/expected-images/resolve/main/fantasy_landscape_k_lms.npy"
+        )
 
         model_id = "CompVis/stable-diffusion-v1-4"
         lms = LMSDiscreteScheduler.from_config(model_id, subfolder="scheduler")
@@ -548,7 +544,7 @@ def test_stable_diffusion_img2img_pipeline_k_lms(self):
 
         assert image.shape == (512, 768, 3)
         # img2img is flaky across GPUs even in fp32, so using MAE here
-        assert np.abs(expected_image - image).mean() < 1e-2
+        assert np.abs(expected_image - image).mean() < 1e-3
 
     def test_stable_diffusion_img2img_intermediate_state(self):
         number_of_steps = 0
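Note on the expected-image change above: the removed lines loaded a reference PNG and converted it to a float32 array in [0, 1] at test time, while the new lines fetch a pre-computed array via load_numpy. Below is a minimal sketch of how such an .npy reference could be produced from the original PNG; the output filename and the use of numpy.save are assumptions for illustration and do not appear in the diff.

import numpy as np

from diffusers.utils import load_image

# Reference PNG used by the previous version of the test (URL taken from the removed lines).
png_url = (
    "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
    "/img2img/fantasy_landscape.png"
)

# Reproduce the conversion the old test did inline: uint8 PNG -> float32 array in [0, 1].
expected_image = np.array(load_image(png_url), dtype=np.float32) / 255.0

# Persist the array so a test can retrieve it with diffusers.utils.load_numpy instead of
# converting at runtime. The local filename is hypothetical; the PR hosts the result in a
# Hugging Face dataset repo.
np.save("fantasy_landscape.npy", expected_image)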