From d2aeeb48ee0fd203201d830f8fe65b3391889454 Mon Sep 17 00:00:00 2001 From: Pedro Cuenca Date: Sun, 2 Oct 2022 19:44:45 +0200 Subject: [PATCH] Improve callback tests: - Use default weights. - Do not use attention slicing or autocast. - Compare first and last slices. - In ONNX, use the CUDAExecutionProvider. In ONNX, however, the results of the last slice were erratic (sometimes they get very close to 0). I need to check with @anton-l what could be the reason. --- tests/test_pipelines.py | 106 +++++++++++++++++++++------------------- 1 file changed, 57 insertions(+), 49 deletions(-) diff --git a/tests/test_pipelines.py b/tests/test_pipelines.py index d0d78171378e..74d02c177bba 100644 --- a/tests/test_pipelines.py +++ b/tests/test_pipelines.py @@ -1450,31 +1450,35 @@ def test_callback_fn(step: int, timestep: int, latents: torch.FloatTensor) -> No assert latents.shape == (1, 4, 64, 64) latents_slice = latents[0, -3:, -3:, -1] expected_slice = np.array( - [1.8285, 1.2857, -0.1024, 1.2406, -2.3068, 1.0747, -0.0818, -0.6520, -2.9506] + [1.8279, 1.2858, -0.1022, 1.2406, -2.3068, 1.0748, -0.0819, -0.6522, -2.9496] + ) + assert np.abs(latents_slice.flatten() - expected_slice).max() < 1e-3 + if step == 50: + latents = latents.detach().cpu().numpy() + assert latents.shape == (1, 4, 64, 64) + latents_slice = latents[0, -3:, -3:, -1] + expected_slice = np.array( + [1.0940, 1.5760, 0.2619, -0.0501, -1.7947, -0.3565, -0.4667, -1.0703, -1.1592] ) assert np.abs(latents_slice.flatten() - expected_slice).max() < 1e-3 test_callback_fn.has_been_called = False - pipe = StableDiffusionPipeline.from_pretrained( - "CompVis/stable-diffusion-v1-4", use_auth_token=True, revision="fp16", torch_dtype=torch.float16 - ) + pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", use_auth_token=True) pipe.to(torch_device) pipe.set_progress_bar_config(disable=None) - pipe.enable_attention_slicing() prompt = "Andromeda galaxy in a bottle" generator = torch.Generator(device=torch_device).manual_seed(0) - with torch.autocast(torch_device): - pipe( - prompt=prompt, - num_inference_steps=50, - guidance_scale=7.5, - generator=generator, - callback=test_callback_fn, - callback_steps=1, - ) + pipe( + prompt=prompt, + num_inference_steps=50, + guidance_scale=7.5, + generator=generator, + callback=test_callback_fn, + callback_steps=1, + ) assert test_callback_fn.has_been_called assert number_of_steps == 51 @@ -1491,7 +1495,13 @@ def test_callback_fn(step: int, timestep: int, latents: torch.FloatTensor) -> No latents = latents.detach().cpu().numpy() assert latents.shape == (1, 4, 64, 96) latents_slice = latents[0, -3:, -3:, -1] - expected_slice = np.array([0.9052, -0.0184, 0.4810, 0.2898, 0.5851, 1.4920, 0.5362, 1.9838, 0.0530]) + expected_slice = np.array([0.9052, -0.0187, 0.4808, 0.2900, 0.5852, 1.4922, 0.5364, 1.9840, 0.0534]) + assert np.abs(latents_slice.flatten() - expected_slice).max() < 1e-3 + if step == 37: + latents = latents.detach().cpu().numpy() + assert latents.shape == (1, 4, 64, 96) + latents_slice = latents[0, -3:, -3:, -1] + expected_slice = np.array([0.7063, 0.7833, 0.8345, 1.8114, 1.7867, 1.9398, 1.3653, 1.6623, 1.2869]) assert np.abs(latents_slice.flatten() - expected_slice).max() < 1e-3 test_callback_fn.has_been_called = False @@ -1502,27 +1512,23 @@ def test_callback_fn(step: int, timestep: int, latents: torch.FloatTensor) -> No ) init_image = init_image.resize((768, 512)) - pipe = StableDiffusionImg2ImgPipeline.from_pretrained( - "CompVis/stable-diffusion-v1-4", use_auth_token=True, 
revision="fp16", torch_dtype=torch.float16 - ) + pipe = StableDiffusionImg2ImgPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", use_auth_token=True) pipe.to(torch_device) pipe.set_progress_bar_config(disable=None) - pipe.enable_attention_slicing() prompt = "A fantasy landscape, trending on artstation" generator = torch.Generator(device=torch_device).manual_seed(0) - with torch.autocast(torch_device): - pipe( - prompt=prompt, - init_image=init_image, - strength=0.75, - num_inference_steps=50, - guidance_scale=7.5, - generator=generator, - callback=test_callback_fn, - callback_steps=1, - ) + pipe( + prompt=prompt, + init_image=init_image, + strength=0.75, + num_inference_steps=50, + guidance_scale=7.5, + generator=generator, + callback=test_callback_fn, + callback_steps=1, + ) assert test_callback_fn.has_been_called assert number_of_steps == 38 @@ -1540,9 +1546,15 @@ def test_callback_fn(step: int, timestep: int, latents: torch.FloatTensor) -> No assert latents.shape == (1, 4, 64, 64) latents_slice = latents[0, -3:, -3:, -1] expected_slice = np.array( - [-0.5472, 1.1218, -0.5505, -0.9390, -1.0794, 0.4063, 0.5158, 0.6429, -1.5246] + [-0.5472, 1.1218, -0.5504, -0.9391, -1.0795, 0.4064, 0.5158, 0.6427, -1.5245] ) assert np.abs(latents_slice.flatten() - expected_slice).max() < 1e-3 + if step == 37: + latents = latents.detach().cpu().numpy() + assert latents.shape == (1, 4, 64, 64) + latents_slice = latents[0, -3:, -3:, -1] + expected_slice = np.array([0.4783, 1.1574, 0.6261, 0.2289, 0.2550, -0.1438, 0.7085, -0.1604, -0.5655]) + assert np.abs(latents_slice.flatten() - expected_slice).max() < 1e-3 test_callback_fn.has_been_called = False @@ -1555,28 +1567,24 @@ def test_callback_fn(step: int, timestep: int, latents: torch.FloatTensor) -> No "/in_paint/overture-creations-5sI6fQgYIuo_mask.png" ) - pipe = StableDiffusionInpaintPipeline.from_pretrained( - "CompVis/stable-diffusion-v1-4", use_auth_token=True, revision="fp16", torch_dtype=torch.float16 - ) + pipe = StableDiffusionInpaintPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", use_auth_token=True) pipe.to(torch_device) pipe.set_progress_bar_config(disable=None) - pipe.enable_attention_slicing() prompt = "A red cat sitting on a park bench" generator = torch.Generator(device=torch_device).manual_seed(0) - with torch.autocast(torch_device): - pipe( - prompt=prompt, - init_image=init_image, - mask_image=mask_image, - strength=0.75, - num_inference_steps=50, - guidance_scale=7.5, - generator=generator, - callback=test_callback_fn, - callback_steps=1, - ) + pipe( + prompt=prompt, + init_image=init_image, + mask_image=mask_image, + strength=0.75, + num_inference_steps=50, + guidance_scale=7.5, + generator=generator, + callback=test_callback_fn, + callback_steps=1, + ) assert test_callback_fn.has_been_called assert number_of_steps == 38 @@ -1592,14 +1600,14 @@ def test_callback_fn(step: int, timestep: int, latents: np.ndarray) -> None: assert latents.shape == (1, 4, 64, 64) latents_slice = latents[0, -3:, -3:, -1] expected_slice = np.array( - [-0.6254, -0.2742, -1.0710, 0.2296, -1.1683, 0.6913, -2.0605, -0.0682, 0.9700] + [-0.6255, -0.2742, -1.071, 0.2296, -1.1683, 0.6913, -2.0606, -0.0683, 0.9700] ) assert np.abs(latents_slice.flatten() - expected_slice).max() < 1e-3 test_callback_fn.has_been_called = False pipe = StableDiffusionOnnxPipeline.from_pretrained( - "CompVis/stable-diffusion-v1-4", use_auth_token=True, revision="onnx", provider="CPUExecutionProvider" + "CompVis/stable-diffusion-v1-4", use_auth_token=True, revision="onnx", 
provider="CUDAExecutionProvider" ) pipe.set_progress_bar_config(disable=None)