@@ -1387,3 +1387,165 @@ def test_stable_diffusion_onnx(self):
13871387 assert image .shape == (1 , 512 , 512 , 3 )
13881388 expected_slice = np .array ([0.0385 , 0.0252 , 0.0234 , 0.0287 , 0.0358 , 0.0287 , 0.0276 , 0.0235 , 0.0010 ])
13891389 assert np .abs (image_slice .flatten () - expected_slice ).max () < 1e-3
1390+
@slow
@unittest.skipIf(torch_device == "cpu", "Stable diffusion is supposed to run on GPU")
def test_stable_diffusion_text2img_intermediate_state(self):
    """Check that the text-to-image pipeline reports intermediate latents via `callback`.

    The pipeline is run with `callback_steps=1` and a seeded generator. The callback
    counts its invocations and, on the call where `step == 0`, compares a 3x3 corner
    slice of the latents against reference values.
    """
    number_of_steps = 0

    def test_callback_fn(step: int, timestep: int, latents: torch.FloatTensor) -> None:
        """Record the invocation and validate the latents handed over at step 0."""
        nonlocal number_of_steps
        test_callback_fn.has_been_called = True
        number_of_steps += 1
        if step == 0:
            # This test never runs on CPU (see skipIf above), so the latents live on the
            # accelerator. np.array() cannot convert a non-CPU tensor; copy to host first.
            latents = latents.detach().cpu().numpy()
            assert latents.shape == (1, 4, 64, 64)
            latents_slice = latents[0, -3:, -3:, -1]
            expected_slice = np.array(
                [-1.2277, -0.3692, -0.2123, -1.3709, -1.4505, -0.6718, -0.3112, -1.2481, -1.0674]
            )
            assert np.abs(latents_slice.flatten() - expected_slice).max() < 1e-3

    # Function attribute used as a "was the callback ever invoked" flag.
    test_callback_fn.has_been_called = False

    pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", use_auth_token=True)
    pipe.to(torch_device)
    pipe.set_progress_bar_config(disable=None)

    prompt = "Andromeda galaxy in a bottle"

    # Fixed seed so the latents compared in the callback are reproducible.
    generator = torch.Generator(device=torch_device).manual_seed(0)
    pipe(
        prompt=prompt,
        num_inference_steps=50,
        guidance_scale=7.5,
        generator=generator,
        callback=test_callback_fn,
        callback_steps=1,
    )
    assert test_callback_fn.has_been_called
    # NOTE(review): 51 callbacks for 50 requested steps -- presumably the default scheduler
    # adds an extra step; confirm against the scheduler in use.
    assert number_of_steps == 51
1428+
@slow
@unittest.skipIf(torch_device == "cpu", "Stable diffusion is supposed to run on GPU")
def test_stable_diffusion_img2img_intermediate_state(self):
    """Check that the image-to-image pipeline reports intermediate latents via `callback`.

    The pipeline is run on a resized input image with `callback_steps=1` and a seeded
    generator. The callback counts its invocations and, on the call where `step == 0`,
    compares a 3x3 corner slice of the latents against reference values.
    """
    number_of_steps = 0

    def test_callback_fn(step: int, timestep: int, latents: torch.FloatTensor) -> None:
        """Record the invocation and validate the latents handed over at step 0."""
        nonlocal number_of_steps
        test_callback_fn.has_been_called = True
        number_of_steps += 1
        if step == 0:
            # This test never runs on CPU (see skipIf above), so the latents live on the
            # accelerator. np.array() cannot convert a non-CPU tensor; copy to host first.
            latents = latents.detach().cpu().numpy()
            assert latents.shape == (1, 4, 64, 96)
            latents_slice = latents[0, -3:, -3:, -1]
            expected_slice = np.array([0.5486, 0.8705, 1.4053, 1.6771, 2.0729, 0.7256, 1.5693, -0.1298, -1.3520])
            assert np.abs(latents_slice.flatten() - expected_slice).max() < 1e-3

    # Function attribute used as a "was the callback ever invoked" flag.
    test_callback_fn.has_been_called = False

    init_image = load_image(
        "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
        "/img2img/sketch-mountains-input.jpg"
    )
    init_image = init_image.resize((768, 512))

    pipe = StableDiffusionImg2ImgPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", use_auth_token=True)
    pipe.to(torch_device)
    pipe.set_progress_bar_config(disable=None)

    prompt = "A fantasy landscape, trending on artstation"

    # Fixed seed so the latents compared in the callback are reproducible.
    generator = torch.Generator(device=torch_device).manual_seed(0)
    pipe(
        prompt=prompt,
        init_image=init_image,
        strength=0.75,
        num_inference_steps=50,
        guidance_scale=7.5,
        generator=generator,
        callback=test_callback_fn,
        callback_steps=1,
    )
    assert test_callback_fn.has_been_called
    # NOTE(review): 38 callbacks with strength=0.75 and 50 requested steps -- presumably
    # only a strength-dependent share of the schedule is executed; confirm in the pipeline.
    assert number_of_steps == 38
1472+
@slow
@unittest.skipIf(torch_device == "cpu", "Stable diffusion is supposed to run on GPU")
def test_stable_diffusion_inpaint_intermediate_state(self):
    """Check that the inpainting pipeline reports intermediate latents via `callback`.

    The pipeline is run on an input image and mask with `callback_steps=1` and a seeded
    generator. The callback counts its invocations and, on the call where `step == 0`,
    compares a 3x3 corner slice of the latents against reference values.
    """
    number_of_steps = 0

    def test_callback_fn(step: int, timestep: int, latents: torch.FloatTensor) -> None:
        """Record the invocation and validate the latents handed over at step 0."""
        nonlocal number_of_steps
        test_callback_fn.has_been_called = True
        number_of_steps += 1
        if step == 0:
            # This test never runs on CPU (see skipIf above), so the latents live on the
            # accelerator. np.array() cannot convert a non-CPU tensor; copy to host first.
            latents = latents.detach().cpu().numpy()
            assert latents.shape == (1, 4, 64, 64)
            latents_slice = latents[0, -3:, -3:, -1]
            expected_slice = np.array(
                [-0.4155, -0.4140, 1.1430, -2.0722, 2.2523, -1.8766, -0.4917, 0.3338, 0.9667]
            )
            assert np.abs(latents_slice.flatten() - expected_slice).max() < 1e-3

    # Function attribute used as a "was the callback ever invoked" flag.
    test_callback_fn.has_been_called = False

    init_image = load_image(
        "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
        "/in_paint/overture-creations-5sI6fQgYIuo.png"
    )
    mask_image = load_image(
        "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
        "/in_paint/overture-creations-5sI6fQgYIuo_mask.png"
    )

    pipe = StableDiffusionInpaintPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", use_auth_token=True)
    pipe.to(torch_device)
    pipe.set_progress_bar_config(disable=None)

    prompt = "A red cat sitting on a park bench"

    # Fixed seed so the latents compared in the callback are reproducible.
    generator = torch.Generator(device=torch_device).manual_seed(0)
    pipe(
        prompt=prompt,
        init_image=init_image,
        mask_image=mask_image,
        strength=0.75,
        num_inference_steps=50,
        guidance_scale=7.5,
        generator=generator,
        callback=test_callback_fn,
        callback_steps=1,
    )
    assert test_callback_fn.has_been_called
    # NOTE(review): 38 callbacks with strength=0.75 and 50 requested steps -- presumably
    # only a strength-dependent share of the schedule is executed; confirm in the pipeline.
    assert number_of_steps == 38
1522+
@slow
def test_stable_diffusion_onnx_intermediate_state(self):
    """Verify that the ONNX pipeline passes intermediate latents to `callback`.

    Runs the pipeline with `callback_steps=1` after seeding NumPy's global RNG. The
    callback tallies its invocations and, when `step == 0`, checks a 3x3 corner slice
    of the latents against reference values.
    """
    callback_seen = False
    call_count = 0

    def on_step(step: int, timestep: int, latents: np.ndarray) -> None:
        """Tally the call; inspect the latents only when `step` is 0."""
        nonlocal callback_seen, call_count
        callback_seen = True
        call_count += 1
        if step != 0:
            return

        assert latents.shape == (1, 4, 64, 64)
        reference = np.array([-0.6254, -0.2742, -1.0710, 0.2296, -1.1683, 0.6913, -2.0605, -0.0682, 0.9700])
        corner = latents[0, -3:, -3:, -1].flatten()
        max_deviation = np.abs(corner - reference).max()
        assert max_deviation < 1e-3

    pipe = StableDiffusionOnnxPipeline.from_pretrained(
        "CompVis/stable-diffusion-v1-4",
        use_auth_token=True,
        revision="onnx",
        provider="CUDAExecutionProvider",
    )
    pipe.set_progress_bar_config(disable=None)

    # Seed the global NumPy RNG right before the run, as no generator is passed in.
    np.random.seed(0)
    pipe(
        prompt="Andromeda galaxy in a bottle",
        num_inference_steps=50,
        guidance_scale=7.5,
        callback=on_step,
        callback_steps=1,
    )

    assert callback_seen
    assert call_count == 51
0 commit comments