Classifier free guidance unconditioned value (#8562)

virginiafdez · Virginia Fernandez · web-flow · commit b5bc69d0306c · 2025-09-09T11:07:42.000+08:00
Fixes #8560. ### Description This PR adds an argument `cfg_fill_value` so that users can control the value that replaces the conditioning tensor during classifier-free guidance inference. Previously, this value was hard-coded to -1, which might not be ideal for certain applications where the conditioning could have -1 as a normal value. ### Types of changes - [x] Non-breaking change (fix or new feature that would not break existing functionality). - [x] New tests added to cover the changes (**modified existing ones**). - [x] Integration tests passed locally by running `./runtests.sh -f -u --net --coverage`. - [x] Quick tests passed locally by running `./runtests.sh --quick --unittests --disttests`. - [x] In-line docstrings updated. --------- Co-authored-by: Virginia Fernandez <virginia.fernandez@kcl.ac.uk>
diff --git a/monai/inferers/inferer.py b/monai/inferers/inferer.py
@@ -916,6 +916,7 @@ def sample(
         verbose: bool = True,
         seg: torch.Tensor | None = None,
         cfg: float | None = None,
+        cfg_fill_value: float = -1.0,
     ) -> torch.Tensor | tuple[torch.Tensor, list[torch.Tensor]]:
         """
         Args:
@@ -929,6 +930,7 @@ def sample(
             verbose: if true, prints the progression bar of the sampling process.
             seg: if diffusion model is instance of SPADEDiffusionModel, segmentation must be provided.
             cfg: classifier-free-guidance scale, which indicates the level of strengthening on the conditioning.
+            cfg_fill_value: the fill value to use for the unconditioned input when using classifier-free guidance.
         """
         if mode not in ["crossattn", "concat"]:
             raise NotImplementedError(f"{mode} condition is not supported")
@@ -961,7 +963,7 @@ def sample(
                 model_input = torch.cat([image] * 2, dim=0)
                 if conditioning is not None:
                     uncondition = torch.ones_like(conditioning)
-                    uncondition.fill_(-1)
+                    uncondition.fill_(cfg_fill_value)
                     conditioning_input = torch.cat([uncondition, conditioning], dim=0)
                 else:
                     conditioning_input = None
@@ -1261,6 +1263,7 @@ def sample(  # type: ignore[override]
         verbose: bool = True,
         seg: torch.Tensor | None = None,
         cfg: float | None = None,
+        cfg_fill_value: float = -1.0,
     ) -> torch.Tensor | tuple[torch.Tensor, list[torch.Tensor]]:
         """
         Args:
@@ -1276,6 +1279,7 @@ def sample(  # type: ignore[override]
             seg: if diffusion model is instance of SPADEDiffusionModel, or autoencoder_model
              is instance of SPADEAutoencoderKL, segmentation must be provided.
             cfg: classifier-free-guidance scale, which indicates the level of strengthening on the conditioning.
+            cfg_fill_value: the fill value to use for the unconditioned input when using classifier-free guidance.
         """
 
         if (
@@ -1300,6 +1304,7 @@ def sample(  # type: ignore[override]
             verbose=verbose,
             seg=seg,
             cfg=cfg,
+            cfg_fill_value=cfg_fill_value,
         )
 
         if save_intermediates:
@@ -1479,6 +1484,7 @@ def sample(  # type: ignore[override]
         verbose: bool = True,
         seg: torch.Tensor | None = None,
         cfg: float | None = None,
+        cfg_fill_value: float = -1.0,
     ) -> torch.Tensor | tuple[torch.Tensor, list[torch.Tensor]]:
         """
         Args:
@@ -1493,7 +1499,8 @@ def sample(  # type: ignore[override]
             mode: Conditioning mode for the network.
             verbose: if true, prints the progression bar of the sampling process.
             seg: if diffusion model is instance of SPADEDiffusionModel, segmentation must be provided.
-                        cfg: classifier-free-guidance scale, which indicates the level of strengthening on the conditioning.
+            cfg: classifier-free-guidance scale, which indicates the level of strengthening on the conditioning.
+            cfg_fill_value: the fill value to use for the unconditioned input when using classifier-free guidance.
         """
         if mode not in ["crossattn", "concat"]:
             raise NotImplementedError(f"{mode} condition is not supported")
@@ -1521,7 +1528,7 @@ def sample(  # type: ignore[override]
                 model_input = torch.cat([image] * 2, dim=0)
                 if conditioning is not None:
                     uncondition = torch.ones_like(conditioning)
-                    uncondition.fill_(-1)
+                    uncondition.fill_(cfg_fill_value)
                     conditioning_input = torch.cat([uncondition, conditioning], dim=0)
                 else:
                     conditioning_input = None
@@ -1839,6 +1846,7 @@ def sample(  # type: ignore[override]
         verbose: bool = True,
         seg: torch.Tensor | None = None,
         cfg: float | None = None,
+        cfg_fill_value: float = -1.0,
     ) -> torch.Tensor | tuple[torch.Tensor, list[torch.Tensor]]:
         """
         Args:
@@ -1856,6 +1864,7 @@ def sample(  # type: ignore[override]
             seg: if diffusion model is instance of SPADEDiffusionModel, or autoencoder_model
              is instance of SPADEAutoencoderKL, segmentation must be provided.
             cfg: classifier-free-guidance scale, which indicates the level of strengthening on the conditioning.
+            cfg_fill_value: the fill value to use for the unconditioned input when using classifier-free guidance.
         """
 
         if (
@@ -1884,6 +1893,7 @@ def sample(  # type: ignore[override]
             verbose=verbose,
             seg=seg,
             cfg=cfg,
+            cfg_fill_value=cfg_fill_value,
         )
 
         if save_intermediates:
diff --git a/tests/inferers/test_diffusion_inferer.py b/tests/inferers/test_diffusion_inferer.py
@@ -106,6 +106,7 @@ def test_sample_cfg(self, model_params, input_shape):
             save_intermediates=True,
             intermediate_steps=1,
             cfg=5,
+            cfg_fill_value=-1,
         )
         self.assertEqual(sample.shape, noise.shape)
 
diff --git a/tests/inferers/test_latent_diffusion_inferer.py b/tests/inferers/test_latent_diffusion_inferer.py
@@ -456,6 +456,7 @@ def test_sample_shape_with_cfg(
                     scheduler=scheduler,
                     seg=input_seg,
                     cfg=5,
+                    cfg_fill_value=-1,
                 )
             else:
                 sample = inferer.sample(

Original file line number	Diff line number	Diff line change
`@@ -106,6 +106,7 @@ def test_sample_cfg(self, model_params, input_shape):`
`106`	`106`	`save_intermediates=True,`
`107`	`107`	`intermediate_steps=1,`
`108`	`108`	`cfg=5,`
	`109`	`+ cfg_fill_value=-1,`
`109`	`110`	`)`
`110`	`111`	`self.assertEqual(sample.shape, noise.shape)`
`111`	`112`
Original file line number	Diff line number	Diff line change
`@@ -456,6 +456,7 @@ def test_sample_shape_with_cfg(`
`456`	`456`	`scheduler=scheduler,`
`457`	`457`	`seg=input_seg,`
`458`	`458`	`cfg=5,`
	`459`	`+ cfg_fill_value=-1,`
`459`	`460`	`)`
`460`	`461`	`else:`
`461`	`462`	`sample = inferer.sample(`