Added non-scalar fill support workaround for pad

vfdev-5 · vfdev-5 · commit 3e97c1c5861c · 2022-06-30T11:29:42.000Z
diff --git a/test/test_prototype_transforms_functional.py b/test/test_prototype_transforms_functional.py
@@ -426,6 +426,17 @@ def resized_crop_segmentation_mask():
         yield SampleInput(mask, top=top, left=left, height=height, width=width, size=size)
 
 
+@register_kernel_info_from_sample_inputs_fn
+def pad_image_tensor():
+    # One SampleInput per (image, padding, fill, padding_mode) combination.
+    paddings = [[1], [1, 1], [1, 1, 2, 2]]
+    fills = [12]
+    padding_modes = ["constant", "symmetric", "edge", "reflect"]
+
+    for image, padding, fill, padding_mode in itertools.product(make_images(), paddings, fills, padding_modes):
+        yield SampleInput(image, padding=padding, fill=fill, padding_mode=padding_mode)
+
+
 @register_kernel_info_from_sample_inputs_fn
 def pad_segmentation_mask():
     for mask, padding, padding_mode in itertools.product(
diff --git a/torchvision/prototype/features/_feature.py b/torchvision/prototype/features/_feature.py
@@ -136,7 +136,7 @@ def resized_crop(
         # How dangerous to do this instead of raising an error ?
         return self
 
-    def pad(self, padding: List[int], fill: int = 0, padding_mode: str = "constant") -> Any:
+    def pad(self, padding: List[int], fill: Union[float, Sequence[float]] = 0, padding_mode: str = "constant") -> Any:
+        """Return this object unchanged; ``padding``, ``fill`` and ``padding_mode`` are not used here."""
         # Just output itself
         # How dangerous to do this instead of raising an error ?
         return self
diff --git a/torchvision/prototype/features/_image.py b/torchvision/prototype/features/_image.py
@@ -163,10 +163,17 @@ def resized_crop(
         )
         return Image.new_like(self, output)
 
-    def pad(self, padding: List[int], fill: int = 0, padding_mode: str = "constant") -> Image:
+    def pad(self, padding: List[int], fill: Union[float, List[float]] = 0.0, padding_mode: str = "constant") -> Image:
+        """Pad this image and wrap the result with ``Image.new_like``.
+
+        A scalar ``fill`` (int or float) goes through ``pad_image_tensor``; any other
+        ``fill`` is routed to ``_pad_with_vector_fill``.
+        """
         from torchvision.prototype.transforms import functional as _F
 
-        output = _F.pad_image_tensor(self, padding, fill=fill, padding_mode=padding_mode)
+        # PyTorch's pad supports only scalars on fill. So we need to overwrite the colour
+        if isinstance(fill, (int, float)):
+            output = _F.pad_image_tensor(self, padding, fill=fill, padding_mode=padding_mode)
+        else:
+            from torchvision.prototype.transforms.functional._geometry import _pad_with_vector_fill
+
+            output = _pad_with_vector_fill(self, padding, fill=fill, padding_mode=padding_mode)
+
         return Image.new_like(self, output)
 
     def rotate(
diff --git a/torchvision/prototype/transforms/functional/_geometry.py b/torchvision/prototype/transforms/functional/_geometry.py
@@ -503,10 +503,45 @@ def rotate(
         return inpt
 
 
-pad_image_tensor = _FT.pad
 pad_image_pil = _FP.pad
 
 
+def pad_image_tensor(
+    img: torch.Tensor, padding: List[int], fill: int = 0, padding_mode: str = "constant"
+) -> torch.Tensor:
+    """Pad the last two dimensions of an image tensor that may carry extra leading batch dimensions.
+
+    Args:
+        img: tensor whose trailing dimensions are ``(..., height, width)``.
+        padding: forwarded unchanged to ``_FT.pad``.
+        fill: scalar fill value (int or float) forwarded to ``_FT.pad``.
+        padding_mode: forwarded unchanged to ``_FT.pad``.
+
+    Returns:
+        The padded tensor, with the leading dimensions of ``img`` preserved.
+    """
+    if img.ndim <= 4:
+        # ``_FT.pad`` used to be this kernel directly, so low-rank inputs are handed to it as before.
+        # This keeps 2D ``(height, width)`` inputs working: their shape cannot be split into three sizes.
+        # NOTE(review): relies on ``_FT.pad`` handling inputs of up to four dimensions on its own - confirm.
+        return _FT.pad(img=img, padding=padding, fill=fill, padding_mode=padding_mode)
+
+    num_channels, height, width = img.shape[-3:]
+    extra_dims = img.shape[:-3]
+
+    # ``reshape`` rather than ``view``: merging the leading dimensions of a non-contiguous tensor
+    # (e.g. one produced by ``transpose``) is not always expressible as a view.
+    padded_image = _FT.pad(
+        img=img.reshape(-1, num_channels, height, width), padding=padding, fill=fill, padding_mode=padding_mode
+    )
+
+    new_height, new_width = padded_image.shape[-2:]
+    return padded_image.reshape(extra_dims + (num_channels, new_height, new_width))
+
+
+# TODO: This should be removed once pytorch pad supports non-scalar padding values
+def _pad_with_vector_fill(
+    img: torch.Tensor, padding: List[int], fill: Union[float, List[float]] = 0.0, padding_mode: str = "constant"
+) -> torch.Tensor:
+    """Constant-pad ``img`` and paint the padded border with a per-channel ``fill``.
+
+    The image is first padded with zeros through ``pad_image_tensor``; the four border
+    strips are then overwritten with ``fill`` reshaped to ``(-1, 1, 1)`` so that it
+    broadcasts over height and width.
+
+    Args:
+        img: image tensor; its dtype and device are used for the fill tensor.
+        padding: an int, or a list of 1, 2 or 4 ints read as ``[all sides]``,
+            ``[left/right, top/bottom]`` or ``[left, top, right, bottom]``.
+        fill: a scalar or a list of fill values.
+        padding_mode: must be ``"constant"``.
+
+    Returns:
+        The padded tensor.
+
+    Raises:
+        ValueError: if ``padding_mode`` is not ``"constant"``.
+    """
+    if padding_mode != "constant":
+        raise ValueError(f"Padding mode '{padding_mode}' is not supported if fill is not scalar")
+
+    output = pad_image_tensor(img, padding, fill=0, padding_mode="constant")
+
+    # Expand the short padding forms so the four-way unpacking cannot fail on them.
+    if isinstance(padding, int):
+        padding = [padding]
+    if len(padding) == 1:
+        left = top = right = bottom = padding[0]
+    elif len(padding) == 2:
+        left = right = padding[0]
+        top = bottom = padding[1]
+    else:
+        left, top, right, bottom = padding
+
+    fill_tensor = torch.tensor(fill, dtype=img.dtype, device=img.device).view(-1, 1, 1)
+
+    # Only positive paddings add a border strip to overwrite.
+    if top > 0:
+        output[..., :top, :] = fill_tensor
+    if left > 0:
+        output[..., :, :left] = fill_tensor
+    if bottom > 0:
+        output[..., -bottom:, :] = fill_tensor
+    if right > 0:
+        output[..., :, -right:] = fill_tensor
+    return output
+
+
 def pad_segmentation_mask(
     segmentation_mask: torch.Tensor, padding: List[int], padding_mode: str = "constant"
 ) -> torch.Tensor:
@@ -537,13 +572,19 @@ def pad_bounding_box(
     return bounding_box
 
 
-def pad(inpt: Any, padding: List[int], fill: int = 0, padding_mode: str = "constant") -> Any:
+def pad(
+    inpt: Any, padding: List[int], fill: Union[float, Sequence[float]] = 0.0, padding_mode: str = "constant"
+) -> Any:
+    """Dispatch padding on the type of ``inpt``.
+
+    ``features._Feature`` inputs use their own ``pad`` method, PIL images use ``pad_image_pil`` and plain
+    tensors use ``pad_image_tensor`` (scalar ``fill``) or ``_pad_with_vector_fill`` (any other ``fill``).
+    Inputs of any other type are returned unchanged.
+    """
     if isinstance(inpt, features._Feature):
         return inpt.pad(padding, fill=fill, padding_mode=padding_mode)
     elif isinstance(inpt, PIL.Image.Image):
         return pad_image_pil(inpt, padding, fill=fill, padding_mode=padding_mode)
     elif isinstance(inpt, torch.Tensor):
-        return pad_image_tensor(inpt, padding, fill=fill, padding_mode=padding_mode)
+        # PyTorch's pad supports only scalars on fill. So we need to overwrite the colour
+        if isinstance(fill, (int, float)):
+            return pad_image_tensor(inpt, padding, fill=fill, padding_mode=padding_mode)
+        else:
+            return _pad_with_vector_fill(inpt, padding, fill=fill, padding_mode=padding_mode)
     else:
         return inpt