From c5855d684ace2667314c5902f990d42265995908 Mon Sep 17 00:00:00 2001
From: vfdev-5
Date: Thu, 7 Apr 2022 09:17:30 +0000
Subject: [PATCH 1/7] [proto] Added crop_bounding_box op

---
 test/test_prototype_transforms_functional.py | 58 +++++++++++++++++++
 .../transforms/functional/__init__.py        |  3 +-
 .../transforms/functional/_geometry.py       | 26 +++++++++
 3 files changed, 86 insertions(+), 1 deletion(-)

diff --git a/test/test_prototype_transforms_functional.py b/test/test_prototype_transforms_functional.py
index 2c8540f093c..a9e250a22b9 100644
--- a/test/test_prototype_transforms_functional.py
+++ b/test/test_prototype_transforms_functional.py
@@ -321,6 +321,20 @@ def rotate_segmentation_mask():
     )
 
 
+@register_kernel_info_from_sample_inputs_fn
+def crop_bounding_box():
+    for bounding_box, top, left in itertools.product(make_bounding_boxes(), [-8, 0, 9], [-8, 0, 9]):
+        yield SampleInput(
+            bounding_box,
+            format=bounding_box.format,
+            image_size=bounding_box.image_size,
+            top=top,
+            left=left,
+            height=top + 10,  # this argument is unused
+            width=left + 10,  # this argument is unused
+        )
+
+
 @pytest.mark.parametrize(
     "kernel",
     [
@@ -808,3 +822,47 @@ def test_correctness_rotate_segmentation_mask_on_fixed_input(device):
     expected_mask = torch.rot90(mask, k=1, dims=(-2, -1))
     out_mask = F.rotate_segmentation_mask(mask, 90, expand=False)
     torch.testing.assert_close(out_mask, expected_mask)
+
+
+@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize(
+    "top, left, height, width, expected_bboxes",
+    [
+        [8, 12, 30, 40, [(-2.0, 7.0, 13.0, 27.0), (38.0, -3.0, 58.0, 14.0), (33.0, 38.0, 44.0, 54.0)]],
+        [-8, 12, 70, 40, [(-2.0, 23.0, 13.0, 43.0), (38.0, 13.0, 58.0, 30.0), (33.0, 54.0, 44.0, 70.0)]],
+    ],
+)
+def test_correctness_crop_bounding_box(device, top, left, height, width, expected_bboxes):
+
+    # Expected bboxes computed using Albumentations:
+    # import numpy as np
+    # from albumentations.augmentations.crops.functional import crop_bbox_by_coords, normalize_bbox, denormalize_bbox
+    # expected_bboxes = []
+    # for in_box in in_boxes:
+    #     n_in_box = normalize_bbox(in_box, *size)
+    #     n_out_box = crop_bbox_by_coords(
+    #         n_in_box, (left, top, left + width, top + height), height, width, *size
+    #     )
+    #     out_box = denormalize_bbox(n_out_box, height, width)
+    #     expected_bboxes.append(out_box)
+
+    size = (64, 76)
+    # xyxy format
+    in_boxes = [
+        [10.0, 15.0, 25.0, 35.0],
+        [50.0, 5.0, 70.0, 22.0],
+        [45.0, 46.0, 56.0, 62.0],
+    ]
+    in_boxes = features.BoundingBox(in_boxes, format=features.BoundingBoxFormat.XYXY, image_size=size, device=device)
+
+    output_boxes = F.crop_bounding_box(
+        in_boxes,
+        in_boxes.format,
+        in_boxes.image_size,
+        top,
+        left,
+        height,
+        width,
+    )
+
+    torch.testing.assert_close(output_boxes.tolist(), expected_bboxes)
diff --git a/torchvision/prototype/transforms/functional/__init__.py b/torchvision/prototype/transforms/functional/__init__.py
index 64d47958b96..decf9e21020 100644
--- a/torchvision/prototype/transforms/functional/__init__.py
+++ b/torchvision/prototype/transforms/functional/__init__.py
@@ -57,9 +57,10 @@
     rotate_image_tensor,
     rotate_image_pil,
     rotate_segmentation_mask,
+    pad_bounding_box,
     pad_image_tensor,
     pad_image_pil,
-    pad_bounding_box,
+    crop_bounding_box,
     crop_image_tensor,
     crop_image_pil,
     perspective_image_tensor,
diff --git a/torchvision/prototype/transforms/functional/_geometry.py b/torchvision/prototype/transforms/functional/_geometry.py
index 7629766c0e2..755e51ce81c 100644
--- a/torchvision/prototype/transforms/functional/_geometry.py
+++ b/torchvision/prototype/transforms/functional/_geometry.py
@@ -419,6 +419,32 @@ def pad_bounding_box(
 crop_image_pil = _FP.crop
 
 
+def crop_bounding_box(
+    bounding_box: torch.Tensor,
+    format: features.BoundingBoxFormat,
+    image_size: Tuple[int, int],
+    top: int,
+    left: int,
+    height: int,
+    width: int,
+) -> torch.Tensor:
+    # Work on a flattened XYXY view of the boxes; the original format
+    # and shape are restored before returning.
+    shape = bounding_box.shape
+
+    bounding_box = convert_bounding_box_format(
+        bounding_box, old_format=format, new_format=features.BoundingBoxFormat.XYXY
+    ).view(-1, 4)
+
+    # Crop, or implicitly pad if left and/or top are negative:
+    bounding_box[:, 0::2] -= left
+    bounding_box[:, 1::2] -= top
+
+    return convert_bounding_box_format(
+        bounding_box, old_format=features.BoundingBoxFormat.XYXY, new_format=format, copy=False
+    ).view(shape)
+
+
 def perspective_image_tensor(
     img: torch.Tensor,
     perspective_coeffs: List[float],
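
A quick usage sketch of the kernel added above (assuming the prototype import paths
the tests use; the box values are taken from the first
test_correctness_crop_bounding_box case, since cropping only translates coordinates
by the crop origin):

import torch
from torchvision.prototype import features
from torchvision.prototype.transforms import functional as F

# Three XYXY boxes on a 64x76 (height, width) image.
in_boxes = features.BoundingBox(
    [[10.0, 15.0, 25.0, 35.0], [50.0, 5.0, 70.0, 22.0], [45.0, 46.0, 56.0, 62.0]],
    format=features.BoundingBoxFormat.XYXY,
    image_size=(64, 76),
)

# x coordinates shift by -left, y coordinates by -top; height/width are
# accepted but unused by this kernel (see the sample-input comments above).
out_boxes = F.crop_bounding_box(
    in_boxes, in_boxes.format, in_boxes.image_size, top=8, left=12, height=30, width=40
)
print(out_boxes.tolist())
# [[-2.0, 7.0, 13.0, 27.0], [38.0, -3.0, 58.0, 14.0], [33.0, 38.0, 44.0, 54.0]]

Note that later patches in this series call crop_bounding_box as
(boxes, format, top, left); the image_size/height/width arguments shown here match
PATCH 1 only.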

From 6ccd26e2a404febc52787a3e132a4c028321e147 Mon Sep 17 00:00:00 2001
From: vfdev-5
Date: Thu, 21 Apr 2022 11:04:30 +0000
Subject: [PATCH 2/7] Added `crop_segmentation_mask` op

---
 test/test_prototype_transforms_functional.py | 55 +++++++++++++++++++
 .../transforms/functional/__init__.py        |  1 +
 .../transforms/functional/_geometry.py       |  4 ++
 3 files changed, 60 insertions(+)

diff --git a/test/test_prototype_transforms_functional.py b/test/test_prototype_transforms_functional.py
index 91623854330..de49d8a8bef 100644
--- a/test/test_prototype_transforms_functional.py
+++ b/test/test_prototype_transforms_functional.py
@@ -332,6 +332,20 @@ def crop_bounding_box():
     )
 
 
+@register_kernel_info_from_sample_inputs_fn
+def crop_segmentation_mask():
+    for mask, top, left, height, width in itertools.product(
+        make_segmentation_masks(), [-8, 0, 9], [-8, 0, 9], [12, 20], [12, 20]
+    ):
+        yield SampleInput(
+            mask,
+            top=top,
+            left=left,
+            height=height,
+            width=width,
+        )
+
+
 @pytest.mark.parametrize(
     "kernel",
     [
@@ -860,3 +874,44 @@ def test_correctness_crop_bounding_box(device, top, left, height, width, expecte
     )
 
     torch.testing.assert_close(output_boxes.tolist(), expected_bboxes)
+
+
+@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize(
+    "top, left, height, width",
+    [
+        [4, 6, 30, 40],
+        [-8, 6, 70, 40],
+        [-8, -6, 70, 8],
+    ],
+)
+def test_correctness_crop_segmentation_mask(device, top, left, height, width):
+    def _compute_expected_mask(mask, top_, left_, height_, width_):
+        h, w = mask.shape[-2], mask.shape[-1]
+        if top_ >= 0 and left_ >= 0 and top_ + height_ < h and left_ + width_ < w:
+            expected = mask[..., top_ : top_ + height_, left_ : left_ + width_]
+        else:
+            # Create output mask
+            expected_shape = mask.shape[:-2] + (height_, width_)
+            expected = torch.zeros(expected_shape, device=mask.device, dtype=mask.dtype)
+
+            out_y1 = abs(top_) if top_ < 0 else 0
+            out_y2 = h - top_ if top_ + height_ >= h else height_
+            out_x1 = abs(left_) if left_ < 0 else 0
+            out_x2 = w - left_ if left_ + width_ >= w else width_
+
+            in_y1 = 0 if top_ < 0 else top_
+            in_y2 = h if top_ + height_ >= h else top_ + height_
+            in_x1 = 0 if left_ < 0 else left_
+            in_x2 = w if left_ + width_ >= w else left_ + width_
+            # Paste input mask into output
+            expected[..., out_y1:out_y2, out_x1:out_x2] = mask[..., in_y1:in_y2, in_x1:in_x2]
+
+        return expected
+
+    for mask in make_segmentation_masks():
+        if mask.device != torch.device(device):
+            mask = mask.to(device)
+        output_mask = F.crop_segmentation_mask(mask, top, left, height, width)
+        expected_mask = _compute_expected_mask(mask, top, left, height, width)
+        torch.testing.assert_close(output_mask, expected_mask)
diff --git a/torchvision/prototype/transforms/functional/__init__.py b/torchvision/prototype/transforms/functional/__init__.py
index decf9e21020..bbfa9584d88 100644
--- a/torchvision/prototype/transforms/functional/__init__.py
+++ b/torchvision/prototype/transforms/functional/__init__.py
@@ -63,6 +63,7 @@
     crop_bounding_box,
     crop_image_tensor,
     crop_image_pil,
+    crop_segmentation_mask,
     perspective_image_tensor,
     perspective_image_pil,
     vertical_flip_image_tensor,
diff --git a/torchvision/prototype/transforms/functional/_geometry.py b/torchvision/prototype/transforms/functional/_geometry.py
index 71be0a22c00..3eabc1725b7 100644
--- a/torchvision/prototype/transforms/functional/_geometry.py
+++ b/torchvision/prototype/transforms/functional/_geometry.py
@@ -440,6 +440,10 @@ def crop_bounding_box(
     ).view(shape)
 
 
+def crop_segmentation_mask(img: torch.Tensor, top: int, left: int, height: int, width: int):
+    return crop_image_tensor(img, top, left, height, width)
+
+
 def perspective_image_tensor(
     img: torch.Tensor,
     perspective_coeffs: List[float],
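
Per the expected-mask helper in the test above, crop_segmentation_mask (which defers
to crop_image_tensor) zero-fills any part of the requested window that falls outside
the input. A minimal sketch, under the same import assumptions as before:

import torch
from torchvision.prototype.transforms import functional as F

mask = torch.zeros(1, 64, 76, dtype=torch.long)
mask[0, 10:20, 10:20] = 1

# top=-8 starts the window above the input, so the first 8 output rows
# are zeros; the requested 70x40 output size is honored regardless.
out = F.crop_segmentation_mask(mask, top=-8, left=6, height=70, width=40)
print(out.shape)  # torch.Size([1, 70, 40])
print(int(out[0, 18:28, 4:14].min()))  # 1 -- content shifted down by 8, left by 6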

From 6f12767a248cedd39c5989264a8bd53698c87e7f Mon Sep 17 00:00:00 2001
From: vfdev-5
Date: Thu, 21 Apr 2022 12:59:28 +0000
Subject: [PATCH 3/7] Fixed failed mypy

---
 torchvision/prototype/transforms/functional/_geometry.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/torchvision/prototype/transforms/functional/_geometry.py b/torchvision/prototype/transforms/functional/_geometry.py
index 3eabc1725b7..d4f1fadb0bf 100644
--- a/torchvision/prototype/transforms/functional/_geometry.py
+++ b/torchvision/prototype/transforms/functional/_geometry.py
@@ -440,7 +440,7 @@ def crop_bounding_box(
     ).view(shape)
 
 
-def crop_segmentation_mask(img: torch.Tensor, top: int, left: int, height: int, width: int):
+def crop_segmentation_mask(img: torch.Tensor, top: int, left: int, height: int, width: int) -> torch.Tensor:
     return crop_image_tensor(img, top, left, height, width)
 
 

From 6306c068bf8bf3bc0ce8627bc5d1b9349204f57d Mon Sep 17 00:00:00 2001
From: vfdev-5
Date: Thu, 21 Apr 2022 15:24:38 +0000
Subject: [PATCH 4/7] Added tests for resized_crop_bounding_box

---
 test/test_prototype_transforms_functional.py | 75 +++++++++++++++++++-
 1 file changed, 73 insertions(+), 2 deletions(-)

diff --git a/test/test_prototype_transforms_functional.py b/test/test_prototype_transforms_functional.py
index 91623854330..9adf623ebc7 100644
--- a/test/test_prototype_transforms_functional.py
+++ b/test/test_prototype_transforms_functional.py
@@ -332,6 +332,22 @@ def crop_bounding_box():
     )
 
 
+@register_kernel_info_from_sample_inputs_fn
+def resized_crop_bounding_box():
+    for bounding_box, top, left, height, width, size in itertools.product(
+        make_bounding_boxes(), [-8, 9], [-8, 9], [32, 22], [34, 20], [(32, 32), (16, 18)]
+    ):
+        yield SampleInput(
+            bounding_box,
+            format=bounding_box.format,
+            top=top,
+            left=left,
+            height=height,
+            width=width,
+            size=size
+        )
+
+
 @pytest.mark.parametrize(
     "kernel",
     [
@@ -822,6 +838,10 @@ def test_correctness_rotate_segmentation_mask_on_fixed_input(device):
 
 
 @pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize(
+    "format",
+    [features.BoundingBoxFormat.XYXY, features.BoundingBoxFormat.XYWH, features.BoundingBoxFormat.CXCYWH],
+)
 @pytest.mark.parametrize(
     "top, left, height, width, expected_bboxes",
     [
@@ -829,7 +849,7 @@
         [8, 12, 30, 40, [(-2.0, 7.0, 13.0, 27.0), (38.0, -3.0, 58.0, 14.0), (33.0, 38.0, 44.0, 54.0)]],
        [-8, 12, 70, 40, [(-2.0, 23.0, 13.0, 43.0), (38.0, 13.0, 58.0, 30.0), (33.0, 54.0, 44.0, 70.0)]],
     ],
 )
-def test_correctness_crop_bounding_box(device, top, left, height, width, expected_bboxes):
+def test_correctness_crop_bounding_box(device, format, top, left, height, width, expected_bboxes):
 
     # Expected bboxes computed using Albumentations:
     # import numpy as np
     # from albumentations.augmentations.crops.functional import crop_bbox_by_coords, normalize_bbox, denormalize_bbox
     # expected_bboxes = []
     # for in_box in in_boxes:
     #     n_in_box = normalize_bbox(in_box, *size)
     #     n_out_box = crop_bbox_by_coords(
     #         n_in_box, (left, top, left + width, top + height), height, width, *size
     #     )
     #     out_box = denormalize_bbox(n_out_box, height, width)
     #     expected_bboxes.append(out_box)
@@ -851,12 +871,63 @@ def test_correctness_crop_bounding_box(device, top, left, height, width, expecte
         [45.0, 46.0, 56.0, 62.0],
     ]
     in_boxes = features.BoundingBox(in_boxes, format=features.BoundingBoxFormat.XYXY, image_size=size, device=device)
+    if format != features.BoundingBoxFormat.XYXY:
+        in_boxes = convert_bounding_box_format(in_boxes, features.BoundingBoxFormat.XYXY, format)
 
     output_boxes = F.crop_bounding_box(
         in_boxes,
-        in_boxes.format,
+        format,
         top,
         left,
     )
 
+    if format != features.BoundingBoxFormat.XYXY:
+        output_boxes = convert_bounding_box_format(output_boxes, format, features.BoundingBoxFormat.XYXY)
+
     torch.testing.assert_close(output_boxes.tolist(), expected_bboxes)
+
+
+@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize(
+    "format",
+    [features.BoundingBoxFormat.XYXY, features.BoundingBoxFormat.XYWH, features.BoundingBoxFormat.CXCYWH],
+)
+@pytest.mark.parametrize(
+    "top, left, height, width, size",
+    [
+        [0, 0, 30, 30, (60, 60)],
+        [-5, 5, 35, 45, (32, 34)],
+    ],
+)
+def test_correctness_resized_crop_bounding_box(device, format, top, left, height, width, size):
+    def _compute_expected(bbox, top_, left_, height_, width_, size_):
+        # bbox should be xyxy
+        bbox[0] = (bbox[0] - left_) * size_[1] / width_
+        bbox[1] = (bbox[1] - top_) * size_[0] / height_
+        bbox[2] = (bbox[2] - left_) * size_[1] / width_
+        bbox[3] = (bbox[3] - top_) * size_[0] / height_
+        return bbox
+
+    image_size = (100, 100)
+    # xyxy format
+    in_boxes = [
+        [10.0, 10.0, 20.0, 20.0],
+        [5.0, 10.0, 15.0, 20.0],
+    ]
+    expected_bboxes = []
+    for in_box in in_boxes:
+        expected_bboxes.append(_compute_expected(list(in_box), top, left, height, width, size))
+    expected_bboxes = torch.tensor(expected_bboxes, device=device)
+
+    in_boxes = features.BoundingBox(
+        in_boxes, format=features.BoundingBoxFormat.XYXY, image_size=image_size, device=device
+    )
+    if format != features.BoundingBoxFormat.XYXY:
+        in_boxes = convert_bounding_box_format(in_boxes, features.BoundingBoxFormat.XYXY, format)
+
+    output_boxes = F.resized_crop_bounding_box(in_boxes, format, top, left, height, width, size)
+
+    if format != features.BoundingBoxFormat.XYXY:
+        output_boxes = convert_bounding_box_format(output_boxes, format, features.BoundingBoxFormat.XYXY)
+
+    torch.testing.assert_close(output_boxes, expected_bboxes)
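
The _compute_expected helper above pins down the resized-crop box arithmetic:
translate each coordinate by the crop origin, then scale it by
output_size / crop_extent along its axis. Worked by hand for the second
parametrized case (the values are the test's own):

# XYXY box [10, 10, 20, 20]; crop top=-5, left=5, height=35, width=45;
# output size (32, 34) given as (height, width).
top, left, height, width, size = -5, 5, 35, 45, (32, 34)
x1, y1, x2, y2 = 10.0, 10.0, 20.0, 20.0
expected = [
    (x1 - left) * size[1] / width,   # (10 - 5) * 34 / 45 ~= 3.78
    (y1 - top) * size[0] / height,   # (10 + 5) * 32 / 35 ~= 13.71
    (x2 - left) * size[1] / width,   # (20 - 5) * 34 / 45 ~= 11.33
    (y2 - top) * size[0] / height,   # (20 + 5) * 32 / 35 ~= 22.86
]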

From d712f1d1b3bea8500a42090c8b50964bfb41da65 Mon Sep 17 00:00:00 2001
From: vfdev-5
Date: Thu, 21 Apr 2022 15:36:18 +0000
Subject: [PATCH 5/7] Fixed code formatting

---
 test/test_prototype_transforms_functional.py | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/test/test_prototype_transforms_functional.py b/test/test_prototype_transforms_functional.py
index 9adf623ebc7..78e419ca2c1 100644
--- a/test/test_prototype_transforms_functional.py
+++ b/test/test_prototype_transforms_functional.py
@@ -338,13 +338,7 @@ def resized_crop_bounding_box():
         make_bounding_boxes(), [-8, 9], [-8, 9], [32, 22], [34, 20], [(32, 32), (16, 18)]
     ):
         yield SampleInput(
-            bounding_box,
-            format=bounding_box.format,
-            top=top,
-            left=left,
-            height=height,
-            width=width,
-            size=size
+            bounding_box, format=bounding_box.format, top=top, left=left, height=height, width=width, size=size
         )
 
 

From 86727f5a34e7ff9303d0b3a3ee35e4b8871a1aa9 Mon Sep 17 00:00:00 2001
From: vfdev-5
Date: Thu, 21 Apr 2022 16:00:01 +0000
Subject: [PATCH 6/7] Added resized_crop_segmentation_mask op

---
 .../prototype/transforms/functional/__init__.py  |  3 ++-
 .../prototype/transforms/functional/_geometry.py | 12 ++++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/torchvision/prototype/transforms/functional/__init__.py b/torchvision/prototype/transforms/functional/__init__.py
index 7069f17c414..dfbc81baea3 100644
--- a/torchvision/prototype/transforms/functional/__init__.py
+++ b/torchvision/prototype/transforms/functional/__init__.py
@@ -47,9 +47,10 @@
     resize_segmentation_mask,
     center_crop_image_tensor,
     center_crop_image_pil,
+    resized_crop_bounding_box,
     resized_crop_image_tensor,
     resized_crop_image_pil,
-    resized_crop_bounding_box,
+    resized_crop_segmentation_mask,
     affine_bounding_box,
     affine_image_tensor,
     affine_image_pil,
diff --git a/torchvision/prototype/transforms/functional/_geometry.py b/torchvision/prototype/transforms/functional/_geometry.py
index a9a427ad029..191d3510525 100644
--- a/torchvision/prototype/transforms/functional/_geometry.py
+++ b/torchvision/prototype/transforms/functional/_geometry.py
@@ -559,6 +559,18 @@ def resized_crop_bounding_box(
     return resize_bounding_box(bounding_box, size, (height, width))
 
 
+def resized_crop_segmentation_mask(
+    mask: torch.Tensor,
+    top: int,
+    left: int,
+    height: int,
+    width: int,
+    size: List[int],
+) -> torch.Tensor:
+    mask = crop_segmentation_mask(mask, top, left, height, width)
+    return resize_segmentation_mask(mask, size)
+
+
 def _parse_five_crop_size(size: List[int]) -> List[int]:
     if isinstance(size, numbers.Number):
         size = [int(size), int(size)]
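
resized_crop_segmentation_mask is a plain composition of the two mask kernels, so a
call is equivalent to cropping and then resizing; the tests added in the next patch
pin the resize step to nearest-neighbor interpolation, which keeps integer class
labels intact. A sketch under the same import assumptions as the earlier examples:

import torch
from torchvision.prototype.transforms import functional as F

mask = torch.zeros(1, 100, 100, dtype=torch.long)
mask[0, 10:20, 10:20] = 1

out = F.resized_crop_segmentation_mask(mask, top=5, left=5, height=35, width=45, size=[32, 34])

# Equivalent two-step form, by the definition added above:
ref = F.resize_segmentation_mask(
    F.crop_segmentation_mask(mask, top=5, left=5, height=35, width=45), [32, 34]
)
torch.testing.assert_close(out, ref)  # same kernels applied in the same order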

From fcb8492682606f5d307a8d58719eb99cfe323c0c Mon Sep 17 00:00:00 2001
From: vfdev-5
Date: Mon, 25 Apr 2022 09:58:06 +0000
Subject: [PATCH 7/7] Added tests

---
 test/test_prototype_transforms_functional.py | 33 +++++++++++++++++
 .../transforms/functional/_geometry.py       |  4 ---
 2 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/test/test_prototype_transforms_functional.py b/test/test_prototype_transforms_functional.py
index 2da3aa4696a..36d1677ede5 100644
--- a/test/test_prototype_transforms_functional.py
+++ b/test/test_prototype_transforms_functional.py
@@ -362,6 +362,14 @@ def resized_crop_bounding_box():
         )
 
 
+@register_kernel_info_from_sample_inputs_fn
+def resized_crop_segmentation_mask():
+    for mask, top, left, height, width, size in itertools.product(
+        make_segmentation_masks(), [-8, 0, 9], [-8, 0, 9], [12, 20], [12, 20], [(32, 32), (16, 18)]
+    ):
+        yield SampleInput(mask, top=top, left=left, height=height, width=width, size=size)
+
+
 @pytest.mark.parametrize(
     "kernel",
     [
@@ -998,3 +1006,28 @@ def _compute_expected(bbox, top_, left_, height_, width_, size_):
     output_boxes = convert_bounding_box_format(output_boxes, format, features.BoundingBoxFormat.XYXY)
 
     torch.testing.assert_close(output_boxes, expected_bboxes)
+
+
+@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize(
+    "top, left, height, width, size",
+    [
+        [0, 0, 30, 30, (60, 60)],
+        [5, 5, 35, 45, (32, 34)],
+    ],
+)
+def test_correctness_resized_crop_segmentation_mask(device, top, left, height, width, size):
+    def _compute_expected(mask, top_, left_, height_, width_, size_):
+        output = mask.clone()
+        output = output[:, top_ : top_ + height_, left_ : left_ + width_]
+        output = torch.nn.functional.interpolate(output[None, :].float(), size=size_, mode="nearest")
+        output = output[0, :].long()
+        return output
+
+    in_mask = torch.zeros(1, 100, 100, dtype=torch.long, device=device)
+    in_mask[0, 10:20, 10:20] = 1
+    in_mask[0, 5:15, 12:23] = 2
+
+    expected_mask = _compute_expected(in_mask, top, left, height, width, size)
+    output_mask = F.resized_crop_segmentation_mask(in_mask, top, left, height, width, size)
+    torch.testing.assert_close(output_mask, expected_mask)
diff --git a/torchvision/prototype/transforms/functional/_geometry.py b/torchvision/prototype/transforms/functional/_geometry.py
index 801e4979181..5f9e77fdbf4 100644
--- a/torchvision/prototype/transforms/functional/_geometry.py
+++ b/torchvision/prototype/transforms/functional/_geometry.py
@@ -440,10 +440,6 @@ def crop_segmentation_mask(img: torch.Tensor, top: int, left: int, height: int,
     return crop_image_tensor(img, top, left, height, width)
 
 
-def crop_segmentation_mask(img: torch.Tensor, top: int, left: int, height: int, width: int) -> torch.Tensor:
-    return crop_image_tensor(img, top, left, height, width)
-
-
 def perspective_image_tensor(
     img: torch.Tensor,
     perspective_coeffs: List[float],