From c5855d684ace2667314c5902f990d42265995908 Mon Sep 17 00:00:00 2001
From: vfdev-5
Date: Thu, 7 Apr 2022 09:17:30 +0000
Subject: [PATCH 1/7] [proto] Added crop_bounding_box op

---
 test/test_prototype_transforms_functional.py | 58 +++++++++++++++++++
 .../transforms/functional/__init__.py        |  3 +-
 .../transforms/functional/_geometry.py       | 26 +++++++++
 3 files changed, 86 insertions(+), 1 deletion(-)

diff --git a/test/test_prototype_transforms_functional.py b/test/test_prototype_transforms_functional.py
index 2c8540f093c..a9e250a22b9 100644
--- a/test/test_prototype_transforms_functional.py
+++ b/test/test_prototype_transforms_functional.py
@@ -321,6 +321,20 @@ def rotate_segmentation_mask():
     )
 
 
+@register_kernel_info_from_sample_inputs_fn
+def crop_bounding_box():
+    for bounding_box, top, left in itertools.product(make_bounding_boxes(), [-8, 0, 9], [-8, 0, 9]):
+        yield SampleInput(
+            bounding_box,
+            format=bounding_box.format,
+            image_size=bounding_box.image_size,
+            top=top,
+            left=left,
+            height=top + 10,  # this argument is unused
+            width=left + 10,  # this argument is unused
+        )
+
+
 @pytest.mark.parametrize(
     "kernel",
     [
@@ -808,3 +822,47 @@ def test_correctness_rotate_segmentation_mask_on_fixed_input(device):
     expected_mask = torch.rot90(mask, k=1, dims=(-2, -1))
     out_mask = F.rotate_segmentation_mask(mask, 90, expand=False)
     torch.testing.assert_close(out_mask, expected_mask)
+
+
+@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize(
+    "top, left, height, width, expected_bboxes",
+    [
+        [8, 12, 30, 40, [(-2.0, 7.0, 13.0, 27.0), (38.0, -3.0, 58.0, 14.0), (33.0, 38.0, 44.0, 54.0)]],
+        [-8, 12, 70, 40, [(-2.0, 23.0, 13.0, 43.0), (38.0, 13.0, 58.0, 30.0), (33.0, 54.0, 44.0, 70.0)]],
+    ],
+)
+def test_correctness_crop_bounding_box(device, top, left, height, width, expected_bboxes):
+
+    # Expected bboxes computed using Albumentations:
+    # import numpy as np
+    # from albumentations.augmentations.crops.functional import crop_bbox_by_coords, normalize_bbox, denormalize_bbox
+    # expected_bboxes = []
+    # for in_box in in_boxes:
+    #     n_in_box = normalize_bbox(in_box, *size)
+    #     n_out_box = crop_bbox_by_coords(
+    #         n_in_box, (left, top, left + width, top + height), height, width, *size
+    #     )
+    #     out_box = denormalize_bbox(n_out_box, height, width)
+    #     expected_bboxes.append(out_box)
+
+    size = (64, 76)
+    # xyxy format
+    in_boxes = [
+        [10.0, 15.0, 25.0, 35.0],
+        [50.0, 5.0, 70.0, 22.0],
+        [45.0, 46.0, 56.0, 62.0],
+    ]
+    in_boxes = features.BoundingBox(in_boxes, format=features.BoundingBoxFormat.XYXY, image_size=size, device=device)
+
+    output_boxes = F.crop_bounding_box(
+        in_boxes,
+        in_boxes.format,
+        in_boxes.image_size,
+        top,
+        left,
+        height,
+        width,
+    )
+
+    torch.testing.assert_close(output_boxes.tolist(), expected_bboxes)
diff --git a/torchvision/prototype/transforms/functional/__init__.py b/torchvision/prototype/transforms/functional/__init__.py
index 64d47958b96..decf9e21020 100644
--- a/torchvision/prototype/transforms/functional/__init__.py
+++ b/torchvision/prototype/transforms/functional/__init__.py
@@ -57,9 +57,10 @@
     rotate_image_tensor,
     rotate_image_pil,
     rotate_segmentation_mask,
+    pad_bounding_box,
     pad_image_tensor,
     pad_image_pil,
-    pad_bounding_box,
+    crop_bounding_box,
     crop_image_tensor,
     crop_image_pil,
     perspective_image_tensor,
diff --git a/torchvision/prototype/transforms/functional/_geometry.py b/torchvision/prototype/transforms/functional/_geometry.py
index 7629766c0e2..755e51ce81c 100644
--- a/torchvision/prototype/transforms/functional/_geometry.py
+++ b/torchvision/prototype/transforms/functional/_geometry.py
@@ -419,6 +419,32 @@ def pad_bounding_box(
 crop_image_pil = _FP.crop
 
 
+def crop_bounding_box(
+    bounding_box: torch.Tensor,
+    format: features.BoundingBoxFormat,
+    image_size: Tuple[int, int],
+    top: int,
+    left: int,
+    height: int,
+    width: int,
+) -> torch.Tensor:
+    # Work on a flattened XYXY view of the boxes; the original format
+    # and shape are restored before returning.
+    shape = bounding_box.shape
+
+    bounding_box = convert_bounding_box_format(
+        bounding_box, old_format=format, new_format=features.BoundingBoxFormat.XYXY
+    ).view(-1, 4)
+
+    # Crop, or implicitly pad if left and/or top are negative:
+    bounding_box[:, 0::2] -= left
+    bounding_box[:, 1::2] -= top
+
+    return convert_bounding_box_format(
+        bounding_box, old_format=features.BoundingBoxFormat.XYXY, new_format=format, copy=False
+    ).view(shape)
+
+
 def perspective_image_tensor(
     img: torch.Tensor,
     perspective_coeffs: List[float],
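
A quick usage sketch of the kernel added above (assuming the prototype import paths
the tests use; the box values are taken from the first
test_correctness_crop_bounding_box case, since cropping only translates coordinates
by the crop origin):

import torch
from torchvision.prototype import features
from torchvision.prototype.transforms import functional as F

# Three XYXY boxes on a 64x76 (height, width) image.
in_boxes = features.BoundingBox(
    [[10.0, 15.0, 25.0, 35.0], [50.0, 5.0, 70.0, 22.0], [45.0, 46.0, 56.0, 62.0]],
    format=features.BoundingBoxFormat.XYXY,
    image_size=(64, 76),
)

# x coordinates shift by -left, y coordinates by -top; height/width are
# accepted but unused by this kernel (see the sample-input comments above).
out_boxes = F.crop_bounding_box(
    in_boxes, in_boxes.format, in_boxes.image_size, top=8, left=12, height=30, width=40
)
print(out_boxes.tolist())
# [[-2.0, 7.0, 13.0, 27.0], [38.0, -3.0, 58.0, 14.0], [33.0, 38.0, 44.0, 54.0]]

Note that later patches in this series call crop_bounding_box as
(boxes, format, top, left); the image_size/height/width arguments shown here match
PATCH 1 only.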

From 6ccd26e2a404febc52787a3e132a4c028321e147 Mon Sep 17 00:00:00 2001
From: vfdev-5
Date: Thu, 21 Apr 2022 11:04:30 +0000
Subject: [PATCH 2/7] Added `crop_segmentation_mask` op

---
 test/test_prototype_transforms_functional.py | 55 +++++++++++++++++++
 .../transforms/functional/__init__.py        |  1 +
 .../transforms/functional/_geometry.py       |  4 ++
 3 files changed, 60 insertions(+)

diff --git a/test/test_prototype_transforms_functional.py b/test/test_prototype_transforms_functional.py
index 91623854330..de49d8a8bef 100644
--- a/test/test_prototype_transforms_functional.py
+++ b/test/test_prototype_transforms_functional.py
@@ -332,6 +332,20 @@ def crop_bounding_box():
     )
 
 
+@register_kernel_info_from_sample_inputs_fn
+def crop_segmentation_mask():
+    for mask, top, left, height, width in itertools.product(
+        make_segmentation_masks(), [-8, 0, 9], [-8, 0, 9], [12, 20], [12, 20]
+    ):
+        yield SampleInput(
+            mask,
+            top=top,
+            left=left,
+            height=height,
+            width=width,
+        )
+
+
 @pytest.mark.parametrize(
     "kernel",
     [
@@ -860,3 +874,44 @@ def test_correctness_crop_bounding_box(device, top, left, height, width, expecte
     )
 
     torch.testing.assert_close(output_boxes.tolist(), expected_bboxes)
+
+
+@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize(
+    "top, left, height, width",
+    [
+        [4, 6, 30, 40],
+        [-8, 6, 70, 40],
+        [-8, -6, 70, 8],
+    ],
+)
+def test_correctness_crop_segmentation_mask(device, top, left, height, width):
+    def _compute_expected_mask(mask, top_, left_, height_, width_):
+        h, w = mask.shape[-2], mask.shape[-1]
+        if top_ >= 0 and left_ >= 0 and top_ + height_ < h and left_ + width_ < w:
+            expected = mask[..., top_ : top_ + height_, left_ : left_ + width_]
+        else:
+            # Create output mask
+            expected_shape = mask.shape[:-2] + (height_, width_)
+            expected = torch.zeros(expected_shape, device=mask.device, dtype=mask.dtype)
+
+            out_y1 = abs(top_) if top_ < 0 else 0
+            out_y2 = h - top_ if top_ + height_ >= h else height_
+            out_x1 = abs(left_) if left_ < 0 else 0
+            out_x2 = w - left_ if left_ + width_ >= w else width_
+
+            in_y1 = 0 if top_ < 0 else top_
+            in_y2 = h if top_ + height_ >= h else top_ + height_
+            in_x1 = 0 if left_ < 0 else left_
+            in_x2 = w if left_ + width_ >= w else left_ + width_
+            # Paste input mask into output
+            expected[..., out_y1:out_y2, out_x1:out_x2] = mask[..., in_y1:in_y2, in_x1:in_x2]
+
+        return expected
+
+    for mask in make_segmentation_masks():
+        if mask.device != torch.device(device):
+            mask = mask.to(device)
+        output_mask = F.crop_segmentation_mask(mask, top, left, height, width)
+        expected_mask = _compute_expected_mask(mask, top, left, height, width)
+        torch.testing.assert_close(output_mask, expected_mask)
diff --git a/torchvision/prototype/transforms/functional/__init__.py b/torchvision/prototype/transforms/functional/__init__.py
index decf9e21020..bbfa9584d88 100644
--- a/torchvision/prototype/transforms/functional/__init__.py
+++ b/torchvision/prototype/transforms/functional/__init__.py
@@ -63,6 +63,7 @@
     crop_bounding_box,
     crop_image_tensor,
     crop_image_pil,
+    crop_segmentation_mask,
     perspective_image_tensor,
     perspective_image_pil,
     vertical_flip_image_tensor,
diff --git a/torchvision/prototype/transforms/functional/_geometry.py b/torchvision/prototype/transforms/functional/_geometry.py
index 71be0a22c00..3eabc1725b7 100644
--- a/torchvision/prototype/transforms/functional/_geometry.py
+++ b/torchvision/prototype/transforms/functional/_geometry.py
@@ -440,6 +440,10 @@ def crop_bounding_box(
     ).view(shape)
 
 
+def crop_segmentation_mask(img: torch.Tensor, top: int, left: int, height: int, width: int):
+    return crop_image_tensor(img, top, left, height, width)
+
+
 def perspective_image_tensor(
     img: torch.Tensor,
     perspective_coeffs: List[float],
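
Per the expected-mask helper in the test above, crop_segmentation_mask (which defers
to crop_image_tensor) zero-fills any part of the requested window that falls outside
the input. A minimal sketch, under the same import assumptions as before:

import torch
from torchvision.prototype.transforms import functional as F

mask = torch.zeros(1, 64, 76, dtype=torch.long)
mask[0, 10:20, 10:20] = 1

# top=-8 starts the window above the input, so the first 8 output rows
# are zeros; the requested 70x40 output size is honored regardless.
out = F.crop_segmentation_mask(mask, top=-8, left=6, height=70, width=40)
print(out.shape)  # torch.Size([1, 70, 40])
print(int(out[0, 18:28, 4:14].min()))  # 1 -- content shifted down by 8, left by 6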

From 6f12767a248cedd39c5989264a8bd53698c87e7f Mon Sep 17 00:00:00 2001
From: vfdev-5
Date: Thu, 21 Apr 2022 12:59:28 +0000
Subject: [PATCH 3/7] Fixed failed mypy

---
 torchvision/prototype/transforms/functional/_geometry.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/torchvision/prototype/transforms/functional/_geometry.py b/torchvision/prototype/transforms/functional/_geometry.py
index 3eabc1725b7..d4f1fadb0bf 100644
--- a/torchvision/prototype/transforms/functional/_geometry.py
+++ b/torchvision/prototype/transforms/functional/_geometry.py
@@ -440,7 +440,7 @@ def crop_bounding_box(
     ).view(shape)
 
 
-def crop_segmentation_mask(img: torch.Tensor, top: int, left: int, height: int, width: int):
+def crop_segmentation_mask(img: torch.Tensor, top: int, left: int, height: int, width: int) -> torch.Tensor:
     return crop_image_tensor(img, top, left, height, width)
 
 

From 6306c068bf8bf3bc0ce8627bc5d1b9349204f57d Mon Sep 17 00:00:00 2001
From: vfdev-5
Date: Thu, 21 Apr 2022 15:24:38 +0000
Subject: [PATCH 4/7] Added tests for resized_crop_bounding_box

---
 test/test_prototype_transforms_functional.py | 75 +++++++++++++++++++-
 1 file changed, 73 insertions(+), 2 deletions(-)

diff --git a/test/test_prototype_transforms_functional.py b/test/test_prototype_transforms_functional.py
index 91623854330..9adf623ebc7 100644
--- a/test/test_prototype_transforms_functional.py
+++ b/test/test_prototype_transforms_functional.py
@@ -332,6 +332,22 @@ def crop_bounding_box():
     )
 
 
+@register_kernel_info_from_sample_inputs_fn
+def resized_crop_bounding_box():
+    for bounding_box, top, left, height, width, size in itertools.product(
+        make_bounding_boxes(), [-8, 9], [-8, 9], [32, 22], [34, 20], [(32, 32), (16, 18)]
+    ):
+        yield SampleInput(
+            bounding_box,
+            format=bounding_box.format,
+            top=top,
+            left=left,
+            height=height,
+            width=width,
+            size=size
+        )
+
+
 @pytest.mark.parametrize(
     "kernel",
     [
@@ -822,6 +838,10 @@ def test_correctness_rotate_segmentation_mask_on_fixed_input(device):
 
 
 @pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize(
+    "format",
+    [features.BoundingBoxFormat.XYXY, features.BoundingBoxFormat.XYWH, features.BoundingBoxFormat.CXCYWH],
+)
 @pytest.mark.parametrize(
     "top, left, height, width, expected_bboxes",
     [
@@ -829,7 +849,7 @@
         [8, 12, 30, 40, [(-2.0, 7.0, 13.0, 27.0), (38.0, -3.0, 58.0, 14.0), (33.0, 38.0, 44.0, 54.0)]],
        [-8, 12, 70, 40, [(-2.0, 23.0, 13.0, 43.0), (38.0, 13.0, 58.0, 30.0), (33.0, 54.0, 44.0, 70.0)]],
     ],
 )
-def test_correctness_crop_bounding_box(device, top, left, height, width, expected_bboxes):
+def test_correctness_crop_bounding_box(device, format, top, left, height, width, expected_bboxes):
 
     # Expected bboxes computed using Albumentations:
     # import numpy as np
     # from albumentations.augmentations.crops.functional import crop_bbox_by_coords, normalize_bbox, denormalize_bbox
     # expected_bboxes = []
     # for in_box in in_boxes:
     #     n_in_box = normalize_bbox(in_box, *size)
     #     n_out_box = crop_bbox_by_coords(
     #         n_in_box, (left, top, left + width, top + height), height, width, *size
     #     )
     #     out_box = denormalize_bbox(n_out_box, height, width)
     #     expected_bboxes.append(out_box)
@@ -851,12 +871,63 @@ def test_correctness_crop_bounding_box(device, top, left, height, width, expecte
         [45.0, 46.0, 56.0, 62.0],
     ]
     in_boxes = features.BoundingBox(in_boxes, format=features.BoundingBoxFormat.XYXY, image_size=size, device=device)
+    if format != features.BoundingBoxFormat.XYXY:
+        in_boxes = convert_bounding_box_format(in_boxes, features.BoundingBoxFormat.XYXY, format)
 
     output_boxes = F.crop_bounding_box(
         in_boxes,
-        in_boxes.format,
+        format,
         top,
         left,
     )
 
+    if format != features.BoundingBoxFormat.XYXY:
+        output_boxes = convert_bounding_box_format(output_boxes, format, features.BoundingBoxFormat.XYXY)
+
     torch.testing.assert_close(output_boxes.tolist(), expected_bboxes)
+
+
+@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize(
+    "format",
+    [features.BoundingBoxFormat.XYXY, features.BoundingBoxFormat.XYWH, features.BoundingBoxFormat.CXCYWH],
+)
+@pytest.mark.parametrize(
+    "top, left, height, width, size",
+    [
+        [0, 0, 30, 30, (60, 60)],
+        [-5, 5, 35, 45, (32, 34)],
+    ],
+)
+def test_correctness_resized_crop_bounding_box(device, format, top, left, height, width, size):
+    def _compute_expected(bbox, top_, left_, height_, width_, size_):
+        # bbox should be xyxy
+        bbox[0] = (bbox[0] - left_) * size_[1] / width_
+        bbox[1] = (bbox[1] - top_) * size_[0] / height_
+        bbox[2] = (bbox[2] - left_) * size_[1] / width_
+        bbox[3] = (bbox[3] - top_) * size_[0] / height_
+        return bbox
+
+    image_size = (100, 100)
+    # xyxy format
+    in_boxes = [
+        [10.0, 10.0, 20.0, 20.0],
+        [5.0, 10.0, 15.0, 20.0],
+    ]
+    expected_bboxes = []
+    for in_box in in_boxes:
+        expected_bboxes.append(_compute_expected(list(in_box), top, left, height, width, size))
+    expected_bboxes = torch.tensor(expected_bboxes, device=device)
+
+    in_boxes = features.BoundingBox(
+        in_boxes, format=features.BoundingBoxFormat.XYXY, image_size=image_size, device=device
+    )
+    if format != features.BoundingBoxFormat.XYXY:
+        in_boxes = convert_bounding_box_format(in_boxes, features.BoundingBoxFormat.XYXY, format)
+
+    output_boxes = F.resized_crop_bounding_box(in_boxes, format, top, left, height, width, size)
+
+    if format != features.BoundingBoxFormat.XYXY:
+        output_boxes = convert_bounding_box_format(output_boxes, format, features.BoundingBoxFormat.XYXY)
+
+    torch.testing.assert_close(output_boxes, expected_bboxes)
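
The _compute_expected helper above pins down the resized-crop box arithmetic:
translate each coordinate by the crop origin, then scale it by
output_size / crop_extent along its axis. Worked by hand for the second
parametrized case (the values are the test's own):

# XYXY box [10, 10, 20, 20]; crop top=-5, left=5, height=35, width=45;
# output size (32, 34) given as (height, width).
top, left, height, width, size = -5, 5, 35, 45, (32, 34)
x1, y1, x2, y2 = 10.0, 10.0, 20.0, 20.0
expected = [
    (x1 - left) * size[1] / width,   # (10 - 5) * 34 / 45 ~= 3.78
    (y1 - top) * size[0] / height,   # (10 + 5) * 32 / 35 ~= 13.71
    (x2 - left) * size[1] / width,   # (20 - 5) * 34 / 45 ~= 11.33
    (y2 - top) * size[0] / height,   # (20 + 5) * 32 / 35 ~= 22.86
]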

From d712f1d1b3bea8500a42090c8b50964bfb41da65 Mon Sep 17 00:00:00 2001
From: vfdev-5
Date: Thu, 21 Apr 2022 15:36:18 +0000
Subject: [PATCH 5/7] Fixed code formatting

---
 test/test_prototype_transforms_functional.py | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/test/test_prototype_transforms_functional.py b/test/test_prototype_transforms_functional.py
index 9adf623ebc7..78e419ca2c1 100644
--- a/test/test_prototype_transforms_functional.py
+++ b/test/test_prototype_transforms_functional.py
@@ -338,13 +338,7 @@ def resized_crop_bounding_box():
         make_bounding_boxes(), [-8, 9], [-8, 9], [32, 22], [34, 20], [(32, 32), (16, 18)]
     ):
         yield SampleInput(
-            bounding_box,
-            format=bounding_box.format,
-            top=top,
-            left=left,
-            height=height,
-            width=width,
-            size=size
+            bounding_box, format=bounding_box.format, top=top, left=left, height=height, width=width, size=size
         )
 
 

From 86727f5a34e7ff9303d0b3a3ee35e4b8871a1aa9 Mon Sep 17 00:00:00 2001
From: vfdev-5
Date: Thu, 21 Apr 2022 16:00:01 +0000
Subject: [PATCH 6/7] Added resized_crop_segmentation_mask op

---
 .../prototype/transforms/functional/__init__.py  |  3 ++-
 .../prototype/transforms/functional/_geometry.py | 12 ++++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/torchvision/prototype/transforms/functional/__init__.py b/torchvision/prototype/transforms/functional/__init__.py
index 7069f17c414..dfbc81baea3 100644
--- a/torchvision/prototype/transforms/functional/__init__.py
+++ b/torchvision/prototype/transforms/functional/__init__.py
@@ -47,9 +47,10 @@
     resize_segmentation_mask,
     center_crop_image_tensor,
     center_crop_image_pil,
+    resized_crop_bounding_box,
     resized_crop_image_tensor,
     resized_crop_image_pil,
-    resized_crop_bounding_box,
+    resized_crop_segmentation_mask,
     affine_bounding_box,
     affine_image_tensor,
     affine_image_pil,
diff --git a/torchvision/prototype/transforms/functional/_geometry.py b/torchvision/prototype/transforms/functional/_geometry.py
index a9a427ad029..191d3510525 100644
--- a/torchvision/prototype/transforms/functional/_geometry.py
+++ b/torchvision/prototype/transforms/functional/_geometry.py
@@ -559,6 +559,18 @@ def resized_crop_bounding_box(
     return resize_bounding_box(bounding_box, size, (height, width))
 
 
+def resized_crop_segmentation_mask(
+    mask: torch.Tensor,
+    top: int,
+    left: int,
+    height: int,
+    width: int,
+    size: List[int],
+) -> torch.Tensor:
+    mask = crop_segmentation_mask(mask, top, left, height, width)
+    return resize_segmentation_mask(mask, size)
+
+
 def _parse_five_crop_size(size: List[int]) -> List[int]:
     if isinstance(size, numbers.Number):
         size = [int(size), int(size)]
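
resized_crop_segmentation_mask is a plain composition of the two mask kernels, so a
call is equivalent to cropping and then resizing; the tests added in the next patch
pin the resize step to nearest-neighbor interpolation, which keeps integer class
labels intact. A sketch under the same import assumptions as the earlier examples:

import torch
from torchvision.prototype.transforms import functional as F

mask = torch.zeros(1, 100, 100, dtype=torch.long)
mask[0, 10:20, 10:20] = 1

out = F.resized_crop_segmentation_mask(mask, top=5, left=5, height=35, width=45, size=[32, 34])

# Equivalent two-step form, by the definition added above:
ref = F.resize_segmentation_mask(
    F.crop_segmentation_mask(mask, top=5, left=5, height=35, width=45), [32, 34]
)
torch.testing.assert_close(out, ref)  # same kernels applied in the same order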

From fcb8492682606f5d307a8d58719eb99cfe323c0c Mon Sep 17 00:00:00 2001
From: vfdev-5
Date: Mon, 25 Apr 2022 09:58:06 +0000
Subject: [PATCH 7/7] Added tests

---
 test/test_prototype_transforms_functional.py | 33 +++++++++++++++++
 .../transforms/functional/_geometry.py       |  4 ---
 2 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/test/test_prototype_transforms_functional.py b/test/test_prototype_transforms_functional.py
index 2da3aa4696a..36d1677ede5 100644
--- a/test/test_prototype_transforms_functional.py
+++ b/test/test_prototype_transforms_functional.py
@@ -362,6 +362,14 @@ def resized_crop_bounding_box():
         )
 
 
+@register_kernel_info_from_sample_inputs_fn
+def resized_crop_segmentation_mask():
+    for mask, top, left, height, width, size in itertools.product(
+        make_segmentation_masks(), [-8, 0, 9], [-8, 0, 9], [12, 20], [12, 20], [(32, 32), (16, 18)]
+    ):
+        yield SampleInput(mask, top=top, left=left, height=height, width=width, size=size)
+
+
 @pytest.mark.parametrize(
     "kernel",
     [
@@ -998,3 +1006,28 @@ def _compute_expected(bbox, top_, left_, height_, width_, size_):
     output_boxes = convert_bounding_box_format(output_boxes, format, features.BoundingBoxFormat.XYXY)
 
     torch.testing.assert_close(output_boxes, expected_bboxes)
+
+
+@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize(
+    "top, left, height, width, size",
+    [
+        [0, 0, 30, 30, (60, 60)],
+        [5, 5, 35, 45, (32, 34)],
+    ],
+)
+def test_correctness_resized_crop_segmentation_mask(device, top, left, height, width, size):
+    def _compute_expected(mask, top_, left_, height_, width_, size_):
+        output = mask.clone()
+        output = output[:, top_ : top_ + height_, left_ : left_ + width_]
+        output = torch.nn.functional.interpolate(output[None, :].float(), size=size_, mode="nearest")
+        output = output[0, :].long()
+        return output
+
+    in_mask = torch.zeros(1, 100, 100, dtype=torch.long, device=device)
+    in_mask[0, 10:20, 10:20] = 1
+    in_mask[0, 5:15, 12:23] = 2
+
+    expected_mask = _compute_expected(in_mask, top, left, height, width, size)
+    output_mask = F.resized_crop_segmentation_mask(in_mask, top, left, height, width, size)
+    torch.testing.assert_close(output_mask, expected_mask)
diff --git a/torchvision/prototype/transforms/functional/_geometry.py b/torchvision/prototype/transforms/functional/_geometry.py
index 801e4979181..5f9e77fdbf4 100644
--- a/torchvision/prototype/transforms/functional/_geometry.py
+++ b/torchvision/prototype/transforms/functional/_geometry.py
@@ -440,10 +440,6 @@ def crop_segmentation_mask(img: torch.Tensor, top: int, left: int, height: int,
     return crop_image_tensor(img, top, left, height, width)
 
 
-def crop_segmentation_mask(img: torch.Tensor, top: int, left: int, height: int, width: int) -> torch.Tensor:
-    return crop_image_tensor(img, top, left, height, width)
-
-
 def perspective_image_tensor(
     img: torch.Tensor,
     perspective_coeffs: List[float],