From 8079a3114ecee55e120626258eb8a7d2146011b3 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Fri, 25 Nov 2022 15:15:13 +0100 Subject: [PATCH 1/3] use non-random images for interpolation kernels for testing --- test/prototype_common_utils.py | 34 ++++++ test/prototype_transforms_kernel_infos.py | 130 ++++++---------------- 2 files changed, 66 insertions(+), 98 deletions(-) diff --git a/test/prototype_common_utils.py b/test/prototype_common_utils.py index 9a613901e6a..a4c82df2518 100644 --- a/test/prototype_common_utils.py +++ b/test/prototype_common_utils.py @@ -7,6 +7,7 @@ from typing import Callable, Optional, Sequence, Tuple, Union import PIL.Image +import PIL.ImageDraw import pytest import torch import torch.testing @@ -69,6 +70,7 @@ def compare(self) -> None: self._compare_attributes(actual, expected) actual, expected = self._equalize_attributes(actual, expected) + actual, expected = self._promote_for_comparison(actual, expected) abs_diff = torch.abs(actual - expected) if self.allowed_percentage_diff is not None: @@ -313,6 +315,38 @@ def make_image_loaders( make_images = from_loaders(make_image_loaders) +def make_image_loader_for_interpolation(size="random", *, color_space=features.ColorSpace.RGB, dtype=torch.uint8): + size = _parse_spatial_size(size) + num_channels = get_num_channels(color_space) + + def fn(shape, dtype, device): + num_channels, height, width = shape + + top = int((0.8 * height) // 2) + left = int((0.8 * width) // 2) + bottom = height - top + right = width - left + + image_pil = PIL.Image.new("L", (width, height)) + draw = PIL.ImageDraw.Draw(image_pil) + draw.ellipse(((left, top), (right, bottom)), fill=255) + + return features.Image(to_image_tensor(image_pil).repeat(num_channels, 1, 1), color_space=color_space).to( + dtype=dtype, device=device + ) + + return ImageLoader(fn, shape=(num_channels, *size), dtype=dtype, color_space=color_space) + + +def make_image_loaders_for_interpolation( + sizes=((233, 147),), + color_spaces=(features.ColorSpace.RGB,), + dtypes=(torch.uint8,), +): + for params in combinations_grid(size=sizes, color_space=color_spaces, dtype=dtypes): + yield make_image_loader_for_interpolation(**params) + + @dataclasses.dataclass class BoundingBoxLoader(TensorLoader): format: features.BoundingBoxFormat diff --git a/test/prototype_transforms_kernel_infos.py b/test/prototype_transforms_kernel_infos.py index 25daf3da59f..b61327621a3 100644 --- a/test/prototype_transforms_kernel_infos.py +++ b/test/prototype_transforms_kernel_infos.py @@ -19,6 +19,7 @@ make_bounding_box_loaders, make_image_loader, make_image_loaders, + make_image_loaders_for_interpolation, make_mask_loaders, make_video_loaders, mark_framework_limitation, @@ -287,7 +288,7 @@ def reference_resize_image_tensor(*args, **kwargs): def reference_inputs_resize_image_tensor(): for image_loader, interpolation in itertools.product( - make_image_loaders(extra_dims=[()], dtypes=[torch.uint8]), + make_image_loaders_for_interpolation(), [ F.InterpolationMode.NEAREST, F.InterpolationMode.NEAREST_EXACT, @@ -319,17 +320,6 @@ def sample_inputs_resize_mask(): yield ArgsKwargs(mask_loader, size=[min(mask_loader.shape[-2:]) + 1]) -@pil_reference_wrapper -def reference_resize_mask(*args, **kwargs): - return F.resize_image_pil(*args, interpolation=F.InterpolationMode.NEAREST, **kwargs) - - -def reference_inputs_resize_mask(): - for mask_loader in make_mask_loaders(extra_dims=[()], num_objects=[1]): - for size in _get_resize_sizes(mask_loader.shape[-2:]): - yield ArgsKwargs(mask_loader, size=size) - - def sample_inputs_resize_video(): for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): yield ArgsKwargs(video_loader, size=[min(video_loader.shape[-2:]) + 1]) @@ -369,11 +359,9 @@ def reference_inputs_resize_bounding_box(): reference_inputs_fn=reference_inputs_resize_image_tensor, float32_vs_uint8=True, closeness_kwargs={ - # TODO: investigate - **pil_reference_pixel_difference(110, agg_method="mean"), + **pil_reference_pixel_difference(1, agg_method="mean"), **cuda_vs_cpu_pixel_difference(), - # TODO: investigate - **float32_vs_uint8_pixel_difference(50), + **float32_vs_uint8_pixel_difference(1, agg_method="mean"), }, test_marks=[ xfail_jit_python_scalar_arg("size"), @@ -391,9 +379,6 @@ def reference_inputs_resize_bounding_box(): KernelInfo( F.resize_mask, sample_inputs_fn=sample_inputs_resize_mask, - reference_fn=reference_resize_mask, - reference_inputs_fn=reference_inputs_resize_mask, - float32_vs_uint8=True, closeness_kwargs=pil_reference_pixel_difference(10), test_marks=[ xfail_jit_python_scalar_arg("size"), @@ -501,9 +486,7 @@ def sample_inputs_affine_image_tensor(): def reference_inputs_affine_image_tensor(): - for image_loader, affine_kwargs in itertools.product( - make_image_loaders(extra_dims=[()], dtypes=[torch.uint8]), _AFFINE_KWARGS - ): + for image_loader, affine_kwargs in itertools.product(make_image_loaders_for_interpolation(), _AFFINE_KWARGS): yield ArgsKwargs( image_loader, interpolation=F.InterpolationMode.NEAREST, @@ -617,18 +600,6 @@ def sample_inputs_affine_mask(): yield ArgsKwargs(mask_loader, **_full_affine_params()) -@pil_reference_wrapper -def reference_affine_mask(*args, **kwargs): - return F.affine_image_pil(*args, interpolation=F.InterpolationMode.NEAREST, **kwargs) - - -def reference_inputs_resize_mask(): - for mask_loader, affine_kwargs in itertools.product( - make_mask_loaders(extra_dims=[()], num_objects=[1]), _AFFINE_KWARGS - ): - yield ArgsKwargs(mask_loader, **affine_kwargs) - - def sample_inputs_affine_video(): for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): yield ArgsKwargs(video_loader, **_full_affine_params()) @@ -665,10 +636,6 @@ def sample_inputs_affine_video(): KernelInfo( F.affine_mask, sample_inputs_fn=sample_inputs_affine_mask, - reference_fn=reference_affine_mask, - reference_inputs_fn=reference_inputs_resize_mask, - closeness_kwargs=pil_reference_pixel_difference(10), - float32_vs_uint8=True, test_marks=[ xfail_jit_python_scalar_arg("shear"), ], @@ -870,9 +837,7 @@ def sample_inputs_rotate_image_tensor(): def reference_inputs_rotate_image_tensor(): - for image_loader, angle in itertools.product( - make_image_loaders(extra_dims=[()], dtypes=[torch.uint8]), _ROTATE_ANGLES - ): + for image_loader, angle in itertools.product(make_image_loaders_for_interpolation(), _ROTATE_ANGLES): yield ArgsKwargs(image_loader, angle=angle) @@ -891,16 +856,6 @@ def sample_inputs_rotate_mask(): yield ArgsKwargs(mask_loader, angle=15.0) -@pil_reference_wrapper -def reference_rotate_mask(*args, **kwargs): - return F.rotate_image_pil(*args, interpolation=F.InterpolationMode.NEAREST, **kwargs) - - -def reference_inputs_rotate_mask(): - for mask_loader, angle in itertools.product(make_mask_loaders(extra_dims=[()], num_objects=[1]), _ROTATE_ANGLES): - yield ArgsKwargs(mask_loader, angle=angle) - - def sample_inputs_rotate_video(): for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): yield ArgsKwargs(video_loader, angle=15.0) @@ -914,8 +869,7 @@ def sample_inputs_rotate_video(): reference_fn=pil_reference_wrapper(F.rotate_image_pil), reference_inputs_fn=reference_inputs_rotate_image_tensor, float32_vs_uint8=True, - # TODO: investigate - closeness_kwargs=pil_reference_pixel_difference(110, agg_method="mean"), + closeness_kwargs=pil_reference_pixel_difference(0, agg_method="mean"), test_marks=[ xfail_jit_tuple_instead_of_list("fill"), # TODO: check if this is a regression since it seems that should be supported if `int` is ok @@ -929,10 +883,6 @@ def sample_inputs_rotate_video(): KernelInfo( F.rotate_mask, sample_inputs_fn=sample_inputs_rotate_mask, - reference_fn=reference_rotate_mask, - reference_inputs_fn=reference_inputs_rotate_mask, - float32_vs_uint8=True, - closeness_kwargs=pil_reference_pixel_difference(10), ), KernelInfo( F.rotate_video, @@ -1058,7 +1008,7 @@ def reference_resized_crop_image_tensor(*args, **kwargs): def reference_inputs_resized_crop_image_tensor(): for image_loader, interpolation, params in itertools.product( - make_image_loaders(extra_dims=[()], dtypes=[torch.uint8]), + make_image_loaders_for_interpolation(), [ F.InterpolationMode.NEAREST, F.InterpolationMode.NEAREST_EXACT, @@ -1089,13 +1039,6 @@ def sample_inputs_resized_crop_mask(): yield ArgsKwargs(mask_loader, **_RESIZED_CROP_PARAMS[0]) -def reference_inputs_resized_crop_mask(): - for mask_loader, params in itertools.product( - make_mask_loaders(extra_dims=[()], num_objects=[1]), _RESIZED_CROP_PARAMS - ): - yield ArgsKwargs(mask_loader, **params) - - def sample_inputs_resized_crop_video(): for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): yield ArgsKwargs(video_loader, **_RESIZED_CROP_PARAMS[0]) @@ -1109,13 +1052,7 @@ def sample_inputs_resized_crop_video(): reference_fn=reference_resized_crop_image_tensor, reference_inputs_fn=reference_inputs_resized_crop_image_tensor, float32_vs_uint8=True, - closeness_kwargs={ - # TODO: investigate - **pil_reference_pixel_difference(60, agg_method="mean"), - **cuda_vs_cpu_pixel_difference(), - # TODO: investigate - **float32_vs_uint8_pixel_difference(50), - }, + closeness_kwargs=cuda_vs_cpu_pixel_difference(), ), KernelInfo( F.resized_crop_bounding_box, @@ -1124,10 +1061,6 @@ def sample_inputs_resized_crop_video(): KernelInfo( F.resized_crop_mask, sample_inputs_fn=sample_inputs_resized_crop_mask, - reference_fn=pil_reference_wrapper(F.resized_crop_image_pil), - reference_inputs_fn=reference_inputs_resized_crop_mask, - float32_vs_uint8=True, - closeness_kwargs=pil_reference_pixel_difference(10), ), KernelInfo( F.resized_crop_video, @@ -1298,12 +1231,24 @@ def sample_inputs_perspective_image_tensor(): def reference_inputs_perspective_image_tensor(): - for image_loader, coefficients in itertools.product( - make_image_loaders(extra_dims=[()], dtypes=[torch.uint8]), _PERSPECTIVE_COEFFS + for image_loader, coefficients, interpolation in itertools.product( + make_image_loaders_for_interpolation(), + _PERSPECTIVE_COEFFS, + [ + F.InterpolationMode.NEAREST, + F.InterpolationMode.BILINEAR, + ], ): # FIXME: PIL kernel doesn't support sequences of length 1 if the number of channels is larger. Shouldn't it? for fill in get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype): - yield ArgsKwargs(image_loader, None, None, fill=fill, coefficients=coefficients) + yield ArgsKwargs( + image_loader, + startpoints=None, + endpoints=None, + interpolation=interpolation, + fill=fill, + coefficients=coefficients, + ) def sample_inputs_perspective_bounding_box(): @@ -1339,8 +1284,7 @@ def sample_inputs_perspective_video(): reference_inputs_fn=reference_inputs_perspective_image_tensor, float32_vs_uint8=float32_vs_uint8_fill_adapter, closeness_kwargs={ - # TODO: investigate - **pil_reference_pixel_difference(160, agg_method="mean"), + **pil_reference_pixel_difference(2, agg_method="mean"), **cuda_vs_cpu_pixel_difference(), **float32_vs_uint8_pixel_difference(), }, @@ -1381,7 +1325,7 @@ def sample_inputs_elastic_image_tensor(): def reference_inputs_elastic_image_tensor(): for image_loader, interpolation in itertools.product( - make_image_loaders(extra_dims=[()], dtypes=[torch.uint8]), + make_image_loaders_for_interpolation(), [ F.InterpolationMode.NEAREST, F.InterpolationMode.BILINEAR, @@ -1409,12 +1353,6 @@ def sample_inputs_elastic_mask(): yield ArgsKwargs(mask_loader, displacement=displacement) -def reference_inputs_elastic_mask(): - for mask_loader in make_mask_loaders(extra_dims=[()], num_objects=[1]): - displacement = _get_elastic_displacement(mask_loader.shape[-2:]) - yield ArgsKwargs(mask_loader, displacement=displacement) - - def sample_inputs_elastic_video(): for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): displacement = _get_elastic_displacement(video_loader.shape[-2:]) @@ -1426,11 +1364,12 @@ def sample_inputs_elastic_video(): KernelInfo( F.elastic_image_tensor, sample_inputs_fn=sample_inputs_elastic_image_tensor, - reference_fn=pil_reference_wrapper(F.elastic_image_pil), reference_inputs_fn=reference_inputs_elastic_image_tensor, float32_vs_uint8=float32_vs_uint8_fill_adapter, - # TODO: investigate - closeness_kwargs=float32_vs_uint8_pixel_difference(60, agg_method="mean"), + closeness_kwargs={ + **float32_vs_uint8_pixel_difference(6, agg_method="mean"), + **cuda_vs_cpu_pixel_difference(), + }, ), KernelInfo( F.elastic_bounding_box, @@ -1439,15 +1378,11 @@ def sample_inputs_elastic_video(): KernelInfo( F.elastic_mask, sample_inputs_fn=sample_inputs_elastic_mask, - reference_fn=pil_reference_wrapper(F.elastic_image_pil), - reference_inputs_fn=reference_inputs_elastic_mask, - float32_vs_uint8=True, - # TODO: investigate - closeness_kwargs=pil_reference_pixel_difference(80, agg_method="mean"), ), KernelInfo( F.elastic_video, sample_inputs_fn=sample_inputs_elastic_video, + closeness_kwargs=cuda_vs_cpu_pixel_difference(), ), ] ) @@ -2089,8 +2024,7 @@ def sample_inputs_adjust_hue_video(): reference_inputs_fn=reference_inputs_adjust_hue_image_tensor, float32_vs_uint8=True, closeness_kwargs={ - # TODO: investigate - **pil_reference_pixel_difference(20), + **pil_reference_pixel_difference(2, agg_method="mean"), **float32_vs_uint8_pixel_difference(), }, ), From 2eba005893d8eeeb8f9cc2fd290e6e11cc6538d5 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 28 Nov 2022 10:47:01 +0100 Subject: [PATCH 2/3] use real image rather than artificial --- test/prototype_common_utils.py | 31 +++++++++++++---------- test/prototype_transforms_kernel_infos.py | 10 +++++--- 2 files changed, 25 insertions(+), 16 deletions(-) diff --git a/test/prototype_common_utils.py b/test/prototype_common_utils.py index a4c82df2518..0d1ee75140a 100644 --- a/test/prototype_common_utils.py +++ b/test/prototype_common_utils.py @@ -3,6 +3,7 @@ import collections.abc import dataclasses import functools +import pathlib from collections import defaultdict from typing import Callable, Optional, Sequence, Tuple, Union @@ -15,7 +16,7 @@ from torch.nn.functional import one_hot from torch.testing._comparison import assert_equal as _assert_equal, BooleanPair, NonePair, NumberPair, TensorLikePair from torchvision.prototype import features -from torchvision.prototype.transforms.functional import to_image_tensor +from torchvision.prototype.transforms.functional import convert_dtype_image_tensor, to_image_tensor from torchvision.transforms.functional_tensor import _max_value as get_max_value __all__ = [ @@ -320,20 +321,24 @@ def make_image_loader_for_interpolation(size="random", *, color_space=features.C num_channels = get_num_channels(color_space) def fn(shape, dtype, device): - num_channels, height, width = shape - - top = int((0.8 * height) // 2) - left = int((0.8 * width) // 2) - bottom = height - top - right = width - left + height, width = shape[-2:] + + image_pil = ( + PIL.Image.open(pathlib.Path(__file__).parent / "assets" / "encode_jpeg" / "grace_hopper_517x606.jpg") + .resize((width, height)) + .convert( + { + features.ColorSpace.GRAY: "L", + features.ColorSpace.GRAY_ALPHA: "LA", + features.ColorSpace.RGB: "RGB", + features.ColorSpace.RGB_ALPHA: "RGBA", + }[color_space] + ) + ) - image_pil = PIL.Image.new("L", (width, height)) - draw = PIL.ImageDraw.Draw(image_pil) - draw.ellipse(((left, top), (right, bottom)), fill=255) + image_tensor = convert_dtype_image_tensor(to_image_tensor(image_pil).to(device=device), dtype=dtype) - return features.Image(to_image_tensor(image_pil).repeat(num_channels, 1, 1), color_space=color_space).to( - dtype=dtype, device=device - ) + return features.Image(image_tensor, color_space=color_space) return ImageLoader(fn, shape=(num_channels, *size), dtype=dtype, color_space=color_space) diff --git a/test/prototype_transforms_kernel_infos.py b/test/prototype_transforms_kernel_infos.py index b61327621a3..12bdf60a48a 100644 --- a/test/prototype_transforms_kernel_infos.py +++ b/test/prototype_transforms_kernel_infos.py @@ -359,7 +359,7 @@ def reference_inputs_resize_bounding_box(): reference_inputs_fn=reference_inputs_resize_image_tensor, float32_vs_uint8=True, closeness_kwargs={ - **pil_reference_pixel_difference(1, agg_method="mean"), + **pil_reference_pixel_difference(10, agg_method="mean"), **cuda_vs_cpu_pixel_difference(), **float32_vs_uint8_pixel_difference(1, agg_method="mean"), }, @@ -869,7 +869,7 @@ def sample_inputs_rotate_video(): reference_fn=pil_reference_wrapper(F.rotate_image_pil), reference_inputs_fn=reference_inputs_rotate_image_tensor, float32_vs_uint8=True, - closeness_kwargs=pil_reference_pixel_difference(0, agg_method="mean"), + closeness_kwargs=pil_reference_pixel_difference(1, agg_method="mean"), test_marks=[ xfail_jit_tuple_instead_of_list("fill"), # TODO: check if this is a regression since it seems that should be supported if `int` is ok @@ -1052,7 +1052,11 @@ def sample_inputs_resized_crop_video(): reference_fn=reference_resized_crop_image_tensor, reference_inputs_fn=reference_inputs_resized_crop_image_tensor, float32_vs_uint8=True, - closeness_kwargs=cuda_vs_cpu_pixel_difference(), + closeness_kwargs={ + **cuda_vs_cpu_pixel_difference(), + **pil_reference_pixel_difference(3, agg_method="mean"), + **float32_vs_uint8_pixel_difference(3, agg_method="mean"), + }, ), KernelInfo( F.resized_crop_bounding_box, From 6207e9c3486011a8a9f8c645df015935bb1641f9 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 28 Nov 2022 13:52:47 +0100 Subject: [PATCH 3/3] cleanup --- test/prototype_common_utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/prototype_common_utils.py b/test/prototype_common_utils.py index 0d1ee75140a..795642683e6 100644 --- a/test/prototype_common_utils.py +++ b/test/prototype_common_utils.py @@ -8,7 +8,6 @@ from typing import Callable, Optional, Sequence, Tuple, Union import PIL.Image -import PIL.ImageDraw import pytest import torch import torch.testing