use non-random images for interpolation kernels for testing #6977
First changed file (the common test utilities):

```diff
@@ -3,6 +3,7 @@
 import collections.abc
 import dataclasses
 import functools
+import pathlib
 from collections import defaultdict
 from typing import Callable, Optional, Sequence, Tuple, Union
@@ -14,7 +15,7 @@
 from torch.nn.functional import one_hot
 from torch.testing._comparison import assert_equal as _assert_equal, BooleanPair, NonePair, NumberPair, TensorLikePair
 from torchvision.prototype import features
-from torchvision.prototype.transforms.functional import to_image_tensor
+from torchvision.prototype.transforms.functional import convert_dtype_image_tensor, to_image_tensor
 from torchvision.transforms.functional_tensor import _max_value as get_max_value
 
 __all__ = [
@@ -69,6 +70,7 @@ def compare(self) -> None:
         self._compare_attributes(actual, expected)
 
         actual, expected = self._equalize_attributes(actual, expected)
+        actual, expected = self._promote_for_comparison(actual, expected)
         abs_diff = torch.abs(actual - expected)
 
         if self.allowed_percentage_diff is not None:
@@ -313,6 +315,42 @@ def make_image_loaders(
 make_images = from_loaders(make_image_loaders)
 
 
+def make_image_loader_for_interpolation(size="random", *, color_space=features.ColorSpace.RGB, dtype=torch.uint8):
+    size = _parse_spatial_size(size)
+    num_channels = get_num_channels(color_space)
+
+    def fn(shape, dtype, device):
+        height, width = shape[-2:]
+
+        image_pil = (
+            PIL.Image.open(pathlib.Path(__file__).parent / "assets" / "encode_jpeg" / "grace_hopper_517x606.jpg")
+            .resize((width, height))
+            .convert(
+                {
+                    features.ColorSpace.GRAY: "L",
+                    features.ColorSpace.GRAY_ALPHA: "LA",
+                    features.ColorSpace.RGB: "RGB",
+                    features.ColorSpace.RGB_ALPHA: "RGBA",
+                }[color_space]
+            )
+        )
+
+        image_tensor = convert_dtype_image_tensor(to_image_tensor(image_pil).to(device=device), dtype=dtype)
+
+        return features.Image(image_tensor, color_space=color_space)
+
+    return ImageLoader(fn, shape=(num_channels, *size), dtype=dtype, color_space=color_space)
+
+
+def make_image_loaders_for_interpolation(
+    sizes=((233, 147),),
```
Review comment: I didn't use the default shapes here since they are pretty small, and thus the area of the border of the ellipse would be quite large compared to the overall image, so the tolerances would need to be larger. Open for discussion about this.
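For context on the tolerance point, here is what an `agg_method="mean"` tolerance roughly checks. This is an illustrative sketch only, not the actual helper from the test suite:

```python
import torch

def assert_mean_pixel_difference(actual: torch.Tensor, expected: torch.Tensor, atol: float) -> None:
    # Promote to a floating dtype so uint8 subtraction cannot wrap around.
    abs_diff = torch.abs(actual.to(torch.float64) - expected.to(torch.float64))
    # With agg_method="mean" the *average* pixel difference is compared against
    # atol, so a handful of large deviations along interpolation borders still
    # pass as long as the bulk of the image matches. The larger the border is
    # relative to the image area, the higher the mean, which is why small
    # images would need larger tolerances.
    assert abs_diff.mean() <= atol, f"mean abs diff {abs_diff.mean():.2f} > {atol}"
```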
```diff
+    color_spaces=(features.ColorSpace.RGB,),
+    dtypes=(torch.uint8,),
+):
+    for params in combinations_grid(size=sizes, color_space=color_spaces, dtype=dtypes):
+        yield make_image_loader_for_interpolation(**params)
+
+
 @dataclasses.dataclass
 class BoundingBoxLoader(TensorLoader):
     format: features.BoundingBoxFormat
```
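A minimal usage sketch of the new helpers. The `load("cpu")` call assumes the `TensorLoader.load(device)` convention of the surrounding test utilities; that method name is an assumption here, not part of this diff:

```python
import torch

# Each loader now materializes a deterministic test image (a resized
# Grace Hopper photo) rather than random noise, so interpolation output
# can be compared against PIL with much tighter tolerances.
for loader in make_image_loaders_for_interpolation():
    image = loader.load("cpu")  # assumed TensorLoader-style API
    print(image.shape, image.dtype)  # torch.Size([3, 233, 147]) torch.uint8
```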
Second changed file (the kernel info definitions):

```diff
@@ -19,6 +19,7 @@
     make_bounding_box_loaders,
     make_image_loader,
     make_image_loaders,
+    make_image_loaders_for_interpolation,
     make_mask_loaders,
     make_video_loaders,
     mark_framework_limitation,
@@ -287,7 +288,7 @@ def reference_resize_image_tensor(*args, **kwargs):
 
 def reference_inputs_resize_image_tensor():
     for image_loader, interpolation in itertools.product(
-        make_image_loaders(extra_dims=[()], dtypes=[torch.uint8]),
+        make_image_loaders_for_interpolation(),
         [
             F.InterpolationMode.NEAREST,
             F.InterpolationMode.NEAREST_EXACT,
@@ -319,17 +320,6 @@ def sample_inputs_resize_mask():
         yield ArgsKwargs(mask_loader, size=[min(mask_loader.shape[-2:]) + 1])
 
 
-@pil_reference_wrapper
-def reference_resize_mask(*args, **kwargs):
```
Review comment: Since all of our mask kernels are just wrapping the image ones, there is no need to compare both against a reference. Checking the image kernel is sufficient.
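The wrapping pattern looks roughly like this. This is a sketch, not the literal torchvision source; the real kernels also handle extra dimensions and parameters such as `max_size`:

```python
from torchvision.prototype.transforms import functional as F

def resize_mask(mask, size):
    # Masks hold categorical labels, so they must not be blended across
    # label boundaries: the mask kernel simply forwards to the image
    # kernel with nearest-neighbor interpolation.
    return F.resize_image_tensor(mask, size=size, interpolation=F.InterpolationMode.NEAREST)
```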
```diff
-    return F.resize_image_pil(*args, interpolation=F.InterpolationMode.NEAREST, **kwargs)
-
-
-def reference_inputs_resize_mask():
-    for mask_loader in make_mask_loaders(extra_dims=[()], num_objects=[1]):
-        for size in _get_resize_sizes(mask_loader.shape[-2:]):
-            yield ArgsKwargs(mask_loader, size=size)
-
-
 def sample_inputs_resize_video():
     for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]):
         yield ArgsKwargs(video_loader, size=[min(video_loader.shape[-2:]) + 1])
@@ -369,11 +359,9 @@ def reference_inputs_resize_bounding_box():
             reference_inputs_fn=reference_inputs_resize_image_tensor,
             float32_vs_uint8=True,
             closeness_kwargs={
-                # TODO: investigate
-                **pil_reference_pixel_difference(110, agg_method="mean"),
+                **pil_reference_pixel_difference(10, agg_method="mean"),
                 **cuda_vs_cpu_pixel_difference(),
-                # TODO: investigate
-                **float32_vs_uint8_pixel_difference(50),
+                **float32_vs_uint8_pixel_difference(1, agg_method="mean"),
             },
             test_marks=[
                 xfail_jit_python_scalar_arg("size"),
@@ -391,9 +379,6 @@ def reference_inputs_resize_bounding_box():
         KernelInfo(
             F.resize_mask,
             sample_inputs_fn=sample_inputs_resize_mask,
-            reference_fn=reference_resize_mask,
-            reference_inputs_fn=reference_inputs_resize_mask,
-            float32_vs_uint8=True,
-            closeness_kwargs=pil_reference_pixel_difference(10),
             test_marks=[
                 xfail_jit_python_scalar_arg("size"),
@@ -501,9 +486,7 @@ def sample_inputs_affine_image_tensor():
 
 
 def reference_inputs_affine_image_tensor():
-    for image_loader, affine_kwargs in itertools.product(
-        make_image_loaders(extra_dims=[()], dtypes=[torch.uint8]), _AFFINE_KWARGS
-    ):
+    for image_loader, affine_kwargs in itertools.product(make_image_loaders_for_interpolation(), _AFFINE_KWARGS):
         yield ArgsKwargs(
             image_loader,
             interpolation=F.InterpolationMode.NEAREST,
@@ -617,18 +600,6 @@ def sample_inputs_affine_mask():
         yield ArgsKwargs(mask_loader, **_full_affine_params())
 
 
-@pil_reference_wrapper
-def reference_affine_mask(*args, **kwargs):
-    return F.affine_image_pil(*args, interpolation=F.InterpolationMode.NEAREST, **kwargs)
-
-
-def reference_inputs_resize_mask():
-    for mask_loader, affine_kwargs in itertools.product(
-        make_mask_loaders(extra_dims=[()], num_objects=[1]), _AFFINE_KWARGS
-    ):
-        yield ArgsKwargs(mask_loader, **affine_kwargs)
-
-
 def sample_inputs_affine_video():
     for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]):
         yield ArgsKwargs(video_loader, **_full_affine_params())
@@ -665,10 +636,6 @@ def sample_inputs_affine_video():
         KernelInfo(
             F.affine_mask,
             sample_inputs_fn=sample_inputs_affine_mask,
-            reference_fn=reference_affine_mask,
-            reference_inputs_fn=reference_inputs_resize_mask,
-            closeness_kwargs=pil_reference_pixel_difference(10),
-            float32_vs_uint8=True,
             test_marks=[
                 xfail_jit_python_scalar_arg("shear"),
             ],
@@ -870,9 +837,7 @@ def sample_inputs_rotate_image_tensor():
 
 
 def reference_inputs_rotate_image_tensor():
-    for image_loader, angle in itertools.product(
-        make_image_loaders(extra_dims=[()], dtypes=[torch.uint8]), _ROTATE_ANGLES
-    ):
+    for image_loader, angle in itertools.product(make_image_loaders_for_interpolation(), _ROTATE_ANGLES):
         yield ArgsKwargs(image_loader, angle=angle)
@@ -891,16 +856,6 @@ def sample_inputs_rotate_mask():
         yield ArgsKwargs(mask_loader, angle=15.0)
 
 
-@pil_reference_wrapper
-def reference_rotate_mask(*args, **kwargs):
-    return F.rotate_image_pil(*args, interpolation=F.InterpolationMode.NEAREST, **kwargs)
-
-
-def reference_inputs_rotate_mask():
-    for mask_loader, angle in itertools.product(make_mask_loaders(extra_dims=[()], num_objects=[1]), _ROTATE_ANGLES):
-        yield ArgsKwargs(mask_loader, angle=angle)
-
-
 def sample_inputs_rotate_video():
     for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]):
         yield ArgsKwargs(video_loader, angle=15.0)
@@ -914,8 +869,7 @@ def sample_inputs_rotate_video():
             reference_fn=pil_reference_wrapper(F.rotate_image_pil),
             reference_inputs_fn=reference_inputs_rotate_image_tensor,
             float32_vs_uint8=True,
-            # TODO: investigate
-            closeness_kwargs=pil_reference_pixel_difference(110, agg_method="mean"),
+            closeness_kwargs=pil_reference_pixel_difference(1, agg_method="mean"),
            test_marks=[
                xfail_jit_tuple_instead_of_list("fill"),
                # TODO: check if this is a regression since it seems that should be supported if `int` is ok
@@ -929,10 +883,6 @@ def sample_inputs_rotate_video():
         KernelInfo(
             F.rotate_mask,
             sample_inputs_fn=sample_inputs_rotate_mask,
-            reference_fn=reference_rotate_mask,
-            reference_inputs_fn=reference_inputs_rotate_mask,
-            float32_vs_uint8=True,
-            closeness_kwargs=pil_reference_pixel_difference(10),
         ),
         KernelInfo(
             F.rotate_video,
@@ -1058,7 +1008,7 @@ def reference_resized_crop_image_tensor(*args, **kwargs):
 
 def reference_inputs_resized_crop_image_tensor():
     for image_loader, interpolation, params in itertools.product(
-        make_image_loaders(extra_dims=[()], dtypes=[torch.uint8]),
+        make_image_loaders_for_interpolation(),
         [
             F.InterpolationMode.NEAREST,
             F.InterpolationMode.NEAREST_EXACT,
@@ -1089,13 +1039,6 @@ def sample_inputs_resized_crop_mask():
         yield ArgsKwargs(mask_loader, **_RESIZED_CROP_PARAMS[0])
 
 
-def reference_inputs_resized_crop_mask():
-    for mask_loader, params in itertools.product(
-        make_mask_loaders(extra_dims=[()], num_objects=[1]), _RESIZED_CROP_PARAMS
-    ):
-        yield ArgsKwargs(mask_loader, **params)
-
-
 def sample_inputs_resized_crop_video():
     for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]):
         yield ArgsKwargs(video_loader, **_RESIZED_CROP_PARAMS[0])
@@ -1110,11 +1053,9 @@ def sample_inputs_resized_crop_video():
             reference_inputs_fn=reference_inputs_resized_crop_image_tensor,
             float32_vs_uint8=True,
             closeness_kwargs={
-                # TODO: investigate
-                **pil_reference_pixel_difference(60, agg_method="mean"),
                 **cuda_vs_cpu_pixel_difference(),
-                # TODO: investigate
-                **float32_vs_uint8_pixel_difference(50),
+                **pil_reference_pixel_difference(3, agg_method="mean"),
+                **float32_vs_uint8_pixel_difference(3, agg_method="mean"),
             },
         ),
         KernelInfo(
@@ -1124,10 +1065,6 @@ def sample_inputs_resized_crop_video():
         KernelInfo(
             F.resized_crop_mask,
             sample_inputs_fn=sample_inputs_resized_crop_mask,
-            reference_fn=pil_reference_wrapper(F.resized_crop_image_pil),
-            reference_inputs_fn=reference_inputs_resized_crop_mask,
-            float32_vs_uint8=True,
-            closeness_kwargs=pil_reference_pixel_difference(10),
         ),
         KernelInfo(
             F.resized_crop_video,
@@ -1298,12 +1235,24 @@ def sample_inputs_perspective_image_tensor():
 
 
 def reference_inputs_perspective_image_tensor():
-    for image_loader, coefficients in itertools.product(
-        make_image_loaders(extra_dims=[()], dtypes=[torch.uint8]), _PERSPECTIVE_COEFFS
+    for image_loader, coefficients, interpolation in itertools.product(
+        make_image_loaders_for_interpolation(),
+        _PERSPECTIVE_COEFFS,
+        [
+            F.InterpolationMode.NEAREST,
+            F.InterpolationMode.BILINEAR,
+        ],
     ):
         # FIXME: PIL kernel doesn't support sequences of length 1 if the number of channels is larger. Shouldn't it?
         for fill in get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype):
-            yield ArgsKwargs(image_loader, None, None, fill=fill, coefficients=coefficients)
+            yield ArgsKwargs(
+                image_loader,
+                startpoints=None,
+                endpoints=None,
+                interpolation=interpolation,
+                fill=fill,
+                coefficients=coefficients,
+            )
 
 
 def sample_inputs_perspective_bounding_box():
@@ -1339,8 +1288,7 @@ def sample_inputs_perspective_video():
             reference_inputs_fn=reference_inputs_perspective_image_tensor,
             float32_vs_uint8=float32_vs_uint8_fill_adapter,
             closeness_kwargs={
-                # TODO: investigate
-                **pil_reference_pixel_difference(160, agg_method="mean"),
+                **pil_reference_pixel_difference(2, agg_method="mean"),
                 **cuda_vs_cpu_pixel_difference(),
                 **float32_vs_uint8_pixel_difference(),
             },
@@ -1381,7 +1329,7 @@ def sample_inputs_elastic_image_tensor():
 
 def reference_inputs_elastic_image_tensor():
     for image_loader, interpolation in itertools.product(
-        make_image_loaders(extra_dims=[()], dtypes=[torch.uint8]),
+        make_image_loaders_for_interpolation(),
         [
             F.InterpolationMode.NEAREST,
             F.InterpolationMode.BILINEAR,
@@ -1409,12 +1357,6 @@ def sample_inputs_elastic_mask():
         yield ArgsKwargs(mask_loader, displacement=displacement)
 
 
-def reference_inputs_elastic_mask():
-    for mask_loader in make_mask_loaders(extra_dims=[()], num_objects=[1]):
-        displacement = _get_elastic_displacement(mask_loader.shape[-2:])
-        yield ArgsKwargs(mask_loader, displacement=displacement)
-
-
 def sample_inputs_elastic_video():
     for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]):
         displacement = _get_elastic_displacement(video_loader.shape[-2:])
@@ -1426,11 +1368,12 @@ def sample_inputs_elastic_video():
         KernelInfo(
             F.elastic_image_tensor,
             sample_inputs_fn=sample_inputs_elastic_image_tensor,
-            reference_fn=pil_reference_wrapper(F.elastic_image_pil),
             reference_inputs_fn=reference_inputs_elastic_image_tensor,
             float32_vs_uint8=float32_vs_uint8_fill_adapter,
-            # TODO: investigate
-            closeness_kwargs=float32_vs_uint8_pixel_difference(60, agg_method="mean"),
+            closeness_kwargs={
+                **float32_vs_uint8_pixel_difference(6, agg_method="mean"),
+                **cuda_vs_cpu_pixel_difference(),
```
Review comment: Driveby fix that seems to have snuck in since the last time I was testing against CUDA.
```diff
+            },
         ),
         KernelInfo(
             F.elastic_bounding_box,
@@ -1439,15 +1382,11 @@ def sample_inputs_elastic_video():
         KernelInfo(
             F.elastic_mask,
             sample_inputs_fn=sample_inputs_elastic_mask,
-            reference_fn=pil_reference_wrapper(F.elastic_image_pil),
-            reference_inputs_fn=reference_inputs_elastic_mask,
-            float32_vs_uint8=True,
-            # TODO: investigate
-            closeness_kwargs=pil_reference_pixel_difference(80, agg_method="mean"),
         ),
         KernelInfo(
             F.elastic_video,
             sample_inputs_fn=sample_inputs_elastic_video,
+            closeness_kwargs=cuda_vs_cpu_pixel_difference(),
```
Review comment: Same as above.
```diff
         ),
     ]
 )
@@ -2089,8 +2028,7 @@ def sample_inputs_adjust_hue_video():
             reference_inputs_fn=reference_inputs_adjust_hue_image_tensor,
             float32_vs_uint8=True,
             closeness_kwargs={
-                # TODO: investigate
-                **pil_reference_pixel_difference(20),
+                **pil_reference_pixel_difference(2, agg_method="mean"),
                 **float32_vs_uint8_pixel_difference(),
             },
         ),
```
Review comment (on the added `_promote_for_comparison` call): This was a bug. Without this promotion, we are later subtracting `uint8` values for the absolute difference, and they wrap around. If we convert to a signed integer dtype first (here `int64`), we get the correct absolute diff. This bug only affected the comparisons that used `agg_method=...`.
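The wrap-around is easy to reproduce. A standalone illustration of the behavior described above, not the test-suite code itself:

```python
import torch

actual = torch.tensor([5], dtype=torch.uint8)
expected = torch.tensor([10], dtype=torch.uint8)

# uint8 arithmetic wraps around: 5 - 10 == 251 (mod 256), so the
# "absolute difference" comes out huge instead of 5.
print(torch.abs(actual - expected))  # tensor([251], dtype=torch.uint8)

# Promoting to a signed dtype first yields the intended result.
print(torch.abs(actual.to(torch.int64) - expected.to(torch.int64)))  # tensor([5])
```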