40 changes: 39 additions & 1 deletion test/prototype_common_utils.py
@@ -3,6 +3,7 @@
import collections.abc
import dataclasses
import functools
import pathlib
from collections import defaultdict
from typing import Callable, Optional, Sequence, Tuple, Union

@@ -14,7 +15,7 @@
from torch.nn.functional import one_hot
from torch.testing._comparison import assert_equal as _assert_equal, BooleanPair, NonePair, NumberPair, TensorLikePair
from torchvision.prototype import features
from torchvision.prototype.transforms.functional import to_image_tensor
from torchvision.prototype.transforms.functional import convert_dtype_image_tensor, to_image_tensor
from torchvision.transforms.functional_tensor import _max_value as get_max_value

__all__ = [
@@ -69,6 +70,7 @@ def compare(self) -> None:
self._compare_attributes(actual, expected)

actual, expected = self._equalize_attributes(actual, expected)
actual, expected = self._promote_for_comparison(actual, expected)
The PR author commented:
This was a bug: without this promotion, we later subtract uint8 values when computing the absolute difference, and they wrap around:

>>> a = torch.tensor(10, dtype=torch.uint8)
>>> b = torch.tensor(11, dtype=torch.uint8)
>>> a - b
tensor(255, dtype=torch.uint8)
>>> abs(a - b)
tensor(255, dtype=torch.uint8)

If we first convert to a signed integer dtype (here int64), we get the correct absolute diff:

>>> a = torch.tensor(10, dtype=torch.int64)
>>> b = torch.tensor(11, dtype=torch.int64)
>>> a - b
tensor(-1, dtype=torch.int64)
>>> abs(a - b)
tensor(1, dtype=torch.int64)

This bug only affected the comparisons that used agg_method=...
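
As a sketch, the promotion step could be as simple as the following (the exact rule used by _promote_for_comparison is an assumption for illustration; only its call site appears in this diff):

import torch

def promote_for_comparison(actual, expected):
    # Cast integer tensors to int64 before subtracting so the difference
    # cannot wrap around the way it does for unsigned dtypes like uint8.
    if not actual.dtype.is_floating_point:
        actual = actual.to(torch.int64)
    if not expected.dtype.is_floating_point:
        expected = expected.to(torch.int64)
    return actual, expected

>>> a = torch.tensor(10, dtype=torch.uint8)
>>> b = torch.tensor(11, dtype=torch.uint8)
>>> a, b = promote_for_comparison(a, b)
>>> abs(a - b)
tensor(1)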

abs_diff = torch.abs(actual - expected)

if self.allowed_percentage_diff is not None:
@@ -313,6 +315,42 @@ def make_image_loaders(
make_images = from_loaders(make_image_loaders)


def make_image_loader_for_interpolation(size="random", *, color_space=features.ColorSpace.RGB, dtype=torch.uint8):
size = _parse_spatial_size(size)
num_channels = get_num_channels(color_space)

def fn(shape, dtype, device):
height, width = shape[-2:]

image_pil = (
PIL.Image.open(pathlib.Path(__file__).parent / "assets" / "encode_jpeg" / "grace_hopper_517x606.jpg")
.resize((width, height))
.convert(
{
features.ColorSpace.GRAY: "L",
features.ColorSpace.GRAY_ALPHA: "LA",
features.ColorSpace.RGB: "RGB",
features.ColorSpace.RGB_ALPHA: "RGBA",
}[color_space]
)
)

image_tensor = convert_dtype_image_tensor(to_image_tensor(image_pil).to(device=device), dtype=dtype)

return features.Image(image_tensor, color_space=color_space)

return ImageLoader(fn, shape=(num_channels, *size), dtype=dtype, color_space=color_space)


def make_image_loaders_for_interpolation(
sizes=((233, 147),),
The PR author commented:
I didn't use the default shapes here since they are pretty small, so the area of the border of the ellipse is quite large compared to the overall image and the tolerances would need to be larger. Open for discussion about this.
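
As a rough illustration of the scaling (a back-of-the-envelope estimate, not taken from the PR): pixels along a boundary grow linearly with the side lengths, while the total pixel count grows with their product. A one-pixel-wide frame around an image covers 2*h + 2*w - 4 pixels, i.e. about 12% of a hypothetical 32x32 image (124 of 1024 pixels) but only about 2% of a 233x147 image (756 of 34251 pixels). Interpolation discrepancies concentrate on such boundary pixels, which is why small shapes would force larger tolerances.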

color_spaces=(features.ColorSpace.RGB,),
dtypes=(torch.uint8,),
):
for params in combinations_grid(size=sizes, color_space=color_spaces, dtype=dtypes):
yield make_image_loader_for_interpolation(**params)


@dataclasses.dataclass
class BoundingBoxLoader(TensorLoader):
format: features.BoundingBoxFormat
128 changes: 33 additions & 95 deletions test/prototype_transforms_kernel_infos.py
@@ -19,6 +19,7 @@
make_bounding_box_loaders,
make_image_loader,
make_image_loaders,
make_image_loaders_for_interpolation,
make_mask_loaders,
make_video_loaders,
mark_framework_limitation,
@@ -287,7 +288,7 @@ def reference_resize_image_tensor(*args, **kwargs):

def reference_inputs_resize_image_tensor():
for image_loader, interpolation in itertools.product(
make_image_loaders(extra_dims=[()], dtypes=[torch.uint8]),
make_image_loaders_for_interpolation(),
[
F.InterpolationMode.NEAREST,
F.InterpolationMode.NEAREST_EXACT,
Expand Down Expand Up @@ -319,17 +320,6 @@ def sample_inputs_resize_mask():
yield ArgsKwargs(mask_loader, size=[min(mask_loader.shape[-2:]) + 1])


@pil_reference_wrapper
def reference_resize_mask(*args, **kwargs):
The PR author commented:
Since all of our mask kernels are just wrapping the image ones, there is no need to compare both against a reference. Checking the image kernel is sufficient.
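
For illustration, the wrapping pattern looks roughly like this (a sketch, not the actual torchvision source):

from torchvision.prototype.transforms import functional as F

def resize_mask(mask, size):
    # Delegate to the image kernel; NEAREST interpolation keeps the
    # categorical label values intact instead of blending them.
    return F.resize_image_tensor(mask, size, interpolation=F.InterpolationMode.NEAREST)

Since the mask kernel adds no pixel math of its own, a PIL reference comparison for it would only re-test resize_image_tensor.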

return F.resize_image_pil(*args, interpolation=F.InterpolationMode.NEAREST, **kwargs)


def reference_inputs_resize_mask():
for mask_loader in make_mask_loaders(extra_dims=[()], num_objects=[1]):
for size in _get_resize_sizes(mask_loader.shape[-2:]):
yield ArgsKwargs(mask_loader, size=size)


def sample_inputs_resize_video():
for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]):
yield ArgsKwargs(video_loader, size=[min(video_loader.shape[-2:]) + 1])
@@ -369,11 +359,9 @@ def reference_inputs_resize_bounding_box():
reference_inputs_fn=reference_inputs_resize_image_tensor,
float32_vs_uint8=True,
closeness_kwargs={
# TODO: investigate
**pil_reference_pixel_difference(110, agg_method="mean"),
**pil_reference_pixel_difference(10, agg_method="mean"),
**cuda_vs_cpu_pixel_difference(),
# TODO: investigate
**float32_vs_uint8_pixel_difference(50),
**float32_vs_uint8_pixel_difference(1, agg_method="mean"),
},
test_marks=[
xfail_jit_python_scalar_arg("size"),
@@ -391,9 +379,6 @@
KernelInfo(
F.resize_mask,
sample_inputs_fn=sample_inputs_resize_mask,
reference_fn=reference_resize_mask,
reference_inputs_fn=reference_inputs_resize_mask,
float32_vs_uint8=True,
closeness_kwargs=pil_reference_pixel_difference(10),
test_marks=[
xfail_jit_python_scalar_arg("size"),
@@ -501,9 +486,7 @@ def sample_inputs_affine_image_tensor():


def reference_inputs_affine_image_tensor():
for image_loader, affine_kwargs in itertools.product(
make_image_loaders(extra_dims=[()], dtypes=[torch.uint8]), _AFFINE_KWARGS
):
for image_loader, affine_kwargs in itertools.product(make_image_loaders_for_interpolation(), _AFFINE_KWARGS):
yield ArgsKwargs(
image_loader,
interpolation=F.InterpolationMode.NEAREST,
@@ -617,18 +600,6 @@ def sample_inputs_affine_mask():
yield ArgsKwargs(mask_loader, **_full_affine_params())


@pil_reference_wrapper
def reference_affine_mask(*args, **kwargs):
return F.affine_image_pil(*args, interpolation=F.InterpolationMode.NEAREST, **kwargs)


def reference_inputs_resize_mask():
for mask_loader, affine_kwargs in itertools.product(
make_mask_loaders(extra_dims=[()], num_objects=[1]), _AFFINE_KWARGS
):
yield ArgsKwargs(mask_loader, **affine_kwargs)


def sample_inputs_affine_video():
for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]):
yield ArgsKwargs(video_loader, **_full_affine_params())
@@ -665,10 +636,6 @@ def sample_inputs_affine_video():
KernelInfo(
F.affine_mask,
sample_inputs_fn=sample_inputs_affine_mask,
reference_fn=reference_affine_mask,
reference_inputs_fn=reference_inputs_resize_mask,
closeness_kwargs=pil_reference_pixel_difference(10),
float32_vs_uint8=True,
test_marks=[
xfail_jit_python_scalar_arg("shear"),
],
@@ -870,9 +837,7 @@ def sample_inputs_rotate_image_tensor():


def reference_inputs_rotate_image_tensor():
for image_loader, angle in itertools.product(
make_image_loaders(extra_dims=[()], dtypes=[torch.uint8]), _ROTATE_ANGLES
):
for image_loader, angle in itertools.product(make_image_loaders_for_interpolation(), _ROTATE_ANGLES):
yield ArgsKwargs(image_loader, angle=angle)


@@ -891,16 +856,6 @@ def sample_inputs_rotate_mask():
yield ArgsKwargs(mask_loader, angle=15.0)


@pil_reference_wrapper
def reference_rotate_mask(*args, **kwargs):
return F.rotate_image_pil(*args, interpolation=F.InterpolationMode.NEAREST, **kwargs)


def reference_inputs_rotate_mask():
for mask_loader, angle in itertools.product(make_mask_loaders(extra_dims=[()], num_objects=[1]), _ROTATE_ANGLES):
yield ArgsKwargs(mask_loader, angle=angle)


def sample_inputs_rotate_video():
for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]):
yield ArgsKwargs(video_loader, angle=15.0)
@@ -914,8 +869,7 @@ def sample_inputs_rotate_video():
reference_fn=pil_reference_wrapper(F.rotate_image_pil),
reference_inputs_fn=reference_inputs_rotate_image_tensor,
float32_vs_uint8=True,
# TODO: investigate
closeness_kwargs=pil_reference_pixel_difference(110, agg_method="mean"),
closeness_kwargs=pil_reference_pixel_difference(1, agg_method="mean"),
test_marks=[
xfail_jit_tuple_instead_of_list("fill"),
# TODO: check if this is a regression since it seems that should be supported if `int` is ok
@@ -929,10 +883,6 @@ def sample_inputs_rotate_video():
KernelInfo(
F.rotate_mask,
sample_inputs_fn=sample_inputs_rotate_mask,
reference_fn=reference_rotate_mask,
reference_inputs_fn=reference_inputs_rotate_mask,
float32_vs_uint8=True,
closeness_kwargs=pil_reference_pixel_difference(10),
),
KernelInfo(
F.rotate_video,
@@ -1058,7 +1008,7 @@ def reference_resized_crop_image_tensor(*args, **kwargs):

def reference_inputs_resized_crop_image_tensor():
for image_loader, interpolation, params in itertools.product(
make_image_loaders(extra_dims=[()], dtypes=[torch.uint8]),
make_image_loaders_for_interpolation(),
[
F.InterpolationMode.NEAREST,
F.InterpolationMode.NEAREST_EXACT,
Expand Down Expand Up @@ -1089,13 +1039,6 @@ def sample_inputs_resized_crop_mask():
yield ArgsKwargs(mask_loader, **_RESIZED_CROP_PARAMS[0])


def reference_inputs_resized_crop_mask():
for mask_loader, params in itertools.product(
make_mask_loaders(extra_dims=[()], num_objects=[1]), _RESIZED_CROP_PARAMS
):
yield ArgsKwargs(mask_loader, **params)


def sample_inputs_resized_crop_video():
for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]):
yield ArgsKwargs(video_loader, **_RESIZED_CROP_PARAMS[0])
@@ -1110,11 +1053,9 @@ def sample_inputs_resized_crop_video():
reference_inputs_fn=reference_inputs_resized_crop_image_tensor,
float32_vs_uint8=True,
closeness_kwargs={
# TODO: investigate
**pil_reference_pixel_difference(60, agg_method="mean"),
**cuda_vs_cpu_pixel_difference(),
# TODO: investigate
**float32_vs_uint8_pixel_difference(50),
**pil_reference_pixel_difference(3, agg_method="mean"),
**float32_vs_uint8_pixel_difference(3, agg_method="mean"),
},
),
KernelInfo(
Expand All @@ -1124,10 +1065,6 @@ def sample_inputs_resized_crop_video():
KernelInfo(
F.resized_crop_mask,
sample_inputs_fn=sample_inputs_resized_crop_mask,
reference_fn=pil_reference_wrapper(F.resized_crop_image_pil),
reference_inputs_fn=reference_inputs_resized_crop_mask,
float32_vs_uint8=True,
closeness_kwargs=pil_reference_pixel_difference(10),
),
KernelInfo(
F.resized_crop_video,
@@ -1298,12 +1235,24 @@ def sample_inputs_perspective_image_tensor():


def reference_inputs_perspective_image_tensor():
for image_loader, coefficients in itertools.product(
make_image_loaders(extra_dims=[()], dtypes=[torch.uint8]), _PERSPECTIVE_COEFFS
for image_loader, coefficients, interpolation in itertools.product(
make_image_loaders_for_interpolation(),
_PERSPECTIVE_COEFFS,
[
F.InterpolationMode.NEAREST,
F.InterpolationMode.BILINEAR,
],
):
# FIXME: PIL kernel doesn't support sequences of length 1 if the number of channels is larger. Shouldn't it?
for fill in get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype):
yield ArgsKwargs(image_loader, None, None, fill=fill, coefficients=coefficients)
yield ArgsKwargs(
image_loader,
startpoints=None,
endpoints=None,
interpolation=interpolation,
fill=fill,
coefficients=coefficients,
)


def sample_inputs_perspective_bounding_box():
Expand Down Expand Up @@ -1339,8 +1288,7 @@ def sample_inputs_perspective_video():
reference_inputs_fn=reference_inputs_perspective_image_tensor,
float32_vs_uint8=float32_vs_uint8_fill_adapter,
closeness_kwargs={
# TODO: investigate
**pil_reference_pixel_difference(160, agg_method="mean"),
**pil_reference_pixel_difference(2, agg_method="mean"),
**cuda_vs_cpu_pixel_difference(),
**float32_vs_uint8_pixel_difference(),
},
@@ -1381,7 +1329,7 @@ def sample_inputs_elastic_image_tensor():

def reference_inputs_elastic_image_tensor():
for image_loader, interpolation in itertools.product(
make_image_loaders(extra_dims=[()], dtypes=[torch.uint8]),
make_image_loaders_for_interpolation(),
[
F.InterpolationMode.NEAREST,
F.InterpolationMode.BILINEAR,
@@ -1409,12 +1357,6 @@ def sample_inputs_elastic_mask():
yield ArgsKwargs(mask_loader, displacement=displacement)


def reference_inputs_elastic_mask():
for mask_loader in make_mask_loaders(extra_dims=[()], num_objects=[1]):
displacement = _get_elastic_displacement(mask_loader.shape[-2:])
yield ArgsKwargs(mask_loader, displacement=displacement)


def sample_inputs_elastic_video():
for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]):
displacement = _get_elastic_displacement(video_loader.shape[-2:])
@@ -1426,11 +1368,12 @@ def sample_inputs_elastic_video():
KernelInfo(
F.elastic_image_tensor,
sample_inputs_fn=sample_inputs_elastic_image_tensor,
reference_fn=pil_reference_wrapper(F.elastic_image_pil),
reference_inputs_fn=reference_inputs_elastic_image_tensor,
float32_vs_uint8=float32_vs_uint8_fill_adapter,
# TODO: investigate
closeness_kwargs=float32_vs_uint8_pixel_difference(60, agg_method="mean"),
closeness_kwargs={
**float32_vs_uint8_pixel_difference(6, agg_method="mean"),
**cuda_vs_cpu_pixel_difference(),
The PR author commented:
Drive-by fix for something that seems to have snuck in since the last time I tested against CUDA.

},
),
KernelInfo(
F.elastic_bounding_box,
Expand All @@ -1439,15 +1382,11 @@ def sample_inputs_elastic_video():
KernelInfo(
F.elastic_mask,
sample_inputs_fn=sample_inputs_elastic_mask,
reference_fn=pil_reference_wrapper(F.elastic_image_pil),
reference_inputs_fn=reference_inputs_elastic_mask,
float32_vs_uint8=True,
# TODO: investigate
closeness_kwargs=pil_reference_pixel_difference(80, agg_method="mean"),
),
KernelInfo(
F.elastic_video,
sample_inputs_fn=sample_inputs_elastic_video,
closeness_kwargs=cuda_vs_cpu_pixel_difference(),
The PR author commented:
Same as above.

),
]
)
@@ -2089,8 +2028,7 @@ def sample_inputs_adjust_hue_video():
reference_inputs_fn=reference_inputs_adjust_hue_image_tensor,
float32_vs_uint8=True,
closeness_kwargs={
# TODO: investigate
**pil_reference_pixel_difference(20),
**pil_reference_pixel_difference(2, agg_method="mean"),
**float32_vs_uint8_pixel_difference(),
},
),