Commit d25928e

NicolasHug authored and facebook-github-bot committed

[fbsync] use non-random images for interpolation kernels for testing (#6977)

Summary:

* use non-random images for interpolation kernels for testing
* use real image rather than artificial
* cleanup

Reviewed By: YosuaMichael

Differential Revision: D41648540

fbshipit-source-id: d3685e4ade1e6729c64e9ec6002d6914ff070a52
1 parent f1b43f1 commit d25928e

File tree

2 files changed (+72 additions, -96 deletions)
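The rationale behind the change: interpolation kernels (the tensor kernels vs. the PIL reference) stay much closer to each other on a natural photograph than on per-pixel random noise, where any small difference in coordinate handling or filtering is amplified. That is what allows the loose reference tolerances below to be tightened. A minimal sketch of that comparison follows; it is not part of the commit, it assumes a torchvision checkout with the bundled Grace Hopper asset, and the exact numbers depend on the torchvision version and the antialias setting.

# Illustrative sketch only, not part of the commit: measure how far the tensor
# resize kernel is from the PIL kernel on random noise vs. the real test photo.
# The asset path and the (233, 147) size are taken from the diff below.
import torch
import torchvision.transforms.functional as TF
from PIL import Image


def mean_abs_diff(img_tensor, size=(128, 128)):
    # Resize the uint8 CHW tensor with the tensor kernel ...
    resized_tensor = TF.resize(
        img_tensor, list(size), interpolation=TF.InterpolationMode.BILINEAR, antialias=True
    )
    # ... and the same data through the PIL kernel, then compare.
    resized_pil = TF.pil_to_tensor(TF.to_pil_image(img_tensor).resize(size[::-1], Image.BILINEAR))
    return (resized_tensor.float() - resized_pil.float()).abs().mean().item()


noise = torch.randint(0, 256, (3, 233, 147), dtype=torch.uint8)
photo = TF.pil_to_tensor(Image.open("test/assets/encode_jpeg/grace_hopper_517x606.jpg").resize((147, 233)))

print("random noise:", mean_abs_diff(noise))
print("real photo:  ", mean_abs_diff(photo))

The helper only measures the gap between the two kernels; the thresholds enforced by the test suite are the ones set in the diff below.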

test/prototype_common_utils.py

Lines changed: 39 additions & 1 deletion
@@ -3,6 +3,7 @@
 import collections.abc
 import dataclasses
 import functools
+import pathlib
 from collections import defaultdict
 from typing import Callable, Optional, Sequence, Tuple, Union

@@ -14,7 +15,7 @@
 from torch.nn.functional import one_hot
 from torch.testing._comparison import assert_equal as _assert_equal, BooleanPair, NonePair, NumberPair, TensorLikePair
 from torchvision.prototype import features
-from torchvision.prototype.transforms.functional import to_image_tensor
+from torchvision.prototype.transforms.functional import convert_dtype_image_tensor, to_image_tensor
 from torchvision.transforms.functional_tensor import _max_value as get_max_value

 __all__ = [
@@ -69,6 +70,7 @@ def compare(self) -> None:
         self._compare_attributes(actual, expected)

         actual, expected = self._equalize_attributes(actual, expected)
+        actual, expected = self._promote_for_comparison(actual, expected)
         abs_diff = torch.abs(actual - expected)

         if self.allowed_percentage_diff is not None:
@@ -313,6 +315,42 @@ def make_image_loaders(
 make_images = from_loaders(make_image_loaders)


+def make_image_loader_for_interpolation(size="random", *, color_space=features.ColorSpace.RGB, dtype=torch.uint8):
+    size = _parse_spatial_size(size)
+    num_channels = get_num_channels(color_space)
+
+    def fn(shape, dtype, device):
+        height, width = shape[-2:]
+
+        image_pil = (
+            PIL.Image.open(pathlib.Path(__file__).parent / "assets" / "encode_jpeg" / "grace_hopper_517x606.jpg")
+            .resize((width, height))
+            .convert(
+                {
+                    features.ColorSpace.GRAY: "L",
+                    features.ColorSpace.GRAY_ALPHA: "LA",
+                    features.ColorSpace.RGB: "RGB",
+                    features.ColorSpace.RGB_ALPHA: "RGBA",
+                }[color_space]
+            )
+        )
+
+        image_tensor = convert_dtype_image_tensor(to_image_tensor(image_pil).to(device=device), dtype=dtype)
+
+        return features.Image(image_tensor, color_space=color_space)
+
+    return ImageLoader(fn, shape=(num_channels, *size), dtype=dtype, color_space=color_space)
+
+
+def make_image_loaders_for_interpolation(
+    sizes=((233, 147),),
+    color_spaces=(features.ColorSpace.RGB,),
+    dtypes=(torch.uint8,),
+):
+    for params in combinations_grid(size=sizes, color_space=color_spaces, dtype=dtypes):
+        yield make_image_loader_for_interpolation(**params)
+
+
 @dataclasses.dataclass
 class BoundingBoxLoader(TensorLoader):
     format: features.BoundingBoxFormat
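A rough usage sketch of the two helpers added above. It is not part of the commit; it assumes the test module is importable as `prototype_common_utils` and that `ImageLoader` keeps the `load(device)` helper used by the other loaders in this file.

# Hypothetical usage of make_image_loaders_for_interpolation (added above).
# Assumptions: this runs from the test/ directory so prototype_common_utils is
# importable, and ImageLoader exposes load(device) like the other loaders here.
import torch
from torchvision.prototype import features

from prototype_common_utils import make_image_loaders_for_interpolation

for loader in make_image_loaders_for_interpolation(
    sizes=((233, 147),),
    color_spaces=(features.ColorSpace.RGB, features.ColorSpace.GRAY),
    dtypes=(torch.uint8, torch.float32),
):
    # Deterministic: every sample is the resized Grace Hopper photo, never random noise.
    image = loader.load("cpu")
    print(loader.shape, loader.dtype, image.shape, image.dtype)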

test/prototype_transforms_kernel_infos.py

Lines changed: 33 additions & 95 deletions
@@ -19,6 +19,7 @@
     make_bounding_box_loaders,
     make_image_loader,
     make_image_loaders,
+    make_image_loaders_for_interpolation,
     make_mask_loaders,
     make_video_loaders,
     mark_framework_limitation,
@@ -287,7 +288,7 @@ def reference_resize_image_tensor(*args, **kwargs):

 def reference_inputs_resize_image_tensor():
     for image_loader, interpolation in itertools.product(
-        make_image_loaders(extra_dims=[()], dtypes=[torch.uint8]),
+        make_image_loaders_for_interpolation(),
         [
             F.InterpolationMode.NEAREST,
             F.InterpolationMode.NEAREST_EXACT,
@@ -319,17 +320,6 @@ def sample_inputs_resize_mask():
         yield ArgsKwargs(mask_loader, size=[min(mask_loader.shape[-2:]) + 1])


-@pil_reference_wrapper
-def reference_resize_mask(*args, **kwargs):
-    return F.resize_image_pil(*args, interpolation=F.InterpolationMode.NEAREST, **kwargs)
-
-
-def reference_inputs_resize_mask():
-    for mask_loader in make_mask_loaders(extra_dims=[()], num_objects=[1]):
-        for size in _get_resize_sizes(mask_loader.shape[-2:]):
-            yield ArgsKwargs(mask_loader, size=size)
-
-
 def sample_inputs_resize_video():
     for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]):
         yield ArgsKwargs(video_loader, size=[min(video_loader.shape[-2:]) + 1])
@@ -369,11 +359,9 @@ def reference_inputs_resize_bounding_box():
             reference_inputs_fn=reference_inputs_resize_image_tensor,
             float32_vs_uint8=True,
             closeness_kwargs={
-                # TODO: investigate
-                **pil_reference_pixel_difference(110, agg_method="mean"),
+                **pil_reference_pixel_difference(10, agg_method="mean"),
                 **cuda_vs_cpu_pixel_difference(),
-                # TODO: investigate
-                **float32_vs_uint8_pixel_difference(50),
+                **float32_vs_uint8_pixel_difference(1, agg_method="mean"),
             },
             test_marks=[
                 xfail_jit_python_scalar_arg("size"),
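For context on the numbers above: the `pil_reference_pixel_difference` and `float32_vs_uint8_pixel_difference` helpers build `closeness_kwargs` for the test harness, and with `agg_method="mean"` the check bounds the aggregated absolute pixel difference rather than a per-pixel maximum. With the real-photo inputs, the tensor kernels track the PIL reference closely enough that the mean tolerance drops from 110 to 10, and the float32-vs-uint8 tolerance from 50 to a mean of 1. An illustrative sketch of what such a mean-aggregated check amounts to (not the harness's actual implementation):

# Illustrative sketch, not the test harness's implementation: with
# agg_method="mean" the closeness check bounds the aggregated absolute pixel
# difference instead of a per-pixel maximum.
import torch


def assert_mean_pixel_diff_close(actual, expected, tolerance):
    mean_diff = (actual.float() - expected.float()).abs().mean().item()
    if mean_diff > tolerance:
        raise AssertionError(f"mean absolute pixel difference {mean_diff:.3f} exceeds {tolerance}")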
@@ -391,9 +379,6 @@ def reference_inputs_resize_bounding_box():
         KernelInfo(
             F.resize_mask,
             sample_inputs_fn=sample_inputs_resize_mask,
-            reference_fn=reference_resize_mask,
-            reference_inputs_fn=reference_inputs_resize_mask,
-            float32_vs_uint8=True,
             closeness_kwargs=pil_reference_pixel_difference(10),
             test_marks=[
                 xfail_jit_python_scalar_arg("size"),
@@ -501,9 +486,7 @@ def sample_inputs_affine_image_tensor():


 def reference_inputs_affine_image_tensor():
-    for image_loader, affine_kwargs in itertools.product(
-        make_image_loaders(extra_dims=[()], dtypes=[torch.uint8]), _AFFINE_KWARGS
-    ):
+    for image_loader, affine_kwargs in itertools.product(make_image_loaders_for_interpolation(), _AFFINE_KWARGS):
         yield ArgsKwargs(
             image_loader,
             interpolation=F.InterpolationMode.NEAREST,
@@ -617,18 +600,6 @@ def sample_inputs_affine_mask():
         yield ArgsKwargs(mask_loader, **_full_affine_params())


-@pil_reference_wrapper
-def reference_affine_mask(*args, **kwargs):
-    return F.affine_image_pil(*args, interpolation=F.InterpolationMode.NEAREST, **kwargs)
-
-
-def reference_inputs_resize_mask():
-    for mask_loader, affine_kwargs in itertools.product(
-        make_mask_loaders(extra_dims=[()], num_objects=[1]), _AFFINE_KWARGS
-    ):
-        yield ArgsKwargs(mask_loader, **affine_kwargs)
-
-
 def sample_inputs_affine_video():
     for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]):
         yield ArgsKwargs(video_loader, **_full_affine_params())
@@ -665,10 +636,6 @@ def sample_inputs_affine_video():
         KernelInfo(
             F.affine_mask,
             sample_inputs_fn=sample_inputs_affine_mask,
-            reference_fn=reference_affine_mask,
-            reference_inputs_fn=reference_inputs_resize_mask,
-            closeness_kwargs=pil_reference_pixel_difference(10),
-            float32_vs_uint8=True,
             test_marks=[
                 xfail_jit_python_scalar_arg("shear"),
             ],
@@ -870,9 +837,7 @@ def sample_inputs_rotate_image_tensor():


 def reference_inputs_rotate_image_tensor():
-    for image_loader, angle in itertools.product(
-        make_image_loaders(extra_dims=[()], dtypes=[torch.uint8]), _ROTATE_ANGLES
-    ):
+    for image_loader, angle in itertools.product(make_image_loaders_for_interpolation(), _ROTATE_ANGLES):
         yield ArgsKwargs(image_loader, angle=angle)


@@ -891,16 +856,6 @@ def sample_inputs_rotate_mask():
         yield ArgsKwargs(mask_loader, angle=15.0)


-@pil_reference_wrapper
-def reference_rotate_mask(*args, **kwargs):
-    return F.rotate_image_pil(*args, interpolation=F.InterpolationMode.NEAREST, **kwargs)
-
-
-def reference_inputs_rotate_mask():
-    for mask_loader, angle in itertools.product(make_mask_loaders(extra_dims=[()], num_objects=[1]), _ROTATE_ANGLES):
-        yield ArgsKwargs(mask_loader, angle=angle)
-
-
 def sample_inputs_rotate_video():
     for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]):
         yield ArgsKwargs(video_loader, angle=15.0)
@@ -914,8 +869,7 @@ def sample_inputs_rotate_video():
             reference_fn=pil_reference_wrapper(F.rotate_image_pil),
             reference_inputs_fn=reference_inputs_rotate_image_tensor,
             float32_vs_uint8=True,
-            # TODO: investigate
-            closeness_kwargs=pil_reference_pixel_difference(110, agg_method="mean"),
+            closeness_kwargs=pil_reference_pixel_difference(1, agg_method="mean"),
             test_marks=[
                 xfail_jit_tuple_instead_of_list("fill"),
                 # TODO: check if this is a regression since it seems that should be supported if `int` is ok
@@ -929,10 +883,6 @@ def sample_inputs_rotate_video():
         KernelInfo(
             F.rotate_mask,
             sample_inputs_fn=sample_inputs_rotate_mask,
-            reference_fn=reference_rotate_mask,
-            reference_inputs_fn=reference_inputs_rotate_mask,
-            float32_vs_uint8=True,
-            closeness_kwargs=pil_reference_pixel_difference(10),
         ),
         KernelInfo(
             F.rotate_video,
@@ -1058,7 +1008,7 @@ def reference_resized_crop_image_tensor(*args, **kwargs):

 def reference_inputs_resized_crop_image_tensor():
     for image_loader, interpolation, params in itertools.product(
-        make_image_loaders(extra_dims=[()], dtypes=[torch.uint8]),
+        make_image_loaders_for_interpolation(),
         [
             F.InterpolationMode.NEAREST,
             F.InterpolationMode.NEAREST_EXACT,
@@ -1089,13 +1039,6 @@ def sample_inputs_resized_crop_mask():
         yield ArgsKwargs(mask_loader, **_RESIZED_CROP_PARAMS[0])


-def reference_inputs_resized_crop_mask():
-    for mask_loader, params in itertools.product(
-        make_mask_loaders(extra_dims=[()], num_objects=[1]), _RESIZED_CROP_PARAMS
-    ):
-        yield ArgsKwargs(mask_loader, **params)
-
-
 def sample_inputs_resized_crop_video():
     for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]):
         yield ArgsKwargs(video_loader, **_RESIZED_CROP_PARAMS[0])
@@ -1110,11 +1053,9 @@ def sample_inputs_resized_crop_video():
             reference_inputs_fn=reference_inputs_resized_crop_image_tensor,
             float32_vs_uint8=True,
             closeness_kwargs={
-                # TODO: investigate
-                **pil_reference_pixel_difference(60, agg_method="mean"),
                 **cuda_vs_cpu_pixel_difference(),
-                # TODO: investigate
-                **float32_vs_uint8_pixel_difference(50),
+                **pil_reference_pixel_difference(3, agg_method="mean"),
+                **float32_vs_uint8_pixel_difference(3, agg_method="mean"),
             },
         ),
         KernelInfo(
@@ -1124,10 +1065,6 @@ def sample_inputs_resized_crop_video():
         KernelInfo(
             F.resized_crop_mask,
             sample_inputs_fn=sample_inputs_resized_crop_mask,
-            reference_fn=pil_reference_wrapper(F.resized_crop_image_pil),
-            reference_inputs_fn=reference_inputs_resized_crop_mask,
-            float32_vs_uint8=True,
-            closeness_kwargs=pil_reference_pixel_difference(10),
         ),
         KernelInfo(
             F.resized_crop_video,
@@ -1298,12 +1235,24 @@ def sample_inputs_perspective_image_tensor():


 def reference_inputs_perspective_image_tensor():
-    for image_loader, coefficients in itertools.product(
-        make_image_loaders(extra_dims=[()], dtypes=[torch.uint8]), _PERSPECTIVE_COEFFS
+    for image_loader, coefficients, interpolation in itertools.product(
+        make_image_loaders_for_interpolation(),
+        _PERSPECTIVE_COEFFS,
+        [
+            F.InterpolationMode.NEAREST,
+            F.InterpolationMode.BILINEAR,
+        ],
     ):
         # FIXME: PIL kernel doesn't support sequences of length 1 if the number of channels is larger. Shouldn't it?
         for fill in get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype):
-            yield ArgsKwargs(image_loader, None, None, fill=fill, coefficients=coefficients)
+            yield ArgsKwargs(
+                image_loader,
+                startpoints=None,
+                endpoints=None,
+                interpolation=interpolation,
+                fill=fill,
+                coefficients=coefficients,
+            )


 def sample_inputs_perspective_bounding_box():
@@ -1339,8 +1288,7 @@ def sample_inputs_perspective_video():
             reference_inputs_fn=reference_inputs_perspective_image_tensor,
             float32_vs_uint8=float32_vs_uint8_fill_adapter,
             closeness_kwargs={
-                # TODO: investigate
-                **pil_reference_pixel_difference(160, agg_method="mean"),
+                **pil_reference_pixel_difference(2, agg_method="mean"),
                 **cuda_vs_cpu_pixel_difference(),
                 **float32_vs_uint8_pixel_difference(),
             },
@@ -1381,7 +1329,7 @@ def sample_inputs_elastic_image_tensor():

 def reference_inputs_elastic_image_tensor():
     for image_loader, interpolation in itertools.product(
-        make_image_loaders(extra_dims=[()], dtypes=[torch.uint8]),
+        make_image_loaders_for_interpolation(),
         [
             F.InterpolationMode.NEAREST,
             F.InterpolationMode.BILINEAR,
@@ -1409,12 +1357,6 @@ def sample_inputs_elastic_mask():
         yield ArgsKwargs(mask_loader, displacement=displacement)


-def reference_inputs_elastic_mask():
-    for mask_loader in make_mask_loaders(extra_dims=[()], num_objects=[1]):
-        displacement = _get_elastic_displacement(mask_loader.shape[-2:])
-        yield ArgsKwargs(mask_loader, displacement=displacement)
-
-
 def sample_inputs_elastic_video():
     for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]):
         displacement = _get_elastic_displacement(video_loader.shape[-2:])
@@ -1426,11 +1368,12 @@ def sample_inputs_elastic_video():
         KernelInfo(
             F.elastic_image_tensor,
             sample_inputs_fn=sample_inputs_elastic_image_tensor,
-            reference_fn=pil_reference_wrapper(F.elastic_image_pil),
             reference_inputs_fn=reference_inputs_elastic_image_tensor,
             float32_vs_uint8=float32_vs_uint8_fill_adapter,
-            # TODO: investigate
-            closeness_kwargs=float32_vs_uint8_pixel_difference(60, agg_method="mean"),
+            closeness_kwargs={
+                **float32_vs_uint8_pixel_difference(6, agg_method="mean"),
+                **cuda_vs_cpu_pixel_difference(),
+            },
         ),
         KernelInfo(
             F.elastic_bounding_box,
@@ -1439,15 +1382,11 @@ def sample_inputs_elastic_video():
         KernelInfo(
             F.elastic_mask,
             sample_inputs_fn=sample_inputs_elastic_mask,
-            reference_fn=pil_reference_wrapper(F.elastic_image_pil),
-            reference_inputs_fn=reference_inputs_elastic_mask,
-            float32_vs_uint8=True,
-            # TODO: investigate
-            closeness_kwargs=pil_reference_pixel_difference(80, agg_method="mean"),
         ),
         KernelInfo(
             F.elastic_video,
             sample_inputs_fn=sample_inputs_elastic_video,
+            closeness_kwargs=cuda_vs_cpu_pixel_difference(),
         ),
     ]
 )
@@ -2089,8 +2028,7 @@ def sample_inputs_adjust_hue_video():
             reference_inputs_fn=reference_inputs_adjust_hue_image_tensor,
             float32_vs_uint8=True,
             closeness_kwargs={
-                # TODO: investigate
-                **pil_reference_pixel_difference(20),
+                **pil_reference_pixel_difference(2, agg_method="mean"),
                 **float32_vs_uint8_pixel_difference(),
             },
         ),
