From 0bb69b17068c56d6ea81042c81df801c2f1e16e1 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Tue, 13 Jun 2023 02:55:08 -0700 Subject: [PATCH 1/3] Add uint8 bicubic support to ResizeV2 --- test/test_transforms_v2_consistency.py | 8 ++++---- .../transforms/v2/functional/_geometry.py | 16 ++++++++-------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/test/test_transforms_v2_consistency.py b/test/test_transforms_v2_consistency.py index 05ab6b67af5..02800145f1c 100644 --- a/test/test_transforms_v2_consistency.py +++ b/test/test_transforms_v2_consistency.py @@ -98,8 +98,8 @@ def __init__( ArgsKwargs((29, 32), antialias=False), ArgsKwargs((28, 31), antialias=True), ], - # atol=1 due to Resize v2 is using native uint8 interpolate path for bilinear and nearest modes - closeness_kwargs=dict(rtol=0, atol=1), + # Such a high atol is needed for bicubic mode + closeness_kwargs=dict(rtol=0, atol=32), ), ConsistencyConfig( v2_transforms.CenterCrop, @@ -315,8 +315,8 @@ def __init__( ArgsKwargs((29, 32), antialias=False), ArgsKwargs((28, 31), antialias=True), ], - # atol=1 due to Resize v2 is using native uint8 interpolate path for bilinear and nearest modes - closeness_kwargs=dict(rtol=0, atol=1), + # Such a high atol is needed for bicubic mode + closeness_kwargs=dict(rtol=0, atol=30), ), ConsistencyConfig( v2_transforms.RandomErasing, diff --git a/torchvision/transforms/v2/functional/_geometry.py b/torchvision/transforms/v2/functional/_geometry.py index ced7ff0b28b..aab3be24e0b 100644 --- a/torchvision/transforms/v2/functional/_geometry.py +++ b/torchvision/transforms/v2/functional/_geometry.py @@ -190,14 +190,13 @@ def resize_image_tensor( if interpolation == InterpolationMode.NEAREST or interpolation == InterpolationMode.NEAREST_EXACT: # uint8 dtype can be included for cpu and cuda input if nearest mode acceptable_dtypes.append(torch.uint8) - elif ( - interpolation == InterpolationMode.BILINEAR - and image.device.type == "cpu" - and "AVX2" in torch.backends.cpu.get_cpu_capability() - ): - # uint8 dtype support for bilinear mode is limited to cpu and - # according to our benchmarks non-AVX CPUs should prefer u8->f32->interpolate->u8 path - acceptable_dtypes.append(torch.uint8) + elif image.device.type == "cpu": + # uint8 dtype support for bilinear and bicubic is limited to cpu and + # according to our benchmarks, non-AVX CPUs should still prefer u8->f32->interpolate->u8 path for bilinear + if (interpolation == InterpolationMode.BILINEAR and "AVX2" in torch.backends.cpu.get_cpu_capability()) or ( + interpolation == InterpolationMode.BICUBIC + ): + acceptable_dtypes.append(torch.uint8) strides = image.stride() if image.is_contiguous(memory_format=torch.channels_last) and image.shape[0] == 1 and numel != strides[0]: @@ -227,6 +226,7 @@ def resize_image_tensor( if need_cast: if interpolation == InterpolationMode.BICUBIC and dtype == torch.uint8: + # This path is hit on non-AVX archs, or on GPU. image = image.clamp_(min=0, max=255) if dtype in (torch.uint8, torch.int8, torch.int16, torch.int32, torch.int64): image = image.round_() From d3c5751dbd918b7adace9772ddaf49133fc575a8 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Tue, 13 Jun 2023 03:50:43 -0700 Subject: [PATCH 2/3] slightly better tests --- test/test_transforms_v2_consistency.py | 30 +++++++++++++++++++------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/test/test_transforms_v2_consistency.py b/test/test_transforms_v2_consistency.py index 02800145f1c..e541feaf1eb 100644 --- a/test/test_transforms_v2_consistency.py +++ b/test/test_transforms_v2_consistency.py @@ -87,10 +87,8 @@ def __init__( ArgsKwargs([32]), ArgsKwargs((32, 29)), ArgsKwargs((31, 28), interpolation=v2_transforms.InterpolationMode.NEAREST), - ArgsKwargs((33, 26), interpolation=v2_transforms.InterpolationMode.BICUBIC), ArgsKwargs((30, 27), interpolation=PIL.Image.NEAREST), ArgsKwargs((35, 29), interpolation=PIL.Image.BILINEAR), - ArgsKwargs((34, 25), interpolation=PIL.Image.BICUBIC), NotScriptableArgsKwargs(31, max_size=32), ArgsKwargs([31], max_size=32), NotScriptableArgsKwargs(30, max_size=100), @@ -98,8 +96,17 @@ def __init__( ArgsKwargs((29, 32), antialias=False), ArgsKwargs((28, 31), antialias=True), ], - # Such a high atol is needed for bicubic mode - closeness_kwargs=dict(rtol=0, atol=32), + # atol=1 due to Resize v2 is using native uint8 interpolate path for bilinear and nearest modes + closeness_kwargs=dict(rtol=0, atol=1), + ), + ConsistencyConfig( + v2_transforms.Resize, + legacy_transforms.Resize, + [ + ArgsKwargs((33, 26), interpolation=v2_transforms.InterpolationMode.BICUBIC, antialias=True), + ArgsKwargs((34, 25), interpolation=PIL.Image.BICUBIC, antialias=True), + ], + closeness_kwargs=dict(rtol=0, atol=21), ), ConsistencyConfig( v2_transforms.CenterCrop, @@ -309,14 +316,21 @@ def __init__( ArgsKwargs(17, scale=(0.3, 0.7)), ArgsKwargs(25, ratio=(0.5, 1.5)), ArgsKwargs((31, 28), interpolation=v2_transforms.InterpolationMode.NEAREST), - ArgsKwargs((33, 26), interpolation=v2_transforms.InterpolationMode.BICUBIC), ArgsKwargs((31, 28), interpolation=PIL.Image.NEAREST), - ArgsKwargs((33, 26), interpolation=PIL.Image.BICUBIC), ArgsKwargs((29, 32), antialias=False), ArgsKwargs((28, 31), antialias=True), ], - # Such a high atol is needed for bicubic mode - closeness_kwargs=dict(rtol=0, atol=30), + # atol=1 due to Resize v2 is using native uint8 interpolate path for bilinear and nearest modes + closeness_kwargs=dict(rtol=0, atol=1), + ), + ConsistencyConfig( + v2_transforms.RandomResizedCrop, + legacy_transforms.RandomResizedCrop, + [ + ArgsKwargs((33, 26), interpolation=v2_transforms.InterpolationMode.BICUBIC, antialias=True), + ArgsKwargs((33, 26), interpolation=PIL.Image.BICUBIC, antialias=True), + ], + closeness_kwargs=dict(rtol=0, atol=21), ), ConsistencyConfig( v2_transforms.RandomErasing, From 8e6351b90f685c5a83e8c13a72448d23fdca97b1 Mon Sep 17 00:00:00 2001 From: vfdev-5 Date: Wed, 14 Jun 2023 12:13:41 +0200 Subject: [PATCH 3/3] Fixed broken cuda vs cpu tests --- test/transforms_v2_kernel_infos.py | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/test/transforms_v2_kernel_infos.py b/test/transforms_v2_kernel_infos.py index e5873f80d15..7b877fb092d 100644 --- a/test/transforms_v2_kernel_infos.py +++ b/test/transforms_v2_kernel_infos.py @@ -257,17 +257,20 @@ def sample_inputs_resize_image_tensor(): for image_loader, interpolation in itertools.product( make_image_loaders(sizes=["random"], color_spaces=["RGB"]), - [ - F.InterpolationMode.NEAREST, - F.InterpolationMode.BILINEAR, - F.InterpolationMode.BICUBIC, - ], + [F.InterpolationMode.NEAREST, F.InterpolationMode.BILINEAR], ): yield ArgsKwargs(image_loader, size=[min(image_loader.spatial_size) + 1], interpolation=interpolation) yield ArgsKwargs(make_image_loader(size=(11, 17)), size=20, max_size=25) +def sample_inputs_resize_image_tensor_bicubic(): + for image_loader, interpolation in itertools.product( + make_image_loaders(sizes=["random"], color_spaces=["RGB"]), [F.InterpolationMode.BICUBIC] + ): + yield ArgsKwargs(image_loader, size=[min(image_loader.spatial_size) + 1], interpolation=interpolation) + + @pil_reference_wrapper def reference_resize_image_tensor(*args, **kwargs): if not kwargs.pop("antialias", False) and kwargs.get("interpolation", F.InterpolationMode.BILINEAR) in { @@ -364,6 +367,21 @@ def reference_inputs_resize_bounding_box(): xfail_jit_python_scalar_arg("size"), ], ), + KernelInfo( + F.resize_image_tensor, + sample_inputs_fn=sample_inputs_resize_image_tensor_bicubic, + reference_fn=reference_resize_image_tensor, + reference_inputs_fn=reference_inputs_resize_image_tensor, + float32_vs_uint8=True, + closeness_kwargs={ + **pil_reference_pixel_difference(10, mae=True), + **cuda_vs_cpu_pixel_difference(atol=30), + **float32_vs_uint8_pixel_difference(1, mae=True), + }, + test_marks=[ + xfail_jit_python_scalar_arg("size"), + ], + ), KernelInfo( F.resize_bounding_box, sample_inputs_fn=sample_inputs_resize_bounding_box,