1414    assert_equal ,
1515    assert_no_warnings ,
1616    cache ,
17-     cpu_and_gpu ,
17+     cpu_and_cuda ,
1818    ignore_jit_no_profile_information_warning ,
1919    make_bounding_box ,
2020    make_detection_mask ,
@@ -382,13 +382,15 @@ def _check_size(self, input, output, *, size, max_size):
382382    @pytest .mark .parametrize ("use_max_size" , [True , False ]) 
383383    @pytest .mark .parametrize ("antialias" , [True , False ]) 
384384    @pytest .mark .parametrize ("dtype" , [torch .float32 , torch .uint8 ]) 
385-     @pytest .mark .parametrize ("device" , cpu_and_gpu ()) 
385+     @pytest .mark .parametrize ("device" , cpu_and_cuda ()) 
386386    def  test_kernel_image_tensor (self , size , interpolation , use_max_size , antialias , dtype , device ):
387387        if  not  (max_size_kwarg  :=  self ._make_max_size_kwarg (use_max_size = use_max_size , size = size )):
388388            return 
389389
390-         uint8_atol  =  30  if  transforms .InterpolationMode .BICUBIC  else  1 
391-         check_cuda_vs_cpu_tolerances  =  dict (rtol = 0 , atol = uint8_atol  /  255  if  dtype .is_floating_point  else  uint8_atol )
390+         # In contrast to CPU, there is no native `InterpolationMode.BICUBIC` implementation for uint8 images on CUDA. 
391+         # Internally, it uses the float path. Thus, we need to test with an enormous tolerance here to account for that. 
392+     atol = 30 if interpolation is transforms.InterpolationMode.BICUBIC and dtype is torch.uint8 else 1
393+         check_cuda_vs_cpu_tolerances  =  dict (rtol = 0 , atol = atol  /  255  if  dtype .is_floating_point  else  atol )
392394
393395        check_kernel (
394396            F .resize_image_tensor ,
@@ -397,8 +399,6 @@ def test_kernel_image_tensor(self, size, interpolation, use_max_size, antialias,
397399            interpolation = interpolation ,
398400            ** max_size_kwarg ,
399401            antialias = antialias ,
400-             # The `InterpolationMode.BICUBIC` implementation on CUDA does not match CPU implementation well. Thus, 
401-             # wee need to test with an enormous tolerance. 
402402            check_cuda_vs_cpu = check_cuda_vs_cpu_tolerances ,
403403            check_scripted_vs_eager = not  isinstance (size , int ),
404404        )
@@ -407,7 +407,7 @@ def test_kernel_image_tensor(self, size, interpolation, use_max_size, antialias,
407407    @pytest .mark .parametrize ("format" , list (datapoints .BoundingBoxFormat )) 
408408    @pytest .mark .parametrize ("use_max_size" , [True , False ]) 
409409    @pytest .mark .parametrize ("dtype" , [torch .float32 , torch .int64 ]) 
410-     @pytest .mark .parametrize ("device" , cpu_and_gpu ()) 
410+     @pytest .mark .parametrize ("device" , cpu_and_cuda ()) 
411411    def  test_kernel_bounding_box (self , size , format , use_max_size , dtype , device ):
412412        if  not  (max_size_kwarg  :=  self ._make_max_size_kwarg (use_max_size = use_max_size , size = size )):
413413            return 
@@ -470,7 +470,7 @@ def test_dispatcher_signature(self, kernel, input_type):
470470        check_dispatcher_signatures_match (F .resize , kernel = kernel , input_type = input_type )
471471
472472    @pytest .mark .parametrize ("size" , OUTPUT_SIZES ) 
473-     @pytest .mark .parametrize ("device" , cpu_and_gpu ()) 
473+     @pytest .mark .parametrize ("device" , cpu_and_cuda ()) 
474474    @pytest .mark .parametrize ( 
475475        "input_type" , 
476476        [torch .Tensor , PIL .Image .Image , datapoints .Image , datapoints .BoundingBox , datapoints .Mask , datapoints .Video ], 
0 commit comments