@@ -11,6 +11,7 @@
 from torch.nn.functional import one_hot
 from torchvision.prototype import features
 from torchvision.prototype.transforms.functional._meta import convert_bounding_box_format
+from torchvision.transforms.functional import _get_perspective_coeffs
 from torchvision.transforms.functional_tensor import _max_value as get_max_value

 make_tensor = functools.partial(torch.testing.make_tensor, device="cpu")
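The new import, `_get_perspective_coeffs`, fits the eight coefficients (a, b, c, d, e, f, g, h) of a projective transform from four point correspondences by solving a small least-squares system. For reference, the mapping those coefficients describe looks roughly like the sketch below; `apply_perspective` is a hypothetical helper, not torchvision API, and it mirrors the m1/m2 matrices used in the tests further down, where the x and y numerators share a single denominator.

def apply_perspective(x, y, c):
    # Hypothetical sketch, not torchvision code. c = [a, b, c, d, e, f, g, h];
    # both output coordinates share the denominator g*x + h*y + 1.
    denom = c[6] * x + c[7] * y + 1.0
    return (
        (c[0] * x + c[1] * y + c[2]) / denom,
        (c[3] * x + c[4] * y + c[5]) / denom,
    )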
@@ -380,6 +381,37 @@ def pad_segmentation_mask():
         yield SampleInput(mask, padding=padding, padding_mode=padding_mode)


+@register_kernel_info_from_sample_inputs_fn
+def perspective_bounding_box():
+    for bounding_box, perspective_coeffs in itertools.product(
+        make_bounding_boxes(),
+        [
+            [1.2405, 0.1772, -6.9113, 0.0463, 1.251, -5.235, 0.00013, 0.0018],
+            [0.7366, -0.11724, 1.45775, -0.15012, 0.73406, 2.6019, -0.0072, -0.0063],
+        ],
+    ):
+        yield SampleInput(
+            bounding_box,
+            format=bounding_box.format,
+            perspective_coeffs=perspective_coeffs,
+        )
+
+
+@register_kernel_info_from_sample_inputs_fn
+def perspective_segmentation_mask():
+    for mask, perspective_coeffs in itertools.product(
+        make_segmentation_masks(extra_dims=((), (4,))),
+        [
+            [1.2405, 0.1772, -6.9113, 0.0463, 1.251, -5.235, 0.00013, 0.0018],
+            [0.7366, -0.11724, 1.45775, -0.15012, 0.73406, 2.6019, -0.0072, -0.0063],
+        ],
+    ):
+        yield SampleInput(
+            mask,
+            perspective_coeffs=perspective_coeffs,
+        )
+
+
 @register_kernel_info_from_sample_inputs_fn
 def center_crop_bounding_box():
     for bounding_box, output_size in itertools.product(make_bounding_boxes(), [(24, 12), [16, 18], [46, 48], [12]]):
@@ -993,7 +1025,7 @@ def test_correctness_vertical_flip_segmentation_mask_on_fixed_input(device):
     ],
 )
 def test_correctness_resized_crop_bounding_box(device, format, top, left, height, width, size):
-    def _compute_expected(bbox, top_, left_, height_, width_, size_):
+    def _compute_expected_bbox(bbox, top_, left_, height_, width_, size_):
         # bbox should be xyxy
         bbox[0] = (bbox[0] - left_) * size_[1] / width_
         bbox[1] = (bbox[1] - top_) * size_[0] / height_
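The helper's math is the plain crop-then-resize coordinate change: shift by the crop origin, then scale by output size over crop extent. A quick sanity check with made-up numbers (all values below are illustrative only):

# Crop of height=30, width=40 at top=10, left=20, resized to size=(60, 120):
# x coordinates scale by 120 / 40 = 3, y coordinates by 60 / 30 = 2.
top, left, height, width, size = 10, 20, 30, 40, (60, 120)
x1, y1 = 24.0, 16.0
print((x1 - left) * size[1] / width)   # (24 - 20) * 3 = 12.0
print((y1 - top) * size[0] / height)   # (16 - 10) * 2 = 12.0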
@@ -1009,7 +1041,7 @@ def _compute_expected(bbox, top_, left_, height_, width_, size_):
     ]
     expected_bboxes = []
     for in_box in in_boxes:
-        expected_bboxes.append(_compute_expected(list(in_box), top, left, height, width, size))
+        expected_bboxes.append(_compute_expected_bbox(list(in_box), top, left, height, width, size))
     expected_bboxes = torch.tensor(expected_bboxes, device=device)

     in_boxes = features.BoundingBox(
@@ -1035,7 +1067,7 @@ def _compute_expected(bbox, top_, left_, height_, width_, size_):
     ],
 )
 def test_correctness_resized_crop_segmentation_mask(device, top, left, height, width, size):
-    def _compute_expected(mask, top_, left_, height_, width_, size_):
+    def _compute_expected_mask(mask, top_, left_, height_, width_, size_):
         output = mask.clone()
         output = output[:, top_ : top_ + height_, left_ : left_ + width_]
         output = torch.nn.functional.interpolate(output[None, :].float(), size=size_, mode="nearest")
@@ -1046,7 +1078,7 @@ def _compute_expected(mask, top_, left_, height_, width_, size_):
     in_mask[0, 10:20, 10:20] = 1
     in_mask[0, 5:15, 12:23] = 2

-    expected_mask = _compute_expected(in_mask, top, left, height, width, size)
+    expected_mask = _compute_expected_mask(in_mask, top, left, height, width, size)
     output_mask = F.resized_crop_segmentation_mask(in_mask, top, left, height, width, size)
     torch.testing.assert_close(output_mask, expected_mask)
@@ -1095,6 +1127,161 @@ def parse_padding():
     torch.testing.assert_close(out_mask, expected_mask)


+@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize(
+    "startpoints, endpoints",
+    [
+        [[[0, 0], [33, 0], [33, 25], [0, 25]], [[3, 2], [32, 3], [30, 24], [2, 25]]],
+        [[[3, 2], [32, 3], [30, 24], [2, 25]], [[0, 0], [33, 0], [33, 25], [0, 25]]],
+        [[[3, 2], [32, 3], [30, 24], [2, 25]], [[5, 5], [30, 3], [33, 19], [4, 25]]],
+    ],
+)
+def test_correctness_perspective_bounding_box(device, startpoints, endpoints):
+    def _compute_expected_bbox(bbox, pcoeffs_):
+        m1 = np.array(
+            [
+                [pcoeffs_[0], pcoeffs_[1], pcoeffs_[2]],
+                [pcoeffs_[3], pcoeffs_[4], pcoeffs_[5]],
+            ]
+        )
+        m2 = np.array(
+            [
+                [pcoeffs_[6], pcoeffs_[7], 1.0],
+                [pcoeffs_[6], pcoeffs_[7], 1.0],
+            ]
+        )
+
+        bbox_xyxy = convert_bounding_box_format(
+            bbox, old_format=bbox.format, new_format=features.BoundingBoxFormat.XYXY
+        )
+        points = np.array(
+            [
+                [bbox_xyxy[0].item(), bbox_xyxy[1].item(), 1.0],
+                [bbox_xyxy[2].item(), bbox_xyxy[1].item(), 1.0],
+                [bbox_xyxy[0].item(), bbox_xyxy[3].item(), 1.0],
+                [bbox_xyxy[2].item(), bbox_xyxy[3].item(), 1.0],
+            ]
+        )
+        numer = np.matmul(points, m1.T)
+        denom = np.matmul(points, m2.T)
+        transformed_points = numer / denom
+        out_bbox = [
+            np.min(transformed_points[:, 0]),
+            np.min(transformed_points[:, 1]),
+            np.max(transformed_points[:, 0]),
+            np.max(transformed_points[:, 1]),
+        ]
+        out_bbox = features.BoundingBox(
+            out_bbox,
+            format=features.BoundingBoxFormat.XYXY,
+            image_size=bbox.image_size,
+            dtype=torch.float32,
+            device=bbox.device,
+        )
+        return convert_bounding_box_format(
+            out_bbox, old_format=features.BoundingBoxFormat.XYXY, new_format=bbox.format, copy=False
+        )
+
+    image_size = (32, 38)
+
+    pcoeffs = _get_perspective_coeffs(startpoints, endpoints)
+    inv_pcoeffs = _get_perspective_coeffs(endpoints, startpoints)
+
+    for bboxes in make_bounding_boxes(
+        image_sizes=[
+            image_size,
+        ],
+        extra_dims=((4,),),
+    ):
+        bboxes = bboxes.to(device)
+        bboxes_format = bboxes.format
+        bboxes_image_size = bboxes.image_size
+
+        output_bboxes = F.perspective_bounding_box(
+            bboxes,
+            bboxes_format,
+            perspective_coeffs=pcoeffs,
+        )
+
+        if bboxes.ndim < 2:
+            bboxes = [bboxes]
+
+        expected_bboxes = []
+        for bbox in bboxes:
+            bbox = features.BoundingBox(bbox, format=bboxes_format, image_size=bboxes_image_size)
+            expected_bboxes.append(_compute_expected_bbox(bbox, inv_pcoeffs))
+        if len(expected_bboxes) > 1:
+            expected_bboxes = torch.stack(expected_bboxes)
+        else:
+            expected_bboxes = expected_bboxes[0]
+        torch.testing.assert_close(output_bboxes, expected_bboxes, rtol=1e-5, atol=1e-5)
+
+
+@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize(
+    "startpoints, endpoints",
+    [
+        [[[0, 0], [33, 0], [33, 25], [0, 25]], [[3, 2], [32, 3], [30, 24], [2, 25]]],
+        [[[3, 2], [32, 3], [30, 24], [2, 25]], [[0, 0], [33, 0], [33, 25], [0, 25]]],
+        [[[3, 2], [32, 3], [30, 24], [2, 25]], [[5, 5], [30, 3], [33, 19], [4, 25]]],
+    ],
+)
+def test_correctness_perspective_segmentation_mask(device, startpoints, endpoints):
+    def _compute_expected_mask(mask, pcoeffs_):
+        assert mask.ndim == 3 and mask.shape[0] == 1
+        m1 = np.array(
+            [
+                [pcoeffs_[0], pcoeffs_[1], pcoeffs_[2]],
+                [pcoeffs_[3], pcoeffs_[4], pcoeffs_[5]],
+            ]
+        )
+        m2 = np.array(
+            [
+                [pcoeffs_[6], pcoeffs_[7], 1.0],
+                [pcoeffs_[6], pcoeffs_[7], 1.0],
+            ]
+        )
+
+        expected_mask = torch.zeros_like(mask.cpu())
+        for out_y in range(expected_mask.shape[1]):
+            for out_x in range(expected_mask.shape[2]):
+                output_pt = np.array([out_x + 0.5, out_y + 0.5, 1.0])
+
+                numer = np.matmul(output_pt, m1.T)
+                denom = np.matmul(output_pt, m2.T)
+                input_pt = np.floor(numer / denom).astype(np.int32)
+
+                in_x, in_y = input_pt[:2]
+                if 0 <= in_x < mask.shape[2] and 0 <= in_y < mask.shape[1]:
+                    expected_mask[0, out_y, out_x] = mask[0, in_y, in_x]
+        return expected_mask.to(mask.device)
+
+    pcoeffs = _get_perspective_coeffs(startpoints, endpoints)
+
+    for mask in make_segmentation_masks(extra_dims=((), (4,))):
+        mask = mask.to(device)
+
+        output_mask = F.perspective_segmentation_mask(
+            mask,
+            perspective_coeffs=pcoeffs,
+        )
+
+        if mask.ndim < 4:
+            masks = [mask]
+        else:
+            masks = [m for m in mask]
+
+        expected_masks = []
+        for mask in masks:
+            expected_mask = _compute_expected_mask(mask, pcoeffs)
+            expected_masks.append(expected_mask)
+        if len(expected_masks) > 1:
+            expected_masks = torch.stack(expected_masks)
+        else:
+            expected_masks = expected_masks[0]
+        torch.testing.assert_close(output_mask, expected_masks)
+
+
 @pytest.mark.parametrize("device", cpu_and_gpu())
 @pytest.mark.parametrize(
     "output_size",
@@ -1148,5 +1335,4 @@ def _compute_expected_bbox(bbox, output_size_):
             expected_bboxes = torch.stack(expected_bboxes)
         else:
             expected_bboxes = expected_bboxes[0]
-        expected_bboxes = expected_bboxes.to(device=device)
         torch.testing.assert_close(output_boxes, expected_bboxes)