Skip to content

Commit cfdbce6

Browse files
Yosua Michael Maranatha authored and facebook-github-bot committed
[fbsync] [prototype] Restore BC on perspective (#6902)
Summary: * Restore BC on perspective * Fixes linter * Fixing tests. * Apply code-review changes. * Pleasing mypy. * Revert named parameters. Reviewed By: NicolasHug Differential Revision: D41265194 fbshipit-source-id: 4e72d73342d179071458d15593b9cee00eaa84e5
1 parent 12f7679 commit cfdbce6

File tree

10 files changed

+102
-36
lines changed

10 files changed

+102
-36
lines changed

test/prototype_transforms_kernel_infos.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1184,38 +1184,38 @@ def reference_inputs_pad_bounding_box():
11841184
def sample_inputs_perspective_image_tensor():
11851185
for image_loader in make_image_loaders(sizes=["random"]):
11861186
for fill in [None, 128.0, 128, [12.0], [12.0 + c for c in range(image_loader.num_channels)]]:
1187-
yield ArgsKwargs(image_loader, fill=fill, perspective_coeffs=_PERSPECTIVE_COEFFS[0])
1187+
yield ArgsKwargs(image_loader, None, None, fill=fill, coefficients=_PERSPECTIVE_COEFFS[0])
11881188

11891189

11901190
def reference_inputs_perspective_image_tensor():
1191-
for image_loader, perspective_coeffs in itertools.product(make_image_loaders(extra_dims=[()]), _PERSPECTIVE_COEFFS):
1191+
for image_loader, coefficients in itertools.product(make_image_loaders(extra_dims=[()]), _PERSPECTIVE_COEFFS):
11921192
# FIXME: PIL kernel doesn't support sequences of length 1 if the number of channels is larger. Shouldn't it?
11931193
for fill in [None, 128.0, 128, [12.0 + c for c in range(image_loader.num_channels)]]:
1194-
yield ArgsKwargs(image_loader, fill=fill, perspective_coeffs=perspective_coeffs)
1194+
yield ArgsKwargs(image_loader, None, None, fill=fill, coefficients=coefficients)
11951195

11961196

11971197
def sample_inputs_perspective_bounding_box():
11981198
for bounding_box_loader in make_bounding_box_loaders():
11991199
yield ArgsKwargs(
1200-
bounding_box_loader, format=bounding_box_loader.format, perspective_coeffs=_PERSPECTIVE_COEFFS[0]
1200+
bounding_box_loader, bounding_box_loader.format, None, None, coefficients=_PERSPECTIVE_COEFFS[0]
12011201
)
12021202

12031203

12041204
def sample_inputs_perspective_mask():
12051205
for mask_loader in make_mask_loaders(sizes=["random"]):
1206-
yield ArgsKwargs(mask_loader, perspective_coeffs=_PERSPECTIVE_COEFFS[0])
1206+
yield ArgsKwargs(mask_loader, None, None, coefficients=_PERSPECTIVE_COEFFS[0])
12071207

12081208

12091209
def reference_inputs_perspective_mask():
12101210
for mask_loader, perspective_coeffs in itertools.product(
12111211
make_mask_loaders(extra_dims=[()], num_objects=[1]), _PERSPECTIVE_COEFFS
12121212
):
1213-
yield ArgsKwargs(mask_loader, perspective_coeffs=perspective_coeffs)
1213+
yield ArgsKwargs(mask_loader, None, None, coefficients=perspective_coeffs)
12141214

12151215

12161216
def sample_inputs_perspective_video():
12171217
for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]):
1218-
yield ArgsKwargs(video_loader, perspective_coeffs=_PERSPECTIVE_COEFFS[0])
1218+
yield ArgsKwargs(video_loader, None, None, coefficients=_PERSPECTIVE_COEFFS[0])
12191219

12201220

12211221
KERNEL_INFOS.extend(

test/test_prototype_transforms.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -917,8 +917,8 @@ def test__get_params(self, mocker):
917917
params = transform._get_params([image])
918918

919919
h, w = image.spatial_size
920-
assert "perspective_coeffs" in params
921-
assert len(params["perspective_coeffs"]) == 8
920+
assert "coefficients" in params
921+
assert len(params["coefficients"]) == 8
922922

923923
@pytest.mark.parametrize("distortion_scale", [0.1, 0.7])
924924
def test__transform(self, distortion_scale, mocker):
@@ -940,7 +940,7 @@ def test__transform(self, distortion_scale, mocker):
940940
params = transform._get_params([inpt])
941941

942942
fill = transforms._utils._convert_fill_arg(fill)
943-
fn.assert_called_once_with(inpt, **params, fill=fill, interpolation=interpolation)
943+
fn.assert_called_once_with(inpt, None, None, **params, fill=fill, interpolation=interpolation)
944944

945945

946946
class TestElasticTransform:

test/test_prototype_transforms_functional.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -874,7 +874,9 @@ def _compute_expected_bbox(bbox, pcoeffs_):
874874
output_bboxes = F.perspective_bounding_box(
875875
bboxes,
876876
bboxes_format,
877-
perspective_coeffs=pcoeffs,
877+
None,
878+
None,
879+
coefficients=pcoeffs,
878880
)
879881

880882
if bboxes.ndim < 2:

torchvision/prototype/features/_bounding_box.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -169,11 +169,15 @@ def affine(
169169

170170
def perspective(
171171
self,
172-
perspective_coeffs: List[float],
172+
startpoints: Optional[List[List[int]]],
173+
endpoints: Optional[List[List[int]]],
173174
interpolation: InterpolationMode = InterpolationMode.BILINEAR,
174175
fill: FillTypeJIT = None,
176+
coefficients: Optional[List[float]] = None,
175177
) -> BoundingBox:
176-
output = self._F.perspective_bounding_box(self.as_subclass(torch.Tensor), self.format, perspective_coeffs)
178+
output = self._F.perspective_bounding_box(
179+
self.as_subclass(torch.Tensor), startpoints, endpoints, self.format, coefficients=coefficients
180+
)
177181
return BoundingBox.wrap_like(self, output)
178182

179183
def elastic(

torchvision/prototype/features/_feature.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -218,9 +218,11 @@ def affine(
218218

219219
def perspective(
220220
self,
221-
perspective_coeffs: List[float],
221+
startpoints: Optional[List[List[int]]],
222+
endpoints: Optional[List[List[int]]],
222223
interpolation: InterpolationMode = InterpolationMode.BILINEAR,
223224
fill: FillTypeJIT = None,
225+
coefficients: Optional[List[float]] = None,
224226
) -> _Feature:
225227
return self
226228

torchvision/prototype/features/_image.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -206,12 +206,19 @@ def affine(
206206

207207
def perspective(
208208
self,
209-
perspective_coeffs: List[float],
209+
startpoints: Optional[List[List[int]]],
210+
endpoints: Optional[List[List[int]]],
210211
interpolation: InterpolationMode = InterpolationMode.BILINEAR,
211212
fill: FillTypeJIT = None,
213+
coefficients: Optional[List[float]] = None,
212214
) -> Image:
213215
output = self._F.perspective_image_tensor(
214-
self.as_subclass(torch.Tensor), perspective_coeffs, interpolation=interpolation, fill=fill
216+
self.as_subclass(torch.Tensor),
217+
startpoints,
218+
endpoints,
219+
interpolation=interpolation,
220+
fill=fill,
221+
coefficients=coefficients,
215222
)
216223
return Image.wrap_like(self, output)
217224

torchvision/prototype/features/_mask.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -118,11 +118,15 @@ def affine(
118118

119119
def perspective(
120120
self,
121-
perspective_coeffs: List[float],
121+
startpoints: Optional[List[List[int]]],
122+
endpoints: Optional[List[List[int]]],
122123
interpolation: InterpolationMode = InterpolationMode.NEAREST,
123124
fill: FillTypeJIT = None,
125+
coefficients: Optional[List[float]] = None,
124126
) -> Mask:
125-
output = self._F.perspective_mask(self.as_subclass(torch.Tensor), perspective_coeffs, fill=fill)
127+
output = self._F.perspective_mask(
128+
self.as_subclass(torch.Tensor), startpoints, endpoints, fill=fill, coefficients=coefficients
129+
)
126130
return Mask.wrap_like(self, output)
127131

128132
def elastic(

torchvision/prototype/features/_video.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -166,12 +166,19 @@ def affine(
166166

167167
def perspective(
168168
self,
169-
perspective_coeffs: List[float],
169+
startpoints: Optional[List[List[int]]],
170+
endpoints: Optional[List[List[int]]],
170171
interpolation: InterpolationMode = InterpolationMode.BILINEAR,
171172
fill: FillTypeJIT = None,
173+
coefficients: Optional[List[float]] = None,
172174
) -> Video:
173175
output = self._F.perspective_video(
174-
self.as_subclass(torch.Tensor), perspective_coeffs, interpolation=interpolation, fill=fill
176+
self.as_subclass(torch.Tensor),
177+
startpoints,
178+
endpoints,
179+
interpolation=interpolation,
180+
fill=fill,
181+
coefficients=coefficients,
175182
)
176183
return Video.wrap_like(self, output)
177184

torchvision/prototype/transforms/_geometry.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -524,15 +524,17 @@ def _get_params(self, flat_inputs: List[Any]) -> Dict[str, Any]:
524524
startpoints = [[0, 0], [width - 1, 0], [width - 1, height - 1], [0, height - 1]]
525525
endpoints = [topleft, topright, botright, botleft]
526526
perspective_coeffs = _get_perspective_coeffs(startpoints, endpoints)
527-
return dict(perspective_coeffs=perspective_coeffs)
527+
return dict(coefficients=perspective_coeffs)
528528

529529
def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
530530
fill = self.fill[type(inpt)]
531531
return F.perspective(
532532
inpt,
533-
**params,
533+
None,
534+
None,
534535
fill=fill,
535536
interpolation=self.interpolation,
537+
**params,
536538
)
537539

538540

torchvision/prototype/transforms/functional/_geometry.py

Lines changed: 52 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from torchvision.transforms.functional import (
1212
_compute_resized_output_size as __compute_resized_output_size,
1313
_get_inverse_affine_matrix,
14+
_get_perspective_coeffs,
1415
InterpolationMode,
1516
pil_modes_mapping,
1617
pil_to_tensor,
@@ -906,12 +907,32 @@ def crop(inpt: features.InputTypeJIT, top: int, left: int, height: int, width: i
906907
return crop_image_pil(inpt, top, left, height, width)
907908

908909

910+
def _perspective_coefficients(
911+
startpoints: Optional[List[List[int]]],
912+
endpoints: Optional[List[List[int]]],
913+
coefficients: Optional[List[float]],
914+
) -> List[float]:
915+
if coefficients is not None:
916+
if startpoints is not None and endpoints is not None:
917+
raise ValueError("The startpoints/endpoints and the coefficients shouldn't be defined concurrently.")
918+
elif len(coefficients) != 8:
919+
raise ValueError("Argument coefficients should have 8 float values")
920+
return coefficients
921+
elif startpoints is not None and endpoints is not None:
922+
return _get_perspective_coeffs(startpoints, endpoints)
923+
else:
924+
raise ValueError("Either the startpoints/endpoints or the coefficients must have non `None` values.")
925+
926+
909927
def perspective_image_tensor(
910928
image: torch.Tensor,
911-
perspective_coeffs: List[float],
929+
startpoints: Optional[List[List[int]]],
930+
endpoints: Optional[List[List[int]]],
912931
interpolation: InterpolationMode = InterpolationMode.BILINEAR,
913932
fill: features.FillTypeJIT = None,
933+
coefficients: Optional[List[float]] = None,
914934
) -> torch.Tensor:
935+
perspective_coeffs = _perspective_coefficients(startpoints, endpoints, coefficients)
915936
if image.numel() == 0:
916937
return image
917938

@@ -934,21 +955,24 @@ def perspective_image_tensor(
934955
@torch.jit.unused
935956
def perspective_image_pil(
936957
image: PIL.Image.Image,
937-
perspective_coeffs: List[float],
958+
startpoints: Optional[List[List[int]]],
959+
endpoints: Optional[List[List[int]]],
938960
interpolation: InterpolationMode = InterpolationMode.BICUBIC,
939961
fill: features.FillTypeJIT = None,
962+
coefficients: Optional[List[float]] = None,
940963
) -> PIL.Image.Image:
964+
perspective_coeffs = _perspective_coefficients(startpoints, endpoints, coefficients)
941965
return _FP.perspective(image, perspective_coeffs, interpolation=pil_modes_mapping[interpolation], fill=fill)
942966

943967

944968
def perspective_bounding_box(
945969
bounding_box: torch.Tensor,
946970
format: features.BoundingBoxFormat,
947-
perspective_coeffs: List[float],
971+
startpoints: Optional[List[List[int]]],
972+
endpoints: Optional[List[List[int]]],
973+
coefficients: Optional[List[float]] = None,
948974
) -> torch.Tensor:
949-
950-
if len(perspective_coeffs) != 8:
951-
raise ValueError("Argument perspective_coeffs should have 8 float values")
975+
perspective_coeffs = _perspective_coefficients(startpoints, endpoints, coefficients)
952976

953977
original_shape = bounding_box.shape
954978
bounding_box = (
@@ -1029,8 +1053,10 @@ def perspective_bounding_box(
10291053

10301054
def perspective_mask(
10311055
mask: torch.Tensor,
1032-
perspective_coeffs: List[float],
1056+
startpoints: Optional[List[List[int]]],
1057+
endpoints: Optional[List[List[int]]],
10331058
fill: features.FillTypeJIT = None,
1059+
coefficients: Optional[List[float]] = None,
10341060
) -> torch.Tensor:
10351061
if mask.ndim < 3:
10361062
mask = mask.unsqueeze(0)
@@ -1039,7 +1065,7 @@ def perspective_mask(
10391065
needs_squeeze = False
10401066

10411067
output = perspective_image_tensor(
1042-
mask, perspective_coeffs=perspective_coeffs, interpolation=InterpolationMode.NEAREST, fill=fill
1068+
mask, startpoints, endpoints, interpolation=InterpolationMode.NEAREST, fill=fill, coefficients=coefficients
10431069
)
10441070

10451071
if needs_squeeze:
@@ -1050,25 +1076,37 @@ def perspective_mask(
10501076

10511077
def perspective_video(
10521078
video: torch.Tensor,
1053-
perspective_coeffs: List[float],
1079+
startpoints: Optional[List[List[int]]],
1080+
endpoints: Optional[List[List[int]]],
10541081
interpolation: InterpolationMode = InterpolationMode.BILINEAR,
10551082
fill: features.FillTypeJIT = None,
1083+
coefficients: Optional[List[float]] = None,
10561084
) -> torch.Tensor:
1057-
return perspective_image_tensor(video, perspective_coeffs, interpolation=interpolation, fill=fill)
1085+
return perspective_image_tensor(
1086+
video, startpoints, endpoints, interpolation=interpolation, fill=fill, coefficients=coefficients
1087+
)
10581088

10591089

10601090
def perspective(
10611091
inpt: features.InputTypeJIT,
1062-
perspective_coeffs: List[float],
1092+
startpoints: Optional[List[List[int]]],
1093+
endpoints: Optional[List[List[int]]],
10631094
interpolation: InterpolationMode = InterpolationMode.BILINEAR,
10641095
fill: features.FillTypeJIT = None,
1096+
coefficients: Optional[List[float]] = None,
10651097
) -> features.InputTypeJIT:
10661098
if isinstance(inpt, torch.Tensor) and (torch.jit.is_scripting() or not isinstance(inpt, features._Feature)):
1067-
return perspective_image_tensor(inpt, perspective_coeffs, interpolation=interpolation, fill=fill)
1099+
return perspective_image_tensor(
1100+
inpt, startpoints, endpoints, interpolation=interpolation, fill=fill, coefficients=coefficients
1101+
)
10681102
elif isinstance(inpt, features._Feature):
1069-
return inpt.perspective(perspective_coeffs, interpolation=interpolation, fill=fill)
1103+
return inpt.perspective(
1104+
startpoints, endpoints, interpolation=interpolation, fill=fill, coefficients=coefficients
1105+
)
10701106
else:
1071-
return perspective_image_pil(inpt, perspective_coeffs, interpolation=interpolation, fill=fill)
1107+
return perspective_image_pil(
1108+
inpt, startpoints, endpoints, interpolation=interpolation, fill=fill, coefficients=coefficients
1109+
)
10721110

10731111

10741112
def elastic_image_tensor(

0 commit comments

Comments (0)