
Commit e3238e5

only flatten a pytree once (#6767)
1 parent dc5fd83 commit e3238e5
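
Note: the commit title refers to the pattern applied across the files below. `forward()` now flattens its inputs with `torch.utils._pytree.tree_flatten` a single time, hands the resulting flat list to helpers such as `_get_params` and `_check_inputs`, and unflattens once at the end, instead of each helper re-flattening the sample on its own. A minimal sketch of that pattern follows; the class and helper bodies are illustrative only, not the torchvision API.

from typing import Any, Dict, List

from torch.utils._pytree import tree_flatten, tree_unflatten


class FlattenOnceTransform:
    """Illustrative stand-in for the prototype Transform base class."""

    def _get_params(self, flat_inputs: List[Any]) -> Dict[str, Any]:
        # Parameter sampling now receives the already-flattened leaves.
        return {}

    def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
        # No-op for the sketch; real transforms dispatch on the leaf type.
        return inpt

    def forward(self, *inputs: Any) -> Any:
        # Flatten the pytree exactly once per call ...
        flat_inputs, spec = tree_flatten(inputs if len(inputs) > 1 else inputs[0])
        params = self._get_params(flat_inputs)
        # ... transform the flat leaves ...
        flat_outputs = [self._transform(inpt, params) for inpt in flat_inputs]
        # ... and unflatten once at the end.
        return tree_unflatten(flat_outputs, spec)

Calling `forward` on a nested sample (for example a dict with an image tensor and a label) returns the same structure with every leaf passed through `_transform`.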

File tree

10 files changed: +143 -145 lines changed


test/test_prototype_transforms.py

Lines changed: 23 additions & 22 deletions
@@ -437,7 +437,7 @@ def test__get_params(self, fill, side_range, mocker):
         image = mocker.MagicMock(spec=features.Image)
         h, w = image.spatial_size = (24, 32)

-        params = transform._get_params(image)
+        params = transform._get_params([image])

         assert len(params["padding"]) == 4
         assert 0 <= params["padding"][0] <= (side_range[1] - 1) * w
@@ -462,7 +462,7 @@ def test__transform(self, fill, side_range, mocker):
         _ = transform(inpt)
         torch.manual_seed(12)
         torch.rand(1)  # random apply changes random state
-        params = transform._get_params(inpt)
+        params = transform._get_params([inpt])

         fill = transforms.functional._geometry._convert_fill_arg(fill)
         fn.assert_called_once_with(inpt, **params, fill=fill)
@@ -623,7 +623,7 @@ def test__get_params(self, degrees, translate, scale, shear, mocker):
         h, w = image.spatial_size

         transform = transforms.RandomAffine(degrees, translate=translate, scale=scale, shear=shear)
-        params = transform._get_params(image)
+        params = transform._get_params([image])

         if not isinstance(degrees, (list, tuple)):
             assert -degrees <= params["angle"] <= degrees
@@ -690,7 +690,7 @@ def test__transform(self, degrees, translate, scale, shear, fill, center, mocker
         torch.manual_seed(12)
         _ = transform(inpt)
         torch.manual_seed(12)
-        params = transform._get_params(inpt)
+        params = transform._get_params([inpt])

         fill = transforms.functional._geometry._convert_fill_arg(fill)
         fn.assert_called_once_with(inpt, **params, interpolation=interpolation, fill=fill, center=center)
@@ -722,7 +722,7 @@ def test__get_params(self, padding, pad_if_needed, size, mocker):
         h, w = image.spatial_size

         transform = transforms.RandomCrop(size, padding=padding, pad_if_needed=pad_if_needed)
-        params = transform._get_params(image)
+        params = transform._get_params([image])

         if padding is not None:
             if isinstance(padding, int):
@@ -793,7 +793,7 @@ def test__transform(self, padding, pad_if_needed, fill, padding_mode, mocker):
         torch.manual_seed(12)
         _ = transform(inpt)
         torch.manual_seed(12)
-        params = transform._get_params(inpt)
+        params = transform._get_params([inpt])
         if padding is None and not pad_if_needed:
             fn_crop.assert_called_once_with(
                 inpt, top=params["top"], left=params["left"], height=output_size[0], width=output_size[1]
@@ -832,7 +832,7 @@ def test_assertions(self):
     @pytest.mark.parametrize("sigma", [10.0, [10.0, 12.0]])
     def test__get_params(self, sigma):
         transform = transforms.GaussianBlur(3, sigma=sigma)
-        params = transform._get_params(None)
+        params = transform._get_params([])

         if isinstance(sigma, float):
             assert params["sigma"][0] == params["sigma"][1] == 10
@@ -867,7 +867,7 @@ def test__transform(self, kernel_size, sigma, mocker):
         torch.manual_seed(12)
         _ = transform(inpt)
         torch.manual_seed(12)
-        params = transform._get_params(inpt)
+        params = transform._get_params([inpt])

         fn.assert_called_once_with(inpt, kernel_size, **params)

@@ -912,7 +912,7 @@ def test__get_params(self, mocker):
         image.num_channels = 3
         image.spatial_size = (24, 32)

-        params = transform._get_params(image)
+        params = transform._get_params([image])

         h, w = image.spatial_size
         assert "perspective_coeffs" in params
@@ -935,7 +935,7 @@ def test__transform(self, distortion_scale, mocker):
         _ = transform(inpt)
         torch.manual_seed(12)
         torch.rand(1)  # random apply changes random state
-        params = transform._get_params(inpt)
+        params = transform._get_params([inpt])

         fill = transforms.functional._geometry._convert_fill_arg(fill)
         fn.assert_called_once_with(inpt, **params, fill=fill, interpolation=interpolation)
@@ -973,7 +973,7 @@ def test__get_params(self, mocker):
         image.num_channels = 3
         image.spatial_size = (24, 32)

-        params = transform._get_params(image)
+        params = transform._get_params([image])

         h, w = image.spatial_size
         displacement = params["displacement"]
@@ -1006,7 +1006,7 @@ def test__transform(self, alpha, sigma, mocker):
         # Let's mock transform._get_params to control the output:
         transform._get_params = mocker.MagicMock()
         _ = transform(inpt)
-        params = transform._get_params(inpt)
+        params = transform._get_params([inpt])
         fill = transforms.functional._geometry._convert_fill_arg(fill)
         fn.assert_called_once_with(inpt, **params, fill=fill, interpolation=interpolation)

@@ -1035,7 +1035,7 @@ def test_assertions(self, mocker):
         transform = transforms.RandomErasing(value=[1, 2, 3, 4])

         with pytest.raises(ValueError, match="If value is a sequence, it should have either a single value"):
-            transform._get_params(image)
+            transform._get_params([image])

     @pytest.mark.parametrize("value", [5.0, [1, 2, 3], "random"])
     def test__get_params(self, value, mocker):
@@ -1044,7 +1044,7 @@ def test__get_params(self, value, mocker):
         image.spatial_size = (24, 32)

         transform = transforms.RandomErasing(value=value)
-        params = transform._get_params(image)
+        params = transform._get_params([image])

         v = params["v"]
         h, w = params["h"], params["w"]
@@ -1197,6 +1197,7 @@ def test_assertions(self, transform_cls):
         [
             [transforms.Pad(2), transforms.RandomCrop(28)],
             [lambda x: 2.0 * x, transforms.Pad(2), transforms.RandomCrop(28)],
+            [transforms.Pad(2), lambda x: 2.0 * x, transforms.RandomCrop(28)],
         ],
     )
     def test_ctor(self, transform_cls, trfms):
@@ -1339,7 +1340,7 @@ def test__get_params(self, mocker):
         n_samples = 5
         for _ in range(n_samples):

-            params = transform._get_params(sample)
+            params = transform._get_params([sample])

             assert "size" in params
             size = params["size"]
@@ -1386,7 +1387,7 @@ def test__get_params(self, mocker):
         transform = transforms.RandomShortestSize(min_size=min_size, max_size=max_size)

         sample = mocker.MagicMock(spec=features.Image, num_channels=3, spatial_size=spatial_size)
-        params = transform._get_params(sample)
+        params = transform._get_params([sample])

         assert "size" in params
         size = params["size"]
@@ -1554,13 +1555,13 @@ def test__get_params(self, mocker):

         transform = transforms.FixedSizeCrop(size=crop_size)

-        sample = dict(
-            image=make_image(size=spatial_size, color_space=features.ColorSpace.RGB),
-            bounding_boxes=make_bounding_box(
+        flat_inputs = [
+            make_image(size=spatial_size, color_space=features.ColorSpace.RGB),
+            make_bounding_box(
                 format=features.BoundingBoxFormat.XYXY, spatial_size=spatial_size, extra_dims=batch_shape
             ),
-        )
-        params = transform._get_params(sample)
+        ]
+        params = transform._get_params(flat_inputs)

         assert params["needs_crop"]
         assert params["height"] <= crop_size[0]
@@ -1759,7 +1760,7 @@ def test__get_params(self):
         transform = transforms.RandomResize(min_size=min_size, max_size=max_size)

         for _ in range(10):
-            params = transform._get_params(None)
+            params = transform._get_params([])

             assert isinstance(params["size"], list) and len(params["size"]) == 1
             size = params["size"][0]

test/test_prototype_transforms_consistency.py

Lines changed: 1 addition & 1 deletion
@@ -639,7 +639,7 @@ def test_random_apply(self, p):
         prototype_transform = prototype_transforms.RandomApply(
             [
                 prototype_transforms.Resize(256),
-                legacy_transforms.CenterCrop(224),
+                prototype_transforms.CenterCrop(224),
             ],
             p=p,
         )

torchvision/prototype/transforms/_augment.py

Lines changed: 13 additions & 13 deletions
@@ -45,8 +45,8 @@ def __init__(

         self._log_ratio = torch.log(torch.tensor(self.ratio))

-    def _get_params(self, sample: Any) -> Dict[str, Any]:
-        img_c, img_h, img_w = query_chw(sample)
+    def _get_params(self, flat_inputs: List[Any]) -> Dict[str, Any]:
+        img_c, img_h, img_w = query_chw(flat_inputs)

         if isinstance(self.value, (int, float)):
             value = [self.value]
@@ -107,13 +107,13 @@ def __init__(self, alpha: float, p: float = 0.5) -> None:
         self.alpha = alpha
         self._dist = torch.distributions.Beta(torch.tensor([alpha]), torch.tensor([alpha]))

-    def _check_inputs(self, sample: Any) -> None:
+    def _check_inputs(self, flat_inputs: List[Any]) -> None:
         if not (
-            has_any(sample, features.Image, features.Video, features.is_simple_tensor)
-            and has_any(sample, features.OneHotLabel)
+            has_any(flat_inputs, features.Image, features.Video, features.is_simple_tensor)
+            and has_any(flat_inputs, features.OneHotLabel)
         ):
             raise TypeError(f"{type(self).__name__}() is only defined for tensor images/videos and one-hot labels.")
-        if has_any(sample, PIL.Image.Image, features.BoundingBox, features.Mask, features.Label):
+        if has_any(flat_inputs, PIL.Image.Image, features.BoundingBox, features.Mask, features.Label):
             raise TypeError(
                 f"{type(self).__name__}() does not support PIL images, bounding boxes, masks and plain labels."
             )
@@ -127,7 +127,7 @@ def _mixup_onehotlabel(self, inpt: features.OneHotLabel, lam: float) -> features


 class RandomMixup(_BaseMixupCutmix):
-    def _get_params(self, sample: Any) -> Dict[str, Any]:
+    def _get_params(self, flat_inputs: List[Any]) -> Dict[str, Any]:
         return dict(lam=float(self._dist.sample(())))

     def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
@@ -150,10 +150,10 @@ def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any:


 class RandomCutmix(_BaseMixupCutmix):
-    def _get_params(self, sample: Any) -> Dict[str, Any]:
+    def _get_params(self, flat_inputs: List[Any]) -> Dict[str, Any]:
         lam = float(self._dist.sample(()))

-        H, W = query_spatial_size(sample)
+        H, W = query_spatial_size(flat_inputs)

         r_x = torch.randint(W, ())
         r_y = torch.randint(H, ())
@@ -344,9 +344,9 @@ def _insert_outputs(
                 c3 += 1

     def forward(self, *inputs: Any) -> Any:
-        flat_sample, spec = tree_flatten(inputs)
+        flat_inputs, spec = tree_flatten(inputs if len(inputs) > 1 else inputs[0])

-        images, targets = self._extract_image_targets(flat_sample)
+        images, targets = self._extract_image_targets(flat_inputs)

         # images = [t1, t2, ..., tN]
         # Let's define paste_images as shifted list of input images
@@ -384,6 +384,6 @@ def forward(self, *inputs: Any) -> Any:
             output_targets.append(output_target)

         # Insert updated images and targets into input flat_sample
-        self._insert_outputs(flat_sample, output_images, output_targets)
+        self._insert_outputs(flat_inputs, output_images, output_targets)

-        return tree_unflatten(flat_sample, spec)
+        return tree_unflatten(flat_inputs, spec)
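
Because `_get_params` and `_check_inputs` in this file now receive the flat leaf list produced by the single `tree_flatten` call in `forward`, helpers like `query_chw` can scan that list directly instead of flattening again. A simplified, hedged stand-in for such a helper is sketched below; torchvision's real `query_chw` handles more input types, this only shows the idea.

from typing import Any, List, Tuple

import torch


def query_chw_sketch(flat_inputs: List[Any]) -> Tuple[int, int, int]:
    # Collect the (C, H, W) of every image-like leaf in the flat list.
    sizes = {
        tuple(leaf.shape[-3:])
        for leaf in flat_inputs
        if isinstance(leaf, torch.Tensor) and leaf.ndim >= 3
    }
    if len(sizes) != 1:
        raise TypeError(f"Found {len(sizes)} distinct C, H, W sizes: {sorted(sizes)}")
    c, h, w = sizes.pop()
    return c, h, w


print(query_chw_sketch([torch.rand(3, 24, 32), "not an image", torch.rand(3, 24, 32)]))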

torchvision/prototype/transforms/_auto_augment.py

Lines changed: 27 additions & 27 deletions
@@ -4,7 +4,7 @@
 import PIL.Image
 import torch

-from torch.utils._pytree import tree_flatten, tree_unflatten
+from torch.utils._pytree import tree_flatten, tree_unflatten, TreeSpec
 from torchvision.prototype import features
 from torchvision.prototype.transforms import AutoAugmentPolicy, functional as F, InterpolationMode, Transform
 from torchvision.prototype.transforms.functional._meta import get_spatial_size
@@ -31,16 +31,17 @@ def _get_random_item(self, dct: Dict[K, V]) -> Tuple[K, V]:
         key = keys[int(torch.randint(len(keys), ()))]
         return key, dct[key]

-    def _extract_image_or_video(
+    def _flatten_and_extract_image_or_video(
         self,
-        sample: Any,
+        inputs: Any,
         unsupported_types: Tuple[Type, ...] = (features.BoundingBox, features.Mask),
-    ) -> Tuple[int, Union[features.ImageType, features.VideoType]]:
-        sample_flat, _ = tree_flatten(sample)
+    ) -> Tuple[Tuple[List[Any], TreeSpec, int], Union[features.ImageType, features.VideoType]]:
+        flat_inputs, spec = tree_flatten(inputs if len(inputs) > 1 else inputs[0])
+
         image_or_videos = []
-        for id, inpt in enumerate(sample_flat):
+        for idx, inpt in enumerate(flat_inputs):
             if _isinstance(inpt, (features.Image, PIL.Image.Image, features.is_simple_tensor, features.Video)):
-                image_or_videos.append((id, inpt))
+                image_or_videos.append((idx, inpt))
             elif isinstance(inpt, unsupported_types):
                 raise TypeError(f"Inputs of type {type(inpt).__name__} are not supported by {type(self).__name__}()")

@@ -51,12 +52,18 @@ def _extract_image_or_video(
                 f"Auto augment transformations are only properly defined for a single image or video, "
                 f"but found {len(image_or_videos)}."
             )
-        return image_or_videos[0]

-    def _put_into_sample(self, sample: Any, id: int, item: Any) -> Any:
-        sample_flat, spec = tree_flatten(sample)
-        sample_flat[id] = item
-        return tree_unflatten(sample_flat, spec)
+        idx, image_or_video = image_or_videos[0]
+        return (flat_inputs, spec, idx), image_or_video
+
+    def _unflatten_and_insert_image_or_video(
+        self,
+        flat_inputs_with_spec: Tuple[List[Any], TreeSpec, int],
+        image_or_video: Union[features.ImageType, features.VideoType],
+    ) -> Any:
+        flat_inputs, spec, idx = flat_inputs_with_spec
+        flat_inputs[idx] = image_or_video
+        return tree_unflatten(flat_inputs, spec)

     def _apply_image_or_video_transform(
         self,
@@ -275,9 +282,7 @@ def _get_policies(
             raise ValueError(f"The provided policy {policy} is not recognized.")

     def forward(self, *inputs: Any) -> Any:
-        sample = inputs if len(inputs) > 1 else inputs[0]
-
-        id, image_or_video = self._extract_image_or_video(sample)
+        flat_inputs_with_spec, image_or_video = self._flatten_and_extract_image_or_video(inputs)
         height, width = get_spatial_size(image_or_video)

         policy = self._policies[int(torch.randint(len(self._policies), ()))]
@@ -300,7 +305,7 @@ def forward(self, *inputs: Any) -> Any:
                 image_or_video, transform_id, magnitude, interpolation=self.interpolation, fill=self.fill
             )

-        return self._put_into_sample(sample, id, image_or_video)
+        return self._unflatten_and_insert_image_or_video(flat_inputs_with_spec, image_or_video)


 class RandAugment(_AutoAugmentBase):
@@ -346,9 +351,7 @@ def __init__(
         self.num_magnitude_bins = num_magnitude_bins

     def forward(self, *inputs: Any) -> Any:
-        sample = inputs if len(inputs) > 1 else inputs[0]
-
-        id, image_or_video = self._extract_image_or_video(sample)
+        flat_inputs_with_spec, image_or_video = self._flatten_and_extract_image_or_video(inputs)
         height, width = get_spatial_size(image_or_video)

         for _ in range(self.num_ops):
@@ -364,7 +367,7 @@ def forward(self, *inputs: Any) -> Any:
                 image_or_video, transform_id, magnitude, interpolation=self.interpolation, fill=self.fill
             )

-        return self._put_into_sample(sample, id, image_or_video)
+        return self._unflatten_and_insert_image_or_video(flat_inputs_with_spec, image_or_video)


 class TrivialAugmentWide(_AutoAugmentBase):
@@ -400,9 +403,7 @@ def __init__(
         self.num_magnitude_bins = num_magnitude_bins

     def forward(self, *inputs: Any) -> Any:
-        sample = inputs if len(inputs) > 1 else inputs[0]
-
-        id, image_or_video = self._extract_image_or_video(sample)
+        flat_inputs_with_spec, image_or_video = self._flatten_and_extract_image_or_video(inputs)
         height, width = get_spatial_size(image_or_video)

         transform_id, (magnitudes_fn, signed) = self._get_random_item(self._AUGMENTATION_SPACE)
@@ -418,7 +419,7 @@ def forward(self, *inputs: Any) -> Any:
         image_or_video = self._apply_image_or_video_transform(
             image_or_video, transform_id, magnitude, interpolation=self.interpolation, fill=self.fill
         )
-        return self._put_into_sample(sample, id, image_or_video)
+        return self._unflatten_and_insert_image_or_video(flat_inputs_with_spec, image_or_video)


 class AugMix(_AutoAugmentBase):
@@ -471,8 +472,7 @@ def _sample_dirichlet(self, params: torch.Tensor) -> torch.Tensor:
         return torch._sample_dirichlet(params)

     def forward(self, *inputs: Any) -> Any:
-        sample = inputs if len(inputs) > 1 else inputs[0]
-        id, orig_image_or_video = self._extract_image_or_video(sample)
+        flat_inputs_with_spec, orig_image_or_video = self._flatten_and_extract_image_or_video(inputs)
         height, width = get_spatial_size(orig_image_or_video)

         if isinstance(orig_image_or_video, torch.Tensor):
@@ -525,4 +525,4 @@ def forward(self, *inputs: Any) -> Any:
         elif isinstance(orig_image_or_video, PIL.Image.Image):
             mix = F.to_image_pil(mix)

-        return self._put_into_sample(sample, id, mix)
+        return self._unflatten_and_insert_image_or_video(flat_inputs_with_spec, mix)
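
As a usage illustration of the refactored helpers above, the sketch below shows how a nested sample round-trips through flatten, extract, insert, and unflatten. The sample structure and the `pick_image`/`put_back` helpers are made up for the example; they mirror, but are not, torchvision's private `_flatten_and_extract_image_or_video` and `_unflatten_and_insert_image_or_video`.

from typing import Any, List, Tuple

import torch
from torch.utils._pytree import tree_flatten, tree_unflatten, TreeSpec


def pick_image(inputs: Any) -> Tuple[Tuple[List[Any], TreeSpec, int], torch.Tensor]:
    # Flatten once and remember where the image leaf lives.
    flat_inputs, spec = tree_flatten(inputs)
    idx = next(i for i, leaf in enumerate(flat_inputs) if isinstance(leaf, torch.Tensor))
    return (flat_inputs, spec, idx), flat_inputs[idx]


def put_back(ctx: Tuple[List[Any], TreeSpec, int], image: torch.Tensor) -> Any:
    # Write the transformed image back into the flat list and unflatten once.
    flat_inputs, spec, idx = ctx
    flat_inputs[idx] = image
    return tree_unflatten(flat_inputs, spec)


sample = {"image": torch.rand(3, 8, 8), "label": 7}
ctx, image = pick_image(sample)
out = put_back(ctx, image.flip(-1))  # any tensor-level augmentation
assert out["label"] == 7 and out["image"].shape == (3, 8, 8)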
