Skip to content
55 changes: 30 additions & 25 deletions test/prototype_common_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,13 +184,18 @@ def load(self, device="cpu"):
return args, kwargs


# NOTE(review): the `*_IMAGE_SIZE*` names below look like the pre-rename (removed) side of
# this change; the `*_SPATIAL_SIZE*` names that follow hold the same values. Confirm that
# only one set is meant to survive.
DEFAULT_SQUARE_IMAGE_SIZE = 15
DEFAULT_LANDSCAPE_IMAGE_SIZE = (7, 33)
DEFAULT_PORTRAIT_IMAGE_SIZE = (31, 9)
DEFAULT_IMAGE_SIZES = (DEFAULT_LANDSCAPE_IMAGE_SIZE, DEFAULT_PORTRAIT_IMAGE_SIZE, DEFAULT_SQUARE_IMAGE_SIZE, "random")
# Default spatial sizes for generated test inputs; used as the `sizes=` default of the
# `make_*_loaders` factories in this file. Two-element entries are unpacked as
# (height, width) by the bounding box loader. The bare int denotes a square size
# (presumably expanded to (size, size) by `_parse_spatial_size`; its int branch is not
# visible here, TODO confirm).
DEFAULT_SQUARE_SPATIAL_SIZE = 15
DEFAULT_LANDSCAPE_SPATIAL_SIZE = (7, 33)
DEFAULT_PORTRAIT_SPATIAL_SIZE = (31, 9)
# "random" is a sentinel: `_parse_spatial_size` replaces it with two integers drawn via
# `torch.randint(15, 33, (2,))`, i.e. each value lies in [15, 32].
DEFAULT_SPATIAL_SIZES = (
DEFAULT_LANDSCAPE_SPATIAL_SIZE,
DEFAULT_PORTRAIT_SPATIAL_SIZE,
DEFAULT_SQUARE_SPATIAL_SIZE,
"random",
)


def _parse_image_size(size, *, name="size"):
def _parse_spatial_size(size, *, name="size"):
if size == "random":
return tuple(torch.randint(15, 33, (2,)).tolist())
elif isinstance(size, int) and size > 0:
Expand Down Expand Up @@ -246,11 +251,11 @@ def load(self, device):
# Loader for image tensors: adds the color space plus fields derived from `self.shape`
# (`shape` is presumably supplied by `TensorLoader`, which is not visible here).
@dataclasses.dataclass
class ImageLoader(TensorLoader):
color_space: features.ColorSpace
# NOTE(review): `image_size` / `spatial_size` look like the removed/added pair of a rename.
# Both are excluded from the generated __init__ (init=False) and set in __post_init__.
image_size: Tuple[int, int] = dataclasses.field(init=False)
spatial_size: Tuple[int, int] = dataclasses.field(init=False)
num_channels: int = dataclasses.field(init=False)

def __post_init__(self):
# The last two entries of `shape` are stored as the size ...
self.image_size = self.shape[-2:]
self.spatial_size = self.shape[-2:]
# ... and the third-from-last entry as the number of channels.
self.num_channels = self.shape[-3]


Expand All @@ -277,7 +282,7 @@ def make_image_loader(
dtype=torch.float32,
constant_alpha=True,
):
size = _parse_image_size(size)
size = _parse_spatial_size(size)
num_channels = get_num_channels(color_space)

def fn(shape, dtype, device):
Expand All @@ -295,7 +300,7 @@ def fn(shape, dtype, device):

def make_image_loaders(
*,
sizes=DEFAULT_IMAGE_SIZES,
sizes=DEFAULT_SPATIAL_SIZES,
color_spaces=(
features.ColorSpace.GRAY,
features.ColorSpace.GRAY_ALPHA,
Expand All @@ -316,7 +321,7 @@ def make_image_loaders(
# Loader for bounding boxes: carries the box format and the size that
# `make_bounding_box_loader` passes in, in addition to the `TensorLoader` fields.
@dataclasses.dataclass
class BoundingBoxLoader(TensorLoader):
format: features.BoundingBoxFormat
# NOTE(review): `image_size` / `spatial_size` look like the removed/added pair of a rename.
# Neither has a default, so both would be required constructor arguments if both stayed.
image_size: Tuple[int, int]
spatial_size: Tuple[int, int]


def randint_with_tensor_bounds(arg1, arg2=None, **kwargs):
Expand All @@ -331,7 +336,7 @@ def randint_with_tensor_bounds(arg1, arg2=None, **kwargs):
).reshape(low.shape)


def make_bounding_box_loader(*, extra_dims=(), format, image_size="random", dtype=torch.float32):
def make_bounding_box_loader(*, extra_dims=(), format, spatial_size="random", dtype=torch.float32):
if isinstance(format, str):
format = features.BoundingBoxFormat[format]
if format not in {
Expand All @@ -341,7 +346,7 @@ def make_bounding_box_loader(*, extra_dims=(), format, image_size="random", dtyp
}:
raise pytest.UsageError(f"Can't make bounding box in format {format}")

image_size = _parse_image_size(image_size, name="image_size")
spatial_size = _parse_spatial_size(spatial_size, name="spatial_size")

def fn(shape, dtype, device):
*extra_dims, num_coordinates = shape
Expand All @@ -350,10 +355,10 @@ def fn(shape, dtype, device):

if any(dim == 0 for dim in extra_dims):
return features.BoundingBox(
torch.empty(*extra_dims, 4, dtype=dtype, device=device), format=format, image_size=image_size
torch.empty(*extra_dims, 4, dtype=dtype, device=device), format=format, spatial_size=spatial_size
)

height, width = image_size
height, width = spatial_size

if format == features.BoundingBoxFormat.XYXY:
x1 = torch.randint(0, width // 2, extra_dims)
Expand All @@ -375,10 +380,10 @@ def fn(shape, dtype, device):
parts = (cx, cy, w, h)

return features.BoundingBox(
torch.stack(parts, dim=-1).to(dtype=dtype, device=device), format=format, image_size=image_size
torch.stack(parts, dim=-1).to(dtype=dtype, device=device), format=format, spatial_size=spatial_size
)

return BoundingBoxLoader(fn, shape=(*extra_dims, 4), dtype=dtype, format=format, image_size=image_size)
return BoundingBoxLoader(fn, shape=(*extra_dims, 4), dtype=dtype, format=format, spatial_size=spatial_size)


make_bounding_box = from_loader(make_bounding_box_loader)
Expand All @@ -388,11 +393,11 @@ def make_bounding_box_loaders(
*,
extra_dims=DEFAULT_EXTRA_DIMS,
formats=tuple(features.BoundingBoxFormat),
image_size="random",
spatial_size="random",
dtypes=(torch.float32, torch.int64),
):
for params in combinations_grid(extra_dims=extra_dims, format=formats, dtype=dtypes):
yield make_bounding_box_loader(**params, image_size=image_size)
yield make_bounding_box_loader(**params, spatial_size=spatial_size)


make_bounding_boxes = from_loaders(make_bounding_box_loaders)
Expand Down Expand Up @@ -475,7 +480,7 @@ class MaskLoader(TensorLoader):

def make_detection_mask_loader(size="random", *, num_objects="random", extra_dims=(), dtype=torch.uint8):
# This produces "detection" masks, i.e. `(*, N, H, W)`, where `N` denotes the number of objects
size = _parse_image_size(size)
size = _parse_spatial_size(size)
num_objects = int(torch.randint(1, 11, ())) if num_objects == "random" else num_objects

def fn(shape, dtype, device):
Expand All @@ -489,7 +494,7 @@ def fn(shape, dtype, device):


def make_detection_mask_loaders(
sizes=DEFAULT_IMAGE_SIZES,
sizes=DEFAULT_SPATIAL_SIZES,
num_objects=(1, 0, "random"),
extra_dims=DEFAULT_EXTRA_DIMS,
dtypes=(torch.uint8,),
Expand All @@ -503,7 +508,7 @@ def make_detection_mask_loaders(

def make_segmentation_mask_loader(size="random", *, num_categories="random", extra_dims=(), dtype=torch.uint8):
# This produces "segmentation" masks, i.e. `(*, H, W)`, where the category is encoded in the values
size = _parse_image_size(size)
size = _parse_spatial_size(size)
num_categories = int(torch.randint(1, 11, ())) if num_categories == "random" else num_categories

def fn(shape, dtype, device):
Expand All @@ -518,7 +523,7 @@ def fn(shape, dtype, device):

def make_segmentation_mask_loaders(
*,
sizes=DEFAULT_IMAGE_SIZES,
sizes=DEFAULT_SPATIAL_SIZES,
num_categories=(1, 2, "random"),
extra_dims=DEFAULT_EXTRA_DIMS,
dtypes=(torch.uint8,),
Expand All @@ -532,7 +537,7 @@ def make_segmentation_mask_loaders(

def make_mask_loaders(
*,
sizes=DEFAULT_IMAGE_SIZES,
sizes=DEFAULT_SPATIAL_SIZES,
num_objects=(1, 0, "random"),
num_categories=(1, 2, "random"),
extra_dims=DEFAULT_EXTRA_DIMS,
Expand All @@ -559,7 +564,7 @@ def make_video_loader(
extra_dims=(),
dtype=torch.uint8,
):
size = _parse_image_size(size)
size = _parse_spatial_size(size)
num_frames = int(torch.randint(1, 5, ())) if num_frames == "random" else num_frames

def fn(shape, dtype, device):
Expand All @@ -576,7 +581,7 @@ def fn(shape, dtype, device):

def make_video_loaders(
*,
sizes=DEFAULT_IMAGE_SIZES,
sizes=DEFAULT_SPATIAL_SIZES,
color_spaces=(
features.ColorSpace.GRAY,
features.ColorSpace.RGB,
Expand Down
Loading