From 882287bf66daa545f3a1c0c727f7c7ac6e9355dc Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Wed, 6 Apr 2022 13:15:54 +0100 Subject: [PATCH 1/2] Migrate Oxford Pets prototype dataset --- test/builtin_dataset_mocks.py | 6 +- .../prototype/datasets/_builtin/__init__.py | 2 +- .../datasets/_builtin/oxford_iiit_pet.py | 66 +++++++++++-------- 3 files changed, 44 insertions(+), 30 deletions(-) diff --git a/test/builtin_dataset_mocks.py b/test/builtin_dataset_mocks.py index 0210a4dacec..40df772a49a 100644 --- a/test/builtin_dataset_mocks.py +++ b/test/builtin_dataset_mocks.py @@ -1214,9 +1214,9 @@ def generate(self, root): return num_samples_map -# @register_mock -def oxford_iiit_pet(info, root, config): - return OxfordIIITPetMockData.generate(root)[config.split] +@register_mock(name="oxford-iiit-pet", configs=combinations_grid(split=("trainval", "test"))) +def oxford_iiit_pet(root, config): + return OxfordIIITPetMockData.generate(root)[config["split"]] class _CUB200MockData: diff --git a/torchvision/prototype/datasets/_builtin/__init__.py b/torchvision/prototype/datasets/_builtin/__init__.py index b2beddc7f2b..4acc1d53b4d 100644 --- a/torchvision/prototype/datasets/_builtin/__init__.py +++ b/torchvision/prototype/datasets/_builtin/__init__.py @@ -12,7 +12,7 @@ from .gtsrb import GTSRB from .imagenet import ImageNet from .mnist import MNIST, FashionMNIST, KMNIST, EMNIST, QMNIST -from .oxford_iiit_pet import OxfordIITPet +from .oxford_iiit_pet import OxfordIIITPet from .pcam import PCAM from .sbd import SBD from .semeion import SEMEION diff --git a/torchvision/prototype/datasets/_builtin/oxford_iiit_pet.py b/torchvision/prototype/datasets/_builtin/oxford_iiit_pet.py index 8d4fc00dbb0..f8ea832b699 100644 --- a/torchvision/prototype/datasets/_builtin/oxford_iiit_pet.py +++ b/torchvision/prototype/datasets/_builtin/oxford_iiit_pet.py @@ -1,11 +1,10 @@ import enum import pathlib -from typing import Any, Dict, List, Optional, Tuple, BinaryIO +from typing import Any, Dict, List, Optional, Tuple, BinaryIO, Union from torchdata.datapipes.iter import IterDataPipe, Mapper, Filter, IterKeyZipper, Demultiplexer, CSVDictParser from torchvision.prototype.datasets.utils import ( - Dataset, - DatasetConfig, + Dataset2, DatasetInfo, HttpResource, OnlineResource, @@ -14,29 +13,45 @@ INFINITE_BUFFER_SIZE, hint_sharding, hint_shuffling, + BUILTIN_DIR, getitem, path_accessor, path_comparator, ) from torchvision.prototype.features import Label, EncodedImage +from .._api import register_dataset, register_info -class OxfordIITPetDemux(enum.IntEnum): + +NAME = "oxford-iiit-pet" + + +class OxfordIIITPetDemux(enum.IntEnum): SPLIT_AND_CLASSIFICATION = 0 SEGMENTATIONS = 1 -class OxfordIITPet(Dataset): - def _make_info(self) -> DatasetInfo: - return DatasetInfo( - "oxford-iiit-pet", - homepage="https://www.robots.ox.ac.uk/~vgg/data/pets/", - valid_options=dict( - split=("trainval", "test"), - ), - ) +@register_info(NAME) +def _info() -> Dict[str, Any]: + categories = DatasetInfo.read_categories_file(BUILTIN_DIR / f"{NAME}.categories") + categories = [c[0] for c in categories] + return dict(categories=categories) + - def resources(self, config: DatasetConfig) -> List[OnlineResource]: +@register_dataset(NAME) +class OxfordIIITPet(Dataset2): + """Oxford IIIT Pet Dataset + homepage="https://www.robots.ox.ac.uk/~vgg/data/pets/", + """ + + def __init__( + self, root: Union[str, pathlib.Path], *, split: str = "trainval", skip_integrity_check: bool = False + ) -> None: + self._split = self._verify_str_arg(split, "split", 
{"trainval", "test"}) + self._categories = _info()["categories"] + super().__init__(root, skip_integrity_check=skip_integrity_check) + + def _resources(self) -> List[OnlineResource]: images = HttpResource( "https://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz", sha256="67195c5e1c01f1ab5f9b6a5d22b8c27a580d896ece458917e61d459337fa318d", @@ -51,8 +66,8 @@ def resources(self, config: DatasetConfig) -> List[OnlineResource]: def _classify_anns(self, data: Tuple[str, Any]) -> Optional[int]: return { - "annotations": OxfordIITPetDemux.SPLIT_AND_CLASSIFICATION, - "trimaps": OxfordIITPetDemux.SEGMENTATIONS, + "annotations": OxfordIIITPetDemux.SPLIT_AND_CLASSIFICATION, + "trimaps": OxfordIIITPetDemux.SEGMENTATIONS, }.get(pathlib.Path(data[0]).parent.name) def _filter_images(self, data: Tuple[str, Any]) -> bool: @@ -70,7 +85,7 @@ def _prepare_sample( image_path, image_buffer = image_data return dict( - label=Label(int(classification_data["label"]) - 1, categories=self.categories), + label=Label(int(classification_data["label"]) - 1, categories=self._categories), species="cat" if classification_data["species"] == "1" else "dog", segmentation_path=segmentation_path, segmentation=EncodedImage.from_file(segmentation_buffer), @@ -78,9 +93,7 @@ def _prepare_sample( image=EncodedImage.from_file(image_buffer), ) - def _make_datapipe( - self, resource_dps: List[IterDataPipe], *, config: DatasetConfig - ) -> IterDataPipe[Dict[str, Any]]: + def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]: images_dp, anns_dp = resource_dps images_dp = Filter(images_dp, self._filter_images) @@ -93,9 +106,7 @@ def _make_datapipe( buffer_size=INFINITE_BUFFER_SIZE, ) - split_and_classification_dp = Filter( - split_and_classification_dp, path_comparator("name", f"{config.split}.txt") - ) + split_and_classification_dp = Filter(split_and_classification_dp, path_comparator("name", f"{self._split}.txt")) split_and_classification_dp = CSVDictParser( split_and_classification_dp, fieldnames=("image_id", "label", "species"), delimiter=" " ) @@ -122,13 +133,13 @@ def _make_datapipe( return Mapper(dp, self._prepare_sample) def _filter_split_and_classification_anns(self, data: Tuple[str, Any]) -> bool: - return self._classify_anns(data) == OxfordIITPetDemux.SPLIT_AND_CLASSIFICATION + return self._classify_anns(data) == OxfordIIITPetDemux.SPLIT_AND_CLASSIFICATION - def _generate_categories(self, root: pathlib.Path) -> List[str]: + def _generate_categories(self) -> List[str]: config = self.default_config resources = self.resources(config) - dp = resources[1].load(root) + dp = resources[1].load(self._root) dp = Filter(dp, self._filter_split_and_classification_anns) dp = Filter(dp, path_comparator("name", f"{config.split}.txt")) dp = CSVDictParser(dp, fieldnames=("image_id", "label"), delimiter=" ") @@ -138,3 +149,6 @@ def _generate_categories(self, root: pathlib.Path) -> List[str]: *sorted(raw_categories_and_labels, key=lambda raw_category_and_label: int(raw_category_and_label[1])) ) return [" ".join(part.title() for part in raw_category.split("_")) for raw_category in raw_categories] + + def __len__(self) -> int: + 3_680 if self._split == "trainval" else 3_669 From a9d72569af4766a8f7b0d7d584675df273f97729 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Wed, 6 Apr 2022 14:06:56 +0100 Subject: [PATCH 2/2] Update torchvision/prototype/datasets/_builtin/oxford_iiit_pet.py Co-authored-by: Philip Meier --- torchvision/prototype/datasets/_builtin/oxford_iiit_pet.py | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/torchvision/prototype/datasets/_builtin/oxford_iiit_pet.py b/torchvision/prototype/datasets/_builtin/oxford_iiit_pet.py index f8ea832b699..714360c24f6 100644 --- a/torchvision/prototype/datasets/_builtin/oxford_iiit_pet.py +++ b/torchvision/prototype/datasets/_builtin/oxford_iiit_pet.py @@ -151,4 +151,4 @@ def _generate_categories(self) -> List[str]: return [" ".join(part.title() for part in raw_category.split("_")) for raw_category in raw_categories] def __len__(self) -> int: - 3_680 if self._split == "trainval" else 3_669 + return 3_680 if self._split == "trainval" else 3_669
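
A minimal usage sketch of the dataset migrated above (not part of either patch). It assumes the prototype entry point torchvision.prototype.datasets.load() resolves registered dataset names and forwards keyword options, and that the Oxford-IIIT Pet archives are already available under the default root; exact import paths and signatures in the prototype area may differ:

    from torchvision.prototype import datasets

    # "oxford-iiit-pet" is the NAME registered via @register_dataset in patch 1/2.
    dataset = datasets.load("oxford-iiit-pet", split="trainval")

    # 3_680 for the "trainval" split, per __len__ (with the return added in patch 2/2).
    print(len(dataset))

    # Each sample is the dict built by _prepare_sample in patch 1/2.
    sample = next(iter(dataset))
    print(sorted(sample))
    # ['image', 'image_path', 'label', 'segmentation', 'segmentation_path', 'species']
    print(sample["label"], sample["species"])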