From d33633f339cd70753497dc887d0ec6640720b73c Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Tue, 5 Oct 2021 15:51:16 +0200 Subject: [PATCH] add prototype for HMDB51 dataset --- mypy.ini | 4 + torchvision/prototype/datasets/_api.py | 18 ++- .../prototype/datasets/_builtin/__init__.py | 1 + .../prototype/datasets/_builtin/caltech.py | 21 +-- .../prototype/datasets/_builtin/hdmb51.py | 141 ++++++++++++++++++ .../datasets/_builtin/hmdb51.categories | 51 +++++++ torchvision/prototype/datasets/_folder.py | 14 +- torchvision/prototype/datasets/decoder.py | 20 ++- .../prototype/datasets/utils/__init__.py | 2 +- .../prototype/datasets/utils/_dataset.py | 13 +- .../prototype/datasets/utils/_internal.py | 47 +++++- 11 files changed, 304 insertions(+), 28 deletions(-) create mode 100644 torchvision/prototype/datasets/_builtin/hdmb51.py create mode 100644 torchvision/prototype/datasets/_builtin/hmdb51.categories diff --git a/mypy.ini b/mypy.ini index dac60e11ce0..b808c1faa81 100644 --- a/mypy.ini +++ b/mypy.ini @@ -71,3 +71,7 @@ ignore_missing_imports = True [mypy-torchdata.*] ignore_missing_imports = True + +[mypy-rarfile.*] + +ignore_missing_imports = True diff --git a/torchvision/prototype/datasets/_api.py b/torchvision/prototype/datasets/_api.py index 5c613035e2b..efeddaeffce 100644 --- a/torchvision/prototype/datasets/_api.py +++ b/torchvision/prototype/datasets/_api.py @@ -1,11 +1,10 @@ import io from typing import Any, Callable, Dict, List, Optional -import torch from torch.utils.data import IterDataPipe from torchvision.prototype.datasets import home -from torchvision.prototype.datasets.decoder import pil -from torchvision.prototype.datasets.utils import Dataset, DatasetInfo +from torchvision.prototype.datasets.decoder import pil, av +from torchvision.prototype.datasets.utils import Dataset, DatasetInfo, DatasetType from torchvision.prototype.datasets.utils._internal import add_suggestion from . 
import _builtin @@ -48,15 +47,26 @@ def info(name: str) -> DatasetInfo: return find(name).info +default = object() + +DEFAULT_DECODER: Dict[DatasetType, Callable[[io.IOBase], Dict[str, Any]]] = { + DatasetType.IMAGE: pil, + DatasetType.VIDEO: av, +} + + def load( name: str, *, - decoder: Optional[Callable[[io.IOBase], torch.Tensor]] = pil, + decoder: Optional[Callable[[io.IOBase], Dict[str, Any]]] = default, # type: ignore[assignment] split: str = "train", **options: Any, ) -> IterDataPipe[Dict[str, Any]]: dataset = find(name) + if decoder is default: + decoder = DEFAULT_DECODER[dataset.info.type] + config = dataset.info.make_config(split=split, **options) root = home() / name diff --git a/torchvision/prototype/datasets/_builtin/__init__.py b/torchvision/prototype/datasets/_builtin/__init__.py index 7d6961fa920..128c0812828 100644 --- a/torchvision/prototype/datasets/_builtin/__init__.py +++ b/torchvision/prototype/datasets/_builtin/__init__.py @@ -1 +1,2 @@ from .caltech import Caltech101, Caltech256 +from .hdmb51 import HMDB51 diff --git a/torchvision/prototype/datasets/_builtin/caltech.py b/torchvision/prototype/datasets/_builtin/caltech.py index d2ce41c0d0f..814650bfa3f 100644 --- a/torchvision/prototype/datasets/_builtin/caltech.py +++ b/torchvision/prototype/datasets/_builtin/caltech.py @@ -30,6 +30,7 @@ class Caltech101(Dataset): def info(self) -> DatasetInfo: return DatasetInfo( "caltech101", + type="image", categories=HERE / "caltech101.categories", homepage="http://www.vision.caltech.edu/Image_Datasets/Caltech101", ) @@ -82,7 +83,7 @@ def _anns_key_fn(self, data: Tuple[str, Any]) -> Tuple[str, str]: return category, id def _collate_and_decode_sample( - self, data, *, decoder: Optional[Callable[[io.IOBase], torch.Tensor]] + self, data, *, decoder: Optional[Callable[[io.IOBase], Dict[str, Any]]] ) -> Dict[str, Any]: key, image_data, ann_data = data category, _ = key @@ -91,28 +92,27 @@ def _collate_and_decode_sample( label = 
self.info.categories.index(category) - image = decoder(image_buffer) if decoder else image_buffer - ann = read_mat(ann_buffer) bbox = torch.as_tensor(ann["box_coord"].astype(np.int64)) contour = torch.as_tensor(ann["obj_contour"]) - return dict( + sample = dict( category=category, label=label, - image=image, image_path=image_path, bbox=bbox, contour=contour, ann_path=ann_path, ) + sample.update(decoder(image_buffer) if decoder else dict(image=image_buffer)) + return sample def _make_datapipe( self, resource_dps: List[IterDataPipe], *, config: DatasetConfig, - decoder: Optional[Callable[[io.IOBase], torch.Tensor]], + decoder: Optional[Callable[[io.IOBase], Dict[str, Any]]], ) -> IterDataPipe[Dict[str, Any]]: images_dp, anns_dp = resource_dps @@ -146,6 +146,7 @@ class Caltech256(Dataset): def info(self) -> DatasetInfo: return DatasetInfo( "caltech256", + type="image", categories=HERE / "caltech256.categories", homepage="http://www.vision.caltech.edu/Image_Datasets/Caltech256", ) @@ -166,7 +167,7 @@ def _collate_and_decode_sample( self, data: Tuple[str, io.IOBase], *, - decoder: Optional[Callable[[io.IOBase], torch.Tensor]], + decoder: Optional[Callable[[io.IOBase], Dict[str, Any]]], ) -> Dict[str, Any]: path, buffer = data @@ -174,14 +175,16 @@ def _collate_and_decode_sample( label_str, category = dir_name.split(".") label = torch.tensor(int(label_str)) - return dict(label=label, category=category, image=decoder(buffer) if decoder else buffer) + sample = dict(label=label, category=category) + sample.update(decoder(buffer) if decoder else dict(image=buffer)) + return sample def _make_datapipe( self, resource_dps: List[IterDataPipe], *, config: DatasetConfig, - decoder: Optional[Callable[[io.IOBase], torch.Tensor]], + decoder: Optional[Callable[[io.IOBase], Dict[str, Any]]], ) -> IterDataPipe[Dict[str, Any]]: dp = resource_dps[0] dp = TarArchiveReader(dp) diff --git a/torchvision/prototype/datasets/_builtin/hdmb51.py b/torchvision/prototype/datasets/_builtin/hdmb51.py 
new file mode 100644 index 00000000000..6b8cfe6c941 --- /dev/null +++ b/torchvision/prototype/datasets/_builtin/hdmb51.py @@ -0,0 +1,141 @@ +import io +import pathlib +import re +from typing import Any, Callable, Dict, List, Optional, Tuple, Union + +import torch +from torch.utils.data import IterDataPipe +from torch.utils.data.datapipes.iter import ( + Mapper, + Shuffler, + Filter, +) +from torchdata.datapipes.iter import KeyZipper, CSVParser +from torchvision.prototype.datasets.utils import ( + Dataset, + DatasetConfig, + DatasetInfo, + HttpResource, + OnlineResource, +) +from torchvision.prototype.datasets.utils._internal import ( + create_categories_file, + INFINITE_BUFFER_SIZE, + RarArchiveReader, +) + +HERE = pathlib.Path(__file__).parent + + +class HMDB51(Dataset): + @property + def info(self) -> DatasetInfo: + return DatasetInfo( + "hmdb51", + type="video", + categories=HERE / "hmdb51.categories", + homepage="https://serre-lab.clps.brown.edu/resource/hmdb-a-large-human-motion-database/", + valid_options=dict( + split=("train", "test"), + split_number=("1", "2", "3"), + ), + ) + + def resources(self, config: DatasetConfig) -> List[OnlineResource]: + splits = HttpResource( + "http://serre-lab.clps.brown.edu/wp-content/uploads/2013/10/test_train_splits.rar", + sha256="229c94f845720d01eb3946d39f39292ea962d50a18136484aa47c1eba251d2b7", + ) + videos = HttpResource( + "http://serre-lab.clps.brown.edu/wp-content/uploads/2013/10/hmdb51_org.rar", + sha256="9e714a0d8b76104d76e932764a7ca636f929fff66279cda3f2e326fa912a328e", + ) + return [splits, videos] + + _SPLIT_FILE_PATTERN = re.compile(r"(?P<category>\w+?)_test_split(?P<split_number>[1-3])[.]txt") + + def _is_split_number(self, data: Tuple[str, Any], *, config: DatasetConfig) -> bool: + path = pathlib.Path(data[0]) + split_number = self._SPLIT_FILE_PATTERN.match(path.name).group("split_number") # type: ignore[union-attr] + return split_number == config.split_number + + _SPLIT_ID_TO_NAME = { + "1": "train", + "2": "test", + } + + def 
_is_split(self, data: List[str], *, config: DatasetConfig) -> bool: + split_id = data[1] + if split_id not in self._SPLIT_ID_TO_NAME: + return False + return self._SPLIT_ID_TO_NAME[split_id] == config.split + + def _splits_key(self, data: List[str]) -> str: + return data[0] + + def _videos_key(self, data: Tuple[str, Any]) -> str: + path = pathlib.Path(data[0]) + return path.name + + def _collate_and_decode_sample( + self, data: Tuple[List[str], Tuple[str, io.IOBase]], *, decoder: Optional[Callable[[io.IOBase], Dict[str, Any]]] + ) -> Dict[str, Any]: + _, video_data = data + path, buffer = video_data + + category = pathlib.Path(path).parent.name + label = torch.tensor(self.info.categories.index(category)) + + sample = dict( + path=path, + category=category, + label=label, + ) + + sample.update(decoder(buffer) if decoder else dict(video=buffer)) + return sample + + def _make_datapipe( + self, + resource_dps: List[IterDataPipe], + *, + config: DatasetConfig, + decoder: Optional[Callable[[io.IOBase], Dict[str, Any]]], + ) -> IterDataPipe[Dict[str, Any]]: + splits_dp, videos_dp = resource_dps + + splits_dp = RarArchiveReader(splits_dp) + splits_dp = Filter(splits_dp, self._is_split_number, fn_kwargs=dict(config=config)) + splits_dp = CSVParser(splits_dp, delimiter=" ") + splits_dp = Filter(splits_dp, self._is_split, fn_kwargs=dict(config=config)) + splits_dp = Shuffler(splits_dp, buffer_size=INFINITE_BUFFER_SIZE) + + videos_dp = RarArchiveReader(videos_dp) + videos_dp = RarArchiveReader(videos_dp) + + dp = KeyZipper( + splits_dp, + videos_dp, + key_fn=self._splits_key, + ref_key_fn=self._videos_key, + buffer_size=INFINITE_BUFFER_SIZE, + ) + return Mapper(dp, self._collate_and_decode_sample, fn_kwargs=dict(decoder=decoder)) + + def generate_categories_file(self, root: Union[str, pathlib.Path]) -> None: + splits_archive = self.resources(self.default_config)[0] + dp = splits_archive.to_datapipe(pathlib.Path(root) / self.name) + dp = RarArchiveReader(dp) + + categories = { + 
self._SPLIT_FILE_PATTERN.match(pathlib.Path(path).name).group("category") # type: ignore[union-attr] + for path, _ in dp + } + create_categories_file(HERE, self.name, sorted(categories)) + + +if __name__ == "__main__": + from torchvision.prototype.datasets import home + + root = home() + HMDB51().generate_categories_file(root) diff --git a/torchvision/prototype/datasets/_builtin/hmdb51.categories b/torchvision/prototype/datasets/_builtin/hmdb51.categories new file mode 100644 index 00000000000..3217416f524 --- /dev/null +++ b/torchvision/prototype/datasets/_builtin/hmdb51.categories @@ -0,0 +1,51 @@ +brush_hair +cartwheel +catch +chew +clap +climb +climb_stairs +dive +draw_sword +dribble +drink +eat +fall_floor +fencing +flic_flac +golf +handstand +hit +hug +jump +kick +kick_ball +kiss +laugh +pick +pour +pullup +punch +push +pushup +ride_bike +ride_horse +run +shake_hands +shoot_ball +shoot_bow +shoot_gun +sit +situp +smile +smoke +somersault +stand +swing_baseball +sword +sword_exercise +talk +throw +turn +walk +wave diff --git a/torchvision/prototype/datasets/_folder.py b/torchvision/prototype/datasets/_folder.py index 55e48387d6a..67a8c1b5ec7 100644 --- a/torchvision/prototype/datasets/_folder.py +++ b/torchvision/prototype/datasets/_folder.py @@ -25,24 +25,26 @@ def _collate_and_decode_data( *, root: pathlib.Path, categories: List[str], - decoder, + decoder: Optional[Callable[[io.IOBase], Dict[str, Any]]], ) -> Dict[str, Any]: path, buffer = data - data = decoder(buffer) if decoder else buffer + category = pathlib.Path(path).relative_to(root).parts[0] label = torch.tensor(categories.index(category)) - return dict( + + sample = dict( path=path, - data=data, label=label, category=category, ) + sample.update(decoder(buffer) if decoder else dict(data=buffer)) + return sample def from_data_folder( root: Union[str, pathlib.Path], *, - decoder: Optional[Callable[[io.IOBase], torch.Tensor]] = None, + decoder: Optional[Callable[[io.IOBase], Dict[str, Any]]] = None, 
valid_extensions: Optional[Collection[str]] = None, recursive: bool = True, ) -> Tuple[IterDataPipe, List[str]]: @@ -67,7 +69,7 @@ def _data_to_image_key(sample: Dict[str, Any]) -> Dict[str, Any]: def from_image_folder( root: Union[str, pathlib.Path], *, - decoder: Optional[Callable[[io.IOBase], torch.Tensor]] = pil, + decoder: Optional[Callable[[io.IOBase], Dict[str, Any]]] = pil, valid_extensions: Collection[str] = ("jpg", "jpeg", "png", "ppm", "bmp", "pgm", "tif", "tiff", "webp"), **kwargs: Any, ) -> Tuple[IterDataPipe, List[str]]: diff --git a/torchvision/prototype/datasets/decoder.py b/torchvision/prototype/datasets/decoder.py index 64cea43e5f0..6a2083fd781 100644 --- a/torchvision/prototype/datasets/decoder.py +++ b/torchvision/prototype/datasets/decoder.py @@ -1,11 +1,23 @@ import io +import unittest.mock +from typing import Dict, Any import PIL.Image -import torch +from torchvision.io.video import read_video from torchvision.transforms.functional import pil_to_tensor -__all__ = ["pil"] +__all__ = ["pil", "av"] -def pil(buffer: io.IOBase, mode: str = "RGB") -> torch.Tensor: - return pil_to_tensor(PIL.Image.open(buffer).convert(mode.upper())) +def pil(buffer: io.IOBase, *, mode: str = "RGB") -> Dict[str, Any]: + return dict(image=pil_to_tensor(PIL.Image.open(buffer).convert(mode.upper()))) + + +def av(buffer: io.IOBase, **read_video_kwargs: Any) -> Dict[str, Any]: + with unittest.mock.patch("torchvision.io.video.os.path.exists", return_value=True): + return dict( + zip( + ("video", "audio", "video_meta"), + read_video(buffer, **read_video_kwargs), # type: ignore[arg-type] + ) + ) diff --git a/torchvision/prototype/datasets/utils/__init__.py b/torchvision/prototype/datasets/utils/__init__.py index 48e7541eba5..018553e0908 100644 --- a/torchvision/prototype/datasets/utils/__init__.py +++ b/torchvision/prototype/datasets/utils/__init__.py @@ -1,3 +1,3 @@ from . 
import _internal -from ._dataset import DatasetConfig, DatasetInfo, Dataset +from ._dataset import DatasetType, DatasetConfig, DatasetInfo, Dataset from ._resource import LocalResource, OnlineResource, HttpResource, GDriveResource diff --git a/torchvision/prototype/datasets/utils/_dataset.py b/torchvision/prototype/datasets/utils/_dataset.py index b43dc3fc4c4..97dc29fd685 100644 --- a/torchvision/prototype/datasets/utils/_dataset.py +++ b/torchvision/prototype/datasets/utils/_dataset.py @@ -1,4 +1,5 @@ import abc +import enum import io import os import pathlib @@ -17,7 +18,6 @@ Tuple, ) -import torch from torch.utils.data import IterDataPipe from torchvision.prototype.datasets.utils._internal import ( add_suggestion, @@ -45,6 +45,11 @@ def to_str(sep: str) -> str: return f"{prefix}\n{body}\n{postfix}" +class DatasetType(enum.Enum): + IMAGE = enum.auto() + VIDEO = enum.auto() + + class DatasetConfig(Mapping): def __init__(self, *args, **kwargs): data = dict(*args, **kwargs) @@ -96,6 +101,7 @@ def __init__( self, name: str, *, + type: Union[str, DatasetType], categories: Optional[Union[int, Sequence[str], str, pathlib.Path]] = None, citation: Optional[str] = None, homepage: Optional[str] = None, @@ -103,6 +109,7 @@ def __init__( valid_options: Optional[Dict[str, Sequence]] = None, ) -> None: self.name = name.lower() + self.type = DatasetType[type.upper()] if isinstance(type, str) else type if categories is None: categories = [] @@ -191,7 +198,7 @@ def _make_datapipe( resource_dps: List[IterDataPipe], *, config: DatasetConfig, - decoder: Optional[Callable[[io.IOBase], torch.Tensor]], + decoder: Optional[Callable[[io.IOBase], Dict[str, Any]]], ) -> IterDataPipe[Dict[str, Any]]: pass @@ -200,7 +207,7 @@ def to_datapipe( root: Union[str, pathlib.Path], *, config: Optional[DatasetConfig] = None, - decoder: Optional[Callable[[io.IOBase], torch.Tensor]] = None, + decoder: Optional[Callable[[io.IOBase], Dict[str, Any]]] = None, ) -> IterDataPipe[Dict[str, Any]]: if not 
config: config = self.info.default_config diff --git a/torchvision/prototype/datasets/utils/_internal.py b/torchvision/prototype/datasets/utils/_internal.py index 7a1d34ffa0e..7820d4ffe4c 100644 --- a/torchvision/prototype/datasets/utils/_internal.py +++ b/torchvision/prototype/datasets/utils/_internal.py @@ -1,11 +1,22 @@ import collections.abc import difflib import io +import os.path import pathlib from typing import Collection, Sequence, Callable, Union, Any +from typing import Tuple, Iterator +from torchdata.datapipes.iter import IterDataPipe -__all__ = ["INFINITE_BUFFER_SIZE", "sequence_to_str", "add_suggestion", "create_categories_file", "read_mat"] + +__all__ = [ + "INFINITE_BUFFER_SIZE", + "sequence_to_str", + "add_suggestion", + "create_categories_file", + "read_mat", + "RarArchiveReader", +] # pseudo-infinite until a true infinite buffer is supported by all datapipes INFINITE_BUFFER_SIZE = 1_000_000_000 @@ -47,3 +58,37 @@ def read_mat(buffer: io.IOBase, **kwargs: Any) -> Any: raise ModuleNotFoundError("Package `scipy` is required to be installed to read .mat files.") from error return sio.loadmat(buffer, **kwargs) + + +class RarArchiveReader(IterDataPipe[Tuple[str, io.BufferedIOBase]]): + def __init__(self, datapipe: IterDataPipe[Tuple[str, io.BufferedIOBase]]): + self._rarfile = self._verify_dependencies() + super().__init__() + self.datapipe = datapipe + + @staticmethod + def _verify_dependencies(): + try: + import rarfile + except ImportError as error: + raise ModuleNotFoundError( + "Package `rarfile` is required to be installed to use this datapipe. " + "Please use `pip install rarfile` or `conda -c conda-forge install rarfile` to install it." 
+ ) from error + + # check if at least one system library for reading rar archives is available to be used by rarfile + rarfile.tool_setup() + + return rarfile + + def __iter__(self) -> Iterator[Tuple[str, io.BufferedIOBase]]: + for path, stream in self.datapipe: + rar = self._rarfile.RarFile(stream) + for info in rar.infolist(): + if info.filename.endswith("/"): + continue + + inner_path = os.path.join(path, info.filename) + file_obj = rar.open(info) + + yield inner_path, file_obj