From aba8779b15ae225858a169c577426c5ef5bab626 Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Mon, 15 Feb 2021 15:46:23 +0100
Subject: [PATCH 01/29] add base class for datasets tests

---
 test/common_utils.py       |   8 +
 test/datasets_testcases.py | 318 +++++++++++++++++++++++++++++++++++++
 2 files changed, 326 insertions(+)
 create mode 100644 test/datasets_testcases.py

diff --git a/test/common_utils.py b/test/common_utils.py
index 76cdfdc006b..a0fb7781899 100644
--- a/test/common_utils.py
+++ b/test/common_utils.py
@@ -393,3 +393,11 @@ def int_dtypes():
 
 def float_dtypes():
     return torch.testing.floating_types()
+
+
+@contextlib.contextmanager
+def disable_console_output():
+    with contextlib.ExitStack() as stack, open(os.devnull, "w") as devnull:
+        stack.enter_context(contextlib.redirect_stdout(devnull))
+        stack.enter_context(contextlib.redirect_stderr(devnull))
+        yield
diff --git a/test/datasets_testcases.py b/test/datasets_testcases.py
new file mode 100644
index 00000000000..5d4cb36e575
--- /dev/null
+++ b/test/datasets_testcases.py
@@ -0,0 +1,318 @@
+import contextlib
+import functools
+import importlib
+import inspect
+import itertools
+import unittest
+import unittest.mock
+
+from PIL import Image
+
+import torchvision.datasets
+
+from datasets_utils import tmpdir, disable_console_output
+
+
+__all__ = ["DatasetTestCase", "ImageDatasetTestCase", "VideoDatasetTestCase", "test_all_configs"]
+
+
+class UsageError(RuntimeError):
+    """Should be raised instead of a generic ``RuntimeError`` in case a test case is not correctly configured."""
+
+
+# As of Python 3.7 this is provided by contextlib
+# https://docs.python.org/3.7/library/contextlib.html#contextlib.nullcontext
+# TODO: If the minimum Python requirement is >= 3.7, replace this
+@contextlib.contextmanager
+def nullcontext(enter_result=None):
+    yield enter_result
+
+
+def test_all_configs(test):
+    """Decorator to run test against all configurations.
+
+    Add this as decorator to an arbitrary test to run it against all configurations. The current configuration is
+    provided as the first parameter:
+
+    .. code-block::
+
+        @test_all_configs
+        def test_foo(self, config):
+            pass
+    """
+
+    @functools.wraps(test)
+    def wrapper(self):
+        for config in self.CONFIGS:
+            with self.subTest(**config):
+                test(self, config)
+
+    return wrapper
+
+
+class DatasetTestCase(unittest.TestCase):
+    """Abstract base class for all dataset testcases.
+
+    You have to overwrite the following class attributes:
+
+        - DATASET_CLASS (torchvision.datasets.VisionDataset): Class of dataset to be tested.
+        - FEATURE_TYPES (Sequence[Any]): Types of the elements returned by index access of the dataset. Instead of
+            providing these manually, you can instead subclass ``ImageDatasetTestCase`` or ``VideoDatasetTestCase`` to
+            get a reasonable default, that should work for most cases.
+
+    Optionally, you can overwrite the following class attributes:
+
+        - CONFIGS (Sequence[Dict[str, Any]]): Additional configs that should be tested. Each dictionary can contain an
+            arbitrary combination of dataset parameters that are **not** ``transform``, ``target_transform``,
+            ``transforms``, or ``download``. The first element will be used as default configuration.
+        - REQUIRED_PACKAGES (Iterable[str]): Additional dependencies to use the dataset. If these packages are not
+            available, the tests are skipped.
+
+    Additionally, you need to overwrite the ``inject_fake_data()`` method that provides the data that the tests rely on.
+    The fake data should resemble the original data as close as necessary, while containing only a few examples. During
+    the creation of the dataset check-, download-, and extract-functions from ``torchvision.datasets.utils`` are
+    disabled.
+
+    Without further configuration, the testcase will test if
+
+    1. the dataset raises a ``RuntimeError`` if the data files are not found,
+    2. the dataset inherits from `torchvision.datasets.VisionDataset`,
+    3. the dataset can be turned into a string,
+    4. the feature types of a returned example match ``FEATURE_TYPES``, and
+    5. the number of examples matches the injected fake data.
+
+    Cases 3., 4., and 5. are tested against all configurations in ``CONFIGS``.
+
+    To add dataset-specific tests, create a new method that takes no arguments with ``test_`` as a name prefix:
+
+    .. code-block::
+
+        def test_foo(self):
+            pass
+
+    If you want to run the test against all configs, add the ``@test_all_configs`` decorator to the definition and
+    accept a single argument:
+
+    .. code-block::
+
+        @test_all_configs
+        def test_bar(self, config):
+            pass
+
+    Within the test you can use the ``create_dataset()`` method that yields the dataset as well as additional information
+    provided by the ``inject_fake_data()`` method:
+
+    .. code-block::
+
+        def test_baz(self):
+            with self.create_dataset() as (dataset, info):
+                pass
+    """
+
+    DATASET_CLASS = None
+    FEATURE_TYPES = None
+
+    CONFIGS = None
+    REQUIRED_PACKAGES = None
+
+    _SPECIAL_KWARGS = {
+        "transform",
+        "target_transform",
+        "transforms",
+        "download",
+    }
+    _HAS_SPECIAL_KWARG = None
+
+    _CHECK_FUNCTIONS = {
+        "check_md5",
+        "check_integrity",
+    }
+    _DOWNLOAD_EXTRACT_FUNCTIONS = {
+        "download_url",
+        "download_file_from_google_drive",
+        "extract_archive",
+        "download_and_extract_archive",
+    }
+
+    def inject_fake_data(self, root: str, config: dict[str, Any]) -> dict[str, Any]:
+        """Inject fake data into the root of the dataset.
+
+        Args:
+            root (str): Root of the dataset.
+            config (dict[str, Any]): Configuration that will be used to create the dataset.
+
+        Returns:
+            info (dict[str, Any]): Additional information about the injected fake data. Must contain the field
+                ``"num_examples"`` that corresponds to the length of the dataset to be created.
+        """
+        raise NotImplementedError("You need to provide fake data in order for the tests to run.")
+
+    @contextlib.contextmanager
+    def create_dataset(self, config=None, inject_fake_data=True, disable_download_extract=None, **kwargs):
+        r"""Create the dataset in a temporary directory.
+
+        Args:
+            config (Optional[dict[str, Any]]): Configuration that will be used to create the dataset. If omitted, the
+                default configuration is used.
+            inject_fake_data (bool): If ``True`` (default) inject the fake data with :meth:`.inject_fake_data` before
+                creating the dataset.
+            disable_download_extract (Optional[bool]): If ``True`` disable download and extract logic while creating
+                the dataset. If ``None`` (default) this takes the same value as ``inject_fake_data``.
+            **kwargs (Any): Additional parameters passed to the dataset. These parameters take precedence in case they
+                overlap with ``config``.
+
+        Yields:
+            dataset (torchvision.dataset.VisionDataset): Dataset.
+            info (dict[str, Any]): Additional information about the injected fake data. See :meth:`.inject_fake_data`
+                for details.
+        """
+        if config is None:
+            config = self.CONFIGS[0]
+
+        special_kwargs, other_kwargs = self._split_kwargs(kwargs)
+        config.update(other_kwargs)
+
+        if disable_download_extract is None:
+            disable_download_extract = inject_fake_data
+
+        with tmpdir() as root:
+            info = self.inject_fake_data(root, config) if inject_fake_data else None
+            if info is None or "num_examples" not in info:
+                raise UsageError(
+                    "The method 'inject_fake_data' needs to return a dictionary that contains at least a "
+                    "'num_examples' field."
+                )
+
+            cm = self._disable_download_extract if disable_download_extract else nullcontext
+            with cm(special_kwargs), disable_console_output():
+                dataset = self.DATASET_CLASS(root, **config, **special_kwargs)
+
+            yield dataset, info
+
+    @classmethod
+    def setUpClass(cls):
+        cls._verify_required_public_class_attributes()
+        cls._populate_private_class_attributes()
+        cls._process_optional_public_class_attributes()
+        super().setUpClass()
+
+    @classmethod
+    def _verify_required_public_class_attributes(cls):
+        if cls.DATASET_CLASS is None:
+            raise UsageError(
+                "The class attribute 'DATASET_CLASS' needs to be overwritten. "
+                "It should contain the class of the dataset to be tested."
+            )
+        if cls.FEATURE_TYPES is None:
+            raise UsageError(
+                "The class attribute 'FEATURE_TYPES' needs to be overwritten. "
+                "It should contain a sequence of types that the dataset returns when accessed by index."
+            )
+
+    @property
+    @classmethod
+    def _argspec(cls):
+        return inspect.getfullargspec(cls.DATASET_CLASS.__init__)
+
+    @property
+    @classmethod
+    def _name(cls):
+        return cls.DATASET_CLASS.__name__
+
+    @classmethod
+    def _populate_private_class_attributes(cls):
+        cls._HAS_SPECIAL_KWARG = {name: name in cls._argspec.args for name in cls._SPECIAL_KWARGS}
+
+    @classmethod
+    def _process_optional_public_class_attributes(cls):
+        argspec = cls._argspec
+        if cls.CONFIGS is None:
+            config = {
+                kwarg: default
+                for kwarg, default in zip(argspec.args[-len(argspec.defaults) :], argspec.defaults)
+                if kwarg not in cls._SPECIAL_KWARGS
+            }
+            cls.CONFIGS = (config,)
+
+        if cls.REQUIRED_PACKAGES is not None:
+            try:
+                for pkg in cls.REQUIRED_PACKAGES:
+                    importlib.import_module(pkg)
+            except ImportError as error:
+                raise unittest.SkipTest(
+                    f"The package '{error.name}' is required to load the dataset '{cls._name}' but is not installed."
+                )
+
+    def _split_kwargs(self, kwargs):
+        special_kwargs = kwargs.copy()
+        other_kwargs = {key: special_kwargs.pop(key) for key in set(special_kwargs.keys()) - self._SPECIAL_KWARGS}
+        return special_kwargs, other_kwargs
+
+    @contextlib.contextmanager
+    def _disable_download_extract(self, special_kwargs):
+        inject_download_kwarg = self._HAS_SPECIAL_KWARG["download"] and "download" not in special_kwargs
+        if inject_download_kwarg:
+            special_kwargs["download"] = False
+
+        module = inspect.getmodule(self.DATASET_CLASS).__name__
+        with contextlib.ExitStack() as stack:
+            mocks = {}
+            for function, kwargs in itertools.chain(
+                zip(self._CHECK_FUNCTIONS, [dict(return_value=True)] * len(self._CHECK_FUNCTIONS)),
+                zip(self._DOWNLOAD_EXTRACT_FUNCTIONS, [dict()] * len(self._DOWNLOAD_EXTRACT_FUNCTIONS)),
+            ):
+                with contextlib.suppress(AttributeError):
+                    patcher = unittest.mock.patch(f"{module}.{function}", **kwargs)
+                    mocks[function] = stack.enter_context(patcher)
+
+            try:
+                yield mocks
+            finally:
+                if inject_download_kwarg:
+                    del special_kwargs["download"]
+
+    def test_not_found(self):
+        with self.assertRaises(RuntimeError):
+            with self.create_dataset(inject_fake_data=False):
+                pass
+
+    def test_smoke(self, config):
+        with self.create_dataset(config) as (dataset, _):
+            self.assertIsInstance(dataset, torchvision.datasets.VisionDataset)
+
+    @test_all_configs
+    def test_str_smoke(self, config):
+        with self.create_dataset(config) as (dataset, _):
+            self.assertIsInstance(str(dataset), str)
+
+    @test_all_configs
+    def test_feature_types(self, config):
+        with self.create_dataset(config) as (dataset, _):
+            example = dataset[0]
+
+            actual = len(example)
+            expected = len(self.FEATURE_TYPES)
+            self.assertEqual(
+                actual,
+                expected,
+                f"The number of the returned features does not match the the number of elements in in FEATURE_TYPES: "
+                f"{actual} != {expected}",
+            )
+
+            for idx, (feature, expected_feature_type) in enumerate(zip(example, self.FEATURE_TYPES)):
+                with self.subTest(idx=idx):
+                    self.assertIsInstance(feature, expected_feature_type)
+
+    @test_all_configs
+    def test_num_examples(self, config):
+        with self.create_dataset(config) as (dataset, info):
+            self.assertEqual(len(dataset), info["num_examples"])
+
+
+class ImageDatasetTestCase(DatasetTestCase):
+    FEATURE_TYPES = (Image.Image, int)
+
+
+class VideoDatasetTestCase(DatasetTestCase):
+    FEATURE_TYPES = (torch.Tensor, torch.Tensor, int)
+    REQUIRED_PACKAGES = ("av",)

From fd8d48adee8d2da4fbd1baef7bd6304eaf497ba5 Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Mon, 15 Feb 2021 16:18:14 +0100
Subject: [PATCH 02/29] add better type hints

---
 ...atasets_testcases.py => datasets_utils.py} | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)
 rename test/{datasets_testcases.py => datasets_utils.py} (94%)

diff --git a/test/datasets_testcases.py b/test/datasets_utils.py
similarity index 94%
rename from test/datasets_testcases.py
rename to test/datasets_utils.py
index 5d4cb36e575..cdf1159694c 100644
--- a/test/datasets_testcases.py
+++ b/test/datasets_utils.py
@@ -5,6 +5,7 @@
 import itertools
 import unittest
 import unittest.mock
+from typing import Any, Iterator, Sequence, Tuple, Union
 
 from PIL import Image
 
@@ -134,25 +135,31 @@ def test_baz(self):
         "download_and_extract_archive",
     }
 
-    def inject_fake_data(self, root: str, config: dict[str, Any]) -> dict[str, Any]:
+    def inject_fake_data(self, root: str, config: Dict[str, Any]) -> Dict[str, Any]:
         """Inject fake data into the root of the dataset.
 
         Args:
             root (str): Root of the dataset.
-            config (dict[str, Any]): Configuration that will be used to create the dataset.
+            config (Dict[str, Any]): Configuration that will be used to create the dataset.
 
         Returns:
-            info (dict[str, Any]): Additional information about the injected fake data. Must contain the field
+            info (Dict[str, Any]): Additional information about the injected fake data. Must contain the field
                 ``"num_examples"`` that corresponds to the length of the dataset to be created.
         """
         raise NotImplementedError("You need to provide fake data in order for the tests to run.")
 
     @contextlib.contextmanager
-    def create_dataset(self, config=None, inject_fake_data=True, disable_download_extract=None, **kwargs):
+    def create_dataset(
+        self,
+        config: Optional[Dict[str, Any]] = None,
+        inject_fake_data: bool = True,
+        disable_download_extract: Optional[bool] = None,
+        **kwargs: Any,
+    ) -> Iterator[Tuple[torchvision.datasets.VisionDataset, Dict[str, Any]]]:
         r"""Create the dataset in a temporary directory.
 
         Args:
-            config (Optional[dict[str, Any]]): Configuration that will be used to create the dataset. If omitted, the
+            config (Optional[Dict[str, Any]]): Configuration that will be used to create the dataset. If omitted, the
                 default configuration is used.
             inject_fake_data (bool): If ``True`` (default) inject the fake data with :meth:`.inject_fake_data` before
                 creating the dataset.
@@ -163,7 +170,7 @@ def create_dataset(self, config=None, inject_fake_data=True, disable_download_ex
 
         Yields:
             dataset (torchvision.dataset.VisionDataset): Dataset.
-            info (dict[str, Any]): Additional information about the injected fake data. See :meth:`.inject_fake_data`
+            info (Dict[str, Any]): Additional information about the injected fake data. See :meth:`.inject_fake_data`
                 for details.
         """
         if config is None:

From cb29187fa1770436a1f9652cce137f8afa1985d9 Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Mon, 15 Feb 2021 16:24:00 +0100
Subject: [PATCH 03/29] add documentation to subclasses

---
 test/datasets_utils.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/test/datasets_utils.py b/test/datasets_utils.py
index cdf1159694c..39afd3fb592 100644
--- a/test/datasets_utils.py
+++ b/test/datasets_utils.py
@@ -317,9 +317,21 @@ def test_num_examples(self, config):
 
 
 class ImageDatasetTestCase(DatasetTestCase):
+    """Abstract base class for image dataset testcases.
+
+    - Overwrites the FEATURE_TYPES class attribute to expect a :class:`PIL.Image.Image` and an integer label.
+    """
+
     FEATURE_TYPES = (Image.Image, int)
 
 
 class VideoDatasetTestCase(DatasetTestCase):
+    """Abstract base class for video dataset testcases.
+
+    - Overwrites the FEATURE_TYPES class attribute to expect two :class:`torch.Tensor` s for the video and audio as
+      well as an integer label.
+    - Overwrites the REQUIRED_PACKAGES class attribute to require PyAV (``av``).
+    """
+
     FEATURE_TYPES = (torch.Tensor, torch.Tensor, int)
     REQUIRED_PACKAGES = ("av",)

From 11b27e480ed86b5f7db2991978a8da0653747150 Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Mon, 15 Feb 2021 17:04:40 +0100
Subject: [PATCH 04/29] add utility functions to create files / folders of
 random images and videos

---
 test/datasets_utils.py | 195 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 191 insertions(+), 4 deletions(-)

diff --git a/test/datasets_utils.py b/test/datasets_utils.py
index 39afd3fb592..919299a6837 100644
--- a/test/datasets_utils.py
+++ b/test/datasets_utils.py
@@ -3,22 +3,57 @@
 import importlib
 import inspect
 import itertools
+import os
+import pathlib
 import unittest
 import unittest.mock
 from typing import Any, Iterator, Sequence, Tuple, Union
 
-from PIL import Image
+import PIL.Image
 
+import torch
 import torchvision.datasets
 
+from common_utils import get_tmp_dir
 from datasets_utils import tmpdir, disable_console_output
 
+try:
+    from torchvision.io import write_video
 
-__all__ = ["DatasetTestCase", "ImageDatasetTestCase", "VideoDatasetTestCase", "test_all_configs"]
+    PYAV_AVAILABLE = True
+
+except ImportError:
+    write_video = None
+    PYAV_AVAILABLE = False
+
+
+__all__ = [
+    "UsageError",
+    "test_all_configs",
+    "DatasetTestCase",
+    "ImageDatasetTestCase",
+    "VideoDatasetTestCase",
+    "create_image_or_video_tensor",
+    "create_image_file",
+    "create_image_folder",
+    "create_video_file",
+    "create_video_folder",
+]
 
 
 class UsageError(RuntimeError):
-    """Should be raised instead of a generic ``RuntimeError`` in case a test case is not correctly configured."""
+    """Should be raised in case an error happens in the setup rather than the test."""
+
+
+def requires_pyav(fn):
+    @functools.wraps(fn)
+    def wrapper(*args, **kwargs):
+        if not PYAV_AVAILABLE:
+            raise UsageError("PyAV (av) is required but not available.")
+
+        return fn(*args, **kwargs)
+
+    return wrapper
 
 
 # As of Python 3.7 this is provided by contextlib
@@ -322,7 +357,7 @@ class ImageDatasetTestCase(DatasetTestCase):
     - Overwrites the FEATURE_TYPES class attribute to expect a :class:`PIL.Image.Image` and an integer label.
     """
 
-    FEATURE_TYPES = (Image.Image, int)
+    FEATURE_TYPES = (PIL.Image.Image, int)
 
 
 class VideoDatasetTestCase(DatasetTestCase):
@@ -335,3 +370,155 @@ class VideoDatasetTestCase(DatasetTestCase):
 
     FEATURE_TYPES = (torch.Tensor, torch.Tensor, int)
     REQUIRED_PACKAGES = ("av",)
+
+
+def create_image_or_video_tensor(size: Sequence[int]) -> torch.Tensor:
+    r"""Create a random uint8 tensor.
+
+    Args:
+        size (Sequence[int]): Size of the tensor.
+    """
+    return torch.randint(0, 256, size, dtype=torch.uint8)
+
+
+def create_image_file(
+    root: Union[pathlib.Path, str], name: Union[pathlib.Path, str], size: Union[Sequence[int], int] = 10, **kwargs: Any
+) -> None:
+    """Create an image file from random data.
+
+    Args:
+        root (Union[str, pathlib.Path]): Root directory the image file will be placed in.
+        name (Union[str, pathlib.Path]): Name of the image file.
+        size (Union[Sequence[int], int]): Size of the image that represents the ``(num_channels, height, width)``. If
+            scalar, the value is used for the height and width. If not provided, three channels are assumed.
+        kwargs (Any): Additional parameters passed to :meth:`PIL.Image.Image.save`.
+    """
+    if isinstance(size, int):
+        size = (size, size)
+    if len(size) == 2:
+        size = (3, *size)
+    if len(size) != 3:
+        raise UsageError(
+            f"The 'size' argument should either be an int or a sequence of length 2 or 3. Got {len(size)} instead"
+        )
+
+    image = create_image_or_video_tensor(size)
+    PIL.Image.fromarray(image.permute(2, 1, 0).numpy()).save(pathlib.Path(root) / name)
+
+
+def create_image_folder(
+    root: Union[pathlib.Path, str],
+    name: Union[pathlib.Path, str],
+    file_name_fn: Callable[[idx], str],
+    num_examples: int,
+    size: Optional[Union[Sequence[int], int, Callable[[int], Union[Sequence[int], int]]]] = None,
+    **kwargs: Any,
+):
+    """Create a folder of random images.
+
+    Args:
+        root (Union[str, pathlib.Path]): Root directory the image folder will be placed in.
+        name (Union[str, pathlib.Path]): Name of the image folder.
+        file_name_fn (Callable[[idx], str]): Should return a file name if called with the file index.
+        num_examples (int): Number of images to create.
+        size (Optional[Union[Sequence[int], int, Callable[[int], Union[Sequence[int], int]]]]): Size of the images. If
+            callable, will be called with the index of the corresponding file. If omitted, a random height and width
+            between 3 and 10 pixels is selected on a per-image basis.
+        kwargs (Any): Additional parameters passed to :func:`create_image_file`.
+    """
+    if size is None:
+
+        def size(idx: int) -> Tuple[int, int, int]:
+            num_channels = 3
+            height, width = torch.randint(3, 11, size=(2,), dtype=np.int).tolist()
+            return (num_channels, height, width)
+
+    root = pathlib.Path(root) / name
+    os.makedirs(root)
+
+    for idx in range(num_examples):
+        create_image_file(root, file_name_fn(idx), size=size(idx) if callable(size) else size, **kwargs)
+
+
+@requires_pyav
+def create_video_file(
+    root: Union[pathlib.Path, str],
+    name: Union[pathlib.Path, str],
+    size: Union[Sequence[int], int] = (25, 3, 10, 10),
+    fps: float = 25,
+    **kwargs: Any,
+) -> None:
+    """Create a video file from random data.
+
+    Args:
+        root (Union[str, pathlib.Path]): Root directory the video file will be placed in.
+        name (Union[str, pathlib.Path]): Name of the video file.
+        size (Union[Sequence[int], int]): Size of the video that represents the
+            ``(length, num_channels, height, width)``. If scalar, the value is used for the height and width.
+            If the channels are omitted, three are assumed. If the length is omitted, it is set to one second.
+        fps (float): Frame rate in frames per second.
+        kwargs (Any): Additional parameters passed to :func:`torchvision.io.write_video`.
+
+    Raises:
+        UsageError: If PyAV is not available.
+    """
+    if not PYAV_AVAILABLE:
+        raise PyAVNotAvailableError
+
+    if isinstance(size, int):
+        size = (size, size)
+    if len(size) == 2:
+        size = (3, *size)
+    if len(size) == 3:
+        size = (fps, *size)
+    if len(size) != 4:
+        raise UsageError(
+            f"The 'size' argument should either be an int or a sequence of length 2, 3, or 4. Got {len(size)} instead"
+        )
+
+    video = create_image_or_video_tensor(size)
+    write_video(str(pathlib.Path(root) / name), video.permute(0, 2, 3, 1), fps, **kwargs)
+
+
+@requires_pyav
+def create_video_folder(
+    root: Union[str, pathlib.Path],
+    name: Union[str, pathlib.Path],
+    file_name_fn: Callable[[idx], str],
+    num_examples: int,
+    size: Optional[Union[Sequence[int], int, Callable[[int], Union[Sequence[int], int]]]] = None,
+    fps=25,
+    **kwargs,
+):
+    """Create a folder of random videos.
+
+    Args:
+        root (Union[str, pathlib.Path]): Root directory the image folder will be placed in.
+        name (Union[str, pathlib.Path]): Name of the image folder.
+        file_name_fn (Callable[[idx], str]): Should return a file name if called with the file index.
+        num_examples (int): Number of images to create.
+        size (Optional[Union[Sequence[int], int, Callable[[int], Union[Sequence[int], int]]]]): Size of the images. If
+            callable, will be called with the index of the corresponding file. If omitted, a random length between 0.5
+            and 1.5 seconds as well as random even height and width between 4 and 10 pixels are selected on a
+            per-video basis.
+        fps (float): Frame rate in frames per second.
+        kwargs (Any): Additional parameters passed to :func:`create_video_file`.
+
+    Raises:
+        UsageError: If PyAV is not available.
+    """
+    if size is None:
+
+        def size(idx):
+            length = int((torch.rand(()).item() + 0.5) * fps)
+            num_channels = 3
+            # The 'libx264' video codec, which is the default of torchvision.io.write_video, requires the height and
+            # width of the video to be divisible by 2.
+            height, width = (torch.randint(2, 6, size=(2,), dtype=np.int) * 2).tolist()
+            return (length, num_channels, height, width)
+
+    root = pathlib.Path(root) / name
+    os.makedirs(root)
+
+    for idx in range(num_examples):
+        create_video_file(root, file_name_fn(idx), size=size(idx) if callable(size) else size)

From 3115e1afec54255335a6505827162508df8c9e34 Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Mon, 15 Feb 2021 17:23:08 +0100
Subject: [PATCH 05/29] fix imports

---
 test/datasets_utils.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/test/datasets_utils.py b/test/datasets_utils.py
index 919299a6837..95c7de8f937 100644
--- a/test/datasets_utils.py
+++ b/test/datasets_utils.py
@@ -7,15 +7,14 @@
 import pathlib
 import unittest
 import unittest.mock
-from typing import Any, Iterator, Sequence, Tuple, Union
+from typing import Any, Callable, Dict, Iterator, Optional, Sequence, Tuple, Union
 
 import PIL.Image
 
 import torch
 import torchvision.datasets
 
-from common_utils import get_tmp_dir
-from datasets_utils import tmpdir, disable_console_output
+from common_utils import get_tmp_dir, disable_console_output
 
 try:
     from torchvision.io import write_video
@@ -217,7 +216,7 @@ def create_dataset(
         if disable_download_extract is None:
             disable_download_extract = inject_fake_data
 
-        with tmpdir() as root:
+        with get_tmp_dir() as root:
             info = self.inject_fake_data(root, config) if inject_fake_data else None
             if info is None or "num_examples" not in info:
                 raise UsageError(

From 5f32d77832c319f18a7e181715aea7af29007f42 Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Mon, 15 Feb 2021 17:23:41 +0100
Subject: [PATCH 06/29] remove class properties

---
 test/datasets_utils.py | 18 +++++-------------
 1 file changed, 5 insertions(+), 13 deletions(-)

diff --git a/test/datasets_utils.py b/test/datasets_utils.py
index 95c7de8f937..5b4e1f75b46 100644
--- a/test/datasets_utils.py
+++ b/test/datasets_utils.py
@@ -250,23 +250,14 @@ def _verify_required_public_class_attributes(cls):
                 "It should contain a sequence of types that the dataset returns when accessed by index."
             )
 
-    @property
-    @classmethod
-    def _argspec(cls):
-        return inspect.getfullargspec(cls.DATASET_CLASS.__init__)
-
-    @property
-    @classmethod
-    def _name(cls):
-        return cls.DATASET_CLASS.__name__
-
     @classmethod
     def _populate_private_class_attributes(cls):
-        cls._HAS_SPECIAL_KWARG = {name: name in cls._argspec.args for name in cls._SPECIAL_KWARGS}
+        argspec = inspect.getfullargspec(cls.DATASET_CLASS.__init__)
+        cls._HAS_SPECIAL_KWARG = {name: name in argspec.args for name in cls._SPECIAL_KWARGS}
 
     @classmethod
     def _process_optional_public_class_attributes(cls):
-        argspec = cls._argspec
+        argspec = inspect.getfullargspec(cls.DATASET_CLASS.__init__)
         if cls.CONFIGS is None:
             config = {
                 kwarg: default
@@ -281,7 +272,8 @@ def _process_optional_public_class_attributes(cls):
                     importlib.import_module(pkg)
             except ImportError as error:
                 raise unittest.SkipTest(
-                    f"The package '{error.name}' is required to load the dataset '{cls._name}' but is not installed."
+                    f"The package '{error.name}' is required to load the dataset '{cls.DATASET_CLASS.__name__}' but is "
+                    f"not installed."
                 )
 
     def _split_kwargs(self, kwargs):

From 2cc4d778737e501ccff05539b28ea44a688c756e Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Mon, 15 Feb 2021 17:23:54 +0100
Subject: [PATCH 07/29] fix smoke test

---
 test/datasets_utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/datasets_utils.py b/test/datasets_utils.py
index 5b4e1f75b46..25888feb66f 100644
--- a/test/datasets_utils.py
+++ b/test/datasets_utils.py
@@ -309,8 +309,8 @@ def test_not_found(self):
             with self.create_dataset(inject_fake_data=False):
                 pass
 
-    def test_smoke(self, config):
-        with self.create_dataset(config) as (dataset, _):
+    def test_smoke(self):
+        with self.create_dataset() as (dataset, _):
             self.assertIsInstance(dataset, torchvision.datasets.VisionDataset)
 
     @test_all_configs

From 29645772110a1831046edafa9cc568d5dffcb716 Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Mon, 15 Feb 2021 17:24:15 +0100
Subject: [PATCH 08/29] fix type hints

---
 test/datasets_utils.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/test/datasets_utils.py b/test/datasets_utils.py
index 25888feb66f..f3ab1892cdf 100644
--- a/test/datasets_utils.py
+++ b/test/datasets_utils.py
@@ -400,7 +400,7 @@ def create_image_file(
 def create_image_folder(
     root: Union[pathlib.Path, str],
     name: Union[pathlib.Path, str],
-    file_name_fn: Callable[[idx], str],
+    file_name_fn: Callable[[int], str],
     num_examples: int,
     size: Optional[Union[Sequence[int], int, Callable[[int], Union[Sequence[int], int]]]] = None,
     **kwargs: Any,
@@ -410,7 +410,7 @@ def create_image_folder(
     Args:
         root (Union[str, pathlib.Path]): Root directory the image folder will be placed in.
         name (Union[str, pathlib.Path]): Name of the image folder.
-        file_name_fn (Callable[[idx], str]): Should return a file name if called with the file index.
+        file_name_fn (Callable[[int], str]): Should return a file name if called with the file index.
         num_examples (int): Number of images to create.
         size (Optional[Union[Sequence[int], int, Callable[[int], Union[Sequence[int], int]]]]): Size of the images. If
             callable, will be called with the index of the corresponding file. If omitted, a random height and width
@@ -475,7 +475,7 @@ def create_video_file(
 def create_video_folder(
     root: Union[str, pathlib.Path],
     name: Union[str, pathlib.Path],
-    file_name_fn: Callable[[idx], str],
+    file_name_fn: Callable[[int], str],
     num_examples: int,
     size: Optional[Union[Sequence[int], int, Callable[[int], Union[Sequence[int], int]]]] = None,
     fps=25,
@@ -486,7 +486,7 @@ def create_video_folder(
     Args:
         root (Union[str, pathlib.Path]): Root directory the image folder will be placed in.
         name (Union[str, pathlib.Path]): Name of the image folder.
-        file_name_fn (Callable[[idx], str]): Should return a file name if called with the file index.
+        file_name_fn (Callable[[int], str]): Should return a file name if called with the file index.
         num_examples (int): Number of images to create.
         size (Optional[Union[Sequence[int], int, Callable[[int], Union[Sequence[int], int]]]]): Size of the images. If
             callable, will be called with the index of the corresponding file. If omitted, a random length between 0.5

From 5993f8c4d1d4d183beadd14fbe4e370e35ca16a2 Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Mon, 15 Feb 2021 17:24:29 +0100
Subject: [PATCH 09/29] fix random size generation

---
 test/datasets_utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/datasets_utils.py b/test/datasets_utils.py
index f3ab1892cdf..40e4c2db678 100644
--- a/test/datasets_utils.py
+++ b/test/datasets_utils.py
@@ -421,7 +421,7 @@ def create_image_folder(
 
         def size(idx: int) -> Tuple[int, int, int]:
             num_channels = 3
-            height, width = torch.randint(3, 11, size=(2,), dtype=np.int).tolist()
+            height, width = torch.randint(3, 11, size=(2,), dtype=torch.int).tolist()
             return (num_channels, height, width)
 
     root = pathlib.Path(root) / name
@@ -505,7 +505,7 @@ def size(idx):
             num_channels = 3
             # The 'libx264' video codec, which is the default of torchvision.io.write_video, requires the height and
             # width of the video to be divisible by 2.
-            height, width = (torch.randint(2, 6, size=(2,), dtype=np.int) * 2).tolist()
+            height, width = (torch.randint(2, 6, size=(2,), dtype=torch.int) * 2).tolist()
             return (length, num_channels, height, width)
 
     root = pathlib.Path(root) / name

From 8dca6e4911bd91584f9459316854bec03d21edb4 Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Mon, 15 Feb 2021 17:29:25 +0100
Subject: [PATCH 10/29] add Caltech256 as example

---
 test/test_datasets.py | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/test/test_datasets.py b/test/test_datasets.py
index ff8e0281e7c..087bc2d75a5 100644
--- a/test/test_datasets.py
+++ b/test/test_datasets.py
@@ -15,6 +15,9 @@
 import xml.etree.ElementTree as ET
 from urllib.request import Request, urlopen
 import itertools
+import datasets_utils
+import pathlib
+from torchvision import datasets
 
 
 try:
@@ -466,5 +469,25 @@ def test_repr_smoke(self):
             self.assertIsInstance(repr(dataset), str)
 
 
+class Caltech256TestCase(datasets_utils.ImageDatasetTestCase):
+    DATASET_CLASS = datasets.Caltech256
+
+    def inject_fake_data(self, root, config):
+        root = pathlib.Path(root) / "caltech256" / "256_ObjectCategories"
+
+        categories = ((1, "ak47"), (127, "laptop-101"), (257, "clutter"))
+        num_images_per_category = 2
+
+        for idx, category in categories:
+            datasets_utils.create_image_folder(
+                root,
+                name=f"{idx:03d}.{category}",
+                file_name_fn=lambda image_idx: f"{idx:03d}_{image_idx:04d}.jpg",
+                num_examples=num_images_per_category,
+            )
+
+        return dict(num_examples=num_images_per_category * len(categories))
+
+
 if __name__ == '__main__':
     unittest.main()

From 37eeff7192d41b5a3ba377d25c119fa06728ca86 Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Mon, 15 Feb 2021 18:04:41 +0100
Subject: [PATCH 11/29] add utility function to create grid of combinations

---
 test/datasets_utils.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/test/datasets_utils.py b/test/datasets_utils.py
index 40e4c2db678..db75f8f5b18 100644
--- a/test/datasets_utils.py
+++ b/test/datasets_utils.py
@@ -85,6 +85,23 @@ def wrapper(self):
     return wrapper
 
 
+def combinations_grid(**kwargs):
+    """Creates a grid of input combinations.
+
+    Each element in the returned sequence is a dictionary containing one possible combination as values.
+
+    Example:
+        >>> combinations_grid(foo=("bar", "baz"), spam=("eggs", "ham"))
+        [
+            {'foo': 'bar', 'spam': 'eggs'},
+            {'foo': 'bar', 'spam': 'ham'},
+            {'foo': 'baz', 'spam': 'eggs'},
+            {'foo': 'baz', 'spam': 'ham'}
+        ]
+    """
+    return [dict(zip(kwargs.keys(), values)) for values in itertools.product(*kwargs.values())]
+
+
 class DatasetTestCase(unittest.TestCase):
     """Abstract base class for all dataset testcases.
 

From a9526e1782e2d4d17498efd8d9365c5e35c178ba Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Mon, 15 Feb 2021 18:06:16 +0100
Subject: [PATCH 12/29] add CIFAR100? as example

---
 test/test_datasets.py | 72 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 71 insertions(+), 1 deletion(-)

diff --git a/test/test_datasets.py b/test/test_datasets.py
index 087bc2d75a5..5b618fb071f 100644
--- a/test/test_datasets.py
+++ b/test/test_datasets.py
@@ -489,5 +489,75 @@ def inject_fake_data(self, root, config):
         return dict(num_examples=num_images_per_category * len(categories))
 
 
-if __name__ == '__main__':
+class CIFAR10TestCase(datasets_utils.ImageDatasetTestCase):
+    DATASET_CLASS = datasets.CIFAR10
+    CONFIGS = datasets_utils.combinations_grid(train=(True, False))
+
+    _VERSION_CONFIG = dict(
+        base_folder="cifar-10-batches-py",
+        train_files=tuple(f"data_batch_{idx}" for idx in range(1, 6)),
+        test_files=("test_batch",),
+        labels_key="labels",
+        meta_file="batches.meta",
+        num_categories=10,
+        categories_key="label_names",
+    )
+
+    def inject_fake_data(self, root, config):
+        root = pathlib.Path(root) / self._VERSION_CONFIG["base_folder"]
+        os.makedirs(root)
+
+        num_images_per_file = 1
+        for name in itertools.chain(self._VERSION_CONFIG["train_files"], self._VERSION_CONFIG["test_files"]):
+            self._create_batch_file(root, name, num_images_per_file)
+
+        categories = self._create_meta_file(root)
+
+        return dict(
+            num_examples=num_images_per_file
+            * len(self._VERSION_CONFIG["train_files"] if config["train"] else self._VERSION_CONFIG["test_files"]),
+            categories=categories,
+        )
+
+    def _create_batch_file(self, root, name, num_images):
+        data = datasets_utils.create_image_or_video_tensor((num_images, 32 * 32 * 3))
+        labels = np.random.randint(0, self._VERSION_CONFIG["num_categories"], size=num_images).tolist()
+        self._create_binary_file(root, name, {"data": data, self._VERSION_CONFIG["labels_key"]: labels})
+
+    def _create_meta_file(self, root):
+        categories = [
+            f"{idx:0{len(str(self._VERSION_CONFIG['num_categories'] - 1))}d}"
+            for idx in range(self._VERSION_CONFIG["num_categories"])
+        ]
+        self._create_binary_file(
+            root, self._VERSION_CONFIG["meta_file"], {self._VERSION_CONFIG["categories_key"]: categories}
+        )
+        return categories
+
+    def _create_binary_file(self, root, name, content):
+        with open(pathlib.Path(root) / name, "wb") as fh:
+            pickle.dump(content, fh)
+
+    def test_class_to_idx(self):
+        with self.create_dataset() as (dataset, info):
+            expected = {category: label for label, category in enumerate(info["categories"])}
+            actual = dataset.class_to_idx
+            self.assertEqual(actual, expected)
+
+
+class CIFAR100(CIFAR10TestCase):
+    DATASET_CLASS = datasets.CIFAR100
+
+    _VERSION_CONFIG = dict(
+        base_folder="cifar-100-python",
+        train_files=("train",),
+        test_files=("test",),
+        labels_key="fine_labels",
+        meta_file="meta",
+        num_categories=100,
+        categories_key="fine_label_names",
+    )
+
+
+if __name__ == "__main__":
     unittest.main()

From 857c5a8c12fa95bdb1d5ff589bab2845535c51ca Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Mon, 15 Feb 2021 18:19:06 +0100
Subject: [PATCH 13/29] lint

---
 test/datasets_utils.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/test/datasets_utils.py b/test/datasets_utils.py
index db75f8f5b18..685898125bc 100644
--- a/test/datasets_utils.py
+++ b/test/datasets_utils.py
@@ -151,8 +151,8 @@ def test_foo(self):
         def test_bar(self, config):
             pass
 
-    Within the test you can use the ``create_dataset()`` method that yields the dataset as well as additional information
-    provided by the ``ìnject_fake_data()`` method:
+    Within the test you can use the ``create_dataset()`` method that yields the dataset as well as additional
+    information provided by the ``inject_fake_data()`` method:
 
     .. code-block::
 
@@ -278,7 +278,7 @@ def _process_optional_public_class_attributes(cls):
         if cls.CONFIGS is None:
             config = {
                 kwarg: default
-                for kwarg, default in zip(argspec.args[-len(argspec.defaults) :], argspec.defaults)
+                for kwarg, default in zip(argspec.args[-len(argspec.defaults):], argspec.defaults)
                 if kwarg not in cls._SPECIAL_KWARGS
             }
             cls.CONFIGS = (config,)

From e85f9964f683deee37f51df4c3e552a873a6ccb1 Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Tue, 16 Feb 2021 10:24:33 +0100
Subject: [PATCH 14/29] add missing import

---
 test/test_datasets.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/test_datasets.py b/test/test_datasets.py
index 5b618fb071f..11b3f90db2a 100644
--- a/test/test_datasets.py
+++ b/test/test_datasets.py
@@ -17,6 +17,7 @@
 import itertools
 import datasets_utils
 import pathlib
+import pickle
 from torchvision import datasets
 
 

From 5ecd061f6e35c68c7cb7efcc694a3e97790fa7a4 Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Tue, 16 Feb 2021 14:38:11 +0100
Subject: [PATCH 15/29] improve documentation

---
 test/datasets_utils.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/test/datasets_utils.py b/test/datasets_utils.py
index 685898125bc..94ea95659ab 100644
--- a/test/datasets_utils.py
+++ b/test/datasets_utils.py
@@ -433,6 +433,11 @@ def create_image_folder(
             callable, will be called with the index of the corresponding file. If omitted, a random height and width
             between 3 and 10 pixels is selected on a per-image basis.
         kwargs (Any): Additional parameters passed to :func:`create_image_file`.
+
+
+    .. seealso::
+
+        - :func:`create_image_file`
     """
     if size is None:
 
@@ -514,6 +519,10 @@ def create_video_folder(
 
     Raises:
         UsageError: If PyAV is not available.
+
+    .. seealso::
+
+        - :func:`create_video_file`
     """
     if size is None:
 

From 1175a326659c9803330070b6bf335bcb6c0a2259 Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Tue, 16 Feb 2021 14:38:59 +0100
Subject: [PATCH 16/29] create 1 frame videos by default

---
 test/datasets_utils.py | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/test/datasets_utils.py b/test/datasets_utils.py
index 94ea95659ab..6217842fa13 100644
--- a/test/datasets_utils.py
+++ b/test/datasets_utils.py
@@ -457,7 +457,7 @@ def size(idx: int) -> Tuple[int, int, int]:
 def create_video_file(
     root: Union[pathlib.Path, str],
     name: Union[pathlib.Path, str],
-    size: Union[Sequence[int], int] = (25, 3, 10, 10),
+    size: Union[Sequence[int], int] = (1, 3, 10, 10),
     fps: float = 25,
     **kwargs: Any,
 ) -> None:
@@ -467,8 +467,8 @@ def create_video_file(
         root (Union[str, pathlib.Path]): Root directory the video file will be placed in.
         name (Union[str, pathlib.Path]): Name of the video file.
         size (Union[Sequence[int], int]): Size of the video that represents the
-            ``(length, num_channels, height, width)``. If scalar, the value is used for the height and width.
-            If not provided, three channels are assumed. If not provided, the length is set to one second.
+            ``(num_frames, num_channels, height, width)``. If scalar, the value is used for the height and width.
+            If not provided, ``num_frames=1`` and ``num_channels=3`` are assumed.
         fps (float): Frame rate in frames per second.
         kwargs (Any): Additional parameters passed to :func:`torchvision.io.write_video`.
 
@@ -483,7 +483,7 @@ def create_video_file(
     if len(size) == 2:
         size = (3, *size)
     if len(size) == 3:
-        size = (fps, *size)
+        size = (1, *size)
     if len(size) != 4:
         raise UsageError(
             f"The 'size' argument should either be an int or a sequence of length 2, 3, or 4. Got {len(size)} instead"
@@ -510,10 +510,9 @@ def create_video_folder(
         name (Union[str, pathlib.Path]): Name of the image folder.
         file_name_fn (Callable[[int], str]): Should return a file name if called with the file index.
         num_examples (int): Number of images to create.
-        size (Optional[Union[Sequence[int], int, Callable[[int], Union[Sequence[int], int]]]]): Size of the images. If
-            callable, will be called with the index of the corresponding file. If omitted, a random length between 0.5
-            and 1.5 seconds as well as random even height and width between 4 and 10 pixels are selected on a
-            per-video basis.
+        size (Optional[Union[Sequence[int], int, Callable[[int], Union[Sequence[int], int]]]]): Size of the videos. If
+            callable, will be called with the index of the corresponding file. If omitted, a random even height and
+            width between 4 and 10 pixels is selected on a per-video basis.
         fps (float): Frame rate in frames per second.
         kwargs (Any): Additional parameters passed to :func:`create_video_file`.
 
@@ -527,7 +526,7 @@ def create_video_folder(
     if size is None:
 
         def size(idx):
-            length = int((torch.rand(()).item() + 0.5) * fps)
+            num_frames = 1
             num_channels = 3
             # The 'libx264' video codec, which is the default of torchvision.io.write_video, requires the height and
             # width of the video to be divisible by 2.

From d164ea9c14916e00269f302db9d1c3e8348804fd Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Tue, 16 Feb 2021 14:39:23 +0100
Subject: [PATCH 17/29] remove obsolete check

---
 test/datasets_utils.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/test/datasets_utils.py b/test/datasets_utils.py
index 6217842fa13..93b06f452b4 100644
--- a/test/datasets_utils.py
+++ b/test/datasets_utils.py
@@ -475,9 +475,6 @@ def create_video_file(
     Raises:
         UsageError: If PyAV is not available.
     """
-    if not PYAV_AVAILABLE:
-        raise PyAVNotAvailableError
-
     if isinstance(size, int):
         size = (size, size)
     if len(size) == 2:

From 9cadab16d78a2e4eb4339cea6fed6c9a6a6a7a3d Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Tue, 16 Feb 2021 14:43:21 +0100
Subject: [PATCH 18/29] return path of files created with utility functions

---
 test/datasets_utils.py | 37 ++++++++++++++++++++++++++++---------
 1 file changed, 28 insertions(+), 9 deletions(-)

diff --git a/test/datasets_utils.py b/test/datasets_utils.py
index 93b06f452b4..7b2757cfa45 100644
--- a/test/datasets_utils.py
+++ b/test/datasets_utils.py
@@ -7,7 +7,7 @@
 import pathlib
 import unittest
 import unittest.mock
-from typing import Any, Callable, Dict, Iterator, Optional, Sequence, Tuple, Union
+from typing import Any, Callable, Dict, Iterator, List, Optional, Sequence, Tuple, Union
 
 import PIL.Image
 
@@ -391,7 +391,7 @@ def create_image_or_video_tensor(size: Sequence[int]) -> torch.Tensor:
 
 def create_image_file(
     root: Union[pathlib.Path, str], name: Union[pathlib.Path, str], size: Union[Sequence[int], int] = 10, **kwargs: Any
-) -> None:
+) -> pathlib.Path:
     """Create an image file from random data.
 
     Args:
@@ -400,6 +400,9 @@ def create_image_file(
         size (Union[Sequence[int], int]): Size of the image that represents the ``(num_channels, height, width)``. If
             scalar, the value is used for the height and width. If not provided, three channels are assumed.
         kwargs (Any): Additional parameters passed to :meth:`PIL.Image.Image.save`.
+
+    Returns:
+        pathlib.Path: Path to the created image file.
     """
     if isinstance(size, int):
         size = (size, size)
@@ -411,7 +414,9 @@ def create_image_file(
         )
 
     image = create_image_or_video_tensor(size)
-    PIL.Image.fromarray(image.permute(2, 1, 0).numpy()).save(pathlib.Path(root) / name)
+    file = pathlib.Path(root) / name
+    PIL.Image.fromarray(image.permute(2, 1, 0).numpy()).save(file)
+    return file
 
 
 def create_image_folder(
@@ -421,7 +426,7 @@ def create_image_folder(
     num_examples: int,
     size: Optional[Union[Sequence[int], int, Callable[[int], Union[Sequence[int], int]]]] = None,
     **kwargs: Any,
-):
+) -> List[pathlib.Path]:
     """Create a folder of random images.
 
     Args:
@@ -434,6 +439,8 @@ def create_image_folder(
             between 3 and 10 pixels is selected on a per-image basis.
         kwargs (Any): Additional parameters passed to :func:`create_image_file`.
 
+    Returns:
+        List[pathlib.Path]: Paths to all created image files.
 
     .. seealso::
 
@@ -449,8 +456,10 @@ def size(idx: int) -> Tuple[int, int, int]:
     root = pathlib.Path(root) / name
     os.makedirs(root)
 
-    for idx in range(num_examples):
+    return [
         create_image_file(root, file_name_fn(idx), size=size(idx) if callable(size) else size, **kwargs)
+        for idx in range(num_examples)
+    ]
 
 
 @requires_pyav
@@ -460,7 +469,7 @@ def create_video_file(
     size: Union[Sequence[int], int] = (1, 3, 10, 10),
     fps: float = 25,
     **kwargs: Any,
-) -> None:
+) -> pathlib.Path:
     """Create an video file from random data.
 
     Args:
@@ -472,6 +481,9 @@ def create_video_file(
         fps (float): Frame rate in frames per second.
         kwargs (Any): Additional parameters passed to :func:`torchvision.io.write_video`.
 
+    Returns:
+        pathlib.Path: Path to the created video file.
+
     Raises:
         UsageError: If PyAV is not available.
     """
@@ -487,7 +499,9 @@ def create_video_file(
         )
 
     video = create_image_or_video_tensor(size)
-    write_video(str(pathlib.Path(root) / name), video.permute(0, 2, 3, 1), fps, **kwargs)
+    file = pathlib.Path(root) / name
+    write_video(str(file), video.permute(0, 2, 3, 1), fps, **kwargs)
+    return file
 
 
 @requires_pyav
@@ -499,7 +513,7 @@ def create_video_folder(
     size: Optional[Union[Sequence[int], int, Callable[[int], Union[Sequence[int], int]]]] = None,
     fps=25,
     **kwargs,
-):
+) -> List[pathlib.Path]:
     """Create a folder of random videos.
 
     Args:
@@ -513,6 +527,9 @@ def create_video_folder(
         fps (float): Frame rate in frames per second.
         kwargs (Any): Additional parameters passed to :func:`create_video_file`.
 
+    Returns:
+        List[pathlib.Path]: Paths to all created video files.
+
     Raises:
         UsageError: If PyAV is not available.
 
@@ -533,5 +550,7 @@ def size(idx):
     root = pathlib.Path(root) / name
     os.makedirs(root)
 
-    for idx in range(num_examples):
+    return [
         create_video_file(root, file_name_fn(idx), size=size(idx) if callable(size) else size)
+        for idx in range(num_examples)
+    ]

From 77fa7168cb11c4a10e6630a37bb721e49ad0449f Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Tue, 16 Feb 2021 15:50:19 +0100
Subject: [PATCH 19/29] [test] close PIL file handles before deletion

---
 test/datasets_utils.py | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/test/datasets_utils.py b/test/datasets_utils.py
index 7b2757cfa45..015a91a2672 100644
--- a/test/datasets_utils.py
+++ b/test/datasets_utils.py
@@ -367,6 +367,42 @@ class ImageDatasetTestCase(DatasetTestCase):
 
     FEATURE_TYPES = (PIL.Image.Image, int)
 
+    @contextlib.contextmanager
+    def create_dataset(
+        self,
+        config: Optional[Dict[str, Any]] = None,
+        inject_fake_data: bool = True,
+        disable_download_extract: Optional[bool] = None,
+        **kwargs: Any,
+    ) -> Iterator[Tuple[torchvision.datasets.VisionDataset, Dict[str, Any]]]:
+        with super().create_dataset(
+            config=config,
+            inject_fake_data=inject_fake_data,
+            disable_download_extract=disable_download_extract,
+            **kwargs,
+        ) as (dataset, info):
+            with self._eagerly_load_pil_images():
+                yield dataset, info
+
+    @contextlib.contextmanager
+    def _eagerly_load_pil_images(self):
+        lazily_opened_files = set()
+
+        open = PIL.Image.open
+
+        def new(fp, *args, **kwargs):
+            image = open(fp, *args, **kwargs)
+            if isinstance(fp, (str, pathlib.Path)):
+                lazily_opened_files.add(image.fp)
+            return image
+
+        with unittest.mock.patch("torchvision.datasets.caltech.Image.open", new=new):
+            try:
+                yield
+            finally:
+                for fh in lazily_opened_files:
+                    fh.close()
+
 
 class VideoDatasetTestCase(DatasetTestCase):
     """Abstract base class for video dataset testcases.

From c2b3b0a7a7febba06ebccce3a8344adee10671b1 Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Tue, 16 Feb 2021 15:52:52 +0100
Subject: [PATCH 20/29] fix video folder creation

---
 test/datasets_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/datasets_utils.py b/test/datasets_utils.py
index 015a91a2672..860aaa44c60 100644
--- a/test/datasets_utils.py
+++ b/test/datasets_utils.py
@@ -581,7 +581,7 @@ def size(idx):
             # The 'libx264' video codec, which is the default of torchvision.io.write_video, requires the height and
             # width of the video to be divisible by 2.
             height, width = (torch.randint(2, 6, size=(2,), dtype=torch.int) * 2).tolist()
-            return (length, num_channels, height, width)
+            return (num_frames, num_channels, height, width)
 
     root = pathlib.Path(root) / name
     os.makedirs(root)

From 6f05ca06207dd2ed017618aa7d422b814afa423e Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Tue, 16 Feb 2021 16:27:51 +0100
Subject: [PATCH 21/29] generalize file handle closing

---
 test/datasets_utils.py | 28 +++++++++++++++++++++++++---
 1 file changed, 25 insertions(+), 3 deletions(-)

diff --git a/test/datasets_utils.py b/test/datasets_utils.py
index 860aaa44c60..b49f9d9d503 100644
--- a/test/datasets_utils.py
+++ b/test/datasets_utils.py
@@ -9,6 +9,7 @@
 import unittest.mock
 from typing import Any, Callable, Dict, Iterator, List, Optional, Sequence, Tuple, Union
 
+import PIL
 import PIL.Image
 
 import torch
@@ -381,11 +382,32 @@ def create_dataset(
             disable_download_extract=disable_download_extract,
             **kwargs,
         ) as (dataset, info):
-            with self._eagerly_load_pil_images():
+            # PIL.Image.open() only loads the image meta data upfront and keeps the file open until the first access
+            # to the pixel data occurs. Trying to delete such a file results in a PermissionError on Windows. Thus, we
+            # track all lazily opened images and close the file handle before the file is deleted.
+            # This problem only occurs during testing since some tests, e.g. DatasetTestCase.test_feature_types open an
+            # image, but never use the underlying data. During normal operation it is reasonable to assume that the
+            # user wants to work with the image they just opened rather than deleting the underlying file.
+            with self._close_image_handles():
                 yield dataset, info
 
     @contextlib.contextmanager
-    def _eagerly_load_pil_images(self):
+    def _close_image_handles(self):
+        module = inspect.getmodule(self.DATASET_CLASS)
+
+        def resolve_patch_object():
+            with contextlib.suppress(StopIteration):
+                return next(name for name, attr in vars(module).items() if attr is PIL.Image)
+
+            with contextlib.suppress(StopIteration):
+                name = next(name for name, attr in vars(module).items() if attr is PIL)
+                return f"{name}.Image"
+
+        obj = resolve_patch_object()
+        if not obj:
+            yield
+            return
+
         lazily_opened_files = set()
 
         open = PIL.Image.open
@@ -396,7 +418,7 @@ def new(fp, *args, **kwargs):
                 lazily_opened_files.add(image.fp)
             return image
 
-        with unittest.mock.patch("torchvision.datasets.caltech.Image.open", new=new):
+        with unittest.mock.patch(f"{module.__name__}.{obj}.open", new=new):
             try:
                 yield
             finally:

From d3f92683fb585d35904ad15c94607cfe3552b9bf Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Wed, 17 Feb 2021 14:05:46 +0100
Subject: [PATCH 22/29] fix lazy imports

---
 test/datasets_utils.py | 74 +++++++++++++++++++++++++++++++-----------
 1 file changed, 55 insertions(+), 19 deletions(-)

diff --git a/test/datasets_utils.py b/test/datasets_utils.py
index b49f9d9d503..b67ffc8ee22 100644
--- a/test/datasets_utils.py
+++ b/test/datasets_utils.py
@@ -14,21 +14,14 @@
 
 import torch
 import torchvision.datasets
+import torchvision.io
 
 from common_utils import get_tmp_dir, disable_console_output
 
-try:
-    from torchvision.io import write_video
-
-    PYAV_AVAILABLE = True
-
-except ImportError:
-    write_video = None
-    PYAV_AVAILABLE = False
-
 
 __all__ = [
     "UsageError",
+    "lazy_importer",
     "test_all_configs",
     "DatasetTestCase",
     "ImageDatasetTestCase",
@@ -45,15 +38,58 @@ class UsageError(RuntimeError):
     """Should be raised in case an error happens in the setup rather than the test."""
 
 
-def requires_pyav(fn):
-    @functools.wraps(fn)
-    def wrapper(*args, **kwargs):
-        if not PYAV_AVAILABLE:
-            raise UsageError("PyAV (av) is required but not available.")
+class LazyImporter:
+    r"""Lazy importer for additional dependencies.
 
-        return fn(*args, **kwargs)
+    Some datasets require additional packages that are not direct dependencies of torchvision. Instances of this class
+    provide modules listed in MODULES as attributes. They are only imported when accessed.
 
-    return wrapper
+    """
+    MODULES = (
+        "av",
+        "lmdb",
+        "pandas",
+        "pycocotools",
+        "requests",
+        "scipy.io",
+    )
+
+    def __init__(self):
+        cls = type(self)
+        for module in self.MODULES:
+            # We need the quirky module=module argument to the lambda since otherwise the lookup for module in this
+            # scope happens at runtime rather than at definition. Thus, without it every property would try to import
+            # the last module in MODULES
+            setattr(cls, module.split(".", 1)[0], property(lambda self, module=module: LazyImporter._import(module)))
+
+    @staticmethod
+    def _import(module):
+        try:
+            importlib.import_module(module)
+            return importlib.import_module(module.split(".", 1)[0])
+        except ImportError as error:
+            raise UsageError(
+                f"Failed to import module '{module}'. "
+                f"This probably means that the current test case needs '{module}' installed, "
+                f"but it is not a dependency of torchvision. "
+                f"You need to install it manually, for example 'pip install {module}'."
+            ) from error
+
+
+lazy_importer = LazyImporter()
+
+
+def requires_lazy_imports(*modules):
+    def outer_wrapper(fn):
+        @functools.wraps(fn)
+        def inner_wrapper(*args, **kwargs):
+            for module in modules:
+                getattr(lazy_importer, module.replace(".", "_"))
+            return fn(*args, **kwargs)
+
+        return inner_wrapper
+
+    return outer_wrapper
 
 
 # As of Python 3.7 this is provided by contextlib
@@ -520,7 +556,7 @@ def size(idx: int) -> Tuple[int, int, int]:
     ]
 
 
-@requires_pyav
+@requires_lazy_imports("av")
 def create_video_file(
     root: Union[pathlib.Path, str],
     name: Union[pathlib.Path, str],
@@ -558,11 +594,11 @@ def create_video_file(
 
     video = create_image_or_video_tensor(size)
     file = pathlib.Path(root) / name
-    write_video(str(file), video.permute(0, 2, 3, 1), fps, **kwargs)
+    torchvision.io.write_video(str(file), video.permute(0, 2, 3, 1), fps, **kwargs)
     return file
 
 
-@requires_pyav
+@requires_lazy_imports("av")
 def create_video_folder(
     root: Union[str, pathlib.Path],
     name: Union[str, pathlib.Path],

From e7d1675dfa6e054be4fce681fbc62bd6d4eb193d Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Wed, 17 Feb 2021 14:33:41 +0100
Subject: [PATCH 23/29] add test for transforms

---
 test/datasets_utils.py | 31 +++++++++++++++++++++++++------
 1 file changed, 25 insertions(+), 6 deletions(-)

diff --git a/test/datasets_utils.py b/test/datasets_utils.py
index b67ffc8ee22..e47c983ea33 100644
--- a/test/datasets_utils.py
+++ b/test/datasets_utils.py
@@ -167,10 +167,11 @@ class DatasetTestCase(unittest.TestCase):
     1. the dataset raises a ``RuntimeError`` if the data files are not found,
     2. the dataset inherits from `torchvision.datasets.VisionDataset`,
     3. the dataset can be turned into a string,
-    4. the feature types of a returned example matches ``FEATURE_TYPES``, and
-    5. the number of examples matches the injected fake data.
+    4. the feature types of a returned example matches ``FEATURE_TYPES``,
+    5. the number of examples matches the injected fake data, and
+    6. the dataset calls ``transform``, ``target_transform``, or ``transforms`` if available when accessing data.
 
-    Case 3., 4., and 5. are tested against all configurations in ``CONFIGS``.
+    Case 3. to 6. are tested against all configurations in ``CONFIGS``.
 
     To add dataset-specific tests, create a new method that takes no arguments with ``test_`` as a name prefix:
 
@@ -204,10 +205,13 @@ def test_baz(self):
     CONFIGS = None
     REQUIRED_PACKAGES = None
 
-    _SPECIAL_KWARGS = {
+    _TRANSFORM_KWARGS = {
         "transform",
         "target_transform",
         "transforms",
+    }
+    _SPECIAL_KWARGS = {
+        *_TRANSFORM_KWARGS,
         "download",
     }
     _HAS_SPECIAL_KWARG = None
@@ -307,7 +311,7 @@ def _verify_required_public_class_attributes(cls):
     @classmethod
     def _populate_private_class_attributes(cls):
         argspec = inspect.getfullargspec(cls.DATASET_CLASS.__init__)
-        cls._HAS_SPECIAL_KWARG = {name: name in argspec.args for name in cls._SPECIAL_KWARGS}
+        cls._HAS_SPECIAL_KWARG = {name for name in cls._SPECIAL_KWARGS if name in argspec.args}
 
     @classmethod
     def _process_optional_public_class_attributes(cls):
@@ -337,7 +341,7 @@ def _split_kwargs(self, kwargs):
 
     @contextlib.contextmanager
     def _disable_download_extract(self, special_kwargs):
-        inject_download_kwarg = self._HAS_SPECIAL_KWARG["download"] and "download" not in special_kwargs
+        inject_download_kwarg = "download" in self._HAS_SPECIAL_KWARG and "download" not in special_kwargs
         if inject_download_kwarg:
             special_kwargs["download"] = False
 
@@ -395,6 +399,21 @@ def test_num_examples(self, config):
         with self.create_dataset(config) as (dataset, info):
             self.assertEqual(len(dataset), info["num_examples"])
 
+    @test_all_configs
+    def test_transforms(self, config):
+        mock = unittest.mock.Mock(wraps=lambda *args: args[0] if len(args) == 1 else args)
+        for kwarg in self._TRANSFORM_KWARGS:
+            if not kwarg in self._HAS_SPECIAL_KWARG:
+                continue
+
+            mock.reset_mock()
+
+            with self.subTest(kwarg=kwarg):
+                with self.create_dataset(config, **{kwarg: mock}) as (dataset, _):
+                    dataset[0]
+
+                mock.assert_called()
+
 
 class ImageDatasetTestCase(DatasetTestCase):
     """Abstract base class for image dataset testcases.

From 06625331e1fca2326c1757cc48e0f5f9401a07de Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Wed, 17 Feb 2021 14:35:42 +0100
Subject: [PATCH 24/29] fix explanation comment

---
 test/datasets_utils.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/test/datasets_utils.py b/test/datasets_utils.py
index e47c983ea33..9b462976d76 100644
--- a/test/datasets_utils.py
+++ b/test/datasets_utils.py
@@ -57,9 +57,9 @@ class LazyImporter:
     def __init__(self):
         cls = type(self)
         for module in self.MODULES:
-            # We need the quirky module=module argument to the lambda since otherwise the lookup for module in this
-            # scope happens at runtime rather than at definition. Thus, without it every property would try to import
-            # the last module in MODULES
+            # We need the quirky 'module=module' argument to the lambda since otherwise the lookup for 'module' in this
+            # scope would happen at runtime rather than at definition. Thus, without it, every property would try to
+            # import the last 'module' in MODULES.
             setattr(cls, module.split(".", 1)[0], property(lambda self, module=module: LazyImporter._import(module)))
 
     @staticmethod

From 9773089541aebcd34c097bf9867cd6cfd300c319 Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Wed, 17 Feb 2021 14:38:00 +0100
Subject: [PATCH 25/29] lint

---
 test/datasets_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/datasets_utils.py b/test/datasets_utils.py
index 9b462976d76..8da2b9b63e7 100644
--- a/test/datasets_utils.py
+++ b/test/datasets_utils.py
@@ -403,7 +403,7 @@ def test_num_examples(self, config):
     def test_transforms(self, config):
         mock = unittest.mock.Mock(wraps=lambda *args: args[0] if len(args) == 1 else args)
         for kwarg in self._TRANSFORM_KWARGS:
-            if not kwarg in self._HAS_SPECIAL_KWARG:
+            if kwarg not in self._HAS_SPECIAL_KWARG:
                 continue
 
             mock.reset_mock()

From c517a5a2ba32a2fa031a787fc771abde5876f11d Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Wed, 17 Feb 2021 15:49:14 +0100
Subject: [PATCH 26/29] force load opened PIL images

---
 test/datasets_utils.py | 33 ++++++---------------------------
 1 file changed, 6 insertions(+), 27 deletions(-)

diff --git a/test/datasets_utils.py b/test/datasets_utils.py
index 8da2b9b63e7..46df015cbbd 100644
--- a/test/datasets_utils.py
+++ b/test/datasets_utils.py
@@ -439,46 +439,25 @@ def create_dataset(
         ) as (dataset, info):
             # PIL.Image.open() only loads the image meta data upfront and keeps the file open until the first access
             # to the pixel data occurs. Trying to delete such a file results in an PermissionError on Windows. Thus, we
-            # track all lazily opened images and close the file handle before the file is deleted.
+            # force-load opened images.
             # This problem only occurs during testing since some tests, e.g. DatasetTestCase.test_feature_types open an
             # image, but never use the underlying data. During normal operation it is reasonable to assume that the
             # user wants to work with the image he just opened rather than deleting the underlying file.
-            with self._close_image_handles():
+            with self._force_load_images():
                 yield dataset, info
 
     @contextlib.contextmanager
-    def _close_image_handles(self):
-        module = inspect.getmodule(self.DATASET_CLASS)
-
-        def resolve_patch_object():
-            with contextlib.suppress(StopIteration):
-                return next(name for name, attr in vars(module).items() if attr is PIL.Image)
-
-            with contextlib.suppress(StopIteration):
-                name = next(name for name, attr in vars(module).items() if attr is PIL)
-                return f"{name}.Image"
-
-        obj = resolve_patch_object()
-        if not obj:
-            yield
-            return
-
-        lazily_opened_files = set()
-
+    def _force_load_images(self):
         open = PIL.Image.open
 
         def new(fp, *args, **kwargs):
             image = open(fp, *args, **kwargs)
             if isinstance(fp, (str, pathlib.Path)):
-                lazily_opened_files.add(image.fp)
+                image.load()
             return image
 
-        with unittest.mock.patch(f"{module.__name__}.{obj}.open", new=new):
-            try:
-                yield
-            finally:
-                for fh in lazily_opened_files:
-                    fh.close()
+        with unittest.mock.patch(f"PIL.Image.open", new=new):
+            yield
 
 
 class VideoDatasetTestCase(DatasetTestCase):

From 4c1ff7ce1cd2ff461c6f56ef596a39147fd1b8b3 Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Wed, 17 Feb 2021 15:55:35 +0100
Subject: [PATCH 27/29] lint

---
 test/datasets_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/datasets_utils.py b/test/datasets_utils.py
index 46df015cbbd..9450488646f 100644
--- a/test/datasets_utils.py
+++ b/test/datasets_utils.py
@@ -456,7 +456,7 @@ def new(fp, *args, **kwargs):
                 image.load()
             return image
 
-        with unittest.mock.patch(f"PIL.Image.open", new=new):
+        with unittest.mock.patch("PIL.Image.open", new=new):
             yield
 
 

From 170f700a17a0ca06cff40dc59b04ba1f607542d4 Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Wed, 17 Feb 2021 17:43:46 +0100
Subject: [PATCH 28/29] copy default config to avoid inplace modification

---
 test/datasets_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/datasets_utils.py b/test/datasets_utils.py
index 9450488646f..88c167f2771 100644
--- a/test/datasets_utils.py
+++ b/test/datasets_utils.py
@@ -266,7 +266,7 @@ def create_dataset(
                 for details.
         """
         if config is None:
-            config = self.CONFIGS[0]
+            config = self.CONFIGS[0].copy()
 
         special_kwargs, other_kwargs = self._split_kwargs(kwargs)
         config.update(other_kwargs)

From aba3ee005ea8ca976a56ad4b0d3bb86a1772bfe8 Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Wed, 17 Feb 2021 17:57:45 +0100
Subject: [PATCH 29/29] enable additional arg forwarding

---
 test/datasets_utils.py | 57 +++++++++++++++++++++++++++++++++---------
 test/test_datasets.py  | 20 +++++++--------
 2 files changed, 55 insertions(+), 22 deletions(-)

diff --git a/test/datasets_utils.py b/test/datasets_utils.py
index 88c167f2771..aa3e3f61be3 100644
--- a/test/datasets_utils.py
+++ b/test/datasets_utils.py
@@ -1,3 +1,4 @@
+import collections.abc
 import contextlib
 import functools
 import importlib
@@ -227,16 +228,27 @@ def test_baz(self):
         "download_and_extract_archive",
     }
 
-    def inject_fake_data(self, root: str, config: Dict[str, Any]) -> Dict[str, Any]:
-        """Inject fake data into the root of the dataset.
+    def inject_fake_data(
+        self, tmpdir: str, config: Dict[str, Any]
+    ) -> Union[int, Dict[str, Any], Tuple[Sequence[Any], Union[int, Dict[str, Any]]]]:
+        """Inject fake data for dataset into a temporary directory.
 
         Args:
-            root (str): Root of the dataset.
+            tmpdir (str): Path to a temporary directory. For most cases this acts as root directory for the dataset
+                to be created and in turn also for the fake data injected here.
             config (Dict[str, Any]): Configuration that will be used to create the dataset.
 
-        Returns:
-            info (Dict[str, Any]): Additional information about the injected fake data. Must contain the field
-                ``"num_examples"`` that corresponds to the length of the dataset to be created.
+        Needs to return one of the following:
+
+            1. (int): Number of examples in the dataset to be created,
+            2. (Dict[str, Any]): Additional information about the injected fake data. Must contain the field
+                ``"num_examples"`` that corresponds to the number of examples in the dataset to be created, or
+            3. (Tuple[Sequence[Any], Union[int, Dict[str, Any]]]): Additional required parameters that are passed to
+                the dataset constructor. The second element corresponds to cases 1. and 2.
+
+        If no ``args`` is returned (case 1. and 2.), the ``tmpdir`` is passed as first parameter to the dataset
+        constructor. In most cases this corresponds to ``root``. If the dataset has more parameters without default
+        values you need to explicitly pass them as explained in case 3.
         """
         raise NotImplementedError("You need to provide fake data in order for the tests to run.")
 
@@ -274,17 +286,38 @@ def create_dataset(
         if disable_download_extract is None:
             disable_download_extract = inject_fake_data
 
-        with get_tmp_dir() as root:
-            info = self.inject_fake_data(root, config) if inject_fake_data else None
-            if info is None or "num_examples" not in info:
+        with get_tmp_dir() as tmpdir:
+            output = self.inject_fake_data(tmpdir, config) if inject_fake_data else None
+            if output is None:
+                raise UsageError(
+                    "The method 'inject_fake_data' needs to return at least an integer indicating the number of "
+                    "examples for the current configuration."
+                )
+
+            if isinstance(output, collections.abc.Sequence) and len(output) == 2:
+                args, info = output
+            else:
+                args = (tmpdir,)
+                info = output
+
+            if isinstance(info, int):
+                info = dict(num_examples=info)
+            elif isinstance(info, dict):
+                if "num_examples" not in info:
+                    raise UsageError(
+                        "The information dictionary returned by the method 'inject_fake_data' must contain a "
+                        "'num_examples' field that holds the number of examples for the current configuration."
+                    )
+            else:
                 raise UsageError(
-                    "The method 'inject_fake_data' needs to return a dictionary that contains at least a "
-                    "'num_examples' field."
+                    f"The additional information returned by the method 'inject_fake_data' must be either an integer "
+                    f"indicating the number of examples for the current configuration or a dictionary with the "
+                    f"same content. Got {type(info)} instead."
                 )
 
             cm = self._disable_download_extract if disable_download_extract else nullcontext
             with cm(special_kwargs), disable_console_output():
-                dataset = self.DATASET_CLASS(root, **config, **special_kwargs)
+                dataset = self.DATASET_CLASS(*args, **config, **special_kwargs)
 
             yield dataset, info
 
diff --git a/test/test_datasets.py b/test/test_datasets.py
index 11b3f90db2a..8ec5be7de19 100644
--- a/test/test_datasets.py
+++ b/test/test_datasets.py
@@ -473,21 +473,21 @@ def test_repr_smoke(self):
 class Caltech256TestCase(datasets_utils.ImageDatasetTestCase):
     DATASET_CLASS = datasets.Caltech256
 
-    def inject_fake_data(self, root, config):
-        root = pathlib.Path(root) / "caltech256" / "256_ObjectCategories"
+    def inject_fake_data(self, tmpdir, config):
+        tmpdir = pathlib.Path(tmpdir) / "caltech256" / "256_ObjectCategories"
 
         categories = ((1, "ak47"), (127, "laptop-101"), (257, "clutter"))
         num_images_per_category = 2
 
         for idx, category in categories:
             datasets_utils.create_image_folder(
-                root,
+                tmpdir,
                 name=f"{idx:03d}.{category}",
-                file_name_fn=lambda image_idx: f"{idx:03d}_{image_idx:04d}.jpg",
+                file_name_fn=lambda image_idx: f"{idx:03d}_{image_idx + 1:04d}.jpg",
                 num_examples=num_images_per_category,
             )
 
-        return dict(num_examples=num_images_per_category * len(categories))
+        return num_images_per_category * len(categories)
 
 
 class CIFAR10TestCase(datasets_utils.ImageDatasetTestCase):
@@ -504,15 +504,15 @@ class CIFAR10TestCase(datasets_utils.ImageDatasetTestCase):
         categories_key="label_names",
     )
 
-    def inject_fake_data(self, root, config):
-        root = pathlib.Path(root) / self._VERSION_CONFIG["base_folder"]
-        os.makedirs(root)
+    def inject_fake_data(self, tmpdir, config):
+        tmpdir = pathlib.Path(tmpdir) / self._VERSION_CONFIG["base_folder"]
+        os.makedirs(tmpdir)
 
         num_images_per_file = 1
         for name in itertools.chain(self._VERSION_CONFIG["train_files"], self._VERSION_CONFIG["test_files"]):
-            self._create_batch_file(root, name, num_images_per_file)
+            self._create_batch_file(tmpdir, name, num_images_per_file)
 
-        categories = self._create_meta_file(root)
+        categories = self._create_meta_file(tmpdir)
 
         return dict(
             num_examples=num_images_per_file