|
11 | 11 | from torchvision.datasets import utils |
12 | 12 | from common_utils import get_tmp_dir |
13 | 13 | from fakedata_generation import mnist_root, cifar_root, imagenet_root, \ |
14 | | - cityscapes_root, svhn_root, ucf101_root, places365_root, widerface_root, stl10_root |
| 14 | + cityscapes_root, svhn_root, places365_root, widerface_root, stl10_root |
15 | 15 | import xml.etree.ElementTree as ET |
16 | 16 | from urllib.request import Request, urlopen |
17 | 17 | import itertools |
|
22 | 22 | import torch |
23 | 23 | import shutil |
24 | 24 | import json |
| 25 | +import random |
25 | 26 |
|
26 | 27 |
|
27 | 28 | try: |
@@ -261,29 +262,6 @@ def test_svhn(self, mock_check): |
261 | 262 | dataset = torchvision.datasets.SVHN(root, split="extra") |
262 | 263 | self.generic_classification_dataset_test(dataset, num_images=2) |
263 | 264 |
|
264 | | - @unittest.skipIf(not HAS_PYAV, "PyAV unavailable") |
265 | | - def test_ucf101(self): |
266 | | - cached_meta_data = None |
267 | | - with ucf101_root() as (root, ann_root): |
268 | | - for split in {True, False}: |
269 | | - for fold in range(1, 4): |
270 | | - for length in {10, 15, 20}: |
271 | | - dataset = torchvision.datasets.UCF101(root, ann_root, length, fold=fold, train=split, |
272 | | - num_workers=2, _precomputed_metadata=cached_meta_data) |
273 | | - if cached_meta_data is None: |
274 | | - cached_meta_data = dataset.metadata |
275 | | - self.assertGreater(len(dataset), 0) |
276 | | - |
277 | | - video, audio, label = dataset[0] |
278 | | - self.assertEqual(video.size(), (length, 320, 240, 3)) |
279 | | - self.assertEqual(audio.numel(), 0) |
280 | | - self.assertEqual(label, 0) |
281 | | - |
282 | | - video, audio, label = dataset[len(dataset) - 1] |
283 | | - self.assertEqual(video.size(), (length, 320, 240, 3)) |
284 | | - self.assertEqual(audio.numel(), 0) |
285 | | - self.assertEqual(label, 1) |
286 | | - |
287 | 265 | def test_places365(self): |
288 | 266 | for split, small in itertools.product(("train-standard", "train-challenge", "val"), (False, True)): |
289 | 267 | with places365_root(split=split, small=small) as places365: |
@@ -905,5 +883,56 @@ def test_captions(self): |
905 | 883 | self.assertEqual(tuple(captions), tuple(info["captions"])) |
906 | 884 |
|
907 | 885 |
|
class UCF101TestCase(datasets_utils.VideoDatasetTestCase):
    """Fake-data test case for ``torchvision.datasets.UCF101``.

    Generates a small fake video tree plus UCF101-style train/test split
    annotation files, covering all three folds for both splits.
    """

    DATASET_CLASS = datasets.UCF101

    # Exercise every (fold, split) combination the dataset accepts.
    CONFIGS = datasets_utils.combinations_grid(fold=(1, 2, 3), train=(True, False))

    def inject_fake_data(self, tmpdir, config):
        """Create fake videos and annotations under ``tmpdir``.

        Returns a tuple of ``((video_root, annotation_root), num_examples)``
        where ``num_examples`` is the number of videos listed in the
        annotation file selected by ``config``.
        """
        tmpdir = pathlib.Path(tmpdir)

        video_folder = tmpdir / "videos"
        os.makedirs(video_folder)
        video_files = self._create_videos(video_folder)

        # Fixed: original had a duplicated assignment
        # (`annotations_folder = annotations_folder = ...`).
        annotations_folder = tmpdir / "annotations"
        os.makedirs(annotations_folder)
        num_examples = self._create_annotation_files(annotations_folder, video_files, config["fold"], config["train"])

        return (str(video_folder), str(annotations_folder)), num_examples

    def _create_videos(self, root, num_examples_per_class=3):
        """Create fake .avi files for two classes; return paths relative to ``root``."""

        def file_name_fn(cls, idx, clips_per_group=2):
            # UCF101 naming scheme: v_<class>_g<group>_c<clip>.avi
            return f"v_{cls}_g{(idx // clips_per_group) + 1:02d}_c{(idx % clips_per_group) + 1:02d}.avi"

        video_files = [
            datasets_utils.create_video_folder(root, cls, lambda idx: file_name_fn(cls, idx), num_examples_per_class)
            for cls in ("ApplyEyeMakeup", "YoYo")
        ]
        return [path.relative_to(root) for path in itertools.chain(*video_files)]

    def _create_annotation_files(self, root, video_files, fold, train):
        """Write the annotation file for (fold, train) plus decoys for the others.

        A random, non-empty, non-exhaustive subset of ``video_files`` goes into
        the annotation file under test; the remaining videos fill every other
        (fold, train) annotation so that selecting the wrong file is detectable.
        Returns the number of videos in the file under test.
        """
        current_videos = random.sample(video_files, random.randrange(1, len(video_files) - 1))
        current_annotation = self._annotation_file_name(fold, train)
        self._create_annotation_file(root, current_annotation, current_videos)

        other_videos = set(video_files) - set(current_videos)
        other_annotations = [
            self._annotation_file_name(fold, train) for fold, train in itertools.product((1, 2, 3), (True, False))
        ]
        other_annotations.remove(current_annotation)
        for name in other_annotations:
            self._create_annotation_file(root, name, other_videos)

        return len(current_videos)

    def _annotation_file_name(self, fold, train):
        """Return the UCF101 split file name, e.g. ``trainlist01.txt``."""
        return f"{'train' if train else 'test'}list{fold:02d}.txt"

    def _create_annotation_file(self, root, name, video_files):
        """Write one video path per line (sorted) into ``root/name``."""
        with open(pathlib.Path(root) / name, "w") as fh:
            fh.writelines(f"{file}\n" for file in sorted(video_files))
| 936 | + |
# Run the full unittest suite when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()
0 commit comments