From 5e60e01c88d808143fa554664cfc613196bd2ea2 Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Fri, 16 Apr 2021 15:36:19 -0500 Subject: [PATCH 01/61] Initial commit --- torchvision/datasets/__init__.py | 4 +- torchvision/datasets/kinetics.py | 300 +++++++++++++++++++++++++++++-- 2 files changed, 284 insertions(+), 20 deletions(-) diff --git a/torchvision/datasets/__init__.py b/torchvision/datasets/__init__.py index b60fc7c7964..e67ba08d299 100644 --- a/torchvision/datasets/__init__.py +++ b/torchvision/datasets/__init__.py @@ -20,7 +20,7 @@ from .sbd import SBDataset from .vision import VisionDataset from .usps import USPS -from .kinetics import Kinetics400 +from .kinetics import Kinetics400, Kinetics from .hmdb51 import HMDB51 from .ucf101 import UCF101 from .places365 import Places365 @@ -34,6 +34,6 @@ 'Omniglot', 'SBU', 'Flickr8k', 'Flickr30k', 'VOCSegmentation', 'VOCDetection', 'Cityscapes', 'ImageNet', 'Caltech101', 'Caltech256', 'CelebA', 'WIDERFace', 'SBDataset', - 'VisionDataset', 'USPS', 'Kinetics400', 'HMDB51', 'UCF101', + 'VisionDataset', 'USPS', 'Kinetics400', "Kinetics", 'HMDB51', 'UCF101', 'Places365', 'Kitti', ) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index a8986986c17..d96d7fc09e4 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -1,15 +1,30 @@ -from .utils import list_dir +import urllib +import time +import os +import sys +import warnings + + +from os import path +import pandas as pd +from typing import Callable, Optional +from functools import partial +from multiprocessing import Pool + +from .utils import download_and_extract_archive from .folder import find_classes, make_dataset from .video_utils import VideoClips from .vision import VisionDataset -class Kinetics400(VisionDataset): - """ - `Kinetics-400 `_ +def _dl_wrap(tarpath, videopath, line): + download_and_extract_archive(line, tarpath, videopath) + +class Kinetics(VisionDataset): + """` Generic Kinetics `_ dataset. - Kinetics-400 is an action recognition video dataset. + Kinetics-400/600/700 are action recognition video datasets. This dataset consider every video as a collection of video clips of fixed size, specified by ``frames_per_clip``, where the step in frames between each clip is given by ``step_between_clips``. @@ -20,11 +35,9 @@ class Kinetics400(VisionDataset): Note that we drop clips which do not have exactly ``frames_per_clip`` elements, so not all frames in a video might be present. - Internally, it uses a VideoClips object to handle clip creation. - Args: - root (string): Root directory of the Kinetics-400 Dataset. Should be structured as follows: - + root (string): Root directory of the (split of the) Kinetics Dataset. + Directory should be structured as follows: .. code:: root/ @@ -35,29 +48,92 @@ class Kinetics400(VisionDataset): └── class2 ├── clipx.avi └── ... - + If the split is not defined, it is appended using the split argument. + n_classes (int): select between Kinetics-400, Kinetics-600, and Kinetics-700 + split (str): split of the dataset to consider; currently supports ["train", "val"] + frame_rate (float): If not None, interpolate different frame rate for each clip. frames_per_clip (int): number of frames in a clip step_between_clips (int): number of frames between each clip + annotation_path (str): path to official Kinetics annotation file. transform (callable, optional): A function/transform that takes in a TxHxWxC video and returns a transformed version. + download (bool): Download the official version of the dataset to root folder. + num_workers (int): Use multiple workers for VideoClips creation + _num_download_workers (int): Use multiprocessing in order to speed up download. Returns: tuple: A 3-tuple with the following entries: - - video (Tensor[T, H, W, C]): the `T` video frames + - video (Tensor[T, H, W, C]): the `T` video frames in torch.uint8 tensor - audio(Tensor[K, L]): the audio frames, where `K` is the number of channels - and `L` is the number of points + and `L` is the number of points in torch.float tensor - label (int): class of the video clip + + Raises: + RuntimeError: If ``download is True`` and the image archive is already extracted. """ - def __init__(self, root, frames_per_clip, step_between_clips=1, frame_rate=None, - extensions=('avi',), transform=None, _precomputed_metadata=None, - num_workers=1, _video_width=0, _video_height=0, - _video_min_dimension=0, _audio_samples=0, _audio_channels=0): - super(Kinetics400, self).__init__(root) + _FILES = { + 400: "https://s3.amazonaws.com/kinetics/400/{split}/k400_{split}_path.txt", + 600: "https://s3.amazonaws.com/kinetics/600/{split}/k600_{split}_path.txt", + 700: "https://s3.amazonaws.com/kinetics/700_2020/{split}/k700_2020_{split}_path.txt", + } + _ANNOTATION = { + 400: "https://s3.amazonaws.com/kinetics/400/annotations/{split}.csv", + 600: "https://s3.amazonaws.com/kinetics/600/annotations/{split}.txt", + 700: "https://s3.amazonaws.com/kinetics/700_2020/annotations/{split}.csv", + } + + def __init__( + self, + root: str, + n_classes: int = 400, + split: str = "train", + frame_rate: float = None, + frames_per_clip: int = 5, + step_between_clips: int = 1, + annotation_path: str = None, + transform: Optional[Callable] = None, + extensions=("avi", "mp4"), + download: bool = False, + num_workers: int = 1, + _precomputed_metadata=None, + _num_download_workers=1, + _video_width=0, + _video_height=0, + _video_min_dimension=0, + _audio_samples=0, + _audio_channels=0, + ) -> None: + + # TODO: support test + assert split in ["train", "val"] + assert n_classes in [400, 700] + self.n_classes = n_classes + self.extensions = extensions + self._num_download_workers = _num_download_workers + + # set up self.root and self.split + self._set_up_paths(root, split) + # load annotation files + if annotation_path is not None: + self.annotations = pd.read_csv(annotation_path) + else: + self.annotations = pd.read_csv( + self._ANNOTATION[self.n_classes].format(split=self.split) + ) + + if download: + self.download_and_process_videos() + # init folder dataset at the end + super().__init__(self.root) + + # and then figure out the rest self.classes, class_to_idx = find_classes(self.root) - self.samples = make_dataset(self.root, class_to_idx, extensions, is_valid_file=None) + self.samples = make_dataset( + self.root, class_to_idx, extensions, is_valid_file=None + ) video_list = [x[0] for x in self.samples] self.video_clips = VideoClips( video_list, @@ -74,6 +150,106 @@ def __init__(self, root, frames_per_clip, step_between_clips=1, frame_rate=None, ) self.transform = transform + def _set_up_paths(self, root, split) -> None: + """Sets up self.root and self.split to avoid confusion. + Split in the root (e.g. kinetics/val) overrides the setting in + the split. + """ + self.split = split + if path.basename(root) == split: + self.root = root + elif path.basename(root) in ["train", "val"]: + self.root = root + self.split = path.basename(root) + warnings.warn( + f"Root {root} points to a different split than {split}." + f"Assigning self.split to {self.split}." + ) + else: + self.root = path.join(root, split) + + def download_and_process_videos(self) -> None: + """ + downloads all the videos to the _root_ folder + in the expected format + """ + tic = time.time() + _ = self._download_videos() + toc = time.time() + print("Elapsed time for downloading in mins ", (toc - tic) / 60) + self._make_ds_structure() + toc2 = time.time() + print("Elapsed time for processing in mins ", (toc2 - toc) / 60) + print("Elapsed time overall in mins ", (toc2 - tic) / 60) + + def _download_videos(self) -> int: + """download tarballs containing the video to + "tars" folder and extract them into the _split_ folder + where split is one of the official dataset splits. + + Raises: + RuntimeError: if download folder exists, break to prevent + downloading entire dataset again. + """ + if path.exists(self.root): + raise RuntimeError( + f"The directory {self.root} already exists. If you want to re-download or re-extract the images, " + f"delete the directory." + ) + + file_url = urllib.request.urlopen( + self._FILES[self.n_classes].format(split=self.split) + ) + kinetics_dir, _ = path.split(self.root) + tar_path = path.join(kinetics_dir, "tars") + + if self._num_download_workers < 2: + for line in file_url: + line = str(line.decode("utf-8")).replace("\n", "") + dl_wrap(tar_path, self.root, line) + else: + part = partial(_dl_wrap, tar_path, self.root) + lines = [str(line.decode("utf-8")).replace("\n", "") for line in file_url] + poolproc = Pool(self._num_download_workers) + poolproc.map(part, lines) + return 0 + + + + def _make_ds_structure(self): + """move videos from + root/ + ├── clip1.avi + ├── clip2.avi + + to the correct format as described below: + root/ + ├── class1 + │ ├── clip1.avi + + """ + for file in os.listdir(self.root): + if file.endswith(self.extensions): + ytid = file[:11] + try: + df = self.annotations[self.annotations.youtube_id == ytid] + label = ( + df.label.item() + .replace(" ", "_") + .replace("'", "") + .replace("(", "") + .replace(")", "") + ) + os.makedirs(os.path.join(self.root, label), exist_ok=True) + os.replace( + os.path.join(self.root, file), + os.path.join(self.root, label, file), + ) + except: + warnings.warn( + f"Unexpected error while processing {ytid}:", sys.exc_info()[0] + ) + @property def metadata(self): return self.video_clips.metadata @@ -89,3 +265,91 @@ def __getitem__(self, idx): video = self.transform(video) return video, audio, label + + +class Kinetics400(Kinetics): + """ + `Kinetics-400 `_ + dataset. + + Kinetics-400 is an action recognition video dataset. + This dataset consider every video as a collection of video clips of fixed size, specified + by ``frames_per_clip``, where the step in frames between each clip is given by + ``step_between_clips``. + + To give an example, for 2 videos with 10 and 15 frames respectively, if ``frames_per_clip=5`` + and ``step_between_clips=5``, the dataset size will be (2 + 3) = 5, where the first two + elements will come from video 1, and the next three elements from video 2. + Note that we drop clips which do not have exactly ``frames_per_clip`` elements, so not all + frames in a video might be present. + + Internally, it uses a VideoClips object to handle clip creation. + + Args: + root (string): Root directory of the Kinetics-400 Dataset. Should be structured as follows: + + .. code:: + + root/ + ├── class1 + │ ├── clip1.avi + │ ├── clip2.avi + │ └── ... + └── class2 + ├── clipx.avi + └── ... + + frames_per_clip (int): number of frames in a clip + step_between_clips (int): number of frames between each clip + transform (callable, optional): A function/transform that takes in a TxHxWxC video + and returns a transformed version. + + Returns: + tuple: A 3-tuple with the following entries: + + - video (Tensor[T, H, W, C]): the `T` video frames + - audio(Tensor[K, L]): the audio frames, where `K` is the number of channels + and `L` is the number of points + - label (int): class of the video clip + """ + + def __init__( + self, + root, + frames_per_clip, + step_between_clips=1, + frame_rate=None, + extensions=("avi",), + transform=None, + _precomputed_metadata=None, + num_workers=1, + _video_width=0, + _video_height=0, + _video_min_dimension=0, + _audio_samples=0, + _audio_channels=0, + ): + warnings.warn( + "torchvision now supports multiple versions of Kinetics" + "datasets, available via Kinetics class with a separate " + "n_classes parameter. This function might get deprecated in the future." + ) + + super(Kinetics400, self).__init__( + root=root, + n_classes=400, + frame_rate=frame_rate, + step_between_clips=step_between_clips, + frames_per_clip=frames_per_clip, + extensions=extensions, + transform=transform, + _precomputed_metadata=_precomputed_metadata, + num_workers=num_workers, + _video_width=_video_width, + _video_height=_video_height, + _video_min_dimension=_video_min_dimension, + _audio_channels=_audio_channels, + _audio_samples=_audio_samples, + download=False, + ) + From 62c5e0595d8309b77475399ba5ba692b8e8c8633 Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Tue, 20 Apr 2021 14:48:24 +0100 Subject: [PATCH 02/61] pmeiers comments Co-authored-by: Philip Meier --- torchvision/datasets/kinetics.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index d96d7fc09e4..3e5e38bca67 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -245,9 +245,9 @@ def _make_ds_structure(self): os.path.join(self.root, file), os.path.join(self.root, label, file), ) - except: + except Exception as error: warnings.warn( - f"Unexpected error while processing {ytid}:", sys.exc_info()[0] + f"Unexpected error while processing {ytid}: {error}" ) @property @@ -352,4 +352,3 @@ def __init__( _audio_samples=_audio_samples, download=False, ) - From 090e526e1d078d030bf969b937e24bd441c256a5 Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Tue, 20 Apr 2021 14:48:50 +0100 Subject: [PATCH 03/61] pmeiers changes Co-authored-by: Philip Meier --- torchvision/datasets/kinetics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index 3e5e38bca67..29e43f1364a 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -203,7 +203,7 @@ def _download_videos(self) -> int: kinetics_dir, _ = path.split(self.root) tar_path = path.join(kinetics_dir, "tars") - if self._num_download_workers < 2: + if self._num_download_workers > 1: for line in file_url: line = str(line.decode("utf-8")).replace("\n", "") dl_wrap(tar_path, self.root, line) From 9403bcf74c83d963211930deb141c0599a70cb36 Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Tue, 20 Apr 2021 14:49:11 +0100 Subject: [PATCH 04/61] pmeiers comments Co-authored-by: Philip Meier --- torchvision/datasets/kinetics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index 29e43f1364a..7841f5a555b 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -87,7 +87,7 @@ class Kinetics(VisionDataset): def __init__( self, root: str, - n_classes: int = 400, + num_classes: str = "400", split: str = "train", frame_rate: float = None, frames_per_clip: int = 5, From e08cf092b6e6004aad9ebab79d85f01534d1e171 Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Thu, 22 Apr 2021 12:24:47 -0500 Subject: [PATCH 05/61] replace pandas with system library to avoid crashes --- torchvision/datasets/kinetics.py | 76 ++++++++++++++------------------ 1 file changed, 33 insertions(+), 43 deletions(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index 7841f5a555b..a97a2dcb29e 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -1,17 +1,16 @@ import urllib import time import os -import sys import warnings from os import path -import pandas as pd +import csv from typing import Callable, Optional from functools import partial from multiprocessing import Pool -from .utils import download_and_extract_archive +from .utils import download_and_extract_archive, download_url from .folder import find_classes, make_dataset from .video_utils import VideoClips from .vision import VisionDataset @@ -74,14 +73,14 @@ class Kinetics(VisionDataset): """ _FILES = { - 400: "https://s3.amazonaws.com/kinetics/400/{split}/k400_{split}_path.txt", - 600: "https://s3.amazonaws.com/kinetics/600/{split}/k600_{split}_path.txt", - 700: "https://s3.amazonaws.com/kinetics/700_2020/{split}/k700_2020_{split}_path.txt", + "400": "https://s3.amazonaws.com/kinetics/400/{split}/k400_{split}_path.txt", + "600": "https://s3.amazonaws.com/kinetics/600/{split}/k600_{split}_path.txt", + "700": "https://s3.amazonaws.com/kinetics/700_2020/{split}/k700_2020_{split}_path.txt", } _ANNOTATION = { - 400: "https://s3.amazonaws.com/kinetics/400/annotations/{split}.csv", - 600: "https://s3.amazonaws.com/kinetics/600/annotations/{split}.txt", - 700: "https://s3.amazonaws.com/kinetics/700_2020/annotations/{split}.csv", + "400": "https://s3.amazonaws.com/kinetics/400/annotations/{split}.csv", + "600": "https://s3.amazonaws.com/kinetics/600/annotations/{split}.txt", + "700": "https://s3.amazonaws.com/kinetics/700_2020/annotations/{split}.csv", } def __init__( @@ -108,21 +107,16 @@ def __init__( # TODO: support test assert split in ["train", "val"] - assert n_classes in [400, 700] - self.n_classes = n_classes + assert num_classes in ["400", "600", "700"] + self.n_classes = num_classes self.extensions = extensions self._num_download_workers = _num_download_workers - # set up self.root and self.split self._set_up_paths(root, split) - # load annotation files if annotation_path is not None: - self.annotations = pd.read_csv(annotation_path) - else: - self.annotations = pd.read_csv( - self._ANNOTATION[self.n_classes].format(split=self.split) - ) + self.annotations = annotation_path + if download: self.download_and_process_videos() @@ -174,7 +168,7 @@ def download_and_process_videos(self) -> None: in the expected format """ tic = time.time() - _ = self._download_videos() + self._download_videos() toc = time.time() print("Elapsed time for downloading in mins ", (toc - tic) / 60) self._make_ds_structure() @@ -182,7 +176,7 @@ def download_and_process_videos(self) -> None: print("Elapsed time for processing in mins ", (toc2 - toc) / 60) print("Elapsed time overall in mins ", (toc2 - tic) / 60) - def _download_videos(self) -> int: + def _download_videos(self) -> None: """download tarballs containing the video to "tars" folder and extract them into the _split_ folder where split is one of the official dataset splits. @@ -202,17 +196,21 @@ def _download_videos(self) -> int: ) kinetics_dir, _ = path.split(self.root) tar_path = path.join(kinetics_dir, "tars") + annotation_path = path.join(kinetics_dir, "annotations") + + # download annotations + download_url(self._ANNOTATION[self.n_classes].format(split=self.split), annotation_path) + self.annotations = os.path.join(annotation_path, f"{self.split}.csv") - if self._num_download_workers > 1: + if self._num_download_workers == 1: for line in file_url: line = str(line.decode("utf-8")).replace("\n", "") - dl_wrap(tar_path, self.root, line) + download_and_extract_archive(line, tar_path, self.root) else: part = partial(_dl_wrap, tar_path, self.root) lines = [str(line.decode("utf-8")).replace("\n", "") for line in file_url] poolproc = Pool(self._num_download_workers) poolproc.map(part, lines) - return 0 @@ -228,27 +226,19 @@ def _make_ds_structure(self): │ ├── clip1.avi """ - for file in os.listdir(self.root): - if file.endswith(self.extensions): - ytid = file[:11] - try: - df = self.annotations[self.annotations.youtube_id == ytid] - label = ( - df.label.item() - .replace(" ", "_") - .replace("'", "") - .replace("(", "") - .replace(")", "") - ) - os.makedirs(os.path.join(self.root, label), exist_ok=True) + file_tmp = "{ytid}_{start:06}_{end:06}.mp4" + with open(self.annotations) as csvfile: + reader = csv.DictReader(csvfile) + for row in reader: + f = file_tmp.format(ytid=row['youtube_id'],start=int(row['time_start']), end=int(row['time_end'])) + label = row["label"].replace(" ", "_").replace("'", "").replace("(", "").replace(")", "") + os.makedirs(os.path.join(self.root, label), exist_ok=True) + existing_file = os.path.join(self.root, f) + if os.path.isfile(existing_file): os.replace( - os.path.join(self.root, file), - os.path.join(self.root, label, file), - ) - except Exception as error: - warnings.warn( - f"Unexpected error while processing {ytid}: {error}" - ) + existing_file, + os.path.join(self.root, label, f), + ) @property def metadata(self): From 29a4f038d3bcc3737a149f9938f86629f7c3dffb Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Thu, 22 Apr 2021 12:43:01 -0500 Subject: [PATCH 06/61] Lint --- torchvision/datasets/kinetics.py | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index a97a2dcb29e..ee4909a43db 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -19,6 +19,7 @@ def _dl_wrap(tarpath, videopath, line): download_and_extract_archive(line, tarpath, videopath) + class Kinetics(VisionDataset): """` Generic Kinetics `_ dataset. @@ -116,7 +117,6 @@ def __init__( if annotation_path is not None: self.annotations = annotation_path - if download: self.download_and_process_videos() @@ -199,7 +199,9 @@ def _download_videos(self) -> None: annotation_path = path.join(kinetics_dir, "annotations") # download annotations - download_url(self._ANNOTATION[self.n_classes].format(split=self.split), annotation_path) + download_url( + self._ANNOTATION[self.n_classes].format(split=self.split), annotation_path + ) self.annotations = os.path.join(annotation_path, f"{self.split}.csv") if self._num_download_workers == 1: @@ -212,8 +214,6 @@ def _download_videos(self) -> None: poolproc = Pool(self._num_download_workers) poolproc.map(part, lines) - - def _make_ds_structure(self): """move videos from root/ @@ -230,15 +230,24 @@ def _make_ds_structure(self): with open(self.annotations) as csvfile: reader = csv.DictReader(csvfile) for row in reader: - f = file_tmp.format(ytid=row['youtube_id'],start=int(row['time_start']), end=int(row['time_end'])) - label = row["label"].replace(" ", "_").replace("'", "").replace("(", "").replace(")", "") + f = file_tmp.format( + ytid=row["youtube_id"], + start=int(row["time_start"]), + end=int(row["time_end"]), + ) + label = ( + row["label"] + .replace(" ", "_") + .replace("'", "") + .replace("(", "") + .replace(")", "") + ) os.makedirs(os.path.join(self.root, label), exist_ok=True) existing_file = os.path.join(self.root, f) if os.path.isfile(existing_file): os.replace( - existing_file, - os.path.join(self.root, label, f), - ) + existing_file, os.path.join(self.root, label, f), + ) @property def metadata(self): @@ -327,7 +336,7 @@ def __init__( super(Kinetics400, self).__init__( root=root, - n_classes=400, + num_classes="400", frame_rate=frame_rate, step_between_clips=step_between_clips, frames_per_clip=frames_per_clip, From 8cd5209be9d92208ab540c3c8a6dca24bbcd2c78 Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Thu, 22 Apr 2021 12:51:29 -0500 Subject: [PATCH 07/61] Lint --- torchvision/datasets/kinetics.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index ee4909a43db..77a767b43c9 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -36,7 +36,7 @@ class Kinetics(VisionDataset): frames in a video might be present. Args: - root (string): Root directory of the (split of the) Kinetics Dataset. + root (string): Root directory of the (split of the) Kinetics Dataset. Directory should be structured as follows: .. code:: @@ -49,7 +49,7 @@ class Kinetics(VisionDataset): ├── clipx.avi └── ... If the split is not defined, it is appended using the split argument. - n_classes (int): select between Kinetics-400, Kinetics-600, and Kinetics-700 + n_classes (int): select between Kinetics-400, Kinetics-600, and Kinetics-700 split (str): split of the dataset to consider; currently supports ["train", "val"] frame_rate (float): If not None, interpolate different frame rate for each clip. frames_per_clip (int): number of frames in a clip @@ -68,7 +68,7 @@ class Kinetics(VisionDataset): - audio(Tensor[K, L]): the audio frames, where `K` is the number of channels and `L` is the number of points in torch.float tensor - label (int): class of the video clip - + Raises: RuntimeError: If ``download is True`` and the image archive is already extracted. """ @@ -146,7 +146,7 @@ def __init__( def _set_up_paths(self, root, split) -> None: """Sets up self.root and self.split to avoid confusion. - Split in the root (e.g. kinetics/val) overrides the setting in + Split in the root (e.g. kinetics/val) overrides the setting in the split. """ self.split = split @@ -177,7 +177,7 @@ def download_and_process_videos(self) -> None: print("Elapsed time overall in mins ", (toc2 - tic) / 60) def _download_videos(self) -> None: - """download tarballs containing the video to + """download tarballs containing the video to "tars" folder and extract them into the _split_ folder where split is one of the official dataset splits. @@ -215,11 +215,11 @@ def _download_videos(self) -> None: poolproc.map(part, lines) def _make_ds_structure(self): - """move videos from + """move videos from root/ ├── clip1.avi ├── clip2.avi - + to the correct format as described below: root/ ├── class1 From a6d2490023fb3f8ad711a87b6ad8a83046feafa7 Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Thu, 22 Apr 2021 13:40:39 -0500 Subject: [PATCH 08/61] fixing unittest --- torchvision/datasets/kinetics.py | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index 77a767b43c9..95aaffbf9d7 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -113,7 +113,8 @@ def __init__( self.extensions = extensions self._num_download_workers = _num_download_workers - self._set_up_paths(root, split) + self.root = root + self.split = split if annotation_path is not None: self.annotations = annotation_path @@ -144,24 +145,6 @@ def __init__( ) self.transform = transform - def _set_up_paths(self, root, split) -> None: - """Sets up self.root and self.split to avoid confusion. - Split in the root (e.g. kinetics/val) overrides the setting in - the split. - """ - self.split = split - if path.basename(root) == split: - self.root = root - elif path.basename(root) in ["train", "val"]: - self.root = root - self.split = path.basename(root) - warnings.warn( - f"Root {root} points to a different split than {split}." - f"Assigning self.split to {self.split}." - ) - else: - self.root = path.join(root, split) - def download_and_process_videos(self) -> None: """ downloads all the videos to the _root_ folder From 93d1444dec084f412912d7522aa01544220bb74f Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Thu, 22 Apr 2021 22:50:05 +0100 Subject: [PATCH 09/61] Minor comments removal --- torchvision/datasets/kinetics.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index 95aaffbf9d7..3bf4d9f9bb0 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -121,10 +121,8 @@ def __init__( if download: self.download_and_process_videos() - # init folder dataset at the end super().__init__(self.root) - # and then figure out the rest self.classes, class_to_idx = find_classes(self.root) self.samples = make_dataset( self.root, class_to_idx, extensions, is_valid_file=None From 9e3d3f3a0f39c8bfc187eff4e79a9920aed29a89 Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Fri, 30 Apr 2021 10:26:11 +0100 Subject: [PATCH 10/61] pmeier comments Co-authored-by: Philip Meier --- torchvision/datasets/kinetics.py | 1 - 1 file changed, 1 deletion(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index 3bf4d9f9bb0..fa3b1f25144 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -179,7 +179,6 @@ def _download_videos(self) -> None: tar_path = path.join(kinetics_dir, "tars") annotation_path = path.join(kinetics_dir, "annotations") - # download annotations download_url( self._ANNOTATION[self.n_classes].format(split=self.split), annotation_path ) From 139ec6d0623e341964fd713e895e4468c7f4dea0 Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Fri, 30 Apr 2021 04:30:29 -0500 Subject: [PATCH 11/61] remove asserts --- torchvision/datasets/kinetics.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index 95aaffbf9d7..60b758f7f68 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -10,7 +10,7 @@ from functools import partial from multiprocessing import Pool -from .utils import download_and_extract_archive, download_url +from .utils import download_and_extract_archive, download_url, verify_str_arg from .folder import find_classes, make_dataset from .video_utils import VideoClips from .vision import VisionDataset @@ -107,8 +107,8 @@ def __init__( ) -> None: # TODO: support test - assert split in ["train", "val"] - assert num_classes in ["400", "600", "700"] + verify_str_arg(split, arg="split", valid_values=['train', 'val']) + verify_str_arg(num_classes, arg="num_classes", valid_values=["400", "600", "700"]) self.n_classes = num_classes self.extensions = extensions self._num_download_workers = _num_download_workers From 33a9f98644203a174db10c69a9008f931d287652 Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Fri, 30 Apr 2021 04:32:55 -0500 Subject: [PATCH 12/61] address pmeier formatting changes --- torchvision/datasets/kinetics.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index 60b758f7f68..a241fac0d79 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -73,12 +73,12 @@ class Kinetics(VisionDataset): RuntimeError: If ``download is True`` and the image archive is already extracted. """ - _FILES = { + _TAR_URLS = { "400": "https://s3.amazonaws.com/kinetics/400/{split}/k400_{split}_path.txt", "600": "https://s3.amazonaws.com/kinetics/600/{split}/k600_{split}_path.txt", "700": "https://s3.amazonaws.com/kinetics/700_2020/{split}/k700_2020_{split}_path.txt", } - _ANNOTATION = { + _ANNOTATION_URLS = { "400": "https://s3.amazonaws.com/kinetics/400/annotations/{split}.csv", "600": "https://s3.amazonaws.com/kinetics/600/annotations/{split}.txt", "700": "https://s3.amazonaws.com/kinetics/700_2020/annotations/{split}.csv", @@ -126,9 +126,8 @@ def __init__( # and then figure out the rest self.classes, class_to_idx = find_classes(self.root) - self.samples = make_dataset( - self.root, class_to_idx, extensions, is_valid_file=None - ) + self.samples = make_dataset(self.root, class_to_idx, extensions, is_valid_file=None) + video_list = [x[0] for x in self.samples] self.video_clips = VideoClips( video_list, @@ -175,7 +174,7 @@ def _download_videos(self) -> None: ) file_url = urllib.request.urlopen( - self._FILES[self.n_classes].format(split=self.split) + self._TAR_URLS[self.n_classes].format(split=self.split) ) kinetics_dir, _ = path.split(self.root) tar_path = path.join(kinetics_dir, "tars") @@ -183,7 +182,7 @@ def _download_videos(self) -> None: # download annotations download_url( - self._ANNOTATION[self.n_classes].format(split=self.split), annotation_path + self._ANNOTATION_URLS[self.n_classes].format(split=self.split), annotation_path ) self.annotations = os.path.join(annotation_path, f"{self.split}.csv") From abdd2f6346a5159d7212b9c985d016477fec9f42 Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Fri, 30 Apr 2021 10:33:11 +0100 Subject: [PATCH 13/61] address pmeier changes Co-authored-by: Philip Meier --- torchvision/datasets/kinetics.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index fa3b1f25144..6b10acfa3f5 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -144,10 +144,7 @@ def __init__( self.transform = transform def download_and_process_videos(self) -> None: - """ - downloads all the videos to the _root_ folder - in the expected format - """ + """Downloads all the videos to the _root_ folder in the expected format.""" tic = time.time() self._download_videos() toc = time.time() From 1460886f5b8b106021f92ad915915c0505c4305c Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Fri, 30 Apr 2021 10:34:14 +0100 Subject: [PATCH 14/61] pmeier changes Co-authored-by: Philip Meier --- torchvision/datasets/kinetics.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index 6b10acfa3f5..ebc8b1cd178 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -306,9 +306,8 @@ def __init__( _audio_channels=0, ): warnings.warn( - "torchvision now supports multiple versions of Kinetics" - "datasets, available via Kinetics class with a separate " - "n_classes parameter. This function might get deprecated in the future." + "Kinetics400 is deprecated and will be removed in a future release." + "It was replaced by Kinetics(..., n_classes="400")". ) super(Kinetics400, self).__init__( From 7b069066d043f74d5e0dfaa1c25e251b05ad7a1b Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Fri, 30 Apr 2021 04:35:19 -0500 Subject: [PATCH 15/61] rename n_classes to num_classes --- torchvision/datasets/kinetics.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index a241fac0d79..3ca02846e82 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -49,7 +49,7 @@ class Kinetics(VisionDataset): ├── clipx.avi └── ... If the split is not defined, it is appended using the split argument. - n_classes (int): select between Kinetics-400, Kinetics-600, and Kinetics-700 + num_classes (int): select between Kinetics-400, Kinetics-600, and Kinetics-700 split (str): split of the dataset to consider; currently supports ["train", "val"] frame_rate (float): If not None, interpolate different frame rate for each clip. frames_per_clip (int): number of frames in a clip @@ -109,7 +109,7 @@ def __init__( # TODO: support test verify_str_arg(split, arg="split", valid_values=['train', 'val']) verify_str_arg(num_classes, arg="num_classes", valid_values=["400", "600", "700"]) - self.n_classes = num_classes + self.num_classes = num_classes self.extensions = extensions self._num_download_workers = _num_download_workers @@ -174,7 +174,7 @@ def _download_videos(self) -> None: ) file_url = urllib.request.urlopen( - self._TAR_URLS[self.n_classes].format(split=self.split) + self._TAR_URLS[self.num_classes].format(split=self.split) ) kinetics_dir, _ = path.split(self.root) tar_path = path.join(kinetics_dir, "tars") @@ -182,7 +182,7 @@ def _download_videos(self) -> None: # download annotations download_url( - self._ANNOTATION_URLS[self.n_classes].format(split=self.split), annotation_path + self._ANNOTATION_URLS[self.num_classes].format(split=self.split), annotation_path ) self.annotations = os.path.join(annotation_path, f"{self.split}.csv") @@ -313,7 +313,7 @@ def __init__( warnings.warn( "torchvision now supports multiple versions of Kinetics" "datasets, available via Kinetics class with a separate " - "n_classes parameter. This function might get deprecated in the future." + "num_classes parameter. This function might get deprecated in the future." ) super(Kinetics400, self).__init__( From e76f4aba1bbaf4b07b5098f6a0c9d54e697f47bd Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Fri, 30 Apr 2021 04:38:36 -0500 Subject: [PATCH 16/61] formatting changes --- torchvision/datasets/kinetics.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index 57fd72aed85..f641b25d550 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -44,6 +44,7 @@ class Kinetics(VisionDataset): ├── class1 │ ├── clip1.avi │ ├── clip2.avi + │ ├── clip3.mp4 │ └── ... └── class2 ├── clipx.avi @@ -269,6 +270,7 @@ class Kinetics400(Kinetics): ├── class1 │ ├── clip1.avi │ ├── clip2.avi + │ ├── clip3.mp4 │ └── ... └── class2 ├── clipx.avi @@ -306,7 +308,7 @@ def __init__( ): warnings.warn( "Kinetics400 is deprecated and will be removed in a future release." - "It was replaced by Kinetics(..., num_classes="400")". + "It was replaced by Kinetics(..., num_classes=\"400\")". ) super(Kinetics400, self).__init__( From 0a8f2164c8c130f512a2e08a9ddf5b9a8520296c Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Fri, 30 Apr 2021 04:41:38 -0500 Subject: [PATCH 17/61] doc change to add ".mp4" to backported class --- torchvision/datasets/kinetics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index f641b25d550..1c654cb969b 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -296,7 +296,7 @@ def __init__( frames_per_clip, step_between_clips=1, frame_rate=None, - extensions=("avi",), + extensions=("avi", "mp4"), transform=None, _precomputed_metadata=None, num_workers=1, From 94a40aab6e5c281fb4b1fc73126583518575a14f Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Fri, 30 Apr 2021 04:47:43 -0500 Subject: [PATCH 18/61] formatting to correct line length --- torchvision/datasets/kinetics.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index 1c654cb969b..4212004c05c 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -155,13 +155,11 @@ def download_and_process_videos(self) -> None: print("Elapsed time overall in mins ", (toc2 - tic) / 60) def _download_videos(self) -> None: - """download tarballs containing the video to - "tars" folder and extract them into the _split_ folder - where split is one of the official dataset splits. + """download tarballs containing the video to "tars" folder and extract them into the _split_ folder where + split is one of the official dataset splits. Raises: - RuntimeError: if download folder exists, break to prevent - downloading entire dataset again. + RuntimeError: if download folder exists, break to prevent downloading entire dataset again. """ if path.exists(self.root): raise RuntimeError( From c585a5f62bf6af9902f5d5dd1377120441f46685 Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Fri, 30 Apr 2021 04:56:45 -0500 Subject: [PATCH 19/61] adding **kwargs to Kinetics400 class --- torchvision/datasets/kinetics.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index 4212004c05c..742f18c4d20 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -42,12 +42,12 @@ class Kinetics(VisionDataset): root/ ├── class1 - │ ├── clip1.avi - │ ├── clip2.avi + │ ├── clip1.mp4 + │ ├── clip2.mp4 │ ├── clip3.mp4 │ └── ... └── class2 - ├── clipx.avi + ├── clipx.mp4 └── ... If the split is not defined, it is appended using the split argument. num_classes (int): select between Kinetics-400, Kinetics-600, and Kinetics-700 @@ -303,6 +303,7 @@ def __init__( _video_min_dimension=0, _audio_samples=0, _audio_channels=0, + **kwargs ): warnings.warn( "Kinetics400 is deprecated and will be removed in a future release." From 8cacd804513d5d1df79288cd90dc2a1ed417da4f Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Fri, 30 Apr 2021 06:08:07 -0500 Subject: [PATCH 20/61] remove urlib request and download the file directly --- torchvision/datasets/kinetics.py | 35 ++++++++++++++++---------------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index 742f18c4d20..5b750679b7c 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -1,4 +1,3 @@ -import urllib import time import os import warnings @@ -167,25 +166,26 @@ def _download_videos(self) -> None: f"delete the directory." ) - file_url = urllib.request.urlopen( - self._TAR_URLS[self.num_classes].format(split=self.split) - ) - kinetics_dir, _ = path.split(self.root) + kinetics_dir, split = path.split(self.root) + assert split == self.split tar_path = path.join(kinetics_dir, "tars") annotation_path = path.join(kinetics_dir, "annotations") + file_list_path = path.join(kinetics_dir, "files") - download_url( - self._ANNOTATION_URLS[self.num_classes].format(split=self.split), annotation_path - ) - self.annotations = os.path.join(annotation_path, f"{self.split}.csv") + split_url = self._TAR_URLS[self.num_classes].format(split=self.split) + download_url(split_url, file_list_path) + list_video_urls = open(path.join(file_list_path, path.basename(split_url)), "r") + + download_url(self._ANNOTATION_URLS[self.num_classes].format(split=self.split), annotation_path) + self.annotations = path.join(annotation_path, f"{self.split}.csv") if self._num_download_workers == 1: - for line in file_url: - line = str(line.decode("utf-8")).replace("\n", "") + for line in list_video_urls.readlines(): + line = str(line).replace("\n", "") download_and_extract_archive(line, tar_path, self.root) else: part = partial(_dl_wrap, tar_path, self.root) - lines = [str(line.decode("utf-8")).replace("\n", "") for line in file_url] + lines = [str(line).replace("\n", "") for line in list_video_urls.readlines()] poolproc = Pool(self._num_download_workers) poolproc.map(part, lines) @@ -217,11 +217,11 @@ def _make_ds_structure(self): .replace("(", "") .replace(")", "") ) - os.makedirs(os.path.join(self.root, label), exist_ok=True) - existing_file = os.path.join(self.root, f) - if os.path.isfile(existing_file): + os.makedirs(path.join(self.root, label), exist_ok=True) + existing_file = path.join(self.root, f) + if path.isfile(existing_file): os.replace( - existing_file, os.path.join(self.root, label, f), + existing_file, path.join(self.root, label, f), ) @property @@ -307,8 +307,7 @@ def __init__( ): warnings.warn( "Kinetics400 is deprecated and will be removed in a future release." - "It was replaced by Kinetics(..., num_classes=\"400\")". - ) + "It was replaced by Kinetics(..., num_classes=\"400\").") super(Kinetics400, self).__init__( root=root, From 802f8f9f95315211b03ae3046554e002aeb3aa04 Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Fri, 30 Apr 2021 06:15:41 -0500 Subject: [PATCH 21/61] annotations and files can be already downloaded --- torchvision/datasets/kinetics.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index 5b750679b7c..565a9225a65 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -173,10 +173,13 @@ def _download_videos(self) -> None: file_list_path = path.join(kinetics_dir, "files") split_url = self._TAR_URLS[self.num_classes].format(split=self.split) - download_url(split_url, file_list_path) - list_video_urls = open(path.join(file_list_path, path.basename(split_url)), "r") + split_url_filepath = path.join(file_list_path, path.basename(split_url)) + if not path.isfile(split_url_filepath): + download_url(split_url, file_list_path) + list_video_urls = open(split_url_filepath, "r") - download_url(self._ANNOTATION_URLS[self.num_classes].format(split=self.split), annotation_path) + if not path.isfile(path.join(annotation_path, f"{self.split}.csv")): + download_url(self._ANNOTATION_URLS[self.num_classes].format(split=self.split), annotation_path) self.annotations = path.join(annotation_path, f"{self.split}.csv") if self._num_download_workers == 1: From af70e5f3493b156f6a1c57f6c922ff96133ae29c Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Tue, 4 May 2021 08:50:45 -0500 Subject: [PATCH 22/61] test fix --- test/test_datasets.py | 24 ++++++++++++++++++++++++ torchvision/datasets/kinetics.py | 4 +++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/test/test_datasets.py b/test/test_datasets.py index f28885d064d..e8a427900bb 100644 --- a/test/test_datasets.py +++ b/test/test_datasets.py @@ -1064,8 +1064,32 @@ def test_not_found_or_corrupted(self): super().test_not_found_or_corrupted() +class KineticsTestCase(datasets_utils.VideoDatasetTestCase): + DATASET_CLASS = datasets.Kinetics + # DEFAULT_CONFIG = {"frames_per_clip": 1} + # ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( + # split=("train", "val") + # ) + + + def inject_fake_data(self, tmpdir, config): + classes = ("Abseiling", "Zumba") + num_videos_per_class = 2 + #tmpdir = pathlib.Path(tmpdir) / config['split'] + digits = string.ascii_letters + string.digits + "-_" + for cls in classes: + datasets_utils.create_video_folder( + tmpdir, + cls, + lambda _: f"{datasets_utils.create_random_string(11, digits)}.mp4", + num_videos_per_class, + ) + # ret = {'num_examples': num_videos_per_class * len(classes)} + return num_videos_per_class * len(classes) + class Kinetics400TestCase(datasets_utils.VideoDatasetTestCase): DATASET_CLASS = datasets.Kinetics400 + # DEFAULT_CONFIG = {"frames_per_clip": 1} def inject_fake_data(self, tmpdir, config): classes = ("Abseiling", "Zumba") diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index 565a9225a65..ffc2a1a4350 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -87,10 +87,10 @@ class Kinetics(VisionDataset): def __init__( self, root: str, + frames_per_clip: int, num_classes: str = "400", split: str = "train", frame_rate: float = None, - frames_per_clip: int = 5, step_between_clips: int = 1, annotation_path: str = None, transform: Optional[Callable] = None, @@ -121,6 +121,8 @@ def __init__( if download: self.download_and_process_videos() + + print("HERE") super().__init__(self.root) self.classes, class_to_idx = find_classes(self.root) From 6ec32534d46d7e7a59b6277c8e3be4e7880b73e2 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Tue, 4 May 2021 16:08:19 +0200 Subject: [PATCH 23/61] add download tests for Kinetics --- test/test_datasets_download.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/test/test_datasets_download.py b/test/test_datasets_download.py index 6ff3a33bcc9..3a11f9962de 100644 --- a/test/test_datasets_download.py +++ b/test/test_datasets_download.py @@ -391,6 +391,25 @@ def widerface(): ) +def kinetics(): + return itertools.chain( + *[ + collect_download_configs( + lambda: datasets.Kinetics( + path.join(ROOT, "Kinetics", split), + frames_per_clip=1, + num_classes=num_classes, + split=split, + download=True, + ), + name=f"Kinetics, {num_classes}, {split}", + file="kinetics", + ) + for num_classes, split in itertools.product(("400", "600", "700"), ("train", "val")) + ] + ) + + def make_parametrize_kwargs(download_configs): argvalues = [] ids = [] @@ -426,6 +445,7 @@ def make_parametrize_kwargs(download_configs): usps(), celeba(), widerface(), + kinetics(), ) ) ) From b84b298553b06916fdb37d015a184633292ac340 Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Tue, 4 May 2021 12:57:33 -0500 Subject: [PATCH 24/61] users now dont need to provide full path within the root for new Kinetics dataset --- test/test_datasets.py | 20 +++++++++++------- test/test_datasets_download.py | 2 +- torchvision/datasets/kinetics.py | 35 ++++++++++++++++++-------------- 3 files changed, 34 insertions(+), 23 deletions(-) diff --git a/test/test_datasets.py b/test/test_datasets.py index e8a427900bb..07fe89cdfdb 100644 --- a/test/test_datasets.py +++ b/test/test_datasets.py @@ -1066,16 +1066,15 @@ def test_not_found_or_corrupted(self): class KineticsTestCase(datasets_utils.VideoDatasetTestCase): DATASET_CLASS = datasets.Kinetics - # DEFAULT_CONFIG = {"frames_per_clip": 1} - # ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( - # split=("train", "val") - # ) + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( + split=("train", "val"), num_classes=("400", "600", "700") + ) def inject_fake_data(self, tmpdir, config): classes = ("Abseiling", "Zumba") num_videos_per_class = 2 - #tmpdir = pathlib.Path(tmpdir) / config['split'] + tmpdir = pathlib.Path(tmpdir) / config['split'] digits = string.ascii_letters + string.digits + "-_" for cls in classes: datasets_utils.create_video_folder( @@ -1084,18 +1083,25 @@ def inject_fake_data(self, tmpdir, config): lambda _: f"{datasets_utils.create_random_string(11, digits)}.mp4", num_videos_per_class, ) - # ret = {'num_examples': num_videos_per_class * len(classes)} return num_videos_per_class * len(classes) class Kinetics400TestCase(datasets_utils.VideoDatasetTestCase): DATASET_CLASS = datasets.Kinetics400 - # DEFAULT_CONFIG = {"frames_per_clip": 1} + + def dataset_args(self, tmpdir, config): + # note: train is here hardcoded by default bc we expect the user to supply it, + # but that requirement have changed in subsequent version of the dataset + root = pathlib.Path(tmpdir) / "train" + return root, 1 def inject_fake_data(self, tmpdir, config): classes = ("Abseiling", "Zumba") num_videos_per_class = 2 digits = string.ascii_letters + string.digits + "-_" + # note: train is here hardcoded by default bc we expect the user to supply it, + # but that requirement have changed in subsequent version of the dataset + tmpdir = pathlib.Path(tmpdir) / "train" for cls in classes: datasets_utils.create_video_folder( tmpdir, diff --git a/test/test_datasets_download.py b/test/test_datasets_download.py index 3a11f9962de..f3213ddd31c 100644 --- a/test/test_datasets_download.py +++ b/test/test_datasets_download.py @@ -396,7 +396,7 @@ def kinetics(): *[ collect_download_configs( lambda: datasets.Kinetics( - path.join(ROOT, "Kinetics", split), + path.join(ROOT, f"Kinetics_{num_classes}"), frames_per_clip=1, num_classes=num_classes, split=split, diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index ffc2a1a4350..3b6371f9c13 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -35,20 +35,21 @@ class Kinetics(VisionDataset): frames in a video might be present. Args: - root (string): Root directory of the (split of the) Kinetics Dataset. + root (string): Root directory of the Kinetics Dataset. Directory should be structured as follows: .. code:: root/ - ├── class1 - │ ├── clip1.mp4 - │ ├── clip2.mp4 - │ ├── clip3.mp4 - │ └── ... - └── class2 - ├── clipx.mp4 - └── ... - If the split is not defined, it is appended using the split argument. + ├── split + │ ├── class1 + │ │ ├── clip1.mp4 + │ │ ├── clip2.mp4 + │ │ ├── clip3.mp4 + │ │ ├── ... + │ ├── class2 + │ │ ├── clipx.mp4 + │ │ └── ... + Split is appended using the split argument. num_classes (int): select between Kinetics-400, Kinetics-600, and Kinetics-700 split (str): split of the dataset to consider; currently supports ["train", "val"] frame_rate (float): If not None, interpolate different frame rate for each clip. @@ -113,7 +114,10 @@ def __init__( self.extensions = extensions self._num_download_workers = _num_download_workers - self.root = root + if path.basename(root) != split: + self.root = path.join(root, split) + else: + self.root = root self.split = split if annotation_path is not None: @@ -122,7 +126,6 @@ def __init__( if download: self.download_and_process_videos() - print("HERE") super().__init__(self.root) self.classes, class_to_idx = find_classes(self.root) @@ -167,9 +170,9 @@ def _download_videos(self) -> None: f"The directory {self.root} already exists. If you want to re-download or re-extract the images, " f"delete the directory." ) - + # check that the assignment was made properly kinetics_dir, split = path.split(self.root) - assert split == self.split + assert split == self.split, 'File folder assignment not done properly' tar_path = path.join(kinetics_dir, "tars") annotation_path = path.join(kinetics_dir, "annotations") file_list_path = path.join(kinetics_dir, "files") @@ -314,8 +317,10 @@ def __init__( "Kinetics400 is deprecated and will be removed in a future release." "It was replaced by Kinetics(..., num_classes=\"400\").") + kinetics_dir, split = path.split(root) super(Kinetics400, self).__init__( - root=root, + root=kinetics_dir, + split=split, num_classes="400", frame_rate=frame_rate, step_between_clips=step_between_clips, From d7f14d0e6652b1cefd65eef500db07eccd3f889b Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Tue, 4 May 2021 13:22:43 -0500 Subject: [PATCH 25/61] linter --- test/test_datasets.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/test_datasets.py b/test/test_datasets.py index 07fe89cdfdb..87c62e9dd1f 100644 --- a/test/test_datasets.py +++ b/test/test_datasets.py @@ -961,7 +961,6 @@ def _create_annotation_files(self, root, video_files, fold, train): other_annotations.remove(current_annotation) for name in other_annotations: self._create_annotation_file(root, name, other_videos) - return len(current_videos) def _annotation_file_name(self, fold, train): @@ -1070,7 +1069,6 @@ class KineticsTestCase(datasets_utils.VideoDatasetTestCase): split=("train", "val"), num_classes=("400", "600", "700") ) - def inject_fake_data(self, tmpdir, config): classes = ("Abseiling", "Zumba") num_videos_per_class = 2 @@ -1085,6 +1083,7 @@ def inject_fake_data(self, tmpdir, config): ) return num_videos_per_class * len(classes) + class Kinetics400TestCase(datasets_utils.VideoDatasetTestCase): DATASET_CLASS = datasets.Kinetics400 From 96e2becb244ed3fd0e608e6d3b9d3a5c0ba140b0 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Wed, 5 May 2021 10:44:40 +0200 Subject: [PATCH 26/61] Update test/test_datasets_download.py --- test/test_datasets_download.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/test_datasets_download.py b/test/test_datasets_download.py index 9b0f8df4504..1defa9c4eca 100644 --- a/test/test_datasets_download.py +++ b/test/test_datasets_download.py @@ -407,7 +407,8 @@ def kinetics(): file="kinetics", ) for num_classes, split in itertools.product(("400", "600", "700"), ("train", "val")) - + ] + ) def kitti(): return itertools.chain( *[ From 20dc75d3d344392c4a6e360874417b61e6c4d122 Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Wed, 5 May 2021 14:56:20 +0100 Subject: [PATCH 27/61] Update torchvision/datasets/kinetics.py Co-authored-by: Philip Meier --- torchvision/datasets/kinetics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index 3b6371f9c13..aa58bd5d765 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -71,7 +71,7 @@ class Kinetics(VisionDataset): - label (int): class of the video clip Raises: - RuntimeError: If ``download is True`` and the image archive is already extracted. + RuntimeError: If ``download is True`` and the video archives are already extracted. """ _TAR_URLS = { From 5ea1232864a27b62f7fc8fbfaa6b6aa209bc5c29 Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Wed, 5 May 2021 09:49:03 -0500 Subject: [PATCH 28/61] revert whitespace (3680#discussion_r626382842) --- test/test_datasets.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_datasets.py b/test/test_datasets.py index 7d82fee9262..41defd2a631 100644 --- a/test/test_datasets.py +++ b/test/test_datasets.py @@ -841,6 +841,7 @@ def _create_annotation_files(self, root, video_files, fold, train): other_annotations.remove(current_annotation) for name in other_annotations: self._create_annotation_file(root, name, other_videos) + return len(current_videos) def _annotation_file_name(self, fold, train): From 607a3cb293879fbcccebb6a0e67dca19cd0ed16a Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Wed, 5 May 2021 09:52:28 -0500 Subject: [PATCH 29/61] addressing annotation_path parameter which is unnecessary --- torchvision/datasets/kinetics.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index aa58bd5d765..6cc2c0072c5 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -55,7 +55,6 @@ class Kinetics(VisionDataset): frame_rate (float): If not None, interpolate different frame rate for each clip. frames_per_clip (int): number of frames in a clip step_between_clips (int): number of frames between each clip - annotation_path (str): path to official Kinetics annotation file. transform (callable, optional): A function/transform that takes in a TxHxWxC video and returns a transformed version. download (bool): Download the official version of the dataset to root folder. @@ -93,7 +92,6 @@ def __init__( split: str = "train", frame_rate: float = None, step_between_clips: int = 1, - annotation_path: str = None, transform: Optional[Callable] = None, extensions=("avi", "mp4"), download: bool = False, @@ -120,9 +118,6 @@ def __init__( self.root = root self.split = split - if annotation_path is not None: - self.annotations = annotation_path - if download: self.download_and_process_videos() From da586c65b8793a458ae4f4ac7627d4dcbc62bde9 Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Wed, 5 May 2021 15:53:06 +0100 Subject: [PATCH 30/61] Update torchvision/datasets/kinetics.py Co-authored-by: Philip Meier --- torchvision/datasets/kinetics.py | 1 - 1 file changed, 1 deletion(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index aa58bd5d765..e9499dfc522 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -130,7 +130,6 @@ def __init__( self.classes, class_to_idx = find_classes(self.root) self.samples = make_dataset(self.root, class_to_idx, extensions, is_valid_file=None) - video_list = [x[0] for x in self.samples] self.video_clips = VideoClips( video_list, From fd2208b063e08f85a2fa432e70e5ec545a0fdb6b Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Wed, 5 May 2021 15:54:41 +0100 Subject: [PATCH 31/61] Update torchvision/datasets/kinetics.py Co-authored-by: Philip Meier --- torchvision/datasets/kinetics.py | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index e9499dfc522..d0d19688121 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -321,17 +321,6 @@ def __init__( root=kinetics_dir, split=split, num_classes="400", - frame_rate=frame_rate, - step_between_clips=step_between_clips, - frames_per_clip=frames_per_clip, - extensions=extensions, - transform=transform, - _precomputed_metadata=_precomputed_metadata, - num_workers=num_workers, - _video_width=_video_width, - _video_height=_video_height, - _video_min_dimension=_video_min_dimension, - _audio_channels=_audio_channels, - _audio_samples=_audio_samples, download=False, + **kwargs, ) From 0dc04d3dcc175c4db11b335d51dd23cc577af947 Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Wed, 5 May 2021 15:55:12 +0100 Subject: [PATCH 32/61] kwargs update Co-authored-by: Philip Meier --- torchvision/datasets/kinetics.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index d0d19688121..32083b92032 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -299,17 +299,6 @@ def __init__( self, root, frames_per_clip, - step_between_clips=1, - frame_rate=None, - extensions=("avi", "mp4"), - transform=None, - _precomputed_metadata=None, - num_workers=1, - _video_width=0, - _video_height=0, - _video_min_dimension=0, - _audio_samples=0, - _audio_channels=0, **kwargs ): warnings.warn( From 2bdd820e2ad46570726d61ee335a279bb41561bb Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Wed, 5 May 2021 09:56:20 -0500 Subject: [PATCH 33/61] expose num_download_workers as public --- torchvision/datasets/kinetics.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index 2ede26e48fc..3c2541fd0f9 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -59,7 +59,7 @@ class Kinetics(VisionDataset): and returns a transformed version. download (bool): Download the official version of the dataset to root folder. num_workers (int): Use multiple workers for VideoClips creation - _num_download_workers (int): Use multiprocessing in order to speed up download. + num_download_workers (int): Use multiprocessing in order to speed up download. Returns: tuple: A 3-tuple with the following entries: @@ -97,7 +97,7 @@ def __init__( download: bool = False, num_workers: int = 1, _precomputed_metadata=None, - _num_download_workers=1, + num_download_workers=1, _video_width=0, _video_height=0, _video_min_dimension=0, @@ -110,7 +110,7 @@ def __init__( verify_str_arg(num_classes, arg="num_classes", valid_values=["400", "600", "700"]) self.num_classes = num_classes self.extensions = extensions - self._num_download_workers = _num_download_workers + self.num_download_workers = num_download_workers if path.basename(root) != split: self.root = path.join(root, split) @@ -181,14 +181,14 @@ def _download_videos(self) -> None: download_url(self._ANNOTATION_URLS[self.num_classes].format(split=self.split), annotation_path) self.annotations = path.join(annotation_path, f"{self.split}.csv") - if self._num_download_workers == 1: + if self.num_download_workers == 1: for line in list_video_urls.readlines(): line = str(line).replace("\n", "") download_and_extract_archive(line, tar_path, self.root) else: part = partial(_dl_wrap, tar_path, self.root) lines = [str(line).replace("\n", "") for line in list_video_urls.readlines()] - poolproc = Pool(self._num_download_workers) + poolproc = Pool(self.num_download_workers) poolproc.map(part, lines) def _make_ds_structure(self): From 5640dd9ded41664910ea6d18b17de92d7a6597cc Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Wed, 5 May 2021 09:58:53 -0500 Subject: [PATCH 34/61] swap os.isfile with check_integrity --- torchvision/datasets/kinetics.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index 3c2541fd0f9..1d88967b18a 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -9,7 +9,7 @@ from functools import partial from multiprocessing import Pool -from .utils import download_and_extract_archive, download_url, verify_str_arg +from .utils import download_and_extract_archive, download_url, verify_str_arg, check_integrity from .folder import find_classes, make_dataset from .video_utils import VideoClips from .vision import VisionDataset @@ -173,11 +173,11 @@ def _download_videos(self) -> None: split_url = self._TAR_URLS[self.num_classes].format(split=self.split) split_url_filepath = path.join(file_list_path, path.basename(split_url)) - if not path.isfile(split_url_filepath): + if not check_integrity(split_url_filepath): download_url(split_url, file_list_path) list_video_urls = open(split_url_filepath, "r") - if not path.isfile(path.join(annotation_path, f"{self.split}.csv")): + if not check_integrity(path.join(annotation_path, f"{self.split}.csv")): download_url(self._ANNOTATION_URLS[self.num_classes].format(split=self.split), annotation_path) self.annotations = path.join(annotation_path, f"{self.split}.csv") From 9ef70da53e1cb68c583687023c73e20f96c1580b Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Wed, 5 May 2021 09:59:56 -0500 Subject: [PATCH 35/61] nit on private things --- torchvision/datasets/kinetics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index 1d88967b18a..c6dcb6005cf 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -95,9 +95,9 @@ def __init__( transform: Optional[Callable] = None, extensions=("avi", "mp4"), download: bool = False, + num_download_workers=1, num_workers: int = 1, _precomputed_metadata=None, - num_download_workers=1, _video_width=0, _video_height=0, _video_min_dimension=0, From b7b81b17b9d4dff3dcbe102730590c58df2d75ef Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Wed, 5 May 2021 12:01:48 -0500 Subject: [PATCH 36/61] special case if there are no default arguments --- test/datasets_utils.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/test/datasets_utils.py b/test/datasets_utils.py index 8077a03b910..f82e861bf64 100644 --- a/test/datasets_utils.py +++ b/test/datasets_utils.py @@ -637,7 +637,11 @@ def __init__(self, *args, **kwargs): def _set_default_frames_per_clip(self, inject_fake_data): argspec = inspect.getfullargspec(self.DATASET_CLASS.__init__) - args_without_default = argspec.args[1:-len(argspec.defaults)] + # edge case if dataset doesn't have default parameters + if argspec.defaults: + args_without_default = argspec.args[1:-len(argspec.defaults)] + else: + args_without_default = argspec.args[1:] frames_per_clip_last = args_without_default[-1] == "frames_per_clip" @functools.wraps(inject_fake_data) From 36bd2c70fb08846997cb5da228a5f6a82b2b25b6 Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Wed, 5 May 2021 12:02:37 -0500 Subject: [PATCH 37/61] revert changes to kinetics400 test case for BC --- test/test_datasets.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/test/test_datasets.py b/test/test_datasets.py index 41defd2a631..bea2a2b80b9 100644 --- a/test/test_datasets.py +++ b/test/test_datasets.py @@ -968,20 +968,11 @@ def inject_fake_data(self, tmpdir, config): class Kinetics400TestCase(datasets_utils.VideoDatasetTestCase): DATASET_CLASS = datasets.Kinetics400 - def dataset_args(self, tmpdir, config): - # note: train is here hardcoded by default bc we expect the user to supply it, - # but that requirement have changed in subsequent version of the dataset - root = pathlib.Path(tmpdir) / "train" - return root, 1 - def inject_fake_data(self, tmpdir, config): classes = ("Abseiling", "Zumba") num_videos_per_class = 2 digits = string.ascii_letters + string.digits + "-_" - # note: train is here hardcoded by default bc we expect the user to supply it, - # but that requirement have changed in subsequent version of the dataset - tmpdir = pathlib.Path(tmpdir) / "train" for cls in classes: datasets_utils.create_video_folder( tmpdir, From 2bda79c48637434ee7c36a0a25ed3f7ae17d3b6a Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Wed, 5 May 2021 12:03:32 -0500 Subject: [PATCH 38/61] add split_folder changes and support for legacy format --- torchvision/datasets/kinetics.py | 55 +++++++++++++++++--------------- 1 file changed, 29 insertions(+), 26 deletions(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index c6dcb6005cf..7e87d67083f 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -103,28 +103,35 @@ def __init__( _video_min_dimension=0, _audio_samples=0, _audio_channels=0, + **kwargs ) -> None: # TODO: support test - verify_str_arg(split, arg="split", valid_values=['train', 'val']) + verify_str_arg(split, arg="split", valid_values=['train', 'val', 'unknown']) verify_str_arg(num_classes, arg="num_classes", valid_values=["400", "600", "700"]) self.num_classes = num_classes self.extensions = extensions self.num_download_workers = num_download_workers - if path.basename(root) != split: - self.root = path.join(root, split) + _use_legacy_structure = kwargs.get('_use_legacy_structure', False) + if _use_legacy_structure: + print("Using legacy structure") + self.root = root + self.split_folder = root + self.split = "unknown" + assert download == False, "Cannot download the videos using legacy_structure." else: self.root = root - self.split = split + self.split_folder = path.join(root, split) + self.split = split if download: self.download_and_process_videos() - super().__init__(self.root) + super().__init__(self.split_folder) - self.classes, class_to_idx = find_classes(self.root) - self.samples = make_dataset(self.root, class_to_idx, extensions, is_valid_file=None) + self.classes, class_to_idx = find_classes(self.split_folder) + self.samples = make_dataset(self.split_folder, class_to_idx, extensions, is_valid_file=None) video_list = [x[0] for x in self.samples] self.video_clips = VideoClips( video_list, @@ -159,17 +166,15 @@ def _download_videos(self) -> None: Raises: RuntimeError: if download folder exists, break to prevent downloading entire dataset again. """ - if path.exists(self.root): + if path.exists(self.split_folder): raise RuntimeError( - f"The directory {self.root} already exists. If you want to re-download or re-extract the images, " + f"The directory {self.split_folder} already exists. If you want to re-download or re-extract the images, " f"delete the directory." ) # check that the assignment was made properly - kinetics_dir, split = path.split(self.root) - assert split == self.split, 'File folder assignment not done properly' - tar_path = path.join(kinetics_dir, "tars") - annotation_path = path.join(kinetics_dir, "annotations") - file_list_path = path.join(kinetics_dir, "files") + tar_path = path.join(self.root, "tars") + annotation_path = path.join(self.root, "annotations") + file_list_path = path.join(self.root, "files") split_url = self._TAR_URLS[self.num_classes].format(split=self.split) split_url_filepath = path.join(file_list_path, path.basename(split_url)) @@ -184,21 +189,21 @@ def _download_videos(self) -> None: if self.num_download_workers == 1: for line in list_video_urls.readlines(): line = str(line).replace("\n", "") - download_and_extract_archive(line, tar_path, self.root) + download_and_extract_archive(line, tar_path, self.split_folder) else: - part = partial(_dl_wrap, tar_path, self.root) + part = partial(_dl_wrap, tar_path, self.split_folder) lines = [str(line).replace("\n", "") for line in list_video_urls.readlines()] poolproc = Pool(self.num_download_workers) poolproc.map(part, lines) def _make_ds_structure(self): """move videos from - root/ + split_folder/ ├── clip1.avi ├── clip2.avi to the correct format as described below: - root/ + split_folder/ ├── class1 │ ├── clip1.avi @@ -219,11 +224,11 @@ def _make_ds_structure(self): .replace("(", "") .replace(")", "") ) - os.makedirs(path.join(self.root, label), exist_ok=True) - existing_file = path.join(self.root, f) + os.makedirs(path.join(self.split_folder, label), exist_ok=True) + existing_file = path.join(self.split_folder, f) if path.isfile(existing_file): os.replace( - existing_file, path.join(self.root, label, f), + existing_file, path.join(self.split_folder, label, f), ) @property @@ -300,11 +305,9 @@ def __init__( "Kinetics400 is deprecated and will be removed in a future release." "It was replaced by Kinetics(..., num_classes=\"400\").") - kinetics_dir, split = path.split(root) super(Kinetics400, self).__init__( - root=kinetics_dir, - split=split, - num_classes="400", - download=False, + root=root, + frames_per_clip=frames_per_clip, + _use_legacy_structure=True, **kwargs, ) From 1a7a978d24f2ea51e0d976b9354ab45ee65666b9 Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Tue, 11 May 2021 17:31:24 +0100 Subject: [PATCH 39/61] pmeiers suggestions Co-authored-by: Philip Meier --- torchvision/datasets/kinetics.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index 7e87d67083f..4aa421770ea 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -208,11 +208,11 @@ def _make_ds_structure(self): │ ├── clip1.avi """ - file_tmp = "{ytid}_{start:06}_{end:06}.mp4" + file_fmtstr = "{ytid}_{start:06}_{end:06}.mp4" with open(self.annotations) as csvfile: reader = csv.DictReader(csvfile) for row in reader: - f = file_tmp.format( + f = file_fmtstr.format( ytid=row["youtube_id"], start=int(row["time_start"]), end=int(row["time_end"]), From 89e41e6a03d8febc748a11aae9c4b91af5693b28 Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Tue, 11 May 2021 11:36:29 -0500 Subject: [PATCH 40/61] pmeiers suggestions - root comment --- torchvision/datasets/kinetics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index 4aa421770ea..af0e5cdf032 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -128,7 +128,7 @@ def __init__( if download: self.download_and_process_videos() - super().__init__(self.split_folder) + super().__init__(self.root) self.classes, class_to_idx = find_classes(self.split_folder) self.samples = make_dataset(self.split_folder, class_to_idx, extensions, is_valid_file=None) From 5941dab54a24af97e4b4100ef16ed605fd39546a Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Tue, 11 May 2021 11:45:05 -0500 Subject: [PATCH 41/61] pmeiers comments - annotation attribute remmoved --- torchvision/datasets/kinetics.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index af0e5cdf032..84d945a1c22 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -171,9 +171,7 @@ def _download_videos(self) -> None: f"The directory {self.split_folder} already exists. If you want to re-download or re-extract the images, " f"delete the directory." ) - # check that the assignment was made properly tar_path = path.join(self.root, "tars") - annotation_path = path.join(self.root, "annotations") file_list_path = path.join(self.root, "files") split_url = self._TAR_URLS[self.num_classes].format(split=self.split) @@ -182,9 +180,6 @@ def _download_videos(self) -> None: download_url(split_url, file_list_path) list_video_urls = open(split_url_filepath, "r") - if not check_integrity(path.join(annotation_path, f"{self.split}.csv")): - download_url(self._ANNOTATION_URLS[self.num_classes].format(split=self.split), annotation_path) - self.annotations = path.join(annotation_path, f"{self.split}.csv") if self.num_download_workers == 1: for line in list_video_urls.readlines(): @@ -208,8 +203,13 @@ def _make_ds_structure(self): │ ├── clip1.avi """ + annotation_path = path.join(self.root, "annotations") + if not check_integrity(path.join(annotation_path, f"{self.split}.csv")): + download_url(self._ANNOTATION_URLS[self.num_classes].format(split=self.split), annotation_path) + annotations = path.join(annotation_path, f"{self.split}.csv") + file_fmtstr = "{ytid}_{start:06}_{end:06}.mp4" - with open(self.annotations) as csvfile: + with open(annotations) as csvfile: reader = csv.DictReader(csvfile) for row in reader: f = file_fmtstr.format( From 72d260a207953e16db886cd078f1ffd087257a05 Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Tue, 11 May 2021 18:11:46 +0100 Subject: [PATCH 42/61] pmeiers suggestion Co-authored-by: Philip Meier --- torchvision/datasets/kinetics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index 84d945a1c22..b90ad90b8e7 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -50,10 +50,10 @@ class Kinetics(VisionDataset): │ │ ├── clipx.mp4 │ │ └── ... Split is appended using the split argument. + frames_per_clip (int): number of frames in a clip num_classes (int): select between Kinetics-400, Kinetics-600, and Kinetics-700 split (str): split of the dataset to consider; currently supports ["train", "val"] frame_rate (float): If not None, interpolate different frame rate for each clip. - frames_per_clip (int): number of frames in a clip step_between_clips (int): number of frames between each clip transform (callable, optional): A function/transform that takes in a TxHxWxC video and returns a transformed version. From 51231cf67b1ed8466c53c907865ad87308d2f1a1 Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Tue, 11 May 2021 18:16:56 +0100 Subject: [PATCH 43/61] pmeiers suggestion Co-authored-by: Philip Meier --- torchvision/datasets/kinetics.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index b90ad90b8e7..c18c914aa2a 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -51,9 +51,9 @@ class Kinetics(VisionDataset): │ │ └── ... Split is appended using the split argument. frames_per_clip (int): number of frames in a clip - num_classes (int): select between Kinetics-400, Kinetics-600, and Kinetics-700 - split (str): split of the dataset to consider; currently supports ["train", "val"] - frame_rate (float): If not None, interpolate different frame rate for each clip. + num_classes (int): select between Kinetics-400 (default), Kinetics-600, and Kinetics-700 + split (str): split of the dataset to consider; supports ``"train"`` (default) ``"val"`` + frame_rate (float): If omitted, interpolate different frame rate for each clip. step_between_clips (int): number of frames between each clip transform (callable, optional): A function/transform that takes in a TxHxWxC video and returns a transformed version. From 7b91bbe0064fdbbe475068ecb0797f0c3943ef5f Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Tue, 11 May 2021 18:24:01 +0100 Subject: [PATCH 44/61] pmeiers suggestion Co-authored-by: Philip Meier --- test/test_datasets_download.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_datasets_download.py b/test/test_datasets_download.py index 1defa9c4eca..6c6e2e1d640 100644 --- a/test/test_datasets_download.py +++ b/test/test_datasets_download.py @@ -397,7 +397,7 @@ def kinetics(): *[ collect_download_configs( lambda: datasets.Kinetics( - path.join(ROOT, f"Kinetics_{num_classes}"), + path.join(ROOT, f"Kinetics{num_classes}"), frames_per_clip=1, num_classes=num_classes, split=split, From cd2e55a99165770eed7b29d03b1025ee68bb3dc5 Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Tue, 11 May 2021 18:24:24 +0100 Subject: [PATCH 45/61] pmeiers suggestion Co-authored-by: Philip Meier --- test/datasets_utils.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/test/datasets_utils.py b/test/datasets_utils.py index f82e861bf64..9da6f73347b 100644 --- a/test/datasets_utils.py +++ b/test/datasets_utils.py @@ -637,11 +637,7 @@ def __init__(self, *args, **kwargs): def _set_default_frames_per_clip(self, inject_fake_data): argspec = inspect.getfullargspec(self.DATASET_CLASS.__init__) - # edge case if dataset doesn't have default parameters - if argspec.defaults: - args_without_default = argspec.args[1:-len(argspec.defaults)] - else: - args_without_default = argspec.args[1:] + args_without_default = argspec.args[1:(-len(argspec.defaults) if argspec.defaults else None)] frames_per_clip_last = args_without_default[-1] == "frames_per_clip" @functools.wraps(inject_fake_data) From 7b322e9147b41a17153015754a999efb863959fb Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Tue, 11 May 2021 19:12:46 +0100 Subject: [PATCH 46/61] Update torchvision/datasets/kinetics.py Co-authored-by: Philip Meier --- torchvision/datasets/kinetics.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index c18c914aa2a..ca0ff06f71b 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -299,11 +299,20 @@ def __init__( self, root, frames_per_clip, + num_classes = None, + split = None, + download = None, + num_download_workers = None, **kwargs ): warnings.warn( "Kinetics400 is deprecated and will be removed in a future release." "It was replaced by Kinetics(..., num_classes=\"400\").") + if any(value is not None for value in (num_classes, split, download, num_download_workers)): + raise RuntimeError( + "Usage of 'num_classes', 'split', 'download', or 'num_download_workers' is not supported in Kinetics400. " + "Please use Kinetics instead." + ) super(Kinetics400, self).__init__( root=root, From 328c84e80bbf95999ba39b6d3129702d4bdfddd3 Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Tue, 11 May 2021 19:13:22 +0100 Subject: [PATCH 47/61] Update torchvision/datasets/kinetics.py Co-authored-by: Philip Meier --- torchvision/datasets/kinetics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index ca0ff06f71b..ab30e701521 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -90,7 +90,7 @@ def __init__( frames_per_clip: int, num_classes: str = "400", split: str = "train", - frame_rate: float = None, + frame_rate: Optional[float] = None, step_between_clips: int = 1, transform: Optional[Callable] = None, extensions=("avi", "mp4"), From 22e5d48a6813e60b3913f98d7b9f48b6f185c861 Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Tue, 11 May 2021 19:14:01 +0100 Subject: [PATCH 48/61] Update torchvision/datasets/kinetics.py Co-authored-by: Philip Meier --- torchvision/datasets/kinetics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index ab30e701521..dae017134c9 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -93,7 +93,7 @@ def __init__( frame_rate: Optional[float] = None, step_between_clips: int = 1, transform: Optional[Callable] = None, - extensions=("avi", "mp4"), + extensions: Tuple[str, ...] = ("avi", "mp4"), download: bool = False, num_download_workers=1, num_workers: int = 1, From 173d385942178d9d698b1f923f15fd36737f9550 Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Tue, 11 May 2021 19:14:35 +0100 Subject: [PATCH 49/61] Update torchvision/datasets/kinetics.py Co-authored-by: Philip Meier --- torchvision/datasets/kinetics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index dae017134c9..a635868d63a 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -95,7 +95,7 @@ def __init__( transform: Optional[Callable] = None, extensions: Tuple[str, ...] = ("avi", "mp4"), download: bool = False, - num_download_workers=1, + num_download_workers: int = 1, num_workers: int = 1, _precomputed_metadata=None, _video_width=0, From 5a7db27f658a6261b6cc243c897fd577c3187358 Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Tue, 11 May 2021 19:16:47 +0100 Subject: [PATCH 50/61] Update torchvision/datasets/kinetics.py Co-authored-by: Philip Meier --- torchvision/datasets/kinetics.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index a635868d63a..45321a12dbd 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -108,8 +108,7 @@ def __init__( # TODO: support test verify_str_arg(split, arg="split", valid_values=['train', 'val', 'unknown']) - verify_str_arg(num_classes, arg="num_classes", valid_values=["400", "600", "700"]) - self.num_classes = num_classes + self.num_classes = verify_str_arg(num_classes, arg="num_classes", valid_values=["400", "600", "700"]) self.extensions = extensions self.num_download_workers = num_download_workers From 44030ee899499e309115bdcb2a3a38ee169d4069 Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Tue, 11 May 2021 19:17:43 +0100 Subject: [PATCH 51/61] Update torchvision/datasets/kinetics.py Co-authored-by: Philip Meier --- torchvision/datasets/kinetics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index 45321a12dbd..9ab5336b057 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -118,7 +118,7 @@ def __init__( self.root = root self.split_folder = root self.split = "unknown" - assert download == False, "Cannot download the videos using legacy_structure." + assert not download, "Cannot download the videos using legacy_structure." else: self.root = root self.split_folder = path.join(root, split) From ce5f80bdd636ff76167a85e1462655288f517041 Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Tue, 11 May 2021 13:28:48 -0500 Subject: [PATCH 52/61] minor debugging --- torchvision/datasets/kinetics.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index 9ab5336b057..8094cd8c254 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -5,7 +5,7 @@ from os import path import csv -from typing import Callable, Optional +from typing import Callable, Optional, Tuple from functools import partial from multiprocessing import Pool @@ -49,7 +49,7 @@ class Kinetics(VisionDataset): │ ├── class2 │ │ ├── clipx.mp4 │ │ └── ... - Split is appended using the split argument. + Note: split is appended automatically using the split argument. frames_per_clip (int): number of frames in a clip num_classes (int): select between Kinetics-400 (default), Kinetics-600, and Kinetics-700 split (str): split of the dataset to consider; supports ``"train"`` (default) ``"val"`` @@ -107,22 +107,20 @@ def __init__( ) -> None: # TODO: support test - verify_str_arg(split, arg="split", valid_values=['train', 'val', 'unknown']) self.num_classes = verify_str_arg(num_classes, arg="num_classes", valid_values=["400", "600", "700"]) self.extensions = extensions self.num_download_workers = num_download_workers + self.root = root _use_legacy_structure = kwargs.get('_use_legacy_structure', False) if _use_legacy_structure: print("Using legacy structure") - self.root = root self.split_folder = root self.split = "unknown" assert not download, "Cannot download the videos using legacy_structure." else: - self.root = root self.split_folder = path.join(root, split) - self.split = split + self.split = verify_str_arg(split, arg="split", valid_values=["train", "val"]) if download: self.download_and_process_videos() From 803bab12658a43384cd3fb36eaa6285de34d79ad Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Tue, 11 May 2021 14:40:36 -0500 Subject: [PATCH 53/61] nit picks --- torchvision/datasets/kinetics.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index 8094cd8c254..954e2c0d660 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -97,12 +97,12 @@ def __init__( download: bool = False, num_download_workers: int = 1, num_workers: int = 1, - _precomputed_metadata=None, - _video_width=0, - _video_height=0, - _video_min_dimension=0, - _audio_samples=0, - _audio_channels=0, + _precomputed_metadata = None, + _video_width: int = 0, + _video_height: int = 0, + _video_min_dimension: int = 0, + _audio_samples: int = 0, + _audio_channels: int = 0, **kwargs ) -> None: From 6e64bb6165d97d96807f7fcd8b37dff1c59f118e Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Wed, 12 May 2021 09:26:39 +0200 Subject: [PATCH 54/61] only include public kwargs into defaults --- test/datasets_utils.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/test/datasets_utils.py b/test/datasets_utils.py index 9da6f73347b..c305d5391db 100644 --- a/test/datasets_utils.py +++ b/test/datasets_utils.py @@ -416,7 +416,11 @@ def _populate_private_class_attributes(cls): continue defaults.append( - {kwarg: default for kwarg, default in zip(argspec.args[-len(argspec.defaults):], argspec.defaults)} + { + kwarg: default + for kwarg, default in zip(argspec.args[-len(argspec.defaults):], argspec.defaults) + if not kwarg.startswith("_") + } ) if not argspec.varkw: @@ -637,7 +641,7 @@ def __init__(self, *args, **kwargs): def _set_default_frames_per_clip(self, inject_fake_data): argspec = inspect.getfullargspec(self.DATASET_CLASS.__init__) - args_without_default = argspec.args[1:(-len(argspec.defaults) if argspec.defaults else None)] + args_without_default = argspec.args[1 : (-len(argspec.defaults) if argspec.defaults else None)] frames_per_clip_last = args_without_default[-1] == "frames_per_clip" @functools.wraps(inject_fake_data) From 8b64d1d816c7f66d693133a606116bd5e2bbf558 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Wed, 12 May 2021 09:27:18 +0200 Subject: [PATCH 55/61] add _use_legacy_structure in favour of **kwargs --- torchvision/datasets/kinetics.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index 954e2c0d660..daf77bad881 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -103,7 +103,7 @@ def __init__( _video_min_dimension: int = 0, _audio_samples: int = 0, _audio_channels: int = 0, - **kwargs + _use_legacy_structure: bool = False, ) -> None: # TODO: support test @@ -112,7 +112,6 @@ def __init__( self.num_download_workers = num_download_workers self.root = root - _use_legacy_structure = kwargs.get('_use_legacy_structure', False) if _use_legacy_structure: print("Using legacy structure") self.split_folder = root From 94b21cc6ec8d34f38d047420cf00b6d0dfc3dfb8 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Wed, 12 May 2021 09:27:41 +0200 Subject: [PATCH 56/61] add type hints for Kinetics400 --- torchvision/datasets/kinetics.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index daf77bad881..9aba2905b6c 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -5,7 +5,7 @@ from os import path import csv -from typing import Callable, Optional, Tuple +from typing import Any, Callable, Optional, Tuple from functools import partial from multiprocessing import Pool @@ -293,13 +293,13 @@ class Kinetics400(Kinetics): def __init__( self, - root, - frames_per_clip, - num_classes = None, - split = None, - download = None, - num_download_workers = None, - **kwargs + root: str, + frames_per_clip: int, + num_classes: Any = None, + split: Any = None, + download: Any = None, + num_download_workers: Any = None, + **kwargs: Any ): warnings.warn( "Kinetics400 is deprecated and will be removed in a future release." From f8039462e886fe1fda050735b6bcc59f7cb857a2 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Wed, 12 May 2021 09:27:52 +0200 Subject: [PATCH 57/61] flake8 --- torchvision/datasets/kinetics.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index 9aba2905b6c..c013a394809 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -306,8 +306,8 @@ def __init__( "It was replaced by Kinetics(..., num_classes=\"400\").") if any(value is not None for value in (num_classes, split, download, num_download_workers)): raise RuntimeError( - "Usage of 'num_classes', 'split', 'download', or 'num_download_workers' is not supported in Kinetics400. " - "Please use Kinetics instead." + "Usage of 'num_classes', 'split', 'download', or 'num_download_workers' is not supported in " + "Kinetics400. Please use Kinetics instead." ) super(Kinetics400, self).__init__( From b39646a94b1981c7011fa1a2cbcc0548f63144ec Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Wed, 12 May 2021 09:32:09 +0200 Subject: [PATCH 58/61] flake8 --- torchvision/datasets/kinetics.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index c013a394809..2b005ddbc13 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -5,7 +5,7 @@ from os import path import csv -from typing import Any, Callable, Optional, Tuple +from typing import Any, Callable, Dict, Optional, Tuple from functools import partial from multiprocessing import Pool @@ -97,7 +97,7 @@ def __init__( download: bool = False, num_download_workers: int = 1, num_workers: int = 1, - _precomputed_metadata = None, + _precomputed_metadata: Optional[Dict] = None, _video_width: int = 0, _video_height: int = 0, _video_min_dimension: int = 0, @@ -164,8 +164,8 @@ def _download_videos(self) -> None: """ if path.exists(self.split_folder): raise RuntimeError( - f"The directory {self.split_folder} already exists. If you want to re-download or re-extract the images, " - f"delete the directory." + f"The directory {self.split_folder} already exists. " + f"If you want to re-download or re-extract the images, delete the directory." ) tar_path = path.join(self.root, "tars") file_list_path = path.join(self.root, "files") @@ -176,7 +176,6 @@ def _download_videos(self) -> None: download_url(split_url, file_list_path) list_video_urls = open(split_url_filepath, "r") - if self.num_download_workers == 1: for line in list_video_urls.readlines(): line = str(line).replace("\n", "") From c47c309abbe84ae85e58f5bc95e34b6646330161 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Wed, 12 May 2021 10:45:27 +0200 Subject: [PATCH 59/61] flake8 --- test/datasets_utils.py | 2 +- test/test_datasets_download.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/test/datasets_utils.py b/test/datasets_utils.py index c305d5391db..d7853b46314 100644 --- a/test/datasets_utils.py +++ b/test/datasets_utils.py @@ -641,7 +641,7 @@ def __init__(self, *args, **kwargs): def _set_default_frames_per_clip(self, inject_fake_data): argspec = inspect.getfullargspec(self.DATASET_CLASS.__init__) - args_without_default = argspec.args[1 : (-len(argspec.defaults) if argspec.defaults else None)] + args_without_default = argspec.args[1:(-len(argspec.defaults) if argspec.defaults else None)] frames_per_clip_last = args_without_default[-1] == "frames_per_clip" @functools.wraps(inject_fake_data) diff --git a/test/test_datasets_download.py b/test/test_datasets_download.py index 6c6e2e1d640..8c2d575e01d 100644 --- a/test/test_datasets_download.py +++ b/test/test_datasets_download.py @@ -409,6 +409,8 @@ def kinetics(): for num_classes, split in itertools.product(("400", "600", "700"), ("train", "val")) ] ) + + def kitti(): return itertools.chain( *[ From 18ad36d00ac4dbf2a8c658c55143421c64628f4f Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Mon, 24 May 2021 10:47:36 -0500 Subject: [PATCH 60/61] rename to make thigs clearer --- torchvision/datasets/kinetics.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index 2b005ddbc13..721e2232648 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -220,10 +220,10 @@ def _make_ds_structure(self): .replace(")", "") ) os.makedirs(path.join(self.split_folder, label), exist_ok=True) - existing_file = path.join(self.split_folder, f) - if path.isfile(existing_file): + downloaded_file = path.join(self.split_folder, f) + if path.isfile(downloaded_file): os.replace( - existing_file, path.join(self.split_folder, label, f), + downloaded_file, path.join(self.split_folder, label, f), ) @property From 12b76d7fbd9f9fb81d490d5cb27d4ce83ac9d27c Mon Sep 17 00:00:00 2001 From: Bruno Korbar Date: Tue, 8 Jun 2021 15:00:35 -0500 Subject: [PATCH 61/61] permuting the output --- torchvision/datasets/kinetics.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index 721e2232648..2543b6c514d 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -64,7 +64,7 @@ class Kinetics(VisionDataset): Returns: tuple: A 3-tuple with the following entries: - - video (Tensor[T, H, W, C]): the `T` video frames in torch.uint8 tensor + - video (Tensor[T, C, H, W]): the `T` video frames in torch.uint8 tensor - audio(Tensor[K, L]): the audio frames, where `K` is the number of channels and `L` is the number of points in torch.float tensor - label (int): class of the video clip @@ -103,7 +103,7 @@ def __init__( _video_min_dimension: int = 0, _audio_samples: int = 0, _audio_channels: int = 0, - _use_legacy_structure: bool = False, + _legacy: bool = False, ) -> None: # TODO: support test @@ -112,7 +112,8 @@ def __init__( self.num_download_workers = num_download_workers self.root = root - if _use_legacy_structure: + self._legacy = _legacy + if _legacy: print("Using legacy structure") self.split_folder = root self.split = "unknown" @@ -235,6 +236,9 @@ def __len__(self): def __getitem__(self, idx): video, audio, info, video_idx = self.video_clips.get_clip(idx) + if not self._legacy: + # [T,H,W,C] --> [T,C,H,W] + video = video.permute(0, 3, 1, 2) label = self.samples[video_idx][1] if self.transform is not None: @@ -312,6 +316,6 @@ def __init__( super(Kinetics400, self).__init__( root=root, frames_per_clip=frames_per_clip, - _use_legacy_structure=True, + _legacy=True, **kwargs, )