From 5e60e01c88d808143fa554664cfc613196bd2ea2 Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Fri, 16 Apr 2021 15:36:19 -0500
Subject: [PATCH 01/61] Initial commit

---
 torchvision/datasets/__init__.py |   4 +-
 torchvision/datasets/kinetics.py | 300 +++++++++++++++++++++++++++++--
 2 files changed, 284 insertions(+), 20 deletions(-)

diff --git a/torchvision/datasets/__init__.py b/torchvision/datasets/__init__.py
index b60fc7c7964..e67ba08d299 100644
--- a/torchvision/datasets/__init__.py
+++ b/torchvision/datasets/__init__.py
@@ -20,7 +20,7 @@
 from .sbd import SBDataset
 from .vision import VisionDataset
 from .usps import USPS
-from .kinetics import Kinetics400
+from .kinetics import Kinetics400, Kinetics
 from .hmdb51 import HMDB51
 from .ucf101 import UCF101
 from .places365 import Places365
@@ -34,6 +34,6 @@
            'Omniglot', 'SBU', 'Flickr8k', 'Flickr30k',
            'VOCSegmentation', 'VOCDetection', 'Cityscapes', 'ImageNet',
            'Caltech101', 'Caltech256', 'CelebA', 'WIDERFace', 'SBDataset',
-           'VisionDataset', 'USPS', 'Kinetics400', 'HMDB51', 'UCF101',
+           'VisionDataset', 'USPS', 'Kinetics400', "Kinetics", 'HMDB51', 'UCF101',
            'Places365', 'Kitti',
            )
diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index a8986986c17..d96d7fc09e4 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -1,15 +1,30 @@
-from .utils import list_dir
+import urllib
+import time
+import os
+import sys
+import warnings
+
+
+from os import path
+import pandas as pd
+from typing import Callable, Optional
+from functools import partial
+from multiprocessing import Pool
+
+from .utils import download_and_extract_archive
 from .folder import find_classes, make_dataset
 from .video_utils import VideoClips
 from .vision import VisionDataset
 
 
-class Kinetics400(VisionDataset):
-    """
-    `Kinetics-400 <https://deepmind.com/research/open-source/open-source-datasets/kinetics/>`_
+def _dl_wrap(tarpath, videopath, line):
+    download_and_extract_archive(line, tarpath, videopath)
+
+class Kinetics(VisionDataset):
+    """` Generic Kinetics <https://deepmind.com/research/open-source/open-source-datasets/kinetics/>`_
     dataset.
 
-    Kinetics-400 is an action recognition video dataset.
+    Kinetics-400/600/700 are action recognition video datasets.
     This dataset consider every video as a collection of video clips of fixed size, specified
     by ``frames_per_clip``, where the step in frames between each clip is given by
     ``step_between_clips``.
@@ -20,11 +35,9 @@ class Kinetics400(VisionDataset):
     Note that we drop clips which do not have exactly ``frames_per_clip`` elements, so not all
     frames in a video might be present.
 
-    Internally, it uses a VideoClips object to handle clip creation.
-
     Args:
-        root (string): Root directory of the Kinetics-400 Dataset. Should be structured as follows:
-
+        root (string): Root directory of the (split of the) Kinetics Dataset. 
+            Directory should be structured as follows:
             .. code::
 
                 root/
@@ -35,29 +48,92 @@ class Kinetics400(VisionDataset):
                 └── class2
                     ├── clipx.avi
                     └── ...
-
+            If the split is not defined, it is appended using the split argument.
+        n_classes (int): select between Kinetics-400, Kinetics-600, and Kinetics-700 
+        split (str): split of the dataset to consider; currently supports ["train", "val"]
+        frame_rate (float): If not None, interpolate different frame rate for each clip.
         frames_per_clip (int): number of frames in a clip
         step_between_clips (int): number of frames between each clip
+        annotation_path (str): path to official Kinetics annotation file.
         transform (callable, optional): A function/transform that  takes in a TxHxWxC video
             and returns a transformed version.
+        download (bool): Download the official version of the dataset to root folder.
+        num_workers (int): Use multiple workers for VideoClips creation
+        _num_download_workers (int): Use multiprocessing in order to speed up download.
 
     Returns:
         tuple: A 3-tuple with the following entries:
 
-            - video (Tensor[T, H, W, C]): the `T` video frames
+            - video (Tensor[T, H, W, C]): the `T` video frames in torch.uint8 tensor
             - audio(Tensor[K, L]): the audio frames, where `K` is the number of channels
-              and `L` is the number of points
+              and `L` is the number of points in torch.float tensor
             - label (int): class of the video clip
+    
+    Raises:
+        RuntimeError: If ``download is True`` and the image archive is already extracted.
     """
 
-    def __init__(self, root, frames_per_clip, step_between_clips=1, frame_rate=None,
-                 extensions=('avi',), transform=None, _precomputed_metadata=None,
-                 num_workers=1, _video_width=0, _video_height=0,
-                 _video_min_dimension=0, _audio_samples=0, _audio_channels=0):
-        super(Kinetics400, self).__init__(root)
+    _FILES = {
+        400: "https://s3.amazonaws.com/kinetics/400/{split}/k400_{split}_path.txt",
+        600: "https://s3.amazonaws.com/kinetics/600/{split}/k600_{split}_path.txt",
+        700: "https://s3.amazonaws.com/kinetics/700_2020/{split}/k700_2020_{split}_path.txt",
+    }
+    _ANNOTATION = {
+        400: "https://s3.amazonaws.com/kinetics/400/annotations/{split}.csv",
+        600: "https://s3.amazonaws.com/kinetics/600/annotations/{split}.txt",
+        700: "https://s3.amazonaws.com/kinetics/700_2020/annotations/{split}.csv",
+    }
+
+    def __init__(
+        self,
+        root: str,
+        n_classes: int = 400,
+        split: str = "train",
+        frame_rate: float = None,
+        frames_per_clip: int = 5,
+        step_between_clips: int = 1,
+        annotation_path: str = None,
+        transform: Optional[Callable] = None,
+        extensions=("avi", "mp4"),
+        download: bool = False,
+        num_workers: int = 1,
+        _precomputed_metadata=None,
+        _num_download_workers=1,
+        _video_width=0,
+        _video_height=0,
+        _video_min_dimension=0,
+        _audio_samples=0,
+        _audio_channels=0,
+    ) -> None:
+
+        # TODO: support test
+        assert split in ["train", "val"]
+        assert n_classes in [400, 700]
+        self.n_classes = n_classes
+        self.extensions = extensions
+        self._num_download_workers = _num_download_workers
+
+        # set up self.root and self.split
+        self._set_up_paths(root, split)
 
+        # load annotation files
+        if annotation_path is not None:
+            self.annotations = pd.read_csv(annotation_path)
+        else:
+            self.annotations = pd.read_csv(
+                self._ANNOTATION[self.n_classes].format(split=self.split)
+            )
+
+        if download:
+            self.download_and_process_videos()
+        # init folder dataset at the end
+        super().__init__(self.root)
+
+        # and then figure out the rest
         self.classes, class_to_idx = find_classes(self.root)
-        self.samples = make_dataset(self.root, class_to_idx, extensions, is_valid_file=None)
+        self.samples = make_dataset(
+            self.root, class_to_idx, extensions, is_valid_file=None
+        )
         video_list = [x[0] for x in self.samples]
         self.video_clips = VideoClips(
             video_list,
@@ -74,6 +150,106 @@ def __init__(self, root, frames_per_clip, step_between_clips=1, frame_rate=None,
         )
         self.transform = transform
 
+    def _set_up_paths(self, root, split) -> None:
+        """Sets up self.root and self.split to avoid confusion.
+        Split in the root (e.g. kinetics/val) overrides the setting in 
+        the split.
+        """
+        self.split = split
+        if path.basename(root) == split:
+            self.root = root
+        elif path.basename(root) in ["train", "val"]:
+            self.root = root
+            self.split = path.basename(root)
+            warnings.warn(
+                f"Root {root} points to a different split than {split}."
+                f"Assigning self.split to {self.split}."
+            )
+        else:
+            self.root = path.join(root, split)
+
+    def download_and_process_videos(self) -> None:
+        """
+        downloads all the videos to the _root_ folder
+        in the expected format
+        """
+        tic = time.time()
+        _ = self._download_videos()
+        toc = time.time()
+        print("Elapsed time for downloading in mins ", (toc - tic) / 60)
+        self._make_ds_structure()
+        toc2 = time.time()
+        print("Elapsed time for processing in mins ", (toc2 - toc) / 60)
+        print("Elapsed time overall in mins ", (toc2 - tic) / 60)
+
+    def _download_videos(self) -> int:
+        """download tarballs containing the video to 
+        "tars" folder and extract them into the _split_ folder
+        where split is one of the official dataset splits.
+
+        Raises:
+            RuntimeError: if download folder exists, break to prevent
+              downloading entire dataset again.
+        """
+        if path.exists(self.root):
+            raise RuntimeError(
+                f"The directory {self.root} already exists. If you want to re-download or re-extract the images, "
+                f"delete the directory."
+            )
+
+        file_url = urllib.request.urlopen(
+            self._FILES[self.n_classes].format(split=self.split)
+        )
+        kinetics_dir, _ = path.split(self.root)
+        tar_path = path.join(kinetics_dir, "tars")
+
+        if self._num_download_workers < 2:
+            for line in file_url:
+                line = str(line.decode("utf-8")).replace("\n", "")
+                dl_wrap(tar_path, self.root, line)
+        else:
+            part = partial(_dl_wrap, tar_path, self.root)
+            lines = [str(line.decode("utf-8")).replace("\n", "") for line in file_url]
+            poolproc = Pool(self._num_download_workers)
+            poolproc.map(part, lines)
+        return 0
+
+
+
+    def _make_ds_structure(self):
+        """move videos from 
+        root/
+            ├── clip1.avi
+            ├── clip2.avi
+        
+        to the correct format as described below:
+        root/
+            ├── class1
+            │   ├── clip1.avi
+
+        """
+        for file in os.listdir(self.root):
+            if file.endswith(self.extensions):
+                ytid = file[:11]
+                try:
+                    df = self.annotations[self.annotations.youtube_id == ytid]
+                    label = (
+                        df.label.item()
+                        .replace(" ", "_")
+                        .replace("'", "")
+                        .replace("(", "")
+                        .replace(")", "")
+                    )
+                    os.makedirs(os.path.join(self.root, label), exist_ok=True)
+                    os.replace(
+                        os.path.join(self.root, file),
+                        os.path.join(self.root, label, file),
+                    )
+                except:
+                    warnings.warn(
+                        f"Unexpected error while processing {ytid}:", sys.exc_info()[0]
+                    )
+
     @property
     def metadata(self):
         return self.video_clips.metadata
@@ -89,3 +265,91 @@ def __getitem__(self, idx):
             video = self.transform(video)
 
         return video, audio, label
+
+
+class Kinetics400(Kinetics):
+    """
+    `Kinetics-400 <https://deepmind.com/research/open-source/open-source-datasets/kinetics/>`_
+    dataset.
+
+    Kinetics-400 is an action recognition video dataset.
+    This dataset consider every video as a collection of video clips of fixed size, specified
+    by ``frames_per_clip``, where the step in frames between each clip is given by
+    ``step_between_clips``.
+
+    To give an example, for 2 videos with 10 and 15 frames respectively, if ``frames_per_clip=5``
+    and ``step_between_clips=5``, the dataset size will be (2 + 3) = 5, where the first two
+    elements will come from video 1, and the next three elements from video 2.
+    Note that we drop clips which do not have exactly ``frames_per_clip`` elements, so not all
+    frames in a video might be present.
+
+    Internally, it uses a VideoClips object to handle clip creation.
+
+    Args:
+        root (string): Root directory of the Kinetics-400 Dataset. Should be structured as follows:
+
+            .. code::
+
+                root/
+                ├── class1
+                │   ├── clip1.avi
+                │   ├── clip2.avi
+                │   └── ...
+                └── class2
+                    ├── clipx.avi
+                    └── ...
+
+        frames_per_clip (int): number of frames in a clip
+        step_between_clips (int): number of frames between each clip
+        transform (callable, optional): A function/transform that  takes in a TxHxWxC video
+            and returns a transformed version.
+
+    Returns:
+        tuple: A 3-tuple with the following entries:
+
+            - video (Tensor[T, H, W, C]): the `T` video frames
+            - audio(Tensor[K, L]): the audio frames, where `K` is the number of channels
+              and `L` is the number of points
+            - label (int): class of the video clip
+    """
+
+    def __init__(
+        self,
+        root,
+        frames_per_clip,
+        step_between_clips=1,
+        frame_rate=None,
+        extensions=("avi",),
+        transform=None,
+        _precomputed_metadata=None,
+        num_workers=1,
+        _video_width=0,
+        _video_height=0,
+        _video_min_dimension=0,
+        _audio_samples=0,
+        _audio_channels=0,
+    ):
+        warnings.warn(
+            "torchvision now supports multiple versions of Kinetics"
+            "datasets, available via Kinetics class with a separate "
+            "n_classes parameter. This function might get deprecated in the future."
+        )
+
+        super(Kinetics400, self).__init__(
+            root=root,
+            n_classes=400,
+            frame_rate=frame_rate,
+            step_between_clips=step_between_clips,
+            frames_per_clip=frames_per_clip,
+            extensions=extensions,
+            transform=transform,
+            _precomputed_metadata=_precomputed_metadata,
+            num_workers=num_workers,
+            _video_width=_video_width,
+            _video_height=_video_height,
+            _video_min_dimension=_video_min_dimension,
+            _audio_channels=_audio_channels,
+            _audio_samples=_audio_samples,
+            download=False,
+        )
+

From 62c5e0595d8309b77475399ba5ba692b8e8c8633 Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Tue, 20 Apr 2021 14:48:24 +0100
Subject: [PATCH 02/61] Address pmeier's comments: catch Exception instead of bare except

Co-authored-by: Philip Meier <github.pmeier@posteo.de>
---
 torchvision/datasets/kinetics.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index d96d7fc09e4..3e5e38bca67 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -245,9 +245,9 @@ def _make_ds_structure(self):
                         os.path.join(self.root, file),
                         os.path.join(self.root, label, file),
                     )
-                except:
+                except Exception as error:
                     warnings.warn(
-                        f"Unexpected error while processing {ytid}:", sys.exc_info()[0]
+                        f"Unexpected error while processing {ytid}: {error}"
                     )
 
     @property
@@ -352,4 +352,3 @@ def __init__(
             _audio_samples=_audio_samples,
             download=False,
         )
-

From 090e526e1d078d030bf969b937e24bd441c256a5 Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Tue, 20 Apr 2021 14:48:50 +0100
Subject: [PATCH 03/61] Apply pmeier's suggestion: change download worker count check

Co-authored-by: Philip Meier <github.pmeier@posteo.de>
---
 torchvision/datasets/kinetics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index 3e5e38bca67..29e43f1364a 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -203,7 +203,7 @@ def _download_videos(self) -> int:
         kinetics_dir, _ = path.split(self.root)
         tar_path = path.join(kinetics_dir, "tars")
 
-        if self._num_download_workers < 2:
+        if self._num_download_workers > 1:
             for line in file_url:
                 line = str(line.decode("utf-8")).replace("\n", "")
                 dl_wrap(tar_path, self.root, line)

From 9403bcf74c83d963211930deb141c0599a70cb36 Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Tue, 20 Apr 2021 14:49:11 +0100
Subject: [PATCH 04/61] Address pmeier's comments: rename n_classes to num_classes

Co-authored-by: Philip Meier <github.pmeier@posteo.de>
---
 torchvision/datasets/kinetics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index 29e43f1364a..7841f5a555b 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -87,7 +87,7 @@ class Kinetics(VisionDataset):
     def __init__(
         self,
         root: str,
-        n_classes: int = 400,
+        num_classes: str = "400",
         split: str = "train",
         frame_rate: float = None,
         frames_per_clip: int = 5,

From e08cf092b6e6004aad9ebab79d85f01534d1e171 Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Thu, 22 Apr 2021 12:24:47 -0500
Subject: [PATCH 05/61] replace pandas with system library to avoid crashes

---
 torchvision/datasets/kinetics.py | 76 ++++++++++++++------------------
 1 file changed, 33 insertions(+), 43 deletions(-)

diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index 7841f5a555b..a97a2dcb29e 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -1,17 +1,16 @@
 import urllib
 import time
 import os
-import sys
 import warnings
 
 
 from os import path
-import pandas as pd
+import csv
 from typing import Callable, Optional
 from functools import partial
 from multiprocessing import Pool
 
-from .utils import download_and_extract_archive
+from .utils import download_and_extract_archive, download_url
 from .folder import find_classes, make_dataset
 from .video_utils import VideoClips
 from .vision import VisionDataset
@@ -74,14 +73,14 @@ class Kinetics(VisionDataset):
     """
 
     _FILES = {
-        400: "https://s3.amazonaws.com/kinetics/400/{split}/k400_{split}_path.txt",
-        600: "https://s3.amazonaws.com/kinetics/600/{split}/k600_{split}_path.txt",
-        700: "https://s3.amazonaws.com/kinetics/700_2020/{split}/k700_2020_{split}_path.txt",
+        "400": "https://s3.amazonaws.com/kinetics/400/{split}/k400_{split}_path.txt",
+        "600": "https://s3.amazonaws.com/kinetics/600/{split}/k600_{split}_path.txt",
+        "700": "https://s3.amazonaws.com/kinetics/700_2020/{split}/k700_2020_{split}_path.txt",
     }
     _ANNOTATION = {
-        400: "https://s3.amazonaws.com/kinetics/400/annotations/{split}.csv",
-        600: "https://s3.amazonaws.com/kinetics/600/annotations/{split}.txt",
-        700: "https://s3.amazonaws.com/kinetics/700_2020/annotations/{split}.csv",
+        "400": "https://s3.amazonaws.com/kinetics/400/annotations/{split}.csv",
+        "600": "https://s3.amazonaws.com/kinetics/600/annotations/{split}.txt",
+        "700": "https://s3.amazonaws.com/kinetics/700_2020/annotations/{split}.csv",
     }
 
     def __init__(
@@ -108,21 +107,16 @@ def __init__(
 
         # TODO: support test
         assert split in ["train", "val"]
-        assert n_classes in [400, 700]
-        self.n_classes = n_classes
+        assert num_classes in ["400", "600", "700"]
+        self.n_classes = num_classes
         self.extensions = extensions
         self._num_download_workers = _num_download_workers
 
-        # set up self.root and self.split
         self._set_up_paths(root, split)
 
-        # load annotation files
         if annotation_path is not None:
-            self.annotations = pd.read_csv(annotation_path)
-        else:
-            self.annotations = pd.read_csv(
-                self._ANNOTATION[self.n_classes].format(split=self.split)
-            )
+            self.annotations = annotation_path
+            
 
         if download:
             self.download_and_process_videos()
@@ -174,7 +168,7 @@ def download_and_process_videos(self) -> None:
         in the expected format
         """
         tic = time.time()
-        _ = self._download_videos()
+        self._download_videos()
         toc = time.time()
         print("Elapsed time for downloading in mins ", (toc - tic) / 60)
         self._make_ds_structure()
@@ -182,7 +176,7 @@ def download_and_process_videos(self) -> None:
         print("Elapsed time for processing in mins ", (toc2 - toc) / 60)
         print("Elapsed time overall in mins ", (toc2 - tic) / 60)
 
-    def _download_videos(self) -> int:
+    def _download_videos(self) -> None:
         """download tarballs containing the video to 
         "tars" folder and extract them into the _split_ folder
         where split is one of the official dataset splits.
@@ -202,17 +196,21 @@ def _download_videos(self) -> int:
         )
         kinetics_dir, _ = path.split(self.root)
         tar_path = path.join(kinetics_dir, "tars")
+        annotation_path = path.join(kinetics_dir, "annotations")
+
+        # download annotations
+        download_url(self._ANNOTATION[self.n_classes].format(split=self.split), annotation_path)
+        self.annotations = os.path.join(annotation_path, f"{self.split}.csv")
 
-        if self._num_download_workers > 1:
+        if self._num_download_workers == 1:
             for line in file_url:
                 line = str(line.decode("utf-8")).replace("\n", "")
-                dl_wrap(tar_path, self.root, line)
+                download_and_extract_archive(line, tar_path, self.root)
         else:
             part = partial(_dl_wrap, tar_path, self.root)
             lines = [str(line.decode("utf-8")).replace("\n", "") for line in file_url]
             poolproc = Pool(self._num_download_workers)
             poolproc.map(part, lines)
-        return 0
 
 
 
@@ -228,27 +226,19 @@ def _make_ds_structure(self):
             │   ├── clip1.avi
 
         """
-        for file in os.listdir(self.root):
-            if file.endswith(self.extensions):
-                ytid = file[:11]
-                try:
-                    df = self.annotations[self.annotations.youtube_id == ytid]
-                    label = (
-                        df.label.item()
-                        .replace(" ", "_")
-                        .replace("'", "")
-                        .replace("(", "")
-                        .replace(")", "")
-                    )
-                    os.makedirs(os.path.join(self.root, label), exist_ok=True)
+        file_tmp = "{ytid}_{start:06}_{end:06}.mp4"
+        with open(self.annotations) as csvfile:
+            reader = csv.DictReader(csvfile)
+            for row in reader:
+                f = file_tmp.format(ytid=row['youtube_id'],start=int(row['time_start']), end=int(row['time_end']))
+                label = row["label"].replace(" ", "_").replace("'", "").replace("(", "").replace(")", "")
+                os.makedirs(os.path.join(self.root, label), exist_ok=True)
+                existing_file = os.path.join(self.root, f)
+                if os.path.isfile(existing_file):
                     os.replace(
-                        os.path.join(self.root, file),
-                        os.path.join(self.root, label, file),
-                    )
-                except Exception as error:
-                    warnings.warn(
-                        f"Unexpected error while processing {ytid}: {error}"
-                    )
+                            existing_file,
+                            os.path.join(self.root, label, f),
+                        )
 
     @property
     def metadata(self):

From 29a4f038d3bcc3737a149f9938f86629f7c3dffb Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Thu, 22 Apr 2021 12:43:01 -0500
Subject: [PATCH 06/61] Lint

---
 torchvision/datasets/kinetics.py | 29 +++++++++++++++++++----------
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index a97a2dcb29e..ee4909a43db 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -19,6 +19,7 @@
 def _dl_wrap(tarpath, videopath, line):
     download_and_extract_archive(line, tarpath, videopath)
 
+
 class Kinetics(VisionDataset):
     """` Generic Kinetics <https://deepmind.com/research/open-source/open-source-datasets/kinetics/>`_
     dataset.
@@ -116,7 +117,6 @@ def __init__(
 
         if annotation_path is not None:
             self.annotations = annotation_path
-            
 
         if download:
             self.download_and_process_videos()
@@ -199,7 +199,9 @@ def _download_videos(self) -> None:
         annotation_path = path.join(kinetics_dir, "annotations")
 
         # download annotations
-        download_url(self._ANNOTATION[self.n_classes].format(split=self.split), annotation_path)
+        download_url(
+            self._ANNOTATION[self.n_classes].format(split=self.split), annotation_path
+        )
         self.annotations = os.path.join(annotation_path, f"{self.split}.csv")
 
         if self._num_download_workers == 1:
@@ -212,8 +214,6 @@ def _download_videos(self) -> None:
             poolproc = Pool(self._num_download_workers)
             poolproc.map(part, lines)
 
-
-
     def _make_ds_structure(self):
         """move videos from 
         root/
@@ -230,15 +230,24 @@ def _make_ds_structure(self):
         with open(self.annotations) as csvfile:
             reader = csv.DictReader(csvfile)
             for row in reader:
-                f = file_tmp.format(ytid=row['youtube_id'],start=int(row['time_start']), end=int(row['time_end']))
-                label = row["label"].replace(" ", "_").replace("'", "").replace("(", "").replace(")", "")
+                f = file_tmp.format(
+                    ytid=row["youtube_id"],
+                    start=int(row["time_start"]),
+                    end=int(row["time_end"]),
+                )
+                label = (
+                    row["label"]
+                    .replace(" ", "_")
+                    .replace("'", "")
+                    .replace("(", "")
+                    .replace(")", "")
+                )
                 os.makedirs(os.path.join(self.root, label), exist_ok=True)
                 existing_file = os.path.join(self.root, f)
                 if os.path.isfile(existing_file):
                     os.replace(
-                            existing_file,
-                            os.path.join(self.root, label, f),
-                        )
+                        existing_file, os.path.join(self.root, label, f),
+                    )
 
     @property
     def metadata(self):
@@ -327,7 +336,7 @@ def __init__(
 
         super(Kinetics400, self).__init__(
             root=root,
-            n_classes=400,
+            num_classes="400",
             frame_rate=frame_rate,
             step_between_clips=step_between_clips,
             frames_per_clip=frames_per_clip,

From 8cd5209be9d92208ab540c3c8a6dca24bbcd2c78 Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Thu, 22 Apr 2021 12:51:29 -0500
Subject: [PATCH 07/61] Lint

---
 torchvision/datasets/kinetics.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index ee4909a43db..77a767b43c9 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -36,7 +36,7 @@ class Kinetics(VisionDataset):
     frames in a video might be present.
 
     Args:
-        root (string): Root directory of the (split of the) Kinetics Dataset. 
+        root (string): Root directory of the (split of the) Kinetics Dataset.
             Directory should be structured as follows:
             .. code::
 
@@ -49,7 +49,7 @@ class Kinetics(VisionDataset):
                     ├── clipx.avi
                     └── ...
             If the split is not defined, it is appended using the split argument.
-        n_classes (int): select between Kinetics-400, Kinetics-600, and Kinetics-700 
+        n_classes (int): select between Kinetics-400, Kinetics-600, and Kinetics-700
         split (str): split of the dataset to consider; currently supports ["train", "val"]
         frame_rate (float): If not None, interpolate different frame rate for each clip.
         frames_per_clip (int): number of frames in a clip
@@ -68,7 +68,7 @@ class Kinetics(VisionDataset):
             - audio(Tensor[K, L]): the audio frames, where `K` is the number of channels
               and `L` is the number of points in torch.float tensor
             - label (int): class of the video clip
-    
+
     Raises:
         RuntimeError: If ``download is True`` and the image archive is already extracted.
     """
@@ -146,7 +146,7 @@ def __init__(
 
     def _set_up_paths(self, root, split) -> None:
         """Sets up self.root and self.split to avoid confusion.
-        Split in the root (e.g. kinetics/val) overrides the setting in 
+        Split in the root (e.g. kinetics/val) overrides the setting in
         the split.
         """
         self.split = split
@@ -177,7 +177,7 @@ def download_and_process_videos(self) -> None:
         print("Elapsed time overall in mins ", (toc2 - tic) / 60)
 
     def _download_videos(self) -> None:
-        """download tarballs containing the video to 
+        """download tarballs containing the video to
         "tars" folder and extract them into the _split_ folder
         where split is one of the official dataset splits.
 
@@ -215,11 +215,11 @@ def _download_videos(self) -> None:
             poolproc.map(part, lines)
 
     def _make_ds_structure(self):
-        """move videos from 
+        """move videos from
         root/
             ├── clip1.avi
             ├── clip2.avi
-        
+
         to the correct format as described below:
         root/
             ├── class1

From a6d2490023fb3f8ad711a87b6ad8a83046feafa7 Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Thu, 22 Apr 2021 13:40:39 -0500
Subject: [PATCH 08/61] fixing unittest

---
 torchvision/datasets/kinetics.py | 21 ++-------------------
 1 file changed, 2 insertions(+), 19 deletions(-)

diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index 77a767b43c9..95aaffbf9d7 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -113,7 +113,8 @@ def __init__(
         self.extensions = extensions
         self._num_download_workers = _num_download_workers
 
-        self._set_up_paths(root, split)
+        self.root = root
+        self.split = split
 
         if annotation_path is not None:
             self.annotations = annotation_path
@@ -144,24 +145,6 @@ def __init__(
         )
         self.transform = transform
 
-    def _set_up_paths(self, root, split) -> None:
-        """Sets up self.root and self.split to avoid confusion.
-        Split in the root (e.g. kinetics/val) overrides the setting in
-        the split.
-        """
-        self.split = split
-        if path.basename(root) == split:
-            self.root = root
-        elif path.basename(root) in ["train", "val"]:
-            self.root = root
-            self.split = path.basename(root)
-            warnings.warn(
-                f"Root {root} points to a different split than {split}."
-                f"Assigning self.split to {self.split}."
-            )
-        else:
-            self.root = path.join(root, split)
-
     def download_and_process_videos(self) -> None:
         """
         downloads all the videos to the _root_ folder

From 93d1444dec084f412912d7522aa01544220bb74f Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Thu, 22 Apr 2021 22:50:05 +0100
Subject: [PATCH 09/61] Minor comments removal

---
 torchvision/datasets/kinetics.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index 95aaffbf9d7..3bf4d9f9bb0 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -121,10 +121,8 @@ def __init__(
 
         if download:
             self.download_and_process_videos()
-        # init folder dataset at the end
         super().__init__(self.root)
 
-        # and then figure out the rest
         self.classes, class_to_idx = find_classes(self.root)
         self.samples = make_dataset(
             self.root, class_to_idx, extensions, is_valid_file=None

From 9e3d3f3a0f39c8bfc187eff4e79a9920aed29a89 Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Fri, 30 Apr 2021 10:26:11 +0100
Subject: [PATCH 10/61] pmeier comments

Co-authored-by: Philip Meier <github.pmeier@posteo.de>
---
 torchvision/datasets/kinetics.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index 3bf4d9f9bb0..fa3b1f25144 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -179,7 +179,6 @@ def _download_videos(self) -> None:
         tar_path = path.join(kinetics_dir, "tars")
         annotation_path = path.join(kinetics_dir, "annotations")
 
-        # download annotations
         download_url(
             self._ANNOTATION[self.n_classes].format(split=self.split), annotation_path
         )

From 139ec6d0623e341964fd713e895e4468c7f4dea0 Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Fri, 30 Apr 2021 04:30:29 -0500
Subject: [PATCH 11/61] remove asserts

---
 torchvision/datasets/kinetics.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index 95aaffbf9d7..60b758f7f68 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -10,7 +10,7 @@
 from functools import partial
 from multiprocessing import Pool
 
-from .utils import download_and_extract_archive, download_url
+from .utils import download_and_extract_archive, download_url, verify_str_arg
 from .folder import find_classes, make_dataset
 from .video_utils import VideoClips
 from .vision import VisionDataset
@@ -107,8 +107,8 @@ def __init__(
     ) -> None:
 
         # TODO: support test
-        assert split in ["train", "val"]
-        assert num_classes in ["400", "600", "700"]
+        verify_str_arg(split, arg="split", valid_values=['train', 'val'])
+        verify_str_arg(num_classes, arg="num_classes", valid_values=["400", "600", "700"])
         self.n_classes = num_classes
         self.extensions = extensions
         self._num_download_workers = _num_download_workers

From 33a9f98644203a174db10c69a9008f931d287652 Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Fri, 30 Apr 2021 04:32:55 -0500
Subject: [PATCH 12/61] address pmeier formatting changes

---
 torchvision/datasets/kinetics.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index 60b758f7f68..a241fac0d79 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -73,12 +73,12 @@ class Kinetics(VisionDataset):
         RuntimeError: If ``download is True`` and the image archive is already extracted.
     """
 
-    _FILES = {
+    _TAR_URLS = {
         "400": "https://s3.amazonaws.com/kinetics/400/{split}/k400_{split}_path.txt",
         "600": "https://s3.amazonaws.com/kinetics/600/{split}/k600_{split}_path.txt",
         "700": "https://s3.amazonaws.com/kinetics/700_2020/{split}/k700_2020_{split}_path.txt",
     }
-    _ANNOTATION = {
+    _ANNOTATION_URLS = {
         "400": "https://s3.amazonaws.com/kinetics/400/annotations/{split}.csv",
         "600": "https://s3.amazonaws.com/kinetics/600/annotations/{split}.txt",
         "700": "https://s3.amazonaws.com/kinetics/700_2020/annotations/{split}.csv",
@@ -126,9 +126,8 @@ def __init__(
 
         # and then figure out the rest
         self.classes, class_to_idx = find_classes(self.root)
-        self.samples = make_dataset(
-            self.root, class_to_idx, extensions, is_valid_file=None
-        )
+        self.samples = make_dataset(self.root, class_to_idx, extensions, is_valid_file=None)
+
         video_list = [x[0] for x in self.samples]
         self.video_clips = VideoClips(
             video_list,
@@ -175,7 +174,7 @@ def _download_videos(self) -> None:
             )
 
         file_url = urllib.request.urlopen(
-            self._FILES[self.n_classes].format(split=self.split)
+            self._TAR_URLS[self.n_classes].format(split=self.split)
         )
         kinetics_dir, _ = path.split(self.root)
         tar_path = path.join(kinetics_dir, "tars")
@@ -183,7 +182,7 @@ def _download_videos(self) -> None:
 
         # download annotations
         download_url(
-            self._ANNOTATION[self.n_classes].format(split=self.split), annotation_path
+            self._ANNOTATION_URLS[self.n_classes].format(split=self.split), annotation_path
         )
         self.annotations = os.path.join(annotation_path, f"{self.split}.csv")
 

From abdd2f6346a5159d7212b9c985d016477fec9f42 Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Fri, 30 Apr 2021 10:33:11 +0100
Subject: [PATCH 13/61] address pmeier changes

Co-authored-by: Philip Meier <github.pmeier@posteo.de>
---
 torchvision/datasets/kinetics.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index fa3b1f25144..6b10acfa3f5 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -144,10 +144,7 @@ def __init__(
         self.transform = transform
 
     def download_and_process_videos(self) -> None:
-        """
-        downloads all the videos to the _root_ folder
-        in the expected format
-        """
+        """Downloads all the videos to the _root_ folder in the expected format."""
         tic = time.time()
         self._download_videos()
         toc = time.time()

From 1460886f5b8b106021f92ad915915c0505c4305c Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Fri, 30 Apr 2021 10:34:14 +0100
Subject: [PATCH 14/61] pmeier changes

Co-authored-by: Philip Meier <github.pmeier@posteo.de>
---
 torchvision/datasets/kinetics.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index 6b10acfa3f5..ebc8b1cd178 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -306,9 +306,8 @@ def __init__(
         _audio_channels=0,
     ):
         warnings.warn(
-            "torchvision now supports multiple versions of Kinetics"
-            "datasets, available via Kinetics class with a separate "
-            "n_classes parameter. This function might get deprecated in the future."
+            "Kinetics400 is deprecated and will be removed in a future release."
+            "It was replaced by Kinetics(..., n_classes="400")".
         )
 
         super(Kinetics400, self).__init__(

From 7b069066d043f74d5e0dfaa1c25e251b05ad7a1b Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Fri, 30 Apr 2021 04:35:19 -0500
Subject: [PATCH 15/61] rename n_classes to num_classes

---
 torchvision/datasets/kinetics.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index a241fac0d79..3ca02846e82 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -49,7 +49,7 @@ class Kinetics(VisionDataset):
                     ├── clipx.avi
                     └── ...
             If the split is not defined, it is appended using the split argument.
-        n_classes (int): select between Kinetics-400, Kinetics-600, and Kinetics-700
+        num_classes (int): select between Kinetics-400, Kinetics-600, and Kinetics-700
         split (str): split of the dataset to consider; currently supports ["train", "val"]
         frame_rate (float): If not None, interpolate different frame rate for each clip.
         frames_per_clip (int): number of frames in a clip
@@ -109,7 +109,7 @@ def __init__(
         # TODO: support test
         verify_str_arg(split, arg="split", valid_values=['train', 'val'])
         verify_str_arg(num_classes, arg="num_classes", valid_values=["400", "600", "700"])
-        self.n_classes = num_classes
+        self.num_classes = num_classes
         self.extensions = extensions
         self._num_download_workers = _num_download_workers
 
@@ -174,7 +174,7 @@ def _download_videos(self) -> None:
             )
 
         file_url = urllib.request.urlopen(
-            self._TAR_URLS[self.n_classes].format(split=self.split)
+            self._TAR_URLS[self.num_classes].format(split=self.split)
         )
         kinetics_dir, _ = path.split(self.root)
         tar_path = path.join(kinetics_dir, "tars")
@@ -182,7 +182,7 @@ def _download_videos(self) -> None:
 
         # download annotations
         download_url(
-            self._ANNOTATION_URLS[self.n_classes].format(split=self.split), annotation_path
+            self._ANNOTATION_URLS[self.num_classes].format(split=self.split), annotation_path
         )
         self.annotations = os.path.join(annotation_path, f"{self.split}.csv")
 
@@ -313,7 +313,7 @@ def __init__(
         warnings.warn(
             "torchvision now supports multiple versions of Kinetics"
             "datasets, available via Kinetics class with a separate "
-            "n_classes parameter. This function might get deprecated in the future."
+            "num_classes parameter. This function might get deprecated in the future."
         )
 
         super(Kinetics400, self).__init__(

From e76f4aba1bbaf4b07b5098f6a0c9d54e697f47bd Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Fri, 30 Apr 2021 04:38:36 -0500
Subject: [PATCH 16/61] formatting changes

---
 torchvision/datasets/kinetics.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index 57fd72aed85..f641b25d550 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -44,6 +44,7 @@ class Kinetics(VisionDataset):
                 ├── class1
                 │   ├── clip1.avi
                 │   ├── clip2.avi
+                │   ├── clip3.mp4
                 │   └── ...
                 └── class2
                     ├── clipx.avi
@@ -269,6 +270,7 @@ class Kinetics400(Kinetics):
                 ├── class1
                 │   ├── clip1.avi
                 │   ├── clip2.avi
+                │   ├── clip3.mp4
                 │   └── ...
                 └── class2
                     ├── clipx.avi
@@ -306,7 +308,7 @@ def __init__(
     ):
         warnings.warn(
             "Kinetics400 is deprecated and will be removed in a future release."
-            "It was replaced by Kinetics(..., num_classes="400")".
+            "It was replaced by Kinetics(..., num_classes=\"400\")".
         )
 
         super(Kinetics400, self).__init__(

From 0a8f2164c8c130f512a2e08a9ddf5b9a8520296c Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Fri, 30 Apr 2021 04:41:38 -0500
Subject: [PATCH 17/61] doc change to add ".mp4" to backported class

---
 torchvision/datasets/kinetics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index f641b25d550..1c654cb969b 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -296,7 +296,7 @@ def __init__(
         frames_per_clip,
         step_between_clips=1,
         frame_rate=None,
-        extensions=("avi",),
+        extensions=("avi", "mp4"),
         transform=None,
         _precomputed_metadata=None,
         num_workers=1,

From 94a40aab6e5c281fb4b1fc73126583518575a14f Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Fri, 30 Apr 2021 04:47:43 -0500
Subject: [PATCH 18/61] formatting to correct line length

---
 torchvision/datasets/kinetics.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index 1c654cb969b..4212004c05c 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -155,13 +155,11 @@ def download_and_process_videos(self) -> None:
         print("Elapsed time overall in mins ", (toc2 - tic) / 60)
 
     def _download_videos(self) -> None:
-        """download tarballs containing the video to
-        "tars" folder and extract them into the _split_ folder
-        where split is one of the official dataset splits.
+        """download tarballs containing the video to "tars" folder and extract them into the _split_ folder where
+        split is one of the official dataset splits.
 
         Raises:
-            RuntimeError: if download folder exists, break to prevent
-              downloading entire dataset again.
+            RuntimeError: if download folder exists, break to prevent downloading entire dataset again.
         """
         if path.exists(self.root):
             raise RuntimeError(

From c585a5f62bf6af9902f5d5dd1377120441f46685 Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Fri, 30 Apr 2021 04:56:45 -0500
Subject: [PATCH 19/61] adding **kwargs to Kinetics400 class

---
 torchvision/datasets/kinetics.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index 4212004c05c..742f18c4d20 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -42,12 +42,12 @@ class Kinetics(VisionDataset):
 
                 root/
                 ├── class1
-                │   ├── clip1.avi
-                │   ├── clip2.avi
+                │   ├── clip1.mp4
+                │   ├── clip2.mp4
                 │   ├── clip3.mp4
                 │   └── ...
                 └── class2
-                    ├── clipx.avi
+                    ├── clipx.mp4
                     └── ...
             If the split is not defined, it is appended using the split argument.
         num_classes (int): select between Kinetics-400, Kinetics-600, and Kinetics-700
@@ -303,6 +303,7 @@ def __init__(
         _video_min_dimension=0,
         _audio_samples=0,
         _audio_channels=0,
+        **kwargs
     ):
         warnings.warn(
             "Kinetics400 is deprecated and will be removed in a future release."

From 8cacd804513d5d1df79288cd90dc2a1ed417da4f Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Fri, 30 Apr 2021 06:08:07 -0500
Subject: [PATCH 20/61] remove urllib request and download the file directly

---
 torchvision/datasets/kinetics.py | 35 ++++++++++++++++----------------
 1 file changed, 17 insertions(+), 18 deletions(-)

diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index 742f18c4d20..5b750679b7c 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -1,4 +1,3 @@
-import urllib
 import time
 import os
 import warnings
@@ -167,25 +166,26 @@ def _download_videos(self) -> None:
                 f"delete the directory."
             )
 
-        file_url = urllib.request.urlopen(
-            self._TAR_URLS[self.num_classes].format(split=self.split)
-        )
-        kinetics_dir, _ = path.split(self.root)
+        kinetics_dir, split = path.split(self.root)
+        assert split == self.split
         tar_path = path.join(kinetics_dir, "tars")
         annotation_path = path.join(kinetics_dir, "annotations")
+        file_list_path = path.join(kinetics_dir, "files")
 
-        download_url(
-            self._ANNOTATION_URLS[self.num_classes].format(split=self.split), annotation_path
-        )
-        self.annotations = os.path.join(annotation_path, f"{self.split}.csv")
+        split_url = self._TAR_URLS[self.num_classes].format(split=self.split)
+        download_url(split_url, file_list_path)
+        list_video_urls = open(path.join(file_list_path, path.basename(split_url)), "r")
+
+        download_url(self._ANNOTATION_URLS[self.num_classes].format(split=self.split), annotation_path)
+        self.annotations = path.join(annotation_path, f"{self.split}.csv")
 
         if self._num_download_workers == 1:
-            for line in file_url:
-                line = str(line.decode("utf-8")).replace("\n", "")
+            for line in list_video_urls.readlines():
+                line = str(line).replace("\n", "")
                 download_and_extract_archive(line, tar_path, self.root)
         else:
             part = partial(_dl_wrap, tar_path, self.root)
-            lines = [str(line.decode("utf-8")).replace("\n", "") for line in file_url]
+            lines = [str(line).replace("\n", "") for line in list_video_urls.readlines()]
             poolproc = Pool(self._num_download_workers)
             poolproc.map(part, lines)
 
@@ -217,11 +217,11 @@ def _make_ds_structure(self):
                     .replace("(", "")
                     .replace(")", "")
                 )
-                os.makedirs(os.path.join(self.root, label), exist_ok=True)
-                existing_file = os.path.join(self.root, f)
-                if os.path.isfile(existing_file):
+                os.makedirs(path.join(self.root, label), exist_ok=True)
+                existing_file = path.join(self.root, f)
+                if path.isfile(existing_file):
                     os.replace(
-                        existing_file, os.path.join(self.root, label, f),
+                        existing_file, path.join(self.root, label, f),
                     )
 
     @property
@@ -307,8 +307,7 @@ def __init__(
     ):
         warnings.warn(
             "Kinetics400 is deprecated and will be removed in a future release."
-            "It was replaced by Kinetics(..., num_classes=\"400\")".
-        )
+            "It was replaced by Kinetics(..., num_classes=\"400\").")
 
         super(Kinetics400, self).__init__(
             root=root,

From 802f8f9f95315211b03ae3046554e002aeb3aa04 Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Fri, 30 Apr 2021 06:15:41 -0500
Subject: [PATCH 21/61] annotations and files can be already downloaded

---
 torchvision/datasets/kinetics.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index 5b750679b7c..565a9225a65 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -173,10 +173,13 @@ def _download_videos(self) -> None:
         file_list_path = path.join(kinetics_dir, "files")
 
         split_url = self._TAR_URLS[self.num_classes].format(split=self.split)
-        download_url(split_url, file_list_path)
-        list_video_urls = open(path.join(file_list_path, path.basename(split_url)), "r")
+        split_url_filepath = path.join(file_list_path, path.basename(split_url))
+        if not path.isfile(split_url_filepath):
+            download_url(split_url, file_list_path)
+        list_video_urls = open(split_url_filepath, "r")
 
-        download_url(self._ANNOTATION_URLS[self.num_classes].format(split=self.split), annotation_path)
+        if not path.isfile(path.join(annotation_path, f"{self.split}.csv")):
+            download_url(self._ANNOTATION_URLS[self.num_classes].format(split=self.split), annotation_path)
         self.annotations = path.join(annotation_path, f"{self.split}.csv")
 
         if self._num_download_workers == 1:

From af70e5f3493b156f6a1c57f6c922ff96133ae29c Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Tue, 4 May 2021 08:50:45 -0500
Subject: [PATCH 22/61] test fix

---
 test/test_datasets.py            | 24 ++++++++++++++++++++++++
 torchvision/datasets/kinetics.py |  4 +++-
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/test/test_datasets.py b/test/test_datasets.py
index f28885d064d..e8a427900bb 100644
--- a/test/test_datasets.py
+++ b/test/test_datasets.py
@@ -1064,8 +1064,32 @@ def test_not_found_or_corrupted(self):
             super().test_not_found_or_corrupted()
 
 
+class KineticsTestCase(datasets_utils.VideoDatasetTestCase):
+    DATASET_CLASS = datasets.Kinetics
+    # DEFAULT_CONFIG = {"frames_per_clip": 1}
+    # ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(
+    #     split=("train", "val")
+    # )
+
+    
+    def inject_fake_data(self, tmpdir, config):
+        classes = ("Abseiling", "Zumba")
+        num_videos_per_class = 2
+        #tmpdir = pathlib.Path(tmpdir) / config['split']
+        digits = string.ascii_letters + string.digits + "-_"
+        for cls in classes:
+            datasets_utils.create_video_folder(
+                tmpdir,
+                cls,
+                lambda _: f"{datasets_utils.create_random_string(11, digits)}.mp4",
+                num_videos_per_class,
+            )
+        # ret = {'num_examples': num_videos_per_class * len(classes)}
+        return num_videos_per_class * len(classes)
+
 class Kinetics400TestCase(datasets_utils.VideoDatasetTestCase):
     DATASET_CLASS = datasets.Kinetics400
+    # DEFAULT_CONFIG = {"frames_per_clip": 1}
 
     def inject_fake_data(self, tmpdir, config):
         classes = ("Abseiling", "Zumba")
diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index 565a9225a65..ffc2a1a4350 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -87,10 +87,10 @@ class Kinetics(VisionDataset):
     def __init__(
         self,
         root: str,
+        frames_per_clip: int,
         num_classes: str = "400",
         split: str = "train",
         frame_rate: float = None,
-        frames_per_clip: int = 5,
         step_between_clips: int = 1,
         annotation_path: str = None,
         transform: Optional[Callable] = None,
@@ -121,6 +121,8 @@ def __init__(
 
         if download:
             self.download_and_process_videos()
+
+        print("HERE")
         super().__init__(self.root)
 
         self.classes, class_to_idx = find_classes(self.root)

From 6ec32534d46d7e7a59b6277c8e3be4e7880b73e2 Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Tue, 4 May 2021 16:08:19 +0200
Subject: [PATCH 23/61] add download tests for Kinetics

---
 test/test_datasets_download.py | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/test/test_datasets_download.py b/test/test_datasets_download.py
index 6ff3a33bcc9..3a11f9962de 100644
--- a/test/test_datasets_download.py
+++ b/test/test_datasets_download.py
@@ -391,6 +391,25 @@ def widerface():
     )
 
 
+def kinetics():
+    return itertools.chain(
+        *[
+            collect_download_configs(
+                lambda: datasets.Kinetics(
+                    path.join(ROOT, "Kinetics", split),
+                    frames_per_clip=1,
+                    num_classes=num_classes,
+                    split=split,
+                    download=True,
+                ),
+                name=f"Kinetics, {num_classes}, {split}",
+                file="kinetics",
+            )
+            for num_classes, split in itertools.product(("400", "600", "700"), ("train", "val"))
+        ]
+    )
+
+
 def make_parametrize_kwargs(download_configs):
     argvalues = []
     ids = []
@@ -426,6 +445,7 @@ def make_parametrize_kwargs(download_configs):
             usps(),
             celeba(),
             widerface(),
+            kinetics(),
         )
     )
 )

From b84b298553b06916fdb37d015a184633292ac340 Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Tue, 4 May 2021 12:57:33 -0500
Subject: [PATCH 24/61] users now don't need to provide the full path within
 the root for the new Kinetics dataset

---
 test/test_datasets.py            | 20 +++++++++++-------
 test/test_datasets_download.py   |  2 +-
 torchvision/datasets/kinetics.py | 35 ++++++++++++++++++--------------
 3 files changed, 34 insertions(+), 23 deletions(-)

diff --git a/test/test_datasets.py b/test/test_datasets.py
index e8a427900bb..07fe89cdfdb 100644
--- a/test/test_datasets.py
+++ b/test/test_datasets.py
@@ -1066,16 +1066,15 @@ def test_not_found_or_corrupted(self):
 
 class KineticsTestCase(datasets_utils.VideoDatasetTestCase):
     DATASET_CLASS = datasets.Kinetics
-    # DEFAULT_CONFIG = {"frames_per_clip": 1}
-    # ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(
-    #     split=("train", "val")
-    # )
+    ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(
+        split=("train", "val"), num_classes=("400", "600", "700")
+    )
 
     
     def inject_fake_data(self, tmpdir, config):
         classes = ("Abseiling", "Zumba")
         num_videos_per_class = 2
-        #tmpdir = pathlib.Path(tmpdir) / config['split']
+        tmpdir = pathlib.Path(tmpdir) / config['split']
         digits = string.ascii_letters + string.digits + "-_"
         for cls in classes:
             datasets_utils.create_video_folder(
@@ -1084,18 +1083,25 @@ def inject_fake_data(self, tmpdir, config):
                 lambda _: f"{datasets_utils.create_random_string(11, digits)}.mp4",
                 num_videos_per_class,
             )
-        # ret = {'num_examples': num_videos_per_class * len(classes)}
         return num_videos_per_class * len(classes)
 
 class Kinetics400TestCase(datasets_utils.VideoDatasetTestCase):
     DATASET_CLASS = datasets.Kinetics400
-    # DEFAULT_CONFIG = {"frames_per_clip": 1}
+
+    def dataset_args(self, tmpdir, config):
+        # note: train is here hardcoded by default bc we expect the user to supply it,
+        # but that requirement have changed in subsequent version of the dataset
+        root = pathlib.Path(tmpdir) / "train"
+        return root, 1
 
     def inject_fake_data(self, tmpdir, config):
         classes = ("Abseiling", "Zumba")
         num_videos_per_class = 2
 
         digits = string.ascii_letters + string.digits + "-_"
+        # note: train is here hardcoded by default bc we expect the user to supply it,
+        # but that requirement have changed in subsequent version of the dataset
+        tmpdir = pathlib.Path(tmpdir) / "train"
         for cls in classes:
             datasets_utils.create_video_folder(
                 tmpdir,
diff --git a/test/test_datasets_download.py b/test/test_datasets_download.py
index 3a11f9962de..f3213ddd31c 100644
--- a/test/test_datasets_download.py
+++ b/test/test_datasets_download.py
@@ -396,7 +396,7 @@ def kinetics():
         *[
             collect_download_configs(
                 lambda: datasets.Kinetics(
-                    path.join(ROOT, "Kinetics", split),
+                    path.join(ROOT, f"Kinetics_{num_classes}"),
                     frames_per_clip=1,
                     num_classes=num_classes,
                     split=split,
diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index ffc2a1a4350..3b6371f9c13 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -35,20 +35,21 @@ class Kinetics(VisionDataset):
     frames in a video might be present.
 
     Args:
-        root (string): Root directory of the (split of the) Kinetics Dataset.
+        root (string): Root directory of the Kinetics Dataset.
             Directory should be structured as follows:
             .. code::
 
                 root/
-                ├── class1
-                │   ├── clip1.mp4
-                │   ├── clip2.mp4
-                │   ├── clip3.mp4
-                │   └── ...
-                └── class2
-                    ├── clipx.mp4
-                    └── ...
-            If the split is not defined, it is appended using the split argument.
+                ├── split
+                │   ├──  class1
+                │   │   ├──  clip1.mp4
+                │   │   ├──  clip2.mp4
+                │   │   ├──  clip3.mp4
+                │   │   ├──  ...
+                │   ├──  class2
+                │   │   ├──   clipx.mp4
+                │   │    └── ...
+            Split is appended using the split argument.
         num_classes (int): select between Kinetics-400, Kinetics-600, and Kinetics-700
         split (str): split of the dataset to consider; currently supports ["train", "val"]
         frame_rate (float): If not None, interpolate different frame rate for each clip.
@@ -113,7 +114,10 @@ def __init__(
         self.extensions = extensions
         self._num_download_workers = _num_download_workers
 
-        self.root = root
+        if path.basename(root) != split:
+            self.root = path.join(root, split)
+        else:
+            self.root = root
         self.split = split
 
         if annotation_path is not None:
@@ -122,7 +126,6 @@ def __init__(
         if download:
             self.download_and_process_videos()
 
-        print("HERE")
         super().__init__(self.root)
 
         self.classes, class_to_idx = find_classes(self.root)
@@ -167,9 +170,9 @@ def _download_videos(self) -> None:
                 f"The directory {self.root} already exists. If you want to re-download or re-extract the images, "
                 f"delete the directory."
             )
-
+        # check that the assignment was made properly
         kinetics_dir, split = path.split(self.root)
-        assert split == self.split
+        assert split == self.split, 'File folder assignment not done properly'
         tar_path = path.join(kinetics_dir, "tars")
         annotation_path = path.join(kinetics_dir, "annotations")
         file_list_path = path.join(kinetics_dir, "files")
@@ -314,8 +317,10 @@ def __init__(
             "Kinetics400 is deprecated and will be removed in a future release."
             "It was replaced by Kinetics(..., num_classes=\"400\").")
 
+        kinetics_dir, split = path.split(root)
         super(Kinetics400, self).__init__(
-            root=root,
+            root=kinetics_dir,
+            split=split,
             num_classes="400",
             frame_rate=frame_rate,
             step_between_clips=step_between_clips,

From d7f14d0e6652b1cefd65eef500db07eccd3f889b Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Tue, 4 May 2021 13:22:43 -0500
Subject: [PATCH 25/61] linter

---
 test/test_datasets.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/test/test_datasets.py b/test/test_datasets.py
index 07fe89cdfdb..87c62e9dd1f 100644
--- a/test/test_datasets.py
+++ b/test/test_datasets.py
@@ -961,7 +961,6 @@ def _create_annotation_files(self, root, video_files, fold, train):
         other_annotations.remove(current_annotation)
         for name in other_annotations:
             self._create_annotation_file(root, name, other_videos)
-
         return len(current_videos)
 
     def _annotation_file_name(self, fold, train):
@@ -1070,7 +1069,6 @@ class KineticsTestCase(datasets_utils.VideoDatasetTestCase):
         split=("train", "val"), num_classes=("400", "600", "700")
     )
 
-    
     def inject_fake_data(self, tmpdir, config):
         classes = ("Abseiling", "Zumba")
         num_videos_per_class = 2
@@ -1085,6 +1083,7 @@ def inject_fake_data(self, tmpdir, config):
             )
         return num_videos_per_class * len(classes)
 
+
 class Kinetics400TestCase(datasets_utils.VideoDatasetTestCase):
     DATASET_CLASS = datasets.Kinetics400
 

From 96e2becb244ed3fd0e608e6d3b9d3a5c0ba140b0 Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Wed, 5 May 2021 10:44:40 +0200
Subject: [PATCH 26/61] Update test/test_datasets_download.py

---
 test/test_datasets_download.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/test/test_datasets_download.py b/test/test_datasets_download.py
index 9b0f8df4504..1defa9c4eca 100644
--- a/test/test_datasets_download.py
+++ b/test/test_datasets_download.py
@@ -407,7 +407,8 @@ def kinetics():
                 file="kinetics",
             )
             for num_classes, split in itertools.product(("400", "600", "700"), ("train", "val"))
-
+        ]
+    )
 def kitti():
     return itertools.chain(
         *[

From 20dc75d3d344392c4a6e360874417b61e6c4d122 Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Wed, 5 May 2021 14:56:20 +0100
Subject: [PATCH 27/61] Update torchvision/datasets/kinetics.py

Co-authored-by: Philip Meier <github.pmeier@posteo.de>
---
 torchvision/datasets/kinetics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index 3b6371f9c13..aa58bd5d765 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -71,7 +71,7 @@ class Kinetics(VisionDataset):
             - label (int): class of the video clip
 
     Raises:
-        RuntimeError: If ``download is True`` and the image archive is already extracted.
+        RuntimeError: If ``download is True`` and the video archives are already extracted.
     """
 
     _TAR_URLS = {

From 5ea1232864a27b62f7fc8fbfaa6b6aa209bc5c29 Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Wed, 5 May 2021 09:49:03 -0500
Subject: [PATCH 28/61] revert whitespace (3680#discussion_r626382842)

---
 test/test_datasets.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/test_datasets.py b/test/test_datasets.py
index 7d82fee9262..41defd2a631 100644
--- a/test/test_datasets.py
+++ b/test/test_datasets.py
@@ -841,6 +841,7 @@ def _create_annotation_files(self, root, video_files, fold, train):
         other_annotations.remove(current_annotation)
         for name in other_annotations:
             self._create_annotation_file(root, name, other_videos)
+
         return len(current_videos)
 
     def _annotation_file_name(self, fold, train):

From 607a3cb293879fbcccebb6a0e67dca19cd0ed16a Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Wed, 5 May 2021 09:52:28 -0500
Subject: [PATCH 29/61] addressing annotation_path parameter which is
 unnecessary

---
 torchvision/datasets/kinetics.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index aa58bd5d765..6cc2c0072c5 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -55,7 +55,6 @@ class Kinetics(VisionDataset):
         frame_rate (float): If not None, interpolate different frame rate for each clip.
         frames_per_clip (int): number of frames in a clip
         step_between_clips (int): number of frames between each clip
-        annotation_path (str): path to official Kinetics annotation file.
         transform (callable, optional): A function/transform that  takes in a TxHxWxC video
             and returns a transformed version.
         download (bool): Download the official version of the dataset to root folder.
@@ -93,7 +92,6 @@ def __init__(
         split: str = "train",
         frame_rate: float = None,
         step_between_clips: int = 1,
-        annotation_path: str = None,
         transform: Optional[Callable] = None,
         extensions=("avi", "mp4"),
         download: bool = False,
@@ -120,9 +118,6 @@ def __init__(
             self.root = root
         self.split = split
 
-        if annotation_path is not None:
-            self.annotations = annotation_path
-
         if download:
             self.download_and_process_videos()
 

From da586c65b8793a458ae4f4ac7627d4dcbc62bde9 Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Wed, 5 May 2021 15:53:06 +0100
Subject: [PATCH 30/61] Update torchvision/datasets/kinetics.py

Co-authored-by: Philip Meier <github.pmeier@posteo.de>
---
 torchvision/datasets/kinetics.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index aa58bd5d765..e9499dfc522 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -130,7 +130,6 @@ def __init__(
 
         self.classes, class_to_idx = find_classes(self.root)
         self.samples = make_dataset(self.root, class_to_idx, extensions, is_valid_file=None)
-
         video_list = [x[0] for x in self.samples]
         self.video_clips = VideoClips(
             video_list,

From fd2208b063e08f85a2fa432e70e5ec545a0fdb6b Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Wed, 5 May 2021 15:54:41 +0100
Subject: [PATCH 31/61] Update torchvision/datasets/kinetics.py

Co-authored-by: Philip Meier <github.pmeier@posteo.de>
---
 torchvision/datasets/kinetics.py | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index e9499dfc522..d0d19688121 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -321,17 +321,6 @@ def __init__(
             root=kinetics_dir,
             split=split,
             num_classes="400",
-            frame_rate=frame_rate,
-            step_between_clips=step_between_clips,
-            frames_per_clip=frames_per_clip,
-            extensions=extensions,
-            transform=transform,
-            _precomputed_metadata=_precomputed_metadata,
-            num_workers=num_workers,
-            _video_width=_video_width,
-            _video_height=_video_height,
-            _video_min_dimension=_video_min_dimension,
-            _audio_channels=_audio_channels,
-            _audio_samples=_audio_samples,
             download=False,
+            **kwargs,
         )

From 0dc04d3dcc175c4db11b335d51dd23cc577af947 Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Wed, 5 May 2021 15:55:12 +0100
Subject: [PATCH 32/61] kwargs update

Co-authored-by: Philip Meier <github.pmeier@posteo.de>
---
 torchvision/datasets/kinetics.py | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index d0d19688121..32083b92032 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -299,17 +299,6 @@ def __init__(
         self,
         root,
         frames_per_clip,
-        step_between_clips=1,
-        frame_rate=None,
-        extensions=("avi", "mp4"),
-        transform=None,
-        _precomputed_metadata=None,
-        num_workers=1,
-        _video_width=0,
-        _video_height=0,
-        _video_min_dimension=0,
-        _audio_samples=0,
-        _audio_channels=0,
         **kwargs
     ):
         warnings.warn(

From 2bdd820e2ad46570726d61ee335a279bb41561bb Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Wed, 5 May 2021 09:56:20 -0500
Subject: [PATCH 33/61] expose num_download_workers as public

---
 torchvision/datasets/kinetics.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index 2ede26e48fc..3c2541fd0f9 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -59,7 +59,7 @@ class Kinetics(VisionDataset):
             and returns a transformed version.
         download (bool): Download the official version of the dataset to root folder.
         num_workers (int): Use multiple workers for VideoClips creation
-        _num_download_workers (int): Use multiprocessing in order to speed up download.
+        num_download_workers (int): Use multiprocessing in order to speed up download.
 
     Returns:
         tuple: A 3-tuple with the following entries:
@@ -97,7 +97,7 @@ def __init__(
         download: bool = False,
         num_workers: int = 1,
         _precomputed_metadata=None,
-        _num_download_workers=1,
+        num_download_workers=1,
         _video_width=0,
         _video_height=0,
         _video_min_dimension=0,
@@ -110,7 +110,7 @@ def __init__(
         verify_str_arg(num_classes, arg="num_classes", valid_values=["400", "600", "700"])
         self.num_classes = num_classes
         self.extensions = extensions
-        self._num_download_workers = _num_download_workers
+        self.num_download_workers = num_download_workers
 
         if path.basename(root) != split:
             self.root = path.join(root, split)
@@ -181,14 +181,14 @@ def _download_videos(self) -> None:
             download_url(self._ANNOTATION_URLS[self.num_classes].format(split=self.split), annotation_path)
         self.annotations = path.join(annotation_path, f"{self.split}.csv")
 
-        if self._num_download_workers == 1:
+        if self.num_download_workers == 1:
             for line in list_video_urls.readlines():
                 line = str(line).replace("\n", "")
                 download_and_extract_archive(line, tar_path, self.root)
         else:
             part = partial(_dl_wrap, tar_path, self.root)
             lines = [str(line).replace("\n", "") for line in list_video_urls.readlines()]
-            poolproc = Pool(self._num_download_workers)
+            poolproc = Pool(self.num_download_workers)
             poolproc.map(part, lines)
 
     def _make_ds_structure(self):

From 5640dd9ded41664910ea6d18b17de92d7a6597cc Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Wed, 5 May 2021 09:58:53 -0500
Subject: [PATCH 34/61] swap os.isfile with check_integrity

---
 torchvision/datasets/kinetics.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index 3c2541fd0f9..1d88967b18a 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -9,7 +9,7 @@
 from functools import partial
 from multiprocessing import Pool
 
-from .utils import download_and_extract_archive, download_url, verify_str_arg
+from .utils import download_and_extract_archive, download_url, verify_str_arg, check_integrity
 from .folder import find_classes, make_dataset
 from .video_utils import VideoClips
 from .vision import VisionDataset
@@ -173,11 +173,11 @@ def _download_videos(self) -> None:
 
         split_url = self._TAR_URLS[self.num_classes].format(split=self.split)
         split_url_filepath = path.join(file_list_path, path.basename(split_url))
-        if not path.isfile(split_url_filepath):
+        if not check_integrity(split_url_filepath):
             download_url(split_url, file_list_path)
         list_video_urls = open(split_url_filepath, "r")
 
-        if not path.isfile(path.join(annotation_path, f"{self.split}.csv")):
+        if not check_integrity(path.join(annotation_path, f"{self.split}.csv")):
             download_url(self._ANNOTATION_URLS[self.num_classes].format(split=self.split), annotation_path)
         self.annotations = path.join(annotation_path, f"{self.split}.csv")
 

From 9ef70da53e1cb68c583687023c73e20f96c1580b Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Wed, 5 May 2021 09:59:56 -0500
Subject: [PATCH 35/61] nit on private things

---
 torchvision/datasets/kinetics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index 1d88967b18a..c6dcb6005cf 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -95,9 +95,9 @@ def __init__(
         transform: Optional[Callable] = None,
         extensions=("avi", "mp4"),
         download: bool = False,
+        num_download_workers=1,
         num_workers: int = 1,
         _precomputed_metadata=None,
-        num_download_workers=1,
         _video_width=0,
         _video_height=0,
         _video_min_dimension=0,

From b7b81b17b9d4dff3dcbe102730590c58df2d75ef Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Wed, 5 May 2021 12:01:48 -0500
Subject: [PATCH 36/61] special case if there are no default arguments

---
 test/datasets_utils.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/test/datasets_utils.py b/test/datasets_utils.py
index 8077a03b910..f82e861bf64 100644
--- a/test/datasets_utils.py
+++ b/test/datasets_utils.py
@@ -637,7 +637,11 @@ def __init__(self, *args, **kwargs):
 
     def _set_default_frames_per_clip(self, inject_fake_data):
         argspec = inspect.getfullargspec(self.DATASET_CLASS.__init__)
-        args_without_default = argspec.args[1:-len(argspec.defaults)]
+        # edge case if dataset doesn't have default parameters
+        if argspec.defaults:
+            args_without_default = argspec.args[1:-len(argspec.defaults)]
+        else:
+            args_without_default = argspec.args[1:]
         frames_per_clip_last = args_without_default[-1] == "frames_per_clip"
 
         @functools.wraps(inject_fake_data)

From 36bd2c70fb08846997cb5da228a5f6a82b2b25b6 Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Wed, 5 May 2021 12:02:37 -0500
Subject: [PATCH 37/61] revert changes to kinetics400 test case for BC

---
 test/test_datasets.py | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/test/test_datasets.py b/test/test_datasets.py
index 41defd2a631..bea2a2b80b9 100644
--- a/test/test_datasets.py
+++ b/test/test_datasets.py
@@ -968,20 +968,11 @@ def inject_fake_data(self, tmpdir, config):
 class Kinetics400TestCase(datasets_utils.VideoDatasetTestCase):
     DATASET_CLASS = datasets.Kinetics400
 
-    def dataset_args(self, tmpdir, config):
-        # note: train is here hardcoded by default bc we expect the user to supply it,
-        # but that requirement have changed in subsequent version of the dataset
-        root = pathlib.Path(tmpdir) / "train"
-        return root, 1
-
     def inject_fake_data(self, tmpdir, config):
         classes = ("Abseiling", "Zumba")
         num_videos_per_class = 2
 
         digits = string.ascii_letters + string.digits + "-_"
-        # note: train is here hardcoded by default bc we expect the user to supply it,
-        # but that requirement have changed in subsequent version of the dataset
-        tmpdir = pathlib.Path(tmpdir) / "train"
         for cls in classes:
             datasets_utils.create_video_folder(
                 tmpdir,

From 2bda79c48637434ee7c36a0a25ed3f7ae17d3b6a Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Wed, 5 May 2021 12:03:32 -0500
Subject: [PATCH 38/61] add split_folder changes and support for legacy format

---
 torchvision/datasets/kinetics.py | 55 +++++++++++++++++---------------
 1 file changed, 29 insertions(+), 26 deletions(-)

diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index c6dcb6005cf..7e87d67083f 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -103,28 +103,35 @@ def __init__(
         _video_min_dimension=0,
         _audio_samples=0,
         _audio_channels=0,
+        **kwargs
     ) -> None:
 
         # TODO: support test
-        verify_str_arg(split, arg="split", valid_values=['train', 'val'])
+        verify_str_arg(split, arg="split", valid_values=['train', 'val', 'unknown'])
         verify_str_arg(num_classes, arg="num_classes", valid_values=["400", "600", "700"])
         self.num_classes = num_classes
         self.extensions = extensions
         self.num_download_workers = num_download_workers
 
-        if path.basename(root) != split:
-            self.root = path.join(root, split)
+        _use_legacy_structure = kwargs.get('_use_legacy_structure', False)
+        if _use_legacy_structure:
+            print("Using legacy structure")
+            self.root = root
+            self.split_folder = root
+            self.split = "unknown"
+            assert download == False, "Cannot download the videos using legacy_structure."
         else:
             self.root = root
-        self.split = split
+            self.split_folder = path.join(root, split)
+            self.split = split
 
         if download:
             self.download_and_process_videos()
 
-        super().__init__(self.root)
+        super().__init__(self.split_folder)
 
-        self.classes, class_to_idx = find_classes(self.root)
-        self.samples = make_dataset(self.root, class_to_idx, extensions, is_valid_file=None)
+        self.classes, class_to_idx = find_classes(self.split_folder)
+        self.samples = make_dataset(self.split_folder, class_to_idx, extensions, is_valid_file=None)
         video_list = [x[0] for x in self.samples]
         self.video_clips = VideoClips(
             video_list,
@@ -159,17 +166,15 @@ def _download_videos(self) -> None:
         Raises:
             RuntimeError: if download folder exists, break to prevent downloading entire dataset again.
         """
-        if path.exists(self.root):
+        if path.exists(self.split_folder):
             raise RuntimeError(
-                f"The directory {self.root} already exists. If you want to re-download or re-extract the images, "
+                f"The directory {self.split_folder} already exists. If you want to re-download or re-extract the images, "
                 f"delete the directory."
             )
         # check that the assignment was made properly
-        kinetics_dir, split = path.split(self.root)
-        assert split == self.split, 'File folder assignment not done properly'
-        tar_path = path.join(kinetics_dir, "tars")
-        annotation_path = path.join(kinetics_dir, "annotations")
-        file_list_path = path.join(kinetics_dir, "files")
+        tar_path = path.join(self.root, "tars")
+        annotation_path = path.join(self.root, "annotations")
+        file_list_path = path.join(self.root, "files")
 
         split_url = self._TAR_URLS[self.num_classes].format(split=self.split)
         split_url_filepath = path.join(file_list_path, path.basename(split_url))
@@ -184,21 +189,21 @@ def _download_videos(self) -> None:
         if self.num_download_workers == 1:
             for line in list_video_urls.readlines():
                 line = str(line).replace("\n", "")
-                download_and_extract_archive(line, tar_path, self.root)
+                download_and_extract_archive(line, tar_path, self.split_folder)
         else:
-            part = partial(_dl_wrap, tar_path, self.root)
+            part = partial(_dl_wrap, tar_path, self.split_folder)
             lines = [str(line).replace("\n", "") for line in list_video_urls.readlines()]
             poolproc = Pool(self.num_download_workers)
             poolproc.map(part, lines)
 
     def _make_ds_structure(self):
         """move videos from
-        root/
+        split_folder/
             ├── clip1.avi
             ├── clip2.avi
 
         to the correct format as described below:
-        root/
+        split_folder/
             ├── class1
             │   ├── clip1.avi
 
@@ -219,11 +224,11 @@ def _make_ds_structure(self):
                     .replace("(", "")
                     .replace(")", "")
                 )
-                os.makedirs(path.join(self.root, label), exist_ok=True)
-                existing_file = path.join(self.root, f)
+                os.makedirs(path.join(self.split_folder, label), exist_ok=True)
+                existing_file = path.join(self.split_folder, f)
                 if path.isfile(existing_file):
                     os.replace(
-                        existing_file, path.join(self.root, label, f),
+                        existing_file, path.join(self.split_folder, label, f),
                     )
 
     @property
@@ -300,11 +305,9 @@ def __init__(
             "Kinetics400 is deprecated and will be removed in a future release."
             "It was replaced by Kinetics(..., num_classes=\"400\").")
 
-        kinetics_dir, split = path.split(root)
         super(Kinetics400, self).__init__(
-            root=kinetics_dir,
-            split=split,
-            num_classes="400",
-            download=False,
+            root=root,
+            frames_per_clip=frames_per_clip,
+            _use_legacy_structure=True,
             **kwargs,
         )

From 1a7a978d24f2ea51e0d976b9354ab45ee65666b9 Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Tue, 11 May 2021 17:31:24 +0100
Subject: [PATCH 39/61] pmeiers suggestions

Co-authored-by: Philip Meier <github.pmeier@posteo.de>
---
 torchvision/datasets/kinetics.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index 7e87d67083f..4aa421770ea 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -208,11 +208,11 @@ def _make_ds_structure(self):
             │   ├── clip1.avi
 
         """
-        file_tmp = "{ytid}_{start:06}_{end:06}.mp4"
+        file_fmtstr = "{ytid}_{start:06}_{end:06}.mp4"
         with open(self.annotations) as csvfile:
             reader = csv.DictReader(csvfile)
             for row in reader:
-                f = file_tmp.format(
+                f = file_fmtstr.format(
                     ytid=row["youtube_id"],
                     start=int(row["time_start"]),
                     end=int(row["time_end"]),

From 89e41e6a03d8febc748a11aae9c4b91af5693b28 Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Tue, 11 May 2021 11:36:29 -0500
Subject: [PATCH 40/61] pmeiers suggestions - root comment

---
 torchvision/datasets/kinetics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index 4aa421770ea..af0e5cdf032 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -128,7 +128,7 @@ def __init__(
         if download:
             self.download_and_process_videos()
 
-        super().__init__(self.split_folder)
+        super().__init__(self.root)
 
         self.classes, class_to_idx = find_classes(self.split_folder)
         self.samples = make_dataset(self.split_folder, class_to_idx, extensions, is_valid_file=None)

From 5941dab54a24af97e4b4100ef16ed605fd39546a Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Tue, 11 May 2021 11:45:05 -0500
Subject: [PATCH 41/61] pmeiers comments - annotation attribute removed

---
 torchvision/datasets/kinetics.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index af0e5cdf032..84d945a1c22 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -171,9 +171,7 @@ def _download_videos(self) -> None:
                 f"The directory {self.split_folder} already exists. If you want to re-download or re-extract the images, "
                 f"delete the directory."
             )
-        # check that the assignment was made properly
         tar_path = path.join(self.root, "tars")
-        annotation_path = path.join(self.root, "annotations")
         file_list_path = path.join(self.root, "files")
 
         split_url = self._TAR_URLS[self.num_classes].format(split=self.split)
@@ -182,9 +180,6 @@ def _download_videos(self) -> None:
             download_url(split_url, file_list_path)
         list_video_urls = open(split_url_filepath, "r")
 
-        if not check_integrity(path.join(annotation_path, f"{self.split}.csv")):
-            download_url(self._ANNOTATION_URLS[self.num_classes].format(split=self.split), annotation_path)
-        self.annotations = path.join(annotation_path, f"{self.split}.csv")
 
         if self.num_download_workers == 1:
             for line in list_video_urls.readlines():
@@ -208,8 +203,13 @@ def _make_ds_structure(self):
             │   ├── clip1.avi
 
         """
+        annotation_path = path.join(self.root, "annotations")
+        if not check_integrity(path.join(annotation_path, f"{self.split}.csv")):
+            download_url(self._ANNOTATION_URLS[self.num_classes].format(split=self.split), annotation_path)
+        annotations = path.join(annotation_path, f"{self.split}.csv")
+
         file_fmtstr = "{ytid}_{start:06}_{end:06}.mp4"
-        with open(self.annotations) as csvfile:
+        with open(annotations) as csvfile:
             reader = csv.DictReader(csvfile)
             for row in reader:
                 f = file_fmtstr.format(

From 72d260a207953e16db886cd078f1ffd087257a05 Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Tue, 11 May 2021 18:11:46 +0100
Subject: [PATCH 42/61] pmeiers suggestion

Co-authored-by: Philip Meier <github.pmeier@posteo.de>
---
 torchvision/datasets/kinetics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index 84d945a1c22..b90ad90b8e7 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -50,10 +50,10 @@ class Kinetics(VisionDataset):
                 │   │   ├──   clipx.mp4
                 │   │    └── ...
             Split is appended using the split argument.
+        frames_per_clip (int): number of frames in a clip
         num_classes (int): select between Kinetics-400, Kinetics-600, and Kinetics-700
         split (str): split of the dataset to consider; currently supports ["train", "val"]
         frame_rate (float): If not None, interpolate different frame rate for each clip.
-        frames_per_clip (int): number of frames in a clip
         step_between_clips (int): number of frames between each clip
         transform (callable, optional): A function/transform that  takes in a TxHxWxC video
             and returns a transformed version.

From 51231cf67b1ed8466c53c907865ad87308d2f1a1 Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Tue, 11 May 2021 18:16:56 +0100
Subject: [PATCH 43/61] pmeiers suggestion

Co-authored-by: Philip Meier <github.pmeier@posteo.de>
---
 torchvision/datasets/kinetics.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index b90ad90b8e7..c18c914aa2a 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -51,9 +51,9 @@ class Kinetics(VisionDataset):
                 │   │    └── ...
             Split is appended using the split argument.
         frames_per_clip (int): number of frames in a clip
-        num_classes (int): select between Kinetics-400, Kinetics-600, and Kinetics-700
-        split (str): split of the dataset to consider; currently supports ["train", "val"]
-        frame_rate (float): If not None, interpolate different frame rate for each clip.
+        num_classes (int): select between Kinetics-400 (default), Kinetics-600, and Kinetics-700
+        split (str): split of the dataset to consider; supports ``"train"`` (default) ``"val"``
+        frame_rate (float): If omitted, interpolate different frame rate for each clip.
         step_between_clips (int): number of frames between each clip
         transform (callable, optional): A function/transform that  takes in a TxHxWxC video
             and returns a transformed version.

From 7b91bbe0064fdbbe475068ecb0797f0c3943ef5f Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Tue, 11 May 2021 18:24:01 +0100
Subject: [PATCH 44/61] pmeiers suggestion

Co-authored-by: Philip Meier <github.pmeier@posteo.de>
---
 test/test_datasets_download.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/test_datasets_download.py b/test/test_datasets_download.py
index 1defa9c4eca..6c6e2e1d640 100644
--- a/test/test_datasets_download.py
+++ b/test/test_datasets_download.py
@@ -397,7 +397,7 @@ def kinetics():
         *[
             collect_download_configs(
                 lambda: datasets.Kinetics(
-                    path.join(ROOT, f"Kinetics_{num_classes}"),
+                    path.join(ROOT, f"Kinetics{num_classes}"),
                     frames_per_clip=1,
                     num_classes=num_classes,
                     split=split,

From cd2e55a99165770eed7b29d03b1025ee68bb3dc5 Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Tue, 11 May 2021 18:24:24 +0100
Subject: [PATCH 45/61] pmeiers suggestion

Co-authored-by: Philip Meier <github.pmeier@posteo.de>
---
 test/datasets_utils.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/test/datasets_utils.py b/test/datasets_utils.py
index f82e861bf64..9da6f73347b 100644
--- a/test/datasets_utils.py
+++ b/test/datasets_utils.py
@@ -637,11 +637,7 @@ def __init__(self, *args, **kwargs):
 
     def _set_default_frames_per_clip(self, inject_fake_data):
         argspec = inspect.getfullargspec(self.DATASET_CLASS.__init__)
-        # edge case if dataset doesn't have default parameters
-        if argspec.defaults:
-            args_without_default = argspec.args[1:-len(argspec.defaults)]
-        else:
-            args_without_default = argspec.args[1:]
+        args_without_default = argspec.args[1:(-len(argspec.defaults) if argspec.defaults else None)]
         frames_per_clip_last = args_without_default[-1] == "frames_per_clip"
 
         @functools.wraps(inject_fake_data)

From 7b322e9147b41a17153015754a999efb863959fb Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Tue, 11 May 2021 19:12:46 +0100
Subject: [PATCH 46/61] Update torchvision/datasets/kinetics.py

Co-authored-by: Philip Meier <github.pmeier@posteo.de>
---
 torchvision/datasets/kinetics.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index c18c914aa2a..ca0ff06f71b 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -299,11 +299,20 @@ def __init__(
         self,
         root,
         frames_per_clip,
+        num_classes = None,
+        split = None,
+        download = None,
+        num_download_workers = None,
         **kwargs
     ):
         warnings.warn(
             "Kinetics400 is deprecated and will be removed in a future release."
             "It was replaced by Kinetics(..., num_classes=\"400\").")
+        if any(value is not None for value in (num_classes, split, download, num_download_workers)):
+            raise RuntimeError(
+                "Usage of 'num_classes', 'split', 'download', or 'num_download_workers' is not supported in Kinetics400. "
+                "Please use Kinetics instead."
+            )
 
         super(Kinetics400, self).__init__(
             root=root,

From 328c84e80bbf95999ba39b6d3129702d4bdfddd3 Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Tue, 11 May 2021 19:13:22 +0100
Subject: [PATCH 47/61] Update torchvision/datasets/kinetics.py

Co-authored-by: Philip Meier <github.pmeier@posteo.de>
---
 torchvision/datasets/kinetics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index ca0ff06f71b..ab30e701521 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -90,7 +90,7 @@ def __init__(
         frames_per_clip: int,
         num_classes: str = "400",
         split: str = "train",
-        frame_rate: float = None,
+        frame_rate: Optional[float] = None,
         step_between_clips: int = 1,
         transform: Optional[Callable] = None,
         extensions=("avi", "mp4"),

From 22e5d48a6813e60b3913f98d7b9f48b6f185c861 Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Tue, 11 May 2021 19:14:01 +0100
Subject: [PATCH 48/61] Update torchvision/datasets/kinetics.py

Co-authored-by: Philip Meier <github.pmeier@posteo.de>
---
 torchvision/datasets/kinetics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index ab30e701521..dae017134c9 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -93,7 +93,7 @@ def __init__(
         frame_rate: Optional[float] = None,
         step_between_clips: int = 1,
         transform: Optional[Callable] = None,
-        extensions=("avi", "mp4"),
+        extensions: Tuple[str, ...] = ("avi", "mp4"),
         download: bool = False,
         num_download_workers=1,
         num_workers: int = 1,

From 173d385942178d9d698b1f923f15fd36737f9550 Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Tue, 11 May 2021 19:14:35 +0100
Subject: [PATCH 49/61] Update torchvision/datasets/kinetics.py

Co-authored-by: Philip Meier <github.pmeier@posteo.de>
---
 torchvision/datasets/kinetics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index dae017134c9..a635868d63a 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -95,7 +95,7 @@ def __init__(
         transform: Optional[Callable] = None,
         extensions: Tuple[str, ...] = ("avi", "mp4"),
         download: bool = False,
-        num_download_workers=1,
+        num_download_workers: int = 1,
         num_workers: int = 1,
         _precomputed_metadata=None,
         _video_width=0,

From 5a7db27f658a6261b6cc243c897fd577c3187358 Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Tue, 11 May 2021 19:16:47 +0100
Subject: [PATCH 50/61] Update torchvision/datasets/kinetics.py

Co-authored-by: Philip Meier <github.pmeier@posteo.de>
---
 torchvision/datasets/kinetics.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index a635868d63a..45321a12dbd 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -108,8 +108,7 @@ def __init__(
 
         # TODO: support test
         verify_str_arg(split, arg="split", valid_values=['train', 'val', 'unknown'])
-        verify_str_arg(num_classes, arg="num_classes", valid_values=["400", "600", "700"])
-        self.num_classes = num_classes
+        self.num_classes = verify_str_arg(num_classes, arg="num_classes", valid_values=["400", "600", "700"])
         self.extensions = extensions
         self.num_download_workers = num_download_workers
 

From 44030ee899499e309115bdcb2a3a38ee169d4069 Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Tue, 11 May 2021 19:17:43 +0100
Subject: [PATCH 51/61] Update torchvision/datasets/kinetics.py

Co-authored-by: Philip Meier <github.pmeier@posteo.de>
---
 torchvision/datasets/kinetics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index 45321a12dbd..9ab5336b057 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -118,7 +118,7 @@ def __init__(
             self.root = root
             self.split_folder = root
             self.split = "unknown"
-            assert download == False, "Cannot download the videos using legacy_structure."
+            assert not download, "Cannot download the videos using legacy_structure."
         else:
             self.root = root
             self.split_folder = path.join(root, split)

From ce5f80bdd636ff76167a85e1462655288f517041 Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Tue, 11 May 2021 13:28:48 -0500
Subject: [PATCH 52/61] minor debugging

---
 torchvision/datasets/kinetics.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index 9ab5336b057..8094cd8c254 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -5,7 +5,7 @@
 
 from os import path
 import csv
-from typing import Callable, Optional
+from typing import Callable, Optional, Tuple
 from functools import partial
 from multiprocessing import Pool
 
@@ -49,7 +49,7 @@ class Kinetics(VisionDataset):
                 │   ├──  class2
                 │   │   ├──   clipx.mp4
                 │   │    └── ...
-            Split is appended using the split argument.
+            Note: split is appended automatically using the split argument.
         frames_per_clip (int): number of frames in a clip
         num_classes (int): select between Kinetics-400 (default), Kinetics-600, and Kinetics-700
         split (str): split of the dataset to consider; supports ``"train"`` (default) ``"val"``
@@ -107,22 +107,20 @@ def __init__(
     ) -> None:
 
         # TODO: support test
-        verify_str_arg(split, arg="split", valid_values=['train', 'val', 'unknown'])
         self.num_classes = verify_str_arg(num_classes, arg="num_classes", valid_values=["400", "600", "700"])
         self.extensions = extensions
         self.num_download_workers = num_download_workers
 
+        self.root = root
         _use_legacy_structure = kwargs.get('_use_legacy_structure', False)
         if _use_legacy_structure:
             print("Using legacy structure")
-            self.root = root
             self.split_folder = root
             self.split = "unknown"
             assert not download, "Cannot download the videos using legacy_structure."
         else:
-            self.root = root
             self.split_folder = path.join(root, split)
-            self.split = split
+            self.split = verify_str_arg(split, arg="split", valid_values=["train", "val"])
 
         if download:
             self.download_and_process_videos()

From 803bab12658a43384cd3fb36eaa6285de34d79ad Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Tue, 11 May 2021 14:40:36 -0500
Subject: [PATCH 53/61] nit picks

---
 torchvision/datasets/kinetics.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index 8094cd8c254..954e2c0d660 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -97,12 +97,12 @@ def __init__(
         download: bool = False,
         num_download_workers: int = 1,
         num_workers: int = 1,
-        _precomputed_metadata=None,
-        _video_width=0,
-        _video_height=0,
-        _video_min_dimension=0,
-        _audio_samples=0,
-        _audio_channels=0,
+        _precomputed_metadata = None,
+        _video_width: int = 0,
+        _video_height: int = 0,
+        _video_min_dimension: int = 0,
+        _audio_samples: int = 0,
+        _audio_channels: int = 0,
         **kwargs
     ) -> None:
 

From 6e64bb6165d97d96807f7fcd8b37dff1c59f118e Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Wed, 12 May 2021 09:26:39 +0200
Subject: [PATCH 54/61] only include public kwargs into defaults

---
 test/datasets_utils.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/test/datasets_utils.py b/test/datasets_utils.py
index 9da6f73347b..c305d5391db 100644
--- a/test/datasets_utils.py
+++ b/test/datasets_utils.py
@@ -416,7 +416,11 @@ def _populate_private_class_attributes(cls):
                 continue
 
             defaults.append(
-                {kwarg: default for kwarg, default in zip(argspec.args[-len(argspec.defaults):], argspec.defaults)}
+                {
+                    kwarg: default
+                    for kwarg, default in zip(argspec.args[-len(argspec.defaults):], argspec.defaults)
+                    if not kwarg.startswith("_")
+                }
             )
 
             if not argspec.varkw:
@@ -637,7 +641,7 @@ def __init__(self, *args, **kwargs):
 
     def _set_default_frames_per_clip(self, inject_fake_data):
         argspec = inspect.getfullargspec(self.DATASET_CLASS.__init__)
-        args_without_default = argspec.args[1:(-len(argspec.defaults) if argspec.defaults else None)]
+        args_without_default = argspec.args[1 : (-len(argspec.defaults) if argspec.defaults else None)]
         frames_per_clip_last = args_without_default[-1] == "frames_per_clip"
 
         @functools.wraps(inject_fake_data)

From 8b64d1d816c7f66d693133a606116bd5e2bbf558 Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Wed, 12 May 2021 09:27:18 +0200
Subject: [PATCH 55/61] add _use_legacy_structure in favour of **kwargs

---
 torchvision/datasets/kinetics.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index 954e2c0d660..daf77bad881 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -103,7 +103,7 @@ def __init__(
         _video_min_dimension: int = 0,
         _audio_samples: int = 0,
         _audio_channels: int = 0,
-        **kwargs
+        _use_legacy_structure: bool = False,
     ) -> None:
 
         # TODO: support test
@@ -112,7 +112,6 @@ def __init__(
         self.num_download_workers = num_download_workers
 
         self.root = root
-        _use_legacy_structure = kwargs.get('_use_legacy_structure', False)
         if _use_legacy_structure:
             print("Using legacy structure")
             self.split_folder = root

From 94b21cc6ec8d34f38d047420cf00b6d0dfc3dfb8 Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Wed, 12 May 2021 09:27:41 +0200
Subject: [PATCH 56/61] add type hints for Kinetics400

---
 torchvision/datasets/kinetics.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index daf77bad881..9aba2905b6c 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -5,7 +5,7 @@
 
 from os import path
 import csv
-from typing import Callable, Optional, Tuple
+from typing import Any, Callable, Optional, Tuple
 from functools import partial
 from multiprocessing import Pool
 
@@ -293,13 +293,13 @@ class Kinetics400(Kinetics):
 
     def __init__(
         self,
-        root,
-        frames_per_clip,
-        num_classes = None,
-        split = None,
-        download = None,
-        num_download_workers = None,
-        **kwargs
+        root: str,
+        frames_per_clip: int,
+        num_classes: Any = None,
+        split: Any = None,
+        download: Any = None,
+        num_download_workers: Any = None,
+        **kwargs: Any
     ):
         warnings.warn(
             "Kinetics400 is deprecated and will be removed in a future release."

From f8039462e886fe1fda050735b6bcc59f7cb857a2 Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Wed, 12 May 2021 09:27:52 +0200
Subject: [PATCH 57/61] flake8

---
 torchvision/datasets/kinetics.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index 9aba2905b6c..c013a394809 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -306,8 +306,8 @@ def __init__(
             "It was replaced by Kinetics(..., num_classes=\"400\").")
         if any(value is not None for value in (num_classes, split, download, num_download_workers)):
             raise RuntimeError(
-                "Usage of 'num_classes', 'split', 'download', or 'num_download_workers' is not supported in Kinetics400. "
-                "Please use Kinetics instead."
+                "Usage of 'num_classes', 'split', 'download', or 'num_download_workers' is not supported in "
+                "Kinetics400. Please use Kinetics instead."
             )
 
         super(Kinetics400, self).__init__(

From b39646a94b1981c7011fa1a2cbcc0548f63144ec Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Wed, 12 May 2021 09:32:09 +0200
Subject: [PATCH 58/61] flake8

---
 torchvision/datasets/kinetics.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index c013a394809..2b005ddbc13 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -5,7 +5,7 @@
 
 from os import path
 import csv
-from typing import Any, Callable, Optional, Tuple
+from typing import Any, Callable, Dict, Optional, Tuple
 from functools import partial
 from multiprocessing import Pool
 
@@ -97,7 +97,7 @@ def __init__(
         download: bool = False,
         num_download_workers: int = 1,
         num_workers: int = 1,
-        _precomputed_metadata = None,
+        _precomputed_metadata: Optional[Dict] = None,
         _video_width: int = 0,
         _video_height: int = 0,
         _video_min_dimension: int = 0,
@@ -164,8 +164,8 @@ def _download_videos(self) -> None:
         """
         if path.exists(self.split_folder):
             raise RuntimeError(
-                f"The directory {self.split_folder} already exists. If you want to re-download or re-extract the images, "
-                f"delete the directory."
+                f"The directory {self.split_folder} already exists. "
+                f"If you want to re-download or re-extract the images, delete the directory."
             )
         tar_path = path.join(self.root, "tars")
         file_list_path = path.join(self.root, "files")
@@ -176,7 +176,6 @@ def _download_videos(self) -> None:
             download_url(split_url, file_list_path)
         list_video_urls = open(split_url_filepath, "r")
 
-
         if self.num_download_workers == 1:
             for line in list_video_urls.readlines():
                 line = str(line).replace("\n", "")

From c47c309abbe84ae85e58f5bc95e34b6646330161 Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Wed, 12 May 2021 10:45:27 +0200
Subject: [PATCH 59/61] flake8

---
 test/datasets_utils.py         | 2 +-
 test/test_datasets_download.py | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/test/datasets_utils.py b/test/datasets_utils.py
index c305d5391db..d7853b46314 100644
--- a/test/datasets_utils.py
+++ b/test/datasets_utils.py
@@ -641,7 +641,7 @@ def __init__(self, *args, **kwargs):
 
     def _set_default_frames_per_clip(self, inject_fake_data):
         argspec = inspect.getfullargspec(self.DATASET_CLASS.__init__)
-        args_without_default = argspec.args[1 : (-len(argspec.defaults) if argspec.defaults else None)]
+        args_without_default = argspec.args[1:(-len(argspec.defaults) if argspec.defaults else None)]
         frames_per_clip_last = args_without_default[-1] == "frames_per_clip"
 
         @functools.wraps(inject_fake_data)
diff --git a/test/test_datasets_download.py b/test/test_datasets_download.py
index 6c6e2e1d640..8c2d575e01d 100644
--- a/test/test_datasets_download.py
+++ b/test/test_datasets_download.py
@@ -409,6 +409,8 @@ def kinetics():
             for num_classes, split in itertools.product(("400", "600", "700"), ("train", "val"))
         ]
     )
+
+
 def kitti():
     return itertools.chain(
         *[

From 18ad36d00ac4dbf2a8c658c55143421c64628f4f Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Mon, 24 May 2021 10:47:36 -0500
Subject: [PATCH 60/61] rename to make things clearer

---
 torchvision/datasets/kinetics.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index 2b005ddbc13..721e2232648 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -220,10 +220,10 @@ def _make_ds_structure(self):
                     .replace(")", "")
                 )
                 os.makedirs(path.join(self.split_folder, label), exist_ok=True)
-                existing_file = path.join(self.split_folder, f)
-                if path.isfile(existing_file):
+                downloaded_file = path.join(self.split_folder, f)
+                if path.isfile(downloaded_file):
                     os.replace(
-                        existing_file, path.join(self.split_folder, label, f),
+                        downloaded_file, path.join(self.split_folder, label, f),
                     )
 
     @property

From 12b76d7fbd9f9fb81d490d5cb27d4ce83ac9d27c Mon Sep 17 00:00:00 2001
From: Bruno Korbar <bjuncek@gmail.com>
Date: Tue, 8 Jun 2021 15:00:35 -0500
Subject: [PATCH 61/61] permuting the output

---
 torchvision/datasets/kinetics.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py
index 721e2232648..2543b6c514d 100644
--- a/torchvision/datasets/kinetics.py
+++ b/torchvision/datasets/kinetics.py
@@ -64,7 +64,7 @@ class Kinetics(VisionDataset):
     Returns:
         tuple: A 3-tuple with the following entries:
 
-            - video (Tensor[T, H, W, C]): the `T` video frames in torch.uint8 tensor
+            - video (Tensor[T, C, H, W]): the `T` video frames in torch.uint8 tensor
             - audio(Tensor[K, L]): the audio frames, where `K` is the number of channels
               and `L` is the number of points in torch.float tensor
             - label (int): class of the video clip
@@ -103,7 +103,7 @@ def __init__(
         _video_min_dimension: int = 0,
         _audio_samples: int = 0,
         _audio_channels: int = 0,
-        _use_legacy_structure: bool = False,
+        _legacy: bool = False,
     ) -> None:
 
         # TODO: support test
@@ -112,7 +112,8 @@ def __init__(
         self.num_download_workers = num_download_workers
 
         self.root = root
-        if _use_legacy_structure:
+        self._legacy = _legacy
+        if _legacy:
             print("Using legacy structure")
             self.split_folder = root
             self.split = "unknown"
@@ -235,6 +236,9 @@ def __len__(self):
 
     def __getitem__(self, idx):
         video, audio, info, video_idx = self.video_clips.get_clip(idx)
+        if not self._legacy:
+            # [T,H,W,C] --> [T,C,H,W]
+            video = video.permute(0, 3, 1, 2)
         label = self.samples[video_idx][1]
 
         if self.transform is not None:
@@ -312,6 +316,6 @@ def __init__(
         super(Kinetics400, self).__init__(
             root=root,
             frames_per_clip=frames_per_clip,
-            _use_legacy_structure=True,
+            _legacy=True,
             **kwargs,
         )