From 406efa4ccee3b1a2312108341433299f73cacc69 Mon Sep 17 00:00:00 2001
From: zyan3 <zyan3@devgpu002.prn1.facebook.com>
Date: Fri, 6 Sep 2019 22:51:47 -0700
Subject: [PATCH 01/26] video transforms

---
 test/test_transforms.py                    |   2 +-
 test/test_transforms_video.py              | 173 +++++++++++++++++++++
 torchvision/transforms/__init__.py         |   1 +
 torchvision/transforms/functional_video.py | 100 ++++++++++++
 torchvision/transforms/transforms.py       |  34 ++--
 torchvision/transforms/transforms_video.py | 171 ++++++++++++++++++++
 6 files changed, 463 insertions(+), 18 deletions(-)
 create mode 100644 test/test_transforms_video.py
 create mode 100644 torchvision/transforms/functional_video.py
 create mode 100644 torchvision/transforms/transforms_video.py

diff --git a/test/test_transforms.py b/test/test_transforms.py
index 7e8320d6d6c..e4c0759074c 100644
--- a/test/test_transforms.py
+++ b/test/test_transforms.py
@@ -148,7 +148,7 @@ def test_randomresized_params(self):
             aspect_min = max(round(random.random(), 2), epsilon)
             aspect_ratio_range = (aspect_min, aspect_min + round(random.random(), 2))
             randresizecrop = transforms.RandomResizedCrop(size, scale_range, aspect_ratio_range)
-            i, j, h, w = randresizecrop.get_params(img, scale_range, aspect_ratio_range)
+            i, j, h, w = randresizecrop.get_params(img.size[1], img.size[0], scale_range, aspect_ratio_range)
             aspect_ratio_obtained = w / h
             assert (min(aspect_ratio_range) - epsilon <= aspect_ratio_obtained <= max(aspect_ratio_range) + epsilon or
                     aspect_ratio_obtained == 1.0)
diff --git a/test/test_transforms_video.py b/test/test_transforms_video.py
new file mode 100644
index 00000000000..30370218ddb
--- /dev/null
+++ b/test/test_transforms_video.py
@@ -0,0 +1,173 @@
+from __future__ import division
+import torch
+import torchvision.transforms as transforms
+import unittest
+import random
+import numpy as np
+
+try:
+    from scipy import stats
+except ImportError:
+    stats = None
+
+
+class Tester(unittest.TestCase):
+
+    def test_random_crop_video(self):
+        numFrames = random.randint(4, 128)
+        height = random.randint(10, 32) * 2
+        width = random.randint(10, 32) * 2
+        oheight = random.randint(5, (height - 2) / 2) * 2
+        owidth = random.randint(5, (width - 2) / 2) * 2
+        clip = torch.ones((numFrames, height, width, 3), dtype=torch.uint8)
+        result = transforms.Compose([
+            transforms.ToTensorVideo(),
+            transforms.RandomCropVideo((oheight, owidth)),
+        ])(clip)
+        assert result.size(2) == oheight
+        assert result.size(3) == owidth
+
+        transforms.RandomCropVideo((oheight, owidth)).__repr__()
+
+    def test_random_resized_crop_video(self):
+        numFrames = random.randint(4, 128)
+        height = random.randint(10, 32) * 2
+        width = random.randint(10, 32) * 2
+        oheight = random.randint(5, (height - 2) / 2) * 2
+        owidth = random.randint(5, (width - 2) / 2) * 2
+        clip = torch.ones((numFrames, height, width, 3), dtype=torch.uint8)
+        result = transforms.Compose([
+            transforms.ToTensorVideo(),
+            transforms.RandomResizedCropVideo((oheight, owidth)),
+        ])(clip)
+        assert result.size(2) == oheight
+        assert result.size(3) == owidth
+
+        transforms.RandomResizedCropVideo((oheight, owidth)).__repr__()
+
+    def test_center_crop_video(self):
+        numFrames = random.randint(4, 128)
+        height = random.randint(10, 32) * 2
+        width = random.randint(10, 32) * 2
+        oheight = random.randint(5, (height - 2) / 2) * 2
+        owidth = random.randint(5, (width - 2) / 2) * 2
+
+        clip = torch.ones([numFrames, height, width, 3], dtype=torch.uint8)
+        oh1 = (height - oheight) // 2
+        ow1 = (width - owidth) // 2
+        clipNarrow = clip[:, oh1:oh1 + oheight, ow1:ow1 + owidth, :]
+        clipNarrow.fill_(0)
+        result = transforms.Compose([
+            transforms.ToTensorVideo(),
+            transforms.CenterCropVideo((oheight, owidth)),
+        ])(clip)
+
+        msg = "height: " + str(height) + " width: " \
+            + str(width) + " oheight: " + str(oheight) + " owidth: " + str(owidth)
+        self.assertEqual(result.sum().item(), 0, msg)
+
+        oheight += 1
+        owidth += 1
+        result = transforms.Compose([
+            transforms.ToTensorVideo(),
+            transforms.CenterCropVideo((oheight, owidth)),
+        ])(clip)
+        sum1 = result.sum()
+
+        msg = "height: " + str(height) + " width: " \
+            + str(width) + " oheight: " + str(oheight) + " owidth: " + str(owidth)
+        self.assertEqual(sum1.item() > 1, True, msg)
+
+        oheight += 1
+        owidth += 1
+        result = transforms.Compose([
+            transforms.ToTensorVideo(),
+            transforms.CenterCropVideo((oheight, owidth)),
+        ])(clip)
+        sum2 = result.sum()
+
+        msg = "height: " + str(height) + " width: " \
+            + str(width) + " oheight: " + str(oheight) + " owidth: " + str(owidth)
+        self.assertTrue(sum2.item() > 1, msg)
+        self.assertTrue(sum2.item() > sum1.item(), msg)
+
+
+    @unittest.skipIf(stats is None, 'scipy.stats is not available')
+    def test_normalize_video(self):
+        def samples_from_standard_normal(tensor):
+            p_value = stats.kstest(list(tensor.view(-1)), 'norm', args=(0, 1)).pvalue
+            return p_value > 0.0001
+
+        random_state = random.getstate()
+        random.seed(42)
+        for channels in [1, 3]:
+            numFrames = random.randint(4, 128)
+            height = random.randint(32, 256)
+            width = random.randint(32, 256)
+            mean = random.random()
+            std = random.random()
+            clip = torch.normal(mean, std, size=(channels, numFrames, height, width))
+            mean = [clip[c].mean().item() for c in range(channels)]
+            std = [clip[c].std().item() for c in range(channels)]
+            normalized = transforms.NormalizeVideo(mean, std)(clip)
+            assert samples_from_standard_normal(normalized)
+        random.setstate(random_state)
+
+
+        # Checking the optional in-place behaviour
+        tensor = torch.rand((3, 128, 16, 16))
+        tensor_inplace = transforms.NormalizeVideo((0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True)(tensor)
+        assert torch.equal(tensor, tensor_inplace)
+
+        transforms.NormalizeVideo((0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True).__repr__()
+
+    def test_to_tensor_video(self):
+        test_channels = [1, 3, 4]
+        numFrames, height, width = 64, 4, 4
+        trans = transforms.ToTensorVideo()
+
+        with self.assertRaises(TypeError):
+            trans(np.random.rand(numFrames, height, width, 1).tolist())
+            trans(torch.rand((numFrames, height, width, 1), dtype=torch.float))
+
+        with self.assertRaises(ValueError):
+            trans(torch.ones((3, numFrames, height, width, 3), dtype=torch.uint8))
+            trans(torch.ones((height, width, 3), dtype=torch.uint8))
+            trans(torch.ones((width, 3), dtype=torch.uint8))
+            trans(torch.ones((3), dtype=torch.uint8))
+
+        trans.__repr__()
+
+    @unittest.skipIf(stats is None, 'scipy.stats not available')
+    def test_random_horizontal_flip_video(self):
+        random_state = random.getstate()
+        random.seed(42)
+        clip = torch.rand((3, 4, 112, 112), dtype=torch.float)
+        hclip = clip.flip((-1))
+
+        num_samples = 250
+        num_horizontal = 0
+        for _ in range(num_samples):
+            out = transforms.RandomHorizontalFlipVideo()(clip)
+            if torch.all(torch.eq(out, hclip)):
+                num_horizontal += 1
+
+        p_value = stats.binom_test(num_horizontal, num_samples, p=0.5)
+        random.setstate(random_state)
+        assert p_value > 0.0001
+
+        num_samples = 250
+        num_horizontal = 0
+        for _ in range(num_samples):
+            out = transforms.RandomHorizontalFlipVideo(p=0.7)(clip)
+            if torch.all(torch.eq(out, hclip)):
+                num_horizontal += 1
+
+        p_value = stats.binom_test(num_horizontal, num_samples, p=0.7)
+        random.setstate(random_state)
+        assert p_value > 0.0001
+
+        transforms.RandomHorizontalFlipVideo().__repr__()
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/torchvision/transforms/__init__.py b/torchvision/transforms/__init__.py
index 7986cdd6429..175a8a8dc1b 100644
--- a/torchvision/transforms/__init__.py
+++ b/torchvision/transforms/__init__.py
@@ -1 +1,2 @@
 from .transforms import *
+from .transforms_video import *
diff --git a/torchvision/transforms/functional_video.py b/torchvision/transforms/functional_video.py
new file mode 100644
index 00000000000..0b4c84d5843
--- /dev/null
+++ b/torchvision/transforms/functional_video.py
@@ -0,0 +1,100 @@
+import torch
+
+
+def _is_tensor_video_clip(clip):
+    if not torch.is_tensor(clip):
+        raise TypeError("clip should be Tesnor. Got %s" % type(clip))
+
+    if not clip.ndimension() == 4:
+        raise ValueError("clip should be 4D. Got %dD" % clip.dim())
+
+    return True
+
+
+def crop(clip, i, j, h, w):
+    """
+    Args:
+        clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W)
+    """
+    assert len(clip.size()) == 4, "clip should be a 4D tensor"
+    return clip[:, :, i : i + h, j : j + w]
+
+
+def resize(clip, target_size, interpolation_mode):
+    assert len(target_size) == 2, "target size should be tuple (height, width)"
+    return torch.nn.functional.interpolate(
+        clip, size=target_size, mode=interpolation_mode
+    )
+
+
+def resized_crop(clip, i, j, h, w, size, interpolation_mode="bilinear"):
+    """
+    Do spatial cropping and resizing to the video clip
+    Args:
+        clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W)
+        i (int): i in (i,j) i.e coordinates of the upper left corner.
+        j (int): j in (i,j) i.e coordinates of the upper left corner.
+        h (int): Height of the cropped region.
+        w (int): Width of the cropped region.
+        size (tuple(int, int)): height and width of resized clip
+    Returns:
+        clip (torch.tensor): Resized and cropped clip. Size is (C, T, H, W)
+    """
+    assert _is_tensor_video_clip(clip), "clip should be a 4D torch.tensor"
+    clip = crop(clip, i, j, h, w)
+    clip = resize(clip, size, interpolation_mode)
+    return clip
+
+
+def center_crop(clip, crop_size):
+    assert _is_tensor_video_clip(clip), "clip should be a 4D torch.tensor"
+    h, w = clip.size(2), clip.size(3)
+    th, tw = crop_size
+    assert h >= th and w >= tw, "height and width must be no smaller than crop_size"
+
+    i = int(round((h - th) / 2.0))
+    j = int(round((w - tw) / 2.0))
+    return crop(clip, i, j, th, tw)
+
+
+def to_tensor(clip):
+    """
+    Convert tensor data type to be float and permute the dimenions of clip tensor
+    Args:
+        clip (torch.tensor, dtype=torch.uint8): Size is (T, H, W, C)
+    Return:
+        clip (torch.tensor, dtype=torch.float): Size is (C, T, H, W)
+    """
+    _is_tensor_video_clip(clip)
+    if not clip.dtype == torch.uint8:
+        raise TypeError("clip tensor should have data type uint8. Got %s" % str(clip.dtype))
+    return clip.float().permute(3, 0, 1, 2)
+
+
+def normalize(clip, mean, std, inplace=False):
+    """
+    Args:
+        clip (torch.tensor): Video clip to be normalized. Size is (C, T, H, W)
+        mean (tuple): pixel RGB mean. Size is (3)
+        std (tuple): pixel standard deviation. Size is (3)
+    Returns:
+        normalized clip (torch.tensor): Size is (C, T, H, W)
+    """
+    assert _is_tensor_video_clip(clip), "clip should be a 4D torch.tensor"
+    if not inplace:
+        clip = clip.clone()
+    mean = torch.as_tensor(mean, dtype=clip.dtype, device=clip.device)
+    std = torch.as_tensor(std, dtype=clip.dtype, device=clip.device)
+    clip.sub_(mean[:, None, None, None]).div_(std[:, None, None, None])
+    return clip
+
+
+def hflip(clip):
+    """
+    Args:
+        clip (torch.tensor): Video clip to be normalized. Size is (C, T, H, W)
+    Returns:
+        flipped clip (torch.tensor): Size is (C, T, H, W)
+    """
+    assert _is_tensor_video_clip(clip), "clip should be a 4D torch.tensor"
+    return clip.flip((-1))
diff --git a/torchvision/transforms/transforms.py b/torchvision/transforms/transforms.py
index 203dae345cd..1fa5d4461c3 100644
--- a/torchvision/transforms/transforms.py
+++ b/torchvision/transforms/transforms.py
@@ -434,17 +434,17 @@ def __init__(self, size, padding=None, pad_if_needed=False, fill=0, padding_mode
         self.padding_mode = padding_mode
 
     @staticmethod
-    def get_params(img, output_size):
+    def get_params(w, h, output_size):
         """Get parameters for ``crop`` for a random crop.
 
         Args:
-            img (PIL Image): Image to be cropped.
+            w: width of the image/video
+            h: height of the image/video
             output_size (tuple): Expected output size of the crop.
 
         Returns:
             tuple: params (i, j, h, w) to be passed to ``crop`` for random crop.
         """
-        w, h = img.size
         th, tw = output_size
         if w == tw and h == th:
             return 0, 0, h, w
@@ -471,7 +471,7 @@ def __call__(self, img):
         if self.pad_if_needed and img.size[1] < self.size[0]:
             img = F.pad(img, (0, self.size[0] - img.size[1]), self.fill, self.padding_mode)
 
-        i, j, h, w = self.get_params(img, self.size)
+        i, j, h, w = self.get_params(img.size[0], img.size[1], self.size)
 
         return F.crop(img, i, j, h, w)
 
@@ -623,7 +623,7 @@ def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), interpolat
         self.ratio = ratio
 
     @staticmethod
-    def get_params(img, scale, ratio):
+    def get_params(height, width, scale, ratio):
         """Get parameters for ``crop`` for a random sized crop.
 
         Args:
@@ -635,7 +635,7 @@ def get_params(img, scale, ratio):
             tuple: params (i, j, h, w) to be passed to ``crop`` for a random
                 sized crop.
         """
-        area = img.size[0] * img.size[1]
+        area = height * width
 
         for attempt in range(10):
             target_area = random.uniform(*scale) * area
@@ -645,24 +645,24 @@ def get_params(img, scale, ratio):
             w = int(round(math.sqrt(target_area * aspect_ratio)))
             h = int(round(math.sqrt(target_area / aspect_ratio)))
 
-            if 0 < w <= img.size[0] and 0 < h <= img.size[1]:
-                i = random.randint(0, img.size[1] - h)
-                j = random.randint(0, img.size[0] - w)
+            if 0 < w <= width and 0 < h <= height:
+                i = random.randint(0, height - h)
+                j = random.randint(0, width - w)
                 return i, j, h, w
 
         # Fallback to central crop
-        in_ratio = img.size[0] / img.size[1]
+        in_ratio = float(width) / float(height)
         if (in_ratio < min(ratio)):
-            w = img.size[0]
+            w = width
             h = int(round(w / min(ratio)))
         elif (in_ratio > max(ratio)):
-            h = img.size[1]
+            h = height
             w = int(round(h * max(ratio)))
         else:  # whole image
-            w = img.size[0]
-            h = img.size[1]
-        i = (img.size[1] - h) // 2
-        j = (img.size[0] - w) // 2
+            w = width
+            h = height
+        i = (height - h) // 2
+        j = (width - w) // 2
         return i, j, h, w
 
     def __call__(self, img):
@@ -673,7 +673,7 @@ def __call__(self, img):
         Returns:
             PIL Image: Randomly cropped and resized image.
         """
-        i, j, h, w = self.get_params(img, self.scale, self.ratio)
+        i, j, h, w = self.get_params(img.size[1], img.size[0], self.scale, self.ratio)
         return F.resized_crop(img, i, j, h, w, self.size, self.interpolation)
 
     def __repr__(self):
diff --git a/torchvision/transforms/transforms_video.py b/torchvision/transforms/transforms_video.py
new file mode 100644
index 00000000000..7da6010f59f
--- /dev/null
+++ b/torchvision/transforms/transforms_video.py
@@ -0,0 +1,171 @@
+#!/usr/bin/env python3
+
+import math
+import numbers
+import random
+
+from torchvision.transforms import (
+    RandomCrop,
+    RandomResizedCrop,
+)
+
+from . import functional_video as F
+
+
+__all__ = [
+    "RandomCropVideo",
+    "RandomResizedCropVideo",
+    "CenterCropVideo",
+    "NormalizeVideo",
+    "ToTensorVideo",
+    "RandomHorizontalFlipVideo",
+]
+
+
+class RandomCropVideo(RandomCrop):
+    def __init__(self, size):
+        if isinstance(size, numbers.Number):
+            self.size = (int(size), int(size))
+        else:
+            self.size = size
+
+    def __call__(self, clip):
+        """
+        Args:
+            clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W)
+        Returns:
+            torch.tensor: randomly cropped/resized video clip.
+                size is (C, T, OH, OW)
+        """
+        i, j, h, w = self.get_params(clip.size(3), clip.size(2), self.size)
+        return F.crop(clip, i, j, h, w)
+
+    def __repr__(self):
+        return self.__class__.__name__ + '(size={0})'.format(self.size)
+
+
+class RandomResizedCropVideo(RandomResizedCrop):
+    def __init__(
+        self,
+        size,
+        scale=(0.08, 1.0),
+        ratio=(3.0 / 4.0, 4.0 / 3.0),
+        interpolation_mode="bilinear",
+    ):
+        if isinstance(size, tuple):
+            assert len(size) == 2, "size should be tuple (height, width)"
+            self.size = size
+        else:
+            self.size = (size, size)
+
+        self.interpolation_mode = interpolation_mode
+        self.scale = scale
+        self.ratio = ratio
+
+    def __call__(self, clip):
+        """
+        Args:
+            clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W)
+        Returns:
+            torch.tensor: randomly cropped/resized video clip.
+                size is (C, T, H, W)
+        """
+        i, j, h, w = self.get_params(clip.size(2), clip.size(3), self.scale, self.ratio)
+        return F.resized_crop(clip, i, j, h, w, self.size, self.interpolation_mode)
+
+    def __repr__(self):
+        return self.__class__.__name__ + '(size={0}, interpolation_mode={1}, scale={2}, ratio={3})'.format(
+            self.size, self.interpolation_mode, self.scale, self.ratio)
+
+
+
+class CenterCropVideo(object):
+    def __init__(self, crop_size):
+        if isinstance(crop_size, numbers.Number):
+            self.crop_size = (int(size), int(size))
+        else:
+            self.crop_size = crop_size
+
+    def __call__(self, clip):
+        """
+        Args:
+            clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W)
+        Returns:
+            torch.tensor: central cropping of video clip. Size is (C, T, crop_size, crop_size)
+        """
+        return F.center_crop(clip, self.crop_size)
+
+    def __repr__(self):
+        return self.__class__.__name__ + '(crop_size={0})'.format(self.crop_size)
+
+class NormalizeVideo(object):
+    """
+    Normalize the video clip by mean subtraction and division by standard deviation
+    Args:
+        mean (3-tuple): pixel RGB mean
+        std (3-tuple): pixel RGB standard deviation
+        inplace (boolean): whether do in-place normalization
+    """
+
+    def __init__(self, mean, std, inplace=False):
+        self.mean = mean
+        self.std = std
+        self.inplace = inplace
+
+    def __call__(self, clip):
+        """
+        Args:
+            clip (torch.tensor): video clip to be normalized. Size is (C, T, H, W)
+        """
+        return F.normalize(clip, self.mean, self.std, self.inplace)
+
+    def __repr__(self):
+        return self.__class__.__name__ + '(mean={0}, std={1}, inplace={2})'.format(
+            self.mean, self.std, self.inplace)
+
+
+class ToTensorVideo(object):
+    """
+    Convert tensor data type to be float and permute the dimenions of clip tensor
+    """
+
+    def __init__(self):
+        pass
+
+    def __call__(self, clip):
+        """
+        Convert tensor data type to be float and permute the dimenions of clip tensor
+        Args:
+            clip (torch.tensor, dtype=torch.uint8): Size is (T, H, W, C)
+        Return:
+            clip (torch.tensor, dtype=torch.float): Size is (C, T, H, W)
+        """
+        return F.to_tensor(clip)
+
+    def __repr__(self):
+        return self.__class__.__name__
+
+class RandomHorizontalFlipVideo(object):
+    """
+    Flip the video clip along the horizonal direction with a given probability
+    Args:
+        p (float): probability of the clip being flipped. Default value is 0.5
+    """
+
+    def __init__(self, p=0.5):
+        self.p = p
+
+    def __call__(self, clip):
+        """
+        Convert tensor data type to be float and permute the dimenions of clip tensor
+        Args:
+            clip (torch.tensor): Size is (C, T, H, W)
+        Return:
+            clip (torch.tensor): Size is (C, T, H, W)
+        """
+        if random.random() < self.p:
+            clip = F.hflip(clip)
+        return clip
+
+    def __repr__(self):
+        return self.__class__.__name__ + "(p={0})".format(self.p)

From cf66708079e2f494bcf1e10a1f24adf631e81bb3 Mon Sep 17 00:00:00 2001
From: zyan3 <zyan3@devgpu002.prn1.facebook.com>
Date: Sat, 7 Sep 2019 22:09:24 -0700
Subject: [PATCH 02/26] [video transforms] in ToTensorVideo, divide value by
 255.0

---
 test/test_transforms_video.py              | 6 +++---
 torchvision/transforms/functional_video.py | 5 +++--
 torchvision/transforms/transforms_video.py | 5 ++---
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/test/test_transforms_video.py b/test/test_transforms_video.py
index 30370218ddb..5028ef675f3 100644
--- a/test/test_transforms_video.py
+++ b/test/test_transforms_video.py
@@ -19,7 +19,7 @@ def test_random_crop_video(self):
         width = random.randint(10, 32) * 2
         oheight = random.randint(5, (height - 2) / 2) * 2
         owidth = random.randint(5, (width - 2) / 2) * 2
-        clip = torch.ones((numFrames, height, width, 3), dtype=torch.uint8)
+        clip = torch.randint(0, 256, (numFrames, height, width, 3), dtype=torch.uint8)
         result = transforms.Compose([
             transforms.ToTensorVideo(),
             transforms.RandomCropVideo((oheight, owidth)),
@@ -35,7 +35,7 @@ def test_random_resized_crop_video(self):
         width = random.randint(10, 32) * 2
         oheight = random.randint(5, (height - 2) / 2) * 2
         owidth = random.randint(5, (width - 2) / 2) * 2
-        clip = torch.ones((numFrames, height, width, 3), dtype=torch.uint8)
+        clip = torch.randint(0, 256, (numFrames, height, width, 3), dtype=torch.uint8)
         result = transforms.Compose([
             transforms.ToTensorVideo(),
             transforms.RandomResizedCropVideo((oheight, owidth)),
@@ -52,7 +52,7 @@ def test_center_crop_video(self):
         oheight = random.randint(5, (height - 2) / 2) * 2
         owidth = random.randint(5, (width - 2) / 2) * 2
 
-        clip = torch.ones([numFrames, height, width, 3], dtype=torch.uint8)
+        clip = torch.ones((numFrames, height, width, 3), dtype=torch.uint8) * 255
         oh1 = (height - oheight) // 2
         ow1 = (width - owidth) // 2
         clipNarrow = clip[:, oh1:oh1 + oheight, ow1:ow1 + owidth, :]
diff --git a/torchvision/transforms/functional_video.py b/torchvision/transforms/functional_video.py
index 0b4c84d5843..627c0e3b0f1 100644
--- a/torchvision/transforms/functional_video.py
+++ b/torchvision/transforms/functional_video.py
@@ -59,7 +59,8 @@ def center_crop(clip, crop_size):
 
 def to_tensor(clip):
     """
-    Convert tensor data type to be float and permute the dimenions of clip tensor
+    Convert tensor data type from uint8 to float, divide value by 255.0 and
+    permute the dimenions of clip tensor
     Args:
         clip (torch.tensor, dtype=torch.uint8): Size is (T, H, W, C)
     Return:
@@ -68,7 +69,7 @@ def to_tensor(clip):
     _is_tensor_video_clip(clip)
     if not clip.dtype == torch.uint8:
         raise TypeError("clip tensor should have data type uint8. Got %s" % str(clip.dtype))
-    return clip.float().permute(3, 0, 1, 2)
+    return clip.float().permute(3, 0, 1, 2) / 255.0
 
 
 def normalize(clip, mean, std, inplace=False):
diff --git a/torchvision/transforms/transforms_video.py b/torchvision/transforms/transforms_video.py
index 7da6010f59f..50c292e30d1 100644
--- a/torchvision/transforms/transforms_video.py
+++ b/torchvision/transforms/transforms_video.py
@@ -126,7 +126,8 @@ def __repr__(self):
 
 class ToTensorVideo(object):
     """
-    Convert tensor data type to be float and permute the dimenions of clip tensor
+    Convert tensor data type from uint8 to float, divide value by 255.0 and
+    permute the dimenions of clip tensor
     """
 
     def __init__(self):
@@ -134,7 +135,6 @@ def __init__(self):
 
     def __call__(self, clip):
         """
-        Convert tensor data type to be float and permute the dimenions of clip tensor
         Args:
             clip (torch.tensor, dtype=torch.uint8): Size is (T, H, W, C)
         Return:
@@ -157,7 +157,6 @@ def __init__(self, p=0.5):
 
     def __call__(self, clip):
         """
-        Convert tensor data type to be float and permute the dimenions of clip tensor
         Args:
             clip (torch.tensor): Size is (C, T, H, W)
         Return:

From 88ea2c635577046cd06effd47d0e0289025b32b6 Mon Sep 17 00:00:00 2001
From: zyan3 <zyan3@devgpu002.prn1.facebook.com>
Date: Sat, 7 Sep 2019 22:39:31 -0700
Subject: [PATCH 03/26] [video transforms] fix a bug

---
 torchvision/transforms/transforms_video.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/torchvision/transforms/transforms_video.py b/torchvision/transforms/transforms_video.py
index 50c292e30d1..bab4811b938 100644
--- a/torchvision/transforms/transforms_video.py
+++ b/torchvision/transforms/transforms_video.py
@@ -82,7 +82,7 @@ def __repr__(self):
 class CenterCropVideo(object):
     def __init__(self, crop_size):
         if isinstance(crop_size, numbers.Number):
-            self.crop_size = (int(size), int(size))
+            self.crop_size = (int(crop_size), int(crop_size))
         else:
             self.crop_size = crop_size
 

From 78f169b5e1b96871936e45ca310788f3b3e4bb78 Mon Sep 17 00:00:00 2001
From: Lara Haidar <haidar.lara@gmail.com>
Date: Mon, 9 Sep 2019 02:01:31 -0700
Subject: [PATCH 04/26] Register Torchvision Ops as Custom Ops (#1267)

* Register torchvision ops

* install ORT only with python 3

* rename lib + address other comments

* fix lint

* fix lib copy

* find file with pattern instead of suffix

* use relative path

* revert rename and use imp to find lib

* fix typo
---
 .travis.yml                                |  4 +
 setup.py                                   | 21 +++++-
 test/test_onnx.py                          | 88 ++++++++++++++++++++++
 torchvision/csrc/ROIAlign.h                |  8 +-
 torchvision/csrc/ROIPool.h                 |  6 +-
 torchvision/csrc/custom_ops/custom_ops.cpp | 14 ++++
 torchvision/csrc/nms.h                     |  2 +-
 torchvision/csrc/vision.cpp                |  2 +
 torchvision/ops/_custom_ops.py             | 46 +++++++++++
 torchvision/ops/boxes.py                   |  5 +-
 torchvision/ops/roi_align.py               |  8 ++
 torchvision/ops/roi_pool.py                |  8 ++
 12 files changed, 199 insertions(+), 13 deletions(-)
 create mode 100644 test/test_onnx.py
 create mode 100644 torchvision/csrc/custom_ops/custom_ops.cpp
 create mode 100644 torchvision/ops/_custom_ops.py

diff --git a/.travis.yml b/.travis.yml
index fc0aa7240a4..497579f5a3a 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -47,6 +47,10 @@ before_install:
   - pip install future
   - pip install pytest pytest-cov codecov
   - pip install mock
+  - |
+    if [[ $TRAVIS_PYTHON_VERSION == 3.6 ]]; then
+      pip install onnxruntime
+    fi
   - conda install av -c conda-forge
 
 
diff --git a/setup.py b/setup.py
index b96fbd43ebc..c9fb045ad31 100644
--- a/setup.py
+++ b/setup.py
@@ -96,12 +96,21 @@ def get_extensions():
     source_models = [os.path.join(models_dir, s) for s in source_models]
     tests = test_file + source_models
 
+    custom_ops_sources = [os.path.join(extensions_dir, "custom_ops", "custom_ops.cpp"),
+                          os.path.join(extensions_dir, "cpu", "nms_cpu.cpp"),
+                          os.path.join(extensions_dir, "cpu", "ROIAlign_cpu.cpp"),
+                          os.path.join(extensions_dir, "cpu", "ROIPool_cpu.cpp")]
+    custom_ops_sources_cuda = [os.path.join(extensions_dir, "cuda", "nms_cuda.cu"),
+                               os.path.join(extensions_dir, "cuda", "ROIAlign_cuda.cu"),
+                               os.path.join(extensions_dir, "cuda", "ROIPool_cuda.cu")]
+
     define_macros = []
 
     extra_compile_args = {}
     if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv('FORCE_CUDA', '0') == '1':
         extension = CUDAExtension
         sources += source_cuda
+        custom_ops_sources += custom_ops_sources_cuda
         define_macros += [('WITH_CUDA', None)]
         nvcc_flags = os.getenv('NVCC_FLAGS', '')
         if nvcc_flags == '':
@@ -135,7 +144,14 @@ def get_extensions():
             include_dirs=tests_include_dirs,
             define_macros=define_macros,
             extra_compile_args=extra_compile_args,
-        )
+        ),
+        extension(
+            "torchvision._custom_ops",
+            sources=custom_ops_sources,
+            include_dirs=include_dirs,
+            define_macros=define_macros,
+            extra_compile_args=extra_compile_args,
+        ),
     ]
 
     return ext_modules
@@ -176,5 +192,6 @@ def run(self):
         "scipy": ["scipy"],
     },
     ext_modules=get_extensions(),
-    cmdclass={'build_ext': torch.utils.cpp_extension.BuildExtension, 'clean': clean}
+    cmdclass={'build_ext': torch.utils.cpp_extension.BuildExtension,
+              'clean': clean}
 )
diff --git a/test/test_onnx.py b/test/test_onnx.py
new file mode 100644
index 00000000000..57b3c8dd729
--- /dev/null
+++ b/test/test_onnx.py
@@ -0,0 +1,88 @@
+import io
+import torch
+from torchvision import ops
+
+# onnxruntime requires python 3.5 or above
+try:
+    import onnxruntime
+except ImportError:
+    onnxruntime = None
+
+import unittest
+
+
+@unittest.skipIf(onnxruntime is None, 'ONNX Runtime unavailable')
+class ONNXExporterTester(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        torch.manual_seed(123)
+
+    def run_model(self, model, inputs):
+        model.eval()
+
+        # run pytorch model
+        with torch.no_grad():
+            if isinstance(inputs, torch.Tensor):
+                inputs = (inputs,)
+            outputs = model(*inputs)
+            if isinstance(outputs, torch.Tensor):
+                outputs = (outputs,)
+
+        onnx_io = io.BytesIO()
+        # export to onnx
+        torch.onnx.export(model, inputs, onnx_io, do_constant_folding=True, opset_version=10)
+
+        # validate the exported model with onnx runtime
+        self.ort_validate(onnx_io, inputs, outputs)
+
+    def ort_validate(self, onnx_io, inputs, outputs):
+
+        inputs, _ = torch.jit._flatten(inputs)
+        outputs, _ = torch.jit._flatten(outputs)
+
+        def to_numpy(tensor):
+            if tensor.requires_grad:
+                return tensor.detach().cpu().numpy()
+            else:
+                return tensor.cpu().numpy()
+
+        inputs = list(map(to_numpy, inputs))
+        outputs = list(map(to_numpy, outputs))
+
+        ort_session = onnxruntime.InferenceSession(onnx_io.getvalue())
+        # compute onnxruntime output prediction
+        ort_inputs = dict((ort_session.get_inputs()[i].name, inpt) for i, inpt in enumerate(inputs))
+        ort_outs = ort_session.run(None, ort_inputs)
+
+        for i in range(0, len(outputs)):
+            torch.testing.assert_allclose(outputs[i], ort_outs[i], rtol=1e-03, atol=1e-05)
+
+    def test_nms(self):
+        boxes = torch.rand(5, 4)
+        boxes[:, 2:] += torch.rand(5, 2)
+        scores = torch.randn(5)
+
+        class Module(torch.nn.Module):
+            def forward(self, boxes, scores):
+                return ops.nms(boxes, scores, 0.5)
+
+        self.run_model(Module(), (boxes, scores))
+
+    def test_roi_pool(self):
+        x = torch.rand(1, 1, 10, 10, dtype=torch.float32)
+        single_roi = torch.tensor([[0, 0, 0, 4, 4]], dtype=torch.float32)
+        model = ops.RoIAlign((5, 5), 1, 2)
+        self.run_model(model, (x, single_roi))
+
+    def test_roi_align(self):
+        x = torch.rand(1, 1, 10, 10, dtype=torch.float32)
+        rois = torch.tensor([[0, 0, 0, 4, 4]], dtype=torch.float32)
+        pool_h = 5
+        pool_w = 5
+        model = ops.RoIPool((pool_h, pool_w), 2)
+        model.eval()
+        self.run_model(model, (x, rois))
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/torchvision/csrc/ROIAlign.h b/torchvision/csrc/ROIAlign.h
index 2e914ac0092..7e18cf68f57 100644
--- a/torchvision/csrc/ROIAlign.h
+++ b/torchvision/csrc/ROIAlign.h
@@ -10,11 +10,11 @@
 at::Tensor ROIAlign_forward(
     const at::Tensor& input, // Input feature map.
     const at::Tensor& rois, // List of ROIs to pool over.
-    const float spatial_scale, // The scale of the image features. ROIs will be
+    const double spatial_scale, // The scale of the image features. ROIs will be
     // scaled to this.
-    const int pooled_height, // The height of the pooled feature map.
-    const int pooled_width, // The width of the pooled feature
-    const int sampling_ratio) // The number of points to sample in each bin
+    const int64_t pooled_height, // The height of the pooled feature map.
+    const int64_t pooled_width, // The width of the pooled feature
+    const int64_t sampling_ratio) // The number of points to sample in each bin
 // along each axis.
 {
   if (input.type().is_cuda()) {
diff --git a/torchvision/csrc/ROIPool.h b/torchvision/csrc/ROIPool.h
index 8885be4739f..7aefcc5e810 100644
--- a/torchvision/csrc/ROIPool.h
+++ b/torchvision/csrc/ROIPool.h
@@ -9,9 +9,9 @@
 std::tuple<at::Tensor, at::Tensor> ROIPool_forward(
     const at::Tensor& input,
     const at::Tensor& rois,
-    const float spatial_scale,
-    const int pooled_height,
-    const int pooled_width) {
+    const double spatial_scale,
+    const int64_t pooled_height,
+    const int64_t pooled_width) {
   if (input.type().is_cuda()) {
 #ifdef WITH_CUDA
     return ROIPool_forward_cuda(
diff --git a/torchvision/csrc/custom_ops/custom_ops.cpp b/torchvision/csrc/custom_ops/custom_ops.cpp
new file mode 100644
index 00000000000..677e5866a21
--- /dev/null
+++ b/torchvision/csrc/custom_ops/custom_ops.cpp
@@ -0,0 +1,14 @@
+#include <torch/script.h>
+
+#include "ROIAlign.h"
+#include "ROIPool.h"
+#include "nms.h"
+
+using namespace at;
+
+static auto registry =
+    torch::RegisterOperators()
+        .op("torchvision::nms", &nms)
+        .op("torchvision::roi_align(Tensor input, Tensor rois, float spatial_scale, int pooled_height, int pooled_width, int sampling_ratio) -> Tensor",
+            &ROIAlign_forward)
+        .op("torchvision::roi_pool", &ROIPool_forward);
diff --git a/torchvision/csrc/nms.h b/torchvision/csrc/nms.h
index 9a1a775e677..fc667fc8058 100644
--- a/torchvision/csrc/nms.h
+++ b/torchvision/csrc/nms.h
@@ -8,7 +8,7 @@
 at::Tensor nms(
     const at::Tensor& dets,
     const at::Tensor& scores,
-    const float iou_threshold) {
+    const double iou_threshold) {
   if (dets.device().is_cuda()) {
 #ifdef WITH_CUDA
     if (dets.numel() == 0) {
diff --git a/torchvision/csrc/vision.cpp b/torchvision/csrc/vision.cpp
index 4777d70a38b..61a4eeee727 100644
--- a/torchvision/csrc/vision.cpp
+++ b/torchvision/csrc/vision.cpp
@@ -7,6 +7,8 @@
 #endif
 
 PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
+  // TODO: remove nms from here since it is now registered
+  //       and used as a PyTorch custom op
   m.def("nms", &nms, "non-maximum suppression");
   m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward");
   m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward");
diff --git a/torchvision/ops/_custom_ops.py b/torchvision/ops/_custom_ops.py
new file mode 100644
index 00000000000..70108bdf152
--- /dev/null
+++ b/torchvision/ops/_custom_ops.py
@@ -0,0 +1,46 @@
+import os
+import sys
+import imp
+import torch
+
+
+# load the custom_op_library and register the custom ops
+lib_dir = os.path.join(os.path.dirname(__file__), '..')
+file, path, description = imp.find_module("_custom_ops", [lib_dir])
+torch.ops.load_library(path)
+
+
+def register_custom_op():
+    from torch.onnx.symbolic_helper import parse_args, scalar_type_to_onnx
+    from torch.onnx.symbolic_opset9 import select, unsqueeze, squeeze, _cast_Long, reshape
+
+    @parse_args('v', 'v', 'f')
+    def symbolic_multi_label_nms(g, boxes, scores, iou_threshold):
+        boxes = unsqueeze(g, boxes, 0)
+        scores = unsqueeze(g, unsqueeze(g, scores, 0), 0)
+        max_output_per_class = g.op('Constant', value_t=torch.tensor([sys.maxsize], dtype=torch.long))
+        iou_threshold = g.op('Constant', value_t=torch.tensor([iou_threshold], dtype=torch.float))
+        nms_out = g.op('NonMaxSuppression', boxes, scores, max_output_per_class, iou_threshold)
+        return squeeze(g, select(g, nms_out, 1, g.op('Constant', value_t=torch.tensor([2], dtype=torch.long))), 1)
+
+    @parse_args('v', 'v', 'f', 'i', 'i', 'i')
+    def roi_align(g, input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio):
+        batch_indices = _cast_Long(g, squeeze(g, select(g, rois, 1, g.op('Constant',
+                                   value_t=torch.tensor([0], dtype=torch.long))), 1), False)
+        rois = select(g, rois, 1, g.op('Constant', value_t=torch.tensor([1, 2, 3, 4], dtype=torch.long)))
+        return g.op('RoiAlign', input, rois, batch_indices, spatial_scale_f=spatial_scale,
+                    output_height_i=pooled_height, output_width_i=pooled_width, sampling_ratio_i=sampling_ratio)
+
+    @parse_args('v', 'v', 'f', 'i', 'i')
+    def roi_pool(g, input, rois, spatial_scale, pooled_height, pooled_width):
+        roi_pool = g.op('MaxRoiPool', input, rois,
+                        pooled_shape_i=(pooled_height, pooled_width), spatial_scale_f=spatial_scale)
+        return roi_pool, None
+
+    from torch.onnx import register_custom_op_symbolic
+    register_custom_op_symbolic('torchvision::nms', symbolic_multi_label_nms, 10)
+    register_custom_op_symbolic('torchvision::roi_align', roi_align, 10)
+    register_custom_op_symbolic('torchvision::roi_pool', roi_pool, 10)
+
+
+register_custom_op()
diff --git a/torchvision/ops/boxes.py b/torchvision/ops/boxes.py
index af92a0cbf0e..5475d38db30 100644
--- a/torchvision/ops/boxes.py
+++ b/torchvision/ops/boxes.py
@@ -1,5 +1,5 @@
 import torch
-from torchvision.extension import _lazy_import
+import torchvision.ops._custom_ops
 
 
 def nms(boxes, scores, iou_threshold):
@@ -29,8 +29,7 @@ def nms(boxes, scores, iou_threshold):
         of the elements that have been kept
         by NMS, sorted in decreasing order of scores
     """
-    _C = _lazy_import()
-    return _C.nms(boxes, scores, iou_threshold)
+    return torch.ops.torchvision.nms(boxes, scores, iou_threshold)
 
 
 def batched_nms(boxes, scores, idxs, iou_threshold):
diff --git a/torchvision/ops/roi_align.py b/torchvision/ops/roi_align.py
index b2a900e0c4b..18a3354ab64 100644
--- a/torchvision/ops/roi_align.py
+++ b/torchvision/ops/roi_align.py
@@ -9,6 +9,8 @@
 from torchvision.extension import _lazy_import
 from ._utils import convert_boxes_to_roi_format
 
+import torchvision.ops._custom_ops
+
 
 class _RoIAlignFunction(Function):
     @staticmethod
@@ -66,6 +68,12 @@ def roi_align(input, boxes, output_size, spatial_scale=1.0, sampling_ratio=-1):
     rois = boxes
     if not isinstance(rois, torch.Tensor):
         rois = convert_boxes_to_roi_format(rois)
+    # TODO: Change this to support backwards, which we
+    #       do not currently support when JIT tracing.
+    if torch._C._get_tracing_state():
+        return torch.ops.torchvision.roi_align(input, rois, spatial_scale,
+                                               output_size[0], output_size[1],
+                                               sampling_ratio)
     return _RoIAlignFunction.apply(input, rois, output_size, spatial_scale, sampling_ratio)
 
 
diff --git a/torchvision/ops/roi_pool.py b/torchvision/ops/roi_pool.py
index f6c7c9ed7aa..cbfd8a318eb 100644
--- a/torchvision/ops/roi_pool.py
+++ b/torchvision/ops/roi_pool.py
@@ -9,6 +9,8 @@
 from torchvision.extension import _lazy_import
 from ._utils import convert_boxes_to_roi_format
 
+import torchvision.ops._custom_ops
+
 
 class _RoIPoolFunction(Function):
     @staticmethod
@@ -59,6 +61,12 @@ def roi_pool(input, boxes, output_size, spatial_scale=1.0):
     rois = boxes
     if not isinstance(rois, torch.Tensor):
         rois = convert_boxes_to_roi_format(rois)
+    # TODO: Change this to support backwards, which we
+    #       do not currently support when JIT tracing.
+    if torch._C._get_tracing_state():
+        output, _ = torch.ops.torchvision.roi_pool(input, rois, spatial_scale,
+                                                   output_size[0], output_size[1])
+        return output
     return _RoIPoolFunction.apply(input, rois, output_size, spatial_scale)
 
 

From e4d5003956db97d4e4bc1055ec8b045c39ee4882 Mon Sep 17 00:00:00 2001
From: Francisco Massa <fvsmassa@gmail.com>
Date: Mon, 9 Sep 2019 11:48:17 +0200
Subject: [PATCH 05/26] Add CircleCI (v2) (#1298)

* [WIP] Add CircleCI for CI

* Make jobs only run on master

* Add initial CI

* [wip] testing if works

* Trying some basic GPU tests

* [WIP] maybe it will work?

* [WIP] One more try

* Pin versions

* Simplify and reuse

* Fix

* [WIP] testing windows

* [WIP] testing windows

* Try windows

* Try Windows

* Try windows

* Try windows

* Try windows

* Try windows

* Try windows

* Try windows

* Windows speedup

* Windows multicores

* Add parallel flags for Windows

* Skip some tests on Windows

* Sync config.yml and config.yml.in

* Regenerate

* Run all tests

* Limit python and cuda version for finding pytorch

* Skip darwin for previous check

* Add description

* Fix logic

* Remove space

* Add CUDA test back

* Add back .travis.yml for now and remove duplicate test

* Add newline
---
 .circleci/config.yml              | 95 +++++++++++++++++++++++++++++++
 .circleci/config.yml.in           | 95 +++++++++++++++++++++++++++++++
 packaging/pkg_helpers.bash        | 15 ++++-
 setup.py                          |  3 +
 test/test_datasets.py             |  2 +
 test/test_datasets_utils.py       |  3 +
 test/test_datasets_video_utils.py |  3 +
 test/test_io.py                   |  1 +
 test/test_utils.py                |  3 +
 9 files changed, 218 insertions(+), 2 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index a5be618c982..abd9c8ccf74 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -6,6 +6,9 @@ version: 2.1
 #     - Replace binary_linux_wheel_py3.7 with the name of the job you want to test.
 #       Job names are 'name:' key.
 
+orbs:
+  win: circleci/windows@1.0.0
+
 binary_common: &binary_common
   parameters:
     # Edit these defaults to do a release`
@@ -81,6 +84,90 @@ jobs:
           paths:
             - "*"
 
+  binary_linux_conda_cuda:
+    <<: *binary_common
+    machine:
+      image: ubuntu-1604:201903-01
+    resource_class: gpu.medium
+    steps:
+    - checkout
+    - run:
+        name: Setup environment
+        command: |
+          set -e
+
+          curl -L https://packagecloud.io/circleci/trusty/gpgkey | sudo apt-key add -
+          curl -L https://dl.google.com/linux/linux_signing_key.pub | sudo apt-key add -
+
+          sudo apt-get update
+
+          sudo apt-get install \
+              apt-transport-https \
+              ca-certificates \
+              curl \
+              gnupg-agent \
+              software-properties-common
+
+          curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
+
+          sudo add-apt-repository \
+             "deb [arch=amd64] https://download.docker.com/linux/ubuntu \
+             $(lsb_release -cs) \
+             stable"
+
+          sudo apt-get update
+          export DOCKER_VERSION="5:19.03.2~3-0~ubuntu-xenial"
+          sudo apt-get install docker-ce=${DOCKER_VERSION} docker-ce-cli=${DOCKER_VERSION} containerd.io
+
+          # Add the package repositories
+          distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
+          curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
+          curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list
+
+          export NVIDIA_CONTAINER_VERSION="1.0.3-1"
+          sudo apt-get update && sudo apt-get install -y nvidia-container-toolkit=${NVIDIA_CONTAINER_VERSION}
+          sudo systemctl restart docker
+
+          DRIVER_FN="NVIDIA-Linux-x86_64-410.104.run"
+          wget "https://s3.amazonaws.com/ossci-linux/nvidia_driver/$DRIVER_FN"
+          sudo /bin/bash "$DRIVER_FN" -s --no-drm || (sudo cat /var/log/nvidia-installer.log && false)
+          nvidia-smi
+
+    - run:
+        name: Pull docker image
+        command: |
+          set -e
+          export DOCKER_IMAGE=soumith/conda-cuda
+          echo Pulling docker image $DOCKER_IMAGE
+          docker pull $DOCKER_IMAGE >/dev/null
+
+    - run:
+        name: Build and run tests
+        command: |
+          set -e
+
+          cd ${HOME}/project/
+
+          export DOCKER_IMAGE=soumith/conda-cuda
+          export VARS_TO_PASS="-e PYTHON_VERSION -e BUILD_VERSION -e PYTORCH_VERSION -e UNICODE_ABI -e CU_VERSION"
+
+          docker run --gpus all  --ipc=host -v $(pwd):/remote -w /remote ${VARS_TO_PASS} ${DOCKER_IMAGE} ./packaging/build_conda.sh
+
+  binary_win_conda:
+    <<: *binary_common
+    executor:
+      name: win/vs2019
+      shell: bash.exe
+    steps:
+      - checkout
+      - run:
+          command: |
+            choco install miniconda3
+            $env:PATH = "C:\tools\miniconda3;C:\tools\miniconda3\Library\usr\bin;C:\tools\miniconda3\Scripts;C:\tools\miniconda3\bin" + $env:PATH
+            conda install -yq conda-build
+            bash packaging/build_conda.sh
+          shell: powershell.exe
+
   binary_macos_wheel:
     <<: *binary_common
     macos:
@@ -328,6 +415,14 @@ workflows:
           name: binary_macos_conda_py3.7_cpu
           python_version: "3.7"
           cu_version: "cpu"
+      - binary_linux_conda_cuda:
+          name: torchvision_linux_py3.7_cu100
+          python_version: "3.7"
+          cu_version: "cu100"
+      - binary_win_conda:
+          name: torchvision_win_py3.6_cpu
+          python_version: "3.6"
+          cu_version: "cpu"
 
   nightly:
     triggers:
diff --git a/.circleci/config.yml.in b/.circleci/config.yml.in
index 91046fcd7b8..4ff3849db53 100644
--- a/.circleci/config.yml.in
+++ b/.circleci/config.yml.in
@@ -6,6 +6,9 @@ version: 2.1
 #     - Replace binary_linux_wheel_py3.7 with the name of the job you want to test.
 #       Job names are 'name:' key.
 
+orbs:
+  win: circleci/windows@1.0.0
+
 binary_common: &binary_common
   parameters:
     # Edit these defaults to do a release`
@@ -81,6 +84,90 @@ jobs:
           paths:
             - "*"
 
+  binary_linux_conda_cuda:
+    <<: *binary_common
+    machine:
+      image: ubuntu-1604:201903-01
+    resource_class: gpu.medium
+    steps:
+    - checkout
+    - run:
+        name: Setup environment
+        command: |
+          set -e
+
+          curl -L https://packagecloud.io/circleci/trusty/gpgkey | sudo apt-key add -
+          curl -L https://dl.google.com/linux/linux_signing_key.pub | sudo apt-key add -
+
+          sudo apt-get update
+
+          sudo apt-get install \
+              apt-transport-https \
+              ca-certificates \
+              curl \
+              gnupg-agent \
+              software-properties-common
+
+          curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
+
+          sudo add-apt-repository \
+             "deb [arch=amd64] https://download.docker.com/linux/ubuntu \
+             $(lsb_release -cs) \
+             stable"
+
+          sudo apt-get update
+          export DOCKER_VERSION="5:19.03.2~3-0~ubuntu-xenial"
+          sudo apt-get install docker-ce=${DOCKER_VERSION} docker-ce-cli=${DOCKER_VERSION} containerd.io
+
+          # Add the package repositories
+          distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
+          curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
+          curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list
+
+          export NVIDIA_CONTAINER_VERSION="1.0.3-1"
+          sudo apt-get update && sudo apt-get install -y nvidia-container-toolkit=${NVIDIA_CONTAINER_VERSION}
+          sudo systemctl restart docker
+
+          DRIVER_FN="NVIDIA-Linux-x86_64-410.104.run"
+          wget "https://s3.amazonaws.com/ossci-linux/nvidia_driver/$DRIVER_FN"
+          sudo /bin/bash "$DRIVER_FN" -s --no-drm || (sudo cat /var/log/nvidia-installer.log && false)
+          nvidia-smi
+
+    - run:
+        name: Pull docker image
+        command: |
+          set -e
+          export DOCKER_IMAGE=soumith/conda-cuda
+          echo Pulling docker image $DOCKER_IMAGE
+          docker pull $DOCKER_IMAGE >/dev/null
+
+    - run:
+        name: Build and run tests
+        command: |
+          set -e
+
+          cd ${HOME}/project/
+
+          export DOCKER_IMAGE=soumith/conda-cuda
+          export VARS_TO_PASS="-e PYTHON_VERSION -e BUILD_VERSION -e PYTORCH_VERSION -e UNICODE_ABI -e CU_VERSION"
+
+          docker run --gpus all  --ipc=host -v $(pwd):/remote -w /remote ${VARS_TO_PASS} ${DOCKER_IMAGE} ./packaging/build_conda.sh
+
+  binary_win_conda:
+    <<: *binary_common
+    executor:
+      name: win/vs2019
+      shell: bash.exe
+    steps:
+      - checkout
+      - run:
+          command: |
+            choco install miniconda3
+            $env:PATH = "C:\tools\miniconda3;C:\tools\miniconda3\Library\usr\bin;C:\tools\miniconda3\Scripts;C:\tools\miniconda3\bin" + $env:PATH
+            conda install -yq conda-build
+            bash packaging/build_conda.sh
+          shell: powershell.exe
+
   binary_macos_wheel:
     <<: *binary_common
     macos:
@@ -214,6 +301,14 @@ workflows:
     jobs:
       - circleci_consistency
       {{ workflows() }}
+      - binary_linux_conda_cuda:
+          name: torchvision_linux_py3.7_cu100
+          python_version: "3.7"
+          cu_version: "cu100"
+      - binary_win_conda:
+          name: torchvision_win_py3.6_cpu
+          python_version: "3.6"
+          cu_version: "cpu"
 
   nightly:
     triggers:
diff --git a/packaging/pkg_helpers.bash b/packaging/pkg_helpers.bash
index bd9cbedcdef..4e7d55e4be6 100644
--- a/packaging/pkg_helpers.bash
+++ b/packaging/pkg_helpers.bash
@@ -45,7 +45,7 @@ setup_cuda() {
       export VERSION_SUFFIX="$PYTORCH_VERSION_SUFFIX"
       # If the suffix is non-empty, we will use a wheel subdirectory
       if [[ -n "$PYTORCH_VERSION_SUFFIX" ]]; then
-        export WHEEL_DIR="$PYTORCH_VERSION_SUFFIX/"
+        export WHEEL_DIR="$CU_VERSION/"
       fi
     fi
   fi
@@ -181,7 +181,18 @@ setup_pip_pytorch_version() {
 setup_conda_pytorch_constraint() {
   if [[ -z "$PYTORCH_VERSION" ]]; then
     export CONDA_CHANNEL_FLAGS="-c pytorch-nightly"
-    export PYTORCH_VERSION="$(conda search --json 'pytorch[channel=pytorch-nightly]' | python -c "import sys, json, re; print(re.sub(r'\\+.*$', '', json.load(sys.stdin)['pytorch'][-1]['version']))")"
+    export PYTORCH_VERSION="$(conda search --json 'pytorch[channel=pytorch-nightly]' | \
+                              python -c "import os, sys, json, re; cuver = os.environ.get('CU_VERSION'); \
+                               cuver = (cuver[:-1] + '.' + cuver[-1]).replace('cu', 'cuda') if cuver != 'cpu' else cuver; \
+                               print(re.sub(r'\\+.*$', '', \
+                                [x['version'] for x in json.load(sys.stdin)['pytorch'] \
+                                  if (x['platform'] == 'darwin' or cuver in x['fn']) \
+                                    and 'py' + os.environ['PYTHON_VERSION'] in x['fn']][-1]))")"
+    if [[ -z "$PYTORCH_VERSION" ]]; then
+      echo "PyTorch version auto detection failed"
+      echo "No package found for CU_VERSION=$CU_VERSION and PYTHON_VERSION=$PYTHON_VERSION"
+      exit 1
+    fi
   else
     export CONDA_CHANNEL_FLAGS="-c pytorch -c pytorch-nightly"
   fi
diff --git a/setup.py b/setup.py
index c9fb045ad31..e9b4627ec77 100644
--- a/setup.py
+++ b/setup.py
@@ -125,6 +125,9 @@ def get_extensions():
     if sys.platform == 'win32':
         define_macros += [('torchvision_EXPORTS', None)]
 
+        extra_compile_args.setdefault('cxx', [])
+        extra_compile_args['cxx'].append('/MP')
+
     sources = [os.path.join(extensions_dir, s) for s in sources]
 
     include_dirs = [extensions_dir]
diff --git a/test/test_datasets.py b/test/test_datasets.py
index d8f17d1acd4..f4ef4721370 100644
--- a/test/test_datasets.py
+++ b/test/test_datasets.py
@@ -1,3 +1,4 @@
+import sys
 import os
 import unittest
 import mock
@@ -149,6 +150,7 @@ def test_cifar100(self, mock_ext_check, mock_int_check):
             img, target = dataset[0]
             self.assertEqual(dataset.class_to_idx[dataset.classes[0]], target)
 
+    @unittest.skipIf('win' in sys.platform, 'temporarily disabled on Windows')
     def test_cityscapes(self):
         with cityscapes_root() as root:
 
diff --git a/test/test_datasets_utils.py b/test/test_datasets_utils.py
index 43fdbe8239b..376fe018e92 100644
--- a/test/test_datasets_utils.py
+++ b/test/test_datasets_utils.py
@@ -72,6 +72,7 @@ def test_download_url_dont_exist(self):
             with self.assertRaises(URLError):
                 utils.download_url(url, temp_dir)
 
+    @unittest.skipIf('win' in sys.platform, 'temporarily disabled on Windows')
     def test_extract_zip(self):
         with get_tmp_dir() as temp_dir:
             with tempfile.NamedTemporaryFile(suffix='.zip') as f:
@@ -83,6 +84,7 @@ def test_extract_zip(self):
                     data = nf.read()
                 self.assertEqual(data, 'this is the content')
 
+    @unittest.skipIf('win' in sys.platform, 'temporarily disabled on Windows')
     def test_extract_tar(self):
         for ext, mode in zip(['.tar', '.tar.gz'], ['w', 'w:gz']):
             with get_tmp_dir() as temp_dir:
@@ -98,6 +100,7 @@ def test_extract_tar(self):
                             data = nf.read()
                         self.assertEqual(data, 'this is the content')
 
+    @unittest.skipIf('win' in sys.platform, 'temporarily disabled on Windows')
     def test_extract_gzip(self):
         with get_tmp_dir() as temp_dir:
             with tempfile.NamedTemporaryFile(suffix='.gz') as f:
diff --git a/test/test_datasets_video_utils.py b/test/test_datasets_video_utils.py
index d47d469ea31..a9cb7ab50ef 100644
--- a/test/test_datasets_video_utils.py
+++ b/test/test_datasets_video_utils.py
@@ -1,4 +1,5 @@
 import contextlib
+import sys
 import os
 import torch
 import unittest
@@ -58,6 +59,7 @@ def test_unfold(self):
         self.assertTrue(r.equal(expected))
 
     @unittest.skipIf(not io.video._av_available(), "this test requires av")
+    @unittest.skipIf('win' in sys.platform, 'temporarily disabled on Windows')
     def test_video_clips(self):
         with get_list_of_videos(num_videos=3) as video_list:
             video_clips = VideoClips(video_list, 5, 5)
@@ -112,6 +114,7 @@ def test_video_sampler_unequal(self):
             self.assertTrue(count.equal(torch.tensor([3, 3])))
 
     @unittest.skipIf(not io.video._av_available(), "this test requires av")
+    @unittest.skipIf('win' in sys.platform, 'temporarily disabled on Windows')
     def test_video_clips_custom_fps(self):
         with get_list_of_videos(num_videos=3, sizes=[12, 12, 12], fps=[3, 4, 6]) as video_list:
             num_frames = 4
diff --git a/test/test_io.py b/test/test_io.py
index 8b75cdea1c1..96c33a4be68 100644
--- a/test/test_io.py
+++ b/test/test_io.py
@@ -55,6 +55,7 @@ def temp_video(num_frames, height, width, fps, lossless=False, video_codec=None,
 
 
 @unittest.skipIf(av is None, "PyAV unavailable")
+@unittest.skipIf('win' in sys.platform, 'temporarily disabled on Windows')
 class Tester(unittest.TestCase):
     # compression adds artifacts, thus we add a tolerance of
     # 6 in 0-255 range
diff --git a/test/test_utils.py b/test/test_utils.py
index 4c39520a692..9c833f3432f 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -1,4 +1,5 @@
 import os
+import sys
 import tempfile
 import torch
 import torchvision.utils as utils
@@ -37,12 +38,14 @@ def test_normalize_in_make_grid(self):
         assert torch.equal(norm_max, rounded_grid_max), 'Normalized max is not equal to 1'
         assert torch.equal(norm_min, rounded_grid_min), 'Normalized min is not equal to 0'
 
+    @unittest.skipIf('win' in sys.platform, 'temporarily disabled on Windows')
     def test_save_image(self):
         with tempfile.NamedTemporaryFile(suffix='.png') as f:
             t = torch.rand(2, 3, 64, 64)
             utils.save_image(t, f.name)
             assert os.path.exists(f.name), 'The image is not present after save'
 
+    @unittest.skipIf('win' in sys.platform, 'temporarily disabled on Windows')
     def test_save_image_single_pixel(self):
         with tempfile.NamedTemporaryFile(suffix='.png') as f:
             t = torch.rand(1, 3, 1, 1)

From fe234fc8cfae5fdbc9168994a0547c822631c3aa Mon Sep 17 00:00:00 2001
From: "Edward Z. Yang" <ezyang@fb.com>
Date: Mon, 9 Sep 2019 09:15:50 -0400
Subject: [PATCH 06/26] Revert "Register Torchvision Ops as Cutom Ops (#1267)"
 (#1316)

This reverts commit 78f169b5e1b96871936e45ca310788f3b3e4bb78.
---
 .travis.yml                                |  4 -
 setup.py                                   | 21 +-----
 test/test_onnx.py                          | 88 ----------------------
 torchvision/csrc/ROIAlign.h                |  8 +-
 torchvision/csrc/ROIPool.h                 |  6 +-
 torchvision/csrc/custom_ops/custom_ops.cpp | 14 ----
 torchvision/csrc/nms.h                     |  2 +-
 torchvision/csrc/vision.cpp                |  2 -
 torchvision/ops/_custom_ops.py             | 46 -----------
 torchvision/ops/boxes.py                   |  5 +-
 torchvision/ops/roi_align.py               |  8 --
 torchvision/ops/roi_pool.py                |  8 --
 12 files changed, 13 insertions(+), 199 deletions(-)
 delete mode 100644 test/test_onnx.py
 delete mode 100644 torchvision/csrc/custom_ops/custom_ops.cpp
 delete mode 100644 torchvision/ops/_custom_ops.py

diff --git a/.travis.yml b/.travis.yml
index 497579f5a3a..fc0aa7240a4 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -47,10 +47,6 @@ before_install:
   - pip install future
   - pip install pytest pytest-cov codecov
   - pip install mock
-  - |
-    if [[ $TRAVIS_PYTHON_VERSION == 3.6 ]]; then
-      pip install onnxruntime
-    fi
   - conda install av -c conda-forge
 
 
diff --git a/setup.py b/setup.py
index e9b4627ec77..d6dac6c0deb 100644
--- a/setup.py
+++ b/setup.py
@@ -96,21 +96,12 @@ def get_extensions():
     source_models = [os.path.join(models_dir, s) for s in source_models]
     tests = test_file + source_models
 
-    custom_ops_sources = [os.path.join(extensions_dir, "custom_ops", "custom_ops.cpp"),
-                          os.path.join(extensions_dir, "cpu", "nms_cpu.cpp"),
-                          os.path.join(extensions_dir, "cpu", "ROIAlign_cpu.cpp"),
-                          os.path.join(extensions_dir, "cpu", "ROIPool_cpu.cpp")]
-    custom_ops_sources_cuda = [os.path.join(extensions_dir, "cuda", "nms_cuda.cu"),
-                               os.path.join(extensions_dir, "cuda", "ROIAlign_cuda.cu"),
-                               os.path.join(extensions_dir, "cuda", "ROIPool_cuda.cu")]
-
     define_macros = []
 
     extra_compile_args = {}
     if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv('FORCE_CUDA', '0') == '1':
         extension = CUDAExtension
         sources += source_cuda
-        custom_ops_sources += custom_ops_sources_cuda
         define_macros += [('WITH_CUDA', None)]
         nvcc_flags = os.getenv('NVCC_FLAGS', '')
         if nvcc_flags == '':
@@ -147,14 +138,7 @@ def get_extensions():
             include_dirs=tests_include_dirs,
             define_macros=define_macros,
             extra_compile_args=extra_compile_args,
-        ),
-        extension(
-            "torchvision._custom_ops",
-            sources=custom_ops_sources,
-            include_dirs=include_dirs,
-            define_macros=define_macros,
-            extra_compile_args=extra_compile_args,
-        ),
+        )
     ]
 
     return ext_modules
@@ -195,6 +179,5 @@ def run(self):
         "scipy": ["scipy"],
     },
     ext_modules=get_extensions(),
-    cmdclass={'build_ext': torch.utils.cpp_extension.BuildExtension,
-              'clean': clean}
+    cmdclass={'build_ext': torch.utils.cpp_extension.BuildExtension, 'clean': clean}
 )
diff --git a/test/test_onnx.py b/test/test_onnx.py
deleted file mode 100644
index 57b3c8dd729..00000000000
--- a/test/test_onnx.py
+++ /dev/null
@@ -1,88 +0,0 @@
-import io
-import torch
-from torchvision import ops
-
-# onnxruntime requires python 3.5 or above
-try:
-    import onnxruntime
-except ImportError:
-    onnxruntime = None
-
-import unittest
-
-
-@unittest.skipIf(onnxruntime is None, 'ONNX Runtime unavailable')
-class ONNXExporterTester(unittest.TestCase):
-    @classmethod
-    def setUpClass(cls):
-        torch.manual_seed(123)
-
-    def run_model(self, model, inputs):
-        model.eval()
-
-        # run pytorch model
-        with torch.no_grad():
-            if isinstance(inputs, torch.Tensor):
-                inputs = (inputs,)
-            outputs = model(*inputs)
-            if isinstance(outputs, torch.Tensor):
-                outputs = (outputs,)
-
-        onnx_io = io.BytesIO()
-        # export to onnx
-        torch.onnx.export(model, inputs, onnx_io, do_constant_folding=True, opset_version=10)
-
-        # validate the exported model with onnx runtime
-        self.ort_validate(onnx_io, inputs, outputs)
-
-    def ort_validate(self, onnx_io, inputs, outputs):
-
-        inputs, _ = torch.jit._flatten(inputs)
-        outputs, _ = torch.jit._flatten(outputs)
-
-        def to_numpy(tensor):
-            if tensor.requires_grad:
-                return tensor.detach().cpu().numpy()
-            else:
-                return tensor.cpu().numpy()
-
-        inputs = list(map(to_numpy, inputs))
-        outputs = list(map(to_numpy, outputs))
-
-        ort_session = onnxruntime.InferenceSession(onnx_io.getvalue())
-        # compute onnxruntime output prediction
-        ort_inputs = dict((ort_session.get_inputs()[i].name, inpt) for i, inpt in enumerate(inputs))
-        ort_outs = ort_session.run(None, ort_inputs)
-
-        for i in range(0, len(outputs)):
-            torch.testing.assert_allclose(outputs[i], ort_outs[i], rtol=1e-03, atol=1e-05)
-
-    def test_nms(self):
-        boxes = torch.rand(5, 4)
-        boxes[:, 2:] += torch.rand(5, 2)
-        scores = torch.randn(5)
-
-        class Module(torch.nn.Module):
-            def forward(self, boxes, scores):
-                return ops.nms(boxes, scores, 0.5)
-
-        self.run_model(Module(), (boxes, scores))
-
-    def test_roi_pool(self):
-        x = torch.rand(1, 1, 10, 10, dtype=torch.float32)
-        single_roi = torch.tensor([[0, 0, 0, 4, 4]], dtype=torch.float32)
-        model = ops.RoIAlign((5, 5), 1, 2)
-        self.run_model(model, (x, single_roi))
-
-    def test_roi_align(self):
-        x = torch.rand(1, 1, 10, 10, dtype=torch.float32)
-        rois = torch.tensor([[0, 0, 0, 4, 4]], dtype=torch.float32)
-        pool_h = 5
-        pool_w = 5
-        model = ops.RoIPool((pool_h, pool_w), 2)
-        model.eval()
-        self.run_model(model, (x, rois))
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/torchvision/csrc/ROIAlign.h b/torchvision/csrc/ROIAlign.h
index 7e18cf68f57..2e914ac0092 100644
--- a/torchvision/csrc/ROIAlign.h
+++ b/torchvision/csrc/ROIAlign.h
@@ -10,11 +10,11 @@
 at::Tensor ROIAlign_forward(
     const at::Tensor& input, // Input feature map.
     const at::Tensor& rois, // List of ROIs to pool over.
-    const double spatial_scale, // The scale of the image features. ROIs will be
+    const float spatial_scale, // The scale of the image features. ROIs will be
     // scaled to this.
-    const int64_t pooled_height, // The height of the pooled feature map.
-    const int64_t pooled_width, // The width of the pooled feature
-    const int64_t sampling_ratio) // The number of points to sample in each bin
+    const int pooled_height, // The height of the pooled feature map.
+    const int pooled_width, // The width of the pooled feature
+    const int sampling_ratio) // The number of points to sample in each bin
 // along each axis.
 {
   if (input.type().is_cuda()) {
diff --git a/torchvision/csrc/ROIPool.h b/torchvision/csrc/ROIPool.h
index 7aefcc5e810..8885be4739f 100644
--- a/torchvision/csrc/ROIPool.h
+++ b/torchvision/csrc/ROIPool.h
@@ -9,9 +9,9 @@
 std::tuple<at::Tensor, at::Tensor> ROIPool_forward(
     const at::Tensor& input,
     const at::Tensor& rois,
-    const double spatial_scale,
-    const int64_t pooled_height,
-    const int64_t pooled_width) {
+    const float spatial_scale,
+    const int pooled_height,
+    const int pooled_width) {
   if (input.type().is_cuda()) {
 #ifdef WITH_CUDA
     return ROIPool_forward_cuda(
diff --git a/torchvision/csrc/custom_ops/custom_ops.cpp b/torchvision/csrc/custom_ops/custom_ops.cpp
deleted file mode 100644
index 677e5866a21..00000000000
--- a/torchvision/csrc/custom_ops/custom_ops.cpp
+++ /dev/null
@@ -1,14 +0,0 @@
-#include <torch/script.h>
-
-#include "ROIAlign.h"
-#include "ROIPool.h"
-#include "nms.h"
-
-using namespace at;
-
-static auto registry =
-    torch::RegisterOperators()
-        .op("torchvision::nms", &nms)
-        .op("torchvision::roi_align(Tensor input, Tensor rois, float spatial_scale, int pooled_height, int pooled_width, int sampling_ratio) -> Tensor",
-            &ROIAlign_forward)
-        .op("torchvision::roi_pool", &ROIPool_forward);
diff --git a/torchvision/csrc/nms.h b/torchvision/csrc/nms.h
index fc667fc8058..9a1a775e677 100644
--- a/torchvision/csrc/nms.h
+++ b/torchvision/csrc/nms.h
@@ -8,7 +8,7 @@
 at::Tensor nms(
     const at::Tensor& dets,
     const at::Tensor& scores,
-    const double iou_threshold) {
+    const float iou_threshold) {
   if (dets.device().is_cuda()) {
 #ifdef WITH_CUDA
     if (dets.numel() == 0) {
diff --git a/torchvision/csrc/vision.cpp b/torchvision/csrc/vision.cpp
index 61a4eeee727..4777d70a38b 100644
--- a/torchvision/csrc/vision.cpp
+++ b/torchvision/csrc/vision.cpp
@@ -7,8 +7,6 @@
 #endif
 
 PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
-  // TODO: remove nms from here since it is now registered
-  //       and used as a PyTorch custom op
   m.def("nms", &nms, "non-maximum suppression");
   m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward");
   m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward");
diff --git a/torchvision/ops/_custom_ops.py b/torchvision/ops/_custom_ops.py
deleted file mode 100644
index 70108bdf152..00000000000
--- a/torchvision/ops/_custom_ops.py
+++ /dev/null
@@ -1,46 +0,0 @@
-import os
-import sys
-import imp
-import torch
-
-
-# load the custom_op_library and register the custom ops
-lib_dir = os.path.join(os.path.dirname(__file__), '..')
-file, path, description = imp.find_module("_custom_ops", [lib_dir])
-torch.ops.load_library(path)
-
-
-def register_custom_op():
-    from torch.onnx.symbolic_helper import parse_args, scalar_type_to_onnx
-    from torch.onnx.symbolic_opset9 import select, unsqueeze, squeeze, _cast_Long, reshape
-
-    @parse_args('v', 'v', 'f')
-    def symbolic_multi_label_nms(g, boxes, scores, iou_threshold):
-        boxes = unsqueeze(g, boxes, 0)
-        scores = unsqueeze(g, unsqueeze(g, scores, 0), 0)
-        max_output_per_class = g.op('Constant', value_t=torch.tensor([sys.maxsize], dtype=torch.long))
-        iou_threshold = g.op('Constant', value_t=torch.tensor([iou_threshold], dtype=torch.float))
-        nms_out = g.op('NonMaxSuppression', boxes, scores, max_output_per_class, iou_threshold)
-        return squeeze(g, select(g, nms_out, 1, g.op('Constant', value_t=torch.tensor([2], dtype=torch.long))), 1)
-
-    @parse_args('v', 'v', 'f', 'i', 'i', 'i')
-    def roi_align(g, input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio):
-        batch_indices = _cast_Long(g, squeeze(g, select(g, rois, 1, g.op('Constant',
-                                   value_t=torch.tensor([0], dtype=torch.long))), 1), False)
-        rois = select(g, rois, 1, g.op('Constant', value_t=torch.tensor([1, 2, 3, 4], dtype=torch.long)))
-        return g.op('RoiAlign', input, rois, batch_indices, spatial_scale_f=spatial_scale,
-                    output_height_i=pooled_height, output_width_i=pooled_width, sampling_ratio_i=sampling_ratio)
-
-    @parse_args('v', 'v', 'f', 'i', 'i')
-    def roi_pool(g, input, rois, spatial_scale, pooled_height, pooled_width):
-        roi_pool = g.op('MaxRoiPool', input, rois,
-                        pooled_shape_i=(pooled_height, pooled_width), spatial_scale_f=spatial_scale)
-        return roi_pool, None
-
-    from torch.onnx import register_custom_op_symbolic
-    register_custom_op_symbolic('torchvision::nms', symbolic_multi_label_nms, 10)
-    register_custom_op_symbolic('torchvision::roi_align', roi_align, 10)
-    register_custom_op_symbolic('torchvision::roi_pool', roi_pool, 10)
-
-
-register_custom_op()
diff --git a/torchvision/ops/boxes.py b/torchvision/ops/boxes.py
index 5475d38db30..af92a0cbf0e 100644
--- a/torchvision/ops/boxes.py
+++ b/torchvision/ops/boxes.py
@@ -1,5 +1,5 @@
 import torch
-import torchvision.ops._custom_ops
+from torchvision.extension import _lazy_import
 
 
 def nms(boxes, scores, iou_threshold):
@@ -29,7 +29,8 @@ def nms(boxes, scores, iou_threshold):
         of the elements that have been kept
         by NMS, sorted in decreasing order of scores
     """
-    return torch.ops.torchvision.nms(boxes, scores, iou_threshold)
+    _C = _lazy_import()
+    return _C.nms(boxes, scores, iou_threshold)
 
 
 def batched_nms(boxes, scores, idxs, iou_threshold):
diff --git a/torchvision/ops/roi_align.py b/torchvision/ops/roi_align.py
index 18a3354ab64..b2a900e0c4b 100644
--- a/torchvision/ops/roi_align.py
+++ b/torchvision/ops/roi_align.py
@@ -9,8 +9,6 @@
 from torchvision.extension import _lazy_import
 from ._utils import convert_boxes_to_roi_format
 
-import torchvision.ops._custom_ops
-
 
 class _RoIAlignFunction(Function):
     @staticmethod
@@ -68,12 +66,6 @@ def roi_align(input, boxes, output_size, spatial_scale=1.0, sampling_ratio=-1):
     rois = boxes
     if not isinstance(rois, torch.Tensor):
         rois = convert_boxes_to_roi_format(rois)
-    # TODO: Change this to support backwards, which we
-    #       do not currently support when JIT tracing.
-    if torch._C._get_tracing_state():
-        return torch.ops.torchvision.roi_align(input, rois, spatial_scale,
-                                               output_size[0], output_size[1],
-                                               sampling_ratio)
     return _RoIAlignFunction.apply(input, rois, output_size, spatial_scale, sampling_ratio)
 
 
diff --git a/torchvision/ops/roi_pool.py b/torchvision/ops/roi_pool.py
index cbfd8a318eb..f6c7c9ed7aa 100644
--- a/torchvision/ops/roi_pool.py
+++ b/torchvision/ops/roi_pool.py
@@ -9,8 +9,6 @@
 from torchvision.extension import _lazy_import
 from ._utils import convert_boxes_to_roi_format
 
-import torchvision.ops._custom_ops
-
 
 class _RoIPoolFunction(Function):
     @staticmethod
@@ -61,12 +59,6 @@ def roi_pool(input, boxes, output_size, spatial_scale=1.0):
     rois = boxes
     if not isinstance(rois, torch.Tensor):
         rois = convert_boxes_to_roi_format(rois)
-    # TODO: Change this to support backwards, which we
-    #       do not currently support when JIT tracing.
-    if torch._C._get_tracing_state():
-        output, _ = torch.ops.torchvision.roi_pool(input, rois, spatial_scale,
-                                                   output_size[0], output_size[1])
-        return output
     return _RoIPoolFunction.apply(input, rois, output_size, spatial_scale)
 
 

From 6ddda3ae4f2b7616482b8f32ba9bea86be281720 Mon Sep 17 00:00:00 2001
From: Philip Meier <gamesnmore@online.de>
Date: Mon, 9 Sep 2019 20:20:16 +0200
Subject: [PATCH 07/26] Fix EMNIST download URL (#1318)

* fix url

* update comment
---
 torchvision/datasets/mnist.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/torchvision/datasets/mnist.py b/torchvision/datasets/mnist.py
index b95e8fe3d43..8b421f467e9 100644
--- a/torchvision/datasets/mnist.py
+++ b/torchvision/datasets/mnist.py
@@ -226,8 +226,11 @@ class EMNIST(MNIST):
         target_transform (callable, optional): A function/transform that takes in the
             target and transforms it.
     """
-    # Updated URL from https://www.westernsydney.edu.au/bens/home/reproducible_research/emnist
-    url = 'https://cloudstor.aarnet.edu.au/plus/s/ZNmuFiuQTqZlu9W/download'
+    # Updated URL from https://www.nist.gov/node/1298471/emnist-dataset since the
+    # _official_ download link
+    # https://cloudstor.aarnet.edu.au/plus/s/ZNmuFiuQTqZlu9W/download
+    # is (currently) unavailable
+    url = 'http://www.itl.nist.gov/iaui/vip/cs_links/EMNIST/gzip.zip'
     splits = ('byclass', 'bymerge', 'balanced', 'letters', 'digits', 'mnist')
 
     def __init__(self, root, split, **kwargs):

From 7f7e7663e1bd5b8ae4e6f4a0b561319777769bdf Mon Sep 17 00:00:00 2001
From: Lara Haidar <lahaidar@microsoft.com>
Date: Tue, 10 Sep 2019 02:11:38 -0700
Subject: [PATCH 08/26] Fix Windows build in Torchvision Custom op Registration
 (#1320)

* Revert "Revert "Register Torchvision Ops as Cutom Ops (#1267)" (#1316)"

This reverts commit fe234fc8cfae5fdbc9168994a0547c822631c3aa.

* Make import of C++ extensions lazy

* define python initialization functions for extension

* Fix lint
---
 .travis.yml                                |  4 +
 setup.py                                   | 21 +++++-
 test/test_onnx.py                          | 88 ++++++++++++++++++++++
 torchvision/csrc/ROIAlign.h                |  8 +-
 torchvision/csrc/ROIPool.h                 |  6 +-
 torchvision/csrc/custom_ops/custom_ops.cpp | 33 ++++++++
 torchvision/csrc/nms.h                     |  2 +-
 torchvision/csrc/vision.cpp                |  2 +
 torchvision/extension.py                   |  1 +
 torchvision/ops/_custom_ops.py             | 46 +++++++++++
 torchvision/ops/boxes.py                   |  4 +-
 torchvision/ops/roi_align.py               |  7 ++
 torchvision/ops/roi_pool.py                |  7 ++
 13 files changed, 217 insertions(+), 12 deletions(-)
 create mode 100644 test/test_onnx.py
 create mode 100644 torchvision/csrc/custom_ops/custom_ops.cpp
 create mode 100644 torchvision/ops/_custom_ops.py

diff --git a/.travis.yml b/.travis.yml
index fc0aa7240a4..497579f5a3a 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -47,6 +47,10 @@ before_install:
   - pip install future
   - pip install pytest pytest-cov codecov
   - pip install mock
+  - |
+    if [[ $TRAVIS_PYTHON_VERSION == 3.6 ]]; then
+      pip install onnxruntime
+    fi
   - conda install av -c conda-forge
 
 
diff --git a/setup.py b/setup.py
index d6dac6c0deb..e9b4627ec77 100644
--- a/setup.py
+++ b/setup.py
@@ -96,12 +96,21 @@ def get_extensions():
     source_models = [os.path.join(models_dir, s) for s in source_models]
     tests = test_file + source_models
 
+    custom_ops_sources = [os.path.join(extensions_dir, "custom_ops", "custom_ops.cpp"),
+                          os.path.join(extensions_dir, "cpu", "nms_cpu.cpp"),
+                          os.path.join(extensions_dir, "cpu", "ROIAlign_cpu.cpp"),
+                          os.path.join(extensions_dir, "cpu", "ROIPool_cpu.cpp")]
+    custom_ops_sources_cuda = [os.path.join(extensions_dir, "cuda", "nms_cuda.cu"),
+                               os.path.join(extensions_dir, "cuda", "ROIAlign_cuda.cu"),
+                               os.path.join(extensions_dir, "cuda", "ROIPool_cuda.cu")]
+
     define_macros = []
 
     extra_compile_args = {}
     if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv('FORCE_CUDA', '0') == '1':
         extension = CUDAExtension
         sources += source_cuda
+        custom_ops_sources += custom_ops_sources_cuda
         define_macros += [('WITH_CUDA', None)]
         nvcc_flags = os.getenv('NVCC_FLAGS', '')
         if nvcc_flags == '':
@@ -138,7 +147,14 @@ def get_extensions():
             include_dirs=tests_include_dirs,
             define_macros=define_macros,
             extra_compile_args=extra_compile_args,
-        )
+        ),
+        extension(
+            "torchvision._custom_ops",
+            sources=custom_ops_sources,
+            include_dirs=include_dirs,
+            define_macros=define_macros,
+            extra_compile_args=extra_compile_args,
+        ),
     ]
 
     return ext_modules
@@ -179,5 +195,6 @@ def run(self):
         "scipy": ["scipy"],
     },
     ext_modules=get_extensions(),
-    cmdclass={'build_ext': torch.utils.cpp_extension.BuildExtension, 'clean': clean}
+    cmdclass={'build_ext': torch.utils.cpp_extension.BuildExtension,
+              'clean': clean}
 )
diff --git a/test/test_onnx.py b/test/test_onnx.py
new file mode 100644
index 00000000000..57b3c8dd729
--- /dev/null
+++ b/test/test_onnx.py
@@ -0,0 +1,88 @@
+import io
+import torch
+from torchvision import ops
+
+# onnxruntime requires python 3.5 or above
+try:
+    import onnxruntime
+except ImportError:
+    onnxruntime = None
+
+import unittest
+
+
+@unittest.skipIf(onnxruntime is None, 'ONNX Runtime unavailable')
+class ONNXExporterTester(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        torch.manual_seed(123)
+
+    def run_model(self, model, inputs):
+        model.eval()
+
+        # run pytorch model
+        with torch.no_grad():
+            if isinstance(inputs, torch.Tensor):
+                inputs = (inputs,)
+            outputs = model(*inputs)
+            if isinstance(outputs, torch.Tensor):
+                outputs = (outputs,)
+
+        onnx_io = io.BytesIO()
+        # export to onnx
+        torch.onnx.export(model, inputs, onnx_io, do_constant_folding=True, opset_version=10)
+
+        # validate the exported model with onnx runtime
+        self.ort_validate(onnx_io, inputs, outputs)
+
+    def ort_validate(self, onnx_io, inputs, outputs):
+
+        inputs, _ = torch.jit._flatten(inputs)
+        outputs, _ = torch.jit._flatten(outputs)
+
+        def to_numpy(tensor):
+            if tensor.requires_grad:
+                return tensor.detach().cpu().numpy()
+            else:
+                return tensor.cpu().numpy()
+
+        inputs = list(map(to_numpy, inputs))
+        outputs = list(map(to_numpy, outputs))
+
+        ort_session = onnxruntime.InferenceSession(onnx_io.getvalue())
+        # compute onnxruntime output prediction
+        ort_inputs = dict((ort_session.get_inputs()[i].name, inpt) for i, inpt in enumerate(inputs))
+        ort_outs = ort_session.run(None, ort_inputs)
+
+        for i in range(0, len(outputs)):
+            torch.testing.assert_allclose(outputs[i], ort_outs[i], rtol=1e-03, atol=1e-05)
+
+    def test_nms(self):
+        boxes = torch.rand(5, 4)
+        boxes[:, 2:] += torch.rand(5, 2)
+        scores = torch.randn(5)
+
+        class Module(torch.nn.Module):
+            def forward(self, boxes, scores):
+                return ops.nms(boxes, scores, 0.5)
+
+        self.run_model(Module(), (boxes, scores))
+
+    def test_roi_pool(self):
+        x = torch.rand(1, 1, 10, 10, dtype=torch.float32)
+        single_roi = torch.tensor([[0, 0, 0, 4, 4]], dtype=torch.float32)
+        model = ops.RoIAlign((5, 5), 1, 2)
+        self.run_model(model, (x, single_roi))
+
+    def test_roi_align(self):
+        x = torch.rand(1, 1, 10, 10, dtype=torch.float32)
+        rois = torch.tensor([[0, 0, 0, 4, 4]], dtype=torch.float32)
+        pool_h = 5
+        pool_w = 5
+        model = ops.RoIPool((pool_h, pool_w), 2)
+        model.eval()
+        self.run_model(model, (x, rois))
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/torchvision/csrc/ROIAlign.h b/torchvision/csrc/ROIAlign.h
index 2e914ac0092..7e18cf68f57 100644
--- a/torchvision/csrc/ROIAlign.h
+++ b/torchvision/csrc/ROIAlign.h
@@ -10,11 +10,11 @@
 at::Tensor ROIAlign_forward(
     const at::Tensor& input, // Input feature map.
     const at::Tensor& rois, // List of ROIs to pool over.
-    const float spatial_scale, // The scale of the image features. ROIs will be
+    const double spatial_scale, // The scale of the image features. ROIs will be
     // scaled to this.
-    const int pooled_height, // The height of the pooled feature map.
-    const int pooled_width, // The width of the pooled feature
-    const int sampling_ratio) // The number of points to sample in each bin
+    const int64_t pooled_height, // The height of the pooled feature map.
+    const int64_t pooled_width, // The width of the pooled feature
+    const int64_t sampling_ratio) // The number of points to sample in each bin
 // along each axis.
 {
   if (input.type().is_cuda()) {
diff --git a/torchvision/csrc/ROIPool.h b/torchvision/csrc/ROIPool.h
index 8885be4739f..7aefcc5e810 100644
--- a/torchvision/csrc/ROIPool.h
+++ b/torchvision/csrc/ROIPool.h
@@ -9,9 +9,9 @@
 std::tuple<at::Tensor, at::Tensor> ROIPool_forward(
     const at::Tensor& input,
     const at::Tensor& rois,
-    const float spatial_scale,
-    const int pooled_height,
-    const int pooled_width) {
+    const double spatial_scale,
+    const int64_t pooled_height,
+    const int64_t pooled_width) {
   if (input.type().is_cuda()) {
 #ifdef WITH_CUDA
     return ROIPool_forward_cuda(
diff --git a/torchvision/csrc/custom_ops/custom_ops.cpp b/torchvision/csrc/custom_ops/custom_ops.cpp
new file mode 100644
index 00000000000..879d295ff50
--- /dev/null
+++ b/torchvision/csrc/custom_ops/custom_ops.cpp
@@ -0,0 +1,33 @@
+#include <Python.h>
+#include <torch/script.h>
+
+#include "ROIAlign.h"
+#include "ROIPool.h"
+#include "nms.h"
+
+using namespace at;
+
+// If we are in a Windows environment, we need to define
+// initialization functions for the _custom_ops extension
+#ifdef _WIN32
+#if PY_MAJOR_VERSION < 3
+PyMODINIT_FUNC init_custom_ops(void) {
+  // No need to do anything.
+  // _custom_ops.py will run on load
+  return NULL;
+}
+#else
+PyMODINIT_FUNC PyInit__custom_ops(void) {
+  // No need to do anything.
+  // _custom_ops.py will run on load
+  return NULL;
+}
+#endif
+#endif
+
+static auto registry =
+    torch::RegisterOperators()
+        .op("torchvision::nms", &nms)
+        .op("torchvision::roi_align(Tensor input, Tensor rois, float spatial_scale, int pooled_height, int pooled_width, int sampling_ratio) -> Tensor",
+            &ROIAlign_forward)
+        .op("torchvision::roi_pool", &ROIPool_forward);
diff --git a/torchvision/csrc/nms.h b/torchvision/csrc/nms.h
index 9a1a775e677..fc667fc8058 100644
--- a/torchvision/csrc/nms.h
+++ b/torchvision/csrc/nms.h
@@ -8,7 +8,7 @@
 at::Tensor nms(
     const at::Tensor& dets,
     const at::Tensor& scores,
-    const float iou_threshold) {
+    const double iou_threshold) {
   if (dets.device().is_cuda()) {
 #ifdef WITH_CUDA
     if (dets.numel() == 0) {
diff --git a/torchvision/csrc/vision.cpp b/torchvision/csrc/vision.cpp
index 4777d70a38b..61a4eeee727 100644
--- a/torchvision/csrc/vision.cpp
+++ b/torchvision/csrc/vision.cpp
@@ -7,6 +7,8 @@
 #endif
 
 PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
+  // TODO: remove nms from here since it is now registered
+  //       and used as a PyTorch custom op
   m.def("nms", &nms, "non-maximum suppression");
   m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward");
   m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward");
diff --git a/torchvision/extension.py b/torchvision/extension.py
index 06c41ceb5df..b872af6ee74 100644
--- a/torchvision/extension.py
+++ b/torchvision/extension.py
@@ -10,6 +10,7 @@ def _lazy_import():
         return _C
     import torch
     from torchvision import _C as C
+    import torchvision.ops._custom_ops
     _C = C
     if hasattr(_C, "CUDA_VERSION") and torch.version.cuda is not None:
         tv_version = str(_C.CUDA_VERSION)
diff --git a/torchvision/ops/_custom_ops.py b/torchvision/ops/_custom_ops.py
new file mode 100644
index 00000000000..70108bdf152
--- /dev/null
+++ b/torchvision/ops/_custom_ops.py
@@ -0,0 +1,46 @@
+import os
+import sys
+import imp
+import torch
+
+
+# load the custom_op_library and register the custom ops
+lib_dir = os.path.join(os.path.dirname(__file__), '..')
+file, path, description = imp.find_module("_custom_ops", [lib_dir])
+torch.ops.load_library(path)
+
+
+def register_custom_op():
+    from torch.onnx.symbolic_helper import parse_args, scalar_type_to_onnx
+    from torch.onnx.symbolic_opset9 import select, unsqueeze, squeeze, _cast_Long, reshape
+
+    @parse_args('v', 'v', 'f')
+    def symbolic_multi_label_nms(g, boxes, scores, iou_threshold):
+        boxes = unsqueeze(g, boxes, 0)
+        scores = unsqueeze(g, unsqueeze(g, scores, 0), 0)
+        max_output_per_class = g.op('Constant', value_t=torch.tensor([sys.maxsize], dtype=torch.long))
+        iou_threshold = g.op('Constant', value_t=torch.tensor([iou_threshold], dtype=torch.float))
+        nms_out = g.op('NonMaxSuppression', boxes, scores, max_output_per_class, iou_threshold)
+        return squeeze(g, select(g, nms_out, 1, g.op('Constant', value_t=torch.tensor([2], dtype=torch.long))), 1)
+
+    @parse_args('v', 'v', 'f', 'i', 'i', 'i')
+    def roi_align(g, input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio):
+        batch_indices = _cast_Long(g, squeeze(g, select(g, rois, 1, g.op('Constant',
+                                   value_t=torch.tensor([0], dtype=torch.long))), 1), False)
+        rois = select(g, rois, 1, g.op('Constant', value_t=torch.tensor([1, 2, 3, 4], dtype=torch.long)))
+        return g.op('RoiAlign', input, rois, batch_indices, spatial_scale_f=spatial_scale,
+                    output_height_i=pooled_height, output_width_i=pooled_width, sampling_ratio_i=sampling_ratio)
+
+    @parse_args('v', 'v', 'f', 'i', 'i')
+    def roi_pool(g, input, rois, spatial_scale, pooled_height, pooled_width):
+        roi_pool = g.op('MaxRoiPool', input, rois,
+                        pooled_shape_i=(pooled_height, pooled_width), spatial_scale_f=spatial_scale)
+        return roi_pool, None
+
+    from torch.onnx import register_custom_op_symbolic
+    register_custom_op_symbolic('torchvision::nms', symbolic_multi_label_nms, 10)
+    register_custom_op_symbolic('torchvision::roi_align', roi_align, 10)
+    register_custom_op_symbolic('torchvision::roi_pool', roi_pool, 10)
+
+
+register_custom_op()
diff --git a/torchvision/ops/boxes.py b/torchvision/ops/boxes.py
index af92a0cbf0e..3e773a02f8b 100644
--- a/torchvision/ops/boxes.py
+++ b/torchvision/ops/boxes.py
@@ -29,8 +29,8 @@ def nms(boxes, scores, iou_threshold):
         of the elements that have been kept
         by NMS, sorted in decreasing order of scores
     """
-    _C = _lazy_import()
-    return _C.nms(boxes, scores, iou_threshold)
+    _lazy_import()
+    return torch.ops.torchvision.nms(boxes, scores, iou_threshold)
 
 
 def batched_nms(boxes, scores, idxs, iou_threshold):
diff --git a/torchvision/ops/roi_align.py b/torchvision/ops/roi_align.py
index b2a900e0c4b..45496b2dc5d 100644
--- a/torchvision/ops/roi_align.py
+++ b/torchvision/ops/roi_align.py
@@ -66,6 +66,13 @@ def roi_align(input, boxes, output_size, spatial_scale=1.0, sampling_ratio=-1):
     rois = boxes
     if not isinstance(rois, torch.Tensor):
         rois = convert_boxes_to_roi_format(rois)
+    # TODO: Change this to support backwards, which we
+    #       do not currently support when JIT tracing.
+    if torch._C._get_tracing_state():
+        _lazy_import()
+        return torch.ops.torchvision.roi_align(input, rois, spatial_scale,
+                                               output_size[0], output_size[1],
+                                               sampling_ratio)
     return _RoIAlignFunction.apply(input, rois, output_size, spatial_scale, sampling_ratio)
 
 
diff --git a/torchvision/ops/roi_pool.py b/torchvision/ops/roi_pool.py
index f6c7c9ed7aa..b2a778a63b5 100644
--- a/torchvision/ops/roi_pool.py
+++ b/torchvision/ops/roi_pool.py
@@ -59,6 +59,13 @@ def roi_pool(input, boxes, output_size, spatial_scale=1.0):
     rois = boxes
     if not isinstance(rois, torch.Tensor):
         rois = convert_boxes_to_roi_format(rois)
+    # TODO: Change this to support backwards, which we
+    #       do not currently support when JIT tracing.
+    if torch._C._get_tracing_state():
+        _lazy_import()
+        output, _ = torch.ops.torchvision.roi_pool(input, rois, spatial_scale,
+                                                   output_size[0], output_size[1])
+        return output
     return _RoIPoolFunction.apply(input, rois, output_size, spatial_scale)
 
 

From cabca398d1abe4d456ebbf258028bfdc670b49a4 Mon Sep 17 00:00:00 2001
From: Dmitrii Petukhov <dimart.sp@gmail.com>
Date: Tue, 10 Sep 2019 11:29:23 +0200
Subject: [PATCH 09/26] Fix make_grid: support any number of channels in tensor
 (#1300)

---
 torchvision/utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/torchvision/utils.py b/torchvision/utils.py
index f07a3bb4016..d7ddeb538c9 100644
--- a/torchvision/utils.py
+++ b/torchvision/utils.py
@@ -74,7 +74,8 @@ def norm_range(t, range):
     xmaps = min(nrow, nmaps)
     ymaps = int(math.ceil(float(nmaps) / xmaps))
     height, width = int(tensor.size(2) + padding), int(tensor.size(3) + padding)
-    grid = tensor.new_full((3, height * ymaps + padding, width * xmaps + padding), pad_value)
+    num_channels = tensor.size(1)
+    grid = tensor.new_full((num_channels, height * ymaps + padding, width * xmaps + padding), pad_value)
     k = 0
     for y in irange(ymaps):
         for x in irange(xmaps):

From a91fe7221b55c55dbbc23c23aecf33d470a5c08e Mon Sep 17 00:00:00 2001
From: Thomas Viehmann <tv.github@beamnet.de>
Date: Tue, 10 Sep 2019 13:47:48 +0200
Subject: [PATCH 10/26] Make custom ops differentiable (#1314)

* Make custom ops differentiable

and replace autograd.Function. Use ops unconditionally.

We may consider removing the extension functions in a follow-up.

The code-path is tested by the existing tests for differentiability.

* add scripting gradchecks tests and use intlist

* fix implicit tuple conversion for gcc-5

* fix merge
---
 test/test_ops.py                           |  24 ++++
 torchvision/csrc/custom_ops/custom_ops.cpp | 130 ++++++++++++++++++++-
 torchvision/ops/roi_align.py               |  41 +------
 torchvision/ops/roi_pool.py                |  39 +------
 4 files changed, 160 insertions(+), 74 deletions(-)

diff --git a/test/test_ops.py b/test/test_ops.py
index 14bc9705bb5..7db8c6981d0 100644
--- a/test/test_ops.py
+++ b/test/test_ops.py
@@ -188,6 +188,12 @@ def func(input):
         assert gradcheck(func, (x,)), 'gradcheck failed for roi_pool CPU'
         assert gradcheck(func, (x.permute(0, 1, 3, 2),)), 'gradcheck failed for roi_pool CPU'
 
+        @torch.jit.script
+        def script_func(input, rois):
+            return torch.ops.torchvision.roi_pool(input, rois, 1.0, 5, 5)[0]
+
+        assert gradcheck(lambda x: script_func(x, rois), (x,)), 'gradcheck failed for scripted roi_pool'
+
     @unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
     def test_roi_pool_basic_cuda(self):
         device = torch.device('cuda')
@@ -274,6 +280,12 @@ def func(input):
         assert gradcheck(func, (x,)), 'gradcheck failed for roi_pool CUDA'
         assert gradcheck(func, (x.permute(0, 1, 3, 2),)), 'gradcheck failed for roi_pool CUDA'
 
+        @torch.jit.script
+        def script_func(input, rois):
+            return torch.ops.torchvision.roi_pool(input, rois, 1.0, 5, 5)[0]
+
+        assert gradcheck(lambda x: script_func(x, rois), (x,)), 'gradcheck failed for scripted roi_pool on CUDA'
+
 
 class RoIAlignTester(unittest.TestCase):
     @classmethod
@@ -428,6 +440,12 @@ def func(input):
         assert gradcheck(func, (x,)), 'gradcheck failed for RoIAlign CPU'
         assert gradcheck(func, (x.transpose(2, 3),)), 'gradcheck failed for RoIAlign CPU'
 
+        @torch.jit.script
+        def script_func(input, rois):
+            return torch.ops.torchvision.roi_align(input, rois, 0.5, 5, 5, 1)[0]
+
+        assert gradcheck(lambda x: script_func(x, rois), (x,)), 'gradcheck failed for scripted roi_align'
+
     @unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
     def test_roi_align_gradient_cuda(self):
         """
@@ -462,6 +480,12 @@ def func(input):
         assert gradcheck(func, (x,)), 'gradcheck failed for RoIAlign CUDA'
         assert gradcheck(func, (x.transpose(2, 3),)), 'gradcheck failed for RoIAlign CUDA'
 
+        @torch.jit.script
+        def script_func(input, rois):
+            return torch.ops.torchvision.roi_align(input, rois, 0.5, 5, 5, 1)[0]
+
+        assert gradcheck(lambda x: script_func(x, rois), (x,)), 'gradcheck failed for scripted roi_align on CUDA'
+
 
 class NMSTester(unittest.TestCase):
     def reference_nms(self, boxes, scores, iou_threshold):
diff --git a/torchvision/csrc/custom_ops/custom_ops.cpp b/torchvision/csrc/custom_ops/custom_ops.cpp
index 879d295ff50..e3b7bc9f0f0 100644
--- a/torchvision/csrc/custom_ops/custom_ops.cpp
+++ b/torchvision/csrc/custom_ops/custom_ops.cpp
@@ -25,9 +25,135 @@ PyMODINIT_FUNC PyInit__custom_ops(void) {
 #endif
 #endif
 
+using torch::Tensor;
+using torch::autograd::AutogradContext;
+using torch::autograd::Variable;
+using torch::autograd::variable_list;
+
+class ROIAlignFunction : public torch::autograd::Function<ROIAlignFunction> {
+ public:
+  static variable_list forward(
+      AutogradContext* ctx,
+      Variable input,
+      Variable rois,
+      const double spatial_scale,
+      const int64_t pooled_height,
+      const int64_t pooled_width,
+      const int64_t sampling_ratio) {
+    ctx->saved_data["spatial_scale"] = spatial_scale;
+    ctx->saved_data["pooled_height"] = pooled_height;
+    ctx->saved_data["pooled_width"] = pooled_width;
+    ctx->saved_data["sampling_ratio"] = sampling_ratio;
+    ctx->saved_data["input_shape"] = input.sizes();
+    ctx->save_for_backward({rois});
+    auto result = ROIAlign_forward(
+        input,
+        rois,
+        spatial_scale,
+        pooled_height,
+        pooled_width,
+        sampling_ratio);
+    return {result};
+  }
+
+  static variable_list backward(
+      AutogradContext* ctx,
+      variable_list grad_output) {
+    // Use data saved in forward
+    auto saved = ctx->get_saved_variables();
+    auto rois = saved[0];
+    auto input_shape = ctx->saved_data["input_shape"].toIntList();
+    auto grad_in = ROIAlign_backward(
+        grad_output[0],
+        rois,
+        ctx->saved_data["spatial_scale"].toDouble(),
+        ctx->saved_data["pooled_height"].toInt(),
+        ctx->saved_data["pooled_width"].toInt(),
+        input_shape[0],
+        input_shape[1],
+        input_shape[2],
+        input_shape[3],
+        ctx->saved_data["sampling_ratio"].toInt());
+    return {
+        grad_in, Variable(), Variable(), Variable(), Variable(), Variable()};
+  }
+};
+
+Tensor roi_align(
+    const Tensor& input,
+    const Tensor& rois,
+    const double spatial_scale,
+    const int64_t pooled_height,
+    const int64_t pooled_width,
+    const int64_t sampling_ratio) {
+  return ROIAlignFunction::apply(
+      input,
+      rois,
+      spatial_scale,
+      pooled_height,
+      pooled_width,
+      sampling_ratio)[0];
+}
+
+class ROIPoolFunction : public torch::autograd::Function<ROIPoolFunction> {
+ public:
+  static variable_list forward(
+      AutogradContext* ctx,
+      Variable input,
+      Variable rois,
+      const double spatial_scale,
+      const int64_t pooled_height,
+      const int64_t pooled_width) {
+    ctx->saved_data["spatial_scale"] = spatial_scale;
+    ctx->saved_data["pooled_height"] = pooled_height;
+    ctx->saved_data["pooled_width"] = pooled_width;
+    ctx->saved_data["input_shape"] = input.sizes();
+    auto result = ROIPool_forward(
+        input, rois, spatial_scale, pooled_height, pooled_width);
+    auto output = std::get<0>(result);
+    auto argmax = std::get<1>(result);
+    ctx->save_for_backward({rois, argmax});
+    ctx->mark_non_differentiable({argmax});
+    return {output, argmax};
+  }
+
+  static variable_list backward(
+      AutogradContext* ctx,
+      variable_list grad_output) {
+    // Use data saved in forward
+    auto saved = ctx->get_saved_variables();
+    auto rois = saved[0];
+    auto argmax = saved[1];
+    auto input_shape = ctx->saved_data["input_shape"].toIntList();
+    auto grad_in = ROIPool_backward(
+        grad_output[0],
+        rois,
+        argmax,
+        ctx->saved_data["spatial_scale"].toDouble(),
+        ctx->saved_data["pooled_height"].toInt(),
+        ctx->saved_data["pooled_width"].toInt(),
+        input_shape[0],
+        input_shape[1],
+        input_shape[2],
+        input_shape[3]);
+    return {grad_in, Variable(), Variable(), Variable(), Variable()};
+  }
+};
+
+std::tuple<Tensor, Tensor> roi_pool(
+    const Tensor& input,
+    const Tensor& rois,
+    const double spatial_scale,
+    const int64_t pooled_height,
+    const int64_t pooled_width) {
+  auto result = ROIPoolFunction::apply(
+      input, rois, spatial_scale, pooled_height, pooled_width);
+  return std::tuple<Tensor, Tensor>(result[0], result[1]);
+}
+
 static auto registry =
     torch::RegisterOperators()
         .op("torchvision::nms", &nms)
         .op("torchvision::roi_align(Tensor input, Tensor rois, float spatial_scale, int pooled_height, int pooled_width, int sampling_ratio) -> Tensor",
-            &ROIAlign_forward)
-        .op("torchvision::roi_pool", &ROIPool_forward);
+            &roi_align)
+        .op("torchvision::roi_pool", &roi_pool);
diff --git a/torchvision/ops/roi_align.py b/torchvision/ops/roi_align.py
index 45496b2dc5d..3038fb0dca0 100644
--- a/torchvision/ops/roi_align.py
+++ b/torchvision/ops/roi_align.py
@@ -10,35 +10,6 @@
 from ._utils import convert_boxes_to_roi_format
 
 
-class _RoIAlignFunction(Function):
-    @staticmethod
-    def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio):
-        ctx.save_for_backward(roi)
-        ctx.output_size = _pair(output_size)
-        ctx.spatial_scale = spatial_scale
-        ctx.sampling_ratio = sampling_ratio
-        ctx.input_shape = input.size()
-        _C = _lazy_import()
-        output = _C.roi_align_forward(
-            input, roi, spatial_scale,
-            output_size[0], output_size[1], sampling_ratio)
-        return output
-
-    @staticmethod
-    @once_differentiable
-    def backward(ctx, grad_output):
-        rois, = ctx.saved_tensors
-        output_size = ctx.output_size
-        spatial_scale = ctx.spatial_scale
-        sampling_ratio = ctx.sampling_ratio
-        bs, ch, h, w = ctx.input_shape
-        _C = _lazy_import()
-        grad_input = _C.roi_align_backward(
-            grad_output, rois, spatial_scale,
-            output_size[0], output_size[1], bs, ch, h, w, sampling_ratio)
-        return grad_input, None, None, None, None
-
-
 def roi_align(input, boxes, output_size, spatial_scale=1.0, sampling_ratio=-1):
     """
     Performs Region of Interest (RoI) Align operator described in Mask R-CNN
@@ -66,14 +37,10 @@ def roi_align(input, boxes, output_size, spatial_scale=1.0, sampling_ratio=-1):
     rois = boxes
     if not isinstance(rois, torch.Tensor):
         rois = convert_boxes_to_roi_format(rois)
-    # TODO: Change this to support backwards, which we
-    #       do not currently support when JIT tracing.
-    if torch._C._get_tracing_state():
-        _lazy_import()
-        return torch.ops.torchvision.roi_align(input, rois, spatial_scale,
-                                               output_size[0], output_size[1],
-                                               sampling_ratio)
-    return _RoIAlignFunction.apply(input, rois, output_size, spatial_scale, sampling_ratio)
+    _lazy_import()
+    return torch.ops.torchvision.roi_align(input, rois, spatial_scale,
+                                           output_size[0], output_size[1],
+                                           sampling_ratio)
 
 
 class RoIAlign(nn.Module):
diff --git a/torchvision/ops/roi_pool.py b/torchvision/ops/roi_pool.py
index b2a778a63b5..6a9eaf6fdd9 100644
--- a/torchvision/ops/roi_pool.py
+++ b/torchvision/ops/roi_pool.py
@@ -10,33 +10,6 @@
 from ._utils import convert_boxes_to_roi_format
 
 
-class _RoIPoolFunction(Function):
-    @staticmethod
-    def forward(ctx, input, rois, output_size, spatial_scale):
-        ctx.output_size = _pair(output_size)
-        ctx.spatial_scale = spatial_scale
-        ctx.input_shape = input.size()
-        _C = _lazy_import()
-        output, argmax = _C.roi_pool_forward(
-            input, rois, spatial_scale,
-            output_size[0], output_size[1])
-        ctx.save_for_backward(rois, argmax)
-        return output
-
-    @staticmethod
-    @once_differentiable
-    def backward(ctx, grad_output):
-        rois, argmax = ctx.saved_tensors
-        output_size = ctx.output_size
-        spatial_scale = ctx.spatial_scale
-        bs, ch, h, w = ctx.input_shape
-        _C = _lazy_import()
-        grad_input = _C.roi_pool_backward(
-            grad_output, rois, argmax, spatial_scale,
-            output_size[0], output_size[1], bs, ch, h, w)
-        return grad_input, None, None, None
-
-
 def roi_pool(input, boxes, output_size, spatial_scale=1.0):
     """
     Performs Region of Interest (RoI) Pool operator described in Fast R-CNN
@@ -59,14 +32,10 @@ def roi_pool(input, boxes, output_size, spatial_scale=1.0):
     rois = boxes
     if not isinstance(rois, torch.Tensor):
         rois = convert_boxes_to_roi_format(rois)
-    # TODO: Change this to support backwards, which we
-    #       do not currently support when JIT tracing.
-    if torch._C._get_tracing_state():
-        _lazy_import()
-        output, _ = torch.ops.torchvision.roi_pool(input, rois, spatial_scale,
-                                                   output_size[0], output_size[1])
-        return output
-    return _RoIPoolFunction.apply(input, rois, output_size, spatial_scale)
+    _lazy_import()
+    output, _ = torch.ops.torchvision.roi_pool(input, rois, spatial_scale,
+                                               output_size[0], output_size[1])
+    return output
 
 
 class RoIPool(nn.Module):

From a129b6b86a75f2d1fc80055f2cd0fae63efc0d2d Mon Sep 17 00:00:00 2001
From: Philip Meier <gamesnmore@online.de>
Date: Wed, 11 Sep 2019 15:50:38 +0200
Subject: [PATCH 11/26] Adds optional fill colour to rotate (#1280)

* Adds optional fill colour to rotate

* bug fix
---
 torchvision/transforms/functional.py | 9 +++++++--
 torchvision/transforms/transforms.py | 7 +++++--
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/torchvision/transforms/functional.py b/torchvision/transforms/functional.py
index fc07e402db8..72da9b7889d 100644
--- a/torchvision/transforms/functional.py
+++ b/torchvision/transforms/functional.py
@@ -686,7 +686,7 @@ def adjust_gamma(img, gamma, gain=1):
     return img
 
 
-def rotate(img, angle, resample=False, expand=False, center=None):
+def rotate(img, angle, resample=False, expand=False, center=None, fill=0):
     """Rotate the image by angle.
 
 
@@ -703,6 +703,8 @@ def rotate(img, angle, resample=False, expand=False, center=None):
         center (2-tuple, optional): Optional center of rotation.
             Origin is the upper left corner.
             Default is the center of the image.
+        fill (3-tuple or int): RGB pixel fill value for area outside the rotated image.
+            If int, the same value is used for every channel.
 
     .. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters
 
@@ -711,7 +713,10 @@ def rotate(img, angle, resample=False, expand=False, center=None):
     if not _is_pil_image(img):
         raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
 
-    return img.rotate(angle, resample, expand, center)
+    if isinstance(fill, int) and len(img.getbands()) > 1:
+        fill = tuple([fill] * len(img.getbands()))  # one value per band instead of a fixed 3
+
+    return img.rotate(angle, resample, expand, center, fillcolor=fill)
 
 
 def _get_inverse_affine_matrix(center, angle, translate, scale, shear):
diff --git a/torchvision/transforms/transforms.py b/torchvision/transforms/transforms.py
index 203dae345cd..b21a6d86eef 100644
--- a/torchvision/transforms/transforms.py
+++ b/torchvision/transforms/transforms.py
@@ -946,12 +946,14 @@ class RandomRotation(object):
         center (2-tuple, optional): Optional center of rotation.
             Origin is the upper left corner.
             Default is the center of the image.
+        fill (3-tuple or int): RGB pixel fill value for area outside the rotated image.
+            If int, the same value is used for every channel.
 
     .. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters
 
     """
 
-    def __init__(self, degrees, resample=False, expand=False, center=None):
+    def __init__(self, degrees, resample=False, expand=False, center=None, fill=0):
         if isinstance(degrees, numbers.Number):
             if degrees < 0:
                 raise ValueError("If degrees is a single number, it must be positive.")
@@ -964,6 +966,7 @@ def __init__(self, degrees, resample=False, expand=False, center=None):
         self.resample = resample
         self.expand = expand
         self.center = center
+        self.fill = fill
 
     @staticmethod
     def get_params(degrees):
@@ -987,7 +990,7 @@ def __call__(self, img):
 
         angle = self.get_params(self.degrees)
 
-        return F.rotate(img, angle, self.resample, self.expand, self.center)
+        return F.rotate(img, angle, self.resample, self.expand, self.center, self.fill)
 
     def __repr__(self):
         format_string = self.__class__.__name__ + '(degrees={0}'.format(self.degrees)

From 6de158c473b83cf43344a0651d7c01128c7850e6 Mon Sep 17 00:00:00 2001
From: Karl Ostmo <kostmo@gmail.com>
Date: Thu, 12 Sep 2019 16:41:00 -0700
Subject: [PATCH 12/26] use native python code generation logic (#1321)

use flake8 with Python 3 on .circleci directory
---
 .circleci/.gitignore    |   1 +
 .circleci/config.yml    | 529 ++++++++++++++++++++--------------------
 .circleci/config.yml.in |  43 +---
 .circleci/regenerate.py |  96 +++++++-
 .travis.yml             |   7 +-
 5 files changed, 359 insertions(+), 317 deletions(-)
 create mode 100644 .circleci/.gitignore

diff --git a/.circleci/.gitignore b/.circleci/.gitignore
new file mode 100644
index 00000000000..485dee64bcf
--- /dev/null
+++ b/.circleci/.gitignore
@@ -0,0 +1 @@
+.idea
diff --git a/.circleci/config.yml b/.circleci/config.yml
index abd9c8ccf74..5c9301ec4a7 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -50,7 +50,7 @@ jobs:
       - checkout
       - run:
           command: |
-            pip install --user --progress-bar off jinja2
+            pip install --user --progress-bar off jinja2 pyyaml
             python .circleci/regenerate.py
             git diff --exit-code || (echo ".circleci/config.yml not in sync with config.yml.in! Run .circleci/regenerate.py to update config"; exit 1)
 
@@ -253,168 +253,168 @@ jobs:
               aws s3 cp "$pkg" "s3://pytorch/whl/nightly/<< parameters.subfolder >>" --acl public-read
             done
 
+
 workflows:
   build:
     jobs:
       - circleci_consistency
-      
       - binary_linux_wheel:
+          cu_version: cpu
           name: binary_linux_wheel_py2.7_cpu
-          python_version: "2.7"
-          cu_version: "cpu"
+          python_version: '2.7'
       - binary_linux_wheel:
+          cu_version: cpu
           name: binary_linux_wheel_py2.7u_cpu
-          python_version: "2.7"
-          cu_version: "cpu"
-          unicode_abi: "1"
+          python_version: '2.7'
+          unicode_abi: '1'
       - binary_linux_wheel:
+          cu_version: cu92
           name: binary_linux_wheel_py2.7_cu92
-          python_version: "2.7"
-          cu_version: "cu92"
-          wheel_docker_image: "soumith/manylinux-cuda92"
+          python_version: '2.7'
+          wheel_docker_image: soumith/manylinux-cuda92
       - binary_linux_wheel:
+          cu_version: cu92
           name: binary_linux_wheel_py2.7u_cu92
-          python_version: "2.7"
-          cu_version: "cu92"
-          unicode_abi: "1"
-          wheel_docker_image: "soumith/manylinux-cuda92"
+          python_version: '2.7'
+          unicode_abi: '1'
+          wheel_docker_image: soumith/manylinux-cuda92
       - binary_linux_wheel:
+          cu_version: cu100
           name: binary_linux_wheel_py2.7_cu100
-          python_version: "2.7"
-          cu_version: "cu100"
+          python_version: '2.7'
       - binary_linux_wheel:
+          cu_version: cu100
           name: binary_linux_wheel_py2.7u_cu100
-          python_version: "2.7"
-          cu_version: "cu100"
-          unicode_abi: "1"
+          python_version: '2.7'
+          unicode_abi: '1'
       - binary_linux_wheel:
+          cu_version: cpu
           name: binary_linux_wheel_py3.5_cpu
-          python_version: "3.5"
-          cu_version: "cpu"
+          python_version: '3.5'
       - binary_linux_wheel:
+          cu_version: cu92
           name: binary_linux_wheel_py3.5_cu92
-          python_version: "3.5"
-          cu_version: "cu92"
-          wheel_docker_image: "soumith/manylinux-cuda92"
+          python_version: '3.5'
+          wheel_docker_image: soumith/manylinux-cuda92
       - binary_linux_wheel:
+          cu_version: cu100
           name: binary_linux_wheel_py3.5_cu100
-          python_version: "3.5"
-          cu_version: "cu100"
+          python_version: '3.5'
       - binary_linux_wheel:
+          cu_version: cpu
           name: binary_linux_wheel_py3.6_cpu
-          python_version: "3.6"
-          cu_version: "cpu"
+          python_version: '3.6'
       - binary_linux_wheel:
+          cu_version: cu92
           name: binary_linux_wheel_py3.6_cu92
-          python_version: "3.6"
-          cu_version: "cu92"
-          wheel_docker_image: "soumith/manylinux-cuda92"
+          python_version: '3.6'
+          wheel_docker_image: soumith/manylinux-cuda92
       - binary_linux_wheel:
+          cu_version: cu100
           name: binary_linux_wheel_py3.6_cu100
-          python_version: "3.6"
-          cu_version: "cu100"
+          python_version: '3.6'
       - binary_linux_wheel:
+          cu_version: cpu
           name: binary_linux_wheel_py3.7_cpu
-          python_version: "3.7"
-          cu_version: "cpu"
+          python_version: '3.7'
       - binary_linux_wheel:
+          cu_version: cu92
           name: binary_linux_wheel_py3.7_cu92
-          python_version: "3.7"
-          cu_version: "cu92"
-          wheel_docker_image: "soumith/manylinux-cuda92"
+          python_version: '3.7'
+          wheel_docker_image: soumith/manylinux-cuda92
       - binary_linux_wheel:
+          cu_version: cu100
           name: binary_linux_wheel_py3.7_cu100
-          python_version: "3.7"
-          cu_version: "cu100"
+          python_version: '3.7'
       - binary_macos_wheel:
+          cu_version: cpu
           name: binary_macos_wheel_py2.7_cpu
-          python_version: "2.7"
-          cu_version: "cpu"
+          python_version: '2.7'
       - binary_macos_wheel:
+          cu_version: cpu
           name: binary_macos_wheel_py2.7u_cpu
-          python_version: "2.7"
-          cu_version: "cpu"
-          unicode_abi: "1"
+          python_version: '2.7'
+          unicode_abi: '1'
       - binary_macos_wheel:
+          cu_version: cpu
           name: binary_macos_wheel_py3.5_cpu
-          python_version: "3.5"
-          cu_version: "cpu"
+          python_version: '3.5'
       - binary_macos_wheel:
+          cu_version: cpu
           name: binary_macos_wheel_py3.6_cpu
-          python_version: "3.6"
-          cu_version: "cpu"
+          python_version: '3.6'
       - binary_macos_wheel:
+          cu_version: cpu
           name: binary_macos_wheel_py3.7_cpu
-          python_version: "3.7"
-          cu_version: "cpu"
+          python_version: '3.7'
       - binary_linux_conda:
+          cu_version: cpu
           name: binary_linux_conda_py2.7_cpu
-          python_version: "2.7"
-          cu_version: "cpu"
+          python_version: '2.7'
       - binary_linux_conda:
+          cu_version: cu92
           name: binary_linux_conda_py2.7_cu92
-          python_version: "2.7"
-          cu_version: "cu92"
-          wheel_docker_image: "soumith/manylinux-cuda92"
+          python_version: '2.7'
+          wheel_docker_image: soumith/manylinux-cuda92
       - binary_linux_conda:
+          cu_version: cu100
           name: binary_linux_conda_py2.7_cu100
-          python_version: "2.7"
-          cu_version: "cu100"
+          python_version: '2.7'
       - binary_linux_conda:
+          cu_version: cpu
           name: binary_linux_conda_py3.5_cpu
-          python_version: "3.5"
-          cu_version: "cpu"
+          python_version: '3.5'
       - binary_linux_conda:
+          cu_version: cu92
           name: binary_linux_conda_py3.5_cu92
-          python_version: "3.5"
-          cu_version: "cu92"
-          wheel_docker_image: "soumith/manylinux-cuda92"
+          python_version: '3.5'
+          wheel_docker_image: soumith/manylinux-cuda92
       - binary_linux_conda:
+          cu_version: cu100
           name: binary_linux_conda_py3.5_cu100
-          python_version: "3.5"
-          cu_version: "cu100"
+          python_version: '3.5'
       - binary_linux_conda:
+          cu_version: cpu
           name: binary_linux_conda_py3.6_cpu
-          python_version: "3.6"
-          cu_version: "cpu"
+          python_version: '3.6'
       - binary_linux_conda:
+          cu_version: cu92
           name: binary_linux_conda_py3.6_cu92
-          python_version: "3.6"
-          cu_version: "cu92"
-          wheel_docker_image: "soumith/manylinux-cuda92"
+          python_version: '3.6'
+          wheel_docker_image: soumith/manylinux-cuda92
       - binary_linux_conda:
+          cu_version: cu100
           name: binary_linux_conda_py3.6_cu100
-          python_version: "3.6"
-          cu_version: "cu100"
+          python_version: '3.6'
       - binary_linux_conda:
+          cu_version: cpu
           name: binary_linux_conda_py3.7_cpu
-          python_version: "3.7"
-          cu_version: "cpu"
+          python_version: '3.7'
       - binary_linux_conda:
+          cu_version: cu92
           name: binary_linux_conda_py3.7_cu92
-          python_version: "3.7"
-          cu_version: "cu92"
-          wheel_docker_image: "soumith/manylinux-cuda92"
+          python_version: '3.7'
+          wheel_docker_image: soumith/manylinux-cuda92
       - binary_linux_conda:
+          cu_version: cu100
           name: binary_linux_conda_py3.7_cu100
-          python_version: "3.7"
-          cu_version: "cu100"
+          python_version: '3.7'
       - binary_macos_conda:
+          cu_version: cpu
           name: binary_macos_conda_py2.7_cpu
-          python_version: "2.7"
-          cu_version: "cpu"
+          python_version: '2.7'
       - binary_macos_conda:
+          cu_version: cpu
           name: binary_macos_conda_py3.5_cpu
-          python_version: "3.5"
-          cu_version: "cpu"
+          python_version: '3.5'
       - binary_macos_conda:
+          cu_version: cpu
           name: binary_macos_conda_py3.6_cpu
-          python_version: "3.6"
-          cu_version: "cpu"
+          python_version: '3.6'
       - binary_macos_conda:
+          cu_version: cpu
           name: binary_macos_conda_py3.7_cpu
-          python_version: "3.7"
-          cu_version: "cpu"
+          python_version: '3.7'
       - binary_linux_conda_cuda:
           name: torchvision_linux_py3.7_cu100
           python_version: "3.7"
@@ -434,361 +434,360 @@ workflows:
                 - master
     jobs:
       - circleci_consistency
-      
       - binary_linux_wheel:
+          cu_version: cpu
           name: nightly_binary_linux_wheel_py2.7_cpu
-          python_version: "2.7"
-          cu_version: "cpu"
+          python_version: '2.7'
       - binary_wheel_upload:
-          name: nightly_binary_linux_wheel_py2.7_cpu_upload
           context: org-member
+          name: nightly_binary_linux_wheel_py2.7_cpu_upload
           requires:
-            - nightly_binary_linux_wheel_py2.7_cpu
-          subfolder: "cpu/"
+          - nightly_binary_linux_wheel_py2.7_cpu
+          subfolder: cpu/
       - binary_linux_wheel:
+          cu_version: cpu
           name: nightly_binary_linux_wheel_py2.7u_cpu
-          python_version: "2.7"
-          cu_version: "cpu"
-          unicode_abi: "1"
+          python_version: '2.7'
+          unicode_abi: '1'
       - binary_wheel_upload:
-          name: nightly_binary_linux_wheel_py2.7u_cpu_upload
           context: org-member
+          name: nightly_binary_linux_wheel_py2.7u_cpu_upload
           requires:
-            - nightly_binary_linux_wheel_py2.7u_cpu
-          subfolder: "cpu/"
+          - nightly_binary_linux_wheel_py2.7u_cpu
+          subfolder: cpu/
       - binary_linux_wheel:
+          cu_version: cu92
           name: nightly_binary_linux_wheel_py2.7_cu92
-          python_version: "2.7"
-          cu_version: "cu92"
-          wheel_docker_image: "soumith/manylinux-cuda92"
+          python_version: '2.7'
+          wheel_docker_image: soumith/manylinux-cuda92
       - binary_wheel_upload:
-          name: nightly_binary_linux_wheel_py2.7_cu92_upload
           context: org-member
+          name: nightly_binary_linux_wheel_py2.7_cu92_upload
           requires:
-            - nightly_binary_linux_wheel_py2.7_cu92
-          subfolder: "cu92/"
+          - nightly_binary_linux_wheel_py2.7_cu92
+          subfolder: cu92/
       - binary_linux_wheel:
+          cu_version: cu92
           name: nightly_binary_linux_wheel_py2.7u_cu92
-          python_version: "2.7"
-          cu_version: "cu92"
-          unicode_abi: "1"
-          wheel_docker_image: "soumith/manylinux-cuda92"
+          python_version: '2.7'
+          unicode_abi: '1'
+          wheel_docker_image: soumith/manylinux-cuda92
       - binary_wheel_upload:
-          name: nightly_binary_linux_wheel_py2.7u_cu92_upload
           context: org-member
+          name: nightly_binary_linux_wheel_py2.7u_cu92_upload
           requires:
-            - nightly_binary_linux_wheel_py2.7u_cu92
-          subfolder: "cu92/"
+          - nightly_binary_linux_wheel_py2.7u_cu92
+          subfolder: cu92/
       - binary_linux_wheel:
+          cu_version: cu100
           name: nightly_binary_linux_wheel_py2.7_cu100
-          python_version: "2.7"
-          cu_version: "cu100"
+          python_version: '2.7'
       - binary_wheel_upload:
-          name: nightly_binary_linux_wheel_py2.7_cu100_upload
           context: org-member
+          name: nightly_binary_linux_wheel_py2.7_cu100_upload
           requires:
-            - nightly_binary_linux_wheel_py2.7_cu100
-          subfolder: "cu100/"
+          - nightly_binary_linux_wheel_py2.7_cu100
+          subfolder: cu100/
       - binary_linux_wheel:
+          cu_version: cu100
           name: nightly_binary_linux_wheel_py2.7u_cu100
-          python_version: "2.7"
-          cu_version: "cu100"
-          unicode_abi: "1"
+          python_version: '2.7'
+          unicode_abi: '1'
       - binary_wheel_upload:
-          name: nightly_binary_linux_wheel_py2.7u_cu100_upload
           context: org-member
+          name: nightly_binary_linux_wheel_py2.7u_cu100_upload
           requires:
-            - nightly_binary_linux_wheel_py2.7u_cu100
-          subfolder: "cu100/"
+          - nightly_binary_linux_wheel_py2.7u_cu100
+          subfolder: cu100/
       - binary_linux_wheel:
+          cu_version: cpu
           name: nightly_binary_linux_wheel_py3.5_cpu
-          python_version: "3.5"
-          cu_version: "cpu"
+          python_version: '3.5'
       - binary_wheel_upload:
-          name: nightly_binary_linux_wheel_py3.5_cpu_upload
           context: org-member
+          name: nightly_binary_linux_wheel_py3.5_cpu_upload
           requires:
-            - nightly_binary_linux_wheel_py3.5_cpu
-          subfolder: "cpu/"
+          - nightly_binary_linux_wheel_py3.5_cpu
+          subfolder: cpu/
       - binary_linux_wheel:
+          cu_version: cu92
           name: nightly_binary_linux_wheel_py3.5_cu92
-          python_version: "3.5"
-          cu_version: "cu92"
-          wheel_docker_image: "soumith/manylinux-cuda92"
+          python_version: '3.5'
+          wheel_docker_image: soumith/manylinux-cuda92
       - binary_wheel_upload:
-          name: nightly_binary_linux_wheel_py3.5_cu92_upload
           context: org-member
+          name: nightly_binary_linux_wheel_py3.5_cu92_upload
           requires:
-            - nightly_binary_linux_wheel_py3.5_cu92
-          subfolder: "cu92/"
+          - nightly_binary_linux_wheel_py3.5_cu92
+          subfolder: cu92/
       - binary_linux_wheel:
+          cu_version: cu100
           name: nightly_binary_linux_wheel_py3.5_cu100
-          python_version: "3.5"
-          cu_version: "cu100"
+          python_version: '3.5'
       - binary_wheel_upload:
-          name: nightly_binary_linux_wheel_py3.5_cu100_upload
           context: org-member
+          name: nightly_binary_linux_wheel_py3.5_cu100_upload
           requires:
-            - nightly_binary_linux_wheel_py3.5_cu100
-          subfolder: "cu100/"
+          - nightly_binary_linux_wheel_py3.5_cu100
+          subfolder: cu100/
       - binary_linux_wheel:
+          cu_version: cpu
           name: nightly_binary_linux_wheel_py3.6_cpu
-          python_version: "3.6"
-          cu_version: "cpu"
+          python_version: '3.6'
       - binary_wheel_upload:
-          name: nightly_binary_linux_wheel_py3.6_cpu_upload
           context: org-member
+          name: nightly_binary_linux_wheel_py3.6_cpu_upload
           requires:
-            - nightly_binary_linux_wheel_py3.6_cpu
-          subfolder: "cpu/"
+          - nightly_binary_linux_wheel_py3.6_cpu
+          subfolder: cpu/
       - binary_linux_wheel:
+          cu_version: cu92
           name: nightly_binary_linux_wheel_py3.6_cu92
-          python_version: "3.6"
-          cu_version: "cu92"
-          wheel_docker_image: "soumith/manylinux-cuda92"
+          python_version: '3.6'
+          wheel_docker_image: soumith/manylinux-cuda92
       - binary_wheel_upload:
-          name: nightly_binary_linux_wheel_py3.6_cu92_upload
           context: org-member
+          name: nightly_binary_linux_wheel_py3.6_cu92_upload
           requires:
-            - nightly_binary_linux_wheel_py3.6_cu92
-          subfolder: "cu92/"
+          - nightly_binary_linux_wheel_py3.6_cu92
+          subfolder: cu92/
       - binary_linux_wheel:
+          cu_version: cu100
           name: nightly_binary_linux_wheel_py3.6_cu100
-          python_version: "3.6"
-          cu_version: "cu100"
+          python_version: '3.6'
       - binary_wheel_upload:
-          name: nightly_binary_linux_wheel_py3.6_cu100_upload
           context: org-member
+          name: nightly_binary_linux_wheel_py3.6_cu100_upload
           requires:
-            - nightly_binary_linux_wheel_py3.6_cu100
-          subfolder: "cu100/"
+          - nightly_binary_linux_wheel_py3.6_cu100
+          subfolder: cu100/
       - binary_linux_wheel:
+          cu_version: cpu
           name: nightly_binary_linux_wheel_py3.7_cpu
-          python_version: "3.7"
-          cu_version: "cpu"
+          python_version: '3.7'
       - binary_wheel_upload:
-          name: nightly_binary_linux_wheel_py3.7_cpu_upload
           context: org-member
+          name: nightly_binary_linux_wheel_py3.7_cpu_upload
           requires:
-            - nightly_binary_linux_wheel_py3.7_cpu
-          subfolder: "cpu/"
+          - nightly_binary_linux_wheel_py3.7_cpu
+          subfolder: cpu/
       - binary_linux_wheel:
+          cu_version: cu92
           name: nightly_binary_linux_wheel_py3.7_cu92
-          python_version: "3.7"
-          cu_version: "cu92"
-          wheel_docker_image: "soumith/manylinux-cuda92"
+          python_version: '3.7'
+          wheel_docker_image: soumith/manylinux-cuda92
       - binary_wheel_upload:
-          name: nightly_binary_linux_wheel_py3.7_cu92_upload
           context: org-member
+          name: nightly_binary_linux_wheel_py3.7_cu92_upload
           requires:
-            - nightly_binary_linux_wheel_py3.7_cu92
-          subfolder: "cu92/"
+          - nightly_binary_linux_wheel_py3.7_cu92
+          subfolder: cu92/
       - binary_linux_wheel:
+          cu_version: cu100
           name: nightly_binary_linux_wheel_py3.7_cu100
-          python_version: "3.7"
-          cu_version: "cu100"
+          python_version: '3.7'
       - binary_wheel_upload:
-          name: nightly_binary_linux_wheel_py3.7_cu100_upload
           context: org-member
+          name: nightly_binary_linux_wheel_py3.7_cu100_upload
           requires:
-            - nightly_binary_linux_wheel_py3.7_cu100
-          subfolder: "cu100/"
+          - nightly_binary_linux_wheel_py3.7_cu100
+          subfolder: cu100/
       - binary_macos_wheel:
+          cu_version: cpu
           name: nightly_binary_macos_wheel_py2.7_cpu
-          python_version: "2.7"
-          cu_version: "cpu"
+          python_version: '2.7'
       - binary_wheel_upload:
-          name: nightly_binary_macos_wheel_py2.7_cpu_upload
           context: org-member
+          name: nightly_binary_macos_wheel_py2.7_cpu_upload
           requires:
-            - nightly_binary_macos_wheel_py2.7_cpu
-          subfolder: ""
+          - nightly_binary_macos_wheel_py2.7_cpu
+          subfolder: ''
       - binary_macos_wheel:
+          cu_version: cpu
           name: nightly_binary_macos_wheel_py2.7u_cpu
-          python_version: "2.7"
-          cu_version: "cpu"
-          unicode_abi: "1"
+          python_version: '2.7'
+          unicode_abi: '1'
       - binary_wheel_upload:
-          name: nightly_binary_macos_wheel_py2.7u_cpu_upload
           context: org-member
+          name: nightly_binary_macos_wheel_py2.7u_cpu_upload
           requires:
-            - nightly_binary_macos_wheel_py2.7u_cpu
-          subfolder: ""
+          - nightly_binary_macos_wheel_py2.7u_cpu
+          subfolder: ''
       - binary_macos_wheel:
+          cu_version: cpu
           name: nightly_binary_macos_wheel_py3.5_cpu
-          python_version: "3.5"
-          cu_version: "cpu"
+          python_version: '3.5'
       - binary_wheel_upload:
-          name: nightly_binary_macos_wheel_py3.5_cpu_upload
           context: org-member
+          name: nightly_binary_macos_wheel_py3.5_cpu_upload
           requires:
-            - nightly_binary_macos_wheel_py3.5_cpu
-          subfolder: ""
+          - nightly_binary_macos_wheel_py3.5_cpu
+          subfolder: ''
       - binary_macos_wheel:
+          cu_version: cpu
           name: nightly_binary_macos_wheel_py3.6_cpu
-          python_version: "3.6"
-          cu_version: "cpu"
+          python_version: '3.6'
       - binary_wheel_upload:
-          name: nightly_binary_macos_wheel_py3.6_cpu_upload
           context: org-member
+          name: nightly_binary_macos_wheel_py3.6_cpu_upload
           requires:
-            - nightly_binary_macos_wheel_py3.6_cpu
-          subfolder: ""
+          - nightly_binary_macos_wheel_py3.6_cpu
+          subfolder: ''
       - binary_macos_wheel:
+          cu_version: cpu
           name: nightly_binary_macos_wheel_py3.7_cpu
-          python_version: "3.7"
-          cu_version: "cpu"
+          python_version: '3.7'
       - binary_wheel_upload:
-          name: nightly_binary_macos_wheel_py3.7_cpu_upload
           context: org-member
+          name: nightly_binary_macos_wheel_py3.7_cpu_upload
           requires:
-            - nightly_binary_macos_wheel_py3.7_cpu
-          subfolder: ""
+          - nightly_binary_macos_wheel_py3.7_cpu
+          subfolder: ''
       - binary_linux_conda:
+          cu_version: cpu
           name: nightly_binary_linux_conda_py2.7_cpu
-          python_version: "2.7"
-          cu_version: "cpu"
+          python_version: '2.7'
       - binary_conda_upload:
-          name: nightly_binary_linux_conda_py2.7_cpu_upload
           context: org-member
+          name: nightly_binary_linux_conda_py2.7_cpu_upload
           requires:
-            - nightly_binary_linux_conda_py2.7_cpu
+          - nightly_binary_linux_conda_py2.7_cpu
       - binary_linux_conda:
+          cu_version: cu92
           name: nightly_binary_linux_conda_py2.7_cu92
-          python_version: "2.7"
-          cu_version: "cu92"
-          wheel_docker_image: "soumith/manylinux-cuda92"
+          python_version: '2.7'
+          wheel_docker_image: soumith/manylinux-cuda92
       - binary_conda_upload:
-          name: nightly_binary_linux_conda_py2.7_cu92_upload
           context: org-member
+          name: nightly_binary_linux_conda_py2.7_cu92_upload
           requires:
-            - nightly_binary_linux_conda_py2.7_cu92
+          - nightly_binary_linux_conda_py2.7_cu92
       - binary_linux_conda:
+          cu_version: cu100
           name: nightly_binary_linux_conda_py2.7_cu100
-          python_version: "2.7"
-          cu_version: "cu100"
+          python_version: '2.7'
       - binary_conda_upload:
-          name: nightly_binary_linux_conda_py2.7_cu100_upload
           context: org-member
+          name: nightly_binary_linux_conda_py2.7_cu100_upload
           requires:
-            - nightly_binary_linux_conda_py2.7_cu100
+          - nightly_binary_linux_conda_py2.7_cu100
       - binary_linux_conda:
+          cu_version: cpu
           name: nightly_binary_linux_conda_py3.5_cpu
-          python_version: "3.5"
-          cu_version: "cpu"
+          python_version: '3.5'
       - binary_conda_upload:
-          name: nightly_binary_linux_conda_py3.5_cpu_upload
           context: org-member
+          name: nightly_binary_linux_conda_py3.5_cpu_upload
           requires:
-            - nightly_binary_linux_conda_py3.5_cpu
+          - nightly_binary_linux_conda_py3.5_cpu
       - binary_linux_conda:
+          cu_version: cu92
           name: nightly_binary_linux_conda_py3.5_cu92
-          python_version: "3.5"
-          cu_version: "cu92"
-          wheel_docker_image: "soumith/manylinux-cuda92"
+          python_version: '3.5'
+          wheel_docker_image: soumith/manylinux-cuda92
       - binary_conda_upload:
-          name: nightly_binary_linux_conda_py3.5_cu92_upload
           context: org-member
+          name: nightly_binary_linux_conda_py3.5_cu92_upload
           requires:
-            - nightly_binary_linux_conda_py3.5_cu92
+          - nightly_binary_linux_conda_py3.5_cu92
       - binary_linux_conda:
+          cu_version: cu100
           name: nightly_binary_linux_conda_py3.5_cu100
-          python_version: "3.5"
-          cu_version: "cu100"
+          python_version: '3.5'
       - binary_conda_upload:
-          name: nightly_binary_linux_conda_py3.5_cu100_upload
           context: org-member
+          name: nightly_binary_linux_conda_py3.5_cu100_upload
           requires:
-            - nightly_binary_linux_conda_py3.5_cu100
+          - nightly_binary_linux_conda_py3.5_cu100
       - binary_linux_conda:
+          cu_version: cpu
           name: nightly_binary_linux_conda_py3.6_cpu
-          python_version: "3.6"
-          cu_version: "cpu"
+          python_version: '3.6'
       - binary_conda_upload:
-          name: nightly_binary_linux_conda_py3.6_cpu_upload
           context: org-member
+          name: nightly_binary_linux_conda_py3.6_cpu_upload
           requires:
-            - nightly_binary_linux_conda_py3.6_cpu
+          - nightly_binary_linux_conda_py3.6_cpu
       - binary_linux_conda:
+          cu_version: cu92
           name: nightly_binary_linux_conda_py3.6_cu92
-          python_version: "3.6"
-          cu_version: "cu92"
-          wheel_docker_image: "soumith/manylinux-cuda92"
+          python_version: '3.6'
+          wheel_docker_image: soumith/manylinux-cuda92
       - binary_conda_upload:
-          name: nightly_binary_linux_conda_py3.6_cu92_upload
           context: org-member
+          name: nightly_binary_linux_conda_py3.6_cu92_upload
           requires:
-            - nightly_binary_linux_conda_py3.6_cu92
+          - nightly_binary_linux_conda_py3.6_cu92
       - binary_linux_conda:
+          cu_version: cu100
           name: nightly_binary_linux_conda_py3.6_cu100
-          python_version: "3.6"
-          cu_version: "cu100"
+          python_version: '3.6'
       - binary_conda_upload:
-          name: nightly_binary_linux_conda_py3.6_cu100_upload
           context: org-member
+          name: nightly_binary_linux_conda_py3.6_cu100_upload
           requires:
-            - nightly_binary_linux_conda_py3.6_cu100
+          - nightly_binary_linux_conda_py3.6_cu100
       - binary_linux_conda:
+          cu_version: cpu
           name: nightly_binary_linux_conda_py3.7_cpu
-          python_version: "3.7"
-          cu_version: "cpu"
+          python_version: '3.7'
       - binary_conda_upload:
-          name: nightly_binary_linux_conda_py3.7_cpu_upload
           context: org-member
+          name: nightly_binary_linux_conda_py3.7_cpu_upload
           requires:
-            - nightly_binary_linux_conda_py3.7_cpu
+          - nightly_binary_linux_conda_py3.7_cpu
       - binary_linux_conda:
+          cu_version: cu92
           name: nightly_binary_linux_conda_py3.7_cu92
-          python_version: "3.7"
-          cu_version: "cu92"
-          wheel_docker_image: "soumith/manylinux-cuda92"
+          python_version: '3.7'
+          wheel_docker_image: soumith/manylinux-cuda92
       - binary_conda_upload:
-          name: nightly_binary_linux_conda_py3.7_cu92_upload
           context: org-member
+          name: nightly_binary_linux_conda_py3.7_cu92_upload
           requires:
-            - nightly_binary_linux_conda_py3.7_cu92
+          - nightly_binary_linux_conda_py3.7_cu92
       - binary_linux_conda:
+          cu_version: cu100
           name: nightly_binary_linux_conda_py3.7_cu100
-          python_version: "3.7"
-          cu_version: "cu100"
+          python_version: '3.7'
       - binary_conda_upload:
-          name: nightly_binary_linux_conda_py3.7_cu100_upload
           context: org-member
+          name: nightly_binary_linux_conda_py3.7_cu100_upload
           requires:
-            - nightly_binary_linux_conda_py3.7_cu100
+          - nightly_binary_linux_conda_py3.7_cu100
       - binary_macos_conda:
+          cu_version: cpu
           name: nightly_binary_macos_conda_py2.7_cpu
-          python_version: "2.7"
-          cu_version: "cpu"
+          python_version: '2.7'
       - binary_conda_upload:
-          name: nightly_binary_macos_conda_py2.7_cpu_upload
           context: org-member
+          name: nightly_binary_macos_conda_py2.7_cpu_upload
           requires:
-            - nightly_binary_macos_conda_py2.7_cpu
+          - nightly_binary_macos_conda_py2.7_cpu
       - binary_macos_conda:
+          cu_version: cpu
           name: nightly_binary_macos_conda_py3.5_cpu
-          python_version: "3.5"
-          cu_version: "cpu"
+          python_version: '3.5'
       - binary_conda_upload:
-          name: nightly_binary_macos_conda_py3.5_cpu_upload
           context: org-member
+          name: nightly_binary_macos_conda_py3.5_cpu_upload
           requires:
-            - nightly_binary_macos_conda_py3.5_cpu
+          - nightly_binary_macos_conda_py3.5_cpu
       - binary_macos_conda:
+          cu_version: cpu
           name: nightly_binary_macos_conda_py3.6_cpu
-          python_version: "3.6"
-          cu_version: "cpu"
+          python_version: '3.6'
       - binary_conda_upload:
-          name: nightly_binary_macos_conda_py3.6_cpu_upload
           context: org-member
+          name: nightly_binary_macos_conda_py3.6_cpu_upload
           requires:
-            - nightly_binary_macos_conda_py3.6_cpu
+          - nightly_binary_macos_conda_py3.6_cpu
       - binary_macos_conda:
+          cu_version: cpu
           name: nightly_binary_macos_conda_py3.7_cpu
-          python_version: "3.7"
-          cu_version: "cpu"
+          python_version: '3.7'
       - binary_conda_upload:
-          name: nightly_binary_macos_conda_py3.7_cpu_upload
           context: org-member
+          name: nightly_binary_macos_conda_py3.7_cpu_upload
           requires:
-            - nightly_binary_macos_conda_py3.7_cpu
\ No newline at end of file
+          - nightly_binary_macos_conda_py3.7_cpu
\ No newline at end of file
diff --git a/.circleci/config.yml.in b/.circleci/config.yml.in
index 4ff3849db53..70d5a613508 100644
--- a/.circleci/config.yml.in
+++ b/.circleci/config.yml.in
@@ -50,7 +50,7 @@ jobs:
       - checkout
       - run:
           command: |
-            pip install --user --progress-bar off jinja2
+            pip install --user --progress-bar off jinja2 pyyaml
             python .circleci/regenerate.py
             git diff --exit-code || (echo ".circleci/config.yml not in sync with config.yml.in! Run .circleci/regenerate.py to update config"; exit 1)
 
@@ -253,47 +253,6 @@ jobs:
               aws s3 cp "$pkg" "s3://pytorch/whl/nightly/<< parameters.subfolder >>" --acl public-read
             done
 
-{%- macro workflow(btype, os, python_version, cu_version, unicode, prefix='', upload=False) %}
-      - binary_{{os}}_{{btype}}:
-          name: {{prefix}}binary_{{os}}_{{btype}}_py{{python_version}}{{ "u" if unicode }}_{{cu_version}}
-          python_version: "{{python_version}}"
-          cu_version: "{{cu_version}}"
-{%-   if unicode %}
-          unicode_abi: "1"
-{%-   endif %}
-{%-   if cu_version == "cu92" %}
-          wheel_docker_image: "soumith/manylinux-cuda92"
-{%-   endif %}
-
-{%-   if upload %}
-      - binary_{{btype}}_upload:
-          name: {{prefix}}binary_{{os}}_{{btype}}_py{{python_version}}{{ "u" if unicode }}_{{cu_version}}_upload
-          context: org-member
-          requires:
-            - {{prefix}}binary_{{os}}_{{btype}}_py{{python_version}}{{ "u" if unicode }}_{{cu_version}}
-{%-     if btype == 'wheel' %}
-{%-       if os == 'macos' %}
-          subfolder: ""
-{%-       else %}
-          subfolder: "{{cu_version}}/"
-{%-       endif %}
-{%-     endif %}
-{%-   endif %}
-{%- endmacro %}
-
-{%- macro workflows(prefix='', upload=False) %}
-{%- for btype in ["wheel", "conda"] -%}
-{%-   for os in ["linux", "macos"] -%}
-{%-     for python_version in ["2.7", "3.5", "3.6", "3.7"] -%}
-{%-       for cu_version in (["cpu", "cu92", "cu100"] if os == "linux" else ["cpu"]) -%}
-{%-         for unicode in ([False, True] if btype == "wheel" and python_version == "2.7" else [False]) -%}
-          {{ workflow(btype, os, python_version, cu_version, unicode, prefix=prefix, upload=upload) }}
-{%-         endfor -%}
-{%-     endfor -%}
-{%-     endfor -%}
-{%-   endfor -%}
-{%- endfor %}
-{%- endmacro %}
 
 workflows:
   build:
diff --git a/.circleci/regenerate.py b/.circleci/regenerate.py
index dcc3fb2c23b..7e6fd747399 100755
--- a/.circleci/regenerate.py
+++ b/.circleci/regenerate.py
@@ -1,13 +1,91 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
+
+"""
+This script should use a very simple, functional programming style.
+Avoid Jinja macros in favor of native Python functions.
+
+Don't go overboard on code generation; use Python only to generate
+content that can't be easily declared statically using CircleCI's YAML API.
+
+Data declarations (e.g. the nested loops for defining the configuration matrix)
+should be at the top of the file for easy updating.
+
+See this comment for design rationale:
+https://github.com/pytorch/vision/pull/1321#issuecomment-531033978
+"""
 
 import jinja2
+import yaml
 import os.path
 
-d = os.path.dirname(__file__)
-env = jinja2.Environment(
-    loader=jinja2.FileSystemLoader(d),
-    lstrip_blocks=True,
-    autoescape=False,
-)
-with open(os.path.join(d, 'config.yml'), 'w') as f:
-    f.write(env.get_template('config.yml.in').render())
+
+def workflows(prefix='', upload=False, indentation=6):
+    w = []
+    for btype in ["wheel", "conda"]:
+        for os_type in ["linux", "macos"]:
+            for python_version in ["2.7", "3.5", "3.6", "3.7"]:
+                for cu_version in (["cpu", "cu92", "cu100"] if os_type == "linux" else ["cpu"]):
+                    for unicode in ([False, True] if btype == "wheel" and python_version == "2.7" else [False]):
+                        w += workflow_pair(btype, os_type, python_version, cu_version, unicode, prefix, upload)
+
+    return indent(indentation, w)
+
+
+def workflow_pair(btype, os_type, python_version, cu_version, unicode, prefix='', upload=False):
+
+    w = []
+    unicode_suffix = "u" if unicode else ""
+    base_workflow_name = f"{prefix}binary_{os_type}_{btype}_py{python_version}{unicode_suffix}_{cu_version}"
+
+    w.append(generate_base_workflow(base_workflow_name, python_version, cu_version, unicode, os_type, btype))
+
+    if upload:
+        w.append(generate_upload_workflow(base_workflow_name, os_type, btype, cu_version))
+
+    return w
+
+
+def generate_base_workflow(base_workflow_name, python_version, cu_version, unicode, os_type, btype):
+
+    d = {
+        "name": base_workflow_name,
+        "python_version": python_version,
+        "cu_version": cu_version,
+    }
+
+    if unicode:
+        d["unicode_abi"] = '1'
+
+    if cu_version == "cu92":
+        d["wheel_docker_image"] = "soumith/manylinux-cuda92"
+
+    return {f"binary_{os_type}_{btype}": d}
+
+
+def generate_upload_workflow(base_workflow_name, os_type, btype, cu_version):
+    d = {
+        "name": f"{base_workflow_name}_upload",
+        "context": "org-member",
+        "requires": [base_workflow_name],
+    }
+
+    if btype == 'wheel':
+        d["subfolder"] = "" if os_type == 'macos' else cu_version + "/"
+
+    return {f"binary_{btype}_upload": d}
+
+
+def indent(indentation, data_list):
+    return ("\n" + " " * indentation).join(yaml.dump(data_list).splitlines())
+
+
+if __name__ == "__main__":
+    d = os.path.dirname(__file__)
+    env = jinja2.Environment(
+        loader=jinja2.FileSystemLoader(d),
+        lstrip_blocks=True,
+        autoescape=False,
+    )
+
+    with open(os.path.join(d, 'config.yml'), 'w') as f:
+        f.write(env.get_template('config.yml.in').render(workflows=workflows))
diff --git a/.travis.yml b/.travis.yml
index 497579f5a3a..76080f7138f 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -18,7 +18,12 @@ matrix:
     - env: LINT_CHECK
       python: "2.7"
       install: pip install flake8
-      script: flake8
+      script: flake8 --exclude .circleci
+      after_success: []
+    - env: LINT_CHECK
+      python: "3.6"
+      install: pip install flake8
+      script: flake8 .circleci
       after_success: []
     - python: "2.7"
       env: IMAGE_BACKEND=Pillow-SIMD

From e8b830fcd6b99090206ee5f3d99ed7f688c0c711 Mon Sep 17 00:00:00 2001
From: Francisco Massa <fvsmassa@gmail.com>
Date: Mon, 16 Sep 2019 10:59:59 -0300
Subject: [PATCH 13/26] VOC2007 support test set (#1340)

---
 torchvision/datasets/voc.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/torchvision/datasets/voc.py b/torchvision/datasets/voc.py
index 8a6925011ba..001a6b367f3 100644
--- a/torchvision/datasets/voc.py
+++ b/torchvision/datasets/voc.py
@@ -83,8 +83,10 @@ def __init__(self,
         self.url = DATASET_YEAR_DICT[year]['url']
         self.filename = DATASET_YEAR_DICT[year]['filename']
         self.md5 = DATASET_YEAR_DICT[year]['md5']
-        self.image_set = verify_str_arg(image_set, "image_set",
-                                        ("train", "trainval", "val"))
+        valid_sets = ["train", "trainval", "val"]
+        if year == "2007":
+            valid_sets.append("test")
+        self.image_set = verify_str_arg(image_set, "image_set", valid_sets)
         base_dir = DATASET_YEAR_DICT[year]['base_dir']
         voc_root = os.path.join(self.root, base_dir)
         image_dir = os.path.join(voc_root, 'JPEGImages')
@@ -160,8 +162,10 @@ def __init__(self,
         self.url = DATASET_YEAR_DICT[year]['url']
         self.filename = DATASET_YEAR_DICT[year]['filename']
         self.md5 = DATASET_YEAR_DICT[year]['md5']
-        self.image_set = verify_str_arg(image_set, "image_set",
-                                        ("train", "trainval", "val"))
+        valid_sets = ["train", "trainval", "val"]
+        if year == "2007":
+            valid_sets.append("test")
+        self.image_set = verify_str_arg(image_set, "image_set", valid_sets)
 
         base_dir = DATASET_YEAR_DICT[year]['base_dir']
         voc_root = os.path.join(self.root, base_dir)

From 3c1ab2c1d84697b5ef7595ac13a79adfeeec325a Mon Sep 17 00:00:00 2001
From: eellison <elias_ellison@brown.edu>
Date: Tue, 17 Sep 2019 03:51:58 -0700
Subject: [PATCH 14/26] make resnext scriptable (#1343)

---
 test/test_models.py          | 2 +-
 torchvision/models/resnet.py | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/test/test_models.py b/test/test_models.py
index da5965aa225..4cc1d6a4bad 100644
--- a/test/test_models.py
+++ b/test/test_models.py
@@ -29,7 +29,7 @@ def get_available_video_models():
 torchub_models = {
     "deeplabv3_resnet101": False,
     "mobilenet_v2": True,
-    "resnext50_32x4d": False,
+    "resnext50_32x4d": True,
     "fcn_resnet101": False,
     "googlenet": False,
     "densenet121": False,
diff --git a/torchvision/models/resnet.py b/torchvision/models/resnet.py
index 2150a5d57d3..c8989a6c8d9 100644
--- a/torchvision/models/resnet.py
+++ b/torchvision/models/resnet.py
@@ -75,6 +75,7 @@ def forward(self, x):
 
 class Bottleneck(nn.Module):
     expansion = 4
+    __constants__ = ['downsample']
 
     def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                  base_width=64, dilation=1, norm_layer=None):

From d4392a853c4bc1dfb66cefcc44f536d776cba61c Mon Sep 17 00:00:00 2001
From: Lara Haidar <haidar.lara@gmail.com>
Date: Tue, 17 Sep 2019 04:22:19 -0700
Subject: [PATCH 15/26] Support Exporting GeneralizedRCNNTransform to ONNX
 (#1325)

* Support Exporting GeneralizedRCNNTransform

* refactor code to address comments

* update tests

* address comments

* revert min_size to test CI

* re-revert min_size
---
 test/test_onnx.py                         | 50 ++++++++++++++++-------
 torchvision/__init__.py                   |  5 +++
 torchvision/models/detection/transform.py | 45 ++++++++++++++++++--
 3 files changed, 81 insertions(+), 19 deletions(-)

diff --git a/test/test_onnx.py b/test/test_onnx.py
index 57b3c8dd729..f4db56f0a14 100644
--- a/test/test_onnx.py
+++ b/test/test_onnx.py
@@ -1,6 +1,7 @@
 import io
 import torch
 from torchvision import ops
+from torchvision.models.detection.transform import GeneralizedRCNNTransform
 
 # onnxruntime requires python 3.5 or above
 try:
@@ -17,23 +18,23 @@ class ONNXExporterTester(unittest.TestCase):
     def setUpClass(cls):
         torch.manual_seed(123)
 
-    def run_model(self, model, inputs):
+    def run_model(self, model, inputs_list):
         model.eval()
 
-        # run pytorch model
-        with torch.no_grad():
-            if isinstance(inputs, torch.Tensor):
-                inputs = (inputs,)
-            outputs = model(*inputs)
-            if isinstance(outputs, torch.Tensor):
-                outputs = (outputs,)
-
         onnx_io = io.BytesIO()
-        # export to onnx
-        torch.onnx.export(model, inputs, onnx_io, do_constant_folding=True, opset_version=10)
+        # export to onnx with the first input
+        torch.onnx.export(model, inputs_list[0], onnx_io, do_constant_folding=True, opset_version=10)
 
         # validate the exported model with onnx runtime
-        self.ort_validate(onnx_io, inputs, outputs)
+        for test_inputs in inputs_list:
+            with torch.no_grad():
+                if isinstance(test_inputs, torch.Tensor) or \
+                   isinstance(test_inputs, list):
+                    test_inputs = (test_inputs,)
+                test_ouputs = model(*test_inputs)
+                if isinstance(test_ouputs, torch.Tensor):
+                    test_ouputs = (test_ouputs,)
+            self.ort_validate(onnx_io, test_inputs, test_ouputs)
 
     def ort_validate(self, onnx_io, inputs, outputs):
 
@@ -66,13 +67,13 @@ class Module(torch.nn.Module):
             def forward(self, boxes, scores):
                 return ops.nms(boxes, scores, 0.5)
 
-        self.run_model(Module(), (boxes, scores))
+        self.run_model(Module(), [(boxes, scores)])
 
     def test_roi_pool(self):
         x = torch.rand(1, 1, 10, 10, dtype=torch.float32)
         single_roi = torch.tensor([[0, 0, 0, 4, 4]], dtype=torch.float32)
         model = ops.RoIAlign((5, 5), 1, 2)
-        self.run_model(model, (x, single_roi))
+        self.run_model(model, [(x, single_roi)])
 
     def test_roi_align(self):
         x = torch.rand(1, 1, 10, 10, dtype=torch.float32)
@@ -81,7 +82,26 @@ def test_roi_align(self):
         pool_w = 5
         model = ops.RoIPool((pool_h, pool_w), 2)
         model.eval()
-        self.run_model(model, (x, rois))
+        self.run_model(model, [(x, rois)])
+
+    @unittest.skip("Disable test until Resize opset 11 is implemented in ONNX Runtime")
+    def test_transform_images(self):
+
+        class TransformModule(torch.nn.Module):
+            def __init__(self_module):
+                super(TransformModule, self_module).__init__()
+                min_size = 800
+                max_size = 1333
+                image_mean = [0.485, 0.456, 0.406]
+                image_std = [0.229, 0.224, 0.225]
+                self_module.transform = GeneralizedRCNNTransform(min_size, max_size, image_mean, image_std)
+
+            def forward(self_module, images):
+                return self_module.transform(images)[0].tensors
+
+        input = [torch.rand(3, 800, 1280), torch.rand(3, 800, 800)]
+        input_test = [torch.rand(3, 800, 1280), torch.rand(3, 800, 800)]
+        self.run_model(TransformModule(), [input, input_test])
 
 
 if __name__ == '__main__':
diff --git a/torchvision/__init__.py b/torchvision/__init__.py
index 68361bfb029..297aca2b228 100644
--- a/torchvision/__init__.py
+++ b/torchvision/__init__.py
@@ -34,3 +34,8 @@ def get_image_backend():
     Gets the name of the package used to load images
     """
     return _image_backend
+
+
+def _is_tracing():
+    import torch
+    return torch._C._get_tracing_state()
diff --git a/torchvision/models/detection/transform.py b/torchvision/models/detection/transform.py
index f42ddc7416f..45fe037a86e 100644
--- a/torchvision/models/detection/transform.py
+++ b/torchvision/models/detection/transform.py
@@ -2,6 +2,7 @@
 import math
 import torch
 from torch import nn
+import torchvision
 
 from torchvision.ops import misc as misc_nn_ops
 from .image_list import ImageList
@@ -56,8 +57,9 @@ def normalize(self, image):
 
     def resize(self, image, target):
         h, w = image.shape[-2:]
-        min_size = float(min(image.shape[-2:]))
-        max_size = float(max(image.shape[-2:]))
+        im_shape = torch.tensor(image.shape[-2:])
+        min_size = float(torch.min(im_shape))
+        max_size = float(torch.max(im_shape))
         if self.training:
             size = random.choice(self.min_size)
         else:
@@ -87,10 +89,45 @@ def resize(self, image, target):
             target["keypoints"] = keypoints
         return image, target
 
+    # _onnx_dynamic_img_pad() creates a dynamic padding
+    # for an image supported in ONNx tracing.
+    # it is used to process the images in _onnx_batch_images().
+    def _onnx_dynamic_img_pad(self, img, padding):
+        concat_0 = torch.cat((img, torch.zeros(padding[0], img.shape[1], img.shape[2])), 0)
+        concat_1 = torch.cat((concat_0, torch.zeros(concat_0.shape[0], padding[1], concat_0.shape[2])), 1)
+        padded_img = torch.cat((concat_1, torch.zeros(concat_1.shape[0], concat_1.shape[1], padding[2])), 2)
+        return padded_img
+
+    # _onnx_batch_images() is an implementation of
+    # batch_images() that is supported by ONNX tracing.
+    def _onnx_batch_images(self, images, size_divisible=32):
+        max_size = []
+        for i in range(images[0].dim()):
+            max_size_i = torch.max(torch.stack([img.shape[i] for img in images]).to(torch.float32)).to(torch.int64)
+            max_size.append(max_size_i)
+        stride = size_divisible
+        max_size[1] = (torch.ceil((max_size[1].to(torch.float32)) / stride) * stride).to(torch.int64)
+        max_size[2] = (torch.ceil((max_size[2].to(torch.float32)) / stride) * stride).to(torch.int64)
+        max_size = tuple(max_size)
+
+        # work around for
+        # pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)
+        # which is not yet supported in onnx
+        padded_imgs = []
+        for img in images:
+            padding = [(s1 - s2) for s1, s2 in zip(max_size, tuple(img.shape))]
+            padded_img = self._onnx_dynamic_img_pad(img, padding)
+            padded_imgs.append(padded_img)
+
+        return torch.stack(padded_imgs)
+
     def batch_images(self, images, size_divisible=32):
-        # concatenate
-        max_size = tuple(max(s) for s in zip(*[img.shape for img in images]))
+        if torchvision._is_tracing():
+            # batch_images() does not export well to ONNX
+            # call _onnx_batch_images() instead
+            return self._onnx_batch_images(images, size_divisible)
 
+        max_size = tuple(max(s) for s in zip(*[img.shape for img in images]))
         stride = size_divisible
         max_size = list(max_size)
         max_size[1] = int(math.ceil(float(max_size[1]) / stride) * stride)

From ef6d6c476f61ebe6afda30ad9d55acccd1e95db9 Mon Sep 17 00:00:00 2001
From: Francisco Massa <fvsmassa@gmail.com>
Date: Tue, 17 Sep 2019 11:13:53 -0300
Subject: [PATCH 16/26] Make CircleCI checkout merge commit (#1344)

* Set block style serialization in yaml

* Add new checkout command

* Put on a different branch name
---
 .circleci/config.yml    | 24 ++++++++++++++++++------
 .circleci/config.yml.in | 24 ++++++++++++++++++------
 .circleci/regenerate.py |  3 ++-
 3 files changed, 38 insertions(+), 13 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 5c9301ec4a7..0cbbef4fdcd 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -9,6 +9,18 @@ version: 2.1
 orbs:
   win: circleci/windows@1.0.0
 
+commands:
+  checkout_merge:
+    description: "checkout merge branch"
+    steps:
+      - checkout
+      - run:
+          name: Checkout merge branch
+          command: |
+            set -ex
+            git fetch --force origin ${CIRCLE_BRANCH}/merge:merged/${CIRCLE_BRANCH}
+            git checkout "merged/$CIRCLE_BRANCH"
+
 binary_common: &binary_common
   parameters:
     # Edit these defaults to do a release`
@@ -60,7 +72,7 @@ jobs:
       - image: << parameters.wheel_docker_image >>
     resource_class: 2xlarge+
     steps:
-      - checkout
+      - checkout_merge
       - run: packaging/build_wheel.sh
       - store_artifacts:
           path: dist
@@ -75,7 +87,7 @@ jobs:
       - image: "soumith/conda-cuda"
     resource_class: 2xlarge+
     steps:
-      - checkout
+      - checkout_merge
       - run: packaging/build_conda.sh
       - store_artifacts:
           path: /opt/conda/conda-bld/linux-64
@@ -90,7 +102,7 @@ jobs:
       image: ubuntu-1604:201903-01
     resource_class: gpu.medium
     steps:
-    - checkout
+    - checkout_merge
     - run:
         name: Setup environment
         command: |
@@ -159,7 +171,7 @@ jobs:
       name: win/vs2019
       shell: bash.exe
     steps:
-      - checkout
+      - checkout_merge
       - run:
           command: |
             choco install miniconda3
@@ -173,7 +185,7 @@ jobs:
     macos:
       xcode: "9.0"
     steps:
-      - checkout
+      - checkout_merge
       - run:
           # Cannot easily deduplicate this as source'ing activate
           # will set environment variables which we need to propagate
@@ -195,7 +207,7 @@ jobs:
     macos:
       xcode: "9.0"
     steps:
-      - checkout
+      - checkout_merge
       - run:
           command: |
             curl -o conda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh
diff --git a/.circleci/config.yml.in b/.circleci/config.yml.in
index 70d5a613508..fe094ebcd1c 100644
--- a/.circleci/config.yml.in
+++ b/.circleci/config.yml.in
@@ -9,6 +9,18 @@ version: 2.1
 orbs:
   win: circleci/windows@1.0.0
 
+commands:
+  checkout_merge:
+    description: "checkout merge branch"
+    steps:
+      - checkout
+      - run:
+          name: Checkout merge branch
+          command: |
+            set -ex
+            git fetch --force origin ${CIRCLE_BRANCH}/merge:merged/${CIRCLE_BRANCH}
+            git checkout "merged/$CIRCLE_BRANCH"
+
 binary_common: &binary_common
   parameters:
     # Edit these defaults to do a release`
@@ -60,7 +72,7 @@ jobs:
       - image: << parameters.wheel_docker_image >>
     resource_class: 2xlarge+
     steps:
-      - checkout
+      - checkout_merge
       - run: packaging/build_wheel.sh
       - store_artifacts:
           path: dist
@@ -75,7 +87,7 @@ jobs:
       - image: "soumith/conda-cuda"
     resource_class: 2xlarge+
     steps:
-      - checkout
+      - checkout_merge
       - run: packaging/build_conda.sh
       - store_artifacts:
           path: /opt/conda/conda-bld/linux-64
@@ -90,7 +102,7 @@ jobs:
       image: ubuntu-1604:201903-01
     resource_class: gpu.medium
     steps:
-    - checkout
+    - checkout_merge
     - run:
         name: Setup environment
         command: |
@@ -159,7 +171,7 @@ jobs:
       name: win/vs2019
       shell: bash.exe
     steps:
-      - checkout
+      - checkout_merge
       - run:
           command: |
             choco install miniconda3
@@ -173,7 +185,7 @@ jobs:
     macos:
       xcode: "9.0"
     steps:
-      - checkout
+      - checkout_merge
       - run:
           # Cannot easily deduplicate this as source'ing activate
           # will set environment variables which we need to propagate
@@ -195,7 +207,7 @@ jobs:
     macos:
       xcode: "9.0"
     steps:
-      - checkout
+      - checkout_merge
       - run:
           command: |
             curl -o conda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh
diff --git a/.circleci/regenerate.py b/.circleci/regenerate.py
index 7e6fd747399..cc94e624bec 100755
--- a/.circleci/regenerate.py
+++ b/.circleci/regenerate.py
@@ -76,7 +76,8 @@ def generate_upload_workflow(base_workflow_name, os_type, btype, cu_version):
 
 
 def indent(indentation, data_list):
-    return ("\n" + " " * indentation).join(yaml.dump(data_list).splitlines())
+    return ("\n" + " " * indentation).join(
+        yaml.dump(data_list, default_flow_style=False).splitlines())
 
 
 if __name__ == "__main__":

From 490966f28ea6e11421d684e94cd995d6c53d4698 Mon Sep 17 00:00:00 2001
From: peterjc123 <peterghost86@gmail.com>
Date: Wed, 18 Sep 2019 14:29:20 +0800
Subject: [PATCH 17/26] Windows build scripts (#1241)

* Windows build scripts

Add testing yaml

* Change urls

* Test MS agent

* Switch base image

* Use fb links
---
 packaging/conda/build_vision.sh               | 207 ++++++++++++++----
 packaging/torchvision/bld.bat                 |   1 +
 packaging/torchvision/meta.yaml               |   3 +-
 packaging/vs2017/activate.bat                 |  44 ++++
 packaging/vs2017/conda_build_config.yaml      |  24 ++
 packaging/vs2017/install_activate.bat         |  30 +++
 packaging/vs2017/install_runtime.bat          |  49 +++++
 packaging/vs2017/meta.yaml                    |  45 ++++
 packaging/windows/azure-pipelines-ci.yml      |  11 +
 packaging/windows/azure-pipelines.yml         |  35 +++
 packaging/windows/build_vision.bat            |  47 +++-
 packaging/windows/cuda92.bat                  |  59 +++++
 packaging/windows/internal/7z_install.bat     |   9 -
 packaging/windows/internal/clone.bat          |  12 +-
 packaging/windows/internal/cuda_install.bat   | 103 ++++-----
 packaging/windows/internal/dep_install.bat    |  14 ++
 packaging/windows/internal/env_fix.bat        |  31 +++
 .../windows/internal/nightly_defaults.bat     | 200 +++++++++++++++++
 packaging/windows/internal/publish.bat        |  85 +++++++
 packaging/windows/internal/setup.bat          |  47 ----
 packaging/windows/internal/test.bat           |  12 +-
 packaging/windows/internal/upload.bat         |  96 ++++++++
 packaging/windows/internal/vs_install.bat     |  34 +--
 packaging/windows/{ => old}/cuda90.bat        |   0
 packaging/windows/templates/auth_task.yml     |  17 ++
 packaging/windows/templates/build_conda.yml   |  15 ++
 packaging/windows/templates/build_task.yml    | 140 ++++++++++++
 packaging/windows/templates/build_wheels.yml  |   9 +
 .../windows/templates/linux_build_task.yml    |  38 ++++
 .../templates/override_pytorch_version.yml    |   6 +
 .../windows/templates/publish_packages.yml    |   8 +
 .../templates/publish_test_results.yml        |   6 +
 .../templates/setup_env_for_msagent.yml       |  25 +++
 .../templates/setup_nightly_variables.yml     |  11 +
 .../windows/templates/upload_to_conda.yml     |  10 +
 packaging/windows/templates/upload_to_s3.yml  |  15 ++
 packaging/windows/templates/vsts_auth.yml     |   8 +
 37 files changed, 1310 insertions(+), 196 deletions(-)
 create mode 100644 packaging/vs2017/activate.bat
 create mode 100644 packaging/vs2017/conda_build_config.yaml
 create mode 100644 packaging/vs2017/install_activate.bat
 create mode 100644 packaging/vs2017/install_runtime.bat
 create mode 100644 packaging/vs2017/meta.yaml
 create mode 100644 packaging/windows/azure-pipelines-ci.yml
 create mode 100644 packaging/windows/azure-pipelines.yml
 create mode 100644 packaging/windows/cuda92.bat
 delete mode 100644 packaging/windows/internal/7z_install.bat
 create mode 100644 packaging/windows/internal/dep_install.bat
 create mode 100644 packaging/windows/internal/env_fix.bat
 create mode 100644 packaging/windows/internal/nightly_defaults.bat
 create mode 100644 packaging/windows/internal/publish.bat
 create mode 100644 packaging/windows/internal/upload.bat
 rename packaging/windows/{ => old}/cuda90.bat (100%)
 create mode 100644 packaging/windows/templates/auth_task.yml
 create mode 100644 packaging/windows/templates/build_conda.yml
 create mode 100644 packaging/windows/templates/build_task.yml
 create mode 100644 packaging/windows/templates/build_wheels.yml
 create mode 100644 packaging/windows/templates/linux_build_task.yml
 create mode 100644 packaging/windows/templates/override_pytorch_version.yml
 create mode 100644 packaging/windows/templates/publish_packages.yml
 create mode 100644 packaging/windows/templates/publish_test_results.yml
 create mode 100644 packaging/windows/templates/setup_env_for_msagent.yml
 create mode 100644 packaging/windows/templates/setup_nightly_variables.yml
 create mode 100644 packaging/windows/templates/upload_to_conda.yml
 create mode 100644 packaging/windows/templates/upload_to_s3.yml
 create mode 100644 packaging/windows/templates/vsts_auth.yml

diff --git a/packaging/conda/build_vision.sh b/packaging/conda/build_vision.sh
index 3061e4740bb..8122648a722 100755
--- a/packaging/conda/build_vision.sh
+++ b/packaging/conda/build_vision.sh
@@ -10,15 +10,56 @@ retry () {
     $*  || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*)
 }
 
-if [ "$#" -ne 1 ]; then
-    echo "Illegal number of parameters. Pass cuda version"
-    echo "CUDA version should be M.m with no dot, e.g. '8.0' or 'cpu'"
-    exit 1
+# Parse arguments and determine version (env vars take precedence over positional args)
+###########################################################
+if [[ -n "$DESIRED_CUDA" && -n "$TORCHVISION_BUILD_VERSION" && -n "$TORCHVISION_BUILD_NUMBER" ]]; then
+    desired_cuda="$DESIRED_CUDA"
+    build_version="$TORCHVISION_BUILD_VERSION"
+    build_number="$TORCHVISION_BUILD_NUMBER"
+else
+    if [ "$#" -ne 3 ]; then
+        echo "Illegal number of parameters. Pass cuda version, torchvision version, build number"
+        echo "CUDA version should be Mm with no dot, e.g. '80'"
+        echo "DESIRED_PYTHON should be M.m, e.g. '2.7'"
+        exit 1
+    fi
+
+    desired_cuda="$1"
+    build_version="$2"
+    build_number="$3"
 fi
-desired_cuda="$1"
+if [[ "$desired_cuda" != cpu ]]; then
+  desired_cuda="$(echo $desired_cuda | tr -d cuda. )"
+fi
+echo "Building cuda version $desired_cuda and torchvision version: $build_version build_number: $build_number"
 
-export TORCHVISION_BUILD_VERSION="0.3.0"
-export TORCHVISION_BUILD_NUMBER=1
+if [[ "$desired_cuda" == 'cpu' ]]; then
+    cpu_only=1
+    cuver="cpu"
+else
+    # Switch desired_cuda to be M.m to be consistent with other scripts in
+    # pytorch/builder
+    export FORCE_CUDA=1
+    cuda_nodot="$desired_cuda"
+
+    if [[ ${#cuda_nodot} -eq 2 ]]; then
+        desired_cuda="${desired_cuda:0:1}.${desired_cuda:1:1}"
+    elif [[ ${#cuda_nodot} -eq 3 ]]; then
+        desired_cuda="${desired_cuda:0:2}.${desired_cuda:2:1}"
+    else
+        echo "unknown cuda version $cuda_nodot"
+        exit 1
+    fi
+
+    cuver="cu$cuda_nodot"
+fi
+
+export TORCHVISION_BUILD_VERSION=$build_version
+export TORCHVISION_BUILD_NUMBER=$build_number
+
+if [[ -z "$DESIRED_PYTHON" ]]; then
+    DESIRED_PYTHON=('3.5' '3.6' '3.7')
+fi
 
 SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )"
 
@@ -26,69 +67,147 @@ if [[ -z "$WIN_PACKAGE_WORK_DIR" ]]; then
     WIN_PACKAGE_WORK_DIR="$(echo $(pwd -W) | tr '/' '\\')\\tmp_conda_$(date +%H%M%S)"
 fi
 
-if [[ "$OSTYPE" == "msys" ]]; then
-    mkdir -p "$WIN_PACKAGE_WORK_DIR" || true
-    vision_rootdir="$(realpath ${WIN_PACKAGE_WORK_DIR})/torchvision-src"
-    git config --system core.longpaths true
-else
-    vision_rootdir="$(pwd)/torchvision-src"
-fi
+mkdir -p "$WIN_PACKAGE_WORK_DIR" || true
+vision_rootdir="$(realpath ${WIN_PACKAGE_WORK_DIR})/torchvision-src"
+git config --system core.longpaths true
 
 if [[ ! -d "$vision_rootdir" ]]; then
     rm -rf "$vision_rootdir"
     git clone "https://github.com/pytorch/vision" "$vision_rootdir"
     pushd "$vision_rootdir"
-    git checkout v$TORCHVISION_BUILD_VERSION
+    git checkout $PYTORCH_BRANCH
     popd
 fi
 
 cd "$SOURCE_DIR"
 
-if [[ "$OSTYPE" == "msys" ]]; then
-    export tmp_conda="${WIN_PACKAGE_WORK_DIR}\\conda"
-    export miniconda_exe="${WIN_PACKAGE_WORK_DIR}\\miniconda.exe"
-    rm -rf "$tmp_conda"
-    rm -f "$miniconda_exe"
-    curl -sSk https://repo.continuum.io/miniconda/Miniconda3-latest-Windows-x86_64.exe -o "$miniconda_exe"
-    "$SOURCE_DIR/install_conda.bat" && rm "$miniconda_exe"
-    pushd $tmp_conda
-    export PATH="$(pwd):$(pwd)/Library/usr/bin:$(pwd)/Library/bin:$(pwd)/Scripts:$(pwd)/bin:$PATH"
-    popd
-    # We have to skip 3.17 because of the following bug.
-    # https://github.com/conda/conda-build/issues/3285
-    retry conda install -yq conda-build
-fi
+export tmp_conda="${WIN_PACKAGE_WORK_DIR}\\conda"
+export miniconda_exe="${WIN_PACKAGE_WORK_DIR}\\miniconda.exe"
+rm -rf "$tmp_conda"
+rm -f "$miniconda_exe"
+curl -sSk https://repo.continuum.io/miniconda/Miniconda3-latest-Windows-x86_64.exe -o "$miniconda_exe"
+"$SOURCE_DIR/install_conda.bat" && rm "$miniconda_exe"
+pushd $tmp_conda
+export PATH="$(pwd):$(pwd)/Library/usr/bin:$(pwd)/Library/bin:$(pwd)/Scripts:$(pwd)/bin:$PATH"
+popd
+retry conda install -yq conda-build
 
-ANACONDA_USER=pytorch
+ANACONDA_USER=pytorch-nightly
 conda config --set anaconda_upload no
 
 
 export TORCHVISION_PACKAGE_SUFFIX=""
 if [[ "$desired_cuda" == 'cpu' ]]; then
     export CONDA_CUDATOOLKIT_CONSTRAINT=""
+    export CONDA_CPUONLY_FEATURE="- cpuonly # [not osx]"
     export CUDA_VERSION="None"
-    if [[ "$OSTYPE" != "darwin"* ]]; then
-        export TORCHVISION_PACKAGE_SUFFIX="-cpu"
-    fi
 else
+    export CONDA_CPUONLY_FEATURE=""
     . ./switch_cuda_version.sh $desired_cuda
-    if [[ "$desired_cuda" == "10.0" ]]; then
-	export CONDA_CUDATOOLKIT_CONSTRAINT="    - cudatoolkit >=10.0,<10.1 # [not osx]"
+    if [[ "$desired_cuda" == "10.1" ]]; then
+        export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=10.1,<10.2 # [not osx]"
+    elif [[ "$desired_cuda" == "10.0" ]]; then
+        export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=10.0,<10.1 # [not osx]"
+    elif [[ "$desired_cuda" == "9.2" ]]; then
+        export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=9.2,<9.3 # [not osx]"
     elif [[ "$desired_cuda" == "9.0" ]]; then
-	export CONDA_CUDATOOLKIT_CONSTRAINT="    - cudatoolkit >=9.0,<9.1 # [not osx]"
+        export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=9.0,<9.1 # [not osx]"
+    elif [[ "$desired_cuda" == "8.0" ]]; then
+        export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=8.0,<8.1 # [not osx]"
     else
-	echo "unhandled desired_cuda: $desired_cuda"
-	exit 1
+        echo "unhandled desired_cuda: $desired_cuda"
+        exit 1
     fi
 fi
 
-if [[ "$OSTYPE" == "msys" ]]; then
-    time conda build -c $ANACONDA_USER --no-anaconda-upload vs2017
+if [[ -z "$PYTORCH_VERSION" ]]; then
+    export CONDA_CHANNEL_FLAGS="-c pytorch-nightly"
+    export PYTORCH_VERSION="$(conda search --json 'pytorch[channel=pytorch-nightly]' | \
+                                python -c "import os, sys, json, re; cuver = '$cuver'; \
+                                cuver = cuver.replace('cu', 'cuda') if cuver != 'cpu' else cuver; \
+                                print(re.sub(r'\\+.*$', '', \
+                                [x['version'] for x in json.load(sys.stdin)['pytorch'] \
+                                    if (x['platform'] == 'darwin' or cuver in x['fn']) \
+                                    and 'py' + os.environ['DESIRED_PYTHON'] in x['fn']][-1]))")"
+    if [[ -z "$PYTORCH_VERSION" ]]; then
+        echo "PyTorch version auto detection failed"
+        echo "No package found for desired_cuda=$desired_cuda and DESIRED_PYTHON=$DESIRED_PYTHON"
+        exit 1
+    fi
+else
+    export CONDA_CHANNEL_FLAGS="-c pytorch -c pytorch-nightly"
+fi
+if [[ "$desired_cuda" == 'cpu' ]]; then
+    export CONDA_PYTORCH_BUILD_CONSTRAINT="- pytorch==$PYTORCH_VERSION"
+    export CONDA_PYTORCH_CONSTRAINT="- pytorch==$PYTORCH_VERSION"
 else
-    time conda build -c $ANACONDA_USER --no-anaconda-upload --python 2.7 torchvision
+    export CONDA_PYTORCH_BUILD_CONSTRAINT="- pytorch==${PYTORCH_VERSION}"
+    export CONDA_PYTORCH_CONSTRAINT="- pytorch==${PYTORCH_VERSION}"
 fi
-time conda build -c $ANACONDA_USER --no-anaconda-upload --python 3.5 torchvision
-time conda build -c $ANACONDA_USER --no-anaconda-upload --python 3.6 torchvision
-time conda build -c $ANACONDA_USER --no-anaconda-upload --python 3.7 torchvision
+
+# Loop through all Python versions to build a package for each
+for py_ver in "${DESIRED_PYTHON[@]}"; do
+    build_string="py${py_ver}_${build_string_suffix}"
+    folder_tag="${build_string}_$(date +'%Y%m%d')"
+
+    # Create the conda package into this temporary folder. This is so we can find
+    # the package afterwards, as there's no easy way to extract the final filename
+    # from conda-build
+    output_folder="out_$folder_tag"
+    rm -rf "$output_folder"
+    mkdir "$output_folder"
+
+    # We need to build the compiler activation scripts first on Windows
+    time VSDEVCMD_ARGS=${VSDEVCMD_ARGS[@]} \
+        conda build -c "$ANACONDA_USER" \
+                    --no-anaconda-upload \
+                    --output-folder "$output_folder" \
+                    ../vs2017
+
+    conda config --set anaconda_upload no
+    echo "Calling conda-build at $(date)"
+    if [[ "$desired_cuda" == "9.2" ]]; then
+        time CMAKE_ARGS=${CMAKE_ARGS[@]} \
+            BUILD_VERSION="$TORCHVISION_BUILD_VERSION" \
+            CU_VERSION="$cuver" \
+            SOURCE_ROOT_DIR="$vision_rootdir" \
+            conda build -c "$ANACONDA_USER" \
+                        -c defaults \
+                        -c conda-forge \
+                        -c "numba/label/dev" \
+                        --no-anaconda-upload \
+                        --python "$py_ver" \
+                        --output-folder "$output_folder" \
+                        --no-verify \
+                        --no-test \
+                        ../torchvision
+    else
+        time CMAKE_ARGS=${CMAKE_ARGS[@]} \
+            BUILD_VERSION="$TORCHVISION_BUILD_VERSION" \
+            CU_VERSION="$cuver" \
+            SOURCE_ROOT_DIR="$vision_rootdir" \
+            conda build -c "$ANACONDA_USER" \
+                        -c defaults \
+                        -c conda-forge \
+                        --no-anaconda-upload \
+                        --python "$py_ver" \
+                        --output-folder "$output_folder" \
+                        --no-verify \
+                        --no-test \
+                        ../torchvision
+    fi
+    echo "Finished conda-build at $(date)"
+
+    # Extract the package for testing
+    ls -lah "$output_folder"
+    built_package="$(find $output_folder/ -name '*torchvision*.tar.bz2')"
+
+    # Copy the built package to the host machine for persistence before testing
+    if [[ -n "$PYTORCH_FINAL_PACKAGE_DIR" ]]; then
+        mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR" || true
+        cp "$built_package" "$PYTORCH_FINAL_PACKAGE_DIR/"
+    fi
+done
+
 
 set +e
diff --git a/packaging/torchvision/bld.bat b/packaging/torchvision/bld.bat
index 14f6935fba8..0f1265c25f6 100644
--- a/packaging/torchvision/bld.bat
+++ b/packaging/torchvision/bld.bat
@@ -16,6 +16,7 @@ set CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%desired_cuda%
 set CUDA_BIN_PATH=%CUDA_PATH%\bin
 set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr
 if "%desired_cuda%" == "9.0" set NVCC_FLAGS=%NVCC_FLAGS% -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_50,code=compute_50
+if "%desired_cuda%" == "9.2" set NVCC_FLAGS=%NVCC_FLAGS% -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_61,code=sm_61 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_50,code=compute_50
 if "%desired_cuda%" == "10.0" set NVCC_FLAGS=%NVCC_FLAGS% -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_50,code=compute_50
 
 :cuda_flags_end
diff --git a/packaging/torchvision/meta.yaml b/packaging/torchvision/meta.yaml
index fdcfbf78dc8..20397c40f20 100644
--- a/packaging/torchvision/meta.yaml
+++ b/packaging/torchvision/meta.yaml
@@ -14,6 +14,7 @@ requirements:
     - setuptools
     {{ environ.get('CONDA_PYTORCH_BUILD_CONSTRAINT') }}
     {{ environ.get('CONDA_CUDATOOLKIT_CONSTRAINT') }}
+    {{ environ.get('CONDA_CPUONLY_FEATURE') }}
 
   run:
     - python
@@ -31,7 +32,7 @@ build:
     - FORCE_CUDA
     - NVCC_FLAGS
   features:
-    {{ CONDA_CPUONLY_FEATURE }}
+    {{ environ.get('CONDA_CPUONLY_FEATURE') }}
 
 test:
   imports:
diff --git a/packaging/vs2017/activate.bat b/packaging/vs2017/activate.bat
new file mode 100644
index 00000000000..ccecfc25442
--- /dev/null
+++ b/packaging/vs2017/activate.bat
@@ -0,0 +1,44 @@
+:: Set env vars that tell distutils to use the compiler that we put on path
+SET DISTUTILS_USE_SDK=1
+SET MSSdk=1
+
+SET "VS_VERSION=15.0"
+SET "VS_MAJOR=15"
+SET "VS_YEAR=2017"
+
+set "MSYS2_ARG_CONV_EXCL=/AI;/AL;/OUT;/out"
+set "MSYS2_ENV_CONV_EXCL=CL"
+
+:: For Python 3.5+, ensure that we link with the dynamic runtime.  See
+:: http://stevedower.id.au/blog/building-for-python-3-5-part-two/ for more info
+set "PY_VCRUNTIME_REDIST=%PREFIX%\\bin\\vcruntime140.dll"
+
+for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [15^,16^) -property installationPath`) do (
+    if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" (
+        set "VSINSTALLDIR=%%i\"
+        goto :vswhere
+    )
+)
+
+:vswhere
+
+:: Shorten PATH to avoid the `input line too long` error.
+SET MyPath=%PATH%
+
+setlocal EnableDelayedExpansion
+
+SET TempPath="%MyPath:;=";"%"
+SET var=
+FOR %%a IN (%TempPath%) DO (
+    IF EXIST %%~sa (
+        SET "var=!var!;%%~sa"
+    )
+)
+
+set "TempPath=!var:~1!"
+endlocal & set "PATH=%TempPath%"
+
+:: Shorten current directory too
+FOR %%A IN (.) DO CD "%%~sA"
+
+:: other things added by install_activate.bat at package build time
diff --git a/packaging/vs2017/conda_build_config.yaml b/packaging/vs2017/conda_build_config.yaml
new file mode 100644
index 00000000000..5188bb0ebec
--- /dev/null
+++ b/packaging/vs2017/conda_build_config.yaml
@@ -0,0 +1,24 @@
+blas_impl:
+  - mkl                        # [x86_64]
+c_compiler:
+  - vs2017                     # [win]
+cxx_compiler:
+  - vs2017                     # [win]
+python:
+  - 3.5
+  - 3.6
+# This differs from target_platform in that it determines what subdir the compiler
+#    will target, not what subdir the compiler package will be itself.
+#    For example, we need a win-64 vs2008_win-32 package, so that we compile win-32
+#    code on win-64 miniconda.
+cross_compiler_target_platform:
+  - win-64                     # [win]
+target_platform:
+  - win-64                     # [win]
+vc:
+  - 14
+zip_keys:
+  -                             # [win]
+    - vc                        # [win]
+    - c_compiler                # [win]
+    - cxx_compiler              # [win]
diff --git a/packaging/vs2017/install_activate.bat b/packaging/vs2017/install_activate.bat
new file mode 100644
index 00000000000..de0e6ff3c52
--- /dev/null
+++ b/packaging/vs2017/install_activate.bat
@@ -0,0 +1,30 @@
+set YEAR=2017
+set VER=15
+
+mkdir "%PREFIX%\etc\conda\activate.d"
+COPY "%RECIPE_DIR%\activate.bat" "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
+
+IF "%cross_compiler_target_platform%" == "win-64" (
+  set "target_platform=amd64"
+  echo SET "CMAKE_GENERATOR=Visual Studio %VER% %YEAR% Win64" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
+  echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
+  IF "%VSDEVCMD_ARGS%" == "" (
+    echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x64 >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
+    echo popd >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
+    echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
+    echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x86_amd64 >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
+  ) ELSE (
+    echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x64 %VSDEVCMD_ARGS% >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
+    echo popd >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
+    echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
+    echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x86_amd64 %VSDEVCMD_ARGS% >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
+  )
+  echo popd >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
+  ) else (
+  set "target_platform=x86"
+  echo SET "CMAKE_GENERATOR=Visual Studio %VER% %YEAR%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
+  echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
+  echo CALL "VC\Auxiliary\Build\vcvars32.bat" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
+  echo popd >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
+  )
+
diff --git a/packaging/vs2017/install_runtime.bat b/packaging/vs2017/install_runtime.bat
new file mode 100644
index 00000000000..5163c16cf24
--- /dev/null
+++ b/packaging/vs2017/install_runtime.bat
@@ -0,0 +1,49 @@
+set VC_PATH=x86
+if "%ARCH%"=="64" (
+   set VC_PATH=x64
+)
+
+set MSC_VER=2017
+
+rem :: This should always be present for VC installed with VS.  Not sure about VC installed with Visual C++ Build Tools 2015
+rem FOR /F "usebackq tokens=3*" %%A IN (`REG QUERY "HKEY_LOCAL_MACHINE\Software\Microsoft\DevDiv\VC\Servicing\14.0\IDE.x64" /v UpdateVersion`) DO (
+rem     set SP=%%A
+rem     )
+
+rem if not "%SP%" == "%PKG_VERSION%" (
+rem    echo "Version detected from registry: %SP%"
+rem    echo    "does not match version of package being built (%PKG_VERSION%)"
+rem    echo "Do you have current updates for VS 2015 installed?"
+rem    exit 1
+rem )
+
+
+REM ========== REQUIRES Win 10 SDK be installed, or files otherwise copied to location below!
+robocopy "C:\Program Files (x86)\Windows Kits\10\Redist\ucrt\DLLs\%VC_PATH%"  "%LIBRARY_BIN%" *.dll /E
+robocopy "C:\Program Files (x86)\Windows Kits\10\Redist\ucrt\DLLs\%VC_PATH%"  "%PREFIX%" *.dll /E
+if %ERRORLEVEL% GEQ 8 exit 1
+
+REM ========== This one comes from visual studio 2017
+set "VC_VER=141"
+
+for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [15^,16^) -property installationPath`) do (
+    if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" (
+        set "VS15VCVARSALL=%%i\VC\Auxiliary\Build\vcvarsall.bat"
+        goto :vswhere
+    )
+)
+:vswhere
+@setlocal
+call "%VS15VCVARSALL%" x64
+REM robocopy exit codes below 8 mean success; 8 and above mean at least one copy failed
+set "REDIST_ROOT=%VCToolsRedistDir%%VC_PATH%"
+
+robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.CRT" "%LIBRARY_BIN%" *.dll /E
+if %ERRORLEVEL% GEQ 8 exit 1
+robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.CRT" "%PREFIX%" *.dll /E
+if %ERRORLEVEL% GEQ 8 exit 1
+robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.OpenMP" "%LIBRARY_BIN%" *.dll /E
+if %ERRORLEVEL% GEQ 8 exit 1
+robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.OpenMP" "%PREFIX%" *.dll /E
+if %ERRORLEVEL% LSS 8 exit 0
+@endlocal
diff --git a/packaging/vs2017/meta.yaml b/packaging/vs2017/meta.yaml
new file mode 100644
index 00000000000..34f4860ba85
--- /dev/null
+++ b/packaging/vs2017/meta.yaml
@@ -0,0 +1,45 @@
+{% set vcver="14.1" %}
+{% set vcfeature="14" %}
+{% set vsyear="2017" %}
+{% set fullver="15.4.27004.2010" %}
+
+package:
+  name: vs{{ vsyear }}
+  version: {{ fullver }}
+
+build:
+  skip: True  [not win]
+  script_env:
+    - VSDEVCMD_ARGS # [win]
+
+outputs:
+  - name: vs{{ vsyear }}_{{ cross_compiler_target_platform }}
+    script: install_activate.bat
+    track_features:
+      # VS 2017 is binary-compatible with VS 2015/vc14.  Tools are "v141".
+      strong:
+        - vc{{ vcfeature }}
+    run_exports:
+      - vc {{ vcver }}
+    about:
+      summary: Activation and version verification of MSVC {{ vcver }} (VS {{ vsyear }}) compiler
+      license: BSD 3-clause
+  - name: vs{{ vsyear }}_runtime
+    script: install_runtime.bat
+  - name: vc
+    version: {{ vcver }}
+    track_features:
+      - vc{{ vcfeature }}
+    requirements:
+      run:
+        - {{ pin_subpackage('vs' ~ vsyear ~ '_runtime') }}
+    about:
+      home: https://github.com/conda/conda/wiki/VC-features
+      license: Modified BSD License (3-clause)
+      license_family: BSD
+      summary: A meta-package to track VC features.
+      description: |
+          This metapackage is used to activate vc features without
+          depending on Python.
+      doc_url: https://github.com/conda/conda/wiki/VC-features
+      dev_url: https://github.com/conda/conda/wiki/VC-features
diff --git a/packaging/windows/azure-pipelines-ci.yml b/packaging/windows/azure-pipelines-ci.yml
new file mode 100644
index 00000000000..6f9f3468cfe
--- /dev/null
+++ b/packaging/windows/azure-pipelines-ci.yml
@@ -0,0 +1,11 @@
+
+# Turn off auto builds for commits
+trigger: none
+pr: none
+
+jobs:
+- template: templates/build_task.yml
+  parameters:
+    package: 'Wheels'
+    spec: 'CPU'
+    msagent: true
diff --git a/packaging/windows/azure-pipelines.yml b/packaging/windows/azure-pipelines.yml
new file mode 100644
index 00000000000..d0240570012
--- /dev/null
+++ b/packaging/windows/azure-pipelines.yml
@@ -0,0 +1,35 @@
+
+# Turn off auto builds for commits
+trigger: none
+pr: none
+
+jobs:
+- template: templates/auth_task.yml
+
+- template: templates/build_task.yml
+  parameters:
+    package: 'Wheels'
+    spec: 'CPU'
+    msagent: true
+
+- template: templates/build_task.yml
+  parameters:
+    package: 'Conda'
+    spec: 'CPU'
+    msagent: true
+
+- template: templates/build_task.yml
+  parameters:
+    package: 'Wheels'
+    spec: 'CUDA'
+    msagent: true
+
+- template: templates/build_task.yml
+  parameters:
+    package: 'Conda'
+    spec: 'CUDA'
+    msagent: true
+
+- template: templates/linux_build_task.yml
+  parameters:
+    msagent: $(ms.hosted.agent.cpu)
diff --git a/packaging/windows/build_vision.bat b/packaging/windows/build_vision.bat
index c7b65dc04f1..162dba44917 100644
--- a/packaging/windows/build_vision.bat
+++ b/packaging/windows/build_vision.bat
@@ -23,18 +23,21 @@ set CUDA_VERSION=%~1
 set TORCHVISION_BUILD_VERSION=%~2
 set TORCHVISION_BUILD_NUMBER=%~3
 
+set BUILD_VERSION=%TORCHVISION_BUILD_VERSION%
+
 :env_end
 
 if NOT "%CUDA_VERSION%" == "cpu" (
     set CUDA_PREFIX=cuda%CUDA_VERSION%
     set CUVER=cu%CUDA_VERSION%
+    set FORCE_CUDA=1
 ) else (
     set CUDA_PREFIX=cpu
     set CUVER=cpu
 )
 
 set BUILD_VISION=1
-set TORCH_WHEEL=torch -f https://download.pytorch.org/whl/%CUVER%/stable.html --no-index
+REM set TORCH_WHEEL=torch -f https://download.pytorch.org/whl/%CUVER%/stable.html --no-index
 
 IF "%DESIRED_PYTHON%" == "" set DESIRED_PYTHON=3.5;3.6;3.7
 set DESIRED_PYTHON_PREFIX=%DESIRED_PYTHON:.=%
@@ -61,8 +64,35 @@ FOR %%v IN (%DESIRED_PYTHON%) DO (
     set PYTHON_VERSION_STR=%%v
     set PYTHON_VERSION_STR=!PYTHON_VERSION_STR:.=!
     conda remove -n py!PYTHON_VERSION_STR! --all -y || rmdir %CONDA_HOME%\envs\py!PYTHON_VERSION_STR! /s
-    conda create -n py!PYTHON_VERSION_STR! -y -q numpy>=1.11 mkl>=2018 python=%%v
+    conda create -n py!PYTHON_VERSION_STR! -y -q -c defaults -c conda-forge numpy>=1.11 mkl>=2018 python=%%v ca-certificates scipy av
+)
+
+:: Uncomment for stable releases
+:: FOR %%v IN (%DESIRED_PYTHON%) DO (
+::     set PYTHON_VERSION_STR=%%v
+::     set PYTHON_VERSION_STR=!PYTHON_VERSION_STR:.=!
+::     set "PATH=%CONDA_HOME%\envs\py!PYTHON_VERSION_STR!;%CONDA_HOME%\envs\py!PYTHON_VERSION_STR!\scripts;%CONDA_HOME%\envs\py!PYTHON_VERSION_STR!\Library\bin;%ORIG_PATH%"
+
+::     if "%CUDA_VERSION%" == "100" (
+::         set TORCH_WHEEL=https://download.pytorch.org/whl/%CUVER%/torch-1.2.0-cp!PYTHON_VERSION_STR!-cp!PYTHON_VERSION_STR!m-win_amd64.whl
+::     ) else (
+::         set TORCH_WHEEL=https://download.pytorch.org/whl/%CUVER%/torch-1.2.0%%2B%CUVER%-cp!PYTHON_VERSION_STR!-cp!PYTHON_VERSION_STR!m-win_amd64.whl
+::     )
+::     echo Installing !TORCH_WHEEL!...
+::     pip install "!TORCH_WHEEL!"
+:: )
+
+:: Uncomment for nightly releases
+FOR %%v IN (%DESIRED_PYTHON%) DO (
+    set PYTHON_VERSION_STR=%%v
+    set PYTHON_VERSION_STR=!PYTHON_VERSION_STR:.=!
+    set "PATH=%CONDA_HOME%\envs\py!PYTHON_VERSION_STR!;%CONDA_HOME%\envs\py!PYTHON_VERSION_STR!\scripts;%CONDA_HOME%\envs\py!PYTHON_VERSION_STR!\Library\bin;%ORIG_PATH%"
+
+    set TORCH_WHEEL=torch --pre -f https://download.pytorch.org/whl/nightly/%CUVER%/torch_nightly.html
+    echo Installing !TORCH_WHEEL!...
+    pip install !TORCH_WHEEL!
 )
+
 endlocal
 
 if "%DEBUG%" == "1" (
@@ -71,11 +101,22 @@ if "%DEBUG%" == "1" (
     set BUILD_TYPE=release
 )
 
+:: Install sccache
+if "%USE_SCCACHE%" == "1" (
+    mkdir %CD%\tmp_bin
+    curl -k https://s3.amazonaws.com/ossci-windows/sccache.exe --output %CD%\tmp_bin\sccache.exe
+    if not "%CUDA_VERSION%" == "" (
+        copy %CD%\tmp_bin\sccache.exe %CD%\tmp_bin\nvcc.exe
+
+        set CUDA_NVCC_EXECUTABLE=%CD%\tmp_bin\nvcc
+        set "PATH=%CD%\tmp_bin;%PATH%"
+    )
+)
+
 for %%v in (%DESIRED_PYTHON_PREFIX%) do (
     :: Activate Python Environment
     set PYTHON_PREFIX=%%v
     set "PATH=%CONDA_HOME%\envs\%%v;%CONDA_HOME%\envs\%%v\scripts;%CONDA_HOME%\envs\%%v\Library\bin;%ORIG_PATH%"
-    pip install %TORCH_WHEEL%
     @setlocal
     :: Set Flags
     if NOT "%CUDA_VERSION%"=="cpu" (
diff --git a/packaging/windows/cuda92.bat b/packaging/windows/cuda92.bat
new file mode 100644
index 00000000000..0bfcdc8e463
--- /dev/null
+++ b/packaging/windows/cuda92.bat
@@ -0,0 +1,59 @@
+@echo off
+
+IF NOT "%BUILD_VISION%" == "" (
+    set MODULE_NAME=vision
+) ELSE (
+    set MODULE_NAME=pytorch
+)
+
+IF NOT EXIST "setup.py" IF NOT EXIST "%MODULE_NAME%" (
+    call internal\clone.bat
+    cd ..
+    IF ERRORLEVEL 1 goto eof
+) ELSE (
+    call internal\clean.bat
+)
+
+call internal\check_deps.bat
+IF ERRORLEVEL 1 goto eof
+
+REM Check for optional components
+
+set USE_CUDA=
+set CMAKE_GENERATOR=Visual Studio 15 2017 Win64
+
+IF "%NVTOOLSEXT_PATH%"=="" (
+    REM NVTX is mandatory for this build: report the problem and abort immediately.
+    echo NVTX ^(Visual Studio Extension ^for CUDA^) ^not installed, failing
+    exit /b 1
+)
+
+IF "%CUDA_PATH_V9_2%"=="" (
+    echo CUDA 9.2 not found, failing
+    exit /b 1
+) ELSE (
+    IF "%BUILD_VISION%" == "" (
+        set TORCH_CUDA_ARCH_LIST=3.5;5.0+PTX;6.0;6.1;7.0
+        set TORCH_NVCC_FLAGS=-Xfatbin -compress-all
+    ) ELSE (
+        set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_61,code=sm_61 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_50,code=compute_50
+    )
+
+    set "CUDA_PATH=%CUDA_PATH_V9_2%"
+    set "PATH=%CUDA_PATH_V9_2%\bin;%PATH%"
+)
+
+:optcheck
+
+IF "%BUILD_VISION%" == "" (
+    call internal\check_opts.bat
+    IF ERRORLEVEL 1 goto eof
+
+    call internal\copy.bat
+    IF ERRORLEVEL 1 goto eof
+)
+
+call internal\setup.bat
+IF ERRORLEVEL 1 goto eof
+
+:eof
diff --git a/packaging/windows/internal/7z_install.bat b/packaging/windows/internal/7z_install.bat
deleted file mode 100644
index d5a1156360d..00000000000
--- a/packaging/windows/internal/7z_install.bat
+++ /dev/null
@@ -1,9 +0,0 @@
-@echo off
-
-curl -k https://www.7-zip.org/a/7z1805-x64.exe -O
-if errorlevel 1 exit /b 1
-
-start /wait 7z1805-x64.exe /S
-if errorlevel 1 exit /b 1
-
-set "PATH=%ProgramFiles%\7-Zip;%PATH%"
diff --git a/packaging/windows/internal/clone.bat b/packaging/windows/internal/clone.bat
index f93271978c6..4ba181fa804 100644
--- a/packaging/windows/internal/clone.bat
+++ b/packaging/windows/internal/clone.bat
@@ -44,15 +44,11 @@ set PYTORCH_BRANCH=%last_commit%
 
 :latest_end
 
-IF "%BUILD_VISION%" == "" (
-    IF "%PYTORCH_BRANCH%" == "" (
-        set PYTORCH_BRANCH=v%PYTORCH_BUILD_VERSION%
-    )
-    git checkout %PYTORCH_BRANCH%
-    IF ERRORLEVEL 1 git checkout tags/%PYTORCH_BRANCH%
-) ELSE (
-    git checkout v%TORCHVISION_BUILD_VERSION%
+IF "%PYTORCH_BRANCH%" == "" (
+    set PYTORCH_BRANCH=v%TORCHVISION_BUILD_VERSION%
 )
+git checkout %PYTORCH_BRANCH%
+IF ERRORLEVEL 1 git checkout tags/%PYTORCH_BRANCH%
 
 :submodule
 
diff --git a/packaging/windows/internal/cuda_install.bat b/packaging/windows/internal/cuda_install.bat
index b6ea9e23739..a85018b11d7 100644
--- a/packaging/windows/internal/cuda_install.bat
+++ b/packaging/windows/internal/cuda_install.bat
@@ -1,4 +1,9 @@
-@echo off
+@echo on
+
+if "%CUDA_VERSION%" == "cpu" (
+    echo Skipping for CPU builds
+    exit /b 0
+)
 
 set SRC_DIR=%~dp0\..
 
@@ -9,87 +14,60 @@ set CUDA_VER_MAJOR=%CUDA_VERSION:~0,-1%
 set CUDA_VER_MINOR=%CUDA_VERSION:~-1,1%
 set CUDA_VERSION_STR=%CUDA_VER_MAJOR%.%CUDA_VER_MINOR%
 
-IF %CUDA_VER% LEQ 90 (
-    set "NVCC_PACKAGE=compiler_%CUDA_VERSION_STR%"
-) ELSE (
-    set "NVCC_PACKAGE=nvcc_%CUDA_VERSION_STR%"
-)
-
-IF %CUDA_VER% EQU 80 goto cuda80
-IF %CUDA_VER% EQU 90 goto cuda90
-IF %CUDA_VER% EQU 91 goto cuda91
-IF %CUDA_VER% EQU 92 goto cuda92
-IF %CUDA_VER% EQU 100 goto cuda100
+if %CUDA_VER% EQU 92 goto cuda92
+if %CUDA_VER% EQU 100 goto cuda100
 
 echo CUDA %CUDA_VERSION_STR% is not supported
 exit /b 1
 
-:cuda80
-
-echo CUDA 8.0 is not supported
-exit /b 1
-
-:cuda90
-IF NOT EXIST "%SRC_DIR%\temp_build\cuda_9.0.176_windows.7z" (
-    curl -k -L https://www.dropbox.com/s/z5b7ryz0zrimntl/cuda_9.0.176_windows.7z?dl=1 --output "%SRC_DIR%\temp_build\cuda_9.0.176_windows.7z"
+:cuda92
+if not exist "%SRC_DIR%\temp_build\cuda_9.2.148_win10.exe" (
+    curl -k -L https://ossci-windows.s3.amazonaws.com/win2016/cuda_9.2.148_win10.exe --output "%SRC_DIR%\temp_build\cuda_9.2.148_win10.exe"
     if errorlevel 1 exit /b 1
-    set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\cuda_9.0.176_windows.7z"
-    set "NVCC_PACKAGE=compiler_%CUDA_VERSION_STR%"
 )
+set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\cuda_9.2.148_win10.exe"
+set "ARGS=nvcc_9.2 cuobjdump_9.2 nvprune_9.2 cupti_9.2 cublas_9.2 cublas_dev_9.2 cudart_9.2 cufft_9.2 cufft_dev_9.2 curand_9.2 curand_dev_9.2 cusolver_9.2 cusolver_dev_9.2 cusparse_9.2 cusparse_dev_9.2 nvgraph_9.2 nvgraph_dev_9.2 npp_9.2 npp_dev_9.2 nvrtc_9.2 nvrtc_dev_9.2 nvml_dev_9.2"
 
-IF NOT EXIST "%SRC_DIR%\temp_build\cudnn-9.0-windows7-x64-v7.zip" (
-    curl -k -L https://www.dropbox.com/s/6p0xyqh472nu8m1/cudnn-9.0-windows7-x64-v7.zip?dl=1 --output "%SRC_DIR%\temp_build\cudnn-9.0-windows7-x64-v7.zip"
+if not exist "%SRC_DIR%\temp_build\cudnn-9.2-windows10-x64-v7.2.1.38.zip" (
+    curl -k -L https://ossci-windows.s3.amazonaws.com/win2016/cudnn-9.2-windows10-x64-v7.2.1.38.zip --output "%SRC_DIR%\temp_build\cudnn-9.2-windows10-x64-v7.2.1.38.zip"
     if errorlevel 1 exit /b 1
-    set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\cudnn-9.0-windows7-x64-v7.zip"
 )
+set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\cudnn-9.2-windows10-x64-v7.2.1.38.zip"
 
 goto cuda_common
 
-:cuda91
+:cuda100
 
-IF NOT EXIST "%SRC_DIR%\temp_build\cuda_9.1.85_windows.7z" (
-    curl -k -L https://www.dropbox.com/s/7a4sbq0dln6v7t2/cuda_9.1.85_windows.7z?dl=1 --output "%SRC_DIR%\temp_build\cuda_9.1.85_windows.7z"
+if not exist "%SRC_DIR%\temp_build\cuda_10.0.130_411.31_win10.exe" (
+    curl -k -L https://ossci-windows.s3.amazonaws.com/win2016/cuda_10.0.130_411.31_win10.exe --output "%SRC_DIR%\temp_build\cuda_10.0.130_411.31_win10.exe"
     if errorlevel 1 exit /b 1
-    set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\cuda_9.1.85_windows.7z"
-    set "NVCC_PACKAGE=nvcc_%CUDA_VERSION_STR%"
 )
+set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\cuda_10.0.130_411.31_win10.exe"
+set "ARGS=nvcc_10.0 cuobjdump_10.0 nvprune_10.0 cupti_10.0 cublas_10.0 cublas_dev_10.0 cudart_10.0 cufft_10.0 cufft_dev_10.0 curand_10.0 curand_dev_10.0 cusolver_10.0 cusolver_dev_10.0 cusparse_10.0 cusparse_dev_10.0 nvgraph_10.0 nvgraph_dev_10.0 npp_10.0 npp_dev_10.0 nvrtc_10.0 nvrtc_dev_10.0 nvml_dev_10.0"
 
-IF NOT EXIST "%SRC_DIR%\temp_build\cudnn-9.1-windows7-x64-v7.zip" (
-    curl -k -L https://www.dropbox.com/s/e0prhgsrbyfi4ov/cudnn-9.1-windows7-x64-v7.zip?dl=1 --output "%SRC_DIR%\temp_build\cudnn-9.1-windows7-x64-v7.zip"
+if not exist "%SRC_DIR%\temp_build\cudnn-10.0-windows10-x64-v7.4.1.5.zip" (
+    curl -k -L https://ossci-windows.s3.amazonaws.com/win2016/cudnn-10.0-windows10-x64-v7.4.1.5.zip --output "%SRC_DIR%\temp_build\cudnn-10.0-windows10-x64-v7.4.1.5.zip"
     if errorlevel 1 exit /b 1
-    set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\cudnn-9.1-windows7-x64-v7.zip"
 )
+set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\cudnn-10.0-windows10-x64-v7.4.1.5.zip"
 
 goto cuda_common
 
-:cuda92
-
-echo CUDA 9.2 is not supported
-exit /b 1
-
-:cuda100
-
-echo CUDA 10.0 is not supported
-exit /b 1
-
 :cuda_common
 
-set "CUDA_PREFIX=cuda%CUDA_VERSION%"
-
-IF NOT EXIST "%SRC_DIR%\temp_build\NvToolsExt.7z" (
+if not exist "%SRC_DIR%\temp_build\NvToolsExt.7z" (
     curl -k -L https://www.dropbox.com/s/9mcolalfdj4n979/NvToolsExt.7z?dl=1 --output "%SRC_DIR%\temp_build\NvToolsExt.7z"
     if errorlevel 1 exit /b 1
 )
 
 echo Installing CUDA toolkit...
-
 7z x %CUDA_SETUP_FILE% -o"%SRC_DIR%\temp_build\cuda"
 pushd "%SRC_DIR%\temp_build\cuda"
-dir
-start /wait setup.exe -s %NVCC_PACKAGE% cublas_%CUDA_VERSION_STR% cublas_dev_%CUDA_VERSION_STR% cudart_%CUDA_VERSION_STR% curand_%CUDA_VERSION_STR% curand_dev_%CUDA_VERSION_STR% cusparse_%CUDA_VERSION_STR% cusparse_dev_%CUDA_VERSION_STR% nvrtc_%CUDA_VERSION_STR% nvrtc_dev_%CUDA_VERSION_STR% cufft_%CUDA_VERSION_STR% cufft_dev_%CUDA_VERSION_STR%
+start /wait setup.exe -s %ARGS%
 popd
+
 echo Installing VS integration...
-xcopy /Y "%SRC_DIR%\temp_build\cuda\_vs\*.*" "c:\Program Files (x86)\MSBuild\Microsoft.Cpp\v4.0\V140\BuildCustomizations"
+xcopy /Y "%SRC_DIR%\temp_build\cuda\CUDAVisualStudioIntegration\extras\visual_studio_integration\MSBuildExtensions\*.*" "C:\Program Files (x86)\Microsoft Visual Studio\2017\BuildTools\Common7\IDE\VC\VCTargets\BuildCustomizations"
 
 echo Installing NvToolsExt...
 7z x %SRC_DIR%\temp_build\NvToolsExt.7z -o"%SRC_DIR%\temp_build\NvToolsExt"
@@ -100,23 +78,22 @@ xcopy /Y "%SRC_DIR%\temp_build\NvToolsExt\bin\x64\*.*" "%ProgramFiles%\NVIDIA Co
 xcopy /Y "%SRC_DIR%\temp_build\NvToolsExt\include\*.*" "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\include"
 xcopy /Y "%SRC_DIR%\temp_build\NvToolsExt\lib\x64\*.*" "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\lib\x64"
 
+echo Setting up environment...
+set "PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\bin;%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\libnvvp;%PATH%"
+set "CUDA_PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%"
+set "CUDA_PATH_V%CUDA_VER_MAJOR%_%CUDA_VER_MINOR%=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%"
+set "NVTOOLSEXT_PATH=%ProgramFiles%\NVIDIA Corporation\NvToolsExt\bin\x64"
+
+if not exist "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\bin\nvcc.exe" (
+    echo CUDA %CUDA_VERSION_STR% installation failed.
+    exit /b 1
+)
+
 echo Installing cuDNN...
 7z x %CUDNN_SETUP_FILE% -o"%SRC_DIR%\temp_build\cudnn"
 xcopy /Y "%SRC_DIR%\temp_build\cudnn\cuda\bin\*.*" "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\bin"
 xcopy /Y "%SRC_DIR%\temp_build\cudnn\cuda\lib\x64\*.*" "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\lib\x64"
 xcopy /Y "%SRC_DIR%\temp_build\cudnn\cuda\include\*.*" "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\include"
 
-echo Setting up environment...
-set "PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\bin;%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\libnvvp;%PATH%"
-set "CUDA_PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%"
-set "CUDA_PATH_V%CUDA_VER_MAJOR%_%CUDA_VER_MINOR%=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%"
-set "NVTOOLSEXT_PATH=%ProgramFiles%\NVIDIA Corporation\NvToolsExt\"
-
 echo Cleaning temp files
-rd /s /q "%SRC_DIR%\temp_build"
-pushd %TEMP%
-rd /s /q .
-popd
-
-echo Using VS2015 as NVCC compiler
-set "CUDAHOSTCXX=%VS140COMNTOOLS%\..\..\VC\bin\amd64\cl.exe"
+rd /s /q "%SRC_DIR%\temp_build" || ver > nul
diff --git a/packaging/windows/internal/dep_install.bat b/packaging/windows/internal/dep_install.bat
new file mode 100644
index 00000000000..db665a99f26
--- /dev/null
+++ b/packaging/windows/internal/dep_install.bat
@@ -0,0 +1,14 @@
+@echo off
+
+REM curl -k https://www.7-zip.org/a/7z1805-x64.exe -O
+REM if errorlevel 1 exit /b 1
+
+REM start /wait 7z1805-x64.exe /S
+REM if errorlevel 1 exit /b 1
+
+REM set "PATH=%ProgramFiles%\7-Zip;%PATH%"
+
+choco feature disable --name showDownloadProgress
+choco feature enable --name allowGlobalConfirmation
+
+choco install curl 7zip
diff --git a/packaging/windows/internal/env_fix.bat b/packaging/windows/internal/env_fix.bat
new file mode 100644
index 00000000000..dd0aaf5f2d5
--- /dev/null
+++ b/packaging/windows/internal/env_fix.bat
@@ -0,0 +1,31 @@
+@echo off
+
+:: Caution: Please don't use this script locally
+:: It may destroy your build environment.
+
+setlocal
+
+IF NOT EXIST "%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" (
+    echo Visual Studio 2017 C++ BuildTools is required to compile PyTorch on Windows
+    exit /b 1
+)
+
+for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [15^,16^) -property installationPath`) do (
+    if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" (
+        set "VS15INSTALLDIR=%%i"
+        set "VS15VCVARSALL=%%i\VC\Auxiliary\Build\vcvarsall.bat"
+        goto vswhere
+    )
+)
+
+:vswhere
+
+IF "%VS15VCVARSALL%"=="" (
+    echo Visual Studio 2017 C++ BuildTools is required to compile PyTorch on Windows
+    exit /b 1
+)
+
+call "%VS15VCVARSALL%" x86_amd64
+for /f "usebackq tokens=*" %%i in (`where link.exe`) do move "%%i" "%%i.bak"
+
+endlocal
diff --git a/packaging/windows/internal/nightly_defaults.bat b/packaging/windows/internal/nightly_defaults.bat
new file mode 100644
index 00000000000..208d8b85e78
--- /dev/null
+++ b/packaging/windows/internal/nightly_defaults.bat
@@ -0,0 +1,200 @@
+@echo on
+
+if "%~1"=="" goto arg_error
+if NOT "%~2"=="" goto arg_error
+goto arg_end
+
+:arg_error
+
+echo Illegal number of parameters. Pass package type `Conda` or `Wheels`.
+exit /b 1
+
+:arg_end
+
+echo "nightly_defaults.bat at %CD% starting at %DATE%"
+
+set SRC_DIR=%~dp0\..
+
+:: NIGHTLIES_FOLDER
+:: N.B. this is also defined in cron_start.sh
+::   An arbitrary root folder to store all nightlies folders, each of which is a
+::   parent level date folder with separate subdirs for logs, wheels, conda
+::   packages, etc. This should be kept the same across all scripts called in a
+::   cron job, so it only has a default value in the top-most script
+::   build_cron.sh to avoid the default values from diverging.
+if "%NIGHTLIES_FOLDER%" == "" set "NIGHTLIES_FOLDER=%SRC_DIR%"
+
+:: NIGHTLIES_DATE
+:: N.B. this is also defined in cron_start.sh
+::   The date in YYYY_mm_dd format that we are building for. If this is not
+::   already set, then this will first try to find the date of the nightlies
+::   folder that this builder repo exists in; e.g. if this script exists in
+::   some_dir/2019_09_04/builder/cron/ then this will be set to 2019_09_04 (must
+::   match YYYY_mm_dd). This is for convenience when debugging/uploading past
+::   dates, so that you don't have to set NIGHTLIES_DATE yourself. If a date
+::   folder cannot be found in that exact location, then this will default to
+::   the current date.
+
+
+if "%NIGHTLIES_DATE%" == "" ( goto date_start ) else ( goto date_end )
+
+:date_start
+
+set "DATE_CMD=Get-Date ([System.TimeZoneInfo]::ConvertTimeFromUtc((Get-Date).ToUniversalTime(), [System.TimeZoneInfo]::FindSystemTimeZoneById('Pacific Standard Time'))) -f 'yyyy_MM_dd'"
+set "DATE_COMPACT_CMD=Get-Date ([System.TimeZoneInfo]::ConvertTimeFromUtc((Get-Date).ToUniversalTime(), [System.TimeZoneInfo]::FindSystemTimeZoneById('Pacific Standard Time'))) -f 'yyyyMMdd'"
+
+FOR /F "delims=" %%i IN ('powershell -c "%DATE_CMD%"') DO set NIGHTLIES_DATE=%%i
+FOR /F "delims=" %%i IN ('powershell -c "%DATE_COMPACT_CMD%"') DO set NIGHTLIES_DATE_COMPACT=%%i
+
+:date_end
+
+if "%NIGHTLIES_DATE_COMPACT%" == "" set NIGHTLIES_DATE_COMPACT=%NIGHTLIES_DATE:~0,4%%NIGHTLIES_DATE:~5,2%%NIGHTLIES_DATE:~8,2%
+
+:: Used in lots of places as the root dir to store all conda/wheel/manywheel
+:: packages as well as logs for the day
+set today=%NIGHTLIES_FOLDER%\%NIGHTLIES_DATE%
+mkdir "%today%" || ver >nul
+
+
+::#############################################################################
+:: Add new configuration variables below this line. 'today' should always be
+:: defined ASAP to avoid weird errors
+::#############################################################################
+
+
+:: List of people to email when things go wrong. This is passed directly to
+:: `mail -t`
+:: TODO: Not supported yet
+if "%NIGHTLIES_EMAIL_LIST%" == "" set NIGHTLIES_EMAIL_LIST=peterghost86@gmail.com
+
+:: PYTORCH_CREDENTIALS_FILE
+::   A bash file that exports credentials needed to upload to aws and anaconda.
+::   Needed variables are PYTORCH_ANACONDA_USERNAME, PYTORCH_ANACONDA_PASSWORD,
+::   AWS_ACCESS_KEY_ID, and AWS_SECRET_ACCESS_KEY. Or it can just export the AWS
+::   keys and then prepend a logged-in conda installation to the path.
+:: TODO: Not supported yet
+if "%PYTORCH_CREDENTIALS_FILE%" == "" set PYTORCH_CREDENTIALS_FILE=/c/Users/administrator/nightlies/credentials.sh
+
+:: Location of the temporary miniconda that is downloaded to install conda-build
+:: and aws to upload finished packages. TODO: it is messy to install this in
+:: upload.sh and later use it in upload_logs.sh
+if "%CONDA_UPLOADER_INSTALLATION%" == "" set "CONDA_UPLOADER_INSTALLATION=%today%\miniconda"
+
+:: N.B. BUILDER_REPO and BUILDER_BRANCH are both set in cron_start.sh, as that
+:: is the script that actually clones the builder repo that /this/ script is
+:: running from.
+pushd "%SRC_DIR%\.."
+set NIGHTLIES_BUILDER_ROOT=%CD%
+popd
+
+:: The shared torchvision (vision) checkout to be used by all builds
+if "%NIGHTLIES_PYTORCH_ROOT%" == "" set "NIGHTLIES_PYTORCH_ROOT=%today%\vision"
+
+:: PYTORCH_REPO
+::   The GitHub org/user whose fork of torchvision to check out (git clone
+::   https://github.com/<THIS_PART>/vision.git). It is cloned only when the
+::   checkout folder does not exist yet. Default is 'pytorch'
+if "%PYTORCH_REPO%" == "" set PYTORCH_REPO=pytorch
+
+:: PYTORCH_BRANCH
+::   The branch of torchvision to check out for building (git checkout <THIS_PART>).
+::   This can either be the name of the branch (e.g. git checkout
+::   my_branch_name) or can be a git commit (git checkout 4b2674n...). Default
+::   is 'latest', which is a special term that signals to pull the last commit
+::   before 0:00 midnight on the NIGHTLIES_DATE
+if "%PYTORCH_BRANCH%" == "" set PYTORCH_BRANCH=latest
+
+:: Clone the requested torchvision checkout
+if exist "%NIGHTLIES_PYTORCH_ROOT%" ( goto clone_end ) else ( goto clone_start )
+
+:clone_start
+
+git clone --recursive "https://github.com/%PYTORCH_REPO%/vision.git" "%NIGHTLIES_PYTORCH_ROOT%"
+pushd "%NIGHTLIES_PYTORCH_ROOT%"
+
+if "%PYTORCH_BRANCH%" == "latest" ( goto latest_start ) else ( goto latest_end )
+
+:latest_start
+
+:: Switch to the latest commit by 11:59 yesterday
+echo PYTORCH_BRANCH is set to latest so I will find the last commit
+echo before 0:00 midnight on %NIGHTLIES_DATE%
+set git_date=%NIGHTLIES_DATE:_=-%
+FOR /F "delims=" %%i IN ('git log --before %git_date% -n 1 "--pretty=%%H"') DO set last_commit=%%i
+echo Setting PYTORCH_BRANCH to %last_commit% since that was the last
+echo commit before %NIGHTLIES_DATE%
+set PYTORCH_BRANCH=%last_commit%
+
+:latest_end
+
+git checkout "%PYTORCH_BRANCH%"
+git submodule update
+popd
+
+:clone_end
+
+if "%CUDA_VERSION%" == "cpu" (
+    set _DESIRED_CUDA=cpu
+) else (
+    set _DESIRED_CUDA=cu%CUDA_VERSION%
+)
+
+:: TORCHVISION_BUILD_VERSION
+::   The actual version string. Used in conda like
+::       pytorch-nightly==1.0.0.dev20180908
+::   or in manylinux like
+::       torch_nightly-1.0.0.dev20180908-cp27-cp27m-linux_x86_64.whl
+if "%TORCHVISION_BUILD_VERSION%" == "" set TORCHVISION_BUILD_VERSION=0.5.0.dev%NIGHTLIES_DATE_COMPACT%
+
+if "%~1" == "Wheels" (
+    if not "%CUDA_VERSION%" == "100" (
+        set TORCHVISION_BUILD_VERSION=%TORCHVISION_BUILD_VERSION%+%_DESIRED_CUDA%
+    )
+)
+
+:: TORCHVISION_BUILD_NUMBER
+::   This is usually the number 1. If more than one build is uploaded for the
+::   same version/date, then this can be incremented to 2,3 etc in which case
+::   '.post2' will be appended to the version string of the package. This can
+::   be set to '0' only if OVERRIDE_PACKAGE_VERSION is being used to bypass
+::   all the version string logic in downstream scripts. Since we use the
+::   override below, exporting this shouldn't actually matter.
+if "%TORCHVISION_BUILD_NUMBER%" == "" set /a TORCHVISION_BUILD_NUMBER=1
+if %TORCHVISION_BUILD_NUMBER% GTR 1 set TORCHVISION_BUILD_VERSION=%TORCHVISION_BUILD_VERSION%%TORCHVISION_BUILD_NUMBER%
+
+:: The nightly builds use their own versioning logic, so we override whatever
+:: logic is in setup.py or other scripts
+:: TODO: Not supported yet
+set OVERRIDE_PACKAGE_VERSION=%TORCHVISION_BUILD_VERSION%
+set BUILD_VERSION=%TORCHVISION_BUILD_VERSION%
+
+:: Build folder for conda builds to use
+if "%TORCH_CONDA_BUILD_FOLDER%" == "" set TORCH_CONDA_BUILD_FOLDER=torchvision
+
+:: TORCH_PACKAGE_NAME
+::   The name of the package to upload. This should probably be pytorch or
+::   pytorch-nightly. N.B. that pip will change all '-' to '_' but conda will
+::   not. This is dealt with in downstream scripts.
+:: TODO: Not supported yet
+if "%TORCH_PACKAGE_NAME%" == "" set TORCH_PACKAGE_NAME=torchvision
+
+:: PIP_UPLOAD_FOLDER should end in a slash. This is to handle it being empty
+:: (when uploading to e.g. whl/cpu/) and also to handle nightlies (when
+:: uploading to e.g. /whl/nightly/cpu)
+:: TODO: Not supported yet
+if "%PIP_UPLOAD_FOLDER%" == "" set "PIP_UPLOAD_FOLDER=nightly\"
+
+:: The location of the binary_sizes dir in s3 is hardcoded into
+:: upload_binary_sizes.sh
+
+:: DAYS_TO_KEEP
+::   How many days to keep around for clean.sh. Build folders older than this
+::   will be purged at the end of cron jobs. '1' means to keep only the current
+::   day. Values less than 1 are not allowed. The default is 5.
+:: TODO: Not supported yet
+if "%DAYS_TO_KEEP%" == "" set /a DAYS_TO_KEEP=5
+if %DAYS_TO_KEEP% LSS 1 (
+    echo DAYS_TO_KEEP cannot be less than 1.
+    echo A value of 1 means to only keep the build for today
+    exit /b 1
+)
diff --git a/packaging/windows/internal/publish.bat b/packaging/windows/internal/publish.bat
new file mode 100644
index 00000000000..6ee9ef853a0
--- /dev/null
+++ b/packaging/windows/internal/publish.bat
@@ -0,0 +1,85 @@
+@echo off
+
+set SRC_DIR=%~dp0
+pushd %SRC_DIR%
+
+if NOT "%CUDA_VERSION%" == "cpu" (
+    set PACKAGE_SUFFIX=_cuda%CUDA_VERSION%
+) else (
+    set PACKAGE_SUFFIX=
+)
+
+if "%PACKAGEFULLNAME%" == "Conda" (
+    set PACKAGE=conda
+) else (
+    set PACKAGE=wheels
+)
+
+set PUBLISH_BRANCH=%PACKAGE%_%DESIRED_PYTHON%%PACKAGE_SUFFIX%
+
+git clone %ARTIFACT_REPO_URL% -b %PUBLISH_BRANCH% --single-branch >nul 2>&1
+
+IF ERRORLEVEL 1 (
+    echo Branch %PUBLISH_BRANCH% does not exist, falling back to master
+    set NO_BRANCH=1
+    git clone %ARTIFACT_REPO_URL% -b master --single-branch >nul 2>&1
+)
+
+IF ERRORLEVEL 1 (
+    echo Clone failed
+    goto err
+)
+
+cd pytorch_builder || goto err
+attrib -s -h -r . /s /d
+
+:: Empty the clone; the cd above must have succeeded or this would wipe the wrong directory
+rd /s /q . || ver >nul
+
+IF NOT EXIST %PACKAGE% mkdir %PACKAGE%
+
+xcopy /S /E /Y ..\..\output\*.* %PACKAGE%\
+
+git config --global user.name "Azure DevOps"
+git config --global user.email peterghost86@gmail.com
+git init
+git checkout --orphan %PUBLISH_BRANCH%
+git remote add origin %ARTIFACT_REPO_URL%
+git add .
+git commit -m "Update artifacts"
+
+:push
+
+if "%RETRY_TIMES%" == "" (
+    set /a RETRY_TIMES=10
+    set /a SLEEP_TIME=2
+) else (
+    set /a RETRY_TIMES=%RETRY_TIMES%-1
+    set /a SLEEP_TIME=%SLEEP_TIME%*2
+)
+
+git push origin %PUBLISH_BRANCH% -f > nul 2>&1
+
+IF ERRORLEVEL 1 (
+    echo Git push retry times remaining: %RETRY_TIMES%
+    echo Sleep time: %SLEEP_TIME% seconds
+    IF %RETRY_TIMES% EQU 0 (
+        echo Push failed
+        goto err
+    )
+    waitfor SomethingThatIsNeverHappening /t %SLEEP_TIME% 2>nul || ver >nul
+    goto push
+) ELSE (
+    set RETRY_TIMES=
+    set SLEEP_TIME=
+)
+
+popd
+
+exit /b 0
+
+:err
+
+popd
+
+exit /b 1
diff --git a/packaging/windows/internal/setup.bat b/packaging/windows/internal/setup.bat
index fcf10f185c7..d18dfb35023 100644
--- a/packaging/windows/internal/setup.bat
+++ b/packaging/windows/internal/setup.bat
@@ -28,53 +28,6 @@ if "%CXX%"=="sccache cl" (
     sccache --zero-stats
 )
 
-
-if "%BUILD_PYTHONLESS%" == "" goto pytorch else goto libtorch
-
-:libtorch
-set VARIANT=shared-with-deps
-
-mkdir libtorch
-mkdir libtorch\bin
-mkdir libtorch\cmake
-mkdir libtorch\include
-mkdir libtorch\lib
-mkdir libtorch\share
-mkdir libtorch\test
-
-mkdir build
-pushd build
-python ../tools/build_libtorch.py
-popd
-
-IF ERRORLEVEL 1 exit /b 1
-IF NOT ERRORLEVEL 0 exit /b 1
-
-move /Y torch\bin\*.* libtorch\bin\
-move /Y torch\cmake\*.* libtorch\cmake\
-robocopy /move /e torch\include\ libtorch\include\
-move /Y torch\lib\*.* libtorch\lib\
-robocopy /move /e torch\share\ libtorch\share\
-move /Y torch\test\*.* libtorch\test\
-
-move /Y libtorch\bin\*.dll libtorch\lib\
-
-git rev-parse HEAD > libtorch\build-hash
-
-IF "%DEBUG%" == "" (
-    set LIBTORCH_PREFIX=libtorch-win-%VARIANT%
-) ELSE (
-    set LIBTORCH_PREFIX=libtorch-win-%VARIANT%-debug
-)
-
-7z a -tzip %LIBTORCH_PREFIX%-%PYTORCH_BUILD_VERSION%.zip libtorch\*
-
-mkdir ..\output\%CUDA_PREFIX%
-copy /Y %LIBTORCH_PREFIX%-%PYTORCH_BUILD_VERSION%.zip ..\output\%CUDA_PREFIX%\
-copy /Y %LIBTORCH_PREFIX%-%PYTORCH_BUILD_VERSION%.zip ..\output\%CUDA_PREFIX%\%LIBTORCH_PREFIX%-latest.zip
-
-goto build_end
-
 :pytorch
 :: This stores in e.g. D:/_work/1/s/windows/output/cpu
 pip wheel -e . --no-deps --wheel-dir ../output/%CUDA_PREFIX%
diff --git a/packaging/windows/internal/test.bat b/packaging/windows/internal/test.bat
index 1ad7d2ebeb8..a87fc1a2858 100644
--- a/packaging/windows/internal/test.bat
+++ b/packaging/windows/internal/test.bat
@@ -8,15 +8,19 @@ set PYTHON_VERSION=%PYTHON_PREFIX:py=cp%
 if "%BUILD_VISION%" == "" (
     pip install future pytest coverage hypothesis protobuf
 ) ELSE (
-    pip install future pytest "pillow>=4.1.1"
+    pip install future pytest "pillow>=4.1.1" mock
 )
 
-
-for /F "delims=" %%i in ('where /R %SRC_DIR%\output\%CUDA_PREFIX% %MODULE_NAME%*%PYTHON_VERSION%*.whl') do pip install "%%i"
+for /F "delims=" %%i in ('where /R %SRC_DIR%\output\%CUDA_PREFIX% *%MODULE_NAME%*%PYTHON_VERSION%*.whl') do pip install "%%i"
 
 if ERRORLEVEL 1 exit /b 1
 
-if NOT "%BUILD_VISION%" == "" goto smoke_test_end
+if NOT "%BUILD_VISION%" == "" (
+    echo Smoke testing imports
+    python -c "import torchvision"
+    if ERRORLEVEL 1 exit /b 1
+    goto smoke_test_end
+)
 
 echo Smoke testing imports
 python -c "import torch"
diff --git a/packaging/windows/internal/upload.bat b/packaging/windows/internal/upload.bat
new file mode 100644
index 00000000000..0d2946a3fe6
--- /dev/null
+++ b/packaging/windows/internal/upload.bat
@@ -0,0 +1,96 @@
+@echo off
+
+IF "%CONDA_UPLOADER_INSTALLATION%" == "" goto precheck_fail
+IF "%PYTORCH_FINAL_PACKAGE_DIR%" == "" goto precheck_fail
+IF "%today%" == "" goto precheck_fail
+IF "%PYTORCH_ANACONDA_USERNAME%" == "" goto precheck_fail
+IF "%PYTORCH_ANACONDA_PASSWORD%" == "" goto precheck_fail
+
+goto precheck_pass
+
+:precheck_fail
+
+echo Please run nightly_defaults.bat first.
+echo And remember to set `PYTORCH_FINAL_PACKAGE_DIR`
+echo Finally, don't forget to set anaconda tokens
+exit /b 1
+
+:precheck_pass
+
+pushd "%today%" || exit /b 1
+
+:: Download a fresh Miniconda installer; any previous uploader installation is removed first
+set "CONDA_HOME=%CONDA_UPLOADER_INSTALLATION%"
+set "tmp_conda=%CONDA_HOME%"
+set "miniconda_exe=%CD%\miniconda.exe"
+rmdir /s /q "%CONDA_HOME%"
+del miniconda.exe
+curl -k https://repo.continuum.io/miniconda/Miniconda3-latest-Windows-x86_64.exe -o "%miniconda_exe%"
+popd
+
+IF ERRORLEVEL 1 (
+    echo Conda download failed
+    exit /b 1
+)
+
+call "%~dp0\..\..\conda\install_conda.bat"
+
+IF ERRORLEVEL 1 (
+    echo Conda installation failed
+    exit /b 1
+)
+
+set "ORIG_PATH=%PATH%"
+set "PATH=%CONDA_HOME%;%CONDA_HOME%\scripts;%CONDA_HOME%\Library\bin;%PATH%"
+
+REM conda install -y anaconda-client
+pip install git+https://github.com/peterjc123/anaconda-client.git@log_more_meaningfull_errors
+IF ERRORLEVEL 1 (
+    echo Anaconda client installation failed
+    exit /b 1
+)
+
+REM bash -c "yes | anaconda login --username "%PYTORCH_ANACONDA_USERNAME%" --password "%PYTORCH_ANACONDA_PASSWORD%""
+anaconda login --username "%PYTORCH_ANACONDA_USERNAME%" --password "%PYTORCH_ANACONDA_PASSWORD%"
+IF ERRORLEVEL 1 (
+    echo Anaconda client login failed
+    exit /b 1
+)
+
+set PYTORCH_FINAL_PACKAGE=
+:: Upload all the packages under `PYTORCH_FINAL_PACKAGE_DIR`
+FOR /F "delims=" %%i IN ('where /R %PYTORCH_FINAL_PACKAGE_DIR% *vision*.tar.bz2') DO (
+    set "PYTORCH_FINAL_PACKAGE=%%i"
+)
+
+IF "%PYTORCH_FINAL_PACKAGE%" == "" (
+    echo No package to upload
+    exit /b 0
+)
+
+:upload
+
+if "%RETRY_TIMES%" == "" (
+    set /a RETRY_TIMES=10
+    set /a SLEEP_TIME=2
+) else (
+    set /a RETRY_TIMES=%RETRY_TIMES%-1
+    set /a SLEEP_TIME=%SLEEP_TIME%*2
+)
+
+echo Uploading %PYTORCH_FINAL_PACKAGE% to Anaconda Cloud
+anaconda upload "%PYTORCH_FINAL_PACKAGE%" -u pytorch-nightly --label main --force --no-progress
+
+IF ERRORLEVEL 1 (
+    echo Anaconda upload retry times remaining: %RETRY_TIMES%
+    echo Sleep time: %SLEEP_TIME% seconds
+    IF %RETRY_TIMES% EQU 0 (
+        echo Upload failed
+        exit /b 1
+    )
+    waitfor SomethingThatIsNeverHappening /t %SLEEP_TIME% 2>nul || ver >nul
+    goto upload
+) ELSE (
+    set RETRY_TIMES=
+    set SLEEP_TIME=
+)
diff --git a/packaging/windows/internal/vs_install.bat b/packaging/windows/internal/vs_install.bat
index 624227f0be0..e6589092372 100644
--- a/packaging/windows/internal/vs_install.bat
+++ b/packaging/windows/internal/vs_install.bat
@@ -1,23 +1,23 @@
 @echo off
 
 set VS_DOWNLOAD_LINK=https://aka.ms/vs/15/release/vs_buildtools.exe
-IF "%VS_LATEST%" == "1" (
-   set VS_INSTALL_ARGS= --nocache --norestart --quiet --wait --add Microsoft.VisualStudio.Workload.VCTools
-   set VSDEVCMD_ARGS=
-) ELSE (
-   set VS_INSTALL_ARGS=--nocache --quiet --wait --add Microsoft.VisualStudio.Workload.VCTools ^
-                                                --add Microsoft.VisualStudio.Component.VC.Tools.14.11 ^
-                                                --add Microsoft.Component.MSBuild ^
-                                                --add Microsoft.VisualStudio.Component.Roslyn.Compiler ^
-                                                --add Microsoft.VisualStudio.Component.TextTemplating ^
-                                                --add Microsoft.VisualStudio.Component.VC.CoreIde ^
-                                                --add Microsoft.VisualStudio.Component.VC.Redist.14.Latest ^
-                                                --add Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Core ^
-                                                --add Microsoft.VisualStudio.Component.VC.Tools.x86.x64 ^
-                                                --add Microsoft.VisualStudio.Component.VC.Tools.14.11 ^
-                                                --add Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Win81
-   set VSDEVCMD_ARGS=-vcvars_ver=14.11
-)
+REM IF "%VS_LATEST%" == "1" (
+REM    set VS_INSTALL_ARGS= --nocache --norestart --quiet --wait --add Microsoft.VisualStudio.Workload.VCTools
+REM    set VSDEVCMD_ARGS=
+REM ) ELSE (
+set VS_INSTALL_ARGS=--nocache --quiet --wait --add Microsoft.VisualStudio.Workload.VCTools ^
+                                             --add Microsoft.VisualStudio.Component.VC.Tools.14.11 ^
+                                             --add Microsoft.Component.MSBuild ^
+                                             --add Microsoft.VisualStudio.Component.Roslyn.Compiler ^
+                                             --add Microsoft.VisualStudio.Component.TextTemplating ^
+                                             --add Microsoft.VisualStudio.Component.VC.CoreIde ^
+                                             --add Microsoft.VisualStudio.Component.VC.Redist.14.Latest ^
+                                             --add Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Core ^
+                                             --add Microsoft.VisualStudio.Component.VC.Tools.x86.x64 ^
+                                             --add Microsoft.VisualStudio.Component.VC.Tools.14.11 ^
+                                             --add Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Win81
+set VSDEVCMD_ARGS=-vcvars_ver=14.11
+REM )
 
 curl -k -L %VS_DOWNLOAD_LINK% --output vs_installer.exe
 if errorlevel 1 exit /b 1
diff --git a/packaging/windows/cuda90.bat b/packaging/windows/old/cuda90.bat
similarity index 100%
rename from packaging/windows/cuda90.bat
rename to packaging/windows/old/cuda90.bat
diff --git a/packaging/windows/templates/auth_task.yml b/packaging/windows/templates/auth_task.yml
new file mode 100644
index 00000000000..ece66412ff4
--- /dev/null
+++ b/packaging/windows/templates/auth_task.yml
@@ -0,0 +1,17 @@
+jobs:
+- job: 'VSTS_Auth_Task'
+  timeoutInMinutes: 5
+  cancelTimeoutInMinutes: 5
+  variables:
+  - group: 'peterjc-vsts-token'
+
+  pool:
+    vmImage: 'win1803'
+
+  steps:
+  - checkout: self
+    clean: true
+
+  - template: vsts_auth.yml
+    parameters:
+      auth: $(vsts_auth)
diff --git a/packaging/windows/templates/build_conda.yml b/packaging/windows/templates/build_conda.yml
new file mode 100644
index 00000000000..2d88271ad33
--- /dev/null
+++ b/packaging/windows/templates/build_conda.yml
@@ -0,0 +1,15 @@
+parameters:
+  msagent: false
+
+steps:
+- bash: 'find . -name "*.sh" -exec dos2unix {} +'
+  displayName: Replace file endings
+
+- script: 'if not exist %PYTORCH_FINAL_PACKAGE_DIR% mkdir %PYTORCH_FINAL_PACKAGE_DIR%'
+  displayName: 'Create final package directory'
+
+- bash: './packaging/conda/build_vision.sh $CUDA_VERSION $TORCHVISION_BUILD_VERSION $TORCHVISION_BUILD_NUMBER'
+  displayName: Build
+  env:
+    ${{ if eq(parameters.msagent, 'true') }}:
+      MAX_JOBS: 2
diff --git a/packaging/windows/templates/build_task.yml b/packaging/windows/templates/build_task.yml
new file mode 100644
index 00000000000..8ecbf93abfa
--- /dev/null
+++ b/packaging/windows/templates/build_task.yml
@@ -0,0 +1,140 @@
+parameters:
+  package: ''
+  spec: ''
+  jobDesc: ''
+  packageDesc: ''
+  msagent: true
+  cpuEnabled: true
+  cudaEnabled: true
+  condaEnabled: true
+  wheelsEnabled: true
+  override: false
+
+jobs:
+- job: 'Windows_${{ parameters.spec }}_${{ parameters.package }}_Build'
+  timeoutInMinutes: 60
+  cancelTimeoutInMinutes: 5
+  condition: > 
+    or(and(eq('${{ parameters.package }}', 'Conda'), eq('${{ parameters.spec }}', 'CPU'),
+           eq('${{ parameters.condaEnabled }}', 'true'), eq('${{ parameters.cpuEnabled }}', 'true')),
+       and(eq('${{ parameters.package }}', 'Wheels'), eq('${{ parameters.spec }}', 'CPU'),
+           eq('${{ parameters.wheelsEnabled }}', 'true'), eq('${{ parameters.cpuEnabled }}', 'true')),
+       and(eq('${{ parameters.package }}', 'Conda'), eq('${{ parameters.spec }}', 'CUDA'),
+           eq('${{ parameters.condaEnabled }}', 'true'), eq('${{ parameters.cudaEnabled }}', 'true')),
+       and(eq('${{ parameters.package }}', 'Wheels'), eq('${{ parameters.spec }}', 'CUDA'),
+           eq('${{ parameters.wheelsEnabled }}', 'true'), eq('${{ parameters.cudaEnabled }}', 'true')))
+  variables:
+    - ${{ if eq(parameters.override, 'true') }}:
+      - name: TORCHVISION_BUILD_NUMBER
+        value: 1
+      - name: PYTORCH_REPO
+        value: 'pytorch'
+      - name: PYTORCH_BRANCH
+        value: 'v0.4.0'
+    - ${{ if eq(parameters.msagent, 'true') }}:
+      - name: USE_SCCACHE
+        value: 0
+    - ${{ if eq(parameters.msagent, 'false') }}:
+      - name: USE_SCCACHE
+        value: 1
+    - ${{ if eq(parameters.package, 'Conda') }}:
+      - group: peterjc_anaconda_token
+      - name: PYTORCH_FINAL_PACKAGE_DIR
+        value: '$(Build.Repository.LocalPath)\packaging\windows\output'
+      
+  strategy:
+    maxParallel: 10
+    matrix:
+      ${{ if eq(parameters.spec, 'CPU') }}:
+        PY3.5:
+          DESIRED_PYTHON: 3.5
+          CUDA_VERSION: cpu
+        PY3.6:
+          DESIRED_PYTHON: 3.6
+          CUDA_VERSION: cpu
+        PY3.7:
+          DESIRED_PYTHON: 3.7
+          CUDA_VERSION: cpu
+      ${{ if ne(parameters.spec, 'CPU') }}:
+        # PY3.5_92:
+        #   DESIRED_PYTHON: 3.5
+        #   CUDA_VERSION: 92
+        # PY3.6_92:
+        #   DESIRED_PYTHON: 3.6
+        #   CUDA_VERSION: 92
+        # PY3.7_92:
+        #   DESIRED_PYTHON: 3.7
+        #   CUDA_VERSION: 92
+        PY3.5_100:
+          DESIRED_PYTHON: 3.5
+          CUDA_VERSION: 100
+        PY3.6_100:
+          DESIRED_PYTHON: 3.6
+          CUDA_VERSION: 100
+        PY3.7_100:
+          DESIRED_PYTHON: 3.7
+          CUDA_VERSION: 100
+
+  pool:
+    ${{ if eq(parameters.msagent, 'true') }}:
+      vmImage: 'win1803'
+    ${{ if eq(parameters.msagent, 'false') }}:
+      name: 'release'
+
+  steps:
+  - checkout: self
+    clean: true
+
+  - template: setup_env_for_msagent.yml
+    parameters:
+      msagent: ${{ parameters.msagent }}
+
+  # - ${{ if and(eq(parameters.override, 'true'),  eq(parameters.package, 'Wheels')) }}:
+  #   - template: override_pytorch_version.yml
+
+  - template: setup_nightly_variables.yml
+    parameters:
+      package: ${{ parameters.package }}
+
+  - ${{ if eq(parameters.package, 'Wheels') }}:
+    - template: build_wheels.yml
+      parameters:
+        msagent: ${{ parameters.msagent }}
+
+  - ${{ if eq(parameters.package, 'Conda') }}:
+    - template: build_conda.yml
+      parameters:
+        msagent: ${{ parameters.msagent }}
+
+  - ${{ if or(eq(parameters.package, 'Wheels'), eq(parameters.package, 'Conda')) }}:
+    - template: publish_test_results.yml
+      parameters:
+        msagent: ${{ parameters.msagent }}
+
+  # Upload binaries to S3 & Anaconda Cloud. To disable the uploads, comment out this section.
+  - ${{ if and(eq(parameters.package, 'Wheels'), eq(parameters.spec, 'CPU')) }}:
+    - template: upload_to_s3.yml
+      parameters:
+        cuVer: '$(CUDA_VERSION)'
+        cudaVer: '$(CUDA_VERSION)'
+
+  - ${{ if and(eq(parameters.package, 'Wheels'), ne(parameters.spec, 'CPU')) }}:
+    - template: upload_to_s3.yml
+      parameters:
+        cuVer: 'cu$(CUDA_VERSION)'
+        cudaVer: 'cuda$(CUDA_VERSION)'
+
+  - ${{ if eq(parameters.package, 'Conda') }}:
+    - template: upload_to_conda.yml
+      parameters:
+        user: $(peterjc_conda_username)
+        pass: $(peterjc_conda_password)
+
+  # If you want to upload binaries to Azure Git, please uncomment this section.
+  # - ${{ if or(eq(parameters.package, 'Wheels'), eq(parameters.package, 'Conda')) }}:
+  #   - template: publish_test_results.yml
+  #     parameters:
+  #       msagent: ${{ parameters.msagent }}
+  #   - template: publish_packages.yml
+  #     parameters:
+  #       package: ${{ parameters.package }}
diff --git a/packaging/windows/templates/build_wheels.yml b/packaging/windows/templates/build_wheels.yml
new file mode 100644
index 00000000000..05c5712e334
--- /dev/null
+++ b/packaging/windows/templates/build_wheels.yml
@@ -0,0 +1,9 @@
+parameters:
+  msagent: false
+
+steps:
+- script: 'call packaging/windows/build_vision.bat %CUDA_VERSION% %TORCHVISION_BUILD_VERSION% %TORCHVISION_BUILD_NUMBER%'
+  displayName: Build
+  env:
+    ${{ if eq(parameters.msagent, 'true') }}:
+      MAX_JOBS: 2
diff --git a/packaging/windows/templates/linux_build_task.yml b/packaging/windows/templates/linux_build_task.yml
new file mode 100644
index 00000000000..0b32892791a
--- /dev/null
+++ b/packaging/windows/templates/linux_build_task.yml
@@ -0,0 +1,38 @@
+parameters:
+  msagent: true
+  enabled: false
+
+jobs:
+- job: 'Linux_CPU_Conda_Build'
+  timeoutInMinutes: 0
+  cancelTimeoutInMinutes: 5
+  condition: ${{ eq(parameters.enabled, 'true') }}
+  variables:
+    CUDA_VERSION: cpu
+    TORCH_CONDA_BUILD_FOLDER: pytorch-nightly
+    PYTORCH_FINAL_PACKAGE_DIR: '$(Build.Repository.LocalPath)/output'
+
+  strategy:
+    maxParallel: 10
+    matrix:
+      PY3.5:
+        DESIRED_PYTHON: 3.5
+
+  pool:
+    vmImage: 'ubuntu-16.04'
+
+  steps:
+  - checkout: self
+    clean: true
+
+  - script: 'sudo apt-get install p7zip-full'
+    displayName: 'Install 7Zip'
+
+  - task: CondaEnvironment@1
+    displayName: 'Install conda-build'
+    inputs:
+      packageSpecs: 'conda-build'
+
+  - template: build_conda.yml
+    parameters:
+      msagent: ${{ parameters.msagent }}
diff --git a/packaging/windows/templates/override_pytorch_version.yml b/packaging/windows/templates/override_pytorch_version.yml
new file mode 100644
index 00000000000..8af93ae43a4
--- /dev/null
+++ b/packaging/windows/templates/override_pytorch_version.yml
@@ -0,0 +1,6 @@
+steps:
+- script: 'windows/internal/override_pytorch_version.bat'
+  displayName: 'Override PyTorch Build Version for Wheels'
+
+- script: 'echo $(PYTORCH_BUILD_VERSION)'
+  displayName: 'Show PyTorch Build Version'
diff --git a/packaging/windows/templates/publish_packages.yml b/packaging/windows/templates/publish_packages.yml
new file mode 100644
index 00000000000..51ce8247bf7
--- /dev/null
+++ b/packaging/windows/templates/publish_packages.yml
@@ -0,0 +1,8 @@
+parameters:
+  package: ''
+
+steps:
+- script: 'packaging/windows/internal/publish.bat'
+  displayName: 'Upload packages to Azure DevOps Repo'
+  env:
+    PACKAGEFULLNAME: ${{ parameters.package }}
diff --git a/packaging/windows/templates/publish_test_results.yml b/packaging/windows/templates/publish_test_results.yml
new file mode 100644
index 00000000000..1e0dc0215d3
--- /dev/null
+++ b/packaging/windows/templates/publish_test_results.yml
@@ -0,0 +1,6 @@
+steps:
+- task: PublishTestResults@2 # No test results to publish
+  inputs:
+    testResultsFiles: 'windows/pytorch/test/**/*.xml'
+    testRunTitle: 'Publish test results'
+  enabled: false
diff --git a/packaging/windows/templates/setup_env_for_msagent.yml b/packaging/windows/templates/setup_env_for_msagent.yml
new file mode 100644
index 00000000000..377734fa3db
--- /dev/null
+++ b/packaging/windows/templates/setup_env_for_msagent.yml
@@ -0,0 +1,25 @@
+parameters:
+   msagent: false
+
+steps:
+- ${{ if eq(parameters.msagent, 'true') }}:
+  - task: BatchScript@1
+    displayName: 'Install 7Zip & cURL'
+    inputs:
+      filename: 'packaging/windows/internal/dep_install.bat'
+
+      modifyEnvironment: true
+
+  - task: BatchScript@1
+    displayName: 'Install Visual Studio 2017'
+    inputs:
+      filename: 'packaging/windows/internal/vs_install.bat'
+
+      modifyEnvironment: true
+
+  - task: BatchScript@1
+    displayName: 'Install CUDA'
+    inputs:
+      filename: 'packaging/windows/internal/cuda_install.bat'
+
+      modifyEnvironment: true
diff --git a/packaging/windows/templates/setup_nightly_variables.yml b/packaging/windows/templates/setup_nightly_variables.yml
new file mode 100644
index 00000000000..94b2fe934ce
--- /dev/null
+++ b/packaging/windows/templates/setup_nightly_variables.yml
@@ -0,0 +1,11 @@
+parameters:
+  package: ''
+
+steps:
+- task: BatchScript@1
+  displayName: 'Setup nightly variables'
+  inputs:
+    filename: 'packaging/windows/internal/nightly_defaults.bat'
+    arguments: ${{ parameters.package }}
+
+    modifyEnvironment: true
diff --git a/packaging/windows/templates/upload_to_conda.yml b/packaging/windows/templates/upload_to_conda.yml
new file mode 100644
index 00000000000..dc172bcf878
--- /dev/null
+++ b/packaging/windows/templates/upload_to_conda.yml
@@ -0,0 +1,10 @@
+parameters:
+  user: ''
+  pass: ''
+
+steps:
+- script: 'call packaging/windows/internal/upload.bat'
+  displayName: 'Upload packages to Anaconda Cloud'
+  env:
+    PYTORCH_ANACONDA_USERNAME: ${{ parameters.user }}
+    PYTORCH_ANACONDA_PASSWORD: ${{ parameters.pass }}
diff --git a/packaging/windows/templates/upload_to_s3.yml b/packaging/windows/templates/upload_to_s3.yml
new file mode 100644
index 00000000000..a31bcb15ae1
--- /dev/null
+++ b/packaging/windows/templates/upload_to_s3.yml
@@ -0,0 +1,15 @@
+parameters:
+  cuVer: ''
+  cudaVer: ''
+
+steps:
+- task: AmazonWebServices.aws-vsts-tools.S3Upload.S3Upload@1
+  displayName: 'Upload ${{ parameters.cuVer }} wheel to S3'
+  inputs:
+    awsCredentials: 'Pytorch S3 bucket'
+    bucketName: 'pytorch'
+    sourceFolder: 'packaging/windows/output/${{ parameters.cudaVer }}'
+    globExpressions: '*.whl'
+    targetFolder: 'whl/nightly/${{ parameters.cuVer }}/'
+    filesAcl: 'public-read'
+    flattenFolders: 'true'
diff --git a/packaging/windows/templates/vsts_auth.yml b/packaging/windows/templates/vsts_auth.yml
new file mode 100644
index 00000000000..fde767d7f12
--- /dev/null
+++ b/packaging/windows/templates/vsts_auth.yml
@@ -0,0 +1,8 @@
+parameters:
+  auth: ''
+
+steps:
+- script: 'call packaging/windows/internal/auth.bat'
+  displayName: 'Sign in to Azure Pipelines'
+  env:
+    VSTS_AUTH: ${{ parameters.auth }}

From d7e88fb2625f0f427e02ea7d60a90a9fa02f9126 Mon Sep 17 00:00:00 2001
From: peterjc123 <peterghost86@gmail.com>
Date: Wed, 18 Sep 2019 22:08:07 +0800
Subject: [PATCH 18/26] Fix Windows CI (#1347)

* Fix Windows CI

* Use correct version
---
 .circleci/config.yml    | 2 +-
 .circleci/config.yml.in | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 0cbbef4fdcd..b112306b6fd 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -176,7 +176,7 @@ jobs:
           command: |
             choco install miniconda3
             $env:PATH = "C:\tools\miniconda3;C:\tools\miniconda3\Library\usr\bin;C:\tools\miniconda3\Scripts;C:\tools\miniconda3\bin" + $env:PATH
-            conda install -yq conda-build
+            conda install -yq conda-build "conda-package-handling<1.5.0"
             bash packaging/build_conda.sh
           shell: powershell.exe
 
diff --git a/.circleci/config.yml.in b/.circleci/config.yml.in
index fe094ebcd1c..52be9eef8fc 100644
--- a/.circleci/config.yml.in
+++ b/.circleci/config.yml.in
@@ -176,7 +176,7 @@ jobs:
           command: |
             choco install miniconda3
             $env:PATH = "C:\tools\miniconda3;C:\tools\miniconda3\Library\usr\bin;C:\tools\miniconda3\Scripts;C:\tools\miniconda3\bin" + $env:PATH
-            conda install -yq conda-build
+            conda install -yq conda-build "conda-package-handling<1.5.0"
             bash packaging/build_conda.sh
           shell: powershell.exe
 

From 5d5d425d6bd1ce403f03f4fd21e9a6ce95794040 Mon Sep 17 00:00:00 2001
From: Francisco Massa <fvsmassa@gmail.com>
Date: Wed, 18 Sep 2019 14:38:09 -0300
Subject: [PATCH 19/26] Fix anchor dtype in AnchorGenerator (#1341)

* Make AnchorGenerator support half precision

* Add test for fasterrcnn with double

* convert gt_boxes to right dtype
---
 test/test_models.py                       | 14 ++++++++++++++
 torchvision/models/detection/roi_heads.py |  3 ++-
 torchvision/models/detection/rpn.py       | 12 +++++++-----
 3 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/test/test_models.py b/test/test_models.py
index 4cc1d6a4bad..8f9a87bfb8d 100644
--- a/test/test_models.py
+++ b/test/test_models.py
@@ -146,6 +146,20 @@ def test_mobilenetv2_residual_setting(self):
         out = model(x)
         self.assertEqual(out.shape[-1], 1000)
 
+    def test_fasterrcnn_double(self):
+        model = models.detection.fasterrcnn_resnet50_fpn(num_classes=50, pretrained_backbone=False)
+        model.double()
+        model.eval()
+        input_shape = (3, 300, 300)
+        x = torch.rand(input_shape, dtype=torch.float64)
+        model_input = [x]
+        out = model(model_input)
+        self.assertIs(model_input[0], x)
+        self.assertEqual(len(out), 1)
+        self.assertTrue("boxes" in out[0])
+        self.assertTrue("scores" in out[0])
+        self.assertTrue("labels" in out[0])
+
 
 for model_name in get_available_classification_models():
     # for-loop bodies don't define scopes, so we have to save the variables
diff --git a/torchvision/models/detection/roi_heads.py b/torchvision/models/detection/roi_heads.py
index beea0062f3a..7babaf0b891 100644
--- a/torchvision/models/detection/roi_heads.py
+++ b/torchvision/models/detection/roi_heads.py
@@ -444,7 +444,8 @@ def check_targets(self, targets):
 
     def select_training_samples(self, proposals, targets):
         self.check_targets(targets)
-        gt_boxes = [t["boxes"] for t in targets]
+        dtype = proposals[0].dtype
+        gt_boxes = [t["boxes"].to(dtype) for t in targets]
         gt_labels = [t["labels"] for t in targets]
 
         # append ground-truth bboxes to propos
diff --git a/torchvision/models/detection/rpn.py b/torchvision/models/detection/rpn.py
index 231d9edaeac..aa080984d2a 100644
--- a/torchvision/models/detection/rpn.py
+++ b/torchvision/models/detection/rpn.py
@@ -49,9 +49,9 @@ def __init__(
         self._cache = {}
 
     @staticmethod
-    def generate_anchors(scales, aspect_ratios, device="cpu"):
-        scales = torch.as_tensor(scales, dtype=torch.float32, device=device)
-        aspect_ratios = torch.as_tensor(aspect_ratios, dtype=torch.float32, device=device)
+    def generate_anchors(scales, aspect_ratios, dtype=torch.float32, device="cpu"):
+        scales = torch.as_tensor(scales, dtype=dtype, device=device)
+        aspect_ratios = torch.as_tensor(aspect_ratios, dtype=dtype, device=device)
         h_ratios = torch.sqrt(aspect_ratios)
         w_ratios = 1 / h_ratios
 
@@ -61,13 +61,14 @@ def generate_anchors(scales, aspect_ratios, device="cpu"):
         base_anchors = torch.stack([-ws, -hs, ws, hs], dim=1) / 2
         return base_anchors.round()
 
-    def set_cell_anchors(self, device):
+    def set_cell_anchors(self, dtype, device):
         if self.cell_anchors is not None:
             return self.cell_anchors
         cell_anchors = [
             self.generate_anchors(
                 sizes,
                 aspect_ratios,
+                dtype,
                 device
             )
             for sizes, aspect_ratios in zip(self.sizes, self.aspect_ratios)
@@ -114,7 +115,8 @@ def forward(self, image_list, feature_maps):
         grid_sizes = tuple([feature_map.shape[-2:] for feature_map in feature_maps])
         image_size = image_list.tensors.shape[-2:]
         strides = tuple((image_size[0] / g[0], image_size[1] / g[1]) for g in grid_sizes)
-        self.set_cell_anchors(feature_maps[0].device)
+        dtype, device = feature_maps[0].dtype, feature_maps[0].device
+        self.set_cell_anchors(dtype, device)
         anchors_over_all_feature_maps = self.cached_grid_anchors(grid_sizes, strides)
         anchors = []
         for i, (image_height, image_width) in enumerate(image_list.image_sizes):

From 0dd5588207b6c0f9b3b129af7d73088e8157a153 Mon Sep 17 00:00:00 2001
From: Francisco Massa <fvsmassa@gmail.com>
Date: Wed, 18 Sep 2019 17:19:41 -0300
Subject: [PATCH 20/26] Add TorchHub tests to torchvision (#1319)

* Add Hub tests in torchvision

* Run test_hub in its own interpreter
---
 .travis.yml      |  1 +
 test/test_hub.py | 56 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 57 insertions(+)
 create mode 100644 test/test_hub.py

diff --git a/.travis.yml b/.travis.yml
index 76080f7138f..be331bb0bed 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -76,6 +76,7 @@ install:
 
 script:
   - pytest --cov-config .coveragerc --cov torchvision --cov $TV_INSTALL_PATH test
+  - pytest test/test_hub.py
 
 after_success:
   # Necessary to run coverage combine to rewrite paths from
diff --git a/test/test_hub.py b/test/test_hub.py
new file mode 100644
index 00000000000..2f0ddfb2537
--- /dev/null
+++ b/test/test_hub.py
@@ -0,0 +1,56 @@
+import torch.hub as hub
+import tempfile
+import shutil
+import os
+import sys
+import unittest
+
+
+def sum_of_model_parameters(model):
+    s = 0
+    for p in model.parameters():
+        s += p.sum()
+    return s
+
+
+SUM_OF_PRETRAINED_RESNET18_PARAMS = -12703.99609375
+
+
+@unittest.skipIf('torchvision' in sys.modules,
+                 'TestHub must start without torchvision imported')
+class TestHub(unittest.TestCase):
+    # The skipIf check above runs only ONCE, before any of the tests start.
+    # - If torchvision is already imported by then, we might e.g. find _C.so, which
+    #   exists in the installed wheel but not in the downloaded zip.
+    # - After the first test has run, torchvision is in sys.modules anyway, because
+    #   all hub tests run in the same Python process.
+
+    def test_load_from_github(self):
+        hub_model = hub.load(
+            'pytorch/vision',
+            'resnet18',
+            pretrained=True,
+            progress=False)
+        self.assertEqual(sum_of_model_parameters(hub_model).item(),
+                         SUM_OF_PRETRAINED_RESNET18_PARAMS)
+
+    def test_set_dir(self):
+        temp_dir = tempfile.gettempdir()
+        hub.set_dir(temp_dir)
+        hub_model = hub.load(
+            'pytorch/vision',
+            'resnet18',
+            pretrained=True,
+            progress=False)
+        self.assertEqual(sum_of_model_parameters(hub_model).item(),
+                         SUM_OF_PRETRAINED_RESNET18_PARAMS)
+        assert os.path.exists(temp_dir + '/pytorch_vision_master')
+        shutil.rmtree(temp_dir + '/pytorch_vision_master')
+
+    def test_list_entrypoints(self):
+        entry_lists = hub.list('pytorch/vision', force_reload=True)
+        self.assertIn('resnet18', entry_lists)
+
+
+if __name__ == "__main__":
+    unittest.main()

From f677ea31db8f45dbfec2fe5e519da82853815776 Mon Sep 17 00:00:00 2001
From: Francisco Massa <fvsmassa@gmail.com>
Date: Wed, 18 Sep 2019 18:20:08 -0300
Subject: [PATCH 21/26] Remove cpp extensions in favor of torch ops (#1348)

* Remove C++ extensions in favor of custom ops

* Remove unused custom_ops.cpp file

* Rename _custom_ops.py

* Reorganize functions

* Minor improvements and fixes

* Fix lint

* Fully scriptable ops

* Import types used by annotations
---
 setup.py                                      |  22 +--
 test/test_ops.py                              |   8 +-
 torchvision/__init__.py                       |   2 +
 torchvision/csrc/ROIAlign.h                   |  71 ++++++++
 torchvision/csrc/ROIPool.h                    |  64 ++++++-
 torchvision/csrc/custom_ops/custom_ops.cpp    | 159 ------------------
 torchvision/csrc/vision.cpp                   |  46 +++--
 torchvision/extension.py                      |  40 +++--
 torchvision/ops/__init__.py                   |   4 +
 .../{_custom_ops.py => _register_onnx_ops.py} |  13 +-
 torchvision/ops/_utils.py                     |  18 +-
 torchvision/ops/boxes.py                      |   2 -
 torchvision/ops/roi_align.py                  |  10 +-
 torchvision/ops/roi_pool.py                   |  10 +-
 14 files changed, 230 insertions(+), 239 deletions(-)
 delete mode 100644 torchvision/csrc/custom_ops/custom_ops.cpp
 rename torchvision/ops/{_custom_ops.py => _register_onnx_ops.py} (87%)

diff --git a/setup.py b/setup.py
index e9b4627ec77..74d39697513 100644
--- a/setup.py
+++ b/setup.py
@@ -52,9 +52,9 @@ def write_version_file():
     with open(version_path, 'w') as f:
         f.write("__version__ = '{}'\n".format(version))
         f.write("git_version = {}\n".format(repr(sha)))
-        f.write("from torchvision import _C\n")
-        f.write("if hasattr(_C, 'CUDA_VERSION'):\n")
-        f.write("    cuda = _C.CUDA_VERSION\n")
+        f.write("from torchvision.extension import _check_cuda_version\n")
+        f.write("if _check_cuda_version() > 0:\n")
+        f.write("    cuda = _check_cuda_version()\n")
 
 
 write_version_file()
@@ -96,21 +96,12 @@ def get_extensions():
     source_models = [os.path.join(models_dir, s) for s in source_models]
     tests = test_file + source_models
 
-    custom_ops_sources = [os.path.join(extensions_dir, "custom_ops", "custom_ops.cpp"),
-                          os.path.join(extensions_dir, "cpu", "nms_cpu.cpp"),
-                          os.path.join(extensions_dir, "cpu", "ROIAlign_cpu.cpp"),
-                          os.path.join(extensions_dir, "cpu", "ROIPool_cpu.cpp")]
-    custom_ops_sources_cuda = [os.path.join(extensions_dir, "cuda", "nms_cuda.cu"),
-                               os.path.join(extensions_dir, "cuda", "ROIAlign_cuda.cu"),
-                               os.path.join(extensions_dir, "cuda", "ROIPool_cuda.cu")]
-
     define_macros = []
 
     extra_compile_args = {}
     if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv('FORCE_CUDA', '0') == '1':
         extension = CUDAExtension
         sources += source_cuda
-        custom_ops_sources += custom_ops_sources_cuda
         define_macros += [('WITH_CUDA', None)]
         nvcc_flags = os.getenv('NVCC_FLAGS', '')
         if nvcc_flags == '':
@@ -148,13 +139,6 @@ def get_extensions():
             define_macros=define_macros,
             extra_compile_args=extra_compile_args,
         ),
-        extension(
-            "torchvision._custom_ops",
-            sources=custom_ops_sources,
-            include_dirs=include_dirs,
-            define_macros=define_macros,
-            extra_compile_args=extra_compile_args,
-        ),
     ]
 
     return ext_modules
diff --git a/test/test_ops.py b/test/test_ops.py
index 7db8c6981d0..b7d41e8e6c3 100644
--- a/test/test_ops.py
+++ b/test/test_ops.py
@@ -190,7 +190,7 @@ def func(input):
 
         @torch.jit.script
         def script_func(input, rois):
-            return torch.ops.torchvision.roi_pool(input, rois, 1.0, 5, 5)[0]
+            return ops.roi_pool(input, rois, 5, 1.0)[0]
 
         assert gradcheck(lambda x: script_func(x, rois), (x,)), 'gradcheck failed for scripted roi_pool'
 
@@ -282,7 +282,7 @@ def func(input):
 
         @torch.jit.script
         def script_func(input, rois):
-            return torch.ops.torchvision.roi_pool(input, rois, 1.0, 5, 5)[0]
+            return ops.roi_pool(input, rois, 5, 1.0)[0]
 
         assert gradcheck(lambda x: script_func(x, rois), (x,)), 'gradcheck failed for scripted roi_pool on CUDA'
 
@@ -442,7 +442,7 @@ def func(input):
 
         @torch.jit.script
         def script_func(input, rois):
-            return torch.ops.torchvision.roi_align(input, rois, 0.5, 5, 5, 1)[0]
+            return ops.roi_align(input, rois, 5, 0.5, 1)[0]
 
         assert gradcheck(lambda x: script_func(x, rois), (x,)), 'gradcheck failed for scripted roi_align'
 
@@ -482,7 +482,7 @@ def func(input):
 
         @torch.jit.script
         def script_func(input, rois):
-            return torch.ops.torchvision.roi_align(input, rois, 0.5, 5, 5, 1)[0]
+            return ops.roi_align(input, rois, 5, 0.5, 1)[0]
 
         assert gradcheck(lambda x: script_func(x, rois), (x,)), 'gradcheck failed for scripted roi_align on CUDA'
 
diff --git a/torchvision/__init__.py b/torchvision/__init__.py
index 297aca2b228..84dbe4fa1ee 100644
--- a/torchvision/__init__.py
+++ b/torchvision/__init__.py
@@ -5,6 +5,8 @@
 from torchvision import utils
 from torchvision import io
 
+from .extension import _HAS_OPS
+
 try:
     from .version import __version__  # noqa: F401
 except ImportError:
diff --git a/torchvision/csrc/ROIAlign.h b/torchvision/csrc/ROIAlign.h
index 7e18cf68f57..765d4879d99 100644
--- a/torchvision/csrc/ROIAlign.h
+++ b/torchvision/csrc/ROIAlign.h
@@ -74,3 +74,74 @@ at::Tensor ROIAlign_backward(
       width,
       sampling_ratio);
 }
+
+using namespace at;
+using torch::Tensor;
+using torch::autograd::AutogradContext;
+using torch::autograd::Variable;
+using torch::autograd::variable_list;
+
+class ROIAlignFunction : public torch::autograd::Function<ROIAlignFunction> {
+ public:
+  static variable_list forward(
+      AutogradContext* ctx,
+      Variable input,
+      Variable rois,
+      const double spatial_scale,
+      const int64_t pooled_height,
+      const int64_t pooled_width,
+      const int64_t sampling_ratio) {
+    ctx->saved_data["spatial_scale"] = spatial_scale;
+    ctx->saved_data["pooled_height"] = pooled_height;
+    ctx->saved_data["pooled_width"] = pooled_width;
+    ctx->saved_data["sampling_ratio"] = sampling_ratio;
+    ctx->saved_data["input_shape"] = input.sizes();
+    ctx->save_for_backward({rois});
+    auto result = ROIAlign_forward(
+        input,
+        rois,
+        spatial_scale,
+        pooled_height,
+        pooled_width,
+        sampling_ratio);
+    return {result};
+  }
+
+  static variable_list backward(
+      AutogradContext* ctx,
+      variable_list grad_output) {
+    // Use data saved in forward
+    auto saved = ctx->get_saved_variables();
+    auto rois = saved[0];
+    auto input_shape = ctx->saved_data["input_shape"].toIntList();
+    auto grad_in = ROIAlign_backward(
+        grad_output[0],
+        rois,
+        ctx->saved_data["spatial_scale"].toDouble(),
+        ctx->saved_data["pooled_height"].toInt(),
+        ctx->saved_data["pooled_width"].toInt(),
+        input_shape[0],
+        input_shape[1],
+        input_shape[2],
+        input_shape[3],
+        ctx->saved_data["sampling_ratio"].toInt());
+    return {
+        grad_in, Variable(), Variable(), Variable(), Variable(), Variable()};
+  }
+};
+
+Tensor roi_align(
+    const Tensor& input,
+    const Tensor& rois,
+    const double spatial_scale,
+    const int64_t pooled_height,
+    const int64_t pooled_width,
+    const int64_t sampling_ratio) {
+  return ROIAlignFunction::apply(
+      input,
+      rois,
+      spatial_scale,
+      pooled_height,
+      pooled_width,
+      sampling_ratio)[0];
+}
diff --git a/torchvision/csrc/ROIPool.h b/torchvision/csrc/ROIPool.h
index 7aefcc5e810..79b40293176 100644
--- a/torchvision/csrc/ROIPool.h
+++ b/torchvision/csrc/ROIPool.h
@@ -63,4 +63,66 @@ at::Tensor ROIPool_backward(
       channels,
       height,
       width);
-}
\ No newline at end of file
+}
+
+using namespace at;
+using torch::Tensor;
+using torch::autograd::AutogradContext;
+using torch::autograd::Variable;
+using torch::autograd::variable_list;
+
+class ROIPoolFunction : public torch::autograd::Function<ROIPoolFunction> {
+ public:
+  static variable_list forward(
+      AutogradContext* ctx,
+      Variable input,
+      Variable rois,
+      const double spatial_scale,
+      const int64_t pooled_height,
+      const int64_t pooled_width) {
+    ctx->saved_data["spatial_scale"] = spatial_scale;
+    ctx->saved_data["pooled_height"] = pooled_height;
+    ctx->saved_data["pooled_width"] = pooled_width;
+    ctx->saved_data["input_shape"] = input.sizes();
+    auto result = ROIPool_forward(
+        input, rois, spatial_scale, pooled_height, pooled_width);
+    auto output = std::get<0>(result);
+    auto argmax = std::get<1>(result);
+    ctx->save_for_backward({rois, argmax});
+    ctx->mark_non_differentiable({argmax});
+    return {output, argmax};
+  }
+
+  static variable_list backward(
+      AutogradContext* ctx,
+      variable_list grad_output) {
+    // Use data saved in forward
+    auto saved = ctx->get_saved_variables();
+    auto rois = saved[0];
+    auto argmax = saved[1];
+    auto input_shape = ctx->saved_data["input_shape"].toIntList();
+    auto grad_in = ROIPool_backward(
+        grad_output[0],
+        rois,
+        argmax,
+        ctx->saved_data["spatial_scale"].toDouble(),
+        ctx->saved_data["pooled_height"].toInt(),
+        ctx->saved_data["pooled_width"].toInt(),
+        input_shape[0],
+        input_shape[1],
+        input_shape[2],
+        input_shape[3]);
+    return {grad_in, Variable(), Variable(), Variable(), Variable()};
+  }
+};
+
+std::tuple<Tensor, Tensor> roi_pool(
+    const Tensor& input,
+    const Tensor& rois,
+    const double spatial_scale,
+    const int64_t pooled_height,
+    const int64_t pooled_width) {
+  auto result = ROIPoolFunction::apply(
+      input, rois, spatial_scale, pooled_height, pooled_width);
+  return std::tuple<Tensor, Tensor>(result[0], result[1]);
+}
diff --git a/torchvision/csrc/custom_ops/custom_ops.cpp b/torchvision/csrc/custom_ops/custom_ops.cpp
deleted file mode 100644
index e3b7bc9f0f0..00000000000
--- a/torchvision/csrc/custom_ops/custom_ops.cpp
+++ /dev/null
@@ -1,159 +0,0 @@
-#include <Python.h>
-#include <torch/script.h>
-
-#include "ROIAlign.h"
-#include "ROIPool.h"
-#include "nms.h"
-
-using namespace at;
-
-// If we are in a Windows environment, we need to define
-// initialization functions for the _custom_ops extension
-#ifdef _WIN32
-#if PY_MAJOR_VERSION < 3
-PyMODINIT_FUNC init_custom_ops(void) {
-  // No need to do anything.
-  // _custom_ops.py will run on load
-  return NULL;
-}
-#else
-PyMODINIT_FUNC PyInit__custom_ops(void) {
-  // No need to do anything.
-  // _custom_ops.py will run on load
-  return NULL;
-}
-#endif
-#endif
-
-using torch::Tensor;
-using torch::autograd::AutogradContext;
-using torch::autograd::Variable;
-using torch::autograd::variable_list;
-
-class ROIAlignFunction : public torch::autograd::Function<ROIAlignFunction> {
- public:
-  static variable_list forward(
-      AutogradContext* ctx,
-      Variable input,
-      Variable rois,
-      const double spatial_scale,
-      const int64_t pooled_height,
-      const int64_t pooled_width,
-      const int64_t sampling_ratio) {
-    ctx->saved_data["spatial_scale"] = spatial_scale;
-    ctx->saved_data["pooled_height"] = pooled_height;
-    ctx->saved_data["pooled_width"] = pooled_width;
-    ctx->saved_data["sampling_ratio"] = sampling_ratio;
-    ctx->saved_data["input_shape"] = input.sizes();
-    ctx->save_for_backward({rois});
-    auto result = ROIAlign_forward(
-        input,
-        rois,
-        spatial_scale,
-        pooled_height,
-        pooled_width,
-        sampling_ratio);
-    return {result};
-  }
-
-  static variable_list backward(
-      AutogradContext* ctx,
-      variable_list grad_output) {
-    // Use data saved in forward
-    auto saved = ctx->get_saved_variables();
-    auto rois = saved[0];
-    auto input_shape = ctx->saved_data["input_shape"].toIntList();
-    auto grad_in = ROIAlign_backward(
-        grad_output[0],
-        rois,
-        ctx->saved_data["spatial_scale"].toDouble(),
-        ctx->saved_data["pooled_height"].toInt(),
-        ctx->saved_data["pooled_width"].toInt(),
-        input_shape[0],
-        input_shape[1],
-        input_shape[2],
-        input_shape[3],
-        ctx->saved_data["sampling_ratio"].toInt());
-    return {
-        grad_in, Variable(), Variable(), Variable(), Variable(), Variable()};
-  }
-};
-
-Tensor roi_align(
-    const Tensor& input,
-    const Tensor& rois,
-    const double spatial_scale,
-    const int64_t pooled_height,
-    const int64_t pooled_width,
-    const int64_t sampling_ratio) {
-  return ROIAlignFunction::apply(
-      input,
-      rois,
-      spatial_scale,
-      pooled_height,
-      pooled_width,
-      sampling_ratio)[0];
-}
-
-class ROIPoolFunction : public torch::autograd::Function<ROIPoolFunction> {
- public:
-  static variable_list forward(
-      AutogradContext* ctx,
-      Variable input,
-      Variable rois,
-      const double spatial_scale,
-      const int64_t pooled_height,
-      const int64_t pooled_width) {
-    ctx->saved_data["spatial_scale"] = spatial_scale;
-    ctx->saved_data["pooled_height"] = pooled_height;
-    ctx->saved_data["pooled_width"] = pooled_width;
-    ctx->saved_data["input_shape"] = input.sizes();
-    auto result = ROIPool_forward(
-        input, rois, spatial_scale, pooled_height, pooled_width);
-    auto output = std::get<0>(result);
-    auto argmax = std::get<1>(result);
-    ctx->save_for_backward({rois, argmax});
-    ctx->mark_non_differentiable({argmax});
-    return {output, argmax};
-  }
-
-  static variable_list backward(
-      AutogradContext* ctx,
-      variable_list grad_output) {
-    // Use data saved in forward
-    auto saved = ctx->get_saved_variables();
-    auto rois = saved[0];
-    auto argmax = saved[1];
-    auto input_shape = ctx->saved_data["input_shape"].toIntList();
-    auto grad_in = ROIPool_backward(
-        grad_output[0],
-        rois,
-        argmax,
-        ctx->saved_data["spatial_scale"].toDouble(),
-        ctx->saved_data["pooled_height"].toInt(),
-        ctx->saved_data["pooled_width"].toInt(),
-        input_shape[0],
-        input_shape[1],
-        input_shape[2],
-        input_shape[3]);
-    return {grad_in, Variable(), Variable(), Variable(), Variable()};
-  }
-};
-
-std::tuple<Tensor, Tensor> roi_pool(
-    const Tensor& input,
-    const Tensor& rois,
-    const double spatial_scale,
-    const int64_t pooled_height,
-    const int64_t pooled_width) {
-  auto result = ROIPoolFunction::apply(
-      input, rois, spatial_scale, pooled_height, pooled_width);
-  return std::tuple<Tensor, Tensor>(result[0], result[1]);
-}
-
-static auto registry =
-    torch::RegisterOperators()
-        .op("torchvision::nms", &nms)
-        .op("torchvision::roi_align(Tensor input, Tensor rois, float spatial_scale, int pooled_height, int pooled_width, int sampling_ratio) -> Tensor",
-            &roi_align)
-        .op("torchvision::roi_pool", &roi_pool);
diff --git a/torchvision/csrc/vision.cpp b/torchvision/csrc/vision.cpp
index 61a4eeee727..243e2e87ff9 100644
--- a/torchvision/csrc/vision.cpp
+++ b/torchvision/csrc/vision.cpp
@@ -1,20 +1,44 @@
+#include <Python.h>
+#include <torch/script.h>
+
+#ifdef WITH_CUDA
+#include <cuda.h>
+#endif
+
 #include "ROIAlign.h"
 #include "ROIPool.h"
 #include "nms.h"
 
-#ifdef WITH_CUDA
-#include <cuda.h>
+// If we are in a Windows environment, we need to define
+// initialization functions for the _custom_ops extension
+#ifdef _WIN32
+#if PY_MAJOR_VERSION < 3
+PyMODINIT_FUNC init_custom_ops(void) {
+  // No need to do anything.
+  // _custom_ops.py will run on load
+  return NULL;
+}
+#else
+PyMODINIT_FUNC PyInit__custom_ops(void) {
+  // No need to do anything.
+  // _custom_ops.py will run on load
+  return NULL;
+}
+#endif
 #endif
 
-PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
-  // TODO: remove nms from here since it is now registered
-  //       and used as a PyTorch custom op
-  m.def("nms", &nms, "non-maximum suppression");
-  m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward");
-  m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward");
-  m.def("roi_pool_forward", &ROIPool_forward, "ROIPool_forward");
-  m.def("roi_pool_backward", &ROIPool_backward, "ROIPool_backward");
+int64_t _cuda_version() {
 #ifdef WITH_CUDA
-  m.attr("CUDA_VERSION") = CUDA_VERSION;
+  return CUDA_VERSION;
+#else
+  return -1;
 #endif
 }
+
+static auto registry =
+    torch::RegisterOperators()
+        .op("torchvision::nms", &nms)
+        .op("torchvision::roi_align(Tensor input, Tensor rois, float spatial_scale, int pooled_height, int pooled_width, int sampling_ratio) -> Tensor",
+            &roi_align)
+        .op("torchvision::roi_pool", &roi_pool)
+        .op("torchvision::_cuda_version", &_cuda_version);
diff --git a/torchvision/extension.py b/torchvision/extension.py
index b872af6ee74..790bca0d1f4 100644
--- a/torchvision/extension.py
+++ b/torchvision/extension.py
@@ -1,19 +1,34 @@
-_C = None
+_HAS_OPS = False
 
 
-def _lazy_import():
+def _register_extensions():
+    import os
+    import imp
+    import torch
+
+    # load the custom_op_library and register the custom ops
+    lib_dir = os.path.dirname(__file__)
+    _, path, _ = imp.find_module("_C", [lib_dir])
+    torch.ops.load_library(path)
+
+
+try:
+    _register_extensions()
+    _HAS_OPS = True
+except (ImportError, OSError):
+    pass
+
+
+def _check_cuda_version():
     """
     Make sure that CUDA versions match between the pytorch install and torchvision install
     """
-    global _C
-    if _C is not None:
-        return _C
+    if not _HAS_OPS:
+        return -1
     import torch
-    from torchvision import _C as C
-    import torchvision.ops._custom_ops
-    _C = C
-    if hasattr(_C, "CUDA_VERSION") and torch.version.cuda is not None:
-        tv_version = str(_C.CUDA_VERSION)
+    _version = torch.ops.torchvision._cuda_version()
+    if _version != -1 and torch.version.cuda is not None:
+        tv_version = str(_version)
         if int(tv_version) < 10000:
             tv_major = int(tv_version[0])
             tv_minor = int(tv_version[2])
@@ -29,4 +44,7 @@ def _lazy_import():
                                "PyTorch has CUDA Version={}.{} and torchvision has CUDA Version={}.{}. "
                                "Please reinstall the torchvision that matches your PyTorch install."
                                .format(t_major, t_minor, tv_major, tv_minor))
-    return _C
+    return _version
+
+
+_check_cuda_version()
diff --git a/torchvision/ops/__init__.py b/torchvision/ops/__init__.py
index fbd1181929b..a05c754989f 100644
--- a/torchvision/ops/__init__.py
+++ b/torchvision/ops/__init__.py
@@ -4,6 +4,10 @@
 from .poolers import MultiScaleRoIAlign
 from .feature_pyramid_network import FeaturePyramidNetwork
 
+from ._register_onnx_ops import _register_custom_op
+
+_register_custom_op()
+
 
 __all__ = [
     'nms', 'roi_align', 'RoIAlign', 'roi_pool', 'RoIPool',
diff --git a/torchvision/ops/_custom_ops.py b/torchvision/ops/_register_onnx_ops.py
similarity index 87%
rename from torchvision/ops/_custom_ops.py
rename to torchvision/ops/_register_onnx_ops.py
index 70108bdf152..cc30ad81db6 100644
--- a/torchvision/ops/_custom_ops.py
+++ b/torchvision/ops/_register_onnx_ops.py
@@ -1,16 +1,8 @@
-import os
 import sys
-import imp
 import torch
 
 
-# load the custom_op_library and register the custom ops
-lib_dir = os.path.join(os.path.dirname(__file__), '..')
-file, path, description = imp.find_module("_custom_ops", [lib_dir])
-torch.ops.load_library(path)
-
-
-def register_custom_op():
+def _register_custom_op():
     from torch.onnx.symbolic_helper import parse_args, scalar_type_to_onnx
     from torch.onnx.symbolic_opset9 import select, unsqueeze, squeeze, _cast_Long, reshape
 
@@ -41,6 +33,3 @@ def roi_pool(g, input, rois, spatial_scale, pooled_height, pooled_width):
     register_custom_op_symbolic('torchvision::nms', symbolic_multi_label_nms, 10)
     register_custom_op_symbolic('torchvision::roi_align', roi_align, 10)
     register_custom_op_symbolic('torchvision::roi_pool', roi_pool, 10)
-
-
-register_custom_op()
diff --git a/torchvision/ops/_utils.py b/torchvision/ops/_utils.py
index 67f0ea4feeb..269abaf7db3 100644
--- a/torchvision/ops/_utils.py
+++ b/torchvision/ops/_utils.py
@@ -1,24 +1,26 @@
 import torch
+from torch import Tensor
+from torch.jit.annotations import List
 
 
 def _cat(tensors, dim=0):
+    # type: (List[Tensor], int) -> Tensor
     """
     Efficient version of torch.cat that avoids a copy if there is only a single element in a list
     """
-    assert isinstance(tensors, (list, tuple))
+    # TODO add back the assert
+    # assert isinstance(tensors, (list, tuple))
     if len(tensors) == 1:
         return tensors[0]
     return torch.cat(tensors, dim)
 
 
 def convert_boxes_to_roi_format(boxes):
+    # type: (List[Tensor]) -> Tensor
     concat_boxes = _cat([b for b in boxes], dim=0)
-    ids = _cat(
-        [
-            torch.full_like(b[:, :1], i)
-            for i, b in enumerate(boxes)
-        ],
-        dim=0,
-    )
+    temp = []
+    for i, b in enumerate(boxes):
+        temp.append(torch.full_like(b[:, :1], i))
+    ids = _cat(temp, dim=0)
     rois = torch.cat([ids, concat_boxes], dim=1)
     return rois
diff --git a/torchvision/ops/boxes.py b/torchvision/ops/boxes.py
index 3e773a02f8b..239a2446e22 100644
--- a/torchvision/ops/boxes.py
+++ b/torchvision/ops/boxes.py
@@ -1,5 +1,4 @@
 import torch
-from torchvision.extension import _lazy_import
 
 
 def nms(boxes, scores, iou_threshold):
@@ -29,7 +28,6 @@ def nms(boxes, scores, iou_threshold):
         of the elements that have been kept
         by NMS, sorted in decreasing order of scores
     """
-    _lazy_import()
     return torch.ops.torchvision.nms(boxes, scores, iou_threshold)
 
 
diff --git a/torchvision/ops/roi_align.py b/torchvision/ops/roi_align.py
index 3038fb0dca0..abba99d420a 100644
--- a/torchvision/ops/roi_align.py
+++ b/torchvision/ops/roi_align.py
@@ -1,16 +1,14 @@
 import torch
-from torch import nn
-
-from torch.autograd import Function
-from torch.autograd.function import once_differentiable
+from torch import nn, Tensor
 
 from torch.nn.modules.utils import _pair
+from torch.jit.annotations import List
 
-from torchvision.extension import _lazy_import
 from ._utils import convert_boxes_to_roi_format
 
 
 def roi_align(input, boxes, output_size, spatial_scale=1.0, sampling_ratio=-1):
+    # type: (Tensor, Tensor, int, float, int) -> Tensor
     """
     Performs Region of Interest (RoI) Align operator described in Mask R-CNN
 
@@ -35,9 +33,9 @@ def roi_align(input, boxes, output_size, spatial_scale=1.0, sampling_ratio=-1):
         output (Tensor[K, C, output_size[0], output_size[1]])
     """
     rois = boxes
+    output_size = _pair(output_size)
     if not isinstance(rois, torch.Tensor):
         rois = convert_boxes_to_roi_format(rois)
-    _lazy_import()
     return torch.ops.torchvision.roi_align(input, rois, spatial_scale,
                                            output_size[0], output_size[1],
                                            sampling_ratio)
diff --git a/torchvision/ops/roi_pool.py b/torchvision/ops/roi_pool.py
index 6a9eaf6fdd9..50381c4ff2f 100644
--- a/torchvision/ops/roi_pool.py
+++ b/torchvision/ops/roi_pool.py
@@ -1,16 +1,14 @@
 import torch
-from torch import nn
-
-from torch.autograd import Function
-from torch.autograd.function import once_differentiable
+from torch import nn, Tensor
 
 from torch.nn.modules.utils import _pair
+from torch.jit.annotations import List
 
-from torchvision.extension import _lazy_import
 from ._utils import convert_boxes_to_roi_format
 
 
 def roi_pool(input, boxes, output_size, spatial_scale=1.0):
+    # type: (Tensor, Tensor, int, float) -> Tensor
     """
     Performs Region of Interest (RoI) Pool operator described in Fast R-CNN
 
@@ -30,9 +28,9 @@ def roi_pool(input, boxes, output_size, spatial_scale=1.0):
         output (Tensor[K, C, output_size[0], output_size[1]])
     """
     rois = boxes
+    output_size = _pair(output_size)
     if not isinstance(rois, torch.Tensor):
         rois = convert_boxes_to_roi_format(rois)
-    _lazy_import()
     output, _ = torch.ops.torchvision.roi_pool(input, rois, spatial_scale,
                                                output_size[0], output_size[1])
     return output

From 718fc4048fe43c46bce4f144bec597f50d638675 Mon Sep 17 00:00:00 2001
From: zyan3 <zyan3@devgpu002.prn1.facebook.com>
Date: Thu, 19 Sep 2019 12:58:02 -0700
Subject: [PATCH 22/26] fix linting

---
 test/test_transforms_video.py              |  4 +---
 torchvision/transforms/functional_video.py |  2 +-
 torchvision/transforms/transforms_video.py | 13 ++++++++-----
 3 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/test/test_transforms_video.py b/test/test_transforms_video.py
index 5028ef675f3..b0a237e9318 100644
--- a/test/test_transforms_video.py
+++ b/test/test_transforms_video.py
@@ -91,7 +91,6 @@ def test_center_crop_video(self):
         self.assertTrue(sum2.item() > 1, msg)
         self.assertTrue(sum2.item() > sum1.item(), msg)
 
-
     @unittest.skipIf(stats is None, 'scipy.stats is not available')
     def test_normalize_video(self):
         def samples_from_standard_normal(tensor):
@@ -113,7 +112,6 @@ def samples_from_standard_normal(tensor):
             assert samples_from_standard_normal(normalized)
         random.setstate(random_state)
 
-
         # Checking the optional in-place behaviour
         tensor = torch.rand((3, 128, 16, 16))
         tensor_inplace = transforms.NormalizeVideo((0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True)(tensor)
@@ -122,7 +120,6 @@ def samples_from_standard_normal(tensor):
         transforms.NormalizeVideo((0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True).__repr__()
 
     def test_to_tensor_video(self):
-        test_channels = [1, 3, 4]
         numFrames, height, width = 64, 4, 4
         trans = transforms.ToTensorVideo()
 
@@ -169,5 +166,6 @@ def test_random_horizontal_flip_video(self):
 
         transforms.RandomHorizontalFlipVideo().__repr__()
 
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/torchvision/transforms/functional_video.py b/torchvision/transforms/functional_video.py
index 627c0e3b0f1..ee7c07c8c6f 100644
--- a/torchvision/transforms/functional_video.py
+++ b/torchvision/transforms/functional_video.py
@@ -17,7 +17,7 @@ def crop(clip, i, j, h, w):
         clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W)
     """
     assert len(clip.size()) == 4, "clip should be a 4D tensor"
-    return clip[:, :, i : i + h, j : j + w]
+    return clip[:, :, i:i + h, j:j + w]
 
 
 def resize(clip, target_size, interpolation_mode):
diff --git a/torchvision/transforms/transforms_video.py b/torchvision/transforms/transforms_video.py
index bab4811b938..28e1c9a7e23 100644
--- a/torchvision/transforms/transforms_video.py
+++ b/torchvision/transforms/transforms_video.py
@@ -1,6 +1,5 @@
 #!/usr/bin/env python3
 
-import math
 import numbers
 import random
 
@@ -74,9 +73,10 @@ def __call__(self, clip):
         return F.resized_crop(clip, i, j, h, w, self.size, self.interpolation_mode)
 
     def __repr__(self):
-        return self.__class__.__name__ + '(size={0}, interpolation_mode={1}, scale={2}, ratio={3})'.format(
-            self.size, self.interpolation_mode, self.scale, self.ratio)
-
+        return self.__class__.__name__ + \
+            '(size={0}, interpolation_mode={1}, scale={2}, ratio={3})'.format(
+                self.size, self.interpolation_mode, self.scale, self.ratio
+            )
 
 
 class CenterCropVideo(object):
@@ -91,13 +91,15 @@ def __call__(self, clip):
         Args:
             clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W)
         Returns:
-            torch.tensor: central cropping of video clip. Size is (C, T, crop_size, crop_size)
+            torch.tensor: central cropping of video clip. Size is
+            (C, T, crop_size, crop_size)
         """
         return F.center_crop(clip, self.crop_size)
 
     def __repr__(self):
         return self.__class__.__name__ + '(crop_size={0})'.format(self.crop_size)
 
+
 class NormalizeVideo(object):
     """
     Normalize the video clip by mean subtraction and division by standard deviation
@@ -145,6 +147,7 @@ def __call__(self, clip):
     def __repr__(self):
         return self.__class__.__name__
 
+
 class RandomHorizontalFlipVideo(object):
     """
     Flip the video clip along the horizonal direction with a given probability

From ba3ef5a0096ca72deb9bd9d613f9294205dcb3ed Mon Sep 17 00:00:00 2001
From: zyan3 <zyan3@devgpu002.prn1.facebook.com>
Date: Fri, 6 Sep 2019 22:51:47 -0700
Subject: [PATCH 23/26] video transforms

---
 test/test_transforms.py                    |   2 +-
 test/test_transforms_video.py              | 173 +++++++++++++++++++++
 torchvision/transforms/__init__.py         |   1 +
 torchvision/transforms/functional_video.py | 100 ++++++++++++
 torchvision/transforms/transforms.py       |  34 ++--
 torchvision/transforms/transforms_video.py | 171 ++++++++++++++++++++
 6 files changed, 463 insertions(+), 18 deletions(-)
 create mode 100644 test/test_transforms_video.py
 create mode 100644 torchvision/transforms/functional_video.py
 create mode 100644 torchvision/transforms/transforms_video.py

diff --git a/test/test_transforms.py b/test/test_transforms.py
index 7e8320d6d6c..e4c0759074c 100644
--- a/test/test_transforms.py
+++ b/test/test_transforms.py
@@ -148,7 +148,7 @@ def test_randomresized_params(self):
             aspect_min = max(round(random.random(), 2), epsilon)
             aspect_ratio_range = (aspect_min, aspect_min + round(random.random(), 2))
             randresizecrop = transforms.RandomResizedCrop(size, scale_range, aspect_ratio_range)
-            i, j, h, w = randresizecrop.get_params(img, scale_range, aspect_ratio_range)
+            i, j, h, w = randresizecrop.get_params(img.size[1], img.size[0], scale_range, aspect_ratio_range)
             aspect_ratio_obtained = w / h
             assert (min(aspect_ratio_range) - epsilon <= aspect_ratio_obtained <= max(aspect_ratio_range) + epsilon or
                     aspect_ratio_obtained == 1.0)
diff --git a/test/test_transforms_video.py b/test/test_transforms_video.py
new file mode 100644
index 00000000000..30370218ddb
--- /dev/null
+++ b/test/test_transforms_video.py
@@ -0,0 +1,173 @@
+from __future__ import division
+import torch
+import torchvision.transforms as transforms
+import unittest
+import random
+import numpy as np
+
+try:
+    from scipy import stats
+except ImportError:
+    stats = None
+
+
+class Tester(unittest.TestCase):
+
+    def test_random_crop_video(self):
+        numFrames = random.randint(4, 128)
+        height = random.randint(10, 32) * 2
+        width = random.randint(10, 32) * 2
+        oheight = random.randint(5, (height - 2) / 2) * 2
+        owidth = random.randint(5, (width - 2) / 2) * 2
+        clip = torch.ones((numFrames, height, width, 3), dtype=torch.uint8)
+        result = transforms.Compose([
+            transforms.ToTensorVideo(),
+            transforms.RandomCropVideo((oheight, owidth)),
+        ])(clip)
+        assert result.size(2) == oheight
+        assert result.size(3) == owidth
+
+        transforms.RandomCropVideo((oheight, owidth)).__repr__()
+
+    def test_random_resized_crop_video(self):
+        numFrames = random.randint(4, 128)
+        height = random.randint(10, 32) * 2
+        width = random.randint(10, 32) * 2
+        oheight = random.randint(5, (height - 2) / 2) * 2
+        owidth = random.randint(5, (width - 2) / 2) * 2
+        clip = torch.ones((numFrames, height, width, 3), dtype=torch.uint8)
+        result = transforms.Compose([
+            transforms.ToTensorVideo(),
+            transforms.RandomResizedCropVideo((oheight, owidth)),
+        ])(clip)
+        assert result.size(2) == oheight
+        assert result.size(3) == owidth
+
+        transforms.RandomResizedCropVideo((oheight, owidth)).__repr__()
+
+    def test_center_crop_video(self):
+        numFrames = random.randint(4, 128)
+        height = random.randint(10, 32) * 2
+        width = random.randint(10, 32) * 2
+        oheight = random.randint(5, (height - 2) / 2) * 2
+        owidth = random.randint(5, (width - 2) / 2) * 2
+
+        clip = torch.ones([numFrames, height, width, 3], dtype=torch.uint8)
+        oh1 = (height - oheight) // 2
+        ow1 = (width - owidth) // 2
+        clipNarrow = clip[:, oh1:oh1 + oheight, ow1:ow1 + owidth, :]
+        clipNarrow.fill_(0)
+        result = transforms.Compose([
+            transforms.ToTensorVideo(),
+            transforms.CenterCropVideo((oheight, owidth)),
+        ])(clip)
+
+        msg = "height: " + str(height) + " width: " \
+            + str(width) + " oheight: " + str(oheight) + " owidth: " + str(owidth)
+        self.assertEqual(result.sum().item(), 0, msg)
+
+        oheight += 1
+        owidth += 1
+        result = transforms.Compose([
+            transforms.ToTensorVideo(),
+            transforms.CenterCropVideo((oheight, owidth)),
+        ])(clip)
+        sum1 = result.sum()
+
+        msg = "height: " + str(height) + " width: " \
+            + str(width) + " oheight: " + str(oheight) + " owidth: " + str(owidth)
+        self.assertEqual(sum1.item() > 1, True, msg)
+
+        oheight += 1
+        owidth += 1
+        result = transforms.Compose([
+            transforms.ToTensorVideo(),
+            transforms.CenterCropVideo((oheight, owidth)),
+        ])(clip)
+        sum2 = result.sum()
+
+        msg = "height: " + str(height) + " width: " \
+            + str(width) + " oheight: " + str(oheight) + " owidth: " + str(owidth)
+        self.assertTrue(sum2.item() > 1, msg)
+        self.assertTrue(sum2.item() > sum1.item(), msg)
+
+
+    @unittest.skipIf(stats is None, 'scipy.stats is not available')
+    def test_normalize_video(self):
+        def samples_from_standard_normal(tensor):
+            p_value = stats.kstest(list(tensor.view(-1)), 'norm', args=(0, 1)).pvalue
+            return p_value > 0.0001
+
+        random_state = random.getstate()
+        random.seed(42)
+        for channels in [1, 3]:
+            numFrames = random.randint(4, 128)
+            height = random.randint(32, 256)
+            width = random.randint(32, 256)
+            mean = random.random()
+            std = random.random()
+            clip = torch.normal(mean, std, size=(channels, numFrames, height, width))
+            mean = [clip[c].mean().item() for c in range(channels)]
+            std = [clip[c].std().item() for c in range(channels)]
+            normalized = transforms.NormalizeVideo(mean, std)(clip)
+            assert samples_from_standard_normal(normalized)
+        random.setstate(random_state)
+
+
+        # Checking the optional in-place behaviour
+        tensor = torch.rand((3, 128, 16, 16))
+        tensor_inplace = transforms.NormalizeVideo((0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True)(tensor)
+        assert torch.equal(tensor, tensor_inplace)
+
+        transforms.NormalizeVideo((0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True).__repr__()
+
+    def test_to_tensor_video(self):
+        test_channels = [1, 3, 4]
+        numFrames, height, width = 64, 4, 4
+        trans = transforms.ToTensorVideo()
+
+        with self.assertRaises(TypeError):
+            trans(np.random.rand(numFrames, height, width, 1).tolist())
+            trans(torch.rand((numFrames, height, width, 1), dtype=torch.float))
+
+        with self.assertRaises(ValueError):
+            trans(torch.ones((3, numFrames, height, width, 3), dtype=torch.uint8))
+            trans(torch.ones((height, width, 3), dtype=torch.uint8))
+            trans(torch.ones((width, 3), dtype=torch.uint8))
+            trans(torch.ones((3), dtype=torch.uint8))
+
+        trans.__repr__()
+
+    @unittest.skipIf(stats is None, 'scipy.stats not available')
+    def test_random_horizontal_flip_video(self):
+        random_state = random.getstate()
+        random.seed(42)
+        clip = torch.rand((3, 4, 112, 112), dtype=torch.float)
+        hclip = clip.flip((-1))
+
+        num_samples = 250
+        num_horizontal = 0
+        for _ in range(num_samples):
+            out = transforms.RandomHorizontalFlipVideo()(clip)
+            if torch.all(torch.eq(out, hclip)):
+                num_horizontal += 1
+
+        p_value = stats.binom_test(num_horizontal, num_samples, p=0.5)
+        random.setstate(random_state)
+        assert p_value > 0.0001
+
+        num_samples = 250
+        num_horizontal = 0
+        for _ in range(num_samples):
+            out = transforms.RandomHorizontalFlipVideo(p=0.7)(clip)
+            if torch.all(torch.eq(out, hclip)):
+                num_horizontal += 1
+
+        p_value = stats.binom_test(num_horizontal, num_samples, p=0.7)
+        random.setstate(random_state)
+        assert p_value > 0.0001
+
+        transforms.RandomHorizontalFlipVideo().__repr__()
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/torchvision/transforms/__init__.py b/torchvision/transforms/__init__.py
index 7986cdd6429..175a8a8dc1b 100644
--- a/torchvision/transforms/__init__.py
+++ b/torchvision/transforms/__init__.py
@@ -1 +1,2 @@
 from .transforms import *
+from .transforms_video import *
diff --git a/torchvision/transforms/functional_video.py b/torchvision/transforms/functional_video.py
new file mode 100644
index 00000000000..0b4c84d5843
--- /dev/null
+++ b/torchvision/transforms/functional_video.py
@@ -0,0 +1,100 @@
+import torch
+
+
+def _is_tensor_video_clip(clip):
+    if not torch.is_tensor(clip):
+        raise TypeError("clip should be Tesnor. Got %s" % type(clip))
+
+    if not clip.ndimension() == 4:
+        raise ValueError("clip should be 4D. Got %dD" % clip.dim())
+
+    return True
+
+
+def crop(clip, i, j, h, w):
+    """
+    Args:
+        clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W)
+    """
+    assert len(clip.size()) == 4, "clip should be a 4D tensor"
+    return clip[:, :, i : i + h, j : j + w]
+
+
+def resize(clip, target_size, interpolation_mode):
+    assert len(target_size) == 2, "target size should be tuple (height, width)"
+    return torch.nn.functional.interpolate(
+        clip, size=target_size, mode=interpolation_mode
+    )
+
+
+def resized_crop(clip, i, j, h, w, size, interpolation_mode="bilinear"):
+    """
+    Do spatial cropping and resizing to the video clip
+    Args:
+        clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W)
+        i (int): Row (top) coordinate of the upper left corner of the crop.
+        j (int): Column (left) coordinate of the upper left corner of the crop.
+        h (int): Height of the cropped region.
+        w (int): Width of the cropped region.
+        size (tuple(int, int)): height and width of resized clip
+    Returns:
+        clip (torch.tensor): Resized and cropped clip. Size is (C, T, H, W)
+    """
+    assert _is_tensor_video_clip(clip), "clip should be a 4D torch.tensor"
+    clip = crop(clip, i, j, h, w)
+    clip = resize(clip, size, interpolation_mode)
+    return clip
+
+
+def center_crop(clip, crop_size):
+    assert _is_tensor_video_clip(clip), "clip should be a 4D torch.tensor"
+    h, w = clip.size(2), clip.size(3)
+    th, tw = crop_size
+    assert h >= th and w >= tw, "height and width must be no smaller than crop_size"
+
+    i = int(round((h - th) / 2.0))
+    j = int(round((w - tw) / 2.0))
+    return crop(clip, i, j, th, tw)
+
+
+def to_tensor(clip):
+    """
+    Convert tensor data type to be float and permute the dimensions of clip tensor
+    Args:
+        clip (torch.tensor, dtype=torch.uint8): Size is (T, H, W, C)
+    Return:
+        clip (torch.tensor, dtype=torch.float): Size is (C, T, H, W)
+    """
+    _is_tensor_video_clip(clip)
+    if not clip.dtype == torch.uint8:
+        raise TypeError("clip tensor should have data type uint8. Got %s" % str(clip.dtype))
+    return clip.float().permute(3, 0, 1, 2)
+
+
+def normalize(clip, mean, std, inplace=False):
+    """
+    Args:
+        clip (torch.tensor): Video clip to be normalized. Size is (C, T, H, W)
+        mean (tuple): per-channel pixel mean (e.g. RGB). Size is (C)
+        std (tuple): per-channel pixel standard deviation. Size is (C)
+    Returns:
+        normalized clip (torch.tensor): Size is (C, T, H, W)
+    """
+    assert _is_tensor_video_clip(clip), "clip should be a 4D torch.tensor"
+    if not inplace:
+        clip = clip.clone()
+    mean = torch.as_tensor(mean, dtype=clip.dtype, device=clip.device)
+    std = torch.as_tensor(std, dtype=clip.dtype, device=clip.device)
+    clip.sub_(mean[:, None, None, None]).div_(std[:, None, None, None])
+    return clip
+
+
+def hflip(clip):
+    """
+    Args:
+        clip (torch.tensor): Video clip to be flipped. Size is (C, T, H, W)
+    Returns:
+        flipped clip (torch.tensor): Size is (C, T, H, W)
+    """
+    assert _is_tensor_video_clip(clip), "clip should be a 4D torch.tensor"
+    return clip.flip((-1))
diff --git a/torchvision/transforms/transforms.py b/torchvision/transforms/transforms.py
index b21a6d86eef..1d6171c1620 100644
--- a/torchvision/transforms/transforms.py
+++ b/torchvision/transforms/transforms.py
@@ -434,17 +434,17 @@ def __init__(self, size, padding=None, pad_if_needed=False, fill=0, padding_mode
         self.padding_mode = padding_mode
 
     @staticmethod
-    def get_params(img, output_size):
+    def get_params(w, h, output_size):
         """Get parameters for ``crop`` for a random crop.
 
         Args:
-            img (PIL Image): Image to be cropped.
+            w: width of the image/video
+            h: height of the image/video
             output_size (tuple): Expected output size of the crop.
 
         Returns:
             tuple: params (i, j, h, w) to be passed to ``crop`` for random crop.
         """
-        w, h = img.size
         th, tw = output_size
         if w == tw and h == th:
             return 0, 0, h, w
@@ -471,7 +471,7 @@ def __call__(self, img):
         if self.pad_if_needed and img.size[1] < self.size[0]:
             img = F.pad(img, (0, self.size[0] - img.size[1]), self.fill, self.padding_mode)
 
-        i, j, h, w = self.get_params(img, self.size)
+        i, j, h, w = self.get_params(img.size[0], img.size[1], self.size)
 
         return F.crop(img, i, j, h, w)
 
@@ -623,7 +623,7 @@ def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), interpolat
         self.ratio = ratio
 
     @staticmethod
-    def get_params(img, scale, ratio):
+    def get_params(height, width, scale, ratio):
         """Get parameters for ``crop`` for a random sized crop.
 
         Args:
@@ -635,7 +635,7 @@ def get_params(img, scale, ratio):
             tuple: params (i, j, h, w) to be passed to ``crop`` for a random
                 sized crop.
         """
-        area = img.size[0] * img.size[1]
+        area = height * width
 
         for attempt in range(10):
             target_area = random.uniform(*scale) * area
@@ -645,24 +645,24 @@ def get_params(img, scale, ratio):
             w = int(round(math.sqrt(target_area * aspect_ratio)))
             h = int(round(math.sqrt(target_area / aspect_ratio)))
 
-            if 0 < w <= img.size[0] and 0 < h <= img.size[1]:
-                i = random.randint(0, img.size[1] - h)
-                j = random.randint(0, img.size[0] - w)
+            if 0 < w <= width and 0 < h <= height:
+                i = random.randint(0, height - h)
+                j = random.randint(0, width - w)
                 return i, j, h, w
 
         # Fallback to central crop
-        in_ratio = img.size[0] / img.size[1]
+        in_ratio = float(width) / float(height)
         if (in_ratio < min(ratio)):
-            w = img.size[0]
+            w = width
             h = int(round(w / min(ratio)))
         elif (in_ratio > max(ratio)):
-            h = img.size[1]
+            h = height
             w = int(round(h * max(ratio)))
         else:  # whole image
-            w = img.size[0]
-            h = img.size[1]
-        i = (img.size[1] - h) // 2
-        j = (img.size[0] - w) // 2
+            w = width
+            h = height
+        i = (height - h) // 2
+        j = (width - w) // 2
         return i, j, h, w
 
     def __call__(self, img):
@@ -673,7 +673,7 @@ def __call__(self, img):
         Returns:
             PIL Image: Randomly cropped and resized image.
         """
-        i, j, h, w = self.get_params(img, self.scale, self.ratio)
+        i, j, h, w = self.get_params(img.size[1], img.size[0], self.scale, self.ratio)
         return F.resized_crop(img, i, j, h, w, self.size, self.interpolation)
 
     def __repr__(self):
diff --git a/torchvision/transforms/transforms_video.py b/torchvision/transforms/transforms_video.py
new file mode 100644
index 00000000000..7da6010f59f
--- /dev/null
+++ b/torchvision/transforms/transforms_video.py
@@ -0,0 +1,171 @@
+#!/usr/bin/env python3
+
+import math
+import numbers
+import random
+
+from torchvision.transforms import (
+    RandomCrop,
+    RandomResizedCrop,
+)
+
+from . import functional_video as F
+
+
+__all__ = [
+    "RandomCropVideo",
+    "RandomResizedCropVideo",
+    "CenterCropVideo",
+    "NormalizeVideo",
+    "ToTensorVideo",
+    "RandomHorizontalFlipVideo",
+]
+
+
+class RandomCropVideo(RandomCrop):
+    def __init__(self, size):
+        if isinstance(size, numbers.Number):
+            self.size = (int(size), int(size))
+        else:
+            self.size = size
+
+    def __call__(self, clip):
+        """
+        Args:
+            clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W)
+        Returns:
+            torch.tensor: randomly cropped video clip.
+                size is (C, T, OH, OW)
+        """
+        i, j, h, w = self.get_params(clip.size(3), clip.size(2), self.size)
+        return F.crop(clip, i, j, h, w)
+
+    def __repr__(self):
+        return self.__class__.__name__ + '(size={0})'.format(self.size)
+
+
+class RandomResizedCropVideo(RandomResizedCrop):
+    def __init__(
+        self,
+        size,
+        scale=(0.08, 1.0),
+        ratio=(3.0 / 4.0, 4.0 / 3.0),
+        interpolation_mode="bilinear",
+    ):
+        if isinstance(size, tuple):
+            assert len(size) == 2, "size should be tuple (height, width)"
+            self.size = size
+        else:
+            self.size = (size, size)
+
+        self.interpolation_mode = interpolation_mode
+        self.scale = scale
+        self.ratio = ratio
+
+    def __call__(self, clip):
+        """
+        Args:
+            clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W)
+        Returns:
+            torch.tensor: randomly cropped/resized video clip.
+                size is (C, T, H, W)
+        """
+        i, j, h, w = self.get_params(clip.size(2), clip.size(3), self.scale, self.ratio)
+        return F.resized_crop(clip, i, j, h, w, self.size, self.interpolation_mode)
+
+    def __repr__(self):
+        return self.__class__.__name__ + '(size={0}, interpolation_mode={1}, scale={2}, ratio={3})'.format(
+            self.size, self.interpolation_mode, self.scale, self.ratio)
+
+
+
+class CenterCropVideo(object):
+    def __init__(self, crop_size):
+        if isinstance(crop_size, numbers.Number):
+            self.crop_size = (int(size), int(size))
+        else:
+            self.crop_size = crop_size
+
+    def __call__(self, clip):
+        """
+        Args:
+            clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W)
+        Returns:
+            torch.tensor: central cropping of video clip. Size is (C, T, crop_size, crop_size)
+        """
+        return F.center_crop(clip, self.crop_size)
+
+    def __repr__(self):
+        return self.__class__.__name__ + '(crop_size={0})'.format(self.crop_size)
+
+class NormalizeVideo(object):
+    """
+    Normalize the video clip by mean subtraction and division by standard deviation
+    Args:
+        mean (3-tuple): pixel RGB mean
+        std (3-tuple): pixel RGB standard deviation
+        inplace (boolean): whether do in-place normalization
+    """
+
+    def __init__(self, mean, std, inplace=False):
+        self.mean = mean
+        self.std = std
+        self.inplace = inplace
+
+    def __call__(self, clip):
+        """
+        Args:
+            clip (torch.tensor): video clip to be normalized. Size is (C, T, H, W)
+        """
+        return F.normalize(clip, self.mean, self.std, self.inplace)
+
+    def __repr__(self):
+        return self.__class__.__name__ + '(mean={0}, std={1}, inplace={2})'.format(
+            self.mean, self.std, self.inplace)
+
+
+class ToTensorVideo(object):
+    """
+    Convert tensor data type to be float and permute the dimenions of clip tensor
+    """
+
+    def __init__(self):
+        pass
+
+    def __call__(self, clip):
+        """
+        Convert tensor data type to be float and permute the dimenions of clip tensor
+        Args:
+            clip (torch.tensor, dtype=torch.uint8): Size is (T, H, W, C)
+        Return:
+            clip (torch.tensor, dtype=torch.float): Size is (C, T, H, W)
+        """
+        return F.to_tensor(clip)
+
+    def __repr__(self):
+        return self.__class__.__name__
+
+class RandomHorizontalFlipVideo(object):
+    """
+    Flip the video clip along the horizonal direction with a given probability
+    Args:
+        p (float): probability of the clip being flipped. Default value is 0.5
+    """
+
+    def __init__(self, p=0.5):
+        self.p = p
+
+    def __call__(self, clip):
+        """
+        Convert tensor data type to be float and permute the dimenions of clip tensor
+        Args:
+            clip (torch.tensor): Size is (C, T, H, W)
+        Return:
+            clip (torch.tensor): Size is (C, T, H, W)
+        """
+        if random.random() < self.p:
+            clip = F.hflip(clip)
+        return clip
+
+    def __repr__(self):
+        return self.__class__.__name__ + "(p={0})".format(self.p)

From 6e56e1f2cdb7fc07d61b0b75f408aaf0d09a373f Mon Sep 17 00:00:00 2001
From: zyan3 <zyan3@devgpu002.prn1.facebook.com>
Date: Sat, 7 Sep 2019 22:09:24 -0700
Subject: [PATCH 24/26] [video transforms] in ToTensorVideo, divide value by
 255.0

---
 test/test_transforms_video.py              | 6 +++---
 torchvision/transforms/functional_video.py | 5 +++--
 torchvision/transforms/transforms_video.py | 5 ++---
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/test/test_transforms_video.py b/test/test_transforms_video.py
index 30370218ddb..5028ef675f3 100644
--- a/test/test_transforms_video.py
+++ b/test/test_transforms_video.py
@@ -19,7 +19,7 @@ def test_random_crop_video(self):
         width = random.randint(10, 32) * 2
         oheight = random.randint(5, (height - 2) / 2) * 2
         owidth = random.randint(5, (width - 2) / 2) * 2
-        clip = torch.ones((numFrames, height, width, 3), dtype=torch.uint8)
+        clip = torch.randint(0, 256, (numFrames, height, width, 3), dtype=torch.uint8)
         result = transforms.Compose([
             transforms.ToTensorVideo(),
             transforms.RandomCropVideo((oheight, owidth)),
@@ -35,7 +35,7 @@ def test_random_resized_crop_video(self):
         width = random.randint(10, 32) * 2
         oheight = random.randint(5, (height - 2) / 2) * 2
         owidth = random.randint(5, (width - 2) / 2) * 2
-        clip = torch.ones((numFrames, height, width, 3), dtype=torch.uint8)
+        clip = torch.randint(0, 256, (numFrames, height, width, 3), dtype=torch.uint8)
         result = transforms.Compose([
             transforms.ToTensorVideo(),
             transforms.RandomResizedCropVideo((oheight, owidth)),
@@ -52,7 +52,7 @@ def test_center_crop_video(self):
         oheight = random.randint(5, (height - 2) / 2) * 2
         owidth = random.randint(5, (width - 2) / 2) * 2
 
-        clip = torch.ones([numFrames, height, width, 3], dtype=torch.uint8)
+        clip = torch.ones((numFrames, height, width, 3), dtype=torch.uint8) * 255
         oh1 = (height - oheight) // 2
         ow1 = (width - owidth) // 2
         clipNarrow = clip[:, oh1:oh1 + oheight, ow1:ow1 + owidth, :]
diff --git a/torchvision/transforms/functional_video.py b/torchvision/transforms/functional_video.py
index 0b4c84d5843..627c0e3b0f1 100644
--- a/torchvision/transforms/functional_video.py
+++ b/torchvision/transforms/functional_video.py
@@ -59,7 +59,8 @@ def center_crop(clip, crop_size):
 
 def to_tensor(clip):
     """
-    Convert tensor data type to be float and permute the dimenions of clip tensor
+    Convert tensor data type from uint8 to float, divide value by 255.0 and
+    permute the dimensions of clip tensor
     Args:
         clip (torch.tensor, dtype=torch.uint8): Size is (T, H, W, C)
     Return:
@@ -68,7 +69,7 @@ def to_tensor(clip):
     _is_tensor_video_clip(clip)
     if not clip.dtype == torch.uint8:
         raise TypeError("clip tensor should have data type uint8. Got %s" % str(clip.dtype))
-    return clip.float().permute(3, 0, 1, 2)
+    return clip.float().permute(3, 0, 1, 2) / 255.0
 
 
 def normalize(clip, mean, std, inplace=False):
diff --git a/torchvision/transforms/transforms_video.py b/torchvision/transforms/transforms_video.py
index 7da6010f59f..50c292e30d1 100644
--- a/torchvision/transforms/transforms_video.py
+++ b/torchvision/transforms/transforms_video.py
@@ -126,7 +126,8 @@ def __repr__(self):
 
 class ToTensorVideo(object):
     """
-    Convert tensor data type to be float and permute the dimenions of clip tensor
+    Convert tensor data type from uint8 to float, divide value by 255.0 and
+    permute the dimensions of clip tensor
     """
 
     def __init__(self):
@@ -134,7 +135,6 @@ def __init__(self):
 
     def __call__(self, clip):
         """
-        Convert tensor data type to be float and permute the dimenions of clip tensor
         Args:
             clip (torch.tensor, dtype=torch.uint8): Size is (T, H, W, C)
         Return:
@@ -157,7 +157,6 @@ def __init__(self, p=0.5):
 
     def __call__(self, clip):
         """
-        Convert tensor data type to be float and permute the dimenions of clip tensor
         Args:
             clip (torch.tensor): Size is (C, T, H, W)
         Return:

From d0cc43b5f03f6dc6fca80cf66e26db95669ca0a0 Mon Sep 17 00:00:00 2001
From: zyan3 <zyan3@devgpu002.prn1.facebook.com>
Date: Sat, 7 Sep 2019 22:39:31 -0700
Subject: [PATCH 25/26] [video transforms] fix NameError in CenterCropVideo init

---
 torchvision/transforms/transforms_video.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/torchvision/transforms/transforms_video.py b/torchvision/transforms/transforms_video.py
index 50c292e30d1..bab4811b938 100644
--- a/torchvision/transforms/transforms_video.py
+++ b/torchvision/transforms/transforms_video.py
@@ -82,7 +82,7 @@ def __repr__(self):
 class CenterCropVideo(object):
     def __init__(self, crop_size):
         if isinstance(crop_size, numbers.Number):
-            self.crop_size = (int(size), int(size))
+            self.crop_size = (int(crop_size), int(crop_size))
         else:
             self.crop_size = crop_size
 

From 0f1d7217e39e7f2aee3a8a30a91d4ac42f95b275 Mon Sep 17 00:00:00 2001
From: zyan3 <zyan3@devgpu002.prn1.facebook.com>
Date: Thu, 19 Sep 2019 12:58:02 -0700
Subject: [PATCH 26/26] fix linting

---
 test/test_transforms_video.py              |  4 +---
 torchvision/transforms/functional_video.py |  2 +-
 torchvision/transforms/transforms_video.py | 13 ++++++++-----
 3 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/test/test_transforms_video.py b/test/test_transforms_video.py
index 5028ef675f3..b0a237e9318 100644
--- a/test/test_transforms_video.py
+++ b/test/test_transforms_video.py
@@ -91,7 +91,6 @@ def test_center_crop_video(self):
         self.assertTrue(sum2.item() > 1, msg)
         self.assertTrue(sum2.item() > sum1.item(), msg)
 
-
     @unittest.skipIf(stats is None, 'scipy.stats is not available')
     def test_normalize_video(self):
         def samples_from_standard_normal(tensor):
@@ -113,7 +112,6 @@ def samples_from_standard_normal(tensor):
             assert samples_from_standard_normal(normalized)
         random.setstate(random_state)
 
-
         # Checking the optional in-place behaviour
         tensor = torch.rand((3, 128, 16, 16))
         tensor_inplace = transforms.NormalizeVideo((0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True)(tensor)
@@ -122,7 +120,6 @@ def samples_from_standard_normal(tensor):
         transforms.NormalizeVideo((0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True).__repr__()
 
     def test_to_tensor_video(self):
-        test_channels = [1, 3, 4]
         numFrames, height, width = 64, 4, 4
         trans = transforms.ToTensorVideo()
 
@@ -169,5 +166,6 @@ def test_random_horizontal_flip_video(self):
 
         transforms.RandomHorizontalFlipVideo().__repr__()
 
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/torchvision/transforms/functional_video.py b/torchvision/transforms/functional_video.py
index 627c0e3b0f1..ee7c07c8c6f 100644
--- a/torchvision/transforms/functional_video.py
+++ b/torchvision/transforms/functional_video.py
@@ -17,7 +17,7 @@ def crop(clip, i, j, h, w):
         clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W)
     """
     assert len(clip.size()) == 4, "clip should be a 4D tensor"
-    return clip[:, :, i : i + h, j : j + w]
+    return clip[:, :, i:i + h, j:j + w]
 
 
 def resize(clip, target_size, interpolation_mode):
diff --git a/torchvision/transforms/transforms_video.py b/torchvision/transforms/transforms_video.py
index bab4811b938..28e1c9a7e23 100644
--- a/torchvision/transforms/transforms_video.py
+++ b/torchvision/transforms/transforms_video.py
@@ -1,6 +1,5 @@
 #!/usr/bin/env python3
 
-import math
 import numbers
 import random
 
@@ -74,9 +73,10 @@ def __call__(self, clip):
         return F.resized_crop(clip, i, j, h, w, self.size, self.interpolation_mode)
 
     def __repr__(self):
-        return self.__class__.__name__ + '(size={0}, interpolation_mode={1}, scale={2}, ratio={3})'.format(
-            self.size, self.interpolation_mode, self.scale, self.ratio)
-
+        return self.__class__.__name__ + \
+            '(size={0}, interpolation_mode={1}, scale={2}, ratio={3})'.format(
+                self.size, self.interpolation_mode, self.scale, self.ratio
+            )
 
 
 class CenterCropVideo(object):
@@ -91,13 +91,15 @@ def __call__(self, clip):
         Args:
             clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W)
         Returns:
-            torch.tensor: central cropping of video clip. Size is (C, T, crop_size, crop_size)
+            torch.tensor: central cropping of video clip. Size is
+            (C, T, crop_size, crop_size)
         """
         return F.center_crop(clip, self.crop_size)
 
     def __repr__(self):
         return self.__class__.__name__ + '(crop_size={0})'.format(self.crop_size)
 
+
 class NormalizeVideo(object):
     """
     Normalize the video clip by mean subtraction and division by standard deviation
@@ -145,6 +147,7 @@ def __call__(self, clip):
     def __repr__(self):
         return self.__class__.__name__
 
+
 class RandomHorizontalFlipVideo(object):
     """
     Flip the video clip along the horizonal direction with a given probability