From 583edc557b27ccc7a56f464708dc0b0e6158e4b6 Mon Sep 17 00:00:00 2001
From: Eli Uriegas <eliuriegas@fb.com>
Date: Wed, 4 Mar 2020 13:35:13 -0800
Subject: [PATCH] datasets: Fall back to our own mirrors for mnist

We are experiencing HTTP 403 errors when trying to download from the
main MNIST site, so let's fall back to our own mirror on failure.

Signed-off-by: Eli Uriegas <eliuriegas@fb.com>
---
 torchvision/datasets/mnist.py | 37 ++++++++++++++++++++++++++++-------
 1 file changed, 30 insertions(+), 7 deletions(-)

diff --git a/torchvision/datasets/mnist.py b/torchvision/datasets/mnist.py
index e798894089b..e87cd46eefe 100644
--- a/torchvision/datasets/mnist.py
+++ b/torchvision/datasets/mnist.py
@@ -10,6 +10,7 @@
 import gzip
 import lzma
 from typing import Any, Callable, Dict, IO, List, Optional, Tuple, Union
+from urllib.error import URLError
 from .utils import download_url, download_and_extract_archive, extract_archive, \
     verify_str_arg
 
@@ -31,11 +32,16 @@ class MNIST(VisionDataset):
             target and transforms it.
     """
 
+    mirrors = [
+        'http://yann.lecun.com/exdb/mnist/',
+        'https://ossci-datasets.s3.amazonaws.com/mnist/',
+    ]
+
     resources = [
-        ("http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz", "f68b3c2dcbeaaa9fbdd348bbdeb94873"),
-        ("http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz", "d53e105ee54ea40749a09fcbcd1e9432"),
-        ("http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz", "9fb629c4189551a2d022fa330f9573f3"),
-        ("http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz", "ec29112dd5afa0611ce80d1b7f02629c")
+        ("train-images-idx3-ubyte.gz", "f68b3c2dcbeaaa9fbdd348bbdeb94873"),
+        ("train-labels-idx1-ubyte.gz", "d53e105ee54ea40749a09fcbcd1e9432"),
+        ("t10k-images-idx3-ubyte.gz", "9fb629c4189551a2d022fa330f9573f3"),
+        ("t10k-labels-idx1-ubyte.gz", "ec29112dd5afa0611ce80d1b7f02629c")
     ]
 
     training_file = 'training.pt'
@@ -141,9 +147,26 @@ def download(self) -> None:
         os.makedirs(self.processed_folder, exist_ok=True)
 
         # download files
-        for url, md5 in self.resources:
-            filename = url.rpartition('/')[2]
-            download_and_extract_archive(url, download_root=self.raw_folder, filename=filename, md5=md5)
+        for filename, md5 in self.resources:
+            for mirror in self.mirrors:
+                url = "{}{}".format(mirror, filename)
+                try:
+                    print("Downloading {}".format(url))
+                    download_and_extract_archive(
+                        url, download_root=self.raw_folder,
+                        filename=filename,
+                        md5=md5
+                    )
+                except URLError as error:
+                    print(
+                        "Failed to download (trying next):\n{}".format(error)
+                    )
+                    continue
+                finally:
+                    print()
+                break
+            else:
+                raise RuntimeError("Error downloading {}".format(filename))
 
         # process and save as torch files
         print('Processing...')