From 583edc557b27ccc7a56f464708dc0b0e6158e4b6 Mon Sep 17 00:00:00 2001 From: Eli Uriegas Date: Wed, 4 Mar 2020 13:35:13 -0800 Subject: [PATCH] datasets: Fall back to our own mirrors for mnist We are experiencing 403s when trying to download from the main mnist site, so let's fall back to our own mirror on failure. Signed-off-by: Eli Uriegas --- torchvision/datasets/mnist.py | 37 ++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/torchvision/datasets/mnist.py b/torchvision/datasets/mnist.py index e798894089b..e87cd46eefe 100644 --- a/torchvision/datasets/mnist.py +++ b/torchvision/datasets/mnist.py @@ -10,6 +10,7 @@ import gzip import lzma from typing import Any, Callable, Dict, IO, List, Optional, Tuple, Union +from urllib.error import URLError from .utils import download_url, download_and_extract_archive, extract_archive, \ verify_str_arg @@ -31,11 +32,16 @@ class MNIST(VisionDataset): target and transforms it. """ + mirrors = [ + 'http://yann.lecun.com/exdb/mnist/', + 'https://ossci-datasets.s3.amazonaws.com/mnist/', + ] + resources = [ - ("http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz", "f68b3c2dcbeaaa9fbdd348bbdeb94873"), - ("http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz", "d53e105ee54ea40749a09fcbcd1e9432"), - ("http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz", "9fb629c4189551a2d022fa330f9573f3"), - ("http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz", "ec29112dd5afa0611ce80d1b7f02629c") + ("train-images-idx3-ubyte.gz", "f68b3c2dcbeaaa9fbdd348bbdeb94873"), + ("train-labels-idx1-ubyte.gz", "d53e105ee54ea40749a09fcbcd1e9432"), + ("t10k-images-idx3-ubyte.gz", "9fb629c4189551a2d022fa330f9573f3"), + ("t10k-labels-idx1-ubyte.gz", "ec29112dd5afa0611ce80d1b7f02629c") ] training_file = 'training.pt' @@ -141,9 +147,26 @@ def download(self) -> None: os.makedirs(self.processed_folder, exist_ok=True) # download files - for url, md5 in self.resources: - filename = 
url.rpartition('/')[2] - download_and_extract_archive(url, download_root=self.raw_folder, filename=filename, md5=md5) + for filename, md5 in self.resources: + for mirror in self.mirrors: + url = "{}{}".format(mirror, filename) + try: + print("Downloading {}".format(url)) + download_and_extract_archive( + url, download_root=self.raw_folder, + filename=filename, + md5=md5 + ) + except URLError as error: + print( + "Failed to download (trying next):\n{}".format(error) + ) + continue + finally: + print() + break + else: + raise RuntimeError("Error downloading {}".format(filename)) # process and save as torch files print('Processing...')