From e1e1fd5c09d4538f50d61ca971c328c395f35609 Mon Sep 17 00:00:00 2001 From: Prabhat Roy Date: Thu, 13 May 2021 04:13:41 -0700 Subject: [PATCH] Added dataset download support in fbcode Summary: Uploaded FashionMNIST dataset to [manifold](https://www.internalfb.com/intern/network/manifold/?bucket=torchvision&path=tree%2Fdatasets) bucket `torchvision`. Any new dataset that needs to be added could be uploaded under `tree/datasets/`. Reviewed By: datumbox Differential Revision: D28358470 fbshipit-source-id: 4c15466d69bd5171da30da3882fbe3cb579e05cd --- torchvision/datasets/_utils.py | 6 +++++ torchvision/datasets/utils.py | 43 ++++++++++++++++++++-------------- 2 files changed, 32 insertions(+), 17 deletions(-) create mode 100644 torchvision/datasets/_utils.py diff --git a/torchvision/datasets/_utils.py b/torchvision/datasets/_utils.py new file mode 100644 index 00000000000..d0664d50d54 --- /dev/null +++ b/torchvision/datasets/_utils.py @@ -0,0 +1,6 @@ +def _download_file_from_remote_location(fpath: str) -> None: + pass + + +def _is_remote_location_available() -> bool: + return False diff --git a/torchvision/datasets/utils.py b/torchvision/datasets/utils.py index 0d1d879f045..36d6f8e657e 100644 --- a/torchvision/datasets/utils.py +++ b/torchvision/datasets/utils.py @@ -17,6 +17,11 @@ import torch from torch.utils.model_zoo import tqdm +from ._utils import ( + _download_file_from_remote_location, + _is_remote_location_available, +) + USER_AGENT = "pytorch/vision" @@ -117,26 +122,30 @@ def download_url( print('Using downloaded and verified file: ' + fpath) return - # expand redirect chain if needed - url = _get_redirect_url(url, max_hops=max_redirect_hops) + if _is_remote_location_available(): + _download_file_from_remote_location(fpath) + else: + # expand redirect chain if needed + url = _get_redirect_url(url, max_hops=max_redirect_hops) - # check if file is located on Google Drive - file_id = _get_google_drive_file_id(url) - if file_id is not None: - return download_file_from_google_drive(file_id, root, filename, md5) + # check if file is located on Google Drive + file_id = _get_google_drive_file_id(url) + if file_id is not None: + return download_file_from_google_drive(file_id, root, filename, md5) - # download the file - try: - print('Downloading ' + url + ' to ' + fpath) - _urlretrieve(url, fpath) - except (urllib.error.URLError, IOError) as e: # type: ignore[attr-defined] - if url[:5] == 'https': - url = url.replace('https:', 'http:') - print('Failed download. Trying https -> http instead.' - ' Downloading ' + url + ' to ' + fpath) + # download the file + try: + print('Downloading ' + url + ' to ' + fpath) _urlretrieve(url, fpath) - else: - raise e + except (urllib.error.URLError, IOError) as e: # type: ignore[attr-defined] + if url[:5] == 'https': + url = url.replace('https:', 'http:') + print('Failed download. Trying https -> http instead.' + ' Downloading ' + url + ' to ' + fpath) + _urlretrieve(url, fpath) + else: + raise e + # check integrity of downloaded file if not check_integrity(fpath, md5): raise RuntimeError("File not found or corrupted.")