From 5663821e05fc5b317b61ec26e53068386e5cc5df Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Wed, 10 Mar 2021 19:23:00 +0000 Subject: [PATCH 1/8] WIP --- test/datasets_utils.py | 7 ++- test/test_datasets.py | 99 +++++++++++++++++++++++++++++--- torchvision/datasets/imagenet.py | 9 +-- 3 files changed, 100 insertions(+), 15 deletions(-) diff --git a/test/datasets_utils.py b/test/datasets_utils.py index 577bdb2eb32..6b129a3f305 100644 --- a/test/datasets_utils.py +++ b/test/datasets_utils.py @@ -316,9 +316,10 @@ def create_dataset( special_kwargs["download"] = False config.update(other_kwargs) - patchers = self._patch_download_extract() - if patch_checks: - patchers.update(self._patch_checks()) + patchers = set() + # patchers = self._patch_download_extract() + # if patch_checks: + # patchers.update(self._patch_checks()) with get_tmp_dir() as tmpdir: args = self.dataset_args(tmpdir, config) diff --git a/test/test_datasets.py b/test/test_datasets.py index 859419df2b0..b1e55ddfeab 100644 --- a/test/test_datasets.py +++ b/test/test_datasets.py @@ -146,15 +146,15 @@ def test_fashionmnist(self, mock_download_extract): img, target = dataset[0] self.assertEqual(dataset.class_to_idx[dataset.classes[0]], target) - @mock.patch('torchvision.datasets.imagenet._verify_archive') - @unittest.skipIf(not HAS_SCIPY, "scipy unavailable") - def test_imagenet(self, mock_verify): - with imagenet_root() as root: - dataset = torchvision.datasets.ImageNet(root, split='train') - self.generic_classification_dataset_test(dataset) + # @mock.patch('torchvision.datasets.imagenet._verify_archive') + # @unittest.skipIf(not HAS_SCIPY, "scipy unavailable") + # def test_imagenet(self, mock_verify): + # with imagenet_root() as root: + # dataset = torchvision.datasets.ImageNet(root, split='train') + # self.generic_classification_dataset_test(dataset) - dataset = torchvision.datasets.ImageNet(root, split='val') - self.generic_classification_dataset_test(dataset) + # dataset = 
torchvision.datasets.ImageNet(root, split='val') + # self.generic_classification_dataset_test(dataset) @mock.patch('torchvision.datasets.WIDERFace._check_integrity') @unittest.skipIf('win' in sys.platform, 'temporarily disabled on Windows') @@ -490,6 +490,89 @@ def inject_fake_data(self, tmpdir, config): return num_images_per_category * len(categories) +# @mock.patch('torchvision.datasets.imagenet._verify_archive') +class ImageNetTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.ImageNet + REQUIRED_PACKAGES = ['scipy'] + + def imagenet_root(self, root): + import scipy.io as sio + import tarfile + + WNID = 'n01234567' + CLS = 'fakedata' + + def _make_image(file): + PIL.Image.fromarray(np.zeros((32, 32, 3), dtype=np.uint8)).save(file) + + def _make_tar(archive, content, arcname=None, compress=False): + mode = 'w:gz' if compress else 'w' + if arcname is None: + arcname = os.path.basename(content) + with tarfile.open(archive, mode) as fh: + fh.add(content, arcname=arcname) + + def _make_train_archive(root): + with get_tmp_dir() as tmp: + wnid_dir = os.path.join(tmp, WNID) + os.mkdir(wnid_dir) + + _make_image(os.path.join(wnid_dir, WNID + '_1.JPEG')) + + wnid_archive = wnid_dir + '.tar' + _make_tar(wnid_archive, wnid_dir) + + train_archive = os.path.join(root, 'ILSVRC2012_img_train.tar') + _make_tar(train_archive, wnid_archive) + # from torchvision.datasets.utils import extract_archive + # extract_archive(train_archive, os.path.join(root, 'train')) + + def _make_val_archive(root): + with get_tmp_dir() as tmp: + val_image = os.path.join(tmp, 'ILSVRC2012_val_00000001.JPEG') + _make_image(val_image) + + val_archive = os.path.join(root, 'ILSVRC2012_img_val.tar') + _make_tar(val_archive, val_image) + + def _make_devkit_archive(root): + with get_tmp_dir() as tmp: + data_dir = os.path.join(tmp, 'data') + os.mkdir(data_dir) + + meta_file = os.path.join(data_dir, 'meta.mat') + synsets = np.core.records.fromarrays([ + (0.0, 1.0), + (WNID, ''), + (CLS, ''), 
+ ('fakedata for the torchvision testsuite', ''), + (0.0, 1.0), + ], names=['ILSVRC2012_ID', 'WNID', 'words', 'gloss', 'num_children']) + sio.savemat(meta_file, {'synsets': synsets}) + + groundtruth_file = os.path.join(data_dir, + 'ILSVRC2012_validation_ground_truth.txt') + with open(groundtruth_file, 'w') as fh: + fh.write('0\n') + + devkit_name = 'ILSVRC2012_devkit_t12' + devkit_archive = os.path.join(root, devkit_name + '.tar.gz') + _make_tar(devkit_archive, tmp, arcname=devkit_name, compress=True) + + _make_train_archive(root) + _make_val_archive(root) + _make_devkit_archive(root) + + + CONFIGS = datasets_utils.combinations_grid(split=('train', 'val')) + def inject_fake_data(self, tmpdir, config): + tmpdir = pathlib.Path(tmpdir) + + self.imagenet_root(tmpdir) + + return 1 + + class CIFAR10TestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.CIFAR10 CONFIGS = datasets_utils.combinations_grid(train=(True, False)) diff --git a/torchvision/datasets/imagenet.py b/torchvision/datasets/imagenet.py index 6dfc9bfebfd..e0a17de2702 100644 --- a/torchvision/datasets/imagenet.py +++ b/torchvision/datasets/imagenet.py @@ -97,10 +97,11 @@ def load_meta_file(root: str, file: Optional[str] = None) -> Tuple[Dict[str, str def _verify_archive(root: str, file: str, md5: str) -> None: - if not check_integrity(os.path.join(root, file), md5): - msg = ("The archive {} is not present in the root directory or is corrupted. " - "You need to download it externally and place it in {}.") - raise RuntimeError(msg.format(file, root)) + return + # if not check_integrity(os.path.join(root, file), md5): + # msg = ("The archive {} is not present in the root directory or is corrupted. 
" + # "You need to download it externally and place it in {}.") + # raise RuntimeError(msg.format(file, root)) def parse_devkit_archive(root: str, file: Optional[str] = None) -> None: From 675671d140051a87d60e97087ffd98b920d681d7 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Thu, 11 Mar 2021 11:25:17 +0000 Subject: [PATCH 2/8] Port ImageNet to new test architecture --- test/datasets_utils.py | 9 ++- test/fakedata_generation.py | 69 ---------------------- test/test_datasets.py | 99 ++++++-------------------------- torchvision/datasets/imagenet.py | 9 ++- 4 files changed, 25 insertions(+), 161 deletions(-) diff --git a/test/datasets_utils.py b/test/datasets_utils.py index 6b129a3f305..0686c6501d1 100644 --- a/test/datasets_utils.py +++ b/test/datasets_utils.py @@ -313,13 +313,12 @@ def create_dataset( special_kwargs, other_kwargs = self._split_kwargs(kwargs) if "download" in self._HAS_SPECIAL_KWARG: - special_kwargs["download"] = False + special_kwargs["download"] = None if self.DATASET_CLASS.__name__ == 'ImageNet' else False config.update(other_kwargs) - patchers = set() - # patchers = self._patch_download_extract() - # if patch_checks: - # patchers.update(self._patch_checks()) + patchers = self._patch_download_extract() + if patch_checks: + patchers.update(self._patch_checks()) with get_tmp_dir() as tmpdir: args = self.dataset_args(tmpdir, config) diff --git a/test/fakedata_generation.py b/test/fakedata_generation.py index dac415df110..ae1f611b862 100644 --- a/test/fakedata_generation.py +++ b/test/fakedata_generation.py @@ -144,75 +144,6 @@ def _make_meta_file(file, classes_key): @contextlib.contextmanager -def imagenet_root(): - import scipy.io as sio - - WNID = 'n01234567' - CLS = 'fakedata' - - def _make_image(file): - PIL.Image.fromarray(np.zeros((32, 32, 3), dtype=np.uint8)).save(file) - - def _make_tar(archive, content, arcname=None, compress=False): - mode = 'w:gz' if compress else 'w' - if arcname is None: - arcname = os.path.basename(content) - with 
tarfile.open(archive, mode) as fh: - fh.add(content, arcname=arcname) - - def _make_train_archive(root): - with get_tmp_dir() as tmp: - wnid_dir = os.path.join(tmp, WNID) - os.mkdir(wnid_dir) - - _make_image(os.path.join(wnid_dir, WNID + '_1.JPEG')) - - wnid_archive = wnid_dir + '.tar' - _make_tar(wnid_archive, wnid_dir) - - train_archive = os.path.join(root, 'ILSVRC2012_img_train.tar') - _make_tar(train_archive, wnid_archive) - - def _make_val_archive(root): - with get_tmp_dir() as tmp: - val_image = os.path.join(tmp, 'ILSVRC2012_val_00000001.JPEG') - _make_image(val_image) - - val_archive = os.path.join(root, 'ILSVRC2012_img_val.tar') - _make_tar(val_archive, val_image) - - def _make_devkit_archive(root): - with get_tmp_dir() as tmp: - data_dir = os.path.join(tmp, 'data') - os.mkdir(data_dir) - - meta_file = os.path.join(data_dir, 'meta.mat') - synsets = np.core.records.fromarrays([ - (0.0, 1.0), - (WNID, ''), - (CLS, ''), - ('fakedata for the torchvision testsuite', ''), - (0.0, 1.0), - ], names=['ILSVRC2012_ID', 'WNID', 'words', 'gloss', 'num_children']) - sio.savemat(meta_file, {'synsets': synsets}) - - groundtruth_file = os.path.join(data_dir, - 'ILSVRC2012_validation_ground_truth.txt') - with open(groundtruth_file, 'w') as fh: - fh.write('0\n') - - devkit_name = 'ILSVRC2012_devkit_t12' - devkit_archive = os.path.join(root, devkit_name + '.tar.gz') - _make_tar(devkit_archive, tmp, arcname=devkit_name, compress=True) - - with get_tmp_dir() as root: - _make_train_archive(root) - _make_val_archive(root) - _make_devkit_archive(root) - - yield root - - @contextlib.contextmanager def widerface_root(): """ diff --git a/test/test_datasets.py b/test/test_datasets.py index a6b520995bb..3844a08d22d 100644 --- a/test/test_datasets.py +++ b/test/test_datasets.py @@ -10,7 +10,7 @@ import torchvision from torchvision.datasets import utils from common_utils import get_tmp_dir -from fakedata_generation import mnist_root, imagenet_root, \ +from fakedata_generation import 
mnist_root, \ cityscapes_root, svhn_root, places365_root, widerface_root, stl10_root import xml.etree.ElementTree as ET from urllib.request import Request, urlopen @@ -146,16 +146,6 @@ def test_fashionmnist(self, mock_download_extract): img, target = dataset[0] self.assertEqual(dataset.class_to_idx[dataset.classes[0]], target) - # @mock.patch('torchvision.datasets.imagenet._verify_archive') - # @unittest.skipIf(not HAS_SCIPY, "scipy unavailable") - # def test_imagenet(self, mock_verify): - # with imagenet_root() as root: - # dataset = torchvision.datasets.ImageNet(root, split='train') - # self.generic_classification_dataset_test(dataset) - - # dataset = torchvision.datasets.ImageNet(root, split='val') - # self.generic_classification_dataset_test(dataset) - @mock.patch('torchvision.datasets.WIDERFace._check_integrity') @unittest.skipIf('win' in sys.platform, 'temporarily disabled on Windows') def test_widerface(self, mock_check_integrity): @@ -490,86 +480,31 @@ def inject_fake_data(self, tmpdir, config): return num_images_per_category * len(categories) -# @mock.patch('torchvision.datasets.imagenet._verify_archive') class ImageNetTestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.ImageNet REQUIRED_PACKAGES = ['scipy'] - def imagenet_root(self, root): - import scipy.io as sio - import tarfile - - WNID = 'n01234567' - CLS = 'fakedata' - - def _make_image(file): - PIL.Image.fromarray(np.zeros((32, 32, 3), dtype=np.uint8)).save(file) - - def _make_tar(archive, content, arcname=None, compress=False): - mode = 'w:gz' if compress else 'w' - if arcname is None: - arcname = os.path.basename(content) - with tarfile.open(archive, mode) as fh: - fh.add(content, arcname=arcname) - - def _make_train_archive(root): - with get_tmp_dir() as tmp: - wnid_dir = os.path.join(tmp, WNID) - os.mkdir(wnid_dir) - - _make_image(os.path.join(wnid_dir, WNID + '_1.JPEG')) - - wnid_archive = wnid_dir + '.tar' - _make_tar(wnid_archive, wnid_dir) - - train_archive = 
os.path.join(root, 'ILSVRC2012_img_train.tar') - _make_tar(train_archive, wnid_archive) - # from torchvision.datasets.utils import extract_archive - # extract_archive(train_archive, os.path.join(root, 'train')) - - def _make_val_archive(root): - with get_tmp_dir() as tmp: - val_image = os.path.join(tmp, 'ILSVRC2012_val_00000001.JPEG') - _make_image(val_image) - - val_archive = os.path.join(root, 'ILSVRC2012_img_val.tar') - _make_tar(val_archive, val_image) - - def _make_devkit_archive(root): - with get_tmp_dir() as tmp: - data_dir = os.path.join(tmp, 'data') - os.mkdir(data_dir) - - meta_file = os.path.join(data_dir, 'meta.mat') - synsets = np.core.records.fromarrays([ - (0.0, 1.0), - (WNID, ''), - (CLS, ''), - ('fakedata for the torchvision testsuite', ''), - (0.0, 1.0), - ], names=['ILSVRC2012_ID', 'WNID', 'words', 'gloss', 'num_children']) - sio.savemat(meta_file, {'synsets': synsets}) - - groundtruth_file = os.path.join(data_dir, - 'ILSVRC2012_validation_ground_truth.txt') - with open(groundtruth_file, 'w') as fh: - fh.write('0\n') - - devkit_name = 'ILSVRC2012_devkit_t12' - devkit_archive = os.path.join(root, devkit_name + '.tar.gz') - _make_tar(devkit_archive, tmp, arcname=devkit_name, compress=True) - - _make_train_archive(root) - _make_val_archive(root) - _make_devkit_archive(root) - - CONFIGS = datasets_utils.combinations_grid(split=('train', 'val')) def inject_fake_data(self, tmpdir, config): + wnid = 'n01234567' + tmpdir = pathlib.Path(tmpdir) - self.imagenet_root(tmpdir) + datasets_utils.create_image_folder( + root=tmpdir, + name=tmpdir / 'train' / wnid / wnid, + file_name_fn=lambda image_idx: f"{wnid}_{image_idx}.JPEG", + num_examples=1, + ) + datasets_utils.create_image_folder( + root=tmpdir, + name=tmpdir / 'val' / wnid, + file_name_fn=lambda _: f"ILSVRC2012_val_00000001.JPEG", + num_examples=1, + ) + wnid_to_classes = {wnid: [1]} + torch.save((wnid_to_classes, None), tmpdir / 'meta.bin') return 1 diff --git a/torchvision/datasets/imagenet.py 
b/torchvision/datasets/imagenet.py index e0a17de2702..6dfc9bfebfd 100644 --- a/torchvision/datasets/imagenet.py +++ b/torchvision/datasets/imagenet.py @@ -97,11 +97,10 @@ def load_meta_file(root: str, file: Optional[str] = None) -> Tuple[Dict[str, str def _verify_archive(root: str, file: str, md5: str) -> None: - return - # if not check_integrity(os.path.join(root, file), md5): - # msg = ("The archive {} is not present in the root directory or is corrupted. " - # "You need to download it externally and place it in {}.") - # raise RuntimeError(msg.format(file, root)) + if not check_integrity(os.path.join(root, file), md5): + msg = ("The archive {} is not present in the root directory or is corrupted. " + "You need to download it externally and place it in {}.") + raise RuntimeError(msg.format(file, root)) def parse_devkit_archive(root: str, file: Optional[str] = None) -> None: From 7599c58f82e06905d8f3e24f685b5b2ea5c26b84 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Thu, 11 Mar 2021 11:27:01 +0000 Subject: [PATCH 3/8] flake8 --- test/test_datasets.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/test_datasets.py b/test/test_datasets.py index 3844a08d22d..d2bd344e976 100644 --- a/test/test_datasets.py +++ b/test/test_datasets.py @@ -483,8 +483,8 @@ def inject_fake_data(self, tmpdir, config): class ImageNetTestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.ImageNet REQUIRED_PACKAGES = ['scipy'] - CONFIGS = datasets_utils.combinations_grid(split=('train', 'val')) + def inject_fake_data(self, tmpdir, config): wnid = 'n01234567' @@ -498,8 +498,8 @@ def inject_fake_data(self, tmpdir, config): ) datasets_utils.create_image_folder( root=tmpdir, - name=tmpdir / 'val' / wnid, - file_name_fn=lambda _: f"ILSVRC2012_val_00000001.JPEG", + name=tmpdir / 'val' / wnid, + file_name_fn=lambda _: "ILSVRC2012_val_00000001.JPEG", num_examples=1, ) From aa1f7918fa6296b35a6f87f83e7275c33f460302 Mon Sep 17 00:00:00 2001 From: Nicolas
Hug Date: Thu, 11 Mar 2021 11:31:05 +0000 Subject: [PATCH 4/8] Only create dir depending on config --- test/test_datasets.py | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/test/test_datasets.py b/test/test_datasets.py index d2bd344e976..bd304daf9a5 100644 --- a/test/test_datasets.py +++ b/test/test_datasets.py @@ -486,22 +486,23 @@ class ImageNetTestCase(datasets_utils.ImageDatasetTestCase): CONFIGS = datasets_utils.combinations_grid(split=('train', 'val')) def inject_fake_data(self, tmpdir, config): - wnid = 'n01234567' - tmpdir = pathlib.Path(tmpdir) - datasets_utils.create_image_folder( - root=tmpdir, - name=tmpdir / 'train' / wnid / wnid, - file_name_fn=lambda image_idx: f"{wnid}_{image_idx}.JPEG", - num_examples=1, - ) - datasets_utils.create_image_folder( - root=tmpdir, - name=tmpdir / 'val' / wnid, - file_name_fn=lambda _: "ILSVRC2012_val_00000001.JPEG", - num_examples=1, - ) + wnid = 'n01234567' + if config['split'] == 'train': + datasets_utils.create_image_folder( + root=tmpdir, + name=tmpdir / 'train' / wnid / wnid, + file_name_fn=lambda image_idx: f"{wnid}_{image_idx}.JPEG", + num_examples=1, + ) + else: + datasets_utils.create_image_folder( + root=tmpdir, + name=tmpdir / 'val' / wnid, + file_name_fn=lambda _: "ILSVRC2012_val_00000001.JPEG", + num_examples=1, + ) wnid_to_classes = {wnid: [1]} torch.save((wnid_to_classes, None), tmpdir / 'meta.bin') From 602bd4e2b9fcb9f11aa3db3a66bb3ef4b1f0bd59 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Thu, 11 Mar 2021 12:01:58 +0000 Subject: [PATCH 5/8] remove double decorator --- test/fakedata_generation.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/fakedata_generation.py b/test/fakedata_generation.py index ae1f611b862..473c15d19c4 100644 --- a/test/fakedata_generation.py +++ b/test/fakedata_generation.py @@ -143,7 +143,6 @@ def _make_meta_file(file, classes_key): yield root -@contextlib.contextmanager @contextlib.contextmanager def widerface_root(): 
""" From f9d4f9e63b33f0b004b46c1b6b7d4b25ad3c4d4d Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Thu, 11 Mar 2021 13:48:00 +0000 Subject: [PATCH 6/8] Addressed comments --- test/test_datasets.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/test/test_datasets.py b/test/test_datasets.py index bd304daf9a5..11114ae5b36 100644 --- a/test/test_datasets.py +++ b/test/test_datasets.py @@ -482,7 +482,7 @@ def inject_fake_data(self, tmpdir, config): class ImageNetTestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.ImageNet - REQUIRED_PACKAGES = ['scipy'] + REQUIRED_PACKAGES = ('scipy',) CONFIGS = datasets_utils.combinations_grid(split=('train', 'val')) def inject_fake_data(self, tmpdir, config): @@ -490,23 +490,25 @@ def inject_fake_data(self, tmpdir, config): wnid = 'n01234567' if config['split'] == 'train': + num_examples = 3 datasets_utils.create_image_folder( root=tmpdir, name=tmpdir / 'train' / wnid / wnid, file_name_fn=lambda image_idx: f"{wnid}_{image_idx}.JPEG", - num_examples=1, + num_examples=num_examples, ) else: + num_examples = 1 datasets_utils.create_image_folder( root=tmpdir, name=tmpdir / 'val' / wnid, - file_name_fn=lambda _: "ILSVRC2012_val_00000001.JPEG", - num_examples=1, + file_name_fn=lambda image_idx: f"ILSVRC2012_val_0000000{image_idx}.JPEG", + num_examples=num_examples, ) wnid_to_classes = {wnid: [1]} torch.save((wnid_to_classes, None), tmpdir / 'meta.bin') - return 1 + return num_examples class CIFAR10TestCase(datasets_utils.ImageDatasetTestCase): From d4c66e4efd83e894f4c7555dd42b1c260f93e4e4 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Thu, 11 Mar 2021 13:53:07 +0000 Subject: [PATCH 7/8] Only override download default if the default is true-y --- test/datasets_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/datasets_utils.py b/test/datasets_utils.py index 0686c6501d1..8387d26fe5d 100644 --- a/test/datasets_utils.py +++ b/test/datasets_utils.py @@ -312,8
+312,8 @@ def create_dataset( patch_checks = inject_fake_data special_kwargs, other_kwargs = self._split_kwargs(kwargs) - if "download" in self._HAS_SPECIAL_KWARG: - special_kwargs["download"] = None if self.DATASET_CLASS.__name__ == 'ImageNet' else False + if "download" in self._HAS_SPECIAL_KWARG and special_kwargs.get("download", False): + special_kwargs["download"] = False config.update(other_kwargs) patchers = self._patch_download_extract() From fdb7075e9d044388a13bf5f8c518bfaf159bdb3f Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Thu, 11 Mar 2021 13:58:31 +0000 Subject: [PATCH 8/8] added comment --- test/datasets_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/datasets_utils.py b/test/datasets_utils.py index 8387d26fe5d..12f761c070e 100644 --- a/test/datasets_utils.py +++ b/test/datasets_utils.py @@ -313,6 +313,7 @@ def create_dataset( special_kwargs, other_kwargs = self._split_kwargs(kwargs) if "download" in self._HAS_SPECIAL_KWARG and special_kwargs.get("download", False): + # override the download param to False if its default is truthy special_kwargs["download"] = False config.update(other_kwargs)