Merge branch 'main' into prototype/ssd_multiweight

datumbox · web-flow · commit 19bb0f75a4e9 · 2021-11-08T11:15:55.000Z
diff --git a/test/builtin_dataset_mocks.py b/test/builtin_dataset_mocks.py
@@ -452,11 +452,7 @@ def caltech256(info, root, config):
 
 @dataset_mocks.register_mock_data_fn
 def imagenet(info, root, config):
-    devkit_root = root / "ILSVRC2012_devkit_t12"
-    devkit_root.mkdir()
-
     wnids = tuple(info.extra.wnid_to_category.keys())
-
     if config.split == "train":
         images_root = root / "ILSVRC2012_img_train"
 
@@ -470,7 +466,7 @@ def imagenet(info, root, config):
                 num_examples=1,
             )
             make_tar(images_root, f"{wnid}.tar", files[0].parent)
-    else:
+    elif config.split == "val":
         num_samples = 3
         files = create_image_folder(
             root=root,
@@ -479,14 +475,26 @@ def imagenet(info, root, config):
             num_examples=num_samples,
         )
         images_root = files[0].parent
+    else:  # config.split == "test"
+        images_root = root / "ILSVRC2012_img_test_v10102019"
 
-        data_root = devkit_root / "data"
-        data_root.mkdir()
-        with open(data_root / "ILSVRC2012_validation_ground_truth.txt", "w") as file:
-            for label in torch.randint(0, len(wnids), (num_samples,)).tolist():
-                file.write(f"{label}\n")
+        num_samples = 3
 
+        create_image_folder(
+            root=images_root,
+            name="test",
+            file_name_fn=lambda image_idx: f"ILSVRC2012_test_{image_idx + 1:08d}.JPEG",
+            num_examples=num_samples,
+        )
     make_tar(root, f"{images_root.name}.tar", images_root)
+
+    devkit_root = root / "ILSVRC2012_devkit_t12"
+    devkit_root.mkdir()
+    data_root = devkit_root / "data"
+    data_root.mkdir()
+    with open(data_root / "ILSVRC2012_validation_ground_truth.txt", "w") as file:
+        for label in torch.randint(0, len(wnids), (num_samples,)).tolist():
+            file.write(f"{label}\n")
     make_tar(root, f"{devkit_root}.tar.gz", devkit_root, compression="gz")
 
     return num_samples
diff --git a/test/test_datasets.py b/test/test_datasets.py
@@ -1914,11 +1914,13 @@ def inject_fake_data(self, tmpdir, config):
 
     def test_flow(self):
         # Make sure flow exists for train split, and make sure there are as many flow values as (pairs of) images
+        h, w = self.FLOW_H, self.FLOW_W
+        expected_flow = np.arange(2 * h * w).reshape(h, w, 2).transpose(2, 0, 1)
         with self.create_dataset(split="train") as (dataset, _):
             assert dataset._flow_list and len(dataset._flow_list) == len(dataset._image_list)
             for _, _, flow in dataset:
-                assert flow.shape == (2, self.FLOW_H, self.FLOW_W)
-                np.testing.assert_allclose(flow, np.arange(flow.size).reshape(flow.shape))
+                assert flow.shape == (2, h, w)
+                np.testing.assert_allclose(flow, expected_flow)
 
         # Make sure flow is always None for test split
         with self.create_dataset(split="test") as (dataset, _):
@@ -2041,11 +2043,14 @@ def inject_fake_data(self, tmpdir, config):
     def test_flow(self, config):
         # Make sure flow always exists, and make sure there are as many flow values as (pairs of) images
         # Also make sure the flow is properly decoded
+
+        h, w = self.FLOW_H, self.FLOW_W
+        expected_flow = np.arange(2 * h * w).reshape(h, w, 2).transpose(2, 0, 1)
         with self.create_dataset(config=config) as (dataset, _):
             assert dataset._flow_list and len(dataset._flow_list) == len(dataset._image_list)
             for _, _, flow in dataset:
-                assert flow.shape == (2, self.FLOW_H, self.FLOW_W)
-                np.testing.assert_allclose(flow, np.arange(flow.size).reshape(flow.shape))
+                assert flow.shape == (2, h, w)
+                np.testing.assert_allclose(flow, expected_flow)
 
 
 class FlyingThings3DTestCase(datasets_utils.ImageDatasetTestCase):
@@ -2095,11 +2100,16 @@ def inject_fake_data(self, tmpdir, config):
 
     @datasets_utils.test_all_configs
     def test_flow(self, config):
+        h, w = self.FLOW_H, self.FLOW_W
+        expected_flow = np.arange(3 * h * w).reshape(h, w, 3).transpose(2, 0, 1)
+        expected_flow = np.flip(expected_flow, axis=1)
+        expected_flow = expected_flow[:2, :, :]
+
         with self.create_dataset(config=config) as (dataset, _):
             assert dataset._flow_list and len(dataset._flow_list) == len(dataset._image_list)
             for _, _, flow in dataset:
                 assert flow.shape == (2, self.FLOW_H, self.FLOW_W)
-                # We don't check the values because the reshaping and flipping makes it hard to figure out
+                np.testing.assert_allclose(flow, expected_flow)
 
     def test_bad_input(self):
         with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"):
diff --git a/test/test_models.py b/test/test_models.py
@@ -1,7 +1,10 @@
+import contextlib
 import functools
 import io
 import operator
 import os
+import pkgutil
+import sys
 import traceback
 import warnings
 from collections import OrderedDict
@@ -14,7 +17,6 @@
 from common_utils import map_nested_tensor_object, freeze_rng_state, set_rng_seed, cpu_and_gpu, needs_cuda
 from torchvision import models
 
-
 ACCEPT = os.getenv("EXPECTTEST_ACCEPT", "0") == "1"
 
 
@@ -23,6 +25,51 @@ def get_models_from_module(module):
     return [v for k, v in module.__dict__.items() if callable(v) and k[0].lower() == k[0] and k[0] != "_"]
 
 
+@pytest.fixture
+def disable_weight_loading(mocker):
+    """When testing models, the two slowest operations are the downloading of the weights to a file and loading them
+    into the model. Unless you want to test against specific weights, these steps can be disabled without any
+    drawbacks.
+
+    Including this fixture into the signature of your test, i.e. `test_foo(disable_weight_loading)`, will recurse
+    through all modules in `torchvision.models` and will patch all occurrences of the function
+    `load_state_dict_from_url` as well as the method `load_state_dict` on all subclasses of `nn.Module` to be
+    no-ops.
+
+    .. warning::
+
+        Loaded models are still executable as normal, but will always have random weights. Make sure to not use this
+        fixture if you want to compare the model output against reference values.
+
+    """
+    starting_point = models
+    function_name = "load_state_dict_from_url"
+    method_name = "load_state_dict"
+
+    module_names = {info.name for info in pkgutil.walk_packages(starting_point.__path__, f"{starting_point.__name__}.")}
+    targets = {f"torchvision._internally_replaced_utils.{function_name}", f"torch.nn.Module.{method_name}"}
+    for name in module_names:
+        module = sys.modules.get(name)
+        if not module:
+            continue
+
+        if function_name in module.__dict__:
+            targets.add(f"{module.__name__}.{function_name}")
+
+        targets.update(
+            {
+                f"{module.__name__}.{obj.__name__}.{method_name}"
+                for obj in module.__dict__.values()
+                if isinstance(obj, type) and issubclass(obj, nn.Module) and method_name in obj.__dict__
+            }
+        )
+
+    for target in targets:
+        # See https://github.com/pytorch/vision/pull/4867#discussion_r743677802 for details
+        with contextlib.suppress(AttributeError):
+            mocker.patch(target)
+
+
 def _get_expected_file(name=None):
     # Determine expected file based on environment
     expected_file_base = get_relative_path(os.path.realpath(__file__), "expect")
@@ -762,7 +809,7 @@ def test_quantized_classification_model(model_fn):
 
 
 @pytest.mark.parametrize("model_fn", get_models_from_module(models.detection))
-def test_detection_model_trainable_backbone_layers(model_fn):
+def test_detection_model_trainable_backbone_layers(model_fn, disable_weight_loading):
     model_name = model_fn.__name__
     max_trainable = _model_tests_values[model_name]["max_trainable"]
     n_trainable_params = []
diff --git a/torchvision/datasets/_optical_flow.py b/torchvision/datasets/_optical_flow.py
@@ -376,7 +376,7 @@ def _read_flo(file_name):
         w = int(np.fromfile(f, "<i4", count=1))
         h = int(np.fromfile(f, "<i4", count=1))
         data = np.fromfile(f, "<f4", count=2 * w * h)
-        return data.reshape(2, h, w)
+        return data.reshape(h, w, 2).transpose(2, 0, 1)
 
 
 def _read_16bits_png_with_flow_and_valid_mask(file_name):
diff --git a/torchvision/prototype/datasets/_builtin/imagenet.py b/torchvision/prototype/datasets/_builtin/imagenet.py
@@ -34,11 +34,17 @@ def _make_info(self) -> DatasetInfo:
             type=DatasetType.IMAGE,
             categories=categories,
             homepage="https://www.image-net.org/",
-            valid_options=dict(split=("train", "val")),
+            valid_options=dict(split=("train", "val", "test")),
             extra=dict(
                 wnid_to_category=FrozenMapping(zip(wnids, categories)),
                 category_to_wnid=FrozenMapping(zip(categories, wnids)),
-                sizes=FrozenMapping([(DatasetConfig(split="train"), 1281167), (DatasetConfig(split="val"), 50000)]),
+                sizes=FrozenMapping(
+                    [
+                        (DatasetConfig(split="train"), 1_281_167),
+                        (DatasetConfig(split="val"), 50_000),
+                        (DatasetConfig(split="test"), 100_000),
+                    ]
+                ),
             ),
         )
 
@@ -53,17 +59,15 @@ def category_to_wnid(self) -> Dict[str, str]:
     def wnid_to_category(self) -> Dict[str, str]:
         return cast(Dict[str, str], self.info.extra.wnid_to_category)
 
+    _IMAGES_CHECKSUMS = {
+        "train": "b08200a27a8e34218a0e58fde36b0fe8f73bc377f4acea2d91602057c3ca45bb",
+        "val": "c7e06a6c0baccf06d8dbeb6577d71efff84673a5dbdd50633ab44f8ea0456ae0",
+        "test_v10102019": "9cf7f8249639510f17d3d8a0deb47cd22a435886ba8e29e2b3223e65a4079eb4",
+    }
+
     def resources(self, config: DatasetConfig) -> List[OnlineResource]:
-        if config.split == "train":
-            images = HttpResource(
-                "ILSVRC2012_img_train.tar",
-                sha256="b08200a27a8e34218a0e58fde36b0fe8f73bc377f4acea2d91602057c3ca45bb",
-            )
-        else:  # config.split == "val"
-            images = HttpResource(
-                "ILSVRC2012_img_val.tar",
-                sha256="c7e06a6c0baccf06d8dbeb6577d71efff84673a5dbdd50633ab44f8ea0456ae0",
-            )
+        name = "test_v10102019" if config.split == "test" else config.split
+        images = HttpResource(f"ILSVRC2012_img_{name}.tar", sha256=self._IMAGES_CHECKSUMS[name])
 
         devkit = HttpResource(
             "ILSVRC2012_devkit_t12.tar.gz",
@@ -81,11 +85,11 @@ def _collate_train_data(self, data: Tuple[str, io.IOBase]) -> Tuple[Tuple[int, s
         label = self.categories.index(category)
         return (label, category, wnid), data
 
-    _VAL_IMAGE_NAME_PATTERN = re.compile(r"ILSVRC2012_val_(?P<id>\d{8})[.]JPEG")
+    _VAL_TEST_IMAGE_NAME_PATTERN = re.compile(r"ILSVRC2012_(val|test)_(?P<id>\d{8})[.]JPEG")
 
-    def _val_image_key(self, data: Tuple[str, Any]) -> int:
+    def _val_test_image_key(self, data: Tuple[str, Any]) -> int:
         path = pathlib.Path(data[0])
-        return int(self._VAL_IMAGE_NAME_PATTERN.match(path.name).group("id"))  # type: ignore[union-attr]
+        return int(self._VAL_TEST_IMAGE_NAME_PATTERN.match(path.name).group("id"))  # type: ignore[union-attr]
 
     def _collate_val_data(
         self, data: Tuple[Tuple[int, int], Tuple[str, io.IOBase]]
@@ -96,9 +100,12 @@ def _collate_val_data(
         wnid = self.category_to_wnid[category]
         return (label, category, wnid), image_data
 
+    def _collate_test_data(self, data: Tuple[str, io.IOBase]) -> Tuple[Tuple[None, None, None], Tuple[str, io.IOBase]]:
+        return (None, None, None), data
+
     def _collate_and_decode_sample(
         self,
-        data: Tuple[Tuple[int, str, str], Tuple[str, io.IOBase]],
+        data: Tuple[Tuple[Optional[int], Optional[str], Optional[str]], Tuple[str, io.IOBase]],
         *,
         decoder: Optional[Callable[[io.IOBase], torch.Tensor]],
     ) -> Dict[str, Any]:
@@ -108,7 +115,7 @@ def _collate_and_decode_sample(
         return dict(
             path=path,
             image=decoder(buffer) if decoder else buffer,
-            label=torch.tensor(label),
+            label=label,
             category=category,
             wnid=wnid,
         )
@@ -129,7 +136,7 @@ def _make_datapipe(
             dp = TarArchiveReader(images_dp)
             dp = Shuffler(dp, buffer_size=INFINITE_BUFFER_SIZE)
             dp = Mapper(dp, self._collate_train_data)
-        else:
+        elif config.split == "val":
             devkit_dp = TarArchiveReader(devkit_dp)
             devkit_dp = Filter(devkit_dp, path_comparator("name", "ILSVRC2012_validation_ground_truth.txt"))
             devkit_dp = LineReader(devkit_dp, return_path=False)
@@ -141,10 +148,13 @@ def _make_datapipe(
                 devkit_dp,
                 images_dp,
                 key_fn=getitem(0),
-                ref_key_fn=self._val_image_key,
+                ref_key_fn=self._val_test_image_key,
                 buffer_size=INFINITE_BUFFER_SIZE,
             )
             dp = Mapper(dp, self._collate_val_data)
+        else:  # config.split == "test"
+            dp = Shuffler(images_dp, buffer_size=INFINITE_BUFFER_SIZE)
+            dp = Mapper(dp, self._collate_test_data)
 
         return Mapper(dp, self._collate_and_decode_sample, fn_kwargs=dict(decoder=decoder))
 
diff --git a/torchvision/prototype/models/detection/__init__.py b/torchvision/prototype/models/detection/__init__.py
@@ -3,3 +3,4 @@
 from .mask_rcnn import *
 from .retinanet import *
 from .ssd import *
+from .ssdlite import *
diff --git a/torchvision/prototype/models/detection/ssdlite.py b/torchvision/prototype/models/detection/ssdlite.py