pytorch · pmeier · Apr 1, 2022 · Apr 1, 2022 · Apr 1, 2022 · Apr 1, 2022
diff --git a/.circleci/config.yml b/.circleci/config.yml
diff --git a/.circleci/config.yml.in b/.circleci/config.yml.in
@@ -168,9 +168,9 @@ commands:
       file_or_dir:
         type: string
     steps:
-      - run:
-          name: Install test utilities
-          command: pip install --progress-bar=off pytest pytest-mock
+      - pip_install:
+          args: pytest pytest-mock
+          descr: Install test utilities
       - run:
           name: Run tests
           command: pytest --junitxml=test-results/junit.xml -v --durations 20 <<parameters.file_or_dir>>

diff --git a/test/test_prototype_builtin_datasets.py b/test/test_prototype_builtin_datasets.py
@@ -1,12 +1,13 @@
 import functools
 import io
-import pickle
+import os
 from pathlib import Path
 
 import pytest
 import torch
 from builtin_dataset_mocks import parametrize_dataset_mocks, DATASET_MOCKS
 from torch.testing._comparison import assert_equal, TensorLikePair, ObjectPair
+from torch.utils.data.dataloader_experimental import DataLoader2
 from torch.utils.data.graph import traverse
 from torch.utils.data.graph_settings import get_all_graph_pipes
 from torchdata.datapipes.iter import IterDataPipe, Shuffler, ShardingFilter
@@ -116,13 +117,37 @@ def test_transformable(self, test_home, dataset_mock, config):
 
         next(iter(dataset.map(transforms.Identity())))
 
-    @parametrize_dataset_mocks(DATASET_MOCKS)
-    def test_serializable(self, test_home, dataset_mock, config):
+    @pytest.mark.parametrize("parallelism_mode", [None, "mp", "thread"])
+    @parametrize_dataset_mocks({name: mock for name, mock in DATASET_MOCKS.items() if name not in {"qmnist", "voc"}})
+    def test_pipeline(self, test_home, dataset_mock, config, parallelism_mode):
         dataset_mock.prepare(test_home, config)
-
         dataset = datasets.load(dataset_mock.name, **config)
 
-        pickle.dumps(dataset)
+        transform = transforms.Compose(transforms.DecodeImage(), transforms.Resize([3, 3]))
+
+        # TODO: add a .collate() here as soon as https://github.com/pytorch/vision/pull/5233 is resolved
+        dp = dataset.map(transform).batch(2, drop_last=parallelism_mode == "thread")
+
+        if parallelism_mode:
+            # Maybe we can make this is a static method of the data_loader?
+            try:
+                num_workers = len(os.sched_getaffinity(0))
+            except Exception:
+                num_workers = os.cpu_count() or 1
+        else:
+            num_workers = 0
+
+        dl = DataLoader2(
+            dp,
+            batch_size=None,
+            shuffle=True,
+            num_workers=num_workers,
+            parallelism_mode=parallelism_mode,
+            timeout=5 if num_workers > 0 else 0,
+        )
+
+        for _ in dl:
+            pass
 
     # TODO: we need to enforce not only that both a Shuffler and a ShardingFilter are part of the datapipe, but also
     #  that the Shuffler comes before the ShardingFilter. Early commits in https://github.com/pytorch/vision/pull/5680