pytorch · NicolasHug · Mar 22, 2024 · Mar 22, 2024 · Mar 22, 2024 · Mar 22, 2024
diff --git a/torchtext/datasets/ag_news.py b/torchtext/datasets/ag_news.py
@@ -2,8 +2,6 @@
 from functools import partial
 from typing import Union, Tuple
 
-from torchdata.datapipes.iter import FileOpener, IterableWrapper
-from torchtext._download_hooks import HttpReader
 from torchtext._internal.module_utils import is_module_available
 from torchtext.data.datasets_utils import (
     _wrap_split_argument,
@@ -65,6 +63,7 @@ def AG_NEWS(root: str, split: Union[Tuple[str], str]):
         raise ModuleNotFoundError(
             "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data"
         )
+    from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper  # noqa
 
     url_dp = IterableWrapper([URL[split]])
     cache_dp = url_dp.on_disk_cache(

diff --git a/torchtext/datasets/amazonreviewfull.py b/torchtext/datasets/amazonreviewfull.py
@@ -2,8 +2,6 @@
 from functools import partial
 from typing import Union, Tuple
 
-from torchdata.datapipes.iter import FileOpener, IterableWrapper
-from torchtext._download_hooks import GDriveReader
 from torchtext._internal.module_utils import is_module_available
 from torchtext.data.datasets_utils import (
     _wrap_split_argument,
@@ -79,6 +77,7 @@ def AmazonReviewFull(root: str, split: Union[Tuple[str], str]):
         raise ModuleNotFoundError(
             "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data"
         )
+    from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper  # noqa
 
     url_dp = IterableWrapper([URL])
     cache_compressed_dp = url_dp.on_disk_cache(

diff --git a/torchtext/datasets/amazonreviewpolarity.py b/torchtext/datasets/amazonreviewpolarity.py
@@ -2,8 +2,6 @@
 from functools import partial
 from typing import Union, Tuple
 
-from torchdata.datapipes.iter import FileOpener, IterableWrapper
-from torchtext._download_hooks import GDriveReader
 from torchtext._internal.module_utils import is_module_available
 from torchtext.data.datasets_utils import (
     _wrap_split_argument,
@@ -76,6 +74,7 @@ def AmazonReviewPolarity(root: str, split: Union[Tuple[str], str]):
         raise ModuleNotFoundError(
             "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data"
         )
+    from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper  # noqa
 
     url_dp = IterableWrapper([URL])
     cache_compressed_dp = url_dp.on_disk_cache(

diff --git a/torchtext/datasets/cc100.py b/torchtext/datasets/cc100.py
@@ -1,8 +1,7 @@
 import os.path
 from functools import partial
 
-from torchdata.datapipes.iter import FileOpener, IterableWrapper
-from torchtext._download_hooks import HttpReader
+from torchtext._internal.module_utils import is_module_available
 from torchtext.data.datasets_utils import (
     _create_dataset_directory,
 )
@@ -167,6 +166,11 @@ def CC100(root: str, language_code: str = "en"):
     """
     if language_code not in VALID_CODES:
         raise ValueError(f"Invalid language code {language_code}")
+    if not is_module_available("torchdata"):
+        raise ModuleNotFoundError(
+            "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data"
+        )
+    from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper  # noqa
 
     url = URL % language_code
     url_dp = IterableWrapper([url])

diff --git a/torchtext/datasets/cnndm.py b/torchtext/datasets/cnndm.py
@@ -3,12 +3,6 @@
 from functools import partial
 from typing import Union, Set, Tuple
 
-from torchdata.datapipes.iter import (
-    FileOpener,
-    IterableWrapper,
-    OnlineReader,
-    GDriveReader,
-)
 from torchtext._internal.module_utils import is_module_available
 from torchtext.data.datasets_utils import (
     _wrap_split_argument,
@@ -141,6 +135,12 @@ def CNNDM(root: str, split: Union[Tuple[str], str]):
         raise ModuleNotFoundError(
             "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data"
         )
+    from torchdata.datapipes.iter import (  # noqa
+        FileOpener,
+        IterableWrapper,
+        OnlineReader,
+        GDriveReader,
+    )
 
     cnn_dp = _load_stories(root, "cnn", split)
     dailymail_dp = _load_stories(root, "dailymail", split)

diff --git a/torchtext/datasets/cola.py b/torchtext/datasets/cola.py
@@ -3,8 +3,6 @@
 from functools import partial
 from typing import Union, Tuple
 
-from torchdata.datapipes.iter import FileOpener, IterableWrapper
-from torchtext._download_hooks import HttpReader
 from torchtext._internal.module_utils import is_module_available
 from torchtext.data.datasets_utils import _create_dataset_directory, _wrap_split_argument
 
@@ -76,6 +74,7 @@ def CoLA(root: str, split: Union[Tuple[str], str]):
         raise ModuleNotFoundError(
             "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data"
         )
+    from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper  # noqa
 
     url_dp = IterableWrapper([URL])
     cache_compressed_dp = url_dp.on_disk_cache(

diff --git a/torchtext/datasets/conll2000chunking.py b/torchtext/datasets/conll2000chunking.py
@@ -2,8 +2,6 @@
 from functools import partial
 from typing import Union, Tuple
 
-from torchdata.datapipes.iter import FileOpener, IterableWrapper
-from torchtext._download_hooks import HttpReader
 from torchtext._internal.module_utils import is_module_available
 from torchtext.data.datasets_utils import (
     _wrap_split_argument,
@@ -68,6 +66,7 @@ def CoNLL2000Chunking(root: str, split: Union[Tuple[str], str]):
         raise ModuleNotFoundError(
             "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data"
         )
+    from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper  # noqa
 
     url_dp = IterableWrapper([URL[split]])
 

diff --git a/torchtext/datasets/dbpedia.py b/torchtext/datasets/dbpedia.py
@@ -2,8 +2,6 @@
 from functools import partial
 from typing import Union, Tuple
 
-from torchdata.datapipes.iter import FileOpener, IterableWrapper
-from torchtext._download_hooks import GDriveReader
 from torchtext._internal.module_utils import is_module_available
 from torchtext.data.datasets_utils import (
     _wrap_split_argument,
@@ -75,6 +73,7 @@ def DBpedia(root: str, split: Union[Tuple[str], str]):
         raise ModuleNotFoundError(
             "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data"
         )
+    from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper  # noqa
 
     url_dp = IterableWrapper([URL])
     cache_compressed_dp = url_dp.on_disk_cache(

diff --git a/torchtext/datasets/enwik9.py b/torchtext/datasets/enwik9.py
@@ -1,8 +1,6 @@
 import os
 from functools import partial
 
-from torchdata.datapipes.iter import FileOpener, IterableWrapper
-from torchtext._download_hooks import HttpReader
 from torchtext._internal.module_utils import is_module_available
 from torchtext.data.datasets_utils import _create_dataset_directory
 
@@ -50,6 +48,7 @@ def EnWik9(root: str):
         raise ModuleNotFoundError(
             "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data"
         )
+    from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper  # noqa
 
     url_dp = IterableWrapper([URL])
     cache_compressed_dp = url_dp.on_disk_cache(

diff --git a/torchtext/datasets/imdb.py b/torchtext/datasets/imdb.py
@@ -3,8 +3,6 @@
 from pathlib import Path
 from typing import Tuple, Union
 
-from torchdata.datapipes.iter import FileOpener, IterableWrapper
-from torchtext._download_hooks import HttpReader
 from torchtext._internal.module_utils import is_module_available
 from torchtext.data.datasets_utils import _create_dataset_directory
 from torchtext.data.datasets_utils import _wrap_split_argument
@@ -89,6 +87,7 @@ def IMDB(root: str, split: Union[Tuple[str], str]):
         raise ModuleNotFoundError(
             "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data"
         )
+    from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper  # noqa
 
     url_dp = IterableWrapper([URL])
 

diff --git a/torchtext/datasets/iwslt2016.py b/torchtext/datasets/iwslt2016.py
@@ -1,8 +1,6 @@
 import os
 from functools import partial
 
-from torchdata.datapipes.iter import FileOpener, IterableWrapper
-from torchtext._download_hooks import GDriveReader
 from torchtext._internal.module_utils import is_module_available
 from torchtext.data.datasets_utils import (
     _clean_files,
@@ -219,6 +217,7 @@ def IWSLT2016(
         raise ModuleNotFoundError(
             "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data"
         )
+    from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper  # noqa
 
     if not isinstance(language_pair, list) and not isinstance(language_pair, tuple):
         raise ValueError("language_pair must be list or tuple but got {} instead".format(type(language_pair)))

diff --git a/torchtext/datasets/iwslt2017.py b/torchtext/datasets/iwslt2017.py
@@ -1,8 +1,6 @@
 import os
 from functools import partial
 
-from torchdata.datapipes.iter import FileOpener, IterableWrapper
-from torchtext._download_hooks import GDriveReader
 from torchtext._internal.module_utils import is_module_available
 from torchtext.data.datasets_utils import (
     _clean_files,
@@ -184,6 +182,7 @@ def IWSLT2017(root=".data", split=("train", "valid", "test"), language_pair=("de
         raise ModuleNotFoundError(
             "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data"
         )
+    from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper  # noqa
 
     valid_set = "dev2010"
     test_set = "tst2010"

diff --git a/torchtext/datasets/mnli.py b/torchtext/datasets/mnli.py
@@ -3,11 +3,9 @@
 import os
 from functools import partial
 
-from torchdata.datapipes.iter import FileOpener, IterableWrapper
-
 # we import HttpReader from _download_hooks so we can swap out public URLs
 # with interal URLs when the dataset is used within Facebook
-from torchtext._download_hooks import HttpReader
+
 from torchtext._internal.module_utils import is_module_available
 from torchtext.data.datasets_utils import (
     _create_dataset_directory,
@@ -89,6 +87,7 @@ def MNLI(root, split):
         raise ModuleNotFoundError(
             "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data"
         )
+    from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper  # noqa
 
     url_dp = IterableWrapper([URL])
     cache_compressed_dp = url_dp.on_disk_cache(

diff --git a/torchtext/datasets/mrpc.py b/torchtext/datasets/mrpc.py
@@ -3,7 +3,6 @@
 from functools import partial
 from typing import Union, Tuple
 
-from torchdata.datapipes.iter import FileOpener, HttpReader, IterableWrapper
 from torchtext._internal.module_utils import is_module_available
 from torchtext.data.datasets_utils import (
     _wrap_split_argument,
@@ -67,6 +66,7 @@ def MRPC(root: str, split: Union[Tuple[str], str]):
         raise ModuleNotFoundError(
             "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data"
         )
+    from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper  # noqa
 
     url_dp = IterableWrapper([URL[split]])
     # cache data on-disk with sanity check

diff --git a/torchtext/datasets/multi30k.py b/torchtext/datasets/multi30k.py
@@ -2,9 +2,8 @@
 from functools import partial
 from typing import Union, Tuple
 
-from torchdata.datapipes.iter import FileOpener, IterableWrapper
-from torchtext._download_hooks import GDriveReader  # noqa
-from torchtext._download_hooks import HttpReader
+# noqa
+
 from torchtext._internal.module_utils import is_module_available
 from torchtext.data.datasets_utils import (
     _wrap_split_argument,
@@ -89,6 +88,7 @@ def Multi30k(root: str, split: Union[Tuple[str], str], language_pair: Tuple[str]
         raise ModuleNotFoundError(
             "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data"
         )
+    from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper  # noqa
 
     url_dp = IterableWrapper([URL[split]])
 

diff --git a/torchtext/datasets/penntreebank.py b/torchtext/datasets/penntreebank.py
@@ -2,9 +2,8 @@
 from functools import partial
 from typing import Tuple, Union
 
-from torchdata.datapipes.iter import FileOpener, IterableWrapper
-from torchtext._download_hooks import GDriveReader  # noqa
-from torchtext._download_hooks import HttpReader
+# noqa
+
 from torchtext._internal.module_utils import is_module_available
 from torchtext.data.datasets_utils import (
     _wrap_split_argument,
@@ -70,6 +69,7 @@ def PennTreebank(root, split: Union[Tuple[str], str]):
         raise ModuleNotFoundError(
             "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data"
         )
+    from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper  # noqa
 
     url_dp = IterableWrapper([URL[split]])
     cache_dp = url_dp.on_disk_cache(

diff --git a/torchtext/datasets/qnli.py b/torchtext/datasets/qnli.py
@@ -3,11 +3,9 @@
 import os
 from functools import partial
 
-from torchdata.datapipes.iter import FileOpener, IterableWrapper
-
 # we import HttpReader from _download_hooks so we can swap out public URLs
 # with interal URLs when the dataset is used within Facebook
-from torchtext._download_hooks import HttpReader
+
 from torchtext._internal.module_utils import is_module_available
 from torchtext.data.datasets_utils import (
     _create_dataset_directory,
@@ -81,6 +79,7 @@ def QNLI(root, split):
         raise ModuleNotFoundError(
             "Package `torchdata` not found. Please install following instructions at `https://github.com/pytorch/data`"
         )
+    from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper  # noqa
 
     url_dp = IterableWrapper([URL])
     cache_compressed_dp = url_dp.on_disk_cache(

diff --git a/torchtext/datasets/qqp.py b/torchtext/datasets/qqp.py
@@ -1,8 +1,6 @@
 import os
 from functools import partial
 
-from torchdata.datapipes.iter import FileOpener, IterableWrapper
-from torchtext._download_hooks import HttpReader
 from torchtext._internal.module_utils import is_module_available
 from torchtext.data.datasets_utils import _create_dataset_directory
 
@@ -48,6 +46,7 @@ def QQP(root: str):
         raise ModuleNotFoundError(
             "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data"
         )
+    from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper  # noqa
 
     url_dp = IterableWrapper([URL])
     cache_dp = url_dp.on_disk_cache(

diff --git a/torchtext/datasets/rte.py b/torchtext/datasets/rte.py
@@ -3,11 +3,9 @@
 import os
 from functools import partial
 
-from torchdata.datapipes.iter import FileOpener, IterableWrapper
-
 # we import HttpReader from _download_hooks so we can swap out public URLs
 # with interal URLs when the dataset is used within Facebook
-from torchtext._download_hooks import HttpReader
+
 from torchtext._internal.module_utils import is_module_available
 from torchtext.data.datasets_utils import (
     _create_dataset_directory,
@@ -81,6 +79,7 @@ def RTE(root, split):
         raise ModuleNotFoundError(
             "Package `torchdata` not found. Please install following instructions at `https://github.com/pytorch/data`"
         )
+    from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper  # noqa
 
     url_dp = IterableWrapper([URL])
     cache_compressed_dp = url_dp.on_disk_cache(

diff --git a/torchtext/datasets/sogounews.py b/torchtext/datasets/sogounews.py
@@ -2,8 +2,6 @@
 from functools import partial
 from typing import Union, Tuple
 
-from torchdata.datapipes.iter import FileOpener, IterableWrapper
-from torchtext._download_hooks import GDriveReader
 from torchtext._internal.module_utils import is_module_available
 from torchtext.data.datasets_utils import (
     _wrap_split_argument,
@@ -79,6 +77,7 @@ def SogouNews(root: str, split: Union[Tuple[str], str]):
         raise ModuleNotFoundError(
             "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data"
         )
+    from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper  # noqa
 
     url_dp = IterableWrapper([URL])
     cache_compressed_dp = url_dp.on_disk_cache(

diff --git a/torchtext/datasets/squad1.py b/torchtext/datasets/squad1.py
@@ -2,8 +2,6 @@
 from functools import partial
 from typing import Union, Tuple
 
-from torchdata.datapipes.iter import FileOpener, IterableWrapper
-from torchtext._download_hooks import HttpReader
 from torchtext._internal.module_utils import is_module_available
 from torchtext.data.datasets_utils import (
     _wrap_split_argument,
@@ -62,6 +60,7 @@ def SQuAD1(root: str, split: Union[Tuple[str], str]):
         raise ModuleNotFoundError(
             "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data"
         )
+    from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper  # noqa
 
     url_dp = IterableWrapper([URL[split]])
     # cache data on-disk with sanity check