diff --git a/torchtext/_download_hooks.py b/torchtext/_download_hooks.py index 4bfe7b5eed..611692b08a 100644 --- a/torchtext/_download_hooks.py +++ b/torchtext/_download_hooks.py @@ -3,6 +3,10 @@ from tqdm import tqdm # This is to allow monkey-patching in fbcode from torch.hub import load_state_dict_from_url # noqa +from torchtext._internal.module_utils import is_module_available + +if is_module_available("torchdata"): + from torchdata.datapipes.iter import HttpReader # noqa F401 def _stream_response(r, chunk_size=16 * 1024): diff --git a/torchtext/experimental/datasets/sst2.py b/torchtext/experimental/datasets/sst2.py index fa15b73304..6a25bd7d99 100644 --- a/torchtext/experimental/datasets/sst2.py +++ b/torchtext/experimental/datasets/sst2.py @@ -10,10 +10,11 @@ ) if is_module_available("torchdata"): - from torchdata.datapipes.iter import ( - HttpReader, - IterableWrapper, - ) + from torchdata.datapipes.iter import IterableWrapper + # we import HttpReader from _download_hooks so we can swap out public URLs + # with internal URLs when the dataset is used within Facebook + from torchtext._download_hooks import HttpReader + NUM_LINES = { "train": 67349,