diff --git a/.circleci/unittest/linux/scripts/install.sh b/.circleci/unittest/linux/scripts/install.sh index b5043d6065..fa56e74f7d 100755 --- a/.circleci/unittest/linux/scripts/install.sh +++ b/.circleci/unittest/linux/scripts/install.sh @@ -1,7 +1,6 @@ #!/usr/bin/env bash unset PYTORCH_VERSION -unset TORCHDATA_VERSION # For unittest, nightly PyTorch is used as the following section, # so no need to set PYTORCH_VERSION. # In fact, keeping PYTORCH_VERSION forces us to hardcode PyTorch version in config. @@ -30,10 +29,6 @@ printf "* Installing PyTorch\n" ) -printf "Installing torchdata nightly with portalocker\n" -pip install "portalocker>=2.0.0" -pip install --pre torchdata --index-url https://download.pytorch.org/whl/nightly/cpu - printf "* Installing torchtext\n" python setup.py develop diff --git a/.circleci/unittest/windows/scripts/install.sh b/.circleci/unittest/windows/scripts/install.sh index 9ce0558fcd..7eb4810408 100644 --- a/.circleci/unittest/windows/scripts/install.sh +++ b/.circleci/unittest/windows/scripts/install.sh @@ -1,7 +1,6 @@ #!/usr/bin/env bash unset PYTORCH_VERSION -unset TORCHDATA_VERSION # For unittest, nightly PyTorch is used as the following section, # so no need to set PYTORCH_VERSION. # In fact, keeping PYTORCH_VERSION forces us to hardcode PyTorch version in config. @@ -19,10 +18,6 @@ conda activate ./env printf "* Installing PyTorch\n" conda install -y -c "pytorch-${UPLOAD_CHANNEL}" ${CONDA_CHANNEL_FLAGS} pytorch cpuonly -printf "* Installing torchdata nightly with portalocker\n" -pip install "portalocker>=2.0.0" -pip install --pre torchdata --index-url https://download.pytorch.org/whl/nightly/cpu - printf "* Installing pywin32_postinstall script\n" curl --output pywin32_postinstall.py https://raw.githubusercontent.com/mhammond/pywin32/main/pywin32_postinstall.py python pywin32_postinstall.py -install diff --git a/.github/workflows/build-conda-linux.yml b/.github/workflows/build-conda-linux.yml index 31040eae4c..17da26d65b 100644 --- a/.github/workflows/build-conda-linux.yml +++ b/.github/workflows/build-conda-linux.yml @@ -29,7 +29,7 @@ jobs: matrix: include: - repository: pytorch/text - pre-script: packaging/install_torchdata.sh + pre-script: "" post-script: "" conda-package-directory: packaging/torchtext smoke-test-script: test/smoke_tests/smoke_tests.py diff --git a/.github/workflows/build-conda-m1.yml b/.github/workflows/build-conda-m1.yml index 2865d7e8a5..61c06ced49 100644 --- a/.github/workflows/build-conda-m1.yml +++ b/.github/workflows/build-conda-m1.yml @@ -28,7 +28,7 @@ jobs: matrix: include: - repository: pytorch/text - pre-script: packaging/install_torchdata.sh + pre-script: "" post-script: "" conda-package-directory: packaging/torchtext smoke-test-script: test/smoke_tests/smoke_tests.py diff --git a/.github/workflows/build-conda-windows.yml b/.github/workflows/build-conda-windows.yml index 14be76ed1b..83583932ae 100644 --- a/.github/workflows/build-conda-windows.yml +++ b/.github/workflows/build-conda-windows.yml @@ -29,7 +29,7 @@ jobs: matrix: include: - repository: pytorch/text - pre-script: packaging/install_torchdata.sh + pre-script: "" post-script: "" conda-package-directory: packaging/torchtext smoke-test-script: test/smoke_tests/smoke_tests.py diff --git a/.github/workflows/build-wheels-linux.yml b/.github/workflows/build-wheels-linux.yml index dcad9d50be..c3229bf610 100644 --- a/.github/workflows/build-wheels-linux.yml +++ b/.github/workflows/build-wheels-linux.yml @@ -34,7 +34,7 @@ jobs: matrix: include: - repository: pytorch/text - pre-script: packaging/install_torchdata.sh + pre-script: "" post-script: "" smoke-test-script: test/smoke_tests/smoke_tests.py package-name: torchtext diff --git a/.github/workflows/build-wheels-m1.yml b/.github/workflows/build-wheels-m1.yml index f68288b0af..b6cd5d821d 100644 --- a/.github/workflows/build-wheels-m1.yml +++ b/.github/workflows/build-wheels-m1.yml @@ -32,7 +32,7 @@ jobs: matrix: include: - repository: pytorch/text - pre-script: packaging/install_torchdata.sh + pre-script: "" post-script: "" package-name: torchtext smoke-test-script: test/smoke_tests/smoke_tests.py diff --git a/.github/workflows/build-wheels-windows.yml b/.github/workflows/build-wheels-windows.yml index dbb24314bd..a8e635a67a 100644 --- a/.github/workflows/build-wheels-windows.yml +++ b/.github/workflows/build-wheels-windows.yml @@ -33,7 +33,7 @@ jobs: matrix: include: - repository: pytorch/text - pre-script: packaging/install_torchdata.sh + pre-script: "" env-script: packaging/vc_env_helper.bat post-script: "" smoke-test-script: test/smoke_tests/smoke_tests.py diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index b6af768134..8e6163288c 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -31,7 +31,6 @@ jobs: - name: Install Torch run: | python -m pip install cmake - python -m pip install --quiet --pre torch torchdata -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html sudo ln -s /usr/bin/ninja /usr/bin/ninja-build - name: Build TorchText diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml index e9a3816f50..664cafb5b8 100644 --- a/.github/workflows/integration-test.yml +++ b/.github/workflows/integration-test.yml @@ -39,15 +39,13 @@ jobs: python -m spacy download en_core_web_sm printf "* Downloading SpaCy German models\n" python -m spacy download de_core_news_sm - # Install PyTorch, Torchvision, and TorchData + # Install PyTorch, Torchvision set -ex conda install \ --yes \ -c "pytorch-${CHANNEL}" \ -c nvidia "pytorch-${CHANNEL}"::pytorch[build="*${VERSION}*"] \ "${CUDATOOLKIT}" - printf "Installing torchdata nightly\n" - python3 -m pip install --pre torchdata --index-url https://download.pytorch.org/whl/nightly/cpu python3 setup.py develop # Install integration test dependencies python3 -m pip --quiet install parameterized diff --git a/.github/workflows/test-linux-cpu.yml b/.github/workflows/test-linux-cpu.yml index 5ef752324a..ebb8196f83 100644 --- a/.github/workflows/test-linux-cpu.yml +++ b/.github/workflows/test-linux-cpu.yml @@ -50,16 +50,13 @@ jobs: printf "* Downloading SpaCy German models\n" python -m spacy download de_core_news_sm - # Install PyTorch, Torchvision, and TorchData + # Install PyTorch, Torchvision set -ex conda install \ --yes \ -c "pytorch-${CHANNEL}" \ -c nvidia "pytorch-${CHANNEL}"::pytorch[build="*${VERSION}*"] \ "${CUDATOOLKIT}" - printf "Installing torchdata nightly\n" - python3 -m pip install "portalocker>=2.0.0" - python3 -m pip install --pre torchdata --index-url https://download.pytorch.org/whl/nightly/cpu python3 setup.py develop python3 -m pip install parameterized diff --git a/.github/workflows/test-linux-gpu.yml b/.github/workflows/test-linux-gpu.yml index 40e25dbc0f..ffad146815 100644 --- a/.github/workflows/test-linux-gpu.yml +++ b/.github/workflows/test-linux-gpu.yml @@ -54,7 +54,7 @@ jobs: printf "* Downloading SpaCy German models\n" python -m spacy download de_core_news_sm - # Install PyTorch and TorchData + # Install PyTorch set -ex conda install \ --yes \ @@ -62,9 +62,6 @@ jobs: -c "pytorch-${CHANNEL}" \ -c nvidia "pytorch-${CHANNEL}"::pytorch[build="*${VERSION}*"] \ "${CUDATOOLKIT}" - printf "Installing torchdata nightly\n" - python3 -m pip install "portalocker>=2.0.0" - python3 -m pip install --pre torchdata --index-url https://download.pytorch.org/whl/nightly/cpu --quiet python3 setup.py develop python3 -m pip install parameterized --quiet diff --git a/.github/workflows/test-macos-cpu.yml b/.github/workflows/test-macos-cpu.yml index fdec5f5d20..f6250d10a7 100644 --- a/.github/workflows/test-macos-cpu.yml +++ b/.github/workflows/test-macos-cpu.yml @@ -55,7 +55,7 @@ jobs: printf "* Downloading SpaCy German models\n" python -m spacy download de_core_news_sm - # Install PyTorch, Torchvision, and TorchData + # Install PyTorch, Torchvision set -ex conda install \ --yes \ @@ -64,9 +64,6 @@ jobs: "${MKL_CONSTRAINT}" \ pytorch \ "${CUDATOOLKIT}" - printf "Installing torchdata nightly\n" - python3 -m pip install "portalocker>=2.0.0" - python3 -m pip install --pre torchdata --index-url https://download.pytorch.org/whl/nightly/cpu python3 setup.py develop python3 -m pip install parameterized diff --git a/.github/workflows/test-windows-cpu.yml b/.github/workflows/test-windows-cpu.yml index 6ecfc79415..1315cb6e13 100644 --- a/.github/workflows/test-windows-cpu.yml +++ b/.github/workflows/test-windows-cpu.yml @@ -51,15 +51,12 @@ jobs: printf "* Downloading SpaCy German models\n" python -m spacy download de_core_news_sm - # Install PyTorch, Torchvision, and TorchData + # Install PyTorch, Torchvision conda install \ --yes \ -c "pytorch-${CHANNEL}" \ pytorch \ cpuonly - printf "Installing torchdata nightly\n" - python -m pip install "portalocker>=2.0.0" - python -m pip install --pre torchdata --index-url https://download.pytorch.org/whl/nightly/cpu printf "* Installing pywin32_postinstall script\n" curl --output pywin32_postinstall.py https://raw.githubusercontent.com/mhammond/pywin32/main/pywin32_postinstall.py diff --git a/.github/workflows/validate-binaries.yml b/.github/workflows/validate-binaries.yml index 5c33e65a44..597f92f29f 100644 --- a/.github/workflows/validate-binaries.yml +++ b/.github/workflows/validate-binaries.yml @@ -43,6 +43,11 @@ on: default: "" required: false type: string + pytorch_version: + description: "PyTorch version to validate (ie. 2.0, 2.2.2, etc.) - optional" + default: "" + required: false + type: string jobs: validate-binaries: uses: pytorch/test-infra/.github/workflows/validate-domain-library.yml@release/2.2 diff --git a/README.rst b/README.rst index 784731e693..a31853f769 100644 --- a/README.rst +++ b/README.rst @@ -12,6 +12,9 @@ torchtext +++++++++ +CAUTION: As of September 2023 we have paused active development of TorchText because our focus has shifted away from building out this library offering. +We will continue to release new versions but do not anticipate any new feature development as we figure out future investments in this space. + This repository consists of: * `torchtext.datasets `_: The raw text iterators for common NLP datasets diff --git a/packaging/install_torchdata.sh b/packaging/install_torchdata.sh deleted file mode 100755 index 7db52358a3..0000000000 --- a/packaging/install_torchdata.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/bin/bash -package_type="$PACKAGE_TYPE" -channel="$CHANNEL" -if [ -z "$package_type" ]; then - package_type="wheel" -fi -if [ -z "$channel" ]; then - channel="nightly" -fi - -# Wrong values -if [ "$package_type" != "wheel" ] && [ "$package_type" != "conda" ]; then - exit 1 -fi -if [ "$channel" != "nightly" ] && [ "$channel" != "test" ]; then - exit 1 -fi - - -if [ "$package_type" = "wheel" ]; then - install_cmd="pip install" - if [ "$channel" = "nightly" ]; then - install_cmd="${install_cmd} --pre" - fi - install_channel="--index-url https://download.pytorch.org/whl/${channel}/cpu" -else - install_cmd="conda install" - install_channel="-c pytorch-${channel}" -fi - -$install_cmd torchdata $install_channel - -if [ "$package_type" = "wheel" ]; then - TORCHDATA_VERSION="$(pip show torchdata | grep ^Version: | sed 's/Version: *//' | sed 's/+.\+//')" -else - TORCHDATA_VERSION="$(conda list -fe torchdata | grep torchdata | sed -e 's/torchdata=\(.*\)=py.*/\1/')" - echo "export CONDA_TORCHDATA_CONSTRAINT='- torchdata==${TORCHDATA_VERSION}'" >> "${BUILD_ENV_FILE}" -fi - -echo "export TORCHDATA_VERSION=${TORCHDATA_VERSION}" >> "${BUILD_ENV_FILE}" diff --git a/packaging/pkg_helpers.bash b/packaging/pkg_helpers.bash index 5c45e8937a..221e1d639a 100644 --- a/packaging/pkg_helpers.bash +++ b/packaging/pkg_helpers.bash @@ -190,14 +190,6 @@ setup_pip_pytorch_version() { -f https://download.pytorch.org/whl/torch_stable.html \ -f "https://download.pytorch.org/whl/${UPLOAD_CHANNEL}/torch_${UPLOAD_CHANNEL}.html" fi - if [[ -z "$TORCHDATA_VERSION" ]]; then - pip_install --pre torchdata -f "https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html" - export TORCHDATA_VERSION="$(pip show torchdata | grep ^Version: | sed 's/Version: *//' | sed 's/+.\+//')" - else - pip_install "torchdata==$TORCHDATA_VERSION" \ - -f https://download.pytorch.org/whl/torch_stable.html \ - -f "https://download.pytorch.org/whl/${UPLOAD_CHANNEL}/torch_${UPLOAD_CHANNEL}.html" - fi } # Fill PYTORCH_VERSION with the latest conda nightly version, and @@ -232,10 +224,6 @@ setup_conda_pytorch_constraint() { export CONDA_EXTRA_BUILD_CONSTRAINT="- mkl<=2021.2.0" fi fi - if [[ -z "$TORCHDATA_VERSION" ]]; then - export TORCHDATA_VERSION="$(conda search --json 'torchdata[channel=pytorch-nightly]' | ${PYTHON} -c "import sys, json, re; print(re.sub(r'\\+.*$', '', json.load(sys.stdin)['torchdata'][-1]['version']))")" - fi - export CONDA_TORCHDATA_CONSTRAINT="- torchdata==$TORCHDATA_VERSION" } # Translate CUDA_VERSION into CUDA_CUDATOOLKIT_CONSTRAINT diff --git a/packaging/torchtext/meta.yaml b/packaging/torchtext/meta.yaml index 03221505e5..9d7502200d 100644 --- a/packaging/torchtext/meta.yaml +++ b/packaging/torchtext/meta.yaml @@ -24,7 +24,6 @@ requirements: - requests - tqdm {{ environ.get('CONDA_PYTORCH_CONSTRAINT') }} - {{ environ.get('CONDA_TORCHDATA_CONSTRAINT') }} build: string: py{{py}} diff --git a/pytest.ini b/pytest.ini index c7ba710bd7..b9bb2d26ca 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,4 +1,5 @@ [pytest] +addopts = --ignore-glob=test/torchtext_unittest/datasets/* testpaths = test/ python_paths = ./ markers = diff --git a/requirements.txt b/requirements.txt index cbc13eefbf..079025ca62 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,7 +19,6 @@ Sphinx pytest expecttest parameterized -torchdata>0.5 # Lets pytest find our code by automatically modifying PYTHONPATH pytest-pythonpath diff --git a/setup.py b/setup.py index d008cb9c90..a3fb2707c3 100644 --- a/setup.py +++ b/setup.py @@ -63,14 +63,10 @@ def _init_submodule(): print("-- Building version " + VERSION) pytorch_package_version = os.getenv("PYTORCH_VERSION") -torchdata_package_version = os.getenv("TORCHDATA_VERSION") pytorch_package_dep = "torch" if pytorch_package_version is not None: pytorch_package_dep += "==" + pytorch_package_version -torchdata_package_dep = "torchdata" -if torchdata_package_version is not None: - torchdata_package_dep += "==" + torchdata_package_version class clean(distutils.command.clean.clean): @@ -104,7 +100,7 @@ def run(self): description="Text utilities, models, transforms, and datasets for PyTorch.", long_description=read("README.rst"), license="BSD", - install_requires=["tqdm", "requests", pytorch_package_dep, "numpy", torchdata_package_dep], + install_requires=["tqdm", "requests", pytorch_package_dep, "numpy"], python_requires=">=3.8", classifiers=[ "Programming Language :: Python :: 3.8", diff --git a/test/smoke_tests/smoke_tests.py b/test/smoke_tests/smoke_tests.py index 2fbaeec5ec..58d579716a 100644 --- a/test/smoke_tests/smoke_tests.py +++ b/test/smoke_tests/smoke_tests.py @@ -1,28 +1,6 @@ """Run smoke tests""" -import os -import re - -import torchdata import torchtext -import torchtext.version # noqa: F401 - -NIGHTLY_ALLOWED_DELTA = 3 -channel = os.getenv("MATRIX_CHANNEL") - - -def validateTorchdataVersion(): - from datetime import datetime - - date_t_str = re.findall(r"dev\d+", torchdata.__version__)[0] - date_t_delta = datetime.now() - datetime.strptime(date_t_str[3:], "%Y%m%d") - - if date_t_delta.days >= NIGHTLY_ALLOWED_DELTA: - raise RuntimeError(f"torchdata binary {torchdata.__version__} is more than {NIGHTLY_ALLOWED_DELTA} days old!") - -if channel == "nightly": - validateTorchdataVersion() print("torchtext version is ", torchtext.__version__) -print("torchdata version is ", torchdata.__version__) diff --git a/torchtext/_download_hooks.py b/torchtext/_download_hooks.py index 89baafafa5..f7a236482b 100644 --- a/torchtext/_download_hooks.py +++ b/torchtext/_download_hooks.py @@ -4,7 +4,6 @@ # This is to allow monkey-patching in fbcode from torch.hub import load_state_dict_from_url # noqa -from torchdata.datapipes.iter import HttpReader, GDriveReader # noqa F401 from tqdm import tqdm diff --git a/torchtext/datasets/ag_news.py b/torchtext/datasets/ag_news.py index 5f1c7741f6..93f398329c 100644 --- a/torchtext/datasets/ag_news.py +++ b/torchtext/datasets/ag_news.py @@ -2,8 +2,6 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import HttpReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, @@ -65,6 +63,7 @@ def AG_NEWS(root: str, split: Union[Tuple[str], str]): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL[split]]) cache_dp = url_dp.on_disk_cache( diff --git a/torchtext/datasets/amazonreviewfull.py b/torchtext/datasets/amazonreviewfull.py index 06e688279a..c916d2e034 100644 --- a/torchtext/datasets/amazonreviewfull.py +++ b/torchtext/datasets/amazonreviewfull.py @@ -2,8 +2,6 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import GDriveReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, @@ -79,6 +77,7 @@ def AmazonReviewFull(root: str, split: Union[Tuple[str], str]): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL]) cache_compressed_dp = url_dp.on_disk_cache( diff --git a/torchtext/datasets/amazonreviewpolarity.py b/torchtext/datasets/amazonreviewpolarity.py index 9616dc1d9e..a0ed0c6c40 100644 --- a/torchtext/datasets/amazonreviewpolarity.py +++ b/torchtext/datasets/amazonreviewpolarity.py @@ -2,8 +2,6 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import GDriveReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, @@ -76,6 +74,7 @@ def AmazonReviewPolarity(root: str, split: Union[Tuple[str], str]): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL]) cache_compressed_dp = url_dp.on_disk_cache( diff --git a/torchtext/datasets/cc100.py b/torchtext/datasets/cc100.py index 4ce2e92dd8..0f7cf2920f 100644 --- a/torchtext/datasets/cc100.py +++ b/torchtext/datasets/cc100.py @@ -1,8 +1,7 @@ import os.path from functools import partial -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import HttpReader +from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _create_dataset_directory, ) @@ -167,6 +166,11 @@ def CC100(root: str, language_code: str = "en"): """ if language_code not in VALID_CODES: raise ValueError(f"Invalid language code {language_code}") + if not is_module_available("torchdata"): + raise ModuleNotFoundError( + "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" + ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url = URL % language_code url_dp = IterableWrapper([url]) diff --git a/torchtext/datasets/cnndm.py b/torchtext/datasets/cnndm.py index 2adba04fd1..92b2da8ce1 100644 --- a/torchtext/datasets/cnndm.py +++ b/torchtext/datasets/cnndm.py @@ -3,12 +3,6 @@ from functools import partial from typing import Union, Set, Tuple -from torchdata.datapipes.iter import ( - FileOpener, - IterableWrapper, - OnlineReader, - GDriveReader, -) from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, @@ -141,6 +135,12 @@ def CNNDM(root: str, split: Union[Tuple[str], str]): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import ( # noqa + FileOpener, + IterableWrapper, + OnlineReader, + GDriveReader, + ) cnn_dp = _load_stories(root, "cnn", split) dailymail_dp = _load_stories(root, "dailymail", split) diff --git a/torchtext/datasets/cola.py b/torchtext/datasets/cola.py index 214c435d03..6ec6cd8b29 100644 --- a/torchtext/datasets/cola.py +++ b/torchtext/datasets/cola.py @@ -3,8 +3,6 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import HttpReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import _create_dataset_directory, _wrap_split_argument @@ -76,6 +74,7 @@ def CoLA(root: str, split: Union[Tuple[str], str]): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL]) cache_compressed_dp = url_dp.on_disk_cache( diff --git a/torchtext/datasets/conll2000chunking.py b/torchtext/datasets/conll2000chunking.py index acbd9cbd0c..983059faf1 100644 --- a/torchtext/datasets/conll2000chunking.py +++ b/torchtext/datasets/conll2000chunking.py @@ -2,8 +2,6 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import HttpReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, @@ -68,6 +66,7 @@ def CoNLL2000Chunking(root: str, split: Union[Tuple[str], str]): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL[split]]) diff --git a/torchtext/datasets/dbpedia.py b/torchtext/datasets/dbpedia.py index be86f1a98c..d563f965cb 100644 --- a/torchtext/datasets/dbpedia.py +++ b/torchtext/datasets/dbpedia.py @@ -2,8 +2,6 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import GDriveReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, @@ -75,6 +73,7 @@ def DBpedia(root: str, split: Union[Tuple[str], str]): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL]) cache_compressed_dp = url_dp.on_disk_cache( diff --git a/torchtext/datasets/enwik9.py b/torchtext/datasets/enwik9.py index cbd5e647a7..8b30cc4da8 100644 --- a/torchtext/datasets/enwik9.py +++ b/torchtext/datasets/enwik9.py @@ -1,8 +1,6 @@ import os from functools import partial -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import HttpReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import _create_dataset_directory @@ -50,6 +48,7 @@ def EnWik9(root: str): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL]) cache_compressed_dp = url_dp.on_disk_cache( diff --git a/torchtext/datasets/imdb.py b/torchtext/datasets/imdb.py index 09fba57b04..cefedc4bf0 100644 --- a/torchtext/datasets/imdb.py +++ b/torchtext/datasets/imdb.py @@ -3,8 +3,6 @@ from pathlib import Path from typing import Tuple, Union -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import HttpReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import _create_dataset_directory from torchtext.data.datasets_utils import _wrap_split_argument @@ -89,6 +87,7 @@ def IMDB(root: str, split: Union[Tuple[str], str]): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL]) diff --git a/torchtext/datasets/iwslt2016.py b/torchtext/datasets/iwslt2016.py index dd4b806e8c..f1a05dcaea 100644 --- a/torchtext/datasets/iwslt2016.py +++ b/torchtext/datasets/iwslt2016.py @@ -1,8 +1,6 @@ import os from functools import partial -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import GDriveReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _clean_files, @@ -219,6 +217,7 @@ def IWSLT2016( raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa if not isinstance(language_pair, list) and not isinstance(language_pair, tuple): raise ValueError("language_pair must be list or tuple but got {} instead".format(type(language_pair))) diff --git a/torchtext/datasets/iwslt2017.py b/torchtext/datasets/iwslt2017.py index 4767218bd7..ff95b37510 100644 --- a/torchtext/datasets/iwslt2017.py +++ b/torchtext/datasets/iwslt2017.py @@ -1,8 +1,6 @@ import os from functools import partial -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import GDriveReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _clean_files, @@ -184,6 +182,7 @@ def IWSLT2017(root=".data", split=("train", "valid", "test"), language_pair=("de raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa valid_set = "dev2010" test_set = "tst2010" diff --git a/torchtext/datasets/mnli.py b/torchtext/datasets/mnli.py index f4335c5ccf..def9354b53 100644 --- a/torchtext/datasets/mnli.py +++ b/torchtext/datasets/mnli.py @@ -3,11 +3,9 @@ import os from functools import partial -from torchdata.datapipes.iter import FileOpener, IterableWrapper - # we import HttpReader from _download_hooks so we can swap out public URLs # with interal URLs when the dataset is used within Facebook -from torchtext._download_hooks import HttpReader + from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _create_dataset_directory, @@ -89,6 +87,7 @@ def MNLI(root, split): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL]) cache_compressed_dp = url_dp.on_disk_cache( diff --git a/torchtext/datasets/mrpc.py b/torchtext/datasets/mrpc.py index e9abea1721..c3e6f72a91 100644 --- a/torchtext/datasets/mrpc.py +++ b/torchtext/datasets/mrpc.py @@ -3,7 +3,6 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, HttpReader, IterableWrapper from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, @@ -67,6 +66,7 @@ def MRPC(root: str, split: Union[Tuple[str], str]): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL[split]]) # cache data on-disk with sanity check diff --git a/torchtext/datasets/multi30k.py b/torchtext/datasets/multi30k.py index ea1c2015ae..db666bfda9 100644 --- a/torchtext/datasets/multi30k.py +++ b/torchtext/datasets/multi30k.py @@ -2,9 +2,8 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import GDriveReader # noqa -from torchtext._download_hooks import HttpReader +# noqa + from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, @@ -89,6 +88,7 @@ def Multi30k(root: str, split: Union[Tuple[str], str], language_pair: Tuple[str] raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL[split]]) diff --git a/torchtext/datasets/penntreebank.py b/torchtext/datasets/penntreebank.py index 1e0d9f295f..a7f504b9a4 100644 --- a/torchtext/datasets/penntreebank.py +++ b/torchtext/datasets/penntreebank.py @@ -2,9 +2,8 @@ from functools import partial from typing import Tuple, Union -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import GDriveReader # noqa -from torchtext._download_hooks import HttpReader +# noqa + from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, @@ -70,6 +69,7 @@ def PennTreebank(root, split: Union[Tuple[str], str]): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL[split]]) cache_dp = url_dp.on_disk_cache( diff --git a/torchtext/datasets/qnli.py b/torchtext/datasets/qnli.py index aa71eeb208..cbdca8fbc4 100644 --- a/torchtext/datasets/qnli.py +++ b/torchtext/datasets/qnli.py @@ -3,11 +3,9 @@ import os from functools import partial -from torchdata.datapipes.iter import FileOpener, IterableWrapper - # we import HttpReader from _download_hooks so we can swap out public URLs # with interal URLs when the dataset is used within Facebook -from torchtext._download_hooks import HttpReader + from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _create_dataset_directory, @@ -81,6 +79,7 @@ def QNLI(root, split): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at `https://github.com/pytorch/data`" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL]) cache_compressed_dp = url_dp.on_disk_cache( diff --git a/torchtext/datasets/qqp.py b/torchtext/datasets/qqp.py index 013a6a82a8..887675cfde 100644 --- a/torchtext/datasets/qqp.py +++ b/torchtext/datasets/qqp.py @@ -1,8 +1,6 @@ import os from functools import partial -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import HttpReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import _create_dataset_directory @@ -48,6 +46,7 @@ def QQP(root: str): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL]) cache_dp = url_dp.on_disk_cache( diff --git a/torchtext/datasets/rte.py b/torchtext/datasets/rte.py index 06355468ae..61915a1790 100644 --- a/torchtext/datasets/rte.py +++ b/torchtext/datasets/rte.py @@ -3,11 +3,9 @@ import os from functools import partial -from torchdata.datapipes.iter import FileOpener, IterableWrapper - # we import HttpReader from _download_hooks so we can swap out public URLs # with interal URLs when the dataset is used within Facebook -from torchtext._download_hooks import HttpReader + from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _create_dataset_directory, @@ -81,6 +79,7 @@ def RTE(root, split): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at `https://github.com/pytorch/data`" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL]) cache_compressed_dp = url_dp.on_disk_cache( diff --git a/torchtext/datasets/sogounews.py b/torchtext/datasets/sogounews.py index 80c7c9af9a..440e811ce4 100644 --- a/torchtext/datasets/sogounews.py +++ b/torchtext/datasets/sogounews.py @@ -2,8 +2,6 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import GDriveReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, @@ -79,6 +77,7 @@ def SogouNews(root: str, split: Union[Tuple[str], str]): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL]) cache_compressed_dp = url_dp.on_disk_cache( diff --git a/torchtext/datasets/squad1.py b/torchtext/datasets/squad1.py index 5c83bcdec2..0949eb103c 100644 --- a/torchtext/datasets/squad1.py +++ b/torchtext/datasets/squad1.py @@ -2,8 +2,6 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import HttpReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, @@ -62,6 +60,7 @@ def SQuAD1(root: str, split: Union[Tuple[str], str]): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL[split]]) # cache data on-disk with sanity check diff --git a/torchtext/datasets/squad2.py b/torchtext/datasets/squad2.py index 48ef86556c..0ad1e25ac1 100644 --- a/torchtext/datasets/squad2.py +++ b/torchtext/datasets/squad2.py @@ -2,8 +2,6 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import HttpReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, @@ -63,6 +61,7 @@ def SQuAD2(root: str, split: Union[Tuple[str], str]): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL[split]]) # cache data on-disk with sanity check diff --git a/torchtext/datasets/sst2.py b/torchtext/datasets/sst2.py index 132b22d68d..a14cf45709 100644 --- a/torchtext/datasets/sst2.py +++ b/torchtext/datasets/sst2.py @@ -2,11 +2,9 @@ import os from functools import partial -from torchdata.datapipes.iter import FileOpener, IterableWrapper - # we import HttpReader from _download_hooks so we can swap out public URLs # with interal URLs when the dataset is used within Facebook -from torchtext._download_hooks import HttpReader + from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _create_dataset_directory, @@ -86,6 +84,7 @@ def SST2(root, split): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL]) cache_compressed_dp = url_dp.on_disk_cache( diff --git a/torchtext/datasets/stsb.py b/torchtext/datasets/stsb.py index 324ed77245..1f66bf5279 100644 --- a/torchtext/datasets/stsb.py +++ b/torchtext/datasets/stsb.py @@ -2,11 +2,9 @@ import os from functools import partial -from torchdata.datapipes.iter import FileOpener, IterableWrapper - # we import HttpReader from _download_hooks so we can swap out public URLs # with interal URLs when the dataset is used within Facebook -from torchtext._download_hooks import HttpReader + from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _create_dataset_directory, @@ -82,6 +80,7 @@ def STSB(root, split): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL]) cache_compressed_dp = url_dp.on_disk_cache( diff --git a/torchtext/datasets/udpos.py b/torchtext/datasets/udpos.py index 3c7b76b124..c6ee494dae 100644 --- a/torchtext/datasets/udpos.py +++ b/torchtext/datasets/udpos.py @@ -2,8 +2,6 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import HttpReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, @@ -66,6 +64,7 @@ def UDPOS(root: str, split: Union[Tuple[str], str]): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL]) cache_compressed_dp = url_dp.on_disk_cache( diff --git a/torchtext/datasets/wikitext103.py b/torchtext/datasets/wikitext103.py index 0914d708e9..6baff13ad6 100644 --- a/torchtext/datasets/wikitext103.py +++ b/torchtext/datasets/wikitext103.py @@ -2,8 +2,6 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import HttpReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, @@ -71,6 +69,7 @@ def WikiText103(root: str, split: Union[Tuple[str], str]): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL]) # cache data on-disk diff --git a/torchtext/datasets/wikitext2.py b/torchtext/datasets/wikitext2.py index ec686b94cd..94e90f2031 100644 --- a/torchtext/datasets/wikitext2.py +++ b/torchtext/datasets/wikitext2.py @@ -2,8 +2,6 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import HttpReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, @@ -71,6 +69,7 @@ def WikiText2(root: str, split: Union[Tuple[str], str]): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL]) # cache data on-disk diff --git a/torchtext/datasets/wnli.py b/torchtext/datasets/wnli.py index c864275899..f4574d5e4e 100644 --- a/torchtext/datasets/wnli.py +++ b/torchtext/datasets/wnli.py @@ -2,11 +2,9 @@ import os from functools import partial -from torchdata.datapipes.iter import FileOpener, IterableWrapper - # we import HttpReader from _download_hooks so we can swap out public URLs # with interal URLs when the dataset is used within Facebook -from torchtext._download_hooks import HttpReader + from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _create_dataset_directory, @@ -78,6 +76,7 @@ def WNLI(root, split): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at `https://github.com/pytorch/data`" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL]) cache_compressed_dp = url_dp.on_disk_cache( diff --git a/torchtext/datasets/yahooanswers.py b/torchtext/datasets/yahooanswers.py index 9fad10ff1d..da357977cb 100644 --- a/torchtext/datasets/yahooanswers.py +++ b/torchtext/datasets/yahooanswers.py @@ -2,8 +2,6 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import GDriveReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, @@ -75,6 +73,7 @@ def YahooAnswers(root: str, split: Union[Tuple[str], str]): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL]) diff --git a/torchtext/datasets/yelpreviewfull.py b/torchtext/datasets/yelpreviewfull.py index 1272dae45c..7bea8f1211 100644 --- a/torchtext/datasets/yelpreviewfull.py +++ b/torchtext/datasets/yelpreviewfull.py @@ -2,8 +2,6 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import GDriveReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, @@ -74,6 +72,7 @@ def YelpReviewFull(root: str, split: Union[Tuple[str], str]): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL]) diff --git a/torchtext/datasets/yelpreviewpolarity.py b/torchtext/datasets/yelpreviewpolarity.py index 90e1e31e59..08559f0c68 100644 --- a/torchtext/datasets/yelpreviewpolarity.py +++ b/torchtext/datasets/yelpreviewpolarity.py @@ -2,8 +2,6 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import GDriveReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, @@ -74,6 +72,7 @@ def YelpReviewPolarity(root: str, split: Union[Tuple[str], str]): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL])