diff --git a/.circleci/unittest/linux/scripts/environment.yml b/.circleci/unittest/linux/scripts/environment.yml index 9400308196..1248909f6e 100644 --- a/.circleci/unittest/linux/scripts/environment.yml +++ b/.circleci/unittest/linux/scripts/environment.yml @@ -5,6 +5,7 @@ dependencies: - pip - pip: - dataclasses + - iopath - nltk - requests - revtok diff --git a/.circleci/unittest/windows/scripts/environment.yml b/.circleci/unittest/windows/scripts/environment.yml index b86ded6ada..66cedf0136 100644 --- a/.circleci/unittest/windows/scripts/environment.yml +++ b/.circleci/unittest/windows/scripts/environment.yml @@ -7,6 +7,7 @@ dependencies: - spacy - pip: - dataclasses + - iopath - nltk - requests - revtok diff --git a/packaging/torchtext/meta.yaml b/packaging/torchtext/meta.yaml index 9d7502200d..0534f35939 100644 --- a/packaging/torchtext/meta.yaml +++ b/packaging/torchtext/meta.yaml @@ -23,6 +23,7 @@ requirements: - python - requests - tqdm + - iopath {{ environ.get('CONDA_PYTORCH_CONSTRAINT') }} build: diff --git a/requirements.txt b/requirements.txt index 245ececedc..e28b300aa3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,7 @@ tqdm # Downloading data and other files requests +iopath # Optional NLP tools nltk diff --git a/setup.py b/setup.py index d8afa7a550..cb011412ab 100644 --- a/setup.py +++ b/setup.py @@ -100,7 +100,7 @@ def run(self): description="Text utilities and datasets for PyTorch", long_description=read("README.rst"), license="BSD", - install_requires=["tqdm", "requests", pytorch_package_dep, "numpy"], + install_requires=["tqdm", "requests", pytorch_package_dep, "numpy", "iopath"], python_requires=">=3.7", classifiers=[ "Programming Language :: Python :: 3.7", diff --git a/torchtext/utils.py b/torchtext/utils.py index a7910b222f..c930d3b9a0 100644 --- a/torchtext/utils.py +++ b/torchtext/utils.py @@ -6,6 +6,7 @@ import zipfile import torch +from iopath.common.file_io import PathManager from torchtext import _CACHE_DIR from 
._download_hooks import _DATASET_DOWNLOAD_MANAGER @@ -228,3 +229,16 @@ def get_asset_local_path(asset_path: str, overwite=False) -> str: else: local_path = download_from_url(url=asset_path, root=_CACHE_DIR, overwrite=overwite) return local_path + + +PATH_MANAGER = PathManager() +""" +We use iopath to handle local files, remote files with http/https URLs, etc. +This global instance is registered with all the required handlers with the best +defaults. Learn more about iopath: https://github.com/facebookresearch/iopath + +Examples: + >>> from torchtext.utils import PATH_MANAGER + >>> with PATH_MANAGER.open(FILE_PATH) as f: + ...     f.read() +"""