Skip to content
This repository was archived by the owner on Sep 10, 2025. It is now read-only.
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 30 additions & 27 deletions torchtext/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@
import zipfile

import torch
from iopath.common.file_io import file_lock
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the third time that iopath has been introduced as a dependency.
Has it become available on all platforms on which torchtext is distributed?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nope - it is not available everywhere yet, so this will need to be reworked to not depend on iopath.

from torchtext import _CACHE_DIR

from ._download_hooks import _DATASET_DOWNLOAD_MANAGER


logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -96,32 +96,35 @@ def download_from_url(url, path=None, root=".data", overwrite=False, hash_value=
path = os.path.abspath(path)
root, filename = os.path.split(os.path.abspath(path))

# skip download if path exists and overwrite is not True
if os.path.exists(path):
logger.info("File %s already exists." % path)
if not overwrite:
if hash_value:
_check_hash(path, hash_value, hash_type)
return path

# make root dir if does not exist
if not os.path.exists(root):
try:
os.makedirs(root)
except OSError:
raise OSError("Can't create the download directory {}.".format(root))

# download data and move to path
_DATASET_DOWNLOAD_MANAGER.get_local_path(url, destination=path)

logger.info("File {} downloaded.".format(path))

# validate
if hash_value:
_check_hash(path, hash_value, hash_type)

# all good
return path
# In a concurrent setting, adding a file lock ensures the first thread to acquire will actually download the model
# and the other ones will just use the existing path (which will not contain a partially downloaded model).
with file_lock(path):
# skip download if path exists and overwrite is not True
if os.path.exists(path):
logger.info("File %s already exists." % path)
if not overwrite:
if hash_value:
_check_hash(path, hash_value, hash_type)
return path

# make root dir if does not exist
if not os.path.exists(root):
try:
os.makedirs(root)
except OSError as exc:
raise OSError("Can't create the download directory {}.".format(root)) from exc

# download data and move to path
_DATASET_DOWNLOAD_MANAGER.get_local_path(url, destination=path)

logger.info("File {} downloaded.".format(path))

# validate
if hash_value:
_check_hash(path, hash_value, hash_type)

# all good
return path


def extract_archive(from_path, to_path=None, overwrite=False):
Expand Down