Skip to content
This repository was archived by the owner on Sep 10, 2025. It is now read-only.
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .circleci/unittest/linux/scripts/run_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@ eval "$(./conda/bin/conda shell.bash hook)"
conda activate ./env

python -m torch.utils.collect_env
pytest --cov=torchtext --junitxml=test-results/junit.xml -v --durations 20 test
cd test
pytest --cov=torchtext --junitxml=test-results/junit.xml -v --durations 20 torchtext_unittest
3 changes: 2 additions & 1 deletion .circleci/unittest/windows/scripts/run_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@ eval "$(./conda/Scripts/conda.exe 'shell.bash' 'hook')"
conda activate ./env

python -m torch.utils.collect_env
pytest --cov=torchtext --junitxml=test-results/junit.xml -v --durations 20 test
cd test
pytest --cov=torchtext --junitxml=test-results/junit.xml -v --durations 20 torchtext_unittest
31 changes: 31 additions & 0 deletions .github/workflows/integration-test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Runs the integration test suite against a nightly CPU build of torch.
name: Integration Test

on:
  # Run for pull requests that target main.
  pull_request:
    branches: [main]

  # Allow the workflow to be started manually.
  workflow_dispatch:

jobs:
  build:
    runs-on: ubuntu-18.04
    strategy:
      # Let every matrix entry finish even if another one fails.
      fail-fast: false
      matrix:
        # Quoted so YAML keeps the version as a string instead of a float.
        python-version: ["3.8"]

    steps:
      - uses: actions/checkout@v2
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install packages
        run: |
          python -m pip install --quiet --upgrade pip
          python -m pip install --quiet --pre torch -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
          python -m pip install --quiet pytest requests cmake ninja sentencepiece parameterized tqdm expecttest
          python setup.py install
      - name: Run integration test
        # pytest is started from inside test/, with the --use-tmp-hub-dir option enabled.
        run: |
          cd test && pytest integration_tests -v --use-tmp-hub-dir
4 changes: 2 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ option(BUILD_TORCHTEXT_PYTHON_EXTENSION "Build Python extension" OFF)

# Add this project's cmake/ directory to the module search path.
set(CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH};${CMAKE_CURRENT_SOURCE_DIR}/cmake")
# Root of the torch installation. Defaults to two directories above
# CMAKE_PREFIX_PATH; being a cache entry, it can be overridden with
# -DTORCH_INSTALL_PREFIX=<path>.
set(TORCH_INSTALL_PREFIX "${CMAKE_PREFIX_PATH}/../.." CACHE STRING "Install path for torch")
# Compiler definition that selects the libstdc++ ABI. The default value sets
# _GLIBCXX_USE_CXX11_ABI to 0; pass -DTORCH_COMPILED_WITH_CXX_ABI=<flag> at
# configure time to use a different value.
# NOTE(review): presumably this must match the ABI the linked torch was built with - confirm.
set(TORCH_COMPILED_WITH_CXX_ABI "-D_GLIBCXX_USE_CXX11_ABI=0" CACHE STRING "Compile torchtext with cxx11_abi")

# Locate the c10 and torch libraries, with ${TORCH_INSTALL_PREFIX}/lib as an
# extra search location.
find_library(TORCH_C10_LIBRARY c10 PATHS "${TORCH_INSTALL_PREFIX}/lib")
find_library(TORCH_LIBRARY torch PATHS "${TORCH_INSTALL_PREFIX}/lib")
Expand All @@ -60,8 +61,7 @@ if(MSVC)
set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>")
endif()

# TORCH_CXX_FLAGS contains the same -D_GLIBCXX_USE_CXX11_ABI value as PyTorch
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall ${TORCH_CXX_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_COMPILED_WITH_CXX_ABI} -Wall ${TORCH_CXX_FLAGS}")

add_subdirectory(third_party)
add_subdirectory(torchtext/csrc)
2 changes: 1 addition & 1 deletion docs/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Jinja2<3.1.0
sphinx==3.5.4
-e git+https://github.com/pytorch/pytorch_sphinx_theme.git@b4d0005#egg=pytorch_sphinx_theme
-e git+https://github.com/pytorch/pytorch_sphinx_theme.git@cece053#egg=pytorch_sphinx_theme
matplotlib
sphinx_gallery
46 changes: 0 additions & 46 deletions docs/source/experimental_datasets_raw.rst

This file was deleted.

Empty file removed test/.gitignore
Empty file.
4 changes: 2 additions & 2 deletions test/integration_tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ def pytest_addoption(parser):
)


@pytest.fixture(scope="class")
@pytest.fixture(autouse=True, scope="class")
def temp_hub_dir(tmp_path_factory, pytestconfig):
if not pytestconfig.getoption("--use-tmp-hub-dir"):
if not pytestconfig.getoption("use_tmp_hub_dir"):
yield
else:
tmp_dir = tmp_path_factory.mktemp("hub", numbered=True).resolve()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
import pytest # noqa: F401
import torch
from parameterized import parameterized, parameterized_class
from test.common.assets import get_asset_path
from test.common.parameterized_utils import nested_params
from test.common.torchtext_test_case import TorchtextTestCase
from torchtext.prototype.models import (
T5_BASE_ENCODER,
T5_BASE,
Expand All @@ -18,6 +15,9 @@
T5Transform,
)
from torchtext.prototype.models.t5.wrapper import T5Wrapper
from torchtext_unittest.common.assets import get_asset_path
from torchtext_unittest.common.parameterized_utils import nested_params
from torchtext_unittest.common.torchtext_test_case import TorchtextTestCase


BUNDLERS = {
Expand Down
10 changes: 8 additions & 2 deletions test/integration_tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,15 @@
XLMR_BASE_ENCODER,
XLMR_LARGE_ENCODER,
)
from torchtext_unittest.common.assets import get_asset_path
from torchtext_unittest.common.torchtext_test_case import TorchtextTestCase

from ..common.assets import get_asset_path
from ..common.torchtext_test_case import TorchtextTestCase
# Lookup table from a short model name to the corresponding encoder object
# imported from torchtext.
# NOTE(review): presumably used to parameterize the tests in this module - confirm against the test class.
BUNDLERS = {
"xlmr_base": XLMR_BASE_ENCODER,
"xlmr_large": XLMR_LARGE_ENCODER,
"roberta_base": ROBERTA_BASE_ENCODER,
"roberta_large": ROBERTA_LARGE_ENCODER,
}

BUNDLERS = {
"xlmr_base": XLMR_BASE_ENCODER,
Expand Down
Empty file removed test/prototype/models/__init__.py
Empty file.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
from unittest.mock import patch

import torch
from test.common.torchtext_test_case import TorchtextTestCase
from torch.nn import functional as F
from torchtext_unittest.common.torchtext_test_case import TorchtextTestCase


class TestModels(TorchtextTestCase):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import torch
from test.common.assets import get_asset_path
from test.common.torchtext_test_case import TorchtextTestCase
from torchtext.prototype.models import T5Transform
from torchtext_unittest.common.assets import get_asset_path
from torchtext_unittest.common.torchtext_test_case import TorchtextTestCase


class TestTransforms(TorchtextTestCase):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
import os
import platform
import unittest

import torch
import torchtext.data as data
Expand All @@ -10,8 +8,6 @@


class TestFunctional(TorchtextTestCase):
# TODO(Nayef211): remove decorator once https://github.com/pytorch/pytorch/issues/38207 is closed
@unittest.skipIf(platform.system() == "Windows", "Test is known to fail on Windows.")
def test_BasicEnglishNormalize(self) -> None:
test_sample = "'\".<br />,()!?;: Basic English Normalization for a Line of Text '\".<br />,()!?;:"
ref_results = [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@
import tempfile

import torch
from test.common.assets import get_asset_path
from test.common.torchtext_test_case import TorchtextTestCase
from torchtext.prototype.transforms import (
sentencepiece_processor,
sentencepiece_tokenizer,
VectorTransform,
)
from torchtext.prototype.vectors import FastText
from torchtext_unittest.common.assets import get_asset_path
from torchtext_unittest.common.torchtext_test_case import TorchtextTestCase


class TestTransforms(TorchtextTestCase):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
import unittest

import torch
from test.common.torchtext_test_case import TorchtextTestCase
from torchtext.prototype.vectors import build_vectors
from torchtext_unittest.common.torchtext_test_case import TorchtextTestCase


class TestVectors(TorchtextTestCase):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from functools import partial

import torch
from test.common.torchtext_test_case import TorchtextTestCase
from torch.utils.data import DataLoader
from torchtext.data.functional import custom_replace
from torchtext.prototype.transforms import (
Expand All @@ -19,6 +18,7 @@
from torchtext.prototype.vectors import build_vectors, FastText, GloVe, load_vectors_from_file_path
from torchtext.prototype.vocab_factory import build_vocab_from_text_file, load_vocab_from_file
from torchtext.utils import download_from_url
from torchtext_unittest.common.torchtext_test_case import TorchtextTestCase

from ..common.assets import get_asset_path

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
import unittest
from urllib.parse import urljoin

from test.common.assets import conditional_remove, get_asset_path
from torchtext import _TEXT_BUCKET
from torchtext import utils
from torchtext_unittest.common.assets import conditional_remove, get_asset_path

from .common.torchtext_test_case import TorchtextTestCase

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@

import pytest
import torch
from test.common.torchtext_test_case import TorchtextTestCase
from torchtext.vocab import build_vocab_from_iterator, vocab
from torchtext_unittest.common.torchtext_test_case import TorchtextTestCase


class TestVocab(TorchtextTestCase):
Expand Down
5 changes: 5 additions & 0 deletions tools/setup_helpers/extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@
_ROOT_DIR = _THIS_DIR.parent.parent.resolve()


def _get_cxx11_abi():
return "-D_GLIBCXX_USE_CXX11_ABI=" + str(int(torch.compiled_with_cxx11_abi()))


def get_ext_modules():
modules = [
Extension(name=_LIBTORCHTEXT_NAME, sources=[]),
Expand Down Expand Up @@ -72,6 +76,7 @@ def build_extension(self, ext):
"-DBUILD_SHARED_LIBS=OFF",
"-DCMAKE_POLICY_DEFAULT_CMP0063=NEW",
"-DSPM_ENABLE_SHARED=OFF",
f"-DTORCH_COMPILED_WITH_CXX_ABI={_get_cxx11_abi()}",
]
build_args = ["--target", "install"]

Expand Down
7 changes: 3 additions & 4 deletions torchtext/datasets/multi30k.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,10 @@
from torchdata.datapipes.iter import FileOpener, IterableWrapper
from torchtext._download_hooks import HttpReader

# TODO: Update URL to original once the server is back up (see https://github.com/pytorch/text/issues/1756)
URL = {
"train": r"https://raw.githubusercontent.com/neychev/small_DL_repo/master/datasets/Multi30k/training.tar.gz",
"valid": r"https://raw.githubusercontent.com/neychev/small_DL_repo/master/datasets/Multi30k/validation.tar.gz",
"test": r"https://raw.githubusercontent.com/neychev/small_DL_repo/master/datasets/Multi30k/mmt16_task1_test.tar.gz",
"train": r"http://www.quest.dcs.shef.ac.uk/wmt16_files_mmt/training.tar.gz",
"valid": r"http://www.quest.dcs.shef.ac.uk/wmt16_files_mmt/validation.tar.gz",
"test": r"http://www.quest.dcs.shef.ac.uk/wmt16_files_mmt/mmt16_task1_test.tar.gz",
}

MD5 = {
Expand Down
7 changes: 6 additions & 1 deletion torchtext/models/roberta/modules.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,12 @@ def __init__(
batch_first=True,
norm_first=normalize_before,
)
self.layers = torch.nn.TransformerEncoder(encoder_layer=layer, num_layers=num_encoder_layers)
self.layers = torch.nn.TransformerEncoder(
encoder_layer=layer,
num_layers=num_encoder_layers,
enable_nested_tensor=True,
mask_check=False,
)
self.positional_embedding = PositionalEmbedding(max_seq_len, embedding_dim, padding_idx)
self.embedding_layer_norm = nn.LayerNorm(embedding_dim)
self.dropout = nn.Dropout(dropout)
Expand Down