Skip to content
This repository was archived by the owner on Sep 10, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion test/common/torchtext_test_case.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
from unittest import TestCase
from torch.testing._internal.common_utils import TestCase
import json
import logging
import os
Expand Down
57 changes: 28 additions & 29 deletions test/data/test_builtin_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import torchtext.data as data
from torchtext.datasets import AG_NEWS
import torch
from torch.testing import assert_allclose
from ..common.torchtext_test_case import TorchtextTestCase


Expand Down Expand Up @@ -99,10 +98,10 @@ def test_text_classification(self):
ag_news_train, ag_news_test = AG_NEWS(root=datadir, ngrams=3)
self.assertEqual(len(ag_news_train), 120000)
self.assertEqual(len(ag_news_test), 7600)
assert_allclose(ag_news_train[-1][1][:10],
torch.tensor([3525, 319, 4053, 34, 5407, 3607, 70, 6798, 10599, 4053]).long())
assert_allclose(ag_news_test[-1][1][:10],
torch.tensor([2351, 758, 96, 38581, 2351, 220, 5, 396, 3, 14786]).long())
self.assertEqual(ag_news_train[-1][1][:10],
torch.tensor([3525, 319, 4053, 34, 5407, 3607, 70, 6798, 10599, 4053]).long())
self.assertEqual(ag_news_test[-1][1][:10],
torch.tensor([2351, 758, 96, 38581, 2351, 220, 5, 396, 3, 14786]).long())

def test_imdb(self):
from torchtext.experimental.datasets import IMDB
Expand All @@ -111,14 +110,14 @@ def test_imdb(self):
train_dataset, test_dataset = IMDB()
self.assertEqual(len(train_dataset), 25000)
self.assertEqual(len(test_dataset), 25000)
assert_allclose(train_dataset[0][1][:10],
torch.tensor([13, 1568, 13, 246, 35468, 43, 64, 398, 1135, 92]).long())
assert_allclose(train_dataset[-1][1][:10],
torch.tensor([2, 71, 4555, 194, 3328, 15144, 42, 227, 148, 8]).long())
assert_allclose(test_dataset[0][1][:10],
torch.tensor([13, 125, 1051, 5, 246, 1652, 8, 277, 66, 20]).long())
assert_allclose(test_dataset[-1][1][:10],
torch.tensor([13, 1035, 14, 21, 28, 2, 1051, 1275, 1008, 3]).long())
self.assertEqual(train_dataset[0][1][:10],
torch.tensor([13, 1568, 13, 246, 35468, 43, 64, 398, 1135, 92]).long())
self.assertEqual(train_dataset[-1][1][:10],
torch.tensor([2, 71, 4555, 194, 3328, 15144, 42, 227, 148, 8]).long())
self.assertEqual(test_dataset[0][1][:10],
torch.tensor([13, 125, 1051, 5, 246, 1652, 8, 277, 66, 20]).long())
self.assertEqual(test_dataset[-1][1][:10],
torch.tensor([13, 1035, 14, 21, 28, 2, 1051, 1275, 1008, 3]).long())

# Test API with a vocab input object
old_vocab = train_dataset.get_vocab()
Expand Down Expand Up @@ -164,14 +163,14 @@ def test_squad1(self):
train_dataset, dev_dataset = SQuAD1()
self.assertEqual(len(train_dataset), 87599)
self.assertEqual(len(dev_dataset), 10570)
assert_allclose(train_dataset[100]['question'],
torch.tensor([7, 24, 86, 52, 2, 373, 887, 18, 12797, 11090, 1356, 2, 1788, 3273, 16]).long())
assert_allclose(train_dataset[100]['ans_pos'][0],
torch.tensor([72, 72]).long())
assert_allclose(dev_dataset[100]['question'],
torch.tensor([42, 27, 669, 7438, 17, 2, 1950, 3273, 17252, 389, 16]).long())
assert_allclose(dev_dataset[100]['ans_pos'][0],
torch.tensor([45, 48]).long())
self.assertEqual(train_dataset[100]['question'],
torch.tensor([7, 24, 86, 52, 2, 373, 887, 18, 12797, 11090, 1356, 2, 1788, 3273, 16]).long())
self.assertEqual(train_dataset[100]['ans_pos'][0],
torch.tensor([72, 72]).long())
self.assertEqual(dev_dataset[100]['question'],
torch.tensor([42, 27, 669, 7438, 17, 2, 1950, 3273, 17252, 389, 16]).long())
self.assertEqual(dev_dataset[100]['ans_pos'][0],
torch.tensor([45, 48]).long())

# Test API with a vocab input object
old_vocab = train_dataset.get_vocab()
Expand All @@ -185,14 +184,14 @@ def test_squad2(self):
train_dataset, dev_dataset = SQuAD2()
self.assertEqual(len(train_dataset), 130319)
self.assertEqual(len(dev_dataset), 11873)
assert_allclose(train_dataset[200]['question'],
torch.tensor([84, 50, 1421, 12, 5439, 4569, 17, 30, 2, 15202, 4754, 1421, 16]).long())
assert_allclose(train_dataset[200]['ans_pos'][0],
torch.tensor([9, 9]).long())
assert_allclose(dev_dataset[200]['question'],
torch.tensor([41, 29, 2, 66, 17016, 30, 0, 1955, 16]).long())
assert_allclose(dev_dataset[200]['ans_pos'][0],
torch.tensor([40, 46]).long())
self.assertEqual(train_dataset[200]['question'],
torch.tensor([84, 50, 1421, 12, 5439, 4569, 17, 30, 2, 15202, 4754, 1421, 16]).long())
self.assertEqual(train_dataset[200]['ans_pos'][0],
torch.tensor([9, 9]).long())
self.assertEqual(dev_dataset[200]['question'],
torch.tensor([41, 29, 2, 66, 17016, 30, 0, 1955, 16]).long())
self.assertEqual(dev_dataset[200]['ans_pos'][0],
torch.tensor([40, 46]).long())

# Test API with a vocab input object
old_vocab = train_dataset.get_vocab()
Expand Down
9 changes: 4 additions & 5 deletions test/data/test_field.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from collections import Counter
import os

from numpy.testing import assert_allclose
import torch
import torchtext.data as data
import pytest
Expand Down Expand Up @@ -376,9 +375,9 @@ def test_numerical_features_no_vocab(self):
test_float_data = ["1.1", "0.1", "3.91", "0.2", "10.2"]

numericalized_int = int_field.numericalize(test_int_data)
assert_allclose(numericalized_int.data.numpy(), [1, 0, 1, 3, 19])
self.assertEqual(numericalized_int.data, [1, 0, 1, 3, 19])
numericalized_float = float_field.numericalize(test_float_data)
assert_allclose(numericalized_float.data.numpy(), [1.1, 0.1, 3.91, 0.2, 10.2])
self.assertEqual(numericalized_float.data, [1.1, 0.1, 3.91, 0.2, 10.2])

# Test with postprocessing applied
int_field = data.Field(sequential=False, use_vocab=False,
Expand All @@ -396,9 +395,9 @@ def test_numerical_features_no_vocab(self):
test_float_data = ["1.1", "0.1", "3.91", "0.2", "10.2"]

numericalized_int = int_field.numericalize(test_int_data)
assert_allclose(numericalized_int.data.numpy(), [2, 1, 2, 4, 20])
self.assertEqual(numericalized_int.data, [2, 1, 2, 4, 20])
numericalized_float = float_field.numericalize(test_float_data)
assert_allclose(numericalized_float.data.numpy(), [0.55, 0.05, 1.955, 0.1, 5.1])
self.assertEqual(numericalized_float.data, [0.55, 0.05, 1.955, 0.1, 5.1])

def test_errors(self):
# Test that passing a non-tuple (of data and length) to numericalize
Expand Down
23 changes: 11 additions & 12 deletions test/data/test_metrics.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from torchtext.data.metrics import bleu_score
from torch.testing import assert_allclose
from ..common.torchtext_test_case import TorchtextTestCase


Expand All @@ -19,19 +18,19 @@ def test_bleu_score(self):
# Partial match
candidate = [['My', 'full', 'pytorch', 'test']]
refs = [[['My', 'full', 'pytorch', 'test', '!'], ['Different']]]
assert_allclose(bleu_score(candidate, refs), 0.7788007)
self.assertEqual(bleu_score(candidate, refs), 0.7788007)

# Bigrams and unigrams only
candidate = [['My', 'pytorch', 'test']]
refs = [[['My', 'full', 'pytorch', 'test'], ['Different']]]
assert_allclose(bleu_score(candidate, refs, max_n=2,
weights=[0.5, 0.5]), 0.5066641)
self.assertEqual(bleu_score(candidate, refs, max_n=2,
weights=[0.5, 0.5]), 0.5066641)

# Multi-sentence corpus
candidate = [['My', 'full', 'pytorch', 'test'], ['Another', 'Sentence']]
refs = [[['My', 'full', 'pytorch', 'test'], ['Completely', 'Different']],
[['No', 'Match']]]
assert_allclose(bleu_score(candidate, refs), 0.8408964)
self.assertEqual(bleu_score(candidate, refs), 0.8408964)

# Empty input
candidate = [[]]
Expand All @@ -52,13 +51,13 @@ def test_bleu_score(self):

# The comments below give the code used to get each hardcoded bleu score
# nltk.translate.bleu_score.corpus_bleu(refs, candidate)
assert_allclose(bleu_score(candidate, refs), 0.4573199)
self.assertEqual(bleu_score(candidate, refs), 0.4573199)
# nltk.translate.bleu_score.corpus_bleu(refs, candidate, weights=[0.33]*3)
assert_allclose(bleu_score(candidate, refs, 3,
weights=[0.33, 0.33, 0.33]), 0.4901113)
self.assertEqual(bleu_score(candidate, refs, 3,
weights=[0.33, 0.33, 0.33]), 0.4901113)
# nltk.translate.bleu_score.corpus_bleu(refs, candidate, weights=[0.5]*2)
assert_allclose(bleu_score(candidate, refs, 2,
weights=[0.5, 0.5]), 0.5119535)
self.assertEqual(bleu_score(candidate, refs, 2,
weights=[0.5, 0.5]), 0.5119535)
# nltk.translate.bleu_score.corpus_bleu(refs, candidate, weights=[1])
assert_allclose(bleu_score(candidate, refs, 1,
weights=[1]), 0.5515605)
self.assertEqual(bleu_score(candidate, refs, 1,
weights=[1]), 0.5515605)
55 changes: 27 additions & 28 deletions test/test_build.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import os
from collections import Counter

import numpy as np
import torch
import torchtext.data

Expand Down Expand Up @@ -130,16 +129,16 @@ def test_vectors_get_vecs(self):
self.assertEqual(vec.vectors.shape[0], len(vec))

tokens = ['chip', 'baby', 'Beautiful']
token_vecs = vec.get_vecs_by_tokens(tokens).numpy()
token_vecs = vec.get_vecs_by_tokens(tokens)
self.assertEqual(token_vecs.shape[0], len(tokens))
self.assertEqual(token_vecs.shape[1], vec.dim)
torch.testing.assert_allclose(vec[tokens[0]].numpy(), token_vecs[0])
torch.testing.assert_allclose(vec[tokens[1]].numpy(), token_vecs[1])
torch.testing.assert_allclose(vec['<unk>'].numpy(), token_vecs[2])
self.assertEqual(vec[tokens[0]], token_vecs[0])
self.assertEqual(vec[tokens[1]], token_vecs[1])
self.assertEqual(vec['<unk>'], token_vecs[2])

token_one_vec = vec.get_vecs_by_tokens(tokens[0], lower_case_backup=True).numpy()
token_one_vec = vec.get_vecs_by_tokens(tokens[0], lower_case_backup=True)
self.assertEqual(token_one_vec.shape[0], vec.dim)
torch.testing.assert_allclose(vec[tokens[0].lower()].numpy(), token_one_vec)
self.assertEqual(vec[tokens[0].lower()], token_one_vec)

def test_download_charngram_vectors(self):
c = Counter({'hello': 4, 'world': 3, 'ᑌᑎIᑕOᗪᕮ_Tᕮ᙭T': 5, 'freq_too_low': 2})
Expand All @@ -157,7 +156,7 @@ def test_download_charngram_vectors(self):
expected_stoi = {x: index for index, x in enumerate(expected_itos)}
self.assertEqual(v.itos, expected_itos)
self.assertEqual(dict(v.stoi), expected_stoi)
vectors = v.vectors.numpy()
vectors = v.vectors

# The first 5 entries in each vector.
expected_charngram = {
Expand All @@ -167,11 +166,11 @@ def test_download_charngram_vectors(self):
}

for word in expected_charngram:
torch.testing.assert_allclose(
self.assertEqual(
vectors[v.stoi[word], :5], expected_charngram[word])

torch.testing.assert_allclose(vectors[v.stoi['<unk>']], np.zeros(100))
torch.testing.assert_allclose(vectors[v.stoi['OOV token']], np.zeros(100))
self.assertEqual(vectors[v.stoi['<unk>']], torch.zeros(100))
self.assertEqual(vectors[v.stoi['OOV token']], torch.zeros(100))

def test_download_custom_vectors(self):
c = Counter({'hello': 4, 'world': 3, 'ᑌᑎIᑕOᗪᕮ_Tᕮ᙭T': 5, 'freq_too_low': 2})
Expand All @@ -187,7 +186,7 @@ def test_download_custom_vectors(self):

self.assertEqual(v.itos, ['<unk>', '<pad>', '<bos>',
'ᑌᑎIᑕOᗪᕮ_Tᕮ᙭T', 'hello', 'world'])
vectors = v.vectors.numpy()
vectors = v.vectors

# The first 5 entries in each vector.
expected_fasttext_simple_en = {
Expand All @@ -196,10 +195,10 @@ def test_download_custom_vectors(self):
}

for word in expected_fasttext_simple_en:
torch.testing.assert_allclose(
self.assertEqual(
vectors[v.stoi[word], :5], expected_fasttext_simple_en[word])

torch.testing.assert_allclose(vectors[v.stoi['<unk>']], np.zeros(300))
self.assertEqual(vectors[v.stoi['<unk>']], torch.zeros(300))

def test_download_fasttext_vectors(self):
c = Counter({'hello': 4, 'world': 3, 'ᑌᑎIᑕOᗪᕮ_Tᕮ᙭T': 5, 'freq_too_low': 2})
Expand All @@ -219,7 +218,7 @@ def test_download_fasttext_vectors(self):
expected_stoi = {x: index for index, x in enumerate(expected_itos)}
self.assertEqual(v.itos, expected_itos)
self.assertEqual(dict(v.stoi), expected_stoi)
vectors = v.vectors.numpy()
vectors = v.vectors

# The first 5 entries in each vector.
expected_fasttext_simple_en = {
Expand All @@ -228,11 +227,11 @@ def test_download_fasttext_vectors(self):
}

for word in expected_fasttext_simple_en:
torch.testing.assert_allclose(
self.assertEqual(
vectors[v.stoi[word], :5], expected_fasttext_simple_en[word])

torch.testing.assert_allclose(vectors[v.stoi['<unk>']], np.zeros(300))
torch.testing.assert_allclose(vectors[v.stoi['OOV token']], np.zeros(300))
self.assertEqual(vectors[v.stoi['<unk>']], torch.zeros(300))
self.assertEqual(vectors[v.stoi['OOV token']], torch.zeros(300))

def test_download_glove_vectors(self):
c = Counter({'hello': 4, 'world': 3, 'ᑌᑎIᑕOᗪᕮ_Tᕮ᙭T': 5, 'freq_too_low': 2})
Expand All @@ -253,7 +252,7 @@ def test_download_glove_vectors(self):
self.assertEqual(v.itos, expected_itos)
self.assertEqual(dict(v.stoi), expected_stoi)

vectors = v.vectors.numpy()
vectors = v.vectors

# The first 5 entries in each vector.
expected_twitter = {
Expand All @@ -262,11 +261,11 @@ def test_download_glove_vectors(self):
}

for word in expected_twitter:
torch.testing.assert_allclose(
self.assertEqual(
vectors[v.stoi[word], :5], expected_twitter[word])

torch.testing.assert_allclose(vectors[v.stoi['<unk>']], np.zeros(25))
torch.testing.assert_allclose(vectors[v.stoi['OOV token']], np.zeros(25))
self.assertEqual(vectors[v.stoi['<unk>']], torch.zeros(25))
self.assertEqual(vectors[v.stoi['OOV token']], torch.zeros(25))

def test_extend(self):
c = Counter({'hello': 4, 'world': 3, 'ᑌᑎIᑕOᗪᕮ_Tᕮ᙭T': 5, 'freq_too_low': 2})
Expand All @@ -281,7 +280,7 @@ def test_extend(self):

self.assertEqual(v.itos[:6], ['<unk>', '<pad>', '<bos>',
'ᑌᑎIᑕOᗪᕮ_Tᕮ᙭T', 'hello', 'world'])
vectors = v.vectors.numpy()
vectors = v.vectors

# The first 5 entries in each vector.
expected_fasttext_simple_en = {
Expand All @@ -290,10 +289,10 @@ def test_extend(self):
}

for word in expected_fasttext_simple_en:
torch.testing.assert_allclose(
self.assertEqual(
vectors[v.stoi[word], :5], expected_fasttext_simple_en[word])

torch.testing.assert_allclose(vectors[v.stoi['<unk>']], np.zeros(300))
self.assertEqual(vectors[v.stoi['<unk>']], torch.zeros(300))

def test_vectors_custom_cache(self):
c = Counter({'hello': 4, 'world': 3, 'ᑌᑎIᑕOᗪᕮ_Tᕮ᙭T': 5, 'freq_too_low': 2})
Expand All @@ -312,7 +311,7 @@ def test_vectors_custom_cache(self):

self.assertEqual(v.itos, ['<unk>', '<pad>', '<bos>',
'ᑌᑎIᑕOᗪᕮ_Tᕮ᙭T', 'hello', 'world'])
vectors = v.vectors.numpy()
vectors = v.vectors

# The first 5 entries in each vector.
expected_fasttext_simple_en = {
Expand All @@ -321,7 +320,7 @@ def test_vectors_custom_cache(self):
}

for word in expected_fasttext_simple_en:
torch.testing.assert_allclose(
self.assertEqual(
vectors[v.stoi[word], :5], expected_fasttext_simple_en[word])

torch.testing.assert_allclose(vectors[v.stoi['<unk>']], np.zeros(300))
self.assertEqual(vectors[v.stoi['<unk>']], torch.zeros(300))
3 changes: 1 addition & 2 deletions test/test_vocab.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@


import numpy as np
from numpy.testing import assert_allclose
import torch
from torchtext import vocab

Expand Down Expand Up @@ -89,7 +88,7 @@ def test_vocab_set_vectors(self):
expected_vectors = np.array([[0.0, 0.0], [0.0, 0.0], [0.0, 0.0],
[0.0, 0.0], [0.1, 0.2], [0.5, 0.6],
[0.3, 0.4]])
assert_allclose(v.vectors.numpy(), expected_vectors)
self.assertEqual(v.vectors, expected_vectors)

def test_errors(self):
c = Counter({'hello': 4, 'world': 3, 'ᑌᑎIᑕOᗪᕮ_Tᕮ᙭T': 5, 'freq_too_low': 2})
Expand Down