Skip to content
This repository was archived by the owner on Sep 10, 2025. It is now read-only.

Commit c8bced1

Browse files
parmeet
authored and facebook-github-bot committed
Swapping experimental Vocab and retiring current Vocab into legacy (#1289)
Summary: allow-large-files to commit wikitext103_vocab.pt Reviewed By: cpuhrsch Differential Revision: D28478152 fbshipit-source-id: c2a871439f054024b95c05f7664a84028aacaca3
1 parent 6231993 commit c8bced1

26 files changed

+842
-856
lines changed

benchmark/benchmark_experimental_vocab.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
load_vocab_from_file,
1313
build_vocab_from_text_file
1414
)
15-
from torchtext.vocab import (
15+
from torchtext.legacy.vocab import (
1616
Vocab,
1717
build_vocab_from_iterator
1818
)

docs/source/experimental_vocab.rst

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,6 @@ torchtext.experimental.vocab
77
.. automodule:: torchtext.experimental.vocab
88
.. currentmodule:: torchtext.experimental.vocab
99

10-
:hidden:`Vocab`
11-
~~~~~~~~~~~~~~~
12-
13-
.. autoclass:: Vocab
14-
:members:
15-
:special-members:
16-
1710
:hidden:`vocab`
1811
~~~~~~~~~~~~~~~
1912

docs/source/vocab.rst

Lines changed: 1 addition & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,8 @@ torchtext.vocab
1212

1313
.. autoclass:: Vocab
1414
:members:
15-
:special-members: __init__
16-
17-
:hidden:`SubwordVocab`
18-
~~~~~~~~~~~~~~~~~~~~~~
15+
:special-members:
1916

20-
.. autoclass:: SubwordVocab
21-
:members:
22-
:special-members: __init__
2317

2418
:hidden:`Vectors`
2519
~~~~~~~~~~~~~~~~~
@@ -48,12 +42,3 @@ Pretrained Word Embeddings
4842

4943
.. autoclass:: CharNGram
5044
:members:
51-
52-
Misc.
53-
-----
54-
55-
:hidden:`build_vocab_from_iterator`
56-
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
57-
58-
.. autofunction:: build_vocab_from_iterator
59-

examples/BERT/mlm_task.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ def run_main(args, rank=None):
125125
except:
126126
train_dataset, valid_dataset, test_dataset = WLMDataset()
127127
old_vocab = train_dataset.vocab
128-
vocab = torchtext.vocab.Vocab(counter=old_vocab.freqs,
128+
vocab = torchtext.legacy.vocab.Vocab(counter=old_vocab.freqs,
129129
specials=['<unk>', '<pad>', '<MASK>'])
130130
with open(args.save_vocab, 'wb') as f:
131131
torch.save(vocab, f)

examples/BERT/qa_task.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ def train():
163163
except:
164164
train_dataset, dev_dataset = SQuAD1()
165165
old_vocab = train_dataset.vocab
166-
vocab = torchtext.vocab.Vocab(counter=old_vocab.freqs,
166+
vocab = torchtext.legacy.vocab.Vocab(counter=old_vocab.freqs,
167167
specials=['<unk>', '<pad>', '<MASK>'])
168168
with open(args.save_vocab, 'wb') as f:
169169
torch.save(vocab, f)

examples/data_pipeline/pipelines.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def build_sp_pipeline(args):
4545
def build_legacy_torchtext_vocab_pipeline(args):
4646
vocab_file = args.vocab_filename
4747
tokenizer = get_tokenizer("basic_english")
48-
from torchtext.vocab import build_vocab_from_iterator
48+
from torchtext.legacy.vocab import build_vocab_from_iterator
4949

5050
def token_iterator(vocab_file):
5151
f = open(vocab_file, 'r')
@@ -72,7 +72,7 @@ def build_experimental_torchtext_pipeline(args):
7272
def build_legacy_batch_torchtext_vocab_pipeline(args):
7373
vocab_file = args.vocab_filename
7474
tokenizer = get_tokenizer("basic_english")
75-
from torchtext.vocab import build_vocab_from_iterator
75+
from torchtext.legacy.vocab import build_vocab_from_iterator
7676

7777
def token_iterator(vocab_file):
7878
f = open(vocab_file, 'r')

examples/vocab/pytext_vocab.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22

33
from fairseq.data.dictionary import Dictionary
44
import torch
5-
from torchtext.experimental.vocab import vocab, Vocab
5+
from torchtext.experimental.vocab import vocab
6+
from torchtext.vocab import Vocab
67
from typing import Dict, List, Optional
78

89

examples/vocab/vocab.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import torch
55
import io
66

7-
from torchtext.vocab import build_vocab_from_iterator
7+
from torchtext.legacy.vocab import build_vocab_from_iterator
88
from torchtext.data.utils import ngrams_iterator
99
from torchtext.data.utils import get_tokenizer
1010
from torchtext.utils import unicode_csv_reader

test/asset/wikitext103_vocab.pt

0 Bytes
Binary file not shown.

test/data/test_builtin_datasets.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,7 @@ def test_next_method_dataset(self):
210210

211211
def test_imdb(self):
212212
from torchtext.experimental.datasets import IMDB
213-
from torchtext.vocab import Vocab
213+
from torchtext.legacy.vocab import Vocab
214214
# smoke test to ensure imdb works properly
215215
train_dataset, test_dataset = IMDB()
216216
self._helper_test_func(len(train_dataset), 25000, train_dataset[0][1][:10],
@@ -465,7 +465,7 @@ def test_conll_sequence_tagging(self):
465465

466466
def test_squad1(self):
467467
from torchtext.experimental.datasets import SQuAD1
468-
from torchtext.vocab import Vocab
468+
from torchtext.legacy.vocab import Vocab
469469
# smoke test to ensure imdb works properly
470470
train_dataset, dev_dataset = SQuAD1()
471471
context, question, answers, ans_pos = train_dataset[100]
@@ -494,7 +494,7 @@ def test_squad1(self):
494494

495495
def test_squad2(self):
496496
from torchtext.experimental.datasets import SQuAD2
497-
from torchtext.vocab import Vocab
497+
from torchtext.legacy.vocab import Vocab
498498
# smoke test to ensure imdb works properly
499499
train_dataset, dev_dataset = SQuAD2()
500500
context, question, answers, ans_pos = train_dataset[200]

0 commit comments

Comments
 (0)