@@ -206,22 +206,31 @@ def test_multi30k(self):
206206 from torchtext .experimental .datasets import Multi30k
207207 # smoke test to ensure multi30k works properly
208208 train_dataset , valid_dataset , test_dataset = Multi30k ()
209+
210+ # Expected values updated because of a backward-incompatible (BC-breaking) change in spaCy 3.0
209211 self ._helper_test_func (len (train_dataset ), 29000 , train_dataset [20 ],
210- ([4 , 444 , 2531 , 47 , 17480 , 7423 , 8 , 158 , 10 , 12 , 5849 , 3 , 2 ],
212+ # ([4, 444, 2531, 47, 17480, 7423, 8, 158, 10, 12, 5849, 3, 2],
213+ ([4 , 444 , 2529 , 47 , 17490 , 7422 , 8 , 158 , 10 , 12 , 5846 , 3 , 2 ],
211214 [5 , 61 , 530 , 137 , 1494 , 10 , 9 , 280 , 6 , 2 , 3749 , 4 , 3 ]))
215+
212216 self ._helper_test_func (len (valid_dataset ), 1014 , valid_dataset [30 ],
213217 ([4 , 179 , 26 , 85 , 1005 , 57 , 19 , 154 , 3 , 2 ],
214218 [5 , 24 , 32 , 81 , 47 , 1348 , 6 , 2 , 119 , 4 , 3 ]))
219+
220+ # Expected values updated because of a backward-incompatible (BC-breaking) change in spaCy 3.0
215221 self ._helper_test_func (len (test_dataset ), 1000 , test_dataset [40 ],
216- ([4 , 26 , 6 , 12 , 3915 , 1538 , 21 , 64 , 3 , 2 ],
222+ # ([4, 26, 6, 12, 3915, 1538, 21, 64, 3, 2],
223+ ([4 , 26 , 6 , 12 , 3913 , 1537 , 21 , 64 , 3 , 2 ],
217224 [5 , 32 , 20 , 2 , 747 , 345 , 1915 , 6 , 46 , 4 , 3 ]))
218225
219226 de_vocab , en_vocab = train_dataset .get_vocab ()
220227 de_tokens_ids = [
221228 de_vocab [token ] for token in
222229 'Zwei Männer verpacken Donuts in Kunststofffolie' .split ()
223230 ]
224- self .assertEqual (de_tokens_ids , [20 , 30 , 18705 , 4448 , 6 , 6241 ])
231+ # Expected values updated because of a backward-incompatible (BC-breaking) change in spaCy 3.0
232+ # self.assertEqual(de_tokens_ids, [20, 30, 18705, 4448, 6, 6241])
233+ self .assertEqual (de_tokens_ids , [20 , 30 , 18714 , 4447 , 6 , 6239 ])
225234
226235 en_tokens_ids = [
227236 en_vocab [token ] for token in
@@ -240,8 +249,11 @@ def test_multi30k(self):
240249 'A group of men are loading cotton onto a truck\n ' ]))
241250 del train_iter , valid_iter
242251 train_dataset , = Multi30k (data_select = ('train' ))
252+
253+ # Expected values updated because of a backward-incompatible (BC-breaking) change in spaCy 3.0
243254 self ._helper_test_func (len (train_dataset ), 29000 , train_dataset [20 ],
244- ([4 , 444 , 2531 , 47 , 17480 , 7423 , 8 , 158 , 10 , 12 , 5849 , 3 , 2 ],
255+ # ([4, 444, 2531, 47, 17480, 7423, 8, 158, 10, 12, 5849, 3, 2],
256+ ([4 , 444 , 2529 , 47 , 17490 , 7422 , 8 , 158 , 10 , 12 , 5846 , 3 , 2 ],
245257 [5 , 61 , 530 , 137 , 1494 , 10 , 9 , 280 , 6 , 2 , 3749 , 4 , 3 ]))
246258
247259 datafile = os .path .join (self .project_root , ".data" , "train*" )
0 commit comments