@@ -162,6 +162,34 @@ def test_imdb(self):
162162 self ._helper_test_func (len (test_iter ), 25000 , next (iter (test_iter ))[1 ][:25 ], 'I love sci-fi and am will' )
163163 del train_iter , test_iter
164164
165+ def test_iwslt (self ):
166+ from torchtext .experimental .datasets import IWSLT
167+
168+ train_dataset , valid_dataset , test_dataset = IWSLT ()
169+
170+ self .assertEqual (len (train_dataset ), 196884 )
171+ self .assertEqual (len (valid_dataset ), 993 )
172+ self .assertEqual (len (test_dataset ), 1305 )
173+
174+ de_vocab , en_vocab = train_dataset .get_vocab ()
175+
176+ def assert_nth_pair_is_equal (n , expected_sentence_pair ):
177+ de_sentence = [de_vocab .itos [index ] for index in train_dataset [n ][0 ]]
178+ en_sentence = [en_vocab .itos [index ] for index in train_dataset [n ][1 ]]
179+ expected_de_sentence , expected_en_sentence = expected_sentence_pair
180+
181+ self .assertEqual (de_sentence , expected_de_sentence )
182+ self .assertEqual (en_sentence , expected_en_sentence )
183+
184+ assert_nth_pair_is_equal (0 , (['David' , 'Gallo' , ':' , 'Das' , 'ist' , 'Bill' , 'Lange' , '.' , 'Ich' , 'bin' , 'Dave' , 'Gallo' , '.' , '\n ' ],
185+ ['David' , 'Gallo' , ':' , 'This' , 'is' , 'Bill' , 'Lange' , '.' , 'I' , "'m" , 'Dave' , 'Gallo' , '.' , '\n ' ]))
186+ assert_nth_pair_is_equal (10 , (['Die' , 'meisten' , 'Tiere' , 'leben' , 'in' , 'den' , 'Ozeanen' , '.' , '\n ' ],
187+ ['Most' , 'of' , 'the' , 'animals' , 'are' , 'in' , 'the' , 'oceans' , '.' , '\n ' ]))
188+ assert_nth_pair_is_equal (20 , (['Es' , 'ist' , 'einer' , 'meiner' , 'Lieblinge' , ',' , 'weil' , 'es' , 'alle' , 'möglichen' , 'Funktionsteile' , 'hat' , '.' , '\n ' ],
189+ ['It' , "'s" , 'one' , 'of' , 'my' , 'favorites' , ',' , 'because' , 'it' , "'s" , 'got' , 'all' , 'sorts' , 'of' , 'working' , 'parts' , '.' , '\n ' ]))
190+ datafile = os .path .join (self .project_root , ".data" , "2016-01.tgz" )
191+ conditional_remove (datafile )
192+
165193 def test_multi30k (self ):
166194 from torchtext .experimental .datasets import Multi30k
167195 # smoke test to ensure multi30k works properly
0 commit comments