11from torchtext ._internal .module_utils import is_module_available
22
33if is_module_available ("torchdata" ):
4- from torchdata .datapipes .iter import FileOpener , GDriveReader , IterableWrapper , FileLister
4+ from torchdata .datapipes .iter import FileOpener , GDriveReader , IterableWrapper
55
66import os
77from torchtext .data .datasets_utils import (
@@ -215,14 +215,14 @@ def IWSLT2016(root='.data', split=('train', 'valid', 'test'), language_pair=('de
215215
216216 languages = "-" .join ([src_language , tgt_language ])
217217
218- iwslt_tar = os .path .join (
219- "texts" , src_language , tgt_language , languages
218+ inner_iwslt_tar = os .path .join (
219+ root , os . path . splitext ( _PATH )[ 0 ], "texts" , src_language , tgt_language , languages
220220 ) + ".tgz"
221221
222222 cache_decompressed_dp = cache_compressed_dp .on_disk_cache (
223- filepath_fn = lambda x : os . path . join ( root , os . path . splitext ( _PATH )[ 0 ], iwslt_tar )
223+ filepath_fn = lambda x : inner_iwslt_tar
224224 )
225- cache_decompressed_dp = FileOpener (cache_decompressed_dp , mode = "b" ).read_from_tar ().filter (lambda x : iwslt_tar in x [0 ])
225+ cache_decompressed_dp = FileOpener (cache_decompressed_dp , mode = "b" ).read_from_tar ().filter (lambda x : inner_iwslt_tar in x [0 ])
226226 cache_decompressed_dp = cache_decompressed_dp .end_caching (mode = "wb" , same_filepath_fn = True )
227227
228228 file_path_by_lang_and_split = {
@@ -238,26 +238,25 @@ def IWSLT2016(root='.data', split=('train', 'valid', 'test'), language_pair=('de
238238 }
239239 }
240240
241- src_filepath = file_path_by_lang_and_split [src_language ][split ]
241+ src_filename = file_path_by_lang_and_split [src_language ][split ]
242242
243243 cache_inner_src_decompressed_dp = cache_decompressed_dp .on_disk_cache (
244- filepath_fn = lambda x : os .path .join (root , "2016-01/texts/" , src_language , tgt_language , languages , src_filepath )
244+ filepath_fn = lambda x : os .path .join (root , "2016-01/texts/" , src_language , tgt_language , languages , src_filename )
245245 )
246246 cache_inner_src_decompressed_dp = FileOpener (cache_inner_src_decompressed_dp , mode = "b" ).read_from_tar ()
247247 cache_inner_src_decompressed_dp = cache_inner_src_decompressed_dp .map (lambda x : _clean_files (x [0 ], os .path .splitext (os .path .dirname (os .path .dirname (x [0 ])))[0 ], x [1 ]))
248- cache_inner_src_decompressed_dp = cache_inner_src_decompressed_dp .filter (lambda x : src_filepath in x )
248+ cache_inner_src_decompressed_dp = cache_inner_src_decompressed_dp .filter (lambda x : src_filename in x )
249249 cache_inner_src_decompressed_dp = FileOpener (cache_inner_src_decompressed_dp , mode = "b" )
250250 cache_inner_src_decompressed_dp = cache_inner_src_decompressed_dp .end_caching (mode = "wb" , same_filepath_fn = True )
251251
252- tgt_filepath = file_path_by_lang_and_split [tgt_language ][split ]
253-
252+ tgt_filename = file_path_by_lang_and_split [tgt_language ][split ]
254253
255254 cache_inner_tgt_decompressed_dp = cache_decompressed_dp .on_disk_cache (
256- filepath_fn = lambda x : os .path .join (root , "2016-01/texts/" , src_language , tgt_language , languages , tgt_filepath )
255+ filepath_fn = lambda x : os .path .join (root , "2016-01/texts/" , src_language , tgt_language , languages , tgt_filename )
257256 )
258257 cache_inner_tgt_decompressed_dp = FileOpener (cache_inner_tgt_decompressed_dp , mode = "b" ).read_from_tar ()
259258 cache_inner_tgt_decompressed_dp = cache_inner_tgt_decompressed_dp .map (lambda x : _clean_files (x [0 ], os .path .splitext (os .path .dirname (os .path .dirname (x [0 ])))[0 ], x [1 ]))
260- cache_inner_tgt_decompressed_dp = cache_inner_tgt_decompressed_dp .filter (lambda x : tgt_filepath in x )
259+ cache_inner_tgt_decompressed_dp = cache_inner_tgt_decompressed_dp .filter (lambda x : tgt_filename in x )
261260 cache_inner_tgt_decompressed_dp = FileOpener (cache_inner_tgt_decompressed_dp , mode = "b" )
262261 cache_inner_tgt_decompressed_dp = cache_inner_tgt_decompressed_dp .end_caching (mode = "wb" , same_filepath_fn = True )
263262
0 commit comments