Skip to content
This repository was archived by the owner on Sep 10, 2025. It is now read-only.

Commit 77839ce

Browse files
committed
address initial style reviews.
1 parent c04f6d6 commit 77839ce

File tree

1 file changed

+11 -12 lines changed

1 file changed

+11
-12
lines changed

torchtext/datasets/iwslt2016.py

Lines changed: 11 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
from torchtext._internal.module_utils import is_module_available
22

33
if is_module_available("torchdata"):
4-
from torchdata.datapipes.iter import FileOpener, GDriveReader, IterableWrapper, FileLister
4+
from torchdata.datapipes.iter import FileOpener, GDriveReader, IterableWrapper
55

66
import os
77
from torchtext.data.datasets_utils import (
@@ -215,14 +215,14 @@ def IWSLT2016(root='.data', split=('train', 'valid', 'test'), language_pair=('de
215215

216216
languages = "-".join([src_language, tgt_language])
217217

218-
iwslt_tar = os.path.join(
219-
"texts", src_language, tgt_language, languages
218+
inner_iwslt_tar = os.path.join(
219+
root, os.path.splitext(_PATH)[0], "texts", src_language, tgt_language, languages
220220
) + ".tgz"
221221

222222
cache_decompressed_dp = cache_compressed_dp.on_disk_cache(
223-
filepath_fn=lambda x: os.path.join(root, os.path.splitext(_PATH)[0], iwslt_tar)
223+
filepath_fn=lambda x: inner_iwslt_tar
224224
)
225-
cache_decompressed_dp = FileOpener(cache_decompressed_dp, mode="b").read_from_tar().filter(lambda x: iwslt_tar in x[0])
225+
cache_decompressed_dp = FileOpener(cache_decompressed_dp, mode="b").read_from_tar().filter(lambda x: inner_iwslt_tar in x[0])
226226
cache_decompressed_dp = cache_decompressed_dp.end_caching(mode="wb", same_filepath_fn=True)
227227

228228
file_path_by_lang_and_split = {
@@ -238,26 +238,25 @@ def IWSLT2016(root='.data', split=('train', 'valid', 'test'), language_pair=('de
238238
}
239239
}
240240

241-
src_filepath = file_path_by_lang_and_split[src_language][split]
241+
src_filename = file_path_by_lang_and_split[src_language][split]
242242

243243
cache_inner_src_decompressed_dp = cache_decompressed_dp.on_disk_cache(
244-
filepath_fn=lambda x: os.path.join(root, "2016-01/texts/", src_language, tgt_language, languages, src_filepath)
244+
filepath_fn=lambda x: os.path.join(root, "2016-01/texts/", src_language, tgt_language, languages, src_filename)
245245
)
246246
cache_inner_src_decompressed_dp = FileOpener(cache_inner_src_decompressed_dp, mode="b").read_from_tar()
247247
cache_inner_src_decompressed_dp = cache_inner_src_decompressed_dp.map(lambda x: _clean_files(x[0], os.path.splitext(os.path.dirname(os.path.dirname(x[0])))[0], x[1]))
248-
cache_inner_src_decompressed_dp = cache_inner_src_decompressed_dp.filter(lambda x: src_filepath in x)
248+
cache_inner_src_decompressed_dp = cache_inner_src_decompressed_dp.filter(lambda x: src_filename in x)
249249
cache_inner_src_decompressed_dp = FileOpener(cache_inner_src_decompressed_dp, mode="b")
250250
cache_inner_src_decompressed_dp = cache_inner_src_decompressed_dp.end_caching(mode="wb", same_filepath_fn=True)
251251

252-
tgt_filepath = file_path_by_lang_and_split[tgt_language][split]
253-
252+
tgt_filename = file_path_by_lang_and_split[tgt_language][split]
254253

255254
cache_inner_tgt_decompressed_dp = cache_decompressed_dp.on_disk_cache(
256-
filepath_fn=lambda x: os.path.join(root, "2016-01/texts/", src_language, tgt_language, languages, tgt_filepath)
255+
filepath_fn=lambda x: os.path.join(root, "2016-01/texts/", src_language, tgt_language, languages, tgt_filename)
257256
)
258257
cache_inner_tgt_decompressed_dp = FileOpener(cache_inner_tgt_decompressed_dp, mode="b").read_from_tar()
259258
cache_inner_tgt_decompressed_dp = cache_inner_tgt_decompressed_dp.map(lambda x: _clean_files(x[0], os.path.splitext(os.path.dirname(os.path.dirname(x[0])))[0], x[1]))
260-
cache_inner_tgt_decompressed_dp = cache_inner_tgt_decompressed_dp.filter(lambda x: tgt_filepath in x)
259+
cache_inner_tgt_decompressed_dp = cache_inner_tgt_decompressed_dp.filter(lambda x: tgt_filename in x)
261260
cache_inner_tgt_decompressed_dp = FileOpener(cache_inner_tgt_decompressed_dp, mode="b")
262261
cache_inner_tgt_decompressed_dp = cache_inner_tgt_decompressed_dp.end_caching(mode="wb", same_filepath_fn=True)
263262

0 commit comments

Comments (0)