Skip to content
This repository was archived by the owner on Sep 10, 2025. It is now read-only.

Commit 887a234

Browse files
committed
fix comment.
1 parent 57d4661 commit 887a234

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

test/datasets/test_iwslt2016.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,11 @@ def _generate_uncleaned_train():
2222
'<translator', '<title', '<speaker', '<doc', '</doc'
2323
]
2424
for i in range(100):
25-
# Write one of the XML tags randomly to make sure we clean appropriately
2625
rand_string = " ".join(
2726
random.choice(string.ascii_letters) for i in range(10)
2827
)
28+
# With a 10% chance, add one of the XML tags that gets cleaned
29+
# to ensure cleaning happens appropriately
2930
if random.random() < 0.1:
3031
open_tag = random.choice(xml_tags) + ">"
3132
close_tag = "</" + open_tag[1:] + ">"
@@ -43,7 +44,6 @@ def _generate_uncleaned_valid():
4344
for doc_id in range(5):
4445
file_contents.append(f'<doc docid="{doc_id}" genre="lectures">')
4546
for seg_id in range(100):
46-
# Write one of the XML tags randomly to make sure we clean appropriately
4747
rand_string = " ".join(
4848
random.choice(string.ascii_letters) for i in range(10)
4949
)

0 commit comments

Comments
 (0)