Skip to content
This repository was archived by the owner on Sep 10, 2025. It is now read-only.

Commit ab553b7

Browse files
author
nayef211
committed
Remove closing doc tag from xml list. Update how close tags are created
1 parent f18955d commit ab553b7

File tree

2 files changed

+2
-4
lines changed

2 files changed

+2
-4
lines changed

test/datasets/test_iwslt2016.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,6 @@ def _generate_uncleaned_train():
4545
"<title",
4646
"<speaker",
4747
"<doc",
48-
"</doc",
4948
]
5049
for i in range(100):
5150
rand_string = " ".join(random.choice(string.ascii_letters) for i in range(10))
@@ -54,7 +53,7 @@ def _generate_uncleaned_train():
5453
if random.random() < 0.1:
5554
open_tag = random.choice(xml_tags) + ">"
5655
# Open tag already contains the closing >
57-
close_tag = "</" + open_tag[1:-1] + ">"
56+
close_tag = "</" + open_tag[1:]
5857
file_contents.append(open_tag + rand_string + close_tag)
5958
else:
6059
examples.append(rand_string + "\n")

test/datasets/test_iwslt2017.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,6 @@ def _generate_uncleaned_train():
3838
"<title",
3939
"<speaker",
4040
"<doc",
41-
"</doc",
4241
]
4342
for i in range(100):
4443
rand_string = " ".join(random.choice(string.ascii_letters) for i in range(10))
@@ -47,7 +46,7 @@ def _generate_uncleaned_train():
4746
if random.random() < 0.1:
4847
open_tag = random.choice(xml_tags) + ">"
4948
# Open tag already contains the closing >
50-
close_tag = "</" + open_tag[1:-1] + ">"
49+
close_tag = "</" + open_tag[1:]
5150
file_contents.append(open_tag + rand_string + close_tag)
5251
else:
5352
examples.append(rand_string + "\n")

0 commit comments

Comments
 (0)