diff --git a/test/datasets/test_iwslt2016.py b/test/datasets/test_iwslt2016.py index 03b681f6f1..ccd5eb49e6 100644 --- a/test/datasets/test_iwslt2016.py +++ b/test/datasets/test_iwslt2016.py @@ -53,7 +53,8 @@ def _generate_uncleaned_train(): # to ensure cleaning happens appropriately if random.random() < 0.1: open_tag = random.choice(xml_tags) + ">" - close_tag = "" + # Open tag already contains the closing > + close_tag = "" file_contents.append(open_tag + rand_string + close_tag) else: examples.append(rand_string + "\n") diff --git a/test/datasets/test_iwslt2017.py b/test/datasets/test_iwslt2017.py index 375ca4525d..5e373ee8ce 100644 --- a/test/datasets/test_iwslt2017.py +++ b/test/datasets/test_iwslt2017.py @@ -46,7 +46,8 @@ def _generate_uncleaned_train(): # to ensure cleaning happens appropriately if random.random() < 0.1: open_tag = random.choice(xml_tags) + ">" - close_tag = "" + # Open tag already contains the closing > + close_tag = "" file_contents.append(open_tag + rand_string + close_tag) else: examples.append(rand_string + "\n")