From f18955d0242942fe396c8d4614569b66510009f8 Mon Sep 17 00:00:00 2001 From: Elijah Rippeth Date: Sat, 12 Feb 2022 11:31:16 -0500 Subject: [PATCH 1/2] meaningless change to make XML well formed (though it is not important). --- test/datasets/test_iwslt2016.py | 3 ++- test/datasets/test_iwslt2017.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/test/datasets/test_iwslt2016.py b/test/datasets/test_iwslt2016.py index 03b681f6f1..ccd5eb49e6 100644 --- a/test/datasets/test_iwslt2016.py +++ b/test/datasets/test_iwslt2016.py @@ -53,7 +53,8 @@ def _generate_uncleaned_train(): # to ensure cleaning happens appropriately if random.random() < 0.1: open_tag = random.choice(xml_tags) + ">" - close_tag = "" + # Open tag already contains the closing > + close_tag = "" file_contents.append(open_tag + rand_string + close_tag) else: examples.append(rand_string + "\n") diff --git a/test/datasets/test_iwslt2017.py b/test/datasets/test_iwslt2017.py index 375ca4525d..5e373ee8ce 100644 --- a/test/datasets/test_iwslt2017.py +++ b/test/datasets/test_iwslt2017.py @@ -46,7 +46,8 @@ def _generate_uncleaned_train(): # to ensure cleaning happens appropriately if random.random() < 0.1: open_tag = random.choice(xml_tags) + ">" - close_tag = "" + # Open tag already contains the closing > + close_tag = "" file_contents.append(open_tag + rand_string + close_tag) else: examples.append(rand_string + "\n") From ab553b768da46c9c3c95f61489de3fafd4a7aa3c Mon Sep 17 00:00:00 2001 From: nayef211 Date: Tue, 8 Mar 2022 16:19:38 -0800 Subject: [PATCH 2/2] Remove closing doc tag from xml list. Update how close tags are created --- test/datasets/test_iwslt2016.py | 3 +-- test/datasets/test_iwslt2017.py | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/test/datasets/test_iwslt2016.py b/test/datasets/test_iwslt2016.py index ccd5eb49e6..c5afd37664 100644 --- a/test/datasets/test_iwslt2016.py +++ b/test/datasets/test_iwslt2016.py @@ -45,7 +45,6 @@ def _generate_uncleaned_train(): "" # Open tag already contains the closing > - close_tag = "" + close_tag = "" # Open tag already contains the closing > - close_tag = "" + close_tag = "