Skip to content
This repository was archived by the owner on Sep 10, 2025. It is now read-only.

Commit 7049f1e

Browse files
committed
Add appropriate encoding for cross-platform Unicode writing.
1 parent 6936cd1 commit 7049f1e

18 files changed

+18
-18
lines changed

test/datasets/test_agnews.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ def _get_mock_dataset(root_dir):
2020
mocked_data = defaultdict(list)
2121
for file_name in ("train.csv", "test.csv"):
2222
txt_file = os.path.join(base_dir, file_name)
23-
with open(txt_file, "w") as f:
23+
with open(txt_file, "w", encoding="utf-8") as f:
2424
for i in range(5):
2525
label = seed % 4 + 1
2626
rand_string = get_random_unicode(seed)

test/datasets/test_amazonreviewfull.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def _get_mock_dataset(root_dir):
2222
mocked_data = defaultdict(list)
2323
for file_name in ("train.csv", "test.csv"):
2424
txt_file = os.path.join(temp_dataset_dir, file_name)
25-
with open(txt_file, "w") as f:
25+
with open(txt_file, "w", encoding="utf-8") as f:
2626
for i in range(5):
2727
label = seed % 5 + 1
2828
rand_string = get_random_unicode(seed)

test/datasets/test_amazonreviewpolarity.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def _get_mock_dataset(root_dir):
2222
mocked_data = defaultdict(list)
2323
for file_name in ("train.csv", "test.csv"):
2424
txt_file = os.path.join(temp_dataset_dir, file_name)
25-
with open(txt_file, "w") as f:
25+
with open(txt_file, "w", encoding="utf-8") as f:
2626
for i in range(5):
2727
label = seed % 2 + 1
2828
rand_string = get_random_unicode(seed)

test/datasets/test_cc100.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ def _get_mock_dataset(root_dir):
2525
for language_code in VALID_CODES:
2626
file_name = f"{language_code}.txt.xz"
2727
compressed_file = os.path.join(base_dir, file_name)
28-
with lzma.open(compressed_file, "wt") as f:
28+
with lzma.open(compressed_file, "wt", encoding="utf-8") as f:
2929
for i in range(5):
3030
rand_string = get_random_unicode(seed)
3131
content = f"{rand_string}\n"

test/datasets/test_conll2000chunking.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ def _get_mock_dataset(root_dir):
2323
for file_name in ("train.txt", "test.txt"):
2424
txt_file = os.path.join(temp_dataset_dir, file_name)
2525
mocked_lines = mocked_data[os.path.splitext(file_name)[0]]
26-
with open(txt_file, "w") as f:
26+
with open(txt_file, "w", encoding="utf-8") as f:
2727
for i in range(5):
2828
rand_strings = [get_random_unicode(seed)]
2929
rand_label_1 = [get_random_unicode(seed)]

test/datasets/test_dbpedia.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ def _get_mock_dataset(root_dir):
2323
for file_name in ("train.csv", "test.csv"):
2424
csv_file = os.path.join(temp_dataset_dir, file_name)
2525
mocked_lines = mocked_data[os.path.splitext(file_name)[0]]
26-
with open(csv_file, "w") as f:
26+
with open(csv_file, "w", encoding="utf-8") as f:
2727
for i in range(5):
2828
label = seed % 14 + 1
2929
rand_string = get_random_unicode(seed)

test/datasets/test_enwik9.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ def _get_mock_dataset(root_dir):
2020
file_name = "enwik9"
2121
txt_file = os.path.join(temp_dataset_dir, file_name)
2222
mocked_data = []
23-
with open(txt_file, "w") as f:
23+
with open(txt_file, "w", encoding="utf-8") as f:
2424
for i in range(5):
2525
rand_string = "<" + get_random_unicode(seed) + ">"
2626
dataset_line = f"'{rand_string}'"

test/datasets/test_imdb.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def _get_mock_dataset(root_dir):
3232
label = "neg" if i < 2 else "pos"
3333
cur_dir = pos_dir if label == "pos" else neg_dir
3434
txt_file = os.path.join(cur_dir, f"{i}{i}_{i}.txt")
35-
with open(txt_file, "w") as f:
35+
with open(txt_file, "w", encoding="utf-8") as f:
3636
rand_string = get_random_unicode(seed)
3737
dataset_line = (label, rand_string)
3838
# append line to correct dataset split

test/datasets/test_multi30k.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def _get_mock_dataset(root_dir):
2222
mocked_data = defaultdict(list)
2323
for file_name in ("train.de", "train.en", "val.de", "val.en", "test.de", "test.en"):
2424
txt_file = os.path.join(temp_dataset_dir, file_name)
25-
with open(txt_file, "w") as f:
25+
with open(txt_file, "w", encoding="utf-8") as f:
2626
for i in range(5):
2727
rand_string = get_random_unicode(seed)
2828
f.write(rand_string + "\n")

test/datasets/test_penntreebank.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ def _get_mock_dataset(root_dir):
2020
mocked_data = defaultdict(list)
2121
for file_name in ("ptb.train.txt", "ptb.valid.txt", "ptb.test.txt"):
2222
txt_file = os.path.join(base_dir, file_name)
23-
with open(txt_file, "w") as f:
23+
with open(txt_file, "w", encoding="utf-8") as f:
2424
for i in range(5):
2525
rand_string = get_random_unicode(seed)
2626
dataset_line = f"{rand_string}"

0 commit comments

Comments (0)