Skip to content
This repository was archived by the owner on Sep 10, 2025. It is now read-only.

Commit 7e50e69

Browse files
committed
update amazon dataset
1 parent c6118e2 commit 7e50e69

File tree

2 files changed

+2
-3
lines changed

2 files changed

+2
-3
lines changed

test/data/test_builtin_datasets.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -63,8 +63,7 @@ def test_raw_text_classification(self, info):
6363
return
6464
else:
6565
data_iter = torchtext.datasets.DATASETS[dataset_name](split=split)
66-
self.assertEqual(len(data_iter), info['NUM_LINES'])
67-
self.assertEqual(hashlib.md5(json.dumps(next(data_iter), sort_keys=True).encode('utf-8')).hexdigest(), info['first_line'])
66+
self.assertEqual(hashlib.md5(json.dumps(next(iter(data_iter)), sort_keys=True).encode('utf-8')).hexdigest(), info['first_line'])
6867
if dataset_name == "AG_NEWS":
6968
self.assertEqual(torchtext.datasets.URLS[dataset_name][split], info['URL'])
7069
self.assertEqual(torchtext.datasets.MD5[dataset_name][split], info['MD5'])

torchtext/datasets/amazonreviewpolarity.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -50,4 +50,4 @@ def AmazonReviewPolarity(root, split):
5050
filter_extracted_files = extracted_files.filter(lambda x: split in x[0])
5151

5252
# stack CSV reader and do some mapping
53-
return filter_extracted_files.parse_csv().map(fn=lambda t: (int(t[0]), t[1]))
53+
return filter_extracted_files.parse_csv().map(fn=lambda t: (int(t[0]), ' '.join(t[1:])))

0 commit comments

Comments
 (0)