Skip to content
This repository was archived by the owner on Sep 10, 2025. It is now read-only.

Commit 0977d45

Browse files
committed
parameterize test for minimal duplication.
1 parent c7e1282 commit 0977d45

File tree

1 file changed

+9
-56
lines changed

1 file changed

+9
-56
lines changed

test/data/test_dataset_utils.py

Lines changed: 9 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,15 @@
33
from torchtext.data.datasets_utils import _ParseIOBData
44
from torch.utils.data.datapipes.iter import IterableWrapper
55

6+
from parameterized import parameterized
7+
68

79
class TestDatasetUtils(TorchtextTestCase):
8-
def test_iob_datapipe_basic(self):
10+
@parameterized.expand([
11+
[lambda it: list(_ParseIOBData(IterableWrapper(it), sep=" "))],
12+
[lambda it: list(IterableWrapper(it).read_iob(sep=" "))]
13+
])
14+
def test_iob_datapipe(self, pipe_fn):
915
iob = [
1016
"Alex I-PER",
1117
"is O",
@@ -17,8 +23,7 @@ def test_iob_datapipe_basic(self):
1723
"California I-LOC"
1824
]
1925
iterable = [("ignored.txt", e) for e in iob]
20-
iterable = IterableWrapper(iterable)
21-
iob_dp = list(_ParseIOBData(iterable, sep=" "))
26+
iob_dp = pipe_fn(iterable)
2227
# There's only one example in this dataset
2328
self.assertEqual(len(iob_dp), 1)
2429
# The length of the list of surface forms is the number of lines in the example
@@ -45,8 +50,7 @@ def test_iob_datapipe_basic(self):
4550
"California I-LOC",
4651
]
4752
iterable = [("ignored.txt", e) for e in iob]
48-
iterable = IterableWrapper(iterable)
49-
iob_dp = list(_ParseIOBData(iterable, sep=" "))
53+
iob_dp = pipe_fn(iterable)
5054
# There are two examples in this dataset
5155
self.assertEqual(len(iob_dp), 2)
5256
# The length of the first list of surface forms is the length of everything before the empty line.
@@ -57,54 +61,3 @@ def test_iob_datapipe_basic(self):
5761
# The length of the second labels is the length of everything after the empty line.
5862
self.assertEqual(len(iob_dp[1][0]), len(iob) - iob.index("") - 1)
5963
self.assertEqual(len(iob_dp[1][1]), len(iob) - iob.index("") - 1)
60-
61-
def test_iob_datapipe_functional(self):
62-
iob = [
63-
"Alex I-PER",
64-
"is O",
65-
"going O",
66-
"to O",
67-
"Los I-LOC",
68-
"Angeles I-LOC",
69-
"in O",
70-
"California I-LOC"
71-
]
72-
iterable = [("ignored.txt", e) for e in iob]
73-
iob_dp = list(IterableWrapper(iterable).read_iob(sep=" "))
74-
# There's only one example in this dataset
75-
self.assertEqual(len(iob_dp), 1)
76-
# The length of the list of surface forms is the number of lines in the example
77-
self.assertEqual(len(iob_dp[0][0]), len(iob))
78-
# The length of the list of labels is the number of lines in the example
79-
self.assertEqual(len(iob_dp[0][1]), len(iob))
80-
iob = [
81-
"Alex I-PER",
82-
"is O",
83-
"going O",
84-
"to O",
85-
"Los I-LOC",
86-
"Angeles I-LOC",
87-
"in O",
88-
"California I-LOC",
89-
"",
90-
"Alex I-PER",
91-
"is O",
92-
"going O",
93-
"to O",
94-
"Los I-LOC",
95-
"Angeles I-LOC",
96-
"in O",
97-
"California I-LOC",
98-
]
99-
iterable = [("ignored.txt", e) for e in iob]
100-
iob_dp = list(IterableWrapper(iterable).read_iob(sep=" "))
101-
# There are two examples in this dataset
102-
self.assertEqual(len(iob_dp), 2)
103-
# The length of the first list of surface forms is the length of everything before the empty line.
104-
# The length of the first labels is the length of everything before the empty line.
105-
self.assertEqual(len(iob_dp[0][0]), iob.index(""))
106-
self.assertEqual(len(iob_dp[0][1]), iob.index(""))
107-
# The length of the second list of surface forms is the length of everything after the empty line.
108-
# The length of the second labels is the length of everything after the empty line.
109-
self.assertEqual(len(iob_dp[1][0]), len(iob) - iob.index("") - 1)
110-
self.assertEqual(len(iob_dp[1][1]), len(iob) - iob.index("") - 1)

0 commit comments

Comments
 (0)