From 8947465671162cf7ee5d4325ac7a843c2ae42f81 Mon Sep 17 00:00:00 2001
From: nayef211 <n63ahmed@edu.uwaterloo.ca>
Date: Mon, 7 Feb 2022 13:37:09 -0800
Subject: [PATCH 1/2] Updating CoNLL2000Chunking dataset to be consistent with other datasets

---
 torchtext/datasets/conll2000chunking.py | 31 +++++++++++++++----------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/torchtext/datasets/conll2000chunking.py b/torchtext/datasets/conll2000chunking.py
index 513132233c..deff390b09 100644
--- a/torchtext/datasets/conll2000chunking.py
+++ b/torchtext/datasets/conll2000chunking.py
@@ -45,20 +45,27 @@ def CoNLL2000Chunking(root: str, split: Union[Tuple[str], str]):
     url_dp = IterableWrapper([URL[split]])
 
     # Cache and check HTTP response
-    cache_dp = url_dp.on_disk_cache(
-        filepath_fn=lambda x: os.path.join(root, "conll2000chunking", os.path.basename(URL[split])),
-        hash_dict={os.path.join(root, "conll2000chunking", os.path.basename(URL[split])): MD5[split]},
-        hash_type="md5"
+    cache_compressed_dp = url_dp.on_disk_cache(
+        filepath_fn=lambda x: os.path.join(root, os.path.basename(URL[split])),
+        hash_dict={os.path.join(root, os.path.basename(URL[split])): MD5[split]},
+        hash_type="md5",
+    )
+    cache_compressed_dp = HttpReader(cache_compressed_dp).end_caching(
+        mode="wb", same_filepath_fn=True
     )
-    cache_dp = HttpReader(cache_dp).end_caching(mode="wb", same_filepath_fn=True)
-    cache_dp = FileOpener(cache_dp, mode="b")
 
     # Cache and check the gzip extraction for relevant split
-    cache_dp = cache_dp.on_disk_cache(
-        filepath_fn=lambda x: os.path.join(root, "conll2000chunking", _EXTRACTED_FILES[split])
+    cache_decompressed_dp = cache_compressed_dp.on_disk_cache(
+        filepath_fn=lambda x: os.path.join(root, _EXTRACTED_FILES[split])
+    )
+    cache_decompressed_dp = (
+        FileOpener(cache_decompressed_dp, mode="b")
+        .extract(file_type="gzip")
+        .filter(lambda x: _EXTRACTED_FILES[split] in x[0])
+    )
+    cache_decompressed_dp = cache_decompressed_dp.end_caching(
+        mode="wb", same_filepath_fn=True
     )
-    cache_dp = cache_dp.extract(file_type="gzip").filter(lambda x: _EXTRACTED_FILES[split] in x[0])
-    cache_dp = cache_dp.end_caching(mode="wb")
 
-    cache_dp = FileOpener(cache_dp, mode="b")
-    return cache_dp.readlines(decode=True).read_iob(sep=" ")
+    data_dp = FileOpener(cache_decompressed_dp, mode="b")
+    return data_dp.readlines(decode=True).read_iob(sep=" ")

From 06d6ce9c0336ef51e74872eee2bdebc17a22bd58 Mon Sep 17 00:00:00 2001
From: nayef211 <n63ahmed@edu.uwaterloo.ca>
Date: Mon, 7 Feb 2022 22:12:16 -0800
Subject: [PATCH 2/2] Resolving PR comments: drop filter after gzip extraction

---
 torchtext/datasets/conll2000chunking.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/torchtext/datasets/conll2000chunking.py b/torchtext/datasets/conll2000chunking.py
index deff390b09..fd9062a2fb 100644
--- a/torchtext/datasets/conll2000chunking.py
+++ b/torchtext/datasets/conll2000chunking.py
@@ -58,10 +58,8 @@ def CoNLL2000Chunking(root: str, split: Union[Tuple[str], str]):
     cache_decompressed_dp = cache_compressed_dp.on_disk_cache(
         filepath_fn=lambda x: os.path.join(root, _EXTRACTED_FILES[split])
     )
-    cache_decompressed_dp = (
-        FileOpener(cache_decompressed_dp, mode="b")
-        .extract(file_type="gzip")
-        .filter(lambda x: _EXTRACTED_FILES[split] in x[0])
+    cache_decompressed_dp = FileOpener(cache_decompressed_dp, mode="b").extract(
+        file_type="gzip"
     )
     cache_decompressed_dp = cache_decompressed_dp.end_caching(
         mode="wb", same_filepath_fn=True