From c7f2628d43adbed9baec1d28bdc6290eeb3a125c Mon Sep 17 00:00:00 2001 From: Parmeet Singh Bhatia Date: Thu, 3 Feb 2022 10:23:14 -0500 Subject: [PATCH 1/2] [Bug-Fix] fix hash for datasets testing --- test/asset/raw_datasets.jsonl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/asset/raw_datasets.jsonl b/test/asset/raw_datasets.jsonl index eaf70d4ebf..4d5dbb2caa 100644 --- a/test/asset/raw_datasets.jsonl +++ b/test/asset/raw_datasets.jsonl @@ -36,11 +36,11 @@ {"dataset_name": "WikiText103", "split": "train", "NUM_LINES": 1801350, "MD5": "9ddaacaf6af0710eda8c456decff7832", "URL": "https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-103-v1.zip", "first_line": "c3e189c0ef8590f093c38b41bdba5239"} {"dataset_name": "WikiText103", "split": "valid", "NUM_LINES": 3760, "MD5": "9ddaacaf6af0710eda8c456decff7832", "URL": "https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-103-v1.zip", "first_line": "c3e189c0ef8590f093c38b41bdba5239"} {"dataset_name": "WikiText103", "split": "test", "NUM_LINES": 4358, "MD5": "9ddaacaf6af0710eda8c456decff7832", "URL": "https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-103-v1.zip", "first_line": "c3e189c0ef8590f093c38b41bdba5239"} -{"dataset_name": "PennTreebank", "split": "train", "NUM_LINES": 42068, "MD5": {"train": "f26c4b92c5fdc7b3f8c7cdcb991d8420", "valid": "aa0affc06ff7c36e977d7cd49e3839bf", "test": "8b80168b89c18661a38ef683c0dc3721"}, "URL": {"train": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.train.txt", "test": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.test.txt", "valid": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.valid.txt"}, "first_line": "7c2d3356b501bef852361e03da99841a"} -{"dataset_name": "PennTreebank", "split": "valid", "NUM_LINES": 3370, "MD5": {"train": "f26c4b92c5fdc7b3f8c7cdcb991d8420", "valid": "aa0affc06ff7c36e977d7cd49e3839bf", "test": "8b80168b89c18661a38ef683c0dc3721"}, "URL": {"train": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.train.txt", "test": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.test.txt", "valid": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.valid.txt"}, "first_line": "fe23e20e56c04bcbafef379e984df1f2"} -{"dataset_name": "PennTreebank", "split": "test", "NUM_LINES": 3761, "MD5": {"train": "f26c4b92c5fdc7b3f8c7cdcb991d8420", "valid": "aa0affc06ff7c36e977d7cd49e3839bf", "test": "8b80168b89c18661a38ef683c0dc3721"}, "URL": {"train": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.train.txt", "test": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.test.txt", "valid": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.valid.txt"}, "first_line": "a1e01652513bd83a1925b0822ce19456"} +{"dataset_name": "PennTreebank", "split": "train", "NUM_LINES": 42068, "MD5": {"train": "f26c4b92c5fdc7b3f8c7cdcb991d8420", "valid": "aa0affc06ff7c36e977d7cd49e3839bf", "test": "8b80168b89c18661a38ef683c0dc3721"}, "URL": {"train": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.train.txt", "test": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.test.txt", "valid": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.valid.txt"}, "first_line": "4da5e5d53c4f1befb6f2ccb7c2786883"} +{"dataset_name": "PennTreebank", "split": "valid", "NUM_LINES": 3370, "MD5": {"train": "f26c4b92c5fdc7b3f8c7cdcb991d8420", "valid": "aa0affc06ff7c36e977d7cd49e3839bf", "test": "5cd7ab9c524b0907447b8fec96c1e6d4"}, "URL": {"train": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.train.txt", "test": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.test.txt", "valid": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.valid.txt"}, "first_line": "5cd7ab9c524b0907447b8fec96c1e6d4"} +{"dataset_name": "PennTreebank", "split": "test", "NUM_LINES": 3761, "MD5": {"train": "f26c4b92c5fdc7b3f8c7cdcb991d8420", "valid": "aa0affc06ff7c36e977d7cd49e3839bf", "test": "8b80168b89c18661a38ef683c0dc3721"}, "URL": {"train": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.train.txt", "test": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.test.txt", "valid": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.valid.txt"}, "first_line": "9a787f558c6fa754d70e0801bedfdc32"} {"dataset_name": "SQuAD1", "split": "train", "NUM_LINES": 87599, "MD5": {"train": "981b29407e0affa3b1b156f72073b945", "dev": "3e85deb501d4e538b6bc56f786231552"}, "URL": {"train": "https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v1.1.json", "dev": "https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json"}, "first_line": "72d1162738e38d973ed20c9e70469ed4"} {"dataset_name": "SQuAD1", "split": "dev", "NUM_LINES": 10570, "MD5": {"train": "981b29407e0affa3b1b156f72073b945", "dev": "3e85deb501d4e538b6bc56f786231552"}, "URL": {"train": "https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v1.1.json", "dev": "https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json"}, "first_line": "fd5bd80f392f3a03ec908508da3a4ea3"} {"dataset_name": "SQuAD2", "split": "train", "NUM_LINES": 130319, "MD5": {"train": "62108c273c268d70893182d5cf8df740", "dev": "246adae8b7002f8679c027697b0b7cf8"}, "URL": {"train": "https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json", "dev": "https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json"}, "first_line": "9b719a13e9ea95ab9700c5c631885fc8"} {"dataset_name": "SQuAD2", "split": "dev", "NUM_LINES": 11873, "MD5": {"train": "62108c273c268d70893182d5cf8df740", "dev": "246adae8b7002f8679c027697b0b7cf8"}, "URL": {"train": "https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json", "dev": "https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json"}, "first_line": "1e011c981d41cca284070532135eb9bd"} -{"dataset_name": "EnWik9", "split": "train", "NUM_LINES": 13147026, "MD5": "3e773f8a1577fda2e27f871ca17f31fd", "URL": "http://mattmahoney.net/dc/enwik9.zip", "first_line": "9ac868b1ea4f13083b6c923bc3134a70"} \ No newline at end of file +{"dataset_name": "EnWik9", "split": "train", "NUM_LINES": 13147026, "MD5": "3e773f8a1577fda2e27f871ca17f31fd", "URL": "http://mattmahoney.net/dc/enwik9.zip", "first_line": "02d4fbb967022ab80dfc2dda49faf5ea"} \ No newline at end of file From ba1d77aa46659772bac443e111a909d4ba5baaf6 Mon Sep 17 00:00:00 2001 From: Parmeet Singh Bhatia Date: Thu, 3 Feb 2022 12:49:44 -0500 Subject: [PATCH 2/2] revert extra change --- test/asset/raw_datasets.jsonl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/asset/raw_datasets.jsonl b/test/asset/raw_datasets.jsonl index 4d5dbb2caa..be4a4dc215 100644 --- a/test/asset/raw_datasets.jsonl +++ b/test/asset/raw_datasets.jsonl @@ -37,7 +37,7 @@ {"dataset_name": "WikiText103", "split": "valid", "NUM_LINES": 3760, "MD5": "9ddaacaf6af0710eda8c456decff7832", "URL": "https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-103-v1.zip", "first_line": "c3e189c0ef8590f093c38b41bdba5239"} {"dataset_name": "WikiText103", "split": "test", "NUM_LINES": 4358, "MD5": "9ddaacaf6af0710eda8c456decff7832", "URL": "https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-103-v1.zip", "first_line": "c3e189c0ef8590f093c38b41bdba5239"} {"dataset_name": "PennTreebank", "split": "train", "NUM_LINES": 42068, "MD5": {"train": "f26c4b92c5fdc7b3f8c7cdcb991d8420", "valid": "aa0affc06ff7c36e977d7cd49e3839bf", "test": "8b80168b89c18661a38ef683c0dc3721"}, "URL": {"train": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.train.txt", "test": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.test.txt", "valid": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.valid.txt"}, "first_line": "4da5e5d53c4f1befb6f2ccb7c2786883"} -{"dataset_name": "PennTreebank", "split": "valid", "NUM_LINES": 3370, "MD5": {"train": "f26c4b92c5fdc7b3f8c7cdcb991d8420", "valid": "aa0affc06ff7c36e977d7cd49e3839bf", "test": "5cd7ab9c524b0907447b8fec96c1e6d4"}, "URL": {"train": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.train.txt", "test": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.test.txt", "valid": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.valid.txt"}, "first_line": "5cd7ab9c524b0907447b8fec96c1e6d4"} +{"dataset_name": "PennTreebank", "split": "valid", "NUM_LINES": 3370, "MD5": {"train": "f26c4b92c5fdc7b3f8c7cdcb991d8420", "valid": "aa0affc06ff7c36e977d7cd49e3839bf", "test": "8b80168b89c18661a38ef683c0dc3721"}, "URL": {"train": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.train.txt", "test": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.test.txt", "valid": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.valid.txt"}, "first_line": "5cd7ab9c524b0907447b8fec96c1e6d4"} {"dataset_name": "PennTreebank", "split": "test", "NUM_LINES": 3761, "MD5": {"train": "f26c4b92c5fdc7b3f8c7cdcb991d8420", "valid": "aa0affc06ff7c36e977d7cd49e3839bf", "test": "8b80168b89c18661a38ef683c0dc3721"}, "URL": {"train": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.train.txt", "test": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.test.txt", "valid": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.valid.txt"}, "first_line": "9a787f558c6fa754d70e0801bedfdc32"} {"dataset_name": "SQuAD1", "split": "train", "NUM_LINES": 87599, "MD5": {"train": "981b29407e0affa3b1b156f72073b945", "dev": "3e85deb501d4e538b6bc56f786231552"}, "URL": {"train": "https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v1.1.json", "dev": "https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json"}, "first_line": "72d1162738e38d973ed20c9e70469ed4"} {"dataset_name": "SQuAD1", "split": "dev", "NUM_LINES": 10570, "MD5": {"train": "981b29407e0affa3b1b156f72073b945", "dev": "3e85deb501d4e538b6bc56f786231552"}, "URL": {"train": "https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v1.1.json", "dev": "https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json"}, "first_line": "fd5bd80f392f3a03ec908508da3a4ea3"}