From 02b3cfcc81a5a4d6bf4e4a7775935d3920489e5c Mon Sep 17 00:00:00 2001 From: Nayef Ahmed Date: Fri, 1 Jul 2022 03:47:26 -0400 Subject: [PATCH 1/2] Fix multi30k dataset urls --- torchtext/datasets/multi30k.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/torchtext/datasets/multi30k.py b/torchtext/datasets/multi30k.py index 6ba3c901a0..3c1f7df235 100644 --- a/torchtext/datasets/multi30k.py +++ b/torchtext/datasets/multi30k.py @@ -13,15 +13,15 @@ from torchtext._download_hooks import HttpReader URL = { - "train": r"http://www.quest.dcs.shef.ac.uk/wmt16_files_mmt/training.tar.gz", - "valid": r"http://www.quest.dcs.shef.ac.uk/wmt16_files_mmt/validation.tar.gz", - "test": r"http://www.quest.dcs.shef.ac.uk/wmt16_files_mmt/mmt16_task1_test.tar.gz", + "train": r"https://raw.githubusercontent.com/neychev/small_DL_repo/master/datasets/Multi30k/training.tar.gz", + "valid": r"https://raw.githubusercontent.com/neychev/small_DL_repo/master/datasets/Multi30k/validation.tar.gz", + "test": r"https://raw.githubusercontent.com/neychev/small_DL_repo/master/datasets/Multi30k/mmt16_task1_test.tar.gz", } MD5 = { "train": "20140d013d05dd9a72dfde46478663ba05737ce983f478f960c1123c6671be5e", "valid": "a7aa20e9ebd5ba5adce7909498b94410996040857154dab029851af3a866da8c", - "test": "0681be16a532912288a91ddd573594fbdd57c0fbb81486eff7c55247e35326c2", + "test": "6d1ca1dba99e2c5dd54cae1226ff11c2551e6ce63527ebb072a1f70f72a5cd36", } _PREFIX = { From 14a1276b3fcbf771b27c39453a8ec6106134dc27 Mon Sep 17 00:00:00 2001 From: Nayef Ahmed Date: Tue, 5 Jul 2022 23:09:35 -0400 Subject: [PATCH 2/2] Added todo comment --- torchtext/datasets/multi30k.py | 1 + 1 file changed, 1 insertion(+) diff --git a/torchtext/datasets/multi30k.py b/torchtext/datasets/multi30k.py index 3c1f7df235..47f5fb3a33 100644 --- a/torchtext/datasets/multi30k.py +++ b/torchtext/datasets/multi30k.py @@ -12,6 +12,7 @@ from torchdata.datapipes.iter import FileOpener, IterableWrapper from torchtext._download_hooks import HttpReader +# TODO: Update URL to original once the server is back up (see https://github.com/pytorch/text/issues/1756) URL = { "train": r"https://raw.githubusercontent.com/neychev/small_DL_repo/master/datasets/Multi30k/training.tar.gz", "valid": r"https://raw.githubusercontent.com/neychev/small_DL_repo/master/datasets/Multi30k/validation.tar.gz",