From 3f1e4f2d74f8523e8e0075d65a2fc299cce335f3 Mon Sep 17 00:00:00 2001 From: Karim Nakad Date: Fri, 16 Aug 2019 15:02:28 -0700 Subject: [PATCH 1/2] change: eliminate dependency on mnist dataset website This commit modifies the folder structure to avoid downloading the datasets and relying on the dataset website being available. That dependency caused a canary to fail (TT-0407832742). The dependency has existed since the PyTorch 1.1 upgrade, as the directory structure was modified in https://github.com/pytorch/vision/pull/601 --- tests/data/pytorch_mnist/mnist.py | 4 ++-- .../training/{ => MNIST}/processed/test.pt | Bin .../training/{ => MNIST}/processed/training.pt | Bin 3 files changed, 2 insertions(+), 2 deletions(-) rename tests/data/pytorch_mnist/training/{ => MNIST}/processed/test.pt (100%) rename tests/data/pytorch_mnist/training/{ => MNIST}/processed/training.pt (100%) diff --git a/tests/data/pytorch_mnist/mnist.py b/tests/data/pytorch_mnist/mnist.py index a7d542064e..f22bd0f315 100644 --- a/tests/data/pytorch_mnist/mnist.py +++ b/tests/data/pytorch_mnist/mnist.py @@ -47,7 +47,7 @@ def _get_train_data_loader(training_dir, is_distributed, batch_size, **kwargs): transform=transforms.Compose( [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))] ), - download=True, + download=False, # True sets a dependency on an external site for our canaries. ) train_sampler = ( torch.utils.data.distributed.DistributedSampler(dataset) if is_distributed else None @@ -71,7 +71,7 @@ def _get_test_data_loader(training_dir, **kwargs): transform=transforms.Compose( [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))] ), - download=True, + download=False, # True sets a dependency on an external site for our canaries. 
), batch_size=1000, shuffle=True, diff --git a/tests/data/pytorch_mnist/training/processed/test.pt b/tests/data/pytorch_mnist/training/MNIST/processed/test.pt similarity index 100% rename from tests/data/pytorch_mnist/training/processed/test.pt rename to tests/data/pytorch_mnist/training/MNIST/processed/test.pt diff --git a/tests/data/pytorch_mnist/training/processed/training.pt b/tests/data/pytorch_mnist/training/MNIST/processed/training.pt similarity index 100% rename from tests/data/pytorch_mnist/training/processed/training.pt rename to tests/data/pytorch_mnist/training/MNIST/processed/training.pt From 7df89230e849bf24e173bef061b3ad940408f158 Mon Sep 17 00:00:00 2001 From: Karim Nakad Date: Fri, 16 Aug 2019 15:24:48 -0700 Subject: [PATCH 2/2] change: correcting folder path --- tests/integ/test_git.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integ/test_git.py b/tests/integ/test_git.py index be3c85e3f2..8b6d796390 100644 --- a/tests/integ/test_git.py +++ b/tests/integ/test_git.py @@ -28,6 +28,8 @@ from sagemaker.sklearn.model import SKLearnModel from tests.integ import DATA_DIR, PYTHON_VERSION +MNIST_FOLDER_NAME = "MNIST" + GIT_REPO = "https://github.com/aws/sagemaker-python-sdk.git" BRANCH = "test-branch-git-config" COMMIT = "ae15c9d7d5b97ea95ea451e4662ee43da3401d73" @@ -69,7 +71,7 @@ def test_git_support_with_pytorch(sagemaker_local_session): git_config=git_config, ) - pytorch.fit({"training": "file://" + os.path.join(data_path, "training")}) + pytorch.fit({"training": "file://" + os.path.join(data_path, "training", MNIST_FOLDER_NAME)}) with lock.lock(LOCK_PATH): try: