From 3076a39fc47a0d07b64523faf8384b0201bdfdfd Mon Sep 17 00:00:00 2001 From: Winston Dong Date: Fri, 20 Apr 2018 00:29:34 +0000 Subject: [PATCH] increase endpoint creation timeouts to 35 minutes across the board for tests --- tests/integ/test_byo_estimator.py | 4 ++-- tests/integ/test_factorization_machines.py | 4 ++-- tests/integ/test_kmeans.py | 4 ++-- tests/integ/test_lda.py | 2 +- tests/integ/test_linear_learner.py | 4 ++-- tests/integ/test_mxnet_train.py | 6 +++--- tests/integ/test_ntm.py | 2 +- tests/integ/test_pca.py | 4 ++-- tests/integ/test_tf.py | 4 ++-- tests/integ/test_tf_cifar.py | 2 +- tests/integ/timeout.py | 2 +- 11 files changed, 19 insertions(+), 19 deletions(-) diff --git a/tests/integ/test_byo_estimator.py b/tests/integ/test_byo_estimator.py index 1e62a401cc..4a0d8a71d6 100644 --- a/tests/integ/test_byo_estimator.py +++ b/tests/integ/test_byo_estimator.py @@ -91,7 +91,7 @@ def test_byo_estimator(sagemaker_session, region): endpoint_name = name_from_base('byo') - with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=20): + with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session): model = estimator.create_model() predictor = model.deploy(1, 'ml.m4.xlarge', endpoint_name=endpoint_name) predictor.serializer = fm_serializer @@ -145,7 +145,7 @@ def test_async_byo_estimator(sagemaker_session, region): estimator.fit({'train': s3_train_data}, wait=False) training_job_name = estimator.latest_training_job.name - with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=30): + with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session): estimator = Estimator.attach(training_job_name=training_job_name, sagemaker_session=sagemaker_session) model = estimator.create_model() predictor = model.deploy(1, 'ml.m4.xlarge', endpoint_name=endpoint_name) diff --git a/tests/integ/test_factorization_machines.py b/tests/integ/test_factorization_machines.py index c43aeed072..c760af3ba8 100644 --- a/tests/integ/test_factorization_machines.py +++ b/tests/integ/test_factorization_machines.py @@ -41,7 +41,7 @@ def test_factorization_machines(sagemaker_session): fm.fit(fm.record_set(train_set[0][:200], train_set[1][:200].astype('float32'))) endpoint_name = name_from_base('fm') - with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=20): + with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session): model = FactorizationMachinesModel(fm.model_data, role='SageMakerRole', sagemaker_session=sagemaker_session) predictor = model.deploy(1, 'ml.c4.xlarge', endpoint_name=endpoint_name) result = predictor.predict(train_set[0][:10]) @@ -77,7 +77,7 @@ def test_async_factorization_machines(sagemaker_session): time.sleep(20) print("attaching now...") - with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=35): + with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session): estimator = FactorizationMachines.attach(training_job_name=training_job_name, sagemaker_session=sagemaker_session) model = FactorizationMachinesModel(estimator.model_data, role='SageMakerRole', diff --git a/tests/integ/test_kmeans.py b/tests/integ/test_kmeans.py index 0c2e3712a6..d66a11156b 100644 --- a/tests/integ/test_kmeans.py +++ b/tests/integ/test_kmeans.py @@ -47,7 +47,7 @@ def test_kmeans(sagemaker_session): kmeans.fit(kmeans.record_set(train_set[0][:100])) endpoint_name = name_from_base('kmeans') - with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=20): + with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session): model = KMeansModel(kmeans.model_data, role='SageMakerRole', sagemaker_session=sagemaker_session) predictor = model.deploy(1, 'ml.c4.xlarge', endpoint_name=endpoint_name) result = predictor.predict(train_set[0][:10]) @@ -90,7 +90,7 @@ def test_async_kmeans(sagemaker_session): time.sleep(20) print("attaching now...") - with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=35): + with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session): estimator = KMeans.attach(training_job_name=training_job_name, sagemaker_session=sagemaker_session) model = KMeansModel(estimator.model_data, role='SageMakerRole', sagemaker_session=sagemaker_session) predictor = model.deploy(1, 'ml.c4.xlarge', endpoint_name=endpoint_name) diff --git a/tests/integ/test_lda.py b/tests/integ/test_lda.py index 34deff0475..f67172058e 100644 --- a/tests/integ/test_lda.py +++ b/tests/integ/test_lda.py @@ -41,7 +41,7 @@ def test_lda(sagemaker_session): lda.fit(record_set, 100) endpoint_name = name_from_base('lda') - with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=20): + with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session): model = LDAModel(lda.model_data, role='SageMakerRole', sagemaker_session=sagemaker_session) predictor = model.deploy(1, 'ml.c4.xlarge', endpoint_name=endpoint_name) diff --git a/tests/integ/test_linear_learner.py b/tests/integ/test_linear_learner.py index 24dad9850f..602679e771 100644 --- a/tests/integ/test_linear_learner.py +++ b/tests/integ/test_linear_learner.py @@ -77,7 +77,7 @@ def test_linear_learner(sagemaker_session): ll.fit(ll.record_set(train_set[0][:200], train_set[1][:200])) endpoint_name = name_from_base('linear-learner') - with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=20): + with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session): predictor = ll.deploy(1, 'ml.c4.xlarge', endpoint_name=endpoint_name) @@ -147,7 +147,7 @@ def test_async_linear_learner(sagemaker_session): print("Waiting to re-attach to the training job: %s" % training_job_name) time.sleep(20) - with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=35): + with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session): estimator = LinearLearner.attach(training_job_name=training_job_name, sagemaker_session=sagemaker_session) model = LinearLearnerModel(estimator.model_data, role='SageMakerRole', sagemaker_session=sagemaker_session) predictor = model.deploy(1, 'ml.c4.xlarge', endpoint_name=endpoint_name) diff --git a/tests/integ/test_mxnet_train.py b/tests/integ/test_mxnet_train.py index 9964fb7deb..05d736692a 100644 --- a/tests/integ/test_mxnet_train.py +++ b/tests/integ/test_mxnet_train.py @@ -45,7 +45,7 @@ def mxnet_training_job(sagemaker_session, mxnet_full_version): def test_attach_deploy(mxnet_training_job, sagemaker_session): endpoint_name = 'test-mxnet-attach-deploy-{}'.format(sagemaker_timestamp()) - with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=20): + with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session): estimator = MXNet.attach(mxnet_training_job, sagemaker_session=sagemaker_session) predictor = estimator.deploy(1, 'ml.m4.xlarge', endpoint_name=endpoint_name) data = numpy.zeros(shape=(1, 1, 28, 28)) @@ -55,7 +55,7 @@ def test_attach_deploy(mxnet_training_job, sagemaker_session): def test_deploy_model(mxnet_training_job, sagemaker_session): endpoint_name = 'test-mxnet-deploy-model-{}'.format(sagemaker_timestamp()) - with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=20): + with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session): desc = sagemaker_session.sagemaker_client.describe_training_job(TrainingJobName=mxnet_training_job) model_data = desc['ModelArtifacts']['S3ModelArtifacts'] script_path = os.path.join(DATA_DIR, 'mxnet_mnist', 'mnist.py') @@ -88,7 +88,7 @@ def test_async_fit(sagemaker_session): print("Waiting to re-attach to the training job: %s" % training_job_name) time.sleep(20) - with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=35): + with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session): print("Re-attaching now to: %s" % training_job_name) estimator = MXNet.attach(training_job_name=training_job_name, sagemaker_session=sagemaker_session) predictor = estimator.deploy(1, 'ml.m4.xlarge', endpoint_name=endpoint_name) diff --git a/tests/integ/test_ntm.py b/tests/integ/test_ntm.py index cc2336d3c8..51b35f4728 100644 --- a/tests/integ/test_ntm.py +++ b/tests/integ/test_ntm.py @@ -41,7 +41,7 @@ def test_ntm(sagemaker_session): ntm.fit(record_set, None) endpoint_name = name_from_base('ntm') - with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=20): + with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session): model = NTMModel(ntm.model_data, role='SageMakerRole', sagemaker_session=sagemaker_session) predictor = model.deploy(1, 'ml.c4.xlarge', endpoint_name=endpoint_name) diff --git a/tests/integ/test_pca.py b/tests/integ/test_pca.py index b260c4b793..3b0d8f75df 100644 --- a/tests/integ/test_pca.py +++ b/tests/integ/test_pca.py @@ -41,7 +41,7 @@ def test_pca(sagemaker_session): pca.fit(pca.record_set(train_set[0][:100])) endpoint_name = name_from_base('pca') - with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=20): + with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session): pca_model = sagemaker.amazon.pca.PCAModel(model_data=pca.model_data, role='SageMakerRole', sagemaker_session=sagemaker_session) predictor = pca_model.deploy(initial_instance_count=1, instance_type="ml.c4.xlarge", @@ -79,7 +79,7 @@ def test_async_pca(sagemaker_session): print("Detached from training job. Will re-attach in 20 seconds") time.sleep(20) - with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=35): + with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session): estimator = sagemaker.amazon.pca.PCA.attach(training_job_name=training_job_name, sagemaker_session=sagemaker_session) diff --git a/tests/integ/test_tf.py b/tests/integ/test_tf.py index b8d93dbcdd..a3fb955a7d 100644 --- a/tests/integ/test_tf.py +++ b/tests/integ/test_tf.py @@ -42,7 +42,7 @@ def test_tf(sagemaker_session, tf_full_version): print('job succeeded: {}'.format(estimator.latest_training_job.name)) endpoint_name = estimator.latest_training_job.name - with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=20): + with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session): json_predictor = estimator.deploy(initial_instance_count=1, instance_type='ml.c4.xlarge', endpoint_name=endpoint_name) @@ -75,7 +75,7 @@ def test_tf_async(sagemaker_session): time.sleep(20) endpoint_name = training_job_name - with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=35): + with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session): estimator = TensorFlow.attach(training_job_name=training_job_name, sagemaker_session=sagemaker_session) json_predictor = estimator.deploy(initial_instance_count=1, instance_type='ml.c4.xlarge', endpoint_name=endpoint_name) diff --git a/tests/integ/test_tf_cifar.py b/tests/integ/test_tf_cifar.py index 648d6133d3..b639b5efb7 100644 --- a/tests/integ/test_tf_cifar.py +++ b/tests/integ/test_tf_cifar.py @@ -47,7 +47,7 @@ def test_cifar(sagemaker_session, tf_full_version): print('job succeeded: {}'.format(estimator.latest_training_job.name)) endpoint_name = estimator.latest_training_job.name - with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=20): + with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session): predictor = estimator.deploy(initial_instance_count=1, instance_type='ml.p2.xlarge') predictor.serializer = PickleSerializer() predictor.content_type = PICKLE_CONTENT_TYPE diff --git a/tests/integ/timeout.py b/tests/integ/timeout.py index a6f7d6355e..54ed96a41e 100644 --- a/tests/integ/timeout.py +++ b/tests/integ/timeout.py @@ -56,7 +56,7 @@ def handler(signum, frame): @contextmanager -def timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, seconds=0, minutes=0, hours=0): +def timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, seconds=0, minutes=35, hours=0): with timeout(seconds=seconds, minutes=minutes, hours=hours) as t: try: yield [t]