Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docker/2.1.0/py3/Dockerfile.cpu
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ RUN ${PIP} install --no-cache-dir -U \
awscli \
mpi4py==3.0.3 \
opencv-python==4.2.0.32 \
sagemaker-experiments==0.1.7 \
"sagemaker-tensorflow>=2.1,<2.2" \
# Install TensorFlow separately at the end so that
# the library version is not overwritten
Expand Down
1 change: 1 addition & 0 deletions docker/2.1.0/py3/Dockerfile.gpu
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ RUN ${PIP} install --no-cache-dir -U \
awscli \
mpi4py==3.0.3 \
opencv-python==4.2.0.32 \
sagemaker-experiments==0.1.7 \
"sagemaker-tensorflow>=2.1,<2.2" \
# Install TensorFlow separately at the end so that
# the library version is not overwritten
Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ def read_version():
'sagemaker==1.50.1', 'tensorflow<2.0', 'docker-compose', 'boto3==1.10.50',
'six==1.13.0', 'python-dateutil>=2.1,<2.8.1', 'botocore==1.13.50',
'requests-mock', 'awscli==1.16.314'],
'benchmark': ['click']
'benchmark': ['click'],
':python_version=="3.6"': ['sagemaker-experiments==0.1.7']
},
)
7 changes: 7 additions & 0 deletions test/integration/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,3 +116,10 @@ def docker_image(docker_base_name, tag):
def ecr_image(account_id, docker_base_name, tag, region):
    """Build the image URI string for the given account, region, repository name and tag.

    The result has the form
    ``<account_id>.dkr.ecr.<region>.amazonaws.com/<docker_base_name>:<tag>``.
    """
    uri_template = '{}.dkr.ecr.{}.amazonaws.com/{}:{}'
    return uri_template.format(account_id, region, docker_base_name, tag)


@pytest.fixture(autouse=True)
def skip_py2_containers(request, tag):
    """Skip the requesting test when it is marked ``skip_py2_containers`` and ``tag`` contains 'py2'.

    Runs automatically for every test (``autouse=True``); tests without the
    marker, or with a tag that does not contain 'py2', are left alone.
    """
    marker = request.node.get_closest_marker('skip_py2_containers')
    # Guard clause: nothing to do unless the test opted in AND the tag is a py2 one.
    if not marker or 'py2' not in tag:
        return
    pytest.skip('Skipping python2 container with tag {}'.format(tag))
97 changes: 97 additions & 0 deletions test/integration/sagemaker/test_experiments.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
# Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License").
# You may not use this file except in compliance with the License.
# A copy of the License is located at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# or in the "license" file accompanying this file. This file is distributed
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.
from __future__ import absolute_import

import os
import time

import pytest
from sagemaker import utils
from sagemaker.tensorflow import TensorFlow
from smexperiments.experiment import Experiment
from smexperiments.trial import Trial
from smexperiments.trial_component import TrialComponent

from test.integration import DEFAULT_TIMEOUT
from test.integration import RESOURCE_PATH
from timeout import timeout

# The "mnist" directory under RESOURCE_PATH.
DATA_PATH = os.path.join(RESOURCE_PATH, "mnist")
# Path to mnist_gluon_basic_hook_demo.py inside DATA_PATH.
# NOTE(review): test_training below builds its own resource paths and runs
# mnist.py, so neither constant appears to be used in this file -- confirm
# whether they are still needed.
SCRIPT_PATH = os.path.join(DATA_PATH, "mnist_gluon_basic_hook_demo.py")


@pytest.mark.skip_py2_containers
def test_training(sagemaker_session, ecr_image, instance_type, framework_version):
    """Run a training job and check that its trial component can be attached to a trial.

    Steps:
      1. Create an experiment and a trial inside it.
      2. Run the script-mode MNIST training job (``mnist/mnist.py``) with the
         image under test.
      3. Look up the trial component listed for the training job's ARN.
      4. Associate that component with the trial and assert that the trial
         then lists exactly one component.

    Every resource is registered for deletion as soon as it exists, so the
    experiment, trial and trial component are cleaned up even when the
    training job or one of the assertions fails.
    """
    # Function-scope import keeps this block self-contained. ExitStack runs the
    # registered callbacks in reverse registration order when the block exits,
    # whether it exits normally or through an exception.
    from contextlib import ExitStack

    sm_client = sagemaker_session.sagemaker_client

    with ExitStack() as cleanup:
        experiment_name = f"tf-container-integ-test-{int(time.time())}"

        experiment = Experiment.create(
            experiment_name=experiment_name,
            description="Integration test experiment from sagemaker-tf-container",
            sagemaker_boto_client=sm_client,
        )
        cleanup.callback(experiment.delete)

        trial_name = f"tf-container-integ-test-{int(time.time())}"
        trial = Trial.create(
            experiment_name=experiment_name, trial_name=trial_name, sagemaker_boto_client=sm_client
        )
        cleanup.callback(trial.delete)

        training_job_name = utils.unique_name_from_base("test-tf-experiments-mnist")

        # create a training job and wait for it to complete
        with timeout(minutes=DEFAULT_TIMEOUT):
            resource_path = os.path.join(os.path.dirname(__file__), "..", "..", "resources")
            script = os.path.join(resource_path, "mnist", "mnist.py")
            estimator = TensorFlow(
                entry_point=script,
                role="SageMakerRole",
                train_instance_type=instance_type,
                train_instance_count=1,
                sagemaker_session=sagemaker_session,
                image_name=ecr_image,
                framework_version=framework_version,
                script_mode=True,
            )
            inputs = estimator.sagemaker_session.upload_data(
                path=os.path.join(resource_path, "mnist", "data"), key_prefix="scriptmode/mnist"
            )
            estimator.fit(inputs, job_name=training_job_name)

        training_job = sm_client.describe_training_job(TrainingJobName=training_job_name)
        training_job_arn = training_job["TrainingJobArn"]

        # verify trial component auto created from the training job
        trial_components = list(
            TrialComponent.list(source_arn=training_job_arn, sagemaker_boto_client=sm_client)
        )
        # Fail with a clear message instead of an IndexError on the lookup below.
        assert trial_components, f"no trial component found for training job {training_job_arn}"

        trial_component_summary = trial_components[0]
        # The keyword is ``sagemaker_boto_client`` (as in every other call in this
        # test); the previous ``sagemaker_boto_name`` was a typo.
        trial_component = TrialComponent.load(
            trial_component_name=trial_component_summary.trial_component_name,
            sagemaker_boto_client=sm_client,
        )
        cleanup.callback(trial_component.delete)

        # associate the trial component with the trial
        trial.add_trial_component(trial_component)
        # Registered last so it runs first: the component is disassociated
        # before the component, trial and experiment are deleted.
        cleanup.callback(
            trial.remove_trial_component, trial_component_summary.trial_component_name
        )

        # verify association
        associated_trial_components = list(trial.list_trial_components())
        assert len(associated_trial_components) == 1