Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions src/sagemaker/amazon/amazon_estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

from six.moves.urllib.parse import urlparse

from sagemaker import image_uris
from sagemaker import image_uris, s3_utils
from sagemaker.amazon import validation
from sagemaker.amazon.hyperparameter import Hyperparameter as hp # noqa
from sagemaker.amazon.common import write_numpy_to_dense_tensor
Expand Down Expand Up @@ -93,8 +93,15 @@ def __init__(
enable_network_isolation=enable_network_isolation,
**kwargs
)
data_location = data_location or "s3://{}/sagemaker-record-sets/".format(
self.sagemaker_session.default_bucket()

data_location = data_location or (
s3_utils.s3_path_join(
"s3://",
self.sagemaker_session.default_bucket(),
self.sagemaker_session.default_bucket_prefix,
"sagemaker-record-sets",
with_end_slash=True,
)
)
self._data_location = data_location

Expand Down
9 changes: 7 additions & 2 deletions src/sagemaker/automl/automl.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from typing import Optional, List, Dict
from six import string_types

from sagemaker import Model, PipelineModel
from sagemaker import Model, PipelineModel, s3
from sagemaker.automl.candidate_estimator import CandidateEstimator
from sagemaker.config import (
AUTO_ML_ROLE_ARN_PATH,
Expand Down Expand Up @@ -676,7 +676,12 @@ def _prepare_for_auto_ml_job(self, job_name=None):
self.current_job_name = name_from_base(base_name, max_length=32)

if self.output_path is None:
self.output_path = "s3://{}/".format(self.sagemaker_session.default_bucket())
self.output_path = s3.s3_path_join(
"s3://",
self.sagemaker_session.default_bucket(),
self.sagemaker_session.default_bucket_prefix,
with_end_slash=True,
)

@classmethod
def _get_supported_inference_keys(cls, container, default=None):
Expand Down
7 changes: 7 additions & 0 deletions src/sagemaker/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@
AUTO_ML_VOLUME_KMS_KEY_ID_PATH,
AUTO_ML_INTER_CONTAINER_ENCRYPTION_PATH,
ENDPOINT_CONFIG_DATA_CAPTURE_KMS_KEY_ID_PATH,
SESSION_DEFAULT_S3_BUCKET_PATH,
SESSION_DEFAULT_S3_OBJECT_KEY_PREFIX_PATH,
MONITORING_SCHEDULE_CONFIG,
MONITORING_JOB_DEFINITION,
MONITORING_OUTPUT_CONFIG,
Expand Down Expand Up @@ -131,4 +133,9 @@
EXECUTION_ROLE_ARN,
ASYNC_INFERENCE_CONFIG,
SCHEMA_VERSION,
PYTHON_SDK,
MODULES,
DEFAULT_S3_BUCKET,
DEFAULT_S3_OBJECT_KEY_PREFIX,
SESSION,
)
51 changes: 45 additions & 6 deletions src/sagemaker/config/config_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,13 +89,17 @@
OBJECT = "object"
ADDITIONAL_PROPERTIES = "additionalProperties"
ENABLE_INTER_CONTAINER_TRAFFIC_ENCRYPTION = "EnableInterContainerTrafficEncryption"
SESSION = "Session"
DEFAULT_S3_BUCKET = "DefaultS3Bucket"
DEFAULT_S3_OBJECT_KEY_PREFIX = "DefaultS3ObjectKeyPrefix"


def _simple_path(*args: str):
"""Appends an arbitrary number of strings to use as path constants"""
return ".".join(args)


# Paths for reference elsewhere in the code.
COMPILATION_JOB_VPC_CONFIG_PATH = _simple_path(SAGEMAKER, COMPILATION_JOB, VPC_CONFIG)
COMPILATION_JOB_KMS_KEY_ID_PATH = _simple_path(
SAGEMAKER, COMPILATION_JOB, OUTPUT_CONFIG, KMS_KEY_ID
Expand Down Expand Up @@ -231,7 +235,6 @@ def _simple_path(*args: str):
MODEL_PACKAGE_VALIDATION_PROFILES_PATH = _simple_path(
SAGEMAKER, MODEL_PACKAGE, VALIDATION_SPECIFICATION, VALIDATION_PROFILES
)

REMOTE_FUNCTION_DEPENDENCIES = _simple_path(
SAGEMAKER, PYTHON_SDK, MODULES, REMOTE_FUNCTION, DEPENDENCIES
)
Expand Down Expand Up @@ -274,9 +277,6 @@ def _simple_path(*args: str):
REMOTE_FUNCTION_ENABLE_INTER_CONTAINER_TRAFFIC_ENCRYPTION = _simple_path(
SAGEMAKER, PYTHON_SDK, MODULES, REMOTE_FUNCTION, ENABLE_INTER_CONTAINER_TRAFFIC_ENCRYPTION
)

# Paths for reference elsewhere in the SDK.
# Names include the schema version since the paths could change with other schema versions
MONITORING_SCHEDULE_INTER_CONTAINER_ENCRYPTION_PATH = _simple_path(
SAGEMAKER,
MONITORING_SCHEDULE,
Expand All @@ -298,6 +298,13 @@ def _simple_path(*args: str):
TRAINING_JOB_INTER_CONTAINER_ENCRYPTION_PATH = _simple_path(
SAGEMAKER, TRAINING_JOB, ENABLE_INTER_CONTAINER_TRAFFIC_ENCRYPTION
)
SESSION_DEFAULT_S3_BUCKET_PATH = _simple_path(
SAGEMAKER, PYTHON_SDK, MODULES, SESSION, DEFAULT_S3_BUCKET
)
SESSION_DEFAULT_S3_OBJECT_KEY_PREFIX_PATH = _simple_path(
SAGEMAKER, PYTHON_SDK, MODULES, SESSION, DEFAULT_S3_OBJECT_KEY_PREFIX
)


SAGEMAKER_PYTHON_SDK_CONFIG_SCHEMA = {
"$schema": "https://json-schema.org/draft/2020-12/schema",
Expand Down Expand Up @@ -447,6 +454,15 @@ def _simple_path(*args: str):
"s3Uri": {TYPE: "string", "pattern": "^(https|s3)://([^/]+)/?(.*)$", "maxLength": 1024},
# Regex is taken from https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_AlgorithmSpecification.html#sagemaker-Type-AlgorithmSpecification-ContainerEntrypoint
"preExecutionCommand": {TYPE: "string", "pattern": r".*"},
# Regex based on https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_PipelineDefinitionS3Location.html
# except with an additional ^ and $ for the beginning and the end to closer align to
# https://docs.aws.amazon.com/AmazonS3/latest/userguide/bucketnamingrules.html
"s3Bucket": {
TYPE: "string",
"pattern": r"^[a-z0-9][\.\-a-z0-9]{1,61}[a-z0-9]$",
"minLength": 3,
"maxLength": 63,
},
},
PROPERTIES: {
SCHEMA_VERSION: {
Expand Down Expand Up @@ -477,6 +493,29 @@ def _simple_path(*args: str):
TYPE: OBJECT,
ADDITIONAL_PROPERTIES: False,
PROPERTIES: {
SESSION: {
TYPE: OBJECT,
ADDITIONAL_PROPERTIES: False,
PROPERTIES: {
DEFAULT_S3_BUCKET: {
"description": "sets `default_bucket` of Session",
"$ref": "#/definitions/s3Bucket",
},
DEFAULT_S3_OBJECT_KEY_PREFIX: {
"description": (
"sets `default_bucket_prefix` of Session"
),
TYPE: "string",
# S3 guidelines:
# https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html
# Note that the PythonSDK at the time of writing
# tends to collapse multiple "/" in a row to one "/"
# (even though S3 allows multiple "/" in a row)
"minLength": 1,
"maxLength": 1024,
},
},
},
REMOTE_FUNCTION: {
TYPE: OBJECT,
ADDITIONAL_PROPERTIES: False,
Expand Down Expand Up @@ -504,9 +543,9 @@ def _simple_path(*args: str):
VOLUME_KMS_KEY_ID: {"$ref": "#/definitions/kmsKeyId"},
VPC_CONFIG: {"$ref": "#/definitions/vpcConfig"},
},
}
},
},
}
},
},
},
# Feature Group
Expand Down
22 changes: 15 additions & 7 deletions src/sagemaker/djl_inference/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from sagemaker.deserializers import JSONDeserializer, BaseDeserializer
from sagemaker.djl_inference import defaults
from sagemaker.model import FrameworkModel
from sagemaker.s3_utils import s3_path_join
from sagemaker.serializers import JSONSerializer, BaseSerializer
from sagemaker.session import Session
from sagemaker.utils import _tmpdir, _create_or_update_code_dir
Expand Down Expand Up @@ -502,12 +503,16 @@ def partition(
self.key_prefix, self.name, self.image_uri
)
if s3_output_uri is None:
bucket = self.bucket or self.sagemaker_session.default_bucket()
s3_output_uri = f"s3://{bucket}/{deploy_key_prefix}"
bucket, deploy_key_prefix = s3.determine_bucket_and_prefix(
bucket=self.bucket,
key_prefix=deploy_key_prefix,
sagemaker_session=self.sagemaker_session,
)
s3_output_uri = s3_path_join("s3://", bucket, deploy_key_prefix)
else:
s3_output_uri = f"{s3_output_uri}/{deploy_key_prefix}"
s3_output_uri = s3_path_join(s3_output_uri, deploy_key_prefix)

self.save_mp_checkpoint_path = f"{s3_output_uri}/aot-partitioned-checkpoints"
self.save_mp_checkpoint_path = s3_path_join(s3_output_uri, "aot-partitioned-checkpoints")

container_def = self._upload_model_to_s3(upload_as_tar=False)
estimator = _create_estimator(
Expand Down Expand Up @@ -673,7 +678,11 @@ def _upload_model_to_s3(self, upload_as_tar: bool = True):
deploy_key_prefix = fw_utils.model_code_key_prefix(
self.key_prefix, self.name, self.image_uri
)
bucket = self.bucket or self.sagemaker_session.default_bucket()
bucket, deploy_key_prefix = s3.determine_bucket_and_prefix(
bucket=self.bucket,
key_prefix=deploy_key_prefix,
sagemaker_session=self.sagemaker_session,
)
if upload_as_tar:
uploaded_code = fw_utils.tar_and_upload_dir(
self.sagemaker_session.boto_session,
Expand All @@ -686,10 +695,9 @@ def _upload_model_to_s3(self, upload_as_tar: bool = True):
)
model_data_url = uploaded_code.s3_prefix
else:
key_prefix = f"{deploy_key_prefix}/aot-model"
model_data_url = S3Uploader.upload(
tmp_code_dir,
"s3://%s/%s" % (bucket, key_prefix),
s3_path_join("s3://", bucket, deploy_key_prefix, "aot-model"),
self.model_kms_key,
self.sagemaker_session,
)
Expand Down
61 changes: 42 additions & 19 deletions src/sagemaker/estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from six.moves.urllib.parse import urlparse

import sagemaker
from sagemaker import git_utils, image_uris, vpc_utils
from sagemaker import git_utils, image_uris, vpc_utils, s3
from sagemaker.analytics import TrainingJobAnalytics
from sagemaker.config import (
TRAINING_JOB_VOLUME_KMS_KEY_ID_PATH,
Expand Down Expand Up @@ -672,6 +672,9 @@ def __init__(
enable_network_isolation=self._enable_network_isolation,
)

# Internal flag
self._is_output_path_set_from_default_bucket_and_prefix = False

@abstractmethod
def training_image_uri(self):
"""Return the Docker image to use for training.
Expand Down Expand Up @@ -772,7 +775,13 @@ def _prepare_for_training(self, job_name=None):
if self.sagemaker_session.local_mode and local_code:
self.output_path = ""
else:
self.output_path = "s3://{}/".format(self.sagemaker_session.default_bucket())
self.output_path = s3.s3_path_join(
"s3://",
self.sagemaker_session.default_bucket(),
self.sagemaker_session.default_bucket_prefix,
with_end_slash=True,
)
self._is_output_path_set_from_default_bucket_and_prefix = True

if self.git_config:
updated_paths = git_utils.git_clone_repo(
Expand Down Expand Up @@ -847,7 +856,8 @@ def _stage_user_code_in_s3(self) -> UploadedCode:
if is_pipeline_variable(self.output_path):
if self.code_location is None:
code_bucket = self.sagemaker_session.default_bucket()
code_s3_prefix = self._assign_s3_prefix()
key_prefix = self.sagemaker_session.default_bucket_prefix
code_s3_prefix = self._assign_s3_prefix(key_prefix)
kms_key = None
else:
code_bucket, key_prefix = parse_s3_url(self.code_location)
Expand All @@ -860,16 +870,30 @@ def _stage_user_code_in_s3(self) -> UploadedCode:
if local_mode:
if self.code_location is None:
code_bucket = self.sagemaker_session.default_bucket()
code_s3_prefix = self._assign_s3_prefix()
key_prefix = self.sagemaker_session.default_bucket_prefix
code_s3_prefix = self._assign_s3_prefix(key_prefix)
kms_key = None
else:
code_bucket, key_prefix = parse_s3_url(self.code_location)
code_s3_prefix = self._assign_s3_prefix(key_prefix)
kms_key = None
else:
if self.code_location is None:
code_bucket, _ = parse_s3_url(self.output_path)
code_s3_prefix = self._assign_s3_prefix()
code_bucket, possible_key_prefix = parse_s3_url(self.output_path)

if self._is_output_path_set_from_default_bucket_and_prefix:
# Only include possible_key_prefix if the output_path was created from the
# Session's default bucket and prefix. In that scenario, possible_key_prefix
# will either be "" or Session.default_bucket_prefix.
# Note: We cannot do `if (code_bucket == session.default_bucket() and
# key_prefix == session.default_bucket_prefix)` instead because the user
# could have passed in equivalent values themselves to output_path. And
# including the prefix in that case could result in a potentially backwards
# incompatible behavior change for the end user.
code_s3_prefix = self._assign_s3_prefix(possible_key_prefix)
else:
code_s3_prefix = self._assign_s3_prefix()

kms_key = self.output_kms_key
else:
code_bucket, key_prefix = parse_s3_url(self.code_location)
Expand Down Expand Up @@ -905,18 +929,13 @@ def _assign_s3_prefix(self, key_prefix=""):
"""
from sagemaker.workflow.utilities import _pipeline_config

code_s3_prefix = "/".join(filter(None, [key_prefix, self._current_job_name, "source"]))
code_s3_prefix = s3.s3_path_join(key_prefix, self._current_job_name, "source")
if _pipeline_config and _pipeline_config.code_hash:
code_s3_prefix = "/".join(
filter(
None,
[
key_prefix,
_pipeline_config.pipeline_name,
"code",
_pipeline_config.code_hash,
],
)
code_s3_prefix = s3.s3_path_join(
key_prefix,
_pipeline_config.pipeline_name,
"code",
_pipeline_config.code_hash,
)
return code_s3_prefix

Expand Down Expand Up @@ -1060,8 +1079,12 @@ def _set_source_s3_uri(self, rule):
if "source_s3_uri" in (rule.rule_parameters or {}):
parse_result = urlparse(rule.rule_parameters["source_s3_uri"])
if parse_result.scheme != "s3":
desired_s3_uri = os.path.join(
"s3://", self.sagemaker_session.default_bucket(), rule.name, str(uuid.uuid4())
desired_s3_uri = s3.s3_path_join(
"s3://",
self.sagemaker_session.default_bucket(),
self.sagemaker_session.default_bucket_prefix,
rule.name,
str(uuid.uuid4()),
)
s3_uri = S3Uploader.upload(
local_path=rule.rule_parameters["source_s3_uri"],
Expand Down
27 changes: 23 additions & 4 deletions src/sagemaker/experiments/_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import botocore

from sagemaker import s3
from sagemaker.experiments._utils import is_already_exist_error

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -75,8 +76,17 @@ def upload_artifact(self, file_path):
raise ValueError(
"{} does not exist or is not a file. Please supply a file path.".format(file_path)
)
if not self.artifact_bucket:
self.artifact_bucket = self.sagemaker_session.default_bucket()

# If self.artifact_bucket is falsy, it will be set to sagemaker_session.default_bucket.
# In that case, and if sagemaker_session.default_bucket_prefix exists, self.artifact_prefix
# needs to be updated too (because not updating self.artifact_prefix would result in
# different behavior the 1st time this method is called vs the 2nd).
self.artifact_bucket, self.artifact_prefix = s3.determine_bucket_and_prefix(
bucket=self.artifact_bucket,
key_prefix=self.artifact_prefix,
sagemaker_session=self.sagemaker_session,
)

artifact_name = os.path.basename(file_path)
artifact_s3_key = "{}/{}/{}".format(
self.artifact_prefix, self.trial_component_name, artifact_name
Expand All @@ -96,8 +106,17 @@ def upload_object_artifact(self, artifact_name, artifact_object, file_extension=
Returns:
str: The s3 URI of the uploaded file and the version of the file.
"""
if not self.artifact_bucket:
self.artifact_bucket = self.sagemaker_session.default_bucket()

# If self.artifact_bucket is falsy, it will be set to sagemaker_session.default_bucket.
# In that case, and if sagemaker_session.default_bucket_prefix exists, self.artifact_prefix
# needs to be updated too (because not updating self.artifact_prefix would result in
# different behavior the 1st time this method is called vs the 2nd).
self.artifact_bucket, self.artifact_prefix = s3.determine_bucket_and_prefix(
bucket=self.artifact_bucket,
key_prefix=self.artifact_prefix,
sagemaker_session=self.sagemaker_session,
)

if file_extension:
artifact_name = (
artifact_name + ("" if file_extension.startswith(".") else ".") + file_extension
Expand Down
Loading