Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
e00a780
change: Add "distribution" parameter into record_set (#4408)
SifeiLi Feb 19, 2024
1dad4ea
Fix error message typo (#4378)
shinglyu Feb 21, 2024
3be85ce
Add telemetry metrics on usage of default images for ModelBuilder (#4…
jiapinw Feb 21, 2024
2a709b5
feat: ModelBuilder to fetch local schema when no SchemaBuilder presen…
makungaj1 Feb 23, 2024
2cacf19
fix: Fix telemetry image uri option logic for ModelBuilder (#4443)
jiapinw Feb 23, 2024
67c3c41
fix: add fixes for tarfile extractall functionality PEP-721 (#4441)
mufaddal-rohawala Feb 23, 2024
a2b1e07
fix: skip pytorch training compiler integ test (#4449)
mufaddal-rohawala Feb 23, 2024
b627f2c
prepare release v2.209.0
Feb 24, 2024
208e38c
update development version to v2.209.1.dev0
Feb 24, 2024
7323410
fix: fixed implementation of fail_on_violation for transform with mon…
keshav-chandak Feb 26, 2024
11cc600
change: Add validation for sagemaker version on remote job (#4393)
qidewenwhen Feb 26, 2024
384cd4f
fix: add missing regions to pytorch config (#4450)
tejaschumbalkar Feb 26, 2024
160d753
fix: tolerate vulnerable old model for integ test and temporarily ski…
evakravi Feb 27, 2024
0126e3c
feature: TGI 1.4.2 (#4455)
haixiw Feb 27, 2024
bdb8ff0
feat: TGI optimum 0.0.18 (general+llm) (#4436)
jinyoung-lim Feb 27, 2024
6c1fd84
feat: Prepend SageMaker Studio App Type to boto3 User Agent string (#…
knikure Feb 27, 2024
05d4131
change: bump jinja2 to 3.1.3 in doc/requirements.txt (#4421) (#4423)
evakravi Feb 16, 2024
5b564de
merge conflicts
bencrabtree Feb 28, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4,554 changes: 2,285 additions & 2,269 deletions CHANGELOG.md

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
recursive-include src/sagemaker *.py

include src/sagemaker/image_uri_config/*.json
include src/sagemaker/serve/schema/*.json
include src/sagemaker/serve/requirements.txt
recursive-include requirements *

Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2.208.1.dev0
2.209.1.dev0
18 changes: 16 additions & 2 deletions src/sagemaker/amazon/amazon_estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,14 @@ def fit(
if wait:
self.latest_training_job.wait(logs=logs)

def record_set(self, train, labels=None, channel="train", encrypt=False):
def record_set(
self,
train,
labels=None,
channel="train",
encrypt=False,
distribution="ShardedByS3Key",
):
"""Build a :class:`~RecordSet` from a numpy :class:`~ndarray` matrix and label vector.

For the 2D ``ndarray`` ``train``, each row is converted to a
Expand All @@ -294,6 +301,8 @@ def record_set(self, train, labels=None, channel="train", encrypt=False):
should be assigned to.
encrypt (bool): Specifies whether the objects uploaded to S3 are
encrypted on the server side using AES-256 (default: ``False``).
distribution (str): The SageMaker TrainingJob channel S3 data
distribution type. Valid values: ``ShardedByS3Key``, ``FullyReplicated``
(default: ``ShardedByS3Key``).

Returns:
RecordSet: A RecordSet referencing the encoded, uploaded training
Expand All @@ -316,6 +325,7 @@ def record_set(self, train, labels=None, channel="train", encrypt=False):
num_records=train.shape[0],
feature_dim=train.shape[1],
channel=channel,
distribution=distribution,
)

def _get_default_mini_batch_size(self, num_records: int):
Expand Down Expand Up @@ -343,6 +353,7 @@ def __init__(
feature_dim: int,
s3_data_type: Union[str, PipelineVariable] = "ManifestFile",
channel: Union[str, PipelineVariable] = "train",
distribution: str = "ShardedByS3Key",
):
"""A collection of Amazon :class:`~Record` objects serialized and stored in S3.

Expand All @@ -358,12 +369,15 @@ def __init__(
single s3 manifest file, listing each s3 object to train on.
channel (str or PipelineVariable): The SageMaker Training Job channel this RecordSet
should be bound to.
distribution (str): The SageMaker TrainingJob S3 data distribution type.
Valid values: 'ShardedByS3Key', 'FullyReplicated'.
"""
self.s3_data = s3_data
self.feature_dim = feature_dim
self.num_records = num_records
self.s3_data_type = s3_data_type
self.channel = channel
self.distribution = distribution

def __repr__(self):
"""Return an unambiguous representation of this RecordSet"""
Expand All @@ -377,7 +391,7 @@ def data_channel(self):
def records_s3_input(self):
"""Return a TrainingInput to represent the training data"""
return TrainingInput(
self.s3_data, distribution="ShardedByS3Key", s3_data_type=self.s3_data_type
self.s3_data, distribution=self.distribution, s3_data_type=self.s3_data_type
)


Expand Down
29 changes: 29 additions & 0 deletions src/sagemaker/image_uri_config/huggingface-llm-neuronx.json
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,35 @@
"container_version": {
"inf2": "ubuntu22.04"
}
},
"0.0.18": {
"py_versions": [
"py310"
],
"registries": {
"ap-northeast-1": "763104351884",
"ap-south-1": "763104351884",
"ap-south-2": "772153158452",
"ap-southeast-1": "763104351884",
"ap-southeast-2": "763104351884",
"ap-southeast-4": "457447274322",
"eu-central-1": "763104351884",
"eu-central-2": "380420809688",
"eu-south-2": "503227376785",
"eu-west-1": "763104351884",
"eu-west-3": "763104351884",
"il-central-1": "780543022126",
"sa-east-1": "763104351884",
"us-east-1": "763104351884",
"us-east-2": "763104351884",
"us-west-2": "763104351884",
"ca-west-1": "204538143572"
},
"tag_prefix": "1.13.1-optimum0.0.18",
"repository": "huggingface-pytorch-tgi-inference",
"container_version": {
"inf2": "ubuntu22.04"
}
}
}
}
Expand Down
Loading