From 29af4e7dfa266b68ea51bf53eb85b0248f70e748 Mon Sep 17 00:00:00 2001 From: Mufaddal Rohawala Date: Tue, 9 Jan 2024 18:49:01 -0800 Subject: [PATCH 01/13] fix: Huggingface glue failing tests --- tests/data/huggingface/requirements.txt | 1 + tests/data/huggingface_byoc/requirements.txt | 2 +- tests/integ/test_huggingface.py | 3 ++- tests/integ/test_huggingface_torch_distributed.py | 4 ++-- 4 files changed, 6 insertions(+), 4 deletions(-) create mode 100644 tests/data/huggingface/requirements.txt diff --git a/tests/data/huggingface/requirements.txt b/tests/data/huggingface/requirements.txt new file mode 100644 index 0000000000..d40b9acb97 --- /dev/null +++ b/tests/data/huggingface/requirements.txt @@ -0,0 +1 @@ +datasets==2.16.1 diff --git a/tests/data/huggingface_byoc/requirements.txt b/tests/data/huggingface_byoc/requirements.txt index fed4662285..6845f50254 100644 --- a/tests/data/huggingface_byoc/requirements.txt +++ b/tests/data/huggingface_byoc/requirements.txt @@ -1,2 +1,2 @@ transformers<=4.28.1 -datasets<=2.12.0 +datasets==2.16.1 diff --git a/tests/integ/test_huggingface.py b/tests/integ/test_huggingface.py index c77ade62ee..a8be54c4d4 100644 --- a/tests/integ/test_huggingface.py +++ b/tests/integ/test_huggingface.py @@ -71,7 +71,8 @@ def test_huggingface_training( hf = HuggingFace( py_version=huggingface_pytorch_latest_training_py_version, - entry_point=os.path.join(data_path, "run_glue.py"), + source_dir=data_path, + entry_point="run_glue.py", role="SageMakerRole", transformers_version=huggingface_training_latest_version, pytorch_version=huggingface_training_pytorch_latest_version, diff --git a/tests/integ/test_huggingface_torch_distributed.py b/tests/integ/test_huggingface_torch_distributed.py index 0f78154ff8..733f59494c 100644 --- a/tests/integ/test_huggingface_torch_distributed.py +++ b/tests/integ/test_huggingface_torch_distributed.py @@ -24,10 +24,10 @@ def test_huggingface_torch_distributed_g5_glue( huggingface_pytorch_latest_training_py_version, ): with timeout.timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES): - data_path = os.path.join(DATA_DIR, "huggingface") estimator = HuggingFace( py_version=huggingface_pytorch_latest_training_py_version, - entry_point=os.path.join(data_path, "run_glue.py"), + source_dir=os.path.join(DATA_DIR, "huggingface"), + entry_point="run_glue.py", role="SageMakerRole", transformers_version=huggingface_training_latest_version, pytorch_version=huggingface_training_pytorch_latest_version, From 4d3d4ce7b8ec8ad92bc12bff7f0f1540b98eaf49 Mon Sep 17 00:00:00 2001 From: Mufaddal Rohawala Date: Tue, 9 Jan 2024 19:11:19 -0800 Subject: [PATCH 02/13] fix: Sphinx doc build failure --- doc/requirements.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/requirements.txt b/doc/requirements.txt index 365a7c1272..942183ae3a 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -1,6 +1,6 @@ -sphinx==3.1.2 -sphinx-rtd-theme==0.5.0 -docutils==0.15.2 -packaging==20.9 -jinja2<3.1 +sphinx==7.2.6 +sphinx-rtd-theme==2.0.0 +docutils==0.20.1 +packaging==23.2 +jinja2==3.1.2 schema==0.7.5 From e9c2c826fb32c7995305bebeb4fe9e9d32d517b9 Mon Sep 17 00:00:00 2001 From: Mufaddal Rohawala Date: Tue, 9 Jan 2024 20:23:45 -0800 Subject: [PATCH 03/13] fix: Huggingface glue failing tests --- doc/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/requirements.txt b/doc/requirements.txt index 942183ae3a..e031f87955 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -1,4 +1,4 @@ -sphinx==7.2.6 +sphinx==7.1.2 sphinx-rtd-theme==2.0.0 docutils==0.20.1 packaging==23.2 From b2e851fdca49e914c65d88895dd9d5bdab43b7b4 Mon Sep 17 00:00:00 2001 From: Mufaddal Rohawala Date: Tue, 9 Jan 2024 21:33:54 -0800 Subject: [PATCH 04/13] fix: failing sphinx tests --- doc/conf.py | 2 +- .../feature_processor/feature_processor.py | 5 ++--- src/sagemaker/session.py | 22 +++++++++---------- 3 files changed, 13 insertions(+), 16 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index d1ce73cb90..94a5c4d9c6 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -94,7 +94,7 @@ } # Example configuration for intersphinx: refer to the Python standard library. -intersphinx_mapping = {"http://docs.python.org/": None} +intersphinx_mapping = {"python": ("http://docs.python.org/", None)} # -- Options for autodoc ---------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/extensions/autodoc.html#configuration diff --git a/src/sagemaker/feature_store/feature_processor/feature_processor.py b/src/sagemaker/feature_store/feature_processor/feature_processor.py index e957dbd0ea..fa95212442 100644 --- a/src/sagemaker/feature_store/feature_processor/feature_processor.py +++ b/src/sagemaker/feature_store/feature_processor/feature_processor.py @@ -45,8 +45,8 @@ def feature_processor( If the decorated function is executed without arguments then the decorated function's arguments are automatically loaded from the input data sources. Outputs are ingested to the output Feature - Group. If arguments are provided to this function, then arguments are not automatically loaded - (for testing). + Group. If arguments are provided to this function, then arguments are not automatically + loaded (for testing). Decorated functions must conform to the expected signature. Parameters: one parameter of type pyspark.sql.DataFrame for each DataSource in 'inputs'; followed by the optional parameters with @@ -96,7 +96,6 @@ def transform(input_feature_group, input_csv): development phase to ensure that data is not used until the function is ready. It also useful for users that want to manage their own data ingestion. Defaults to True. spark_config (Dict[str, str]): A dict contains the key-value paris for Spark configurations. - Raises: IngestionError: If any rows are not ingested successfully then a sample of the records, with failure reasons, is logged. diff --git a/src/sagemaker/session.py b/src/sagemaker/session.py index 2cf7e78f41..5adf9cf356 100644 --- a/src/sagemaker/session.py +++ b/src/sagemaker/session.py @@ -4565,20 +4565,18 @@ def update_inference_component( Args: inference_component_name (str): Name of the Amazon SageMaker ``InferenceComponent``. specification ([dict[str,int]]): Resource configuration. Optional. - Example: { - "MinMemoryRequiredInMb": 1024, - "NumberOfCpuCoresRequired": 1, - "NumberOfAcceleratorDevicesRequired": 1, - "MaxMemoryRequiredInMb": 4096, - }, - + Example: { + "MinMemoryRequiredInMb": 1024, + "NumberOfCpuCoresRequired": 1, + "NumberOfAcceleratorDevicesRequired": 1, + "MaxMemoryRequiredInMb": 4096, + }, runtime_config ([dict[str,int]]): Number of copies. Optional. - Default: { - "copyCount": 1 - } - + Default: { + "copyCount": 1 + } wait: Wait for inference component to be created before return. Optional. Default is - True. + True. Return: str: inference component name From 1324ef919e099b7403edd77e48e2d2a1b026d973 Mon Sep 17 00:00:00 2001 From: Mufaddal Rohawala Date: Tue, 9 Jan 2024 22:07:00 -0800 Subject: [PATCH 05/13] fix: failing sphinx tests --- .../feature_processor/feature_processor.py | 7 ++++--- src/sagemaker/session.py | 10 +++++----- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/sagemaker/feature_store/feature_processor/feature_processor.py b/src/sagemaker/feature_store/feature_processor/feature_processor.py index fa95212442..9acd1a96b7 100644 --- a/src/sagemaker/feature_store/feature_processor/feature_processor.py +++ b/src/sagemaker/feature_store/feature_processor/feature_processor.py @@ -82,9 +82,9 @@ def transform(input_feature_group, input_csv): inputs (Sequence[Union[FeatureGroupDataSource, CSVDataSource, ParquetDataSource, BaseDataSource]]): A list of data sources. output (str): A Feature Group ARN to write results of this function to. - target_stores (Optional[list[str]], optional): A list containing at least one of - 'OnlineStore' or 'OfflineStore'. If unspecified, data will be ingested to the enabled - stores of the output feature group. Defaults to None. + target_stores (Optional[list[str]], optional): A list containing at least one + of 'OnlineStore' or 'OfflineStore'. If unspecified, data will be ingested to the + enabled stores of the output feature group. Defaults to None. parameters (Optional[Dict[str, Union[str, Dict]]], optional): Parameters to be provided to the decorated function, available as the 'params' argument. Useful for parameterized functions. The params argument also contains the set of system provided parameters @@ -96,6 +96,7 @@ def transform(input_feature_group, input_csv): development phase to ensure that data is not used until the function is ready. It also useful for users that want to manage their own data ingestion. Defaults to True. spark_config (Dict[str, str]): A dict contains the key-value paris for Spark configurations. + Raises: IngestionError: If any rows are not ingested successfully then a sample of the records, with failure reasons, is logged. diff --git a/src/sagemaker/session.py b/src/sagemaker/session.py index 5adf9cf356..ac1bf6e343 100644 --- a/src/sagemaker/session.py +++ b/src/sagemaker/session.py @@ -4566,14 +4566,14 @@ def update_inference_component( inference_component_name (str): Name of the Amazon SageMaker ``InferenceComponent``. specification ([dict[str,int]]): Resource configuration. Optional. Example: { - "MinMemoryRequiredInMb": 1024, - "NumberOfCpuCoresRequired": 1, - "NumberOfAcceleratorDevicesRequired": 1, - "MaxMemoryRequiredInMb": 4096, + "MinMemoryRequiredInMb": 1024, + "NumberOfCpuCoresRequired": 1, + "NumberOfAcceleratorDevicesRequired": 1, + "MaxMemoryRequiredInMb": 4096, }, runtime_config ([dict[str,int]]): Number of copies. Optional. Default: { - "copyCount": 1 + "copyCount": 1 } wait: Wait for inference component to be created before return. Optional. Default is True. From 27945b574410fed5c29e60c8ee316f6f8f0cac8b Mon Sep 17 00:00:00 2001 From: Mufaddal Rohawala Date: Tue, 9 Jan 2024 22:15:23 -0800 Subject: [PATCH 06/13] fix: failing black check --- .../feature_store/feature_processor/feature_processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sagemaker/feature_store/feature_processor/feature_processor.py b/src/sagemaker/feature_store/feature_processor/feature_processor.py index 9acd1a96b7..3faf17da51 100644 --- a/src/sagemaker/feature_store/feature_processor/feature_processor.py +++ b/src/sagemaker/feature_store/feature_processor/feature_processor.py @@ -82,7 +82,7 @@ def transform(input_feature_group, input_csv): inputs (Sequence[Union[FeatureGroupDataSource, CSVDataSource, ParquetDataSource, BaseDataSource]]): A list of data sources. output (str): A Feature Group ARN to write results of this function to. - target_stores (Optional[list[str]], optional): A list containing at least one + target_stores (Optional[list[str]], optional): A list containing at least one of 'OnlineStore' or 'OfflineStore'. If unspecified, data will be ingested to the enabled stores of the output feature group. Defaults to None. parameters (Optional[Dict[str, Union[str, Dict]]], optional): Parameters to be provided to From 405d38d5701262da07cbee6d9b9320b167c74ddd Mon Sep 17 00:00:00 2001 From: Mufaddal Rohawala Date: Wed, 10 Jan 2024 10:09:12 -0800 Subject: [PATCH 07/13] fix: sphinx doc errors --- doc/requirements.txt | 2 +- .../feature_processor/feature_processor.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/doc/requirements.txt b/doc/requirements.txt index e031f87955..b0fdd16780 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -1,4 +1,4 @@ -sphinx==7.1.2 +sphinx==3.4.3 sphinx-rtd-theme==2.0.0 docutils==0.20.1 packaging==23.2 diff --git a/src/sagemaker/feature_store/feature_processor/feature_processor.py b/src/sagemaker/feature_store/feature_processor/feature_processor.py index 3faf17da51..e957dbd0ea 100644 --- a/src/sagemaker/feature_store/feature_processor/feature_processor.py +++ b/src/sagemaker/feature_store/feature_processor/feature_processor.py @@ -45,8 +45,8 @@ def feature_processor( If the decorated function is executed without arguments then the decorated function's arguments are automatically loaded from the input data sources. Outputs are ingested to the output Feature - Group. If arguments are provided to this function, then arguments are not automatically - loaded (for testing). + Group. If arguments are provided to this function, then arguments are not automatically loaded + (for testing). Decorated functions must conform to the expected signature. Parameters: one parameter of type pyspark.sql.DataFrame for each DataSource in 'inputs'; followed by the optional parameters with @@ -82,9 +82,9 @@ def transform(input_feature_group, input_csv): inputs (Sequence[Union[FeatureGroupDataSource, CSVDataSource, ParquetDataSource, BaseDataSource]]): A list of data sources. output (str): A Feature Group ARN to write results of this function to. - target_stores (Optional[list[str]], optional): A list containing at least one - of 'OnlineStore' or 'OfflineStore'. If unspecified, data will be ingested to the - enabled stores of the output feature group. Defaults to None. + target_stores (Optional[list[str]], optional): A list containing at least one of + 'OnlineStore' or 'OfflineStore'. If unspecified, data will be ingested to the enabled + stores of the output feature group. Defaults to None. parameters (Optional[Dict[str, Union[str, Dict]]], optional): Parameters to be provided to the decorated function, available as the 'params' argument. Useful for parameterized functions. The params argument also contains the set of system provided parameters From 0caf80dd07fe89f8cf1c4c0b656059494ab63441 Mon Sep 17 00:00:00 2001 From: Mufaddal Rohawala Date: Wed, 10 Jan 2024 10:25:42 -0800 Subject: [PATCH 08/13] fix: sphinx doc errors --- doc/requirements.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/requirements.txt b/doc/requirements.txt index b0fdd16780..62541ef4e1 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -1,6 +1,6 @@ sphinx==3.4.3 -sphinx-rtd-theme==2.0.0 -docutils==0.20.1 -packaging==23.2 -jinja2==3.1.2 +sphinx-rtd-theme==0.5.0 +docutils==0.15.2 +packaging==20.9 +jinja2<3.1 schema==0.7.5 From d86dc439717cd8e1a4119d1ae074c85581a79a26 Mon Sep 17 00:00:00 2001 From: Erick Benitez-Ramos Date: Thu, 11 Jan 2024 00:48:09 +0000 Subject: [PATCH 09/13] sphinx --- .../feature_store/feature_processor/feature_processor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/sagemaker/feature_store/feature_processor/feature_processor.py b/src/sagemaker/feature_store/feature_processor/feature_processor.py index e957dbd0ea..d7ef8e6e9b 100644 --- a/src/sagemaker/feature_store/feature_processor/feature_processor.py +++ b/src/sagemaker/feature_store/feature_processor/feature_processor.py @@ -79,8 +79,8 @@ def transform(input_feature_group, input_csv): return ... Args: - inputs (Sequence[Union[FeatureGroupDataSource, CSVDataSource, ParquetDataSource, - BaseDataSource]]): A list of data sources. + inputs (Sequence[Union[FeatureGroupDataSource, CSVDataSource, ParquetDataSource, BaseDataSource]]): + A list of data sources. output (str): A Feature Group ARN to write results of this function to. target_stores (Optional[list[str]], optional): A list containing at least one of 'OnlineStore' or 'OfflineStore'. If unspecified, data will be ingested to the enabled From 31d2674159ccbd16b9e220c5c2f821c7cd30dcb1 Mon Sep 17 00:00:00 2001 From: Erick Benitez-Ramos Date: Thu, 11 Jan 2024 00:54:40 +0000 Subject: [PATCH 10/13] black-format --- .../feature_store/feature_processor/feature_processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sagemaker/feature_store/feature_processor/feature_processor.py b/src/sagemaker/feature_store/feature_processor/feature_processor.py index d7ef8e6e9b..d5904de35d 100644 --- a/src/sagemaker/feature_store/feature_processor/feature_processor.py +++ b/src/sagemaker/feature_store/feature_processor/feature_processor.py @@ -79,7 +79,7 @@ def transform(input_feature_group, input_csv): return ... Args: - inputs (Sequence[Union[FeatureGroupDataSource, CSVDataSource, ParquetDataSource, BaseDataSource]]): + inputs (Sequence[Union[FeatureGroupDataSource, CSVDataSource, ParquetDataSource, BaseDataSource]]): A list of data sources. output (str): A Feature Group ARN to write results of this function to. target_stores (Optional[list[str]], optional): A list containing at least one of From 8163815b52c8fa7eca29a90dec499cfdec1289e6 Mon Sep 17 00:00:00 2001 From: Erick Benitez-Ramos Date: Thu, 11 Jan 2024 01:05:18 +0000 Subject: [PATCH 11/13] sphinx --- src/sagemaker/jumpstart/estimator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/sagemaker/jumpstart/estimator.py b/src/sagemaker/jumpstart/estimator.py index 36a188ed55..a103e46438 100644 --- a/src/sagemaker/jumpstart/estimator.py +++ b/src/sagemaker/jumpstart/estimator.py @@ -292,8 +292,8 @@ def __init__( SageMaker Debugger rules for real-time analysis (Default: None). For more information, see `Continuous analyses through rules - `_. + `_. (Default: None). debugger_hook_config (Optional[Union[DebuggerHookConfig, bool]]): Configuration for how debugging information is emitted with From fcea913d96a21f3b9967c35a37bfcbb6e6fd2cda Mon Sep 17 00:00:00 2001 From: Erick Benitez-Ramos Date: Thu, 11 Jan 2024 04:33:45 +0000 Subject: [PATCH 12/13] sphinx --- src/sagemaker/jumpstart/estimator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/sagemaker/jumpstart/estimator.py b/src/sagemaker/jumpstart/estimator.py index a103e46438..6c374b7e09 100644 --- a/src/sagemaker/jumpstart/estimator.py +++ b/src/sagemaker/jumpstart/estimator.py @@ -251,8 +251,8 @@ def __init__( (Default: None). model_channel_name (Optional[Union[str, PipelineVariable]]): Name of the channel where 'model_uri' will be downloaded. (Default: None). - metric_definitions (Optional[Union[list[dict[str, str], list[dict[str, - PipelineVariable]]]]): A list of dictionaries that defines the metric(s) + metric_definitions (Optional[list[dict[str, Union[str, PipelineVariable]]]]): + A list of dictionaries that defines the metric(s) used to evaluate the training jobs. Each dictionary contains two keys: 'Name' for the name of the metric, and 'Regex' for the regular expression used to extract the metric from the logs. This should be defined only for jobs that From 2ece6288582cefb999177f7ea51086579871a637 Mon Sep 17 00:00:00 2001 From: Erick Benitez-Ramos Date: Thu, 11 Jan 2024 05:07:10 +0000 Subject: [PATCH 13/13] sphinx --- .../feature_store/feature_processor/feature_processor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/sagemaker/feature_store/feature_processor/feature_processor.py b/src/sagemaker/feature_store/feature_processor/feature_processor.py index d5904de35d..95e1dd297c 100644 --- a/src/sagemaker/feature_store/feature_processor/feature_processor.py +++ b/src/sagemaker/feature_store/feature_processor/feature_processor.py @@ -79,8 +79,8 @@ def transform(input_feature_group, input_csv): return ... Args: - inputs (Sequence[Union[FeatureGroupDataSource, CSVDataSource, ParquetDataSource, BaseDataSource]]): - A list of data sources. + inputs (Sequence[Union[FeatureGroupDataSource, CSVDataSource, ParquetDataSource,\ + BaseDataSource]]): A list of data sources. output (str): A Feature Group ARN to write results of this function to. target_stores (Optional[list[str]], optional): A list containing at least one of 'OnlineStore' or 'OfflineStore'. If unspecified, data will be ingested to the enabled