diff --git a/src/sagemaker/session.py b/src/sagemaker/session.py index 5b3b51e335..470297cbbd 100644 --- a/src/sagemaker/session.py +++ b/src/sagemaker/session.py @@ -581,9 +581,9 @@ def transform( input_config (dict): A dictionary describing the input data (and its location) for the job. output_config (dict): A dictionary describing the output location for the job. resource_config (dict): A dictionary describing the resources to complete the job. - tags (list[dict]): List of tags for labeling a transform job. + tags (list[dict]): List of tags for labeling a transform job. For more information, + see https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html. data_processing(dict): A dictionary describing config for combining the input data and transformed data. - For more, see https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html. """ transform_request = { "TransformJobName": job_name, diff --git a/src/sagemaker/transformer.py b/src/sagemaker/transformer.py index 593caae67d..a0f570ecc3 100644 --- a/src/sagemaker/transformer.py +++ b/src/sagemaker/transformer.py @@ -142,10 +142,12 @@ def transform( input_filter (str): A JSONPath to select a portion of the input to pass to the algorithm container for inference. If you omit the field, it gets the value '$', representing the entire input. - Some examples: "$[1:]", "$.features"(default: None). + For more information, see https://docs.aws.amazon.com/sagemaker/latest/dg/API_CreateTransformJob.html. + Some examples: "$[1:]", "$.features" (default: None). output_filter (str): A JSONPath to select a portion of the - joined/original output to return as the output. Some examples: - "$[1:]", "$.prediction" (default: None). + joined/original output to return as the output. + For more information, see https://docs.aws.amazon.com/sagemaker/latest/dg/API_CreateTransformJob.html. + Some examples: "$[1:]", "$.prediction" (default: None). join_source (str): The source of data to be joined to the transform output. It can be set to 'Input' meaning the entire input record will be joined to the inference result. You can use OutputFilter diff --git a/tests/integ/test_transformer.py b/tests/integ/test_transformer.py index c519cb6786..ad3fd65c2d 100644 --- a/tests/integ/test_transformer.py +++ b/tests/integ/test_transformer.py @@ -67,12 +67,13 @@ def test_transform_mxnet(sagemaker_session, mxnet_full_version): kms_key_arn = get_or_create_kms_key(sagemaker_session) output_filter = "$" + input_filter = "$" transformer = _create_transformer_and_transform_job( mx, transform_input, kms_key_arn, - input_filter=None, + input_filter=input_filter, output_filter=output_filter, join_source=None, ) @@ -86,6 +87,7 @@ def test_transform_mxnet(sagemaker_session, mxnet_full_version): ) assert kms_key_arn == job_desc["TransformResources"]["VolumeKmsKeyId"] assert output_filter == job_desc["DataProcessing"]["OutputFilter"] + assert input_filter == job_desc["DataProcessing"]["InputFilter"] @pytest.mark.canary_quick diff --git a/tests/unit/test_session.py b/tests/unit/test_session.py index e401622353..714da580c8 100644 --- a/tests/unit/test_session.py +++ b/tests/unit/test_session.py @@ -676,12 +676,15 @@ def test_transform_pack_to_request(sagemaker_session): resource_config = {"InstanceCount": INSTANCE_COUNT, "InstanceType": INSTANCE_TYPE} + data_processing = {"OutputFilter": "$", "InputFilter": "$", "JoinSource": "Input"} + expected_args = { "TransformJobName": JOB_NAME, "ModelName": model_name, "TransformInput": in_config, "TransformOutput": out_config, "TransformResources": resource_config, + "DataProcessing": data_processing, } sagemaker_session.transform( @@ -695,7 +698,7 @@ def test_transform_pack_to_request(sagemaker_session): output_config=out_config, resource_config=resource_config, tags=None, - data_processing=None, + data_processing=data_processing, ) _, _, actual_args = sagemaker_session.sagemaker_client.method_calls[0]