feature: Add create inf rec api to session (aws#761)

gwang111 · gwang111 · commit ead601ffb2e9 · 2023-01-05T15:31:08.000-08:00
* feature: Add create inf rec api to session

* save

* fix error handling in submit. update docstring

* add in job_name param

Co-authored-by: Gary Wang <garywan@amazon.com>
diff --git a/src/sagemaker/session.py b/src/sagemaker/session.py
@@ -4655,6 +4655,195 @@ def _intercept_create_request(
         """
         return create(request)
 
+    def _create_inference_recommendations_job_request(
+        self,
+        role: str,
+        job_name: str,
+        job_description: str,
+        framework: str,
+        sample_payload_url: str,
+        supported_content_types: List[str],
+        model_package_version_arn: str = None,
+        job_duration_in_seconds: int = None,
+        job_type: str = "Default",
+        framework_version: str = None,
+        nearest_model_name: str = None,
+        supported_instance_types: List[str] = None,
+        endpoint_configurations: List[Dict[str, Any]] = None,
+        traffic_pattern: Dict[str, Any] = None,
+        stopping_conditions: Dict[str, Any] = None,
+        resource_limit: Dict[str, Any] = None,
+    ) -> Dict[str, Any]:
+        """Build the request dictionary for the CreateInferenceRecommendationsJob API.
+
+        Optional values that are unset (``None`` or otherwise falsy) are left out of the
+        request instead of being sent as ``None``, which boto3 parameter validation rejects.
+
+        Args:
+            role (str): The IAM role for the job. It is sent unchanged as ``RoleArn``.
+            job_name (str): The name of the Inference Recommendations Job.
+            job_description (str): A description of the Inference Recommendations Job.
+            framework (str): The machine learning framework of the Image URI.
+            sample_payload_url (str): The S3 path where the sample payload is stored.
+            supported_content_types (List[str]): The supported MIME types for the input data.
+            model_package_version_arn (str): The Amazon Resource Name (ARN) of a
+                versioned model package.
+            job_duration_in_seconds (int): The maximum duration that a job can run for.
+                It is included for every job type whenever it is set.
+            job_type (str): The type of job being run, either `Default` or `Advanced`.
+            framework_version (str): The framework version of the Image URI.
+            nearest_model_name (str): The name of a pre-trained machine learning model
+                benchmarked by Amazon SageMaker Inference Recommender that matches your model.
+            supported_instance_types (List[str]): A list of the instance types that are used
+                to generate inferences in real-time.
+            endpoint_configurations (List[Dict[str, Any]]): The endpoint configurations
+                to use for a job. Only used for `Advanced` jobs.
+            traffic_pattern (Dict[str, Any]): The traffic pattern for the job.
+                Only used for `Advanced` jobs.
+            stopping_conditions (Dict[str, Any]): A set of conditions for stopping a
+                recommendation job. If any of the conditions are met, the job is
+                automatically stopped. Only used for `Advanced` jobs.
+            resource_limit (Dict[str, Any]): The resource limit for the job.
+                Only used for `Advanced` jobs.
+
+        Returns:
+            Dict[str, Any]: Request dictionary for the CreateInferenceRecommendationsJob API.
+        """
+
+        container_config = {"Domain": "MACHINE_LEARNING", "Task": "OTHER"}
+        # Optional members are added only when set; boto3 parameter validation rejects None.
+        if framework:
+            container_config["Framework"] = framework
+        container_config["PayloadConfig"] = {
+            "SamplePayloadUrl": sample_payload_url,
+            "SupportedContentTypes": supported_content_types,
+        }
+
+        if framework_version:
+            container_config["FrameworkVersion"] = framework_version
+        if nearest_model_name:
+            container_config["NearestModelName"] = nearest_model_name
+        if supported_instance_types:
+            container_config["SupportedInstanceTypes"] = supported_instance_types
+
+        input_config = {"ContainerConfig": container_config}
+        if model_package_version_arn:
+            input_config["ModelPackageVersionArn"] = model_package_version_arn
+        if job_duration_in_seconds:
+            input_config["JobDurationInSeconds"] = job_duration_in_seconds
+
+        request = {
+            "JobName": job_name,
+            "JobType": job_type,
+            "RoleArn": role,
+            "InputConfig": input_config,
+        }
+        if job_description:
+            request["JobDescription"] = job_description
+
+        # The remaining settings only apply to Advanced jobs and are ignored otherwise.
+        if job_type == "Advanced":
+            if stopping_conditions:
+                request["StoppingConditions"] = stopping_conditions
+            if resource_limit:
+                input_config["ResourceLimit"] = resource_limit
+            if traffic_pattern:
+                input_config["TrafficPattern"] = traffic_pattern
+            if endpoint_configurations:
+                input_config["EndpointConfigurations"] = endpoint_configurations
+
+        return request
+
+    def create_inference_recommendations_job(
+        self,
+        role: str,
+        sample_payload_url: str,
+        supported_content_types: List[str],
+        job_name: str = None,
+        job_type: str = "Default",
+        model_package_version_arn: str = None,
+        job_duration_in_seconds: int = None,
+        nearest_model_name: str = None,
+        supported_instance_types: List[str] = None,
+        framework: str = None,
+        framework_version: str = None,
+        endpoint_configurations: List[Dict[str, Any]] = None,
+        traffic_pattern: Dict[str, Any] = None,
+        stopping_conditions: Dict[str, Any] = None,
+        resource_limit: Dict[str, Any] = None,
+    ) -> str:
+        """Creates an Inference Recommendations Job.
+
+        Args:
+            role (str): An AWS IAM role; it is passed through as the ``RoleArn`` of the
+                request. You must grant sufficient permissions to this role.
+            sample_payload_url (str): The S3 path where the sample payload is stored.
+            supported_content_types (List[str]): The supported MIME types for the input data.
+            job_name (str): The name of the job being run. When omitted or empty, a name
+                of the form `SMPYTHONSDK-<timestamp>` is generated.
+            job_type (str): The type of job being run. Must either be `Default` or `Advanced`.
+            model_package_version_arn (str): The Amazon Resource Name (ARN) of a
+                versioned model package.
+            job_duration_in_seconds (int): The maximum duration that a job
+                can run for.
+            nearest_model_name (str): The name of a pre-trained machine learning model
+                benchmarked by Amazon SageMaker Inference Recommender that matches your model.
+            supported_instance_types (List[str]): A list of the instance types that are used
+                to generate inferences in real-time.
+            framework (str): The machine learning framework of the Image URI.
+            framework_version (str): The framework version of the Image URI.
+            endpoint_configurations (List[Dict[str, Any]]): Specifies the endpoint configurations
+                to use for a job. Will be used for `Advanced` jobs.
+            traffic_pattern (Dict[str, Any]): Specifies the traffic pattern for the job.
+                Will be used for `Advanced` jobs.
+            stopping_conditions (Dict[str, Any]): A set of conditions for stopping a
+                recommendation job. If any of the conditions are met, the job is
+                automatically stopped. Will be used for `Advanced` jobs.
+            resource_limit (Dict[str, Any]): Defines the resource limit for the job.
+                Will be used for `Advanced` jobs.
+
+        Returns:
+            str: The name of the job created: ``job_name`` when one was given, otherwise
+                the generated `SMPYTHONSDK-<timestamp>` name.
+        """
+
+        # Fall back to a timestamp-based name when the caller did not supply one.
+        if not job_name:
+            job_name = f"SMPYTHONSDK-{round(time.time())}"
+        job_description = "#python-sdk-create"
+
+        job_request = self._create_inference_recommendations_job_request(
+            role=role,
+            model_package_version_arn=model_package_version_arn,
+            job_name=job_name,
+            job_type=job_type,
+            job_duration_in_seconds=job_duration_in_seconds,
+            job_description=job_description,
+            framework=framework,
+            framework_version=framework_version,
+            nearest_model_name=nearest_model_name,
+            sample_payload_url=sample_payload_url,
+            supported_content_types=supported_content_types,
+            supported_instance_types=supported_instance_types,
+            endpoint_configurations=endpoint_configurations,
+            traffic_pattern=traffic_pattern,
+            stopping_conditions=stopping_conditions,
+            resource_limit=resource_limit,
+        )
+
+        # Callback that logs the request and issues the actual API call.
+        def submit(request):
+            LOGGER.info("Creating Inference Recommendations job with name: %s", job_name)
+            LOGGER.debug("process request: %s", json.dumps(request, indent=4))
+            self.sagemaker_client.create_inference_recommendations_job(**request)
+
+        self._intercept_create_request(
+            job_request,
+            submit,
+            self.create_inference_recommendations_job.__name__,
+        )
+        return job_name
+
 
 def get_model_package_args(
     content_types,
diff --git a/tests/unit/test_session.py b/tests/unit/test_session.py
@@ -2937,3 +2937,178 @@ def test_wait_for_athena_query(query_execution, sagemaker_session):
     query_execution.return_value = {"QueryExecution": {"Status": {"State": "SUCCEEDED"}}}
     sagemaker_session.wait_for_athena_query(query_execution_id="query_id")
     assert query_execution.called_with(query_execution_id="query_id")
+
+
+# Shared inputs and expected values for the create_inference_recommendations_job tests.
+IR_USER_JOB_NAME = "custom-job-name"
+IR_JOB_NAME = "SMPYTHONSDK-1234567891"  # auto-generated name at time.time() == 1234567891
+IR_ADVANCED_JOB = "Advanced"
+IR_ROLE_ARN = "arn:aws:iam::123456789123:role/service-role/AmazonSageMaker-ExecutionRole-UnitTest"
+IR_SAMPLE_PAYLOAD_URL = "s3://sagemaker-us-west-2-123456789123/payload/payload.tar.gz"
+IR_SUPPORTED_CONTENT_TYPES = ["text/csv"]
+IR_MODEL_PACKAGE_VERSION_ARN = (
+    "arn:aws:sagemaker:us-west-2:123456789123:model-package/unit-test-package-version/1"
+)
+IR_NEAREST_MODEL_NAME = "xgboost"
+IR_SUPPORTED_INSTANCE_TYPES = ["ml.c5.xlarge", "ml.c5.2xlarge"]
+IR_FRAMEWORK = "XGBOOST"
+IR_FRAMEWORK_VERSION = "1.2.0"
+IR_JOB_DURATION_IN_SECONDS = 7200
+IR_ENDPOINT_CONFIGURATIONS = [
+    {
+        "EnvironmentParameterRanges": {
+            "CategoricalParameterRanges": [{"Name": "OMP_NUM_THREADS", "Value": ["2", "4", "10"]}]
+        },
+        "InferenceSpecificationName": "unit-test-specification",
+        "InstanceType": "ml.c5.xlarge",
+    }
+]
+IR_TRAFFIC_PATTERN = {
+    "Phases": [{"DurationInSeconds": 120, "InitialNumberOfUsers": 1, "SpawnRate": 1}],
+    "TrafficType": "PHASES",
+}
+IR_STOPPING_CONDITIONS = {
+    "MaxInvocations": 300,
+    "ModelLatencyThresholds": [{"Percentile": "P95", "ValueInMilliseconds": 100}],
+}
+IR_RESOURCE_LIMIT = {"MaxNumberOfTests": 10, "MaxParallelOfTests": 1}
+
+
+def create_inference_recommendations_job_default_happy_response():
+    """Return the request kwargs expected for a `Default` job named IR_USER_JOB_NAME."""
+    # Despite the "response" suffix, this dict is the request the client should receive.
+    container_config = {"Domain": "MACHINE_LEARNING", "Task": "OTHER", "Framework": IR_FRAMEWORK}
+    container_config["PayloadConfig"] = {
+        "SamplePayloadUrl": IR_SAMPLE_PAYLOAD_URL,
+        "SupportedContentTypes": IR_SUPPORTED_CONTENT_TYPES,
+    }
+    container_config["FrameworkVersion"] = IR_FRAMEWORK_VERSION
+    container_config["NearestModelName"] = IR_NEAREST_MODEL_NAME
+    container_config["SupportedInstanceTypes"] = IR_SUPPORTED_INSTANCE_TYPES
+
+    return {
+        "JobName": IR_USER_JOB_NAME,
+        "JobType": "Default",
+        "RoleArn": IR_ROLE_ARN,
+        "InputConfig": {
+            "ContainerConfig": container_config,
+            "ModelPackageVersionArn": IR_MODEL_PACKAGE_VERSION_ARN,
+        },
+        "JobDescription": "#python-sdk-create",
+    }
+
+
+def create_inference_recommendations_job_advanced_happy_response():
+    """Return the request kwargs expected for an `Advanced` job named IR_JOB_NAME."""
+    expected = create_inference_recommendations_job_default_happy_response()
+    input_config = expected["InputConfig"]
+
+    expected["JobName"], expected["JobType"] = IR_JOB_NAME, IR_ADVANCED_JOB
+    expected["StoppingConditions"] = IR_STOPPING_CONDITIONS
+    input_config["JobDurationInSeconds"] = IR_JOB_DURATION_IN_SECONDS
+    input_config["EndpointConfigurations"] = IR_ENDPOINT_CONFIGURATIONS
+    input_config["TrafficPattern"] = IR_TRAFFIC_PATTERN
+    input_config["ResourceLimit"] = IR_RESOURCE_LIMIT
+    return expected
+
+
+def test_create_inference_recommendations_job_default_happy(sagemaker_session):
+    """A `Default` job with a caller-supplied name sends the expected request."""
+    returned_name = sagemaker_session.create_inference_recommendations_job(
+        job_name=IR_USER_JOB_NAME,
+        role=IR_ROLE_ARN,
+        sample_payload_url=IR_SAMPLE_PAYLOAD_URL,
+        supported_content_types=IR_SUPPORTED_CONTENT_TYPES,
+        model_package_version_arn=IR_MODEL_PACKAGE_VERSION_ARN,
+        framework=IR_FRAMEWORK,
+        framework_version=IR_FRAMEWORK_VERSION,
+        nearest_model_name=IR_NEAREST_MODEL_NAME,
+        supported_instance_types=IR_SUPPORTED_INSTANCE_TYPES,
+    )
+
+    expected_request = create_inference_recommendations_job_default_happy_response()
+    client = sagemaker_session.sagemaker_client
+    client.create_inference_recommendations_job.assert_called_with(**expected_request)
+    assert returned_name == IR_USER_JOB_NAME
+
+
+@patch("time.time", MagicMock(return_value=1234567891))
+def test_create_inference_recommendations_job_advanced_happy(sagemaker_session):
+    """An `Advanced` job without a name gets the generated name and the full request."""
+    returned_name = sagemaker_session.create_inference_recommendations_job(
+        job_type=IR_ADVANCED_JOB,
+        job_duration_in_seconds=IR_JOB_DURATION_IN_SECONDS,
+        role=IR_ROLE_ARN,
+        sample_payload_url=IR_SAMPLE_PAYLOAD_URL,
+        supported_content_types=IR_SUPPORTED_CONTENT_TYPES,
+        model_package_version_arn=IR_MODEL_PACKAGE_VERSION_ARN,
+        framework=IR_FRAMEWORK,
+        framework_version=IR_FRAMEWORK_VERSION,
+        nearest_model_name=IR_NEAREST_MODEL_NAME,
+        supported_instance_types=IR_SUPPORTED_INSTANCE_TYPES,
+        endpoint_configurations=IR_ENDPOINT_CONFIGURATIONS,
+        traffic_pattern=IR_TRAFFIC_PATTERN,
+        stopping_conditions=IR_STOPPING_CONDITIONS,
+        resource_limit=IR_RESOURCE_LIMIT,
+    )
+
+    expected_request = create_inference_recommendations_job_advanced_happy_response()
+    client = sagemaker_session.sagemaker_client
+    client.create_inference_recommendations_job.assert_called_with(**expected_request)
+    assert returned_name == IR_JOB_NAME
+
+
+def test_create_inference_recommendations_job_propogate_validation_exception(sagemaker_session):
+    """A ValidationException ClientError from the client is raised to the caller."""
+    validation_exception_message = (
+        "Failed to describe model due to validation failure with following error: test_error"
+    )
+    validation_exception = ClientError(
+        {"Error": {"Code": "ValidationException", "Message": validation_exception_message}},
+        "create_inference_recommendations_job",
+    )
+
+    client = sagemaker_session.sagemaker_client
+    client.create_inference_recommendations_job.side_effect = validation_exception
+
+    with pytest.raises(ClientError) as error:
+        sagemaker_session.create_inference_recommendations_job(
+            role=IR_ROLE_ARN,
+            sample_payload_url=IR_SAMPLE_PAYLOAD_URL,
+            supported_content_types=IR_SUPPORTED_CONTENT_TYPES,
+            model_package_version_arn=IR_MODEL_PACKAGE_VERSION_ARN,
+            framework=IR_FRAMEWORK,
+            framework_version=IR_FRAMEWORK_VERSION,
+            nearest_model_name=IR_NEAREST_MODEL_NAME,
+            supported_instance_types=IR_SUPPORTED_INSTANCE_TYPES,
+        )
+
+    # Check the exception itself (error.value), not the pytest ExceptionInfo wrapper.
+    assert "ValidationException" in str(error.value)
+
+
+def test_create_inference_recommendations_job_propogate_other_exception(sagemaker_session):
+    """A non-validation ClientError (AccessDeniedException) is raised to the caller."""
+    access_denied_exception_message = "Access is not allowed for the caller."
+    access_denied_exception = ClientError(
+        {"Error": {"Code": "AccessDeniedException", "Message": access_denied_exception_message}},
+        "create_inference_recommendations_job",
+    )
+
+    client = sagemaker_session.sagemaker_client
+    client.create_inference_recommendations_job.side_effect = access_denied_exception
+
+    with pytest.raises(ClientError) as error:
+        sagemaker_session.create_inference_recommendations_job(
+            role=IR_ROLE_ARN,
+            sample_payload_url=IR_SAMPLE_PAYLOAD_URL,
+            supported_content_types=IR_SUPPORTED_CONTENT_TYPES,
+            model_package_version_arn=IR_MODEL_PACKAGE_VERSION_ARN,
+            framework=IR_FRAMEWORK,
+            framework_version=IR_FRAMEWORK_VERSION,
+            nearest_model_name=IR_NEAREST_MODEL_NAME,
+            supported_instance_types=IR_SUPPORTED_INSTANCE_TYPES,
+        )
+
+    # Check the exception itself (error.value), not the pytest ExceptionInfo wrapper.
+    assert "AccessDeniedException" in str(error.value)