diff --git a/src/sagemaker/jumpstart/artifacts/model_packages.py b/src/sagemaker/jumpstart/artifacts/model_packages.py
index 12166b1a76..67459519f3 100644
--- a/src/sagemaker/jumpstart/artifacts/model_packages.py
+++ b/src/sagemaker/jumpstart/artifacts/model_packages.py
@@ -96,7 +96,10 @@ def _retrieve_model_package_arn(
     if instance_specific_arn is not None:
         return instance_specific_arn
 
-    if model_specs.hosting_model_package_arns is None:
+    if (
+        model_specs.hosting_model_package_arns is None
+        or model_specs.hosting_model_package_arns == {}
+    ):
         return None
 
     regional_arn = model_specs.hosting_model_package_arns.get(region)
diff --git a/src/sagemaker/jumpstart/types.py b/src/sagemaker/jumpstart/types.py
index 7a43f0ff67..67d1622977 100644
--- a/src/sagemaker/jumpstart/types.py
+++ b/src/sagemaker/jumpstart/types.py
@@ -951,7 +951,10 @@ def from_json(self, json_obj: Dict[str, Any]) -> None:
         self.hosting_eula_key: Optional[str] = json_obj.get("hosting_eula_key")
 
-        self.hosting_model_package_arns: Optional[Dict] = json_obj.get("hosting_model_package_arns")
+        model_package_arns = json_obj.get("hosting_model_package_arns")
+        self.hosting_model_package_arns: Optional[Dict] = (
+            model_package_arns if model_package_arns is not None else {}
+        )
         self.hosting_use_script_uri: bool = json_obj.get("hosting_use_script_uri", True)
 
         self.hosting_instance_type_variants: Optional[JumpStartInstanceTypeVariants] = (
@@ -1147,6 +1150,12 @@ def resolved_config(self) -> Dict[str, Any]:
                 deepcopy(component.to_json()),
                 component.OVERRIDING_DENY_LIST,
             )
+
+        # Remove environment variables from resolved config if using model packages
+        hosting_model_package_arns = resolved_config.get("hosting_model_package_arns")
+        if hosting_model_package_arns is not None and hosting_model_package_arns != {}:
+            resolved_config["inference_environment_variables"] = []
+
         self.resolved_metadata_config = resolved_config
 
         return resolved_config
diff --git a/tests/unit/sagemaker/jumpstart/constants.py b/tests/unit/sagemaker/jumpstart/constants.py
index a9c067a8da..fb7ca38bad 100644
--- a/tests/unit/sagemaker/jumpstart/constants.py
+++ b/tests/unit/sagemaker/jumpstart/constants.py
@@ -7357,7 +7357,7 @@
     "training_model_package_artifact_uris": None,
     "deprecate_warn_message": None,
    "deprecated_message": None,
-    "hosting_model_package_arns": None,
+    "hosting_model_package_arns": {},
     "hosting_eula_key": None,
     "model_subscription_link": None,
     "hyperparameters": [
@@ -7692,6 +7692,14 @@
             },
             "component_names": ["gpu-inference"],
         },
+        "gpu-inference-model-package": {
+            "benchmark_metrics": {
+                "ml.p3.2xlarge": [
+                    {"name": "Latency", "value": "100", "unit": "Tokens/S", "concurrency": 1}
+                ]
+            },
+            "component_names": ["gpu-inference-model-package"],
+        },
     },
     "inference_config_components": {
         "neuron-base": {
@@ -7733,6 +7741,14 @@
                 },
             },
         },
+        "gpu-inference-model-package": {
+            "default_inference_instance_type": "ml.p2.xlarge",
+            "supported_inference_instance_types": ["ml.p2.xlarge", "ml.p3.2xlarge"],
+            "hosting_model_package_arns": {
+                "us-west-2": "arn:aws:sagemaker:us-west-2:594846645681:model-package/"
+                "llama2-7b-v3-740347e540da35b4ab9f6fc0ab3fed2c"
+            },
+        },
         "gpu-inference-budget": {
             "supported_inference_instance_types": ["ml.p2.xlarge", "ml.p3.2xlarge"],
             "hosting_artifact_key": "artifacts/meta-textgeneration-llama-2-7b/gpu-inference-budget/model/",
diff --git a/tests/unit/sagemaker/jumpstart/model/test_model.py b/tests/unit/sagemaker/jumpstart/model/test_model.py
index 75b3fd7300..7b9d935fb6 100644
--- a/tests/unit/sagemaker/jumpstart/model/test_model.py
+++ b/tests/unit/sagemaker/jumpstart/model/test_model.py
@@ -1651,6 +1651,74 @@ def test_model_set_deployment_config(
             endpoint_logging=False,
         )
 
+    @mock.patch(
+        "sagemaker.jumpstart.model.get_jumpstart_configs", side_effect=lambda *args, **kwargs: {}
+    )
+    @mock.patch("sagemaker.jumpstart.accessors.JumpStartModelsAccessor._get_manifest")
+    @mock.patch("sagemaker.jumpstart.factory.model.Session")
+    @mock.patch("sagemaker.jumpstart.accessors.JumpStartModelsAccessor.get_model_specs")
+    @mock.patch("sagemaker.jumpstart.model.Model.deploy")
+    @mock.patch("sagemaker.jumpstart.factory.model.JUMPSTART_DEFAULT_REGION_NAME", region)
+    def test_model_set_deployment_config_model_package(
+        self,
+        mock_model_deploy: mock.Mock,
+        mock_get_model_specs: mock.Mock,
+        mock_session: mock.Mock,
+        mock_get_manifest: mock.Mock,
+        mock_get_jumpstart_configs: mock.Mock,
+    ):
+        mock_get_model_specs.side_effect = get_prototype_spec_with_configs
+        mock_get_manifest.side_effect = (
+            lambda region, model_type, *args, **kwargs: get_prototype_manifest(region, model_type)
+        )
+        mock_model_deploy.return_value = default_predictor
+
+        model_id, _ = "pytorch-eqa-bert-base-cased", "*"
+
+        mock_session.return_value = sagemaker_session
+
+        model = JumpStartModel(model_id=model_id)
+
+        assert model.config_name == "neuron-inference"
+
+        model.deploy()
+
+        mock_model_deploy.assert_called_once_with(
+            initial_instance_count=1,
+            instance_type="ml.inf2.xlarge",
+            tags=[
+                {"Key": JumpStartTag.MODEL_ID, "Value": "pytorch-eqa-bert-base-cased"},
+                {"Key": JumpStartTag.MODEL_VERSION, "Value": "1.0.0"},
+                {"Key": JumpStartTag.INFERENCE_CONFIG_NAME, "Value": "neuron-inference"},
+            ],
+            wait=True,
+            endpoint_logging=False,
+        )
+
+        mock_model_deploy.reset_mock()
+
+        model.set_deployment_config(
+            config_name="gpu-inference-model-package", instance_type="ml.p2.xlarge"
+        )
+
+        assert (
+            model.model_package_arn
+            == "arn:aws:sagemaker:us-west-2:594846645681:model-package/llama2-7b-v3-740347e540da35b4ab9f6fc0ab3fed2c"
+        )
+        model.deploy()
+
+        mock_model_deploy.assert_called_once_with(
+            initial_instance_count=1,
+            instance_type="ml.p2.xlarge",
+            tags=[
+                {"Key": JumpStartTag.MODEL_ID, "Value": "pytorch-eqa-bert-base-cased"},
+                {"Key": JumpStartTag.MODEL_VERSION, "Value": "1.0.0"},
+                {"Key": JumpStartTag.INFERENCE_CONFIG_NAME, "Value": "gpu-inference-model-package"},
+            ],
+            wait=True,
+            endpoint_logging=False,
+        )
+
     @mock.patch(
         "sagemaker.jumpstart.model.get_jumpstart_configs", side_effect=lambda *args, **kwargs: {}
     )
@@ -1706,12 +1774,7 @@ def test_model_set_deployment_config_incompatible_instance_type_or_name(
 
         with pytest.raises(ValueError) as error:
             model.set_deployment_config("neuron-inference-unknown-name", "ml.inf2.32xlarge")
-        assert (
-            "Cannot find Jumpstart config name neuron-inference-unknown-name. "
-            "List of config names that is supported by the model: "
-            "['neuron-inference', 'neuron-inference-budget', 'gpu-inference-budget', 'gpu-inference']"
-            in str(error)
-        )
+        assert "Cannot find Jumpstart config name neuron-inference-unknown-name. " in str(error)
" in str(error) @mock.patch("sagemaker.jumpstart.model.get_init_kwargs") @mock.patch("sagemaker.jumpstart.utils.verify_model_region_and_return_specs") diff --git a/tests/unit/sagemaker/jumpstart/test_types.py b/tests/unit/sagemaker/jumpstart/test_types.py index 7b9e8c4519..7f2c7b2aad 100644 --- a/tests/unit/sagemaker/jumpstart/test_types.py +++ b/tests/unit/sagemaker/jumpstart/test_types.py @@ -17,6 +17,7 @@ from sagemaker.jumpstart.types import ( JumpStartBenchmarkStat, JumpStartECRSpecs, + JumpStartEnvironmentVariable, JumpStartHyperparameter, JumpStartInstanceTypeVariants, JumpStartModelSpecs, @@ -927,6 +928,7 @@ def test_inference_configs_parsing(): "neuron-inference", "neuron-budget", "gpu-inference", + "gpu-inference-model-package", "gpu-inference-budget", ] @@ -1019,6 +1021,80 @@ def test_inference_configs_parsing(): } ), ] + assert specs1.inference_environment_variables == [ + JumpStartEnvironmentVariable( + { + "name": "SAGEMAKER_PROGRAM", + "type": "text", + "default": "inference.py", + "scope": "container", + "required_for_model_class": True, + } + ), + JumpStartEnvironmentVariable( + { + "name": "SAGEMAKER_SUBMIT_DIRECTORY", + "type": "text", + "default": "/opt/ml/model/code", + "scope": "container", + "required_for_model_class": False, + } + ), + JumpStartEnvironmentVariable( + { + "name": "SAGEMAKER_CONTAINER_LOG_LEVEL", + "type": "text", + "default": "20", + "scope": "container", + "required_for_model_class": False, + } + ), + JumpStartEnvironmentVariable( + { + "name": "SAGEMAKER_MODEL_SERVER_TIMEOUT", + "type": "text", + "default": "3600", + "scope": "container", + "required_for_model_class": False, + } + ), + JumpStartEnvironmentVariable( + { + "name": "ENDPOINT_SERVER_TIMEOUT", + "type": "int", + "default": 3600, + "scope": "container", + "required_for_model_class": True, + } + ), + JumpStartEnvironmentVariable( + { + "name": "MODEL_CACHE_ROOT", + "type": "text", + "default": "/opt/ml/model", + "scope": "container", + "required_for_model_class": True, + } + ), + JumpStartEnvironmentVariable( + { + "name": "SAGEMAKER_ENV", + "type": "text", + "default": "1", + "scope": "container", + "required_for_model_class": True, + } + ), + JumpStartEnvironmentVariable( + { + "name": "SAGEMAKER_MODEL_SERVER_WORKERS", + "type": "int", + "default": 1, + "scope": "container", + "required_for_model_class": True, + } + ), + ] # Overrided fields in top config assert specs1.supported_inference_instance_types == ["ml.inf2.xlarge", "ml.inf2.2xlarge"] @@ -1057,6 +1133,20 @@ def test_inference_configs_parsing(): ) assert list(config.config_components.keys()) == ["neuron-inference"] + config = specs1.inference_configs.configs["gpu-inference-model-package"] + assert config.config_components["gpu-inference-model-package"] == JumpStartConfigComponent( + "gpu-inference-model-package", + { + "default_inference_instance_type": "ml.p2.xlarge", + "supported_inference_instance_types": ["ml.p2.xlarge", "ml.p3.2xlarge"], + "hosting_model_package_arns": { + "us-west-2": "arn:aws:sagemaker:us-west-2:594846645681:model-package/" + "llama2-7b-v3-740347e540da35b4ab9f6fc0ab3fed2c" + }, + }, + ) + assert config.resolved_config.get("inference_environment_variables") == [] + spec = { **BASE_SPEC, **INFERENCE_CONFIGS, @@ -1075,6 +1165,7 @@ def test_set_inference_configs(): "neuron-inference", "neuron-budget", "gpu-inference", + "gpu-inference-model-package", "gpu-inference-budget", ] @@ -1083,7 +1174,7 @@ def test_set_inference_configs(): assert "Cannot find Jumpstart config name invalid_name." 
"List of config names that is supported by the model: " "['neuron-inference', 'neuron-inference-budget', " - "'gpu-inference-budget', 'gpu-inference']" in str(error.value) + "'gpu-inference-budget', 'gpu-inference', 'gpu-inference-model-package']" in str(error.value) assert specs1.supported_inference_instance_types == ["ml.inf2.xlarge", "ml.inf2.2xlarge"] specs1.set_config("gpu-inference") diff --git a/tests/unit/sagemaker/jumpstart/test_utils.py b/tests/unit/sagemaker/jumpstart/test_utils.py index 8c3bb067be..a5a063c696 100644 --- a/tests/unit/sagemaker/jumpstart/test_utils.py +++ b/tests/unit/sagemaker/jumpstart/test_utils.py @@ -1639,6 +1639,7 @@ def test_get_jumpstart_config_names_success( "neuron-inference-budget", "gpu-inference-budget", "gpu-inference", + "gpu-inference-model-package", ] @patch("sagemaker.jumpstart.accessors.JumpStartModelsAccessor.get_model_specs") @@ -1735,6 +1736,13 @@ def test_get_jumpstart_benchmark_stats_full_list( ) ] }, + "gpu-inference-model-package": { + "ml.p3.2xlarge": [ + JumpStartBenchmarkStat( + {"name": "Latency", "value": "100", "unit": "Tokens/S", "concurrency": 1} + ) + ] + }, } @patch("sagemaker.jumpstart.accessors.JumpStartModelsAccessor.get_model_specs")