diff --git a/doc/conf.py b/doc/conf.py index 908cbb24a4..e792915dc3 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -96,6 +96,13 @@ # Example configuration for intersphinx: refer to the Python standard library. intersphinx_mapping = {"http://docs.python.org/": None} +# -- Options for autodoc ---------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/extensions/autodoc.html#configuration + +# Automatically extract typehints when specified and place them in +# descriptions of the relevant function/method. +autodoc_typehints = "description" + # autosummary autosummary_generate = True diff --git a/doc/workflows/pipelines/sagemaker.workflow.pipelines.rst b/doc/workflows/pipelines/sagemaker.workflow.pipelines.rst index f115b032c4..43e9be190e 100644 --- a/doc/workflows/pipelines/sagemaker.workflow.pipelines.rst +++ b/doc/workflows/pipelines/sagemaker.workflow.pipelines.rst @@ -46,6 +46,8 @@ Entities .. autoclass:: sagemaker.workflow.entities.Expression +.. autoclass:: sagemaker.workflow.entities.PipelineVariable + Execution Variables ------------------- diff --git a/src/sagemaker/amazon/amazon_estimator.py b/src/sagemaker/amazon/amazon_estimator.py index eaf4644da6..c0fdebd565 100644 --- a/src/sagemaker/amazon/amazon_estimator.py +++ b/src/sagemaker/amazon/amazon_estimator.py @@ -316,16 +316,16 @@ def __init__( """A collection of Amazon :class:~`Record` objects serialized and stored in S3. Args: - s3_data (str): The S3 location of the training data + s3_data (str or PipelineVariable): The S3 location of the training data num_records (int): The number of records in the set. feature_dim (int): The dimensionality of "values" arrays in the Record features, and label (if each Record is labeled). - s3_data_type (str): Valid values: 'S3Prefix', 'ManifestFile'. If - 'S3Prefix', ``s3_data`` defines a prefix of s3 objects to train + s3_data_type (str or PipelineVariable): Valid values: 'S3Prefix', 'ManifestFile'. + If 'S3Prefix', ``s3_data`` defines a prefix of s3 objects to train on. All objects with s3 keys beginning with ``s3_data`` will be used to train. If 'ManifestFile', then ``s3_data`` defines a single s3 manifest file, listing each s3 object to train on. - channel (str): The SageMaker Training Job channel this RecordSet + channel (str or PipelineVariable): The SageMaker Training Job channel this RecordSet should be bound to """ self.s3_data = s3_data diff --git a/src/sagemaker/amazon/factorization_machines.py b/src/sagemaker/amazon/factorization_machines.py index 5e9c2098b9..927f3d4ebf 100644 --- a/src/sagemaker/amazon/factorization_machines.py +++ b/src/sagemaker/amazon/factorization_machines.py @@ -333,7 +333,7 @@ def __init__( """Initialization for FactorizationMachinesModel class. Args: - model_data (str): The S3 location of a SageMaker model data + model_data (str or PipelineVariable): The S3 location of a SageMaker model data ``.tar.gz`` file. role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and APIs that create Amazon SageMaker diff --git a/src/sagemaker/amazon/ipinsights.py b/src/sagemaker/amazon/ipinsights.py index 097f6b45dc..a73853ad75 100644 --- a/src/sagemaker/amazon/ipinsights.py +++ b/src/sagemaker/amazon/ipinsights.py @@ -236,7 +236,7 @@ def __init__( """Creates object to get insights on S3 model data. Args: - model_data (str): The S3 location of a SageMaker model data + model_data (str or PipelineVariable): The S3 location of a SageMaker model data ``.tar.gz`` file. 
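A minimal sketch of the usage this docstring change documents: a SageMaker Pipelines parameter (one concrete kind of PipelineVariable) passed as the RecordSet S3 location instead of a plain string. The bucket, prefix, and parameter name are illustrative placeholders; the parameter only resolves to a real URI at pipeline execution time.

from sagemaker.amazon.amazon_estimator import RecordSet
from sagemaker.workflow.parameters import ParameterString

# Pipeline parameter standing in for a concrete S3 prefix (placeholder default).
train_data_uri = ParameterString(
    name="TrainDataS3Uri",
    default_value="s3://my-bucket/train/records",
)

record_set = RecordSet(
    s3_data=train_data_uri,      # PipelineVariable accepted where a plain str used to be required
    num_records=10000,
    feature_dim=784,
    s3_data_type="S3Prefix",
    channel="train",
)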
role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and APIs that create Amazon SageMaker diff --git a/src/sagemaker/amazon/kmeans.py b/src/sagemaker/amazon/kmeans.py index 581e93e02a..964a4dfde4 100644 --- a/src/sagemaker/amazon/kmeans.py +++ b/src/sagemaker/amazon/kmeans.py @@ -260,7 +260,7 @@ def __init__( """Initialization for KMeansModel class. Args: - model_data (str): The S3 location of a SageMaker model data + model_data (str or PipelineVariable): The S3 location of a SageMaker model data ``.tar.gz`` file. role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and APIs that create Amazon SageMaker diff --git a/src/sagemaker/amazon/knn.py b/src/sagemaker/amazon/knn.py index 14ba404ebf..a621d794fd 100644 --- a/src/sagemaker/amazon/knn.py +++ b/src/sagemaker/amazon/knn.py @@ -252,7 +252,7 @@ def __init__( """Function to initialize KNNModel. Args: - model_data (str): The S3 location of a SageMaker model data + model_data (str or PipelineVariable): The S3 location of a SageMaker model data ``.tar.gz`` file. role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and APIs that create Amazon SageMaker diff --git a/src/sagemaker/amazon/lda.py b/src/sagemaker/amazon/lda.py index 4158b6cc27..fc1de1cea6 100644 --- a/src/sagemaker/amazon/lda.py +++ b/src/sagemaker/amazon/lda.py @@ -234,7 +234,7 @@ def __init__( """Initialization for LDAModel class. Args: - model_data (str): The S3 location of a SageMaker model data + model_data (str or PipelineVariable): The S3 location of a SageMaker model data ``.tar.gz`` file. role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and APIs that create Amazon SageMaker diff --git a/src/sagemaker/amazon/linear_learner.py b/src/sagemaker/amazon/linear_learner.py index d02ed2875f..0b36b117dd 100644 --- a/src/sagemaker/amazon/linear_learner.py +++ b/src/sagemaker/amazon/linear_learner.py @@ -495,7 +495,7 @@ def __init__( """Initialization for LinearLearnerModel. Args: - model_data (str): The S3 location of a SageMaker model data + model_data (str or PipelineVariable): The S3 location of a SageMaker model data ``.tar.gz`` file. role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and APIs that create Amazon SageMaker diff --git a/src/sagemaker/amazon/ntm.py b/src/sagemaker/amazon/ntm.py index 83c2f97348..ddcc619ada 100644 --- a/src/sagemaker/amazon/ntm.py +++ b/src/sagemaker/amazon/ntm.py @@ -263,7 +263,7 @@ def __init__( """Initialization for NTMModel class. Args: - model_data (str): The S3 location of a SageMaker model data + model_data (str or PipelineVariable): The S3 location of a SageMaker model data ``.tar.gz`` file. role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and APIs that create Amazon SageMaker diff --git a/src/sagemaker/amazon/object2vec.py b/src/sagemaker/amazon/object2vec.py index 1fbd846cbf..6177c742ba 100644 --- a/src/sagemaker/amazon/object2vec.py +++ b/src/sagemaker/amazon/object2vec.py @@ -358,7 +358,7 @@ def __init__( """Initialization for Object2VecModel class. Args: - model_data (str): The S3 location of a SageMaker model data + model_data (str or PipelineVariable): The S3 location of a SageMaker model data ``.tar.gz`` file. role (str): An AWS IAM role (either name or full ARN). 
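The same substitution applies to every first-party algorithm model class touched above (KMeansModel, KNNModel, LDAModel, LinearLearnerModel, NTMModel, and so on): ``model_data`` may now be a pipeline variable. A hedged sketch, assuming an AWS region is configured and using a placeholder role ARN; a training step property is noted in the comment as the other common source of this value.

from sagemaker.amazon.kmeans import KMeansModel
from sagemaker.workflow.parameters import ParameterString

model_artifact = ParameterString(name="ModelDataUrl")  # supplied when the pipeline starts

kmeans_model = KMeansModel(
    model_data=model_artifact,                             # PipelineVariable instead of str
    role="arn:aws:iam::111122223333:role/SageMakerRole",   # placeholder role ARN
)
# A training step's output is used the same way:
#   model_data=train_step.properties.ModelArtifacts.S3ModelArtifacts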
The Amazon SageMaker training jobs and APIs that create Amazon SageMaker diff --git a/src/sagemaker/amazon/pca.py b/src/sagemaker/amazon/pca.py index e3127fd7a1..6236d20bef 100644 --- a/src/sagemaker/amazon/pca.py +++ b/src/sagemaker/amazon/pca.py @@ -251,7 +251,7 @@ def __init__( """Initialization for PCAModel. Args: - model_data (str): The S3 location of a SageMaker model data + model_data (str or PipelineVariable): The S3 location of a SageMaker model data ``.tar.gz`` file. role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and APIs that create Amazon SageMaker diff --git a/src/sagemaker/amazon/randomcutforest.py b/src/sagemaker/amazon/randomcutforest.py index c38d75e3e4..d690b1825e 100644 --- a/src/sagemaker/amazon/randomcutforest.py +++ b/src/sagemaker/amazon/randomcutforest.py @@ -223,7 +223,7 @@ def __init__( """Initialization for RandomCutForestModel class. Args: - model_data (str): The S3 location of a SageMaker model data + model_data (str or PipelineVariable): The S3 location of a SageMaker model data ``.tar.gz`` file. role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and APIs that create Amazon SageMaker diff --git a/src/sagemaker/chainer/estimator.py b/src/sagemaker/chainer/estimator.py index 12c22eae91..2c351dafa0 100644 --- a/src/sagemaker/chainer/estimator.py +++ b/src/sagemaker/chainer/estimator.py @@ -74,7 +74,7 @@ def __init__( home-page: https://github.com/aws/sagemaker-python-sdk Args: - entry_point (str): Path (absolute or relative) to the Python source + entry_point (str or PipelineVariable): Path (absolute or relative) to the Python source file which should be executed as the entry point to training. If ``source_dir`` is specified, then ``entry_point`` must point to a file located at the root of ``source_dir``. @@ -92,7 +92,7 @@ def __init__( command used to run the entry point. For example, '-X NCCL_DEBUG=WARN' will pass that option string to the mpirun command. - source_dir (str): Path (absolute or relative) to a directory with + source_dir (str or PipelineVariable): Path (absolute or relative) to a directory with any other training source code dependencies aside from the entry point file (default: None). Structure within this directory are preserved when training on Amazon SageMaker. diff --git a/src/sagemaker/chainer/model.py b/src/sagemaker/chainer/model.py index 1986febaaf..d723b8f4d5 100644 --- a/src/sagemaker/chainer/model.py +++ b/src/sagemaker/chainer/model.py @@ -94,7 +94,7 @@ def __init__( """Initialize an ChainerModel. Args: - model_data (str): The S3 location of a SageMaker model data + model_data (str or PipelineVariable): The S3 location of a SageMaker model data ``.tar.gz`` file. role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and APIs that create Amazon SageMaker @@ -105,8 +105,8 @@ def __init__( file which should be executed as the entry point to model hosting. If ``source_dir`` is specified, then ``entry_point`` must point to a file located at the root of ``source_dir``. - image_uri (str): A Docker image URI (default: None). If not specified, - a default image for Chainer will be used. + image_uri (str or PipelineVariable): A Docker image URI (default: None). + If not specified, a default image for Chainer will be used. If ``framework_version`` or ``py_version`` are ``None``, then ``image_uri`` is required. If ``image_uri`` is also ``None``, then a ``ValueError`` will be raised. 
@@ -120,7 +120,7 @@ def __init__( to call to create a predictor with an endpoint name and SageMaker ``Session``. If specified, ``deploy()`` returns the result of invoking this function on the created endpoint name. - model_server_workers (int): Optional. The number of worker processes + model_server_workers (int or PipelineVariable): Optional. The number of worker processes used by the inference server. If None, server will use one worker per vCPU. **kwargs: Keyword arguments passed to the @@ -173,43 +173,49 @@ def register( """Creates a model package for creating SageMaker models or listing on Marketplace. Args: - content_types (list): The supported MIME types for the input data. - response_types (list): The supported MIME types for the output data. - inference_instances (list): A list of the instance types that are used to - generate inferences in real-time. - transform_instances (list): A list of the instance types on which a transformation - job can be run or on which an endpoint can be deployed. - model_package_name (str): Model Package name, exclusive to `model_package_group_name`, - using `model_package_name` makes the Model Package un-versioned (default: None). - model_package_group_name (str): Model Package Group name, exclusive to - `model_package_name`, using `model_package_group_name` makes the Model Package - versioned (default: None). - image_uri (str): Inference image uri for the container. Model class' self.image will - be used if it is None (default: None). + content_types (list[str] or list[PipelineVariable]): The supported MIME types + for the input data. + response_types (list[str] or list[PipelineVariable]): The supported MIME types + for the output data. + inference_instances (list[str] or list[PipelineVariable]): A list of the instance + types that are used to generate inferences in real-time. + transform_instances (list[str] or list[PipelineVariable]): A list of the instance + types on which a transformation job can be run or on which an endpoint + can be deployed. + model_package_name (str or PipelineVariable): Model Package name, exclusive to + `model_package_group_name`, using `model_package_name` makes the Model Package + un-versioned (default: None). + model_package_group_name (str or PipelineVariable): Model Package Group name, + exclusive to `model_package_name`, using `model_package_group_name` makes the + Model Package versioned (default: None). + image_uri (str or PipelineVariable): Inference image uri for the container. Model class' + self.image will be used if it is None (default: None). model_metrics (ModelMetrics): ModelMetrics object (default: None). metadata_properties (MetadataProperties): MetadataProperties (default: None). marketplace_cert (bool): A boolean value indicating if the Model Package is certified for AWS Marketplace (default: False). - approval_status (str): Model Approval Status, values can be "Approved", "Rejected", - or "PendingManualApproval" (default: "PendingManualApproval"). + approval_status (str or PipelineVariable): Model Approval Status, values can be + "Approved", "Rejected", or "PendingManualApproval" + (default: "PendingManualApproval"). description (str): Model Package description (default: None). drift_check_baselines (DriftCheckBaselines): DriftCheckBaselines object (default: None). - customer_metadata_properties (dict[str, str]): A dictionary of key-value paired - metadata properties (default: None). 
- domain (str): Domain values can be "COMPUTER_VISION", "NATURAL_LANGUAGE_PROCESSING", - "MACHINE_LEARNING" (default: None). - sample_payload_url (str): The S3 path where the sample payload is stored + customer_metadata_properties (dict[str, str] or dict[str, PipelineVariable]): + A dictionary of key-value paired metadata properties (default: None). + domain (str or PipelineVariable): Domain values can be "COMPUTER_VISION", + "NATURAL_LANGUAGE_PROCESSING", "MACHINE_LEARNING" (default: None). + sample_payload_url (str or PipelineVariable): The S3 path where the sample payload + is stored (default: None). + task (str or PipelineVariable): Task values which are supported by Inference Recommender + are "FILL_MASK", "IMAGE_CLASSIFICATION", "OBJECT_DETECTION", "TEXT_GENERATION", + "IMAGE_SEGMENTATION", "CLASSIFICATION", "REGRESSION", "OTHER" (default: None). + framework (str or PipelineVariable): Machine learning framework of the model package + container image (default: None). + framework_version (str or PipelineVariable): Framework version of the Model Package + Container Image (default: None). + nearest_model_name (str or PipelineVariable): Name of a pre-trained machine learning + benchmarked by Amazon SageMaker Inference Recommender (default: None). + data_input_configuration (str or PipelineVariable): Input object for the model (default: None). - task (str): Task values which are supported by Inference Recommender are "FILL_MASK", - "IMAGE_CLASSIFICATION", "OBJECT_DETECTION", "TEXT_GENERATION", "IMAGE_SEGMENTATION", - "CLASSIFICATION", "REGRESSION", "OTHER" (default: None). - framework (str): Machine learning framework of the model package container image - (default: None). - framework_version (str): Framework version of the Model Package Container Image - (default: None). - nearest_model_name (str): Name of a pre-trained machine learning benchmarked by - Amazon SageMaker Inference Recommender (default: None). - data_input_configuration (str): Input object for the model (default: None). Returns: str: A string of SageMaker Model Package ARN. diff --git a/src/sagemaker/debugger/debugger.py b/src/sagemaker/debugger/debugger.py index 23f7b651a3..dca7568411 100644 --- a/src/sagemaker/debugger/debugger.py +++ b/src/sagemaker/debugger/debugger.py @@ -338,25 +338,29 @@ def custom( Args: name (str): Required. The name of the debugger rule. - image_uri (str): Required. The URI of the image to be used by the debugger rule. - instance_type (str): Required. Type of EC2 instance to use, for example, - 'ml.c4.xlarge'. - volume_size_in_gb (int): Required. Size in GB of the EBS volume - to use for storing data. + image_uri (str or PipelineVariable): Required. The URI of the image to + be used by the debugger rule. + instance_type (str or PipelineVariable): Required. Type of EC2 instance to use, + for example, 'ml.c4.xlarge'. + volume_size_in_gb (int or PipelineVariable): Required. Size in GB of the + EBS volume to use for storing data. source (str): Optional. A source file containing a rule to invoke. If provided, you must also provide rule_to_invoke. This can either be an S3 uri or a local path. - rule_to_invoke (str): Optional. The name of the rule to invoke within the source. - If provided, you must also provide source. - container_local_output_path (str): Optional. The local path in the container. - s3_output_path (str): Optional. The location in Amazon S3 to store the output tensors. + rule_to_invoke (str or PipelineVariable): Optional. The name of the rule to + invoke within the source. 
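A hedged sketch of the ``register()`` call whose argument types change above; the same signature applies to ChainerModel and the other framework model classes. The image URI, role ARN, model data path, and package group name are placeholders, and running this with a regular Session would create a model package immediately (inside a pipeline, a PipelineSession defers the call to a ModelStep).

from sagemaker.model import Model
from sagemaker.workflow.parameters import ParameterString

model = Model(
    image_uri="111122223333.dkr.ecr.us-east-1.amazonaws.com/my-inference:latest",  # placeholder
    model_data="s3://my-bucket/model.tar.gz",                                      # placeholder
    role="arn:aws:iam::111122223333:role/SageMakerRole",                           # placeholder
)

approval_status = ParameterString(
    name="ModelApprovalStatus", default_value="PendingManualApproval"
)

model_package = model.register(
    content_types=["text/csv"],
    response_types=["text/csv"],
    inference_instances=["ml.m5.xlarge"],
    transform_instances=["ml.m5.xlarge"],
    model_package_group_name="my-model-group",   # placeholder group name
    approval_status=approval_status,             # PipelineVariable instead of a plain str
)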
If provided, you must also provide source. + container_local_output_path (str or PipelineVariable): Optional. The local path + in the container. + s3_output_path (str or PipelineVariable): Optional. The location in Amazon S3 + to store the output tensors. The default Debugger output path for debugging data is created under the default output path of the :class:`~sagemaker.estimator.Estimator` class. For example, s3://sagemaker--<12digit_account_id>//debug-output/. - other_trials_s3_input_paths ([str]): Optional. The Amazon S3 input paths - of other trials to use the SimilarAcrossRuns rule. - rule_parameters (dict): Optional. A dictionary of parameters for the rule. + other_trials_s3_input_paths (list[str] or list[PipelineVariable]): Optional. + The Amazon S3 input paths of other trials to use the SimilarAcrossRuns rule. + rule_parameters (dict[str, str] or dict[str, PipelineVariable]): Optional. + A dictionary of parameters for the rule. collections_to_save ([sagemaker.debugger.CollectionConfig]): Optional. A list of :class:`~sagemaker.debugger.CollectionConfig` objects to be saved. @@ -621,13 +625,15 @@ def __init__( """Initialize the DebuggerHookConfig instance. Args: - s3_output_path (str): Optional. The location in Amazon S3 to store the output tensors. - The default Debugger output path is created under the + s3_output_path (str or PipelineVariable): Optional. The location in Amazon S3 to + store the output tensors. The default Debugger output path is created under the default output path of the :class:`~sagemaker.estimator.Estimator` class. For example, s3://sagemaker--<12digit_account_id>//debug-output/. - container_local_output_path (str): Optional. The local path in the container. - hook_parameters (dict): Optional. A dictionary of parameters. + container_local_output_path (str or PipelineVariable): Optional. The local path + in the container. + hook_parameters (dict[str, str] or dict[str, PipelineVariable]): Optional. + A dictionary of parameters. collection_configs ([sagemaker.debugger.CollectionConfig]): Required. A list of :class:`~sagemaker.debugger.CollectionConfig` objects to be saved at the **s3_output_path**. @@ -690,8 +696,10 @@ def __init__( """Initialize the TensorBoardOutputConfig instance. Args: - s3_output_path (str): Optional. The location in Amazon S3 to store the output. - container_local_output_path (str): Optional. The local path in the container. + s3_output_path (str or PipelineVariable): Optional. The location in Amazon S3 + to store the output. + container_local_output_path (str or PipelineVariable): Optional. The local path + in the container. """ self.s3_output_path = s3_output_path @@ -723,9 +731,9 @@ def __init__( """Constructor for collection configuration. Args: - name (str): Required. The name of the collection configuration. - parameters (dict): Optional. The parameters for the collection - configuration. + name (str or PipelineVariable): Required. The name of the collection configuration. + parameters (dict[str, str] or dict[str, PipelineVariable]): Optional. The parameters + for the collection configuration. **Example of creating a CollectionConfig object:** diff --git a/src/sagemaker/debugger/profiler_config.py b/src/sagemaker/debugger/profiler_config.py index 807ba91e79..3d4a24e8d1 100644 --- a/src/sagemaker/debugger/profiler_config.py +++ b/src/sagemaker/debugger/profiler_config.py @@ -40,14 +40,15 @@ def __init__( class and SageMaker Framework estimators. Args: - s3_output_path (str): The location in Amazon S3 to store the output.
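A short sketch of the Debugger hook objects documented above taking pipeline variables; the parameter name, default S3 path, and collection settings are assumptions.

from sagemaker.debugger import CollectionConfig, DebuggerHookConfig
from sagemaker.workflow.parameters import ParameterString

debug_output = ParameterString(
    name="DebugOutputS3Uri", default_value="s3://my-bucket/debug-output"
)

hook_config = DebuggerHookConfig(
    s3_output_path=debug_output,                  # PipelineVariable instead of str
    collection_configs=[
        CollectionConfig(
            name="weights",
            parameters={"save_interval": "100"},  # values may also be pipeline variables
        )
    ],
)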
+ s3_output_path (str or PipelineVariable): The location in Amazon S3 to store + the output. The default Debugger output path for profiling data is created under the default output path of the :class:`~sagemaker.estimator.Estimator` class. For example, s3://sagemaker--<12digit_account_id>//profiler-output/. - system_monitor_interval_millis (int): The time interval in milliseconds - to collect system metrics. Available values are 100, 200, 500, 1000 (1 second), - 5000 (5 seconds), and 60000 (1 minute) milliseconds. + system_monitor_interval_millis (int or PipelineVariable): The time interval in + milliseconds to collect system metrics. Available values are 100, 200, 500, + 1000 (1 second), 5000 (5 seconds), and 60000 (1 minute) milliseconds. The default is 500 milliseconds. framework_profile_params (:class:`~sagemaker.debugger.FrameworkProfile`): A parameter object for framework metrics profiling. Configure it using diff --git a/src/sagemaker/estimator.py b/src/sagemaker/estimator.py index 62b702fc73..15b8f0862c 100644 --- a/src/sagemaker/estimator.py +++ b/src/sagemaker/estimator.py @@ -162,12 +162,12 @@ def __init__( endpoints use this role to access training data and model artifacts. After the endpoint is created, the inference code might use the IAM role, if it needs to access an AWS resource. - instance_count (int): Number of Amazon EC2 instances to use + instance_count (int or PipelineVariable): Number of Amazon EC2 instances to use for training. Required if instance_groups is not set. - instance_type (str): Type of EC2 instance to use for training, + instance_type (str or PipelineVariable): Type of EC2 instance to use for training, for example, ``'ml.c4.xlarge'``. Required if instance_groups is not set. - volume_size (int): Size in GB of the storage volume to use for + volume_size (int or PipelineVariable): Size in GB of the storage volume to use for storing input and output data during training (default: 30). Must be large enough to store training data if File mode is @@ -205,12 +205,12 @@ def __init__( Folders for Training Datasets, Checkpoints, Model Artifacts, and Outputs `_. - volume_kms_key (str): Optional. KMS key ID for encrypting EBS + volume_kms_key (str or PipelineVariable): Optional. KMS key ID for encrypting EBS volume attached to the training instance (default: None). - max_run (int): Timeout in seconds for training (default: 24 * + max_run (int or PipelineVariable): Timeout in seconds for training (default: 24 * 60 * 60). After this amount of time Amazon SageMaker terminates the job regardless of its current status. - input_mode (str): The input mode that the algorithm supports + input_mode (str or PipelineVariable): The input mode that the algorithm supports (default: 'File'). Valid modes: 'File' - Amazon SageMaker copies the training dataset from the S3 location to a local directory. @@ -220,14 +220,14 @@ def __init__( downloading the entire dataset before training begins. This argument can be overriden on a per-channel basis using ``sagemaker.inputs.TrainingInput.input_mode``. - output_path (str): S3 location for saving the training result (model + output_path (str or PipelineVariable): S3 location for saving the training result (model artifacts and output files). If not specified, results are stored to a default bucket. If the bucket with the specific name does not exist, the estimator creates the bucket during the :meth:`~sagemaker.estimator.EstimatorBase.fit` method execution. file:// urls are used for local mode. 
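Likewise for ProfilerConfig: both the output path and the sampling interval may be parameterized. A sketch with assumed parameter names and defaults.

from sagemaker.debugger import ProfilerConfig
from sagemaker.workflow.parameters import ParameterInteger, ParameterString

profiler_config = ProfilerConfig(
    s3_output_path=ParameterString(name="ProfilerS3Uri"),
    system_monitor_interval_millis=ParameterInteger(
        name="MonitorIntervalMillis", default_value=500   # one of the documented valid values
    ),
)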
For example: 'file://model/' will save to the model folder in the current directory. - output_kms_key (str): Optional. KMS key ID for encrypting the + output_kms_key (str or PipelineVariable): Optional. KMS key ID for encrypting the training output (default: Your IAM role's KMS key for Amazon S3). If you don't provide a KMS key ID, Amazon SageMaker uses the default KMS key for Amazon S3 of the account linked to your @@ -240,13 +240,13 @@ def __init__( manages interactions with Amazon SageMaker APIs and any other AWS services needed. If not specified, the estimator creates one using the default AWS configuration chain. - tags (list[dict]): List of tags for labeling a training job. For - more, see + tags (list[dict[str, str] or list[dict[str, PipelineVariable]]): + List of tags for labeling a training job. For more, see https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html. - subnets (list[str]): List of subnet ids. If not specified training - job will be created without VPC config. - security_group_ids (list[str]): List of security group ids. If not + subnets (list[str] or list[PipelineVariable]): List of subnet ids. If not specified training job will be created without VPC config. + security_group_ids (list[str] or list[PipelineVariable]): List of security group ids. + If not specified training job will be created without VPC config. model_uri (str): URI where a pre-trained model is stored, either locally or in S3 (default: None). If specified, the estimator will create a channel pointing to the model so the training job @@ -260,31 +260,31 @@ def __init__( More information: https://docs.aws.amazon.com/sagemaker/latest/dg/cdf-training.html#td-deserialization - model_channel_name (str): Name of the channel where 'model_uri' will + model_channel_name (str or PipelineVariable): Name of the channel where 'model_uri' will be downloaded (default: 'model'). - metric_definitions (list[dict]): A list of dictionaries that defines - the metric(s) used to evaluate the training jobs. Each - dictionary contains two keys: 'Name' for the name of the metric, + metric_definitions (list[dict[str, str] or list[dict[str, PipelineVariable]]): + A list of dictionaries that defines the metric(s) used to evaluate the + training jobs. Each dictionary contains two keys: 'Name' for the name of the metric, and 'Regex' for the regular expression used to extract the metric from the logs. This should be defined only for jobs that don't use an Amazon algorithm. - encrypt_inter_container_traffic (bool): Specifies whether traffic + encrypt_inter_container_traffic (bool or PipelineVariable): Specifies whether traffic between training containers is encrypted for the training job (default: ``False``). - use_spot_instances (bool): Specifies whether to use SageMaker + use_spot_instances (bool or PipelineVariable): Specifies whether to use SageMaker Managed Spot instances for training. If enabled then the ``max_wait`` arg should also be set. More information: https://docs.aws.amazon.com/sagemaker/latest/dg/model-managed-spot-training.html (default: ``False``). - max_wait (int): Timeout in seconds waiting for spot training + max_wait (int or PipelineVariable): Timeout in seconds waiting for spot training job (default: None). After this amount of time Amazon SageMaker will stop waiting for managed spot training job to complete (default: None). 
- checkpoint_s3_uri (str): The S3 URI in which to persist checkpoints + checkpoint_s3_uri (str or PipelineVariable): The S3 URI in which to persist checkpoints that the algorithm persists (if any) during training. (default: ``None``). - checkpoint_local_path (str): The local path that the algorithm + checkpoint_local_path (str or PipelineVariable): The local path that the algorithm writes its checkpoints to. SageMaker will persist all files under this path to `checkpoint_s3_uri` continually during training. On job startup the reverse happens - data from the @@ -314,13 +314,13 @@ def __init__( see `Capture real time tensorboard data `_. - enable_sagemaker_metrics (bool): enable SageMaker Metrics Time + enable_sagemaker_metrics (bool or PipelineVariable): enable SageMaker Metrics Time Series. For more information, see `AlgorithmSpecification API `_. (default: None). - enable_network_isolation (bool): Specifies whether container will + enable_network_isolation (bool or PipelineVariable): Specifies whether container will run in network isolation mode (default: ``False``). Network isolation mode restricts the container access to outside networks (such as the Internet). The container does not make any inbound or @@ -335,17 +335,17 @@ def __init__( ``disable_profiler`` parameter to ``True``. disable_profiler (bool): Specifies whether Debugger monitoring and profiling will be disabled (default: ``False``). - environment (dict[str, str]) : Environment variables to be set for - use during training job (default: None) - max_retry_attempts (int): The number of times to move a job to the STARTING status. - You can specify between 1 and 30 attempts. + environment (dict[str, str] or dict[str, PipelineVariable]) : Environment variables + to be set for use during training job (default: None) + max_retry_attempts (int or PipelineVariable): The number of times to move a job + to the STARTING status. You can specify between 1 and 30 attempts. If the value of attempts is greater than zero, the job is retried on InternalServerFailure the same number of attempts as the value. You can cap the total duration for your job by setting ``max_wait`` and ``max_run`` (default: None) - source_dir (str): The absolute, relative, or S3 URI Path to a directory - with any other training source code dependencies aside from the entry + source_dir (str or PipelineVariable): The absolute, relative, or S3 URI Path to + a directory with any other training source code dependencies aside from the entry point file (default: None). If ``source_dir`` is an S3 URI, it must point to a tar.gz file. The structure within this directory is preserved when training on Amazon SageMaker. If 'git_config' is provided, @@ -412,9 +412,10 @@ def __init__( authentication if they are provided. If they are not provided, the SageMaker Python SDK attempts to use either the CodeCommit credential helper or local credential storage for authentication. - hyperparameters (dict): A dictionary containing the hyperparameters to + hyperparameters (dict[str, str] or dict[str, PipelineVariable]): + A dictionary containing the hyperparameters to initialize this estimator with. (Default: None). - container_log_level (int): The log level to use within the container + container_log_level (int or PipelineVariable): The log level to use within the container (default: logging.INFO). Valid values are defined in the Python logging module. 
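Taken together, the estimator arguments above let instance settings and output locations be wired to pipeline parameters. A sketch assuming a placeholder training image, role ARN, and bucket, and a default AWS region for the implicit Session.

from sagemaker.estimator import Estimator
from sagemaker.workflow.parameters import ParameterInteger, ParameterString

instance_type = ParameterString(name="TrainInstanceType", default_value="ml.c5.xlarge")
instance_count = ParameterInteger(name="TrainInstanceCount", default_value=1)

estimator = Estimator(
    image_uri="111122223333.dkr.ecr.us-east-1.amazonaws.com/my-training:latest",  # placeholder
    role="arn:aws:iam::111122223333:role/SageMakerRole",                          # placeholder
    instance_count=instance_count,        # PipelineVariable
    instance_type=instance_type,          # PipelineVariable
    volume_size=ParameterInteger(name="TrainVolumeSizeGB", default_value=30),
    output_path=ParameterString(name="OutputS3Uri", default_value="s3://my-bucket/output"),
    max_run=86400,
)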
code_location (str): The S3 prefix URI where custom code is @@ -422,7 +423,7 @@ def __init__( a string prepended with a "/" is appended to ``code_location``. The code file uploaded to S3 is 'code_location/job-name/source/sourcedir.tar.gz'. If not specified, the default ``code location`` is 's3://output_bucket/job-name/'. - entry_point (str): The absolute or relative path to the local Python + entry_point (str or PipelineVariable): The absolute or relative path to the local Python source file that should be executed as the entry point to training. (Default: None). If ``source_dir`` is specified, then ``entry_point`` must point to a file located at the root of ``source_dir``. @@ -2258,18 +2259,18 @@ def __init__( """Initialize an ``Estimator`` instance. Args: - image_uri (str): The container image to use for training. + image_uri (str or PipelineVariable): The container image to use for training. role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and APIs that create Amazon SageMaker endpoints use this role to access training data and model artifacts. After the endpoint is created, the inference code might use the IAM role, if it needs to access an AWS resource. - instance_count (int): Number of Amazon EC2 instances to use + instance_count (int or PipelineVariable): Number of Amazon EC2 instances to use for training. Required if instance_groups is not set. - instance_type (str): Type of EC2 instance to use for training, + instance_type (str or PipelineVariable): Type of EC2 instance to use for training, for example, ``'ml.c4.xlarge'``. Required if instance_groups is not set. - volume_size (int): Size in GB of the storage volume to use for + volume_size (int or PipelineVariable): Size in GB of the storage volume to use for storing input and output data during training (default: 30). Must be large enough to store training data if File mode is @@ -2307,12 +2308,12 @@ def __init__( Folders for Training Datasets, Checkpoints, Model Artifacts, and Outputs `_. - volume_kms_key (str): Optional. KMS key ID for encrypting EBS + volume_kms_key (str or PipelineVariable): Optional. KMS key ID for encrypting EBS volume attached to the training instance (default: None). - max_run (int): Timeout in seconds for training (default: 24 * + max_run (int or PipelineVariable): Timeout in seconds for training (default: 24 * 60 * 60). After this amount of time Amazon SageMaker terminates the job regardless of its current status. - input_mode (str): The input mode that the algorithm supports + input_mode (str or PipelineVariable): The input mode that the algorithm supports (default: 'File'). Valid modes: * 'File' - Amazon SageMaker copies the training dataset from the @@ -2322,12 +2323,12 @@ def __init__( This argument can be overriden on a per-channel basis using ``sagemaker.inputs.TrainingInput.input_mode``. - output_path (str): S3 location for saving the training result (model - artifacts and output files). If not specified, results are + output_path (str or PipelineVariable): S3 location for saving the training result + (model artifacts and output files). If not specified, results are stored to a default bucket. If the bucket with the specific name does not exist, the estimator creates the bucket during the :meth:`~sagemaker.estimator.EstimatorBase.fit` method execution. - output_kms_key (str): Optional. KMS key ID for encrypting the + output_kms_key (str or PipelineVariable): Optional. KMS key ID for encrypting the training output (default: None). 
base_job_name (str): Prefix for training job name when the :meth:`~sagemaker.estimator.EstimatorBase.fit` method launches. @@ -2337,15 +2338,15 @@ def __init__( manages interactions with Amazon SageMaker APIs and any other AWS services needed. If not specified, the estimator creates one using the default AWS configuration chain. - hyperparameters (dict): Dictionary containing the hyperparameters to - initialize this estimator with. - tags (list[dict]): List of tags for labeling a training job. For - more, see + hyperparameters (dict[str, str] or dict[str, PipelineVariable]): + Dictionary containing the hyperparameters to initialize this estimator with. + tags (list[dict[str, str] or list[dict[str, PipelineVariable]]): List of tags for + labeling a training job. For more, see https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html. - subnets (list[str]): List of subnet ids. If not specified training - job will be created without VPC config. - security_group_ids (list[str]): List of security group ids. If not - specified training job will be created without VPC config. + subnets (list[str] or list[PipelineVariable]): List of subnet ids. + If not specified training job will be created without VPC config. + security_group_ids (list[str] or list[PipelineVariable]): List of security group ids. + If not specified training job will be created without VPC config. model_uri (str): URI where a pre-trained model is stored, either locally or in S3 (default: None). If specified, the estimator will create a channel pointing to the model so the training job @@ -2359,32 +2360,32 @@ def __init__( More information: https://docs.aws.amazon.com/sagemaker/latest/dg/cdf-training.html#td-deserialization - model_channel_name (str): Name of the channel where 'model_uri' will + model_channel_name (str or PipelineVariable): Name of the channel where 'model_uri' will be downloaded (default: 'model'). - metric_definitions (list[dict]): A list of dictionaries that defines - the metric(s) used to evaluate the training jobs. Each + metric_definitions (list[dict[str, str] or list[dict[str, PipelineVariable]]): A list of + dictionaries that defines the metric(s) used to evaluate the training jobs. Each dictionary contains two keys: 'Name' for the name of the metric, and 'Regex' for the regular expression used to extract the metric from the logs. This should be defined only for jobs that don't use an Amazon algorithm. - encrypt_inter_container_traffic (bool): Specifies whether traffic + encrypt_inter_container_traffic (bool or PipelineVariable): Specifies whether traffic between training containers is encrypted for the training job (default: ``False``). - use_spot_instances (bool): Specifies whether to use SageMaker + use_spot_instances (bool or PipelineVariable): Specifies whether to use SageMaker Managed Spot instances for training. If enabled then the ``max_wait`` arg should also be set. More information: https://docs.aws.amazon.com/sagemaker/latest/dg/model-managed-spot-training.html (default: ``False``). - max_wait (int): Timeout in seconds waiting for spot training + max_wait (int or PipelineVariable): Timeout in seconds waiting for spot training job (default: None). After this amount of time Amazon SageMaker will stop waiting for managed spot training job to complete (default: None). - checkpoint_s3_uri (str): The S3 URI in which to persist checkpoints + checkpoint_s3_uri (str or PipelineVariable): The S3 URI in which to persist checkpoints that the algorithm persists (if any) during training. (default: None). 
- checkpoint_local_path (str): The local path that the algorithm + checkpoint_local_path (str or PipelineVariable): The local path that the algorithm writes its checkpoints to. SageMaker will persist all files under this path to `checkpoint_s3_uri` continually during training. On job startup the reverse happens - data from the @@ -2392,7 +2393,7 @@ def __init__( started. If the path is unset then SageMaker assumes the checkpoints will be provided under `/opt/ml/checkpoints/`. (default: None). - enable_network_isolation (bool): Specifies whether container will + enable_network_isolation (bool or PipelineVariable): Specifies whether container will run in network isolation mode (default: ``False``). Network isolation mode restricts the container access to outside networks (such as the Internet). The container does not make any inbound or @@ -2419,7 +2420,7 @@ def __init__( see `Capture real time tensorboard data `_. - enable_sagemaker_metrics (bool): enable SageMaker Metrics Time + enable_sagemaker_metrics (bool or PipelineVariable): enable SageMaker Metrics Time Series. For more information, see `AlgorithmSpecification API >> |----- test.py You can assign entry_point='src/train.py'. - source_dir (str): Path (absolute, relative or an S3 URI) to a directory - with any other training source code dependencies aside from the entry - point file (default: None). If ``source_dir`` is an S3 URI, it must + source_dir (str or PipelineVariable): Path (absolute, relative or an S3 URI) + to a directory with any other training source code dependencies aside from + the entry point file (default: None). If ``source_dir`` is an S3 URI, it must point to a tar.gz file. Structure within this directory are preserved when training on Amazon SageMaker. If 'git_config' is provided, 'source_dir' should be a relative location to a directory in the Git @@ -2787,13 +2788,13 @@ def __init__( and you need 'train.py' as entry point and 'test.py' as training source code as well, you can assign entry_point='train.py', source_dir='src'. - hyperparameters (dict): Hyperparameters that will be used for - training (default: None). The hyperparameters are made + hyperparameters (dict[str, str] or dict[str, PipelineVariable]): Hyperparameters + that will be used for training (default: None). The hyperparameters are made accessible as a dict[str, str] to the training code on SageMaker. For convenience, this accepts other types for keys and values, but ``str()`` will be called to convert them before training. - container_log_level (int): Log level to use within the container + container_log_level (int or PipelineVariable): Log level to use within the container (default: logging.INFO). Valid values are defined in the Python logging module. code_location (str): The S3 prefix URI where custom code will be @@ -2801,7 +2802,7 @@ def __init__( a string prepended with a "/" is appended to ``code_location``. The code file uploaded to S3 is 'code_location/job-name/source/sourcedir.tar.gz'. If not specified, the default ``code location`` is s3://output_bucket/job-name/. - image_uri (str): An alternate image name to use instead of the + image_uri (str or PipelineVariable): An alternate image name to use instead of the official Sagemaker image for the framework. This is useful to run one of the Sagemaker supported frameworks with an image containing custom dependencies. @@ -2830,7 +2831,7 @@ def __init__( >>> |------ virtual-env This is not supported with "local code" in Local Mode. 
- enable_network_isolation (bool): Specifies whether container will + enable_network_isolation (bool or PipelineVariable): Specifies whether container will run in network isolation mode. Network isolation mode restricts the container access to outside networks (such as the internet). The container does not make any inbound or outbound network @@ -2887,10 +2888,10 @@ def __init__( authentication if they are provided; otherwise, python SDK will try to use either CodeCommit credential helper or local credential storage for authentication. - checkpoint_s3_uri (str): The S3 URI in which to persist checkpoints + checkpoint_s3_uri (str or PipelineVariable): The S3 URI in which to persist checkpoints that the algorithm persists (if any) during training. (default: None). - checkpoint_local_path (str): The local path that the algorithm + checkpoint_local_path (str or PipelineVariable): The local path that the algorithm writes its checkpoints to. SageMaker will persist all files under this path to `checkpoint_s3_uri` continually during training. On job startup the reverse happens - data from the @@ -2898,7 +2899,7 @@ def __init__( started. If the path is unset then SageMaker assumes the checkpoints will be provided under `/opt/ml/checkpoints/`. (default: None). - enable_sagemaker_metrics (bool): enable SageMaker Metrics Time + enable_sagemaker_metrics (bool or PipelineVariable): enable SageMaker Metrics Time Series. For more information see: https://docs.aws.amazon.com/sagemaker/latest/dg/API_AlgorithmSpecification.html#SageMaker-Type-AlgorithmSpecification-EnableSageMakerMetricsTimeSeries (default: None). diff --git a/src/sagemaker/fw_utils.py b/src/sagemaker/fw_utils.py index 5ea45e76dc..85669f2c66 100644 --- a/src/sagemaker/fw_utils.py +++ b/src/sagemaker/fw_utils.py @@ -165,12 +165,12 @@ def validate_source_code_input_against_pipeline_variables( """Validate source code input against pipeline variables Args: - entry_point (str, PipelineVariable): The path to the local Python source file that + entry_point (str or PipelineVariable): The path to the local Python source file that should be executed as the entry point to training (default: None). - source_dir (str, PipelineVariable): The Path to a directory with any other + source_dir (str or PipelineVariable): The Path to a directory with any other training source code dependencies aside from the entry point file (default: None). git_config (Dict[str, str]): Git configurations used for cloning files (default: None). - enable_network_isolation (bool, PipelineVariable): Specifies whether container will run + enable_network_isolation (bool or PipelineVariable): Specifies whether container will run in network isolation mode (default: False). """ if is_pipeline_variable(enable_network_isolation) or enable_network_isolation is True: diff --git a/src/sagemaker/huggingface/estimator.py b/src/sagemaker/huggingface/estimator.py index ad756da3b0..5b53059509 100644 --- a/src/sagemaker/huggingface/estimator.py +++ b/src/sagemaker/huggingface/estimator.py @@ -67,7 +67,7 @@ def __init__( code. Defaults to ``None``. Required unless ``image_uri`` is provided. If using PyTorch, the current supported version is ``py36``. If using TensorFlow, the current supported version is ``py37``. - entry_point (str): Path (absolute or relative) to the Python source + entry_point (str or PipelineVariable): Path (absolute or relative) to the Python source file which should be executed as the entry point to training. 
If ``source_dir`` is specified, then ``entry_point`` must point to a file located at the root of ``source_dir``. @@ -80,18 +80,18 @@ def __init__( pytorch_version (str): PyTorch version you want to use for executing your model training code. Defaults to ``None``. Required unless ``tensorflow_version`` is provided. The current supported versions are ``1.7.1`` and ``1.6.0``. - source_dir (str): Path (absolute, relative or an S3 URI) to a directory - with any other training source code dependencies aside from the entry + source_dir (str or PipelineVariable): Path (absolute, relative or an S3 URI) to a + directory with any other training source code dependencies aside from the entry point file (default: None). If ``source_dir`` is an S3 URI, it must point to a tar.gz file. Structure within this directory are preserved when training on Amazon SageMaker. - hyperparameters (dict): Hyperparameters that will be used for - training (default: None). The hyperparameters are made + hyperparameters (dict[str, str] or dict[str, PipelineVariable]): Hyperparameters + that will be used for training (default: None). The hyperparameters are made accessible as a dict[str, str] to the training code on SageMaker. For convenience, this accepts other types for keys and values, but ``str()`` will be called to convert them before training. - image_uri (str): If specified, the estimator will use this image + image_uri (str or PipelineVariable): If specified, the estimator will use this image for training and hosting, instead of selecting the appropriate SageMaker official image based on framework_version and py_version. It can be an ECR url or dockerhub image and tag. diff --git a/src/sagemaker/huggingface/model.py b/src/sagemaker/huggingface/model.py index 6f810dc5e2..a66c7e2389 100644 --- a/src/sagemaker/huggingface/model.py +++ b/src/sagemaker/huggingface/model.py @@ -121,8 +121,8 @@ def __init__( """Initialize a HuggingFaceModel. Args: - model_data (str): The Amazon S3 location of a SageMaker model data - ``.tar.gz`` file. + model_data (str or PipelineVariable): The Amazon S3 location of a SageMaker + model data ``.tar.gz`` file. role (str): An AWS IAM role specified with either the name or full ARN. The Amazon SageMaker training jobs and APIs that create Amazon SageMaker endpoints use this role to access training data and model @@ -147,16 +147,16 @@ def __init__( py_version (str): Python version you want to use for executing your model training code. Defaults to ``None``. Required unless ``image_uri`` is provided. - image_uri (str): A Docker image URI. Defaults to None. If not specified, a - default image for PyTorch will be used. If ``framework_version`` + image_uri (str or PipelineVariable): A Docker image URI. Defaults to None. + If not specified, a default image for PyTorch will be used. If ``framework_version`` or ``py_version`` are ``None``, then ``image_uri`` is required. If also ``None``, then a ``ValueError`` will be raised. predictor_cls (callable[str, sagemaker.session.Session]): A function to call to create a predictor with an endpoint name and SageMaker ``Session``. If specified, ``deploy()`` returns the result of invoking this function on the created endpoint name. - model_server_workers (int): Optional. The number of worker processes - used by the inference server. If None, server will use one + model_server_workers (int or PipelineVariable): Optional. The number of + worker processes used by the inference server. If None, server will use one worker per vCPU. 
**kwargs: Keyword arguments passed to the superclass :class:`~sagemaker.model.FrameworkModel` and, subsequently, its @@ -330,45 +330,50 @@ def register( """Creates a model package for creating SageMaker models or listing on Marketplace. Args: - content_types (list): The supported MIME types for the input data. - response_types (list): The supported MIME types for the output data. - inference_instances (list): A list of the instance types that are used to - generate inferences in real-time (default: None). - transform_instances (list): A list of the instance types on which a transformation - job can be run or on which an endpoint can be deployed (default: None). - model_package_name (str): Model Package name, exclusive to `model_package_group_name`, - using `model_package_name` makes the Model Package un-versioned. - Defaults to ``None``. - model_package_group_name (str): Model Package Group name, exclusive to - `model_package_name`, using `model_package_group_name` makes the Model Package - versioned. Defaults to ``None``. - image_uri (str): Inference image URI for the container. Model class' self.image will - be used if it is None. Defaults to ``None``. + content_types (list[str] or list[PipelineVariable]): The supported MIME types + for the input data. + response_types (list[str] or list[PipelineVariable]): The supported MIME types + for the output data. + inference_instances (list[str] or list[PipelineVariable]): A list of the instance + types that are used to generate inferences in real-time (default: None). + transform_instances (list[str] or list[PipelineVariable]): A list of the instance types + on which a transformation job can be run or on which an endpoint can be deployed + (default: None). + model_package_name (str or PipelineVariable): Model Package name, exclusive to + `model_package_group_name`, using `model_package_name` makes the Model Package + un-versioned. Defaults to ``None``. + model_package_group_name (str or PipelineVariable): Model Package Group name, + exclusive to `model_package_name`, using `model_package_group_name` makes the + Model Package versioned. Defaults to ``None``. + image_uri (str or PipelineVariable): Inference image URI for the container. Model class' + self.image will be used if it is None. Defaults to ``None``. model_metrics (ModelMetrics): ModelMetrics object. Defaults to ``None``. metadata_properties (MetadataProperties): MetadataProperties object. Defaults to ``None``. marketplace_cert (bool): A boolean value indicating if the Model Package is certified for AWS Marketplace. Defaults to ``False``. - approval_status (str): Model Approval Status, values can be "Approved", "Rejected", - or "PendingManualApproval". Defaults to ``PendingManualApproval``. + approval_status (str or PipelineVariable): Model Approval Status, values can be + "Approved", "Rejected", or "PendingManualApproval". Defaults to + ``PendingManualApproval``. description (str): Model Package description. Defaults to ``None``. drift_check_baselines (DriftCheckBaselines): DriftCheckBaselines object (default: None). - customer_metadata_properties (dict[str, str]): A dictionary of key-value paired - metadata properties (default: None). - domain (str): Domain values can be "COMPUTER_VISION", "NATURAL_LANGUAGE_PROCESSING", - "MACHINE_LEARNING" (default: None). - sample_payload_url (str): The S3 path where the sample payload is stored - (default: None). 
- task (str): Task values which are supported by Inference Recommender are "FILL_MASK", - "IMAGE_CLASSIFICATION", "OBJECT_DETECTION", "TEXT_GENERATION", "IMAGE_SEGMENTATION", - "CLASSIFICATION", "REGRESSION", "OTHER" (default: None). - framework (str): Machine learning framework of the model package container image - (default: None). - framework_version (str): Framework version of the Model Package Container Image + customer_metadata_properties (dict[str, str] or dict[str, PipelineVariable]): + A dictionary of key-value paired metadata properties (default: None). + domain (str or PipelineVariable): Domain values can be "COMPUTER_VISION", + "NATURAL_LANGUAGE_PROCESSING", "MACHINE_LEARNING" (default: None). + sample_payload_url (str or PipelineVariable): The S3 path where the sample payload + is stored (default: None). + task (str or PipelineVariable): Task values which are supported by Inference Recommender + are "FILL_MASK", "IMAGE_CLASSIFICATION", "OBJECT_DETECTION", "TEXT_GENERATION", + "IMAGE_SEGMENTATION", "CLASSIFICATION", "REGRESSION", "OTHER" (default: None). + framework (str or PipelineVariable): Machine learning framework of the model package + container image (default: None). + framework_version (str or PipelineVariable): Framework version of the Model Package + Container Image (default: None). + nearest_model_name (str or PipelineVariable): Name of a pre-trained machine learning + benchmarked by Amazon SageMaker Inference Recommender (default: None). + data_input_configuration (str or PipelineVariable): Input object for the model (default: None). - nearest_model_name (str): Name of a pre-trained machine learning benchmarked by - Amazon SageMaker Inference Recommender (default: None). - data_input_configuration (str): Input object for the model (default: None). Returns: A `sagemaker.model.ModelPackage` instance. diff --git a/src/sagemaker/huggingface/training_compiler/config.py b/src/sagemaker/huggingface/training_compiler/config.py index 9f0288115a..135ea6edc1 100644 --- a/src/sagemaker/huggingface/training_compiler/config.py +++ b/src/sagemaker/huggingface/training_compiler/config.py @@ -53,10 +53,10 @@ def __init__( estimator. Args: - enabled (bool): Optional. Switch to enable SageMaker Training Compiler. - The default is ``True``. - debug (bool): Optional. Whether to dump detailed logs for debugging. - This comes with a potential performance slowdown. + enabled (bool or PipelineVariable): Optional. Switch to enable SageMaker + Training Compiler. The default is ``True``. + debug (bool or PipelineVariable): Optional. Whether to dump detailed logs + for debugging. This comes with a potential performance slowdown. The default is ``False``. **Example**: The following code shows the basic usage of the diff --git a/src/sagemaker/inputs.py b/src/sagemaker/inputs.py index 0fca307a97..59c7166792 100644 --- a/src/sagemaker/inputs.py +++ b/src/sagemaker/inputs.py @@ -50,17 +50,17 @@ def __init__( on the parameters. Args: - s3_data (str): Defines the location of S3 data to train on. - distribution (str): Valid values: ``'FullyReplicated'``, - ``'ShardedByS3Key'`` - (default: ``'FullyReplicated'``). - compression (str): Valid values: ``'Gzip'``, ``None`` (default: None). - This is used only in - Pipe input mode. - content_type (str): MIME type of the input data (default: None). - record_wrapping (str): Valid values: 'RecordIO' (default: None). - s3_data_type (str): Valid values: ``'S3Prefix'``, ``'ManifestFile'``, - ``'AugmentedManifestFile'``. 
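A tentative sketch of the Training Compiler switch documented above with a parameterized ``enabled`` flag; this assumes ParameterBoolean is available in the SDK version in use.

from sagemaker.huggingface import TrainingCompilerConfig
from sagemaker.workflow.parameters import ParameterBoolean

compiler_config = TrainingCompilerConfig(
    enabled=ParameterBoolean(name="CompilerEnabled", default_value=True),  # PipelineVariable
    debug=False,
)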
+ s3_data (str or PipelineVariable): Defines the location of S3 data to train on. + distribution (str or PipelineVariable): Valid values: ``'FullyReplicated'``, + ``'ShardedByS3Key'`` (default: ``'FullyReplicated'``). + compression (str or PipelineVariable): Valid values: ``'Gzip'``, ``None`` + (default: None). This is used only in Pipe input mode. + content_type (str or PipelineVariable): MIME type of the input data + (default: None). + record_wrapping (str or PipelineVariable): Valid values: 'RecordIO' + (default: None). + s3_data_type (str or PipelineVariable): Valid values: ``'S3Prefix'``, + ``'ManifestFile'``, ``'AugmentedManifestFile'``. If ``'S3Prefix'``, ``s3_data`` defines a prefix of s3 objects to train on. All objects with s3 keys beginning with ``s3_data`` will be used to train. If ``'ManifestFile'`` or ``'AugmentedManifestFile'``, @@ -70,9 +70,9 @@ def __init__( AugmentedManifestFile formats are described at `S3DataSource `_ in the `Amazon SageMaker API reference`. - instance_groups (list[str]): Optional. A list of instance group names in string format - that you specified while configuring a heterogeneous cluster using the - :class:`sagemaker.instance_group.InstanceGroup`. + instance_groups (list[str] or list[PipelineVariable]): Optional. A list of + instance group names in string format that you specified while configuring + a heterogeneous cluster using the :class:`sagemaker.instance_group.InstanceGroup`. S3 data will be sent to all instance groups in the specified list. For instructions on how to use InstanceGroup objects to configure a heterogeneous cluster @@ -81,8 +81,8 @@ def __init__( `_ in the *Amazon SageMaker developer guide*. (default: None) - input_mode (str): Optional override for this channel's input mode (default: None). - By default, channels will use the input mode defined on + input_mode (str or PipelineVariable): Optional override for this channel's input mode + (default: None). By default, channels will use the input mode defined on ``sagemaker.estimator.EstimatorBase.input_mode``, but they will ignore that setting if this parameter is set. @@ -94,10 +94,11 @@ def __init__( * 'FastFile' - Amazon SageMaker streams data from S3 on demand instead of downloading the entire dataset before training begins. - attribute_names (list[str]): A list of one or more attribute names to use that are - found in a specified AugmentedManifestFile. - target_attribute_name (str): The name of the attribute will be predicted (classified) - in a SageMaker AutoML job. It is required if the input is for SageMaker AutoML job. + attribute_names (list[str] or list[PipelineVariable]): A list of one or more attribute + names to use that are found in a specified AugmentedManifestFile. + target_attribute_name (str or PipelineVariable): The name of the attribute will be + predicted (classified) in a SageMaker AutoML job. It is required if the input is + for SageMaker AutoML job. shuffle_config (sagemaker.inputs.ShuffleConfig): If specified this configuration enables shuffling on this channel. 
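A minimal sketch of a TrainingInput built from a pipeline parameter, per the updated argument types; the parameter name and default S3 prefix are placeholders.

from sagemaker.inputs import TrainingInput
from sagemaker.workflow.parameters import ParameterString

train_input = TrainingInput(
    s3_data=ParameterString(name="TrainS3Uri", default_value="s3://my-bucket/train"),
    content_type="text/csv",
    s3_data_type="S3Prefix",
    distribution="FullyReplicated",
    input_mode="File",
)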
See the SageMaker API documentation for more info: https://docs.aws.amazon.com/sagemaker/latest/dg/API_ShuffleConfig.html diff --git a/src/sagemaker/metadata_properties.py b/src/sagemaker/metadata_properties.py index b25aff9168..25cb6b4169 100644 --- a/src/sagemaker/metadata_properties.py +++ b/src/sagemaker/metadata_properties.py @@ -32,10 +32,10 @@ def __init__( # TODO: flesh out docstrings Args: - commit_id (str): - repository (str): - generated_by (str): - project_id (str): + commit_id (str or PipelineVariable): + repository (str or PipelineVariable): + generated_by (str or PipelineVariable): + project_id (str or PipelineVariable): """ self.commit_id = commit_id self.repository = repository diff --git a/src/sagemaker/model.py b/src/sagemaker/model.py index f81b591809..4c4a4229d3 100644 --- a/src/sagemaker/model.py +++ b/src/sagemaker/model.py @@ -109,9 +109,9 @@ def __init__( """Initialize an SageMaker ``Model``. Args: - image_uri (str): A Docker image URI. - model_data (str): The S3 location of a SageMaker model data - ``.tar.gz`` file (default: None). + image_uri (str or PipelineVariable): A Docker image URI. + model_data (str or PipelineVariable): The S3 location of a SageMaker + model data ``.tar.gz`` file (default: None). role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and APIs that create Amazon SageMaker endpoints use this role to access training data and model @@ -124,28 +124,28 @@ def __init__( function to call to create a predictor (default: None). If not None, ``deploy`` will return the result of invoking this function on the created endpoint name. - env (dict[str, str]): Environment variables to run with ``image_uri`` - when hosted in SageMaker (default: None). + env (dict[str, str] or dict[str, PipelineVariable]): Environment variables + to run with ``image_uri`` when hosted in SageMaker (default: None). name (str): The model name. If None, a default model name will be selected on each ``deploy``. - vpc_config (dict[str, list[str]]): The VpcConfig set on the model - (default: None) + vpc_config (dict[str, list[str]] or dict[str, list[PipelineVariable]]): + The VpcConfig set on the model (default: None) * 'Subnets' (list[str]): List of subnet ids. * 'SecurityGroupIds' (list[str]): List of security group ids. sagemaker_session (sagemaker.session.Session): A SageMaker Session object, used for SageMaker interactions (default: None). If not specified, one is created using the default AWS configuration chain. - enable_network_isolation (Boolean): Default False. if True, enables - network isolation in the endpoint, isolating the model + enable_network_isolation (Boolean or PipelineVariable): Default False. + if True, enables network isolation in the endpoint, isolating the model container. No inbound or outbound network calls can be made to or from the model container. model_kms_key (str): KMS key ARN used to encrypt the repacked model archive file if the model is repacked - image_config (dict[str, str]): Specifies whether the image of - model container is pulled from ECR, or private registry in your - VPC. By default it is set to pull model container image from - ECR. (default: None). + image_config (dict[str, str] or dict[str, PipelineVariable]): Specifies + whether the image of model container is pulled from ECR, or private + registry in your VPC. By default it is set to pull model container + image from ECR. (default: None). 
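In the same vein, ``Model`` can take its image and artifact location from pipeline variables (a sketch only; the role and parameter names are made up)::

    from sagemaker.model import Model
    from sagemaker.workflow.parameters import ParameterString

    model = Model(
        image_uri=ParameterString(name="InferenceImage"),
        model_data=ParameterString(name="ModelDataUrl"),
        role="my-sagemaker-role",  # placeholder role name
    )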
source_dir (str): The absolute, relative, or S3 URI Path to a directory with any other training source code dependencies aside from the entry point file (default: None). If ``source_dir`` is an S3 URI, it must @@ -184,8 +184,8 @@ def __init__( >>> |----- test.py You can assign entry_point='src/inference.py'. - container_log_level (int): Log level to use within the container - (default: logging.INFO). Valid values are defined in the Python + container_log_level (int or PipelineVariable): Log level to use within the + container (default: logging.INFO). Valid values are defined in the Python logging module. dependencies (list[str]): A list of absolute or relative paths to directories with any additional libraries that should be exported @@ -330,43 +330,49 @@ def register( """Creates a model package for creating SageMaker models or listing on Marketplace. Args: - content_types (list): The supported MIME types for the input data. - response_types (list): The supported MIME types for the output data. - inference_instances (list): A list of the instance types that are used to - generate inferences in real-time (default: None). - transform_instances (list): A list of the instance types on which a transformation - job can be run or on which an endpoint can be deployed (default: None). - model_package_name (str): Model Package name, exclusive to `model_package_group_name`, - using `model_package_name` makes the Model Package un-versioned (default: None). - model_package_group_name (str): Model Package Group name, exclusive to - `model_package_name`, using `model_package_group_name` makes the Model Package - versioned (default: None). - image_uri (str): Inference image uri for the container. Model class' self.image will - be used if it is None (default: None). + content_types (list[str] or list[PipelineVariable]): The supported MIME types + for the input data. + response_types (list[str] or list[PipelineVariable]): The supported MIME types + for the output data. + inference_instances (list[str] or list[PipelineVariable]): A list of the instance + types that are used to generate inferences in real-time (default: None). + transform_instances (list[str] or list[PipelineVariable]): A list of the instance + types on which a transformation job can be run or on which an endpoint can be + deployed (default: None). + model_package_name (str or PipelineVariable): Model Package name, exclusive to + `model_package_group_name`, using `model_package_name` makes the Model Package + un-versioned (default: None). + model_package_group_name (str or PipelineVariable): Model Package Group name, + exclusive to `model_package_name`, using `model_package_group_name` makes + the Model Package versioned (default: None). + image_uri (str or PipelineVariable): Inference image uri for the container. + Model class' self.image will be used if it is None (default: None). model_metrics (ModelMetrics): ModelMetrics object (default: None). metadata_properties (MetadataProperties): MetadataProperties object (default: None). marketplace_cert (bool): A boolean value indicating if the Model Package is certified for AWS Marketplace (default: False). - approval_status (str): Model Approval Status, values can be "Approved", "Rejected", - or "PendingManualApproval" (default: "PendingManualApproval"). + approval_status (str or PipelineVariable): Model Approval Status, values can be + "Approved", "Rejected", or "PendingManualApproval" + (default: "PendingManualApproval"). description (str): Model Package description (default: None). 
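For ``register``, string-valued arguments such as ``approval_status`` can likewise be pipeline variables (a sketch that reuses the ``model`` object from the previous sketch; the group name is a placeholder)::

    from sagemaker.workflow.parameters import ParameterString

    approval_status = ParameterString(
        name="ApprovalStatus", default_value="PendingManualApproval"
    )

    model.register(
        content_types=["text/csv"],
        response_types=["text/csv"],
        inference_instances=["ml.m5.xlarge"],
        transform_instances=["ml.m5.xlarge"],
        model_package_group_name="my-model-group",
        approval_status=approval_status,
    )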
drift_check_baselines (DriftCheckBaselines): DriftCheckBaselines object (default: None). - customer_metadata_properties (dict[str, str]): A dictionary of key-value paired - metadata properties (default: None). - domain (str): Domain values can be "COMPUTER_VISION", "NATURAL_LANGUAGE_PROCESSING", - "MACHINE_LEARNING" (default: None). - task (str): Task values which are supported by Inference Recommender are "FILL_MASK", - "IMAGE_CLASSIFICATION", "OBJECT_DETECTION", "TEXT_GENERATION", "IMAGE_SEGMENTATION", - "CLASSIFICATION", "REGRESSION", "OTHER" (default: None). - sample_payload_url (str): The S3 path where the sample payload is stored - (default: None). - framework (str): Machine learning framework of the model package container image + customer_metadata_properties (dict[str, str] or dict[str, PipelineVariable]): + A dictionary of key-value paired metadata properties (default: None). + domain (str or PipelineVariable): Domain values can be "COMPUTER_VISION", + "NATURAL_LANGUAGE_PROCESSING", "MACHINE_LEARNING" (default: None). + task (str or PipelineVariable): Task values which are supported by Inference Recommender + are "FILL_MASK", "IMAGE_CLASSIFICATION", "OBJECT_DETECTION", "TEXT_GENERATION", + "IMAGE_SEGMENTATION", "CLASSIFICATION", "REGRESSION", "OTHER" (default: None). + sample_payload_url (str or PipelineVariable): The S3 path where the sample + payload is stored (default: None). + framework (str or PipelineVariable): Machine learning framework of the model package + container image (default: None). + framework_version (str or PipelineVariable): Framework version of the Model Package + Container Image (default: None). + nearest_model_name (str or PipelineVariable): Name of a pre-trained machine learning + benchmarked by Amazon SageMaker Inference Recommender (default: None). + data_input_configuration (str or PipelineVariable): Input object for the model (default: None). - framework_version (str): Framework version of the Model Package Container Image - (default: None). - nearest_model_name (str): Name of a pre-trained machine learning benchmarked by - Amazon SageMaker Inference Recommender (default: None). - data_input_configuration (str): Input object for the model (default: None). Returns: A `sagemaker.model.ModelPackage` instance or pipeline step arguments @@ -446,8 +452,8 @@ def create( Specifies configuration related to serverless endpoint. Instance type is not provided in serverless inference. So this is used to find image URIs (default: None). - tags (List[Dict[str, str]]): The list of tags to add to - the model (default: None). Example:: + tags (list[dict[str, str] or list[dict[str, PipelineVariable]]): The list of + tags to add to the model (default: None). Example:: tags = [{'Key': 'tagname', 'Value':'tagvalue'}] @@ -1310,9 +1316,9 @@ def __init__( """Initialize a ``FrameworkModel``. Args: - model_data (str): The S3 location of a SageMaker model data - ``.tar.gz`` file. - image_uri (str): A Docker image URI. + model_data (str or PipelineVariable): The S3 location of a SageMaker + model data ``.tar.gz`` file. + image_uri (str or PipelineVariable): A Docker image URI. role (str): An IAM role name or ARN for SageMaker to access AWS resources on your behalf. entry_point (str): Path (absolute or relative) to the Python source @@ -1354,13 +1360,13 @@ def __init__( function to call to create a predictor (default: None). If not None, ``deploy`` will return the result of invoking this function on the created endpoint name. 
- env (dict[str, str]): Environment variables to run with ``image_uri`` - when hosted in SageMaker (default: None). + env (dict[str, str] or dict[str, PipelineVariable]): Environment variables to + run with ``image_uri`` when hosted in SageMaker (default: None). name (str): The model name. If None, a default model name will be selected on each ``deploy``. - container_log_level (int): Log level to use within the container - (default: logging.INFO). Valid values are defined in the Python - logging module. + container_log_level (int or PipelineVariable): Log level to use within + the container (default: logging.INFO). Valid values are defined + in the Python logging module. code_location (str): Name of the S3 bucket where custom code is uploaded (default: None). If not specified, default bucket created by ``sagemaker.session.Session`` is used. diff --git a/src/sagemaker/model_metrics.py b/src/sagemaker/model_metrics.py index 83a43d3f18..297f8ce7f7 100644 --- a/src/sagemaker/model_metrics.py +++ b/src/sagemaker/model_metrics.py @@ -110,9 +110,11 @@ def __init__( """Initialize a ``MetricsSource`` instance and turn parameters into dict. Args: - content_type (str): Specifies the type of content in S3 URI - s3_uri (str): The S3 URI of the metric - content_digest (str): The digest of the metric (default: None) + content_type (str or PipelineVariable): Specifies the type of content + in S3 URI + s3_uri (str or PipelineVariable): The S3 URI of the metric + content_digest (str or PipelineVariable): The digest of the metric + (default: None) """ self.content_type = content_type self.s3_uri = s3_uri @@ -138,9 +140,11 @@ def __init__( """Initialize a ``FileSource`` instance and turn parameters into dict. Args: - s3_uri (str): The S3 URI of the metric - content_digest (str): The digest of the metric (default: None) - content_type (str): Specifies the type of content in S3 URI (default: None) + s3_uri (str or PipelineVariable): The S3 URI of the metric + content_digest (str or PipelineVariable): The digest of the metric + (default: None) + content_type (str or PipelineVariable): Specifies the type of content + in S3 URI (default: None) """ self.content_type = content_type self.s3_uri = s3_uri diff --git a/src/sagemaker/multidatamodel.py b/src/sagemaker/multidatamodel.py index d90a5ca76f..2cb6674ffd 100644 --- a/src/sagemaker/multidatamodel.py +++ b/src/sagemaker/multidatamodel.py @@ -58,8 +58,8 @@ def __init__( If this is present, the attributes from this model are used when deploying the ``MultiDataModel``. Parameters 'image_uri', 'role' and 'kwargs' are not permitted when model parameter is set. - image_uri (str): A Docker image URI. It can be null if the 'model' parameter - is passed to during ``MultiDataModel`` initialization (default: None) + image_uri (str or PipelineVariable): A Docker image URI. It can be null if the 'model' + parameter is passed to during ``MultiDataModel`` initialization (default: None) role (str): An AWS IAM role (either name or full ARN). 
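``MetricsSource`` and ``FileSource`` follow the same pattern, so the S3 URI of a metrics report can come from a parameter or an earlier step (illustrative only; the parameter name is made up)::

    from sagemaker.model_metrics import MetricsSource, ModelMetrics
    from sagemaker.workflow.parameters import ParameterString

    model_metrics = ModelMetrics(
        model_statistics=MetricsSource(
            content_type="application/json",
            s3_uri=ParameterString(name="ModelStatisticsUri"),
        )
    )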
The Amazon SageMaker training jobs and APIs that create Amazon SageMaker endpoints use this role to access training data and model diff --git a/src/sagemaker/mxnet/estimator.py b/src/sagemaker/mxnet/estimator.py index 3f0c054929..cce41bd889 100644 --- a/src/sagemaker/mxnet/estimator.py +++ b/src/sagemaker/mxnet/estimator.py @@ -70,8 +70,8 @@ def __init__( home-page: https://github.com/aws/sagemaker-python-sdk Args: - entry_point (str): Path (absolute or relative) to the Python source - file which should be executed as the entry point to training. + entry_point (str or PipelineVariable): Path (absolute or relative) to the + Python source file which should be executed as the entry point to training. If ``source_dir`` is specified, then ``entry_point`` must point to a file located at the root of ``source_dir``. framework_version (str): MXNet version you want to use for executing @@ -81,20 +81,21 @@ def __init__( py_version (str): Python version you want to use for executing your model training code. One of 'py2' or 'py3'. Defaults to ``None``. Required unless ``image_uri`` is provided. - source_dir (str): Path (absolute, relative or an S3 URI) to a directory - with any other training source code dependencies aside from the entry + source_dir (str or PipelineVariable): Path (absolute, relative or an S3 URI) to + a directory with any other training source code dependencies aside from the entry point file (default: None). If ``source_dir`` is an S3 URI, it must point to a tar.gz file. Structure within this directory are preserved when training on Amazon SageMaker. - hyperparameters (dict): Hyperparameters that will be used for - training (default: None). The hyperparameters are made + hyperparameters (dict[str, str] or dict[str, PipelineVariable]): Hyperparameters + that will be used for training (default: None). The hyperparameters are made accessible as a dict[str, str] to the training code on SageMaker. For convenience, this accepts other types for keys and values, but ``str()`` will be called to convert them before training. - image_uri (str): If specified, the estimator will use this image for training and - hosting, instead of selecting the appropriate SageMaker official image based on - framework_version and py_version. It can be an ECR url or dockerhub image and tag. + image_uri (str or PipelineVariable): If specified, the estimator will use this image + for training and hosting, instead of selecting the appropriate SageMaker official + image based on framework_version and py_version. It can be an ECR url or dockerhub + image and tag. Examples: * ``123412341234.dkr.ecr.us-west-2.amazonaws.com/my-custom-image:1.0`` diff --git a/src/sagemaker/mxnet/model.py b/src/sagemaker/mxnet/model.py index f2e18c009e..32f6a096f5 100644 --- a/src/sagemaker/mxnet/model.py +++ b/src/sagemaker/mxnet/model.py @@ -96,7 +96,7 @@ def __init__( """Initialize an MXNetModel. Args: - model_data (str): The S3 location of a SageMaker model data + model_data (str or PipelineVariable): The S3 location of a SageMaker model data ``.tar.gz`` file. role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and APIs that create Amazon SageMaker @@ -113,8 +113,8 @@ def __init__( py_version (str): Python version you want to use for executing your model training code. Defaults to ``None``. Required unless ``image_uri`` is provided. - image_uri (str): A Docker image URI (default: None). If not specified, - a default image for MXNet will be used. 
+ image_uri (str or PipelineVariable): A Docker image URI (default: None). + If not specified, a default image for MXNet will be used. If ``framework_version`` or ``py_version`` are ``None``, then ``image_uri`` is required. If ``image_uri`` is also ``None``, then a ``ValueError`` will be raised. @@ -122,7 +122,7 @@ def __init__( to call to create a predictor with an endpoint name and SageMaker ``Session``. If specified, ``deploy()`` returns the result of invoking this function on the created endpoint name. - model_server_workers (int): Optional. The number of worker processes + model_server_workers (int or PipelineVariable): Optional. The number of worker processes used by the inference server. If None, server will use one worker per vCPU. **kwargs: Keyword arguments passed to the superclass @@ -175,43 +175,49 @@ def register( """Creates a model package for creating SageMaker models or listing on Marketplace. Args: - content_types (list): The supported MIME types for the input data. - response_types (list): The supported MIME types for the output data. - inference_instances (list): A list of the instance types that are used to - generate inferences in real-time (default: None). - transform_instances (list): A list of the instance types on which a transformation - job can be run or on which an endpoint can be deployed (default: None). - model_package_name (str): Model Package name, exclusive to `model_package_group_name`, - using `model_package_name` makes the Model Package un-versioned (default: None). - model_package_group_name (str): Model Package Group name, exclusive to - `model_package_name`, using `model_package_group_name` makes the Model Package + content_types (list[str] or list[PipelineVariable]): The supported MIME types for + the input data. + response_types (list[str] or list[PipelineVariable]): The supported MIME types for + the output data. + inference_instances (list[str] or list[PipelineVariable]): A list of the instance types + that are used to generate inferences in real-time (default: None). + transform_instances (list[str] or list[PipelineVariable]): A list of the instance types + on which a transformation job can be run or on which an endpoint can be deployed + (default: None). + model_package_name (str or PipelineVariable): Model Package name, exclusive to + `model_package_group_name`, using `model_package_name` makes the Model Package + un-versioned (default: None). + model_package_group_name (str or PipelineVariable): Model Package Group name, exclusive + to `model_package_name`, using `model_package_group_name` makes the Model Package versioned (default: None). - image_uri (str): Inference image uri for the container. Model class' self.image will - be used if it is None (default: None). + image_uri (str or PipelineVariable): Inference image uri for the container. Model class' + self.image will be used if it is None (default: None). model_metrics (ModelMetrics): ModelMetrics object (default: None). metadata_properties (MetadataProperties): MetadataProperties (default: None). marketplace_cert (bool): A boolean value indicating if the Model Package is certified for AWS Marketplace (default: False). - approval_status (str): Model Approval Status, values can be "Approved", "Rejected", - or "PendingManualApproval" (default: "PendingManualApproval"). + approval_status (str or PipelineVariable): Model Approval Status, values can be + "Approved", "Rejected", or "PendingManualApproval" + (default: "PendingManualApproval"). 
description (str): Model Package description (default: None). drift_check_baselines (DriftCheckBaselines): DriftCheckBaselines object (default: None). - customer_metadata_properties (dict[str, str]): A dictionary of key-value paired - metadata properties (default: None). - domain (str): Domain values can be "COMPUTER_VISION", "NATURAL_LANGUAGE_PROCESSING", - "MACHINE_LEARNING" (default: None). - sample_payload_url (str): The S3 path where the sample payload is stored - (default: None). - task (str): Task values which are supported by Inference Recommender are "FILL_MASK", - "IMAGE_CLASSIFICATION", "OBJECT_DETECTION", "TEXT_GENERATION", "IMAGE_SEGMENTATION", - "CLASSIFICATION", "REGRESSION", "OTHER" (default: None). - framework (str): Machine learning framework of the model package container image - (default: None). - framework_version (str): Framework version of the Model Package Container Image + customer_metadata_properties (dict[str, str] or dict[str, PipelineVariable]): + A dictionary of key-value paired metadata properties (default: None). + domain (str or PipelineVariable): Domain values can be "COMPUTER_VISION", + "NATURAL_LANGUAGE_PROCESSING", "MACHINE_LEARNING" (default: None). + sample_payload_url (str or PipelineVariable): The S3 path where the sample payload + is stored (default: None). + task (str or PipelineVariable): Task values which are supported by Inference Recommender + are "FILL_MASK", "IMAGE_CLASSIFICATION", "OBJECT_DETECTION", "TEXT_GENERATION", + "IMAGE_SEGMENTATION", "CLASSIFICATION", "REGRESSION", "OTHER" (default: None). + framework (str or PipelineVariable): Machine learning framework of the model package + container image (default: None). + framework_version (str or PipelineVariable): Framework version of the Model Package + Container Image (default: None). + nearest_model_name (str or PipelineVariable): Name of a pre-trained machine learning + benchmarked by Amazon SageMaker Inference Recommender (default: None). + data_input_configuration (str or PipelineVariable): Input object for the model (default: None). - nearest_model_name (str): Name of a pre-trained machine learning benchmarked by - Amazon SageMaker Inference Recommender (default: None). - data_input_configuration (str): Input object for the model (default: None). Returns: A `sagemaker.model.ModelPackage` instance. diff --git a/src/sagemaker/network.py b/src/sagemaker/network.py index b3bf72a95a..cc874091f6 100644 --- a/src/sagemaker/network.py +++ b/src/sagemaker/network.py @@ -40,12 +40,13 @@ def __init__( these parameters into a dictionary. Args: - enable_network_isolation (bool): Boolean that determines whether to enable - network isolation. - security_group_ids ([str]): A list of strings representing security group IDs. - subnets ([str]): A list of strings representing subnets. - encrypt_inter_container_traffic (bool): Boolean that determines whether to - encrypt inter-container traffic. Default value is None. + enable_network_isolation (bool or PipelineVariable): Boolean that determines + whether to enable network isolation. + security_group_ids (list[str] or list[PipelineVariable]): A list of strings representing + security group IDs. + subnets (list[str] or list[PipelineVariable]): A list of strings representing subnets. + encrypt_inter_container_traffic (bool or PipelineVariable): Boolean that determines + whether to encrypt inter-container traffic. Default value is None. 
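A sketch of ``NetworkConfig`` built from pipeline variables (``ParameterBoolean`` is assumed to be available in the SDK version in use; subnet and security group values are placeholders)::

    from sagemaker.network import NetworkConfig
    from sagemaker.workflow.parameters import ParameterBoolean, ParameterString

    network_config = NetworkConfig(
        enable_network_isolation=ParameterBoolean(name="EnableIsolation", default_value=False),
        subnets=[ParameterString(name="SubnetId")],
        security_group_ids=[ParameterString(name="SecurityGroupId")],
    )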
""" self.enable_network_isolation = enable_network_isolation self.security_group_ids = security_group_ids diff --git a/src/sagemaker/parameter.py b/src/sagemaker/parameter.py index b44e6f9ef2..e8634e99c5 100644 --- a/src/sagemaker/parameter.py +++ b/src/sagemaker/parameter.py @@ -38,9 +38,9 @@ def __init__( """Initialize a parameter range. Args: - min_value (float or int): The minimum value for the range. - max_value (float or int): The maximum value for the range. - scaling_type (str): The scale used for searching the range during + min_value (float or int or PipelineVariable): The minimum value for the range. + max_value (float or int or PipelineVariable): The maximum value for the range. + scaling_type (str or PipelineVariable): The scale used for searching the range during tuning (default: 'Auto'). Valid values: 'Auto', 'Linear', 'Logarithmic' and 'ReverseLogarithmic'. """ diff --git a/src/sagemaker/pipeline.py b/src/sagemaker/pipeline.py index f7c1bded9a..5a293d0aec 100644 --- a/src/sagemaker/pipeline.py +++ b/src/sagemaker/pipeline.py @@ -66,16 +66,16 @@ def __init__( function on the created endpoint name. name (str): The model name. If None, a default model name will be selected on each ``deploy``. - vpc_config (dict[str, list[str]]): The VpcConfig set on the model - (default: None) + vpc_config (dict[str, list[str]] or dict[str, list[PipelineVariable]]): + The VpcConfig set on the model (default: None) * 'Subnets' (list[str]): List of subnet ids. * 'SecurityGroupIds' (list[str]): List of security group ids. sagemaker_session (sagemaker.session.Session): A SageMaker Session object, used for SageMaker interactions (default: None). If not specified, one is created using the default AWS configuration chain. - enable_network_isolation (bool): Default False. if True, enables - network isolation in the endpoint, isolating the model + enable_network_isolation (bool or PipelineVariable): Default False. if True, + enables network isolation in the endpoint, isolating the model container. No inbound or outbound network calls can be made to or from the model container.Boolean """ @@ -293,43 +293,49 @@ def register( """Creates a model package for creating SageMaker models or listing on Marketplace. Args: - content_types (list): The supported MIME types for the input data. - response_types (list): The supported MIME types for the output data. - inference_instances (list): A list of the instance types that are used to - generate inferences in real-time (default: None). - transform_instances (list): A list of the instance types on which a transformation - job can be run or on which an endpoint can be deployed (default: None). - model_package_name (str): Model Package name, exclusive to `model_package_group_name`, - using `model_package_name` makes the Model Package un-versioned (default: None). - model_package_group_name (str): Model Package Group name, exclusive to - `model_package_name`, using `model_package_group_name` makes the Model Package - versioned (default: None). - image_uri (str): Inference image uri for the container. Model class' self.image will - be used if it is None (default: None). + content_types (list[str] or list[PipelineVariable]): The supported MIME types + for the input data. + response_types (list[str] or list[PipelineVariable]): The supported MIME types + for the output data. + inference_instances (list[str] or list[PipelineVariable]): A list of the instance + types that are used to generate inferences in real-time (default: None). 
+ transform_instances (list[str] or list[PipelineVariable]): A list of the instance types + on which a transformation job can be run or on which an endpoint can be deployed + (default: None). + model_package_name (str or PipelineVariable): Model Package name, exclusive to + `model_package_group_name`, using `model_package_name` makes the Model Package + un-versioned (default: None). + model_package_group_name (str or PipelineVariable): Model Package Group name, + exclusive to `model_package_name`, using `model_package_group_name` makes + the Model Package versioned (default: None). + image_uri (str or PipelineVariable): Inference image uri for the container. + Model class' self.image will be used if it is None (default: None). model_metrics (ModelMetrics): ModelMetrics object (default: None). metadata_properties (MetadataProperties): MetadataProperties object (default: None). marketplace_cert (bool): A boolean value indicating if the Model Package is certified for AWS Marketplace (default: False). - approval_status (str): Model Approval Status, values can be "Approved", "Rejected", - or "PendingManualApproval" (default: "PendingManualApproval"). + approval_status (str or PipelineVariable): Model Approval Status, values can + be "Approved", "Rejected", or "PendingManualApproval" + (default: "PendingManualApproval"). description (str): Model Package description (default: None). drift_check_baselines (DriftCheckBaselines): DriftCheckBaselines object (default: None). - customer_metadata_properties (dict[str, str]): A dictionary of key-value paired - metadata properties (default: None). - domain (str): Domain values can be "COMPUTER_VISION", "NATURAL_LANGUAGE_PROCESSING", - "MACHINE_LEARNING" (default: None). - sample_payload_url (str): The S3 path where the sample payload is stored - (default: None). - task (str): Task values which are supported by Inference Recommender are "FILL_MASK", - "IMAGE_CLASSIFICATION", "OBJECT_DETECTION", "TEXT_GENERATION", "IMAGE_SEGMENTATION", - "CLASSIFICATION", "REGRESSION", "OTHER" (default: None). - framework (str): Machine learning framework of the model package container image - (default: None). - framework_version (str): Framework version of the Model Package Container Image + customer_metadata_properties (dict[str, str] or dict[str, PipelineVariable]): + A dictionary of key-value paired metadata properties (default: None). + domain (str or PipelineVariable): Domain values can be "COMPUTER_VISION", + "NATURAL_LANGUAGE_PROCESSING", "MACHINE_LEARNING" (default: None). + sample_payload_url (str or PipelineVariable): The S3 path where the sample payload + is stored (default: None). + task (str or PipelineVariable): Task values which are supported by Inference Recommender + are "FILL_MASK", "IMAGE_CLASSIFICATION", "OBJECT_DETECTION", "TEXT_GENERATION", + "IMAGE_SEGMENTATION", "CLASSIFICATION", "REGRESSION", "OTHER" (default: None). + framework (str or PipelineVariable): Machine learning framework of the model package + container image (default: None). + framework_version (str or PipelineVariable): Framework version of the Model Package + Container Image (default: None). + nearest_model_name (str or PipelineVariable): Name of a pre-trained machine learning + benchmarked by Amazon SageMaker Inference Recommender (default: None). + data_input_configuration (str or PipelineVariable): Input object for the model (default: None). - nearest_model_name (str): Name of a pre-trained machine learning benchmarked by - Amazon SageMaker Inference Recommender (default: None). 
- data_input_configuration (str): Input object for the model (default: None). Returns: A `sagemaker.model.ModelPackage` instance. diff --git a/src/sagemaker/processing.py b/src/sagemaker/processing.py index 9a1d8bd431..60df2abb8e 100644 --- a/src/sagemaker/processing.py +++ b/src/sagemaker/processing.py @@ -74,20 +74,22 @@ def __init__( role (str): An AWS IAM role name or ARN. Amazon SageMaker Processing uses this role to access AWS resources, such as data stored in Amazon S3. - image_uri (str): The URI of the Docker image to use for the + image_uri (str or PipelineVariable): The URI of the Docker image to use for the processing jobs. - instance_count (int): The number of instances to run + instance_count (int or PipelineVariable): The number of instances to run a processing job with. - instance_type (str): The type of EC2 instance to use for + instance_type (str or PipelineVariable): The type of EC2 instance to use for processing, for example, 'ml.c4.xlarge'. - entrypoint (list[str]): The entrypoint for the processing job (default: None). - This is in the form of a list of strings that make a command. - volume_size_in_gb (int): Size in GB of the EBS volume + entrypoint (list[str] or list[PipelineVariable]): The entrypoint for the + processing job (default: None). This is in the form of a list of strings + that make a command. + volume_size_in_gb (int or PipelineVariable): Size in GB of the EBS volume to use for storing data during processing (default: 30). - volume_kms_key (str): A KMS key for the processing + volume_kms_key (str or PipelineVariable): A KMS key for the processing volume (default: None). - output_kms_key (str): The KMS key ID for processing job outputs (default: None). - max_runtime_in_seconds (int): Timeout in seconds (default: None). + output_kms_key (str or PipelineVariable): The KMS key ID for processing job + outputs (default: None). + max_runtime_in_seconds (int or PipelineVariable): Timeout in seconds (default: None). After this amount of time, Amazon SageMaker terminates the job, regardless of its current status. If `max_runtime_in_seconds` is not specified, the default value is 24 hours. @@ -98,10 +100,10 @@ def __init__( Session object which manages interactions with Amazon SageMaker and any other AWS services needed. If not specified, the processor creates one using the default AWS configuration chain. - env (dict[str, str]): Environment variables to be passed to - the processing jobs (default: None). - tags (list[dict]): List of tags to be passed to the processing job - (default: None). For more, see + env (dict[str, str] or dict[str, PipelineVariable]): Environment variables + to be passed to the processing jobs (default: None). + tags (list[dict[str, str] or list[dict[str, PipelineVariable]]): List of tags + to be passed to the processing job (default: None). For more, see https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html. network_config (:class:`~sagemaker.network.NetworkConfig`): A :class:`~sagemaker.network.NetworkConfig` @@ -155,8 +157,8 @@ def run( outputs (list[:class:`~sagemaker.processing.ProcessingOutput`]): Outputs for the processing job. These can be specified as either path strings or :class:`~sagemaker.processing.ProcessingOutput` objects (default: None). - arguments (list[str]): A list of string arguments to be passed to a - processing job (default: None). + arguments (list[str] or list[PipelineVariable]): A list of string arguments + to be passed to a processing job (default: None). 
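A processor can draw its instance settings from parameters in the same way (minimal sketch, assuming the hunk above belongs to ``Processor.__init__``; the image URI and role are placeholders)::

    from sagemaker.processing import Processor
    from sagemaker.workflow.parameters import ParameterInteger, ParameterString

    processor = Processor(
        role="my-sagemaker-role",
        image_uri=ParameterString(name="ProcessingImage"),
        instance_count=ParameterInteger(name="ProcessingInstanceCount", default_value=1),
        instance_type=ParameterString(name="ProcessingInstanceType", default_value="ml.m5.xlarge"),
    )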
wait (bool): Whether the call should wait until the job completes (default: True). logs (bool): Whether to show the logs produced by the job. Only meaningful when ``wait`` is True (default: True). @@ -424,20 +426,21 @@ def __init__( role (str): An AWS IAM role name or ARN. Amazon SageMaker Processing uses this role to access AWS resources, such as data stored in Amazon S3. - image_uri (str): The URI of the Docker image to use for the + image_uri (str or PipelineVariable): The URI of the Docker image to use for the processing jobs. command ([str]): The command to run, along with any command-line flags. Example: ["python3", "-v"]. - instance_count (int): The number of instances to run + instance_count (int or PipelineVariable): The number of instances to run a processing job with. - instance_type (str): The type of EC2 instance to use for + instance_type (str or PipelineVariable): The type of EC2 instance to use for processing, for example, 'ml.c4.xlarge'. - volume_size_in_gb (int): Size in GB of the EBS volume + volume_size_in_gb (int or PipelineVariable): Size in GB of the EBS volume to use for storing data during processing (default: 30). - volume_kms_key (str): A KMS key for the processing + volume_kms_key (str or PipelineVariable): A KMS key for the processing volume (default: None). - output_kms_key (str): The KMS key ID for processing job outputs (default: None). - max_runtime_in_seconds (int): Timeout in seconds (default: None). + output_kms_key (str or PipelineVariable): The KMS key ID for processing + job outputs (default: None). + max_runtime_in_seconds (int or PipelineVariable): Timeout in seconds (default: None). After this amount of time, Amazon SageMaker terminates the job, regardless of its current status. If `max_runtime_in_seconds` is not specified, the default value is 24 hours. @@ -448,10 +451,10 @@ def __init__( Session object which manages interactions with Amazon SageMaker and any other AWS services needed. If not specified, the processor creates one using the default AWS configuration chain. - env (dict[str, str]): Environment variables to be passed to - the processing jobs (default: None). - tags (list[dict]): List of tags to be passed to the processing job - (default: None). For more, see + env (dict[str, str] or dict[str, PipelineVariable]): Environment variables to + be passed to the processing jobs (default: None). + tags (list[dict[str, str]] or list[dict[str, PipelineVariable]]): List of tags to + be passed to the processing job (default: None). For more, see https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html. network_config (:class:`~sagemaker.network.NetworkConfig`): A :class:`~sagemaker.network.NetworkConfig` @@ -1108,22 +1111,23 @@ def __init__( for a processing job and provides a method to turn those parameters into a dictionary. Args: - source (str): The source for the input. If a local path is provided, it will - automatically be uploaded to S3 under: + source (str or PipelineVariable): The source for the input. If a local path + is provided, it will automatically be uploaded to S3 under: "s3:////input/". - destination (str): The destination of the input. - input_name (str): The name for the input. If a name + destination (str or PipelineVariable): The destination of the input. + input_name (str or PipelineVariable): The name for the input. If a name is not provided, one will be generated (eg. "input-1"). - s3_data_type (str): Valid options are "ManifestFile" or "S3Prefix". - s3_input_mode (str): Valid options are "Pipe" or "File".
- s3_data_distribution_type (str): Valid options are "FullyReplicated" + s3_data_type (str or PipelineVariable): Valid options are "ManifestFile" or "S3Prefix". + s3_input_mode (str or PipelineVariable): Valid options are "Pipe" or "File". + s3_data_distribution_type (str or PipelineVariable): Valid options are "FullyReplicated" or "ShardedByS3Key". - s3_compression_type (str): Valid options are "None" or "Gzip". + s3_compression_type (str or PipelineVariable): Valid options are "None" or "Gzip". s3_input (:class:`~sagemaker.dataset_definition.inputs.S3Input`) Metadata of data objects stored in S3 dataset_definition (:class:`~sagemaker.dataset_definition.inputs.DatasetDefinition`) DatasetDefinition input - app_managed (bool): Whether the input are managed by SageMaker or application + app_managed (bool or PipelineVariable): Whether the input are managed by SageMaker + or application """ self.source = source self.destination = destination @@ -1211,16 +1215,18 @@ def __init__( processing job and provides a method to turn those parameters into a dictionary. Args: - source (str): The source for the output. - destination (str): The destination of the output. If a destination + source (str or PipelineVariable): The source for the output. + destination (str or PipelineVariable): The destination of the output. If a destination is not provided, one will be generated: "s3:////output/" (Note: this does not apply when used with :class:`~sagemaker.workflow.steps.ProcessingStep`). - output_name (str): The name of the output. If a name + output_name (str or PipelineVariable): The name of the output. If a name is not provided, one will be generated (eg. "output-1"). - s3_upload_mode (str): Valid options are "EndOfJob" or "Continuous". - app_managed (bool): Whether the input are managed by SageMaker or application + s3_upload_mode (str or PipelineVariable): Valid options are "EndOfJob" + or "Continuous". + app_managed (bool or PipelineVariable): Whether the input are managed by SageMaker + or application feature_store_output (:class:`~sagemaker.processing.FeatureStoreOutput`) Configuration for processing job outputs of FeatureStore. """ @@ -1328,26 +1334,29 @@ def __init__( ``image_uri`` is provided. role (str): An AWS IAM role name or ARN. Amazon SageMaker Processing uses this role to access AWS resources, such as data stored in Amazon S3. - instance_count (int): The number of instances to run a processing job with. - instance_type (str): The type of EC2 instance to use for processing, for - example, 'ml.c4.xlarge'. + instance_count (int or PipelineVariable): The number of instances to run a + processing job with. + instance_type (str or PipelineVariable): The type of EC2 instance to use for + processing, for example, 'ml.c4.xlarge'. py_version (str): Python version you want to use for executing your model training code. One of 'py2' or 'py3'. Defaults to 'py3'. Value is ignored when ``image_uri`` is provided. - image_uri (str): The URI of the Docker image to use for the + image_uri (str or PipelineVariable): The URI of the Docker image to use for the processing jobs (default: None). command ([str]): The command to run, along with any command-line flags to *precede* the ```code script```. Example: ["python3", "-v"]. If not provided, ["python"] will be chosen (default: None). - volume_size_in_gb (int): Size in GB of the EBS volume + volume_size_in_gb (int or PipelineVariable): Size in GB of the EBS volume to use for storing data during processing (default: 30). 
- volume_kms_key (str): A KMS key for the processing volume (default: None). - output_kms_key (str): The KMS key ID for processing job outputs (default: None). + volume_kms_key (str or PipelineVariable): A KMS key for the processing volume + (default: None). + output_kms_key (str or PipelineVariable): The KMS key ID for processing job outputs + (default: None). code_location (str): The S3 prefix URI where custom code will be uploaded (default: None). The code file uploaded to S3 is 'code_location/job-name/source/sourcedir.tar.gz'. If not specified, the default ``code location`` is 's3://{sagemaker-default-bucket}' - max_runtime_in_seconds (int): Timeout in seconds (default: None). + max_runtime_in_seconds (int or PipelineVariable): Timeout in seconds (default: None). After this amount of time, Amazon SageMaker terminates the job, regardless of its current status. If `max_runtime_in_seconds` is not specified, the default value is 24 hours. @@ -1358,10 +1367,10 @@ def __init__( Session object which manages interactions with Amazon SageMaker and any other AWS services needed. If not specified, the processor creates one using the default AWS configuration chain (default: None). - env (dict[str, str]): Environment variables to be passed to - the processing jobs (default: None). - tags (list[dict]): List of tags to be passed to the processing job - (default: None). For more, see + env (dict[str, str] or dict[str, PipelineVariable]): Environment variables to + be passed to the processing jobs (default: None). + tags (list[dict[str, str] or list[dict[str, PipelineVariable]]): List of tags to + be passed to the processing job (default: None). For more, see https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html. network_config (:class:`~sagemaker.network.NetworkConfig`): A :class:`~sagemaker.network.NetworkConfig` @@ -1594,8 +1603,8 @@ def run( # type: ignore[override] outputs (list[:class:`~sagemaker.processing.ProcessingOutput`]): Outputs for the processing job. These can be specified as either path strings or :class:`~sagemaker.processing.ProcessingOutput` objects (default: None). - arguments (list[str]): A list of string arguments to be passed to a - processing job (default: None). + arguments (list[str] or list[PipelineVariable]): A list of string arguments + to be passed to a processing job (default: None). wait (bool): Whether the call should wait until the job completes (default: True). logs (bool): Whether to show the logs produced by the job. Only meaningful when wait is True (default: True). diff --git a/src/sagemaker/pytorch/estimator.py b/src/sagemaker/pytorch/estimator.py index 153d4656d4..cc2c80b688 100644 --- a/src/sagemaker/pytorch/estimator.py +++ b/src/sagemaker/pytorch/estimator.py @@ -70,8 +70,8 @@ def __init__( home-page: https://github.com/aws/sagemaker-python-sdk Args: - entry_point (str): Path (absolute or relative) to the Python source - file which should be executed as the entry point to training. + entry_point (str or PipelineVariable): Path (absolute or relative) to the + Python source file which should be executed as the entry point to training. If ``source_dir`` is specified, then ``entry_point`` must point to a file located at the root of ``source_dir``. framework_version (str): PyTorch version you want to use for @@ -81,8 +81,8 @@ def __init__( py_version (str): Python version you want to use for executing your model training code. One of 'py2' or 'py3'. Defaults to ``None``. Required unless ``image_uri`` is provided. 
- source_dir (str): Path (absolute, relative or an S3 URI) to a directory - with any other training source code dependencies aside from the entry + source_dir (str or PipelineVariable): Path (absolute, relative or an S3 URI) to + a directory with any other training source code dependencies aside from the entry point file (default: None). If ``source_dir`` is an S3 URI, it must point to a tar.gz file. Structure within this directory are preserved when training on Amazon SageMaker. diff --git a/src/sagemaker/pytorch/model.py b/src/sagemaker/pytorch/model.py index a16fc4d5e2..73ecdd7ad7 100644 --- a/src/sagemaker/pytorch/model.py +++ b/src/sagemaker/pytorch/model.py @@ -97,7 +97,7 @@ def __init__( """Initialize a PyTorchModel. Args: - model_data (str): The S3 location of a SageMaker model data + model_data (str or PipelineVariable): The S3 location of a SageMaker model data ``.tar.gz`` file. role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and APIs that create Amazon SageMaker @@ -114,8 +114,8 @@ def __init__( py_version (str): Python version you want to use for executing your model training code. Defaults to ``None``. Required unless ``image_uri`` is provided. - image_uri (str): A Docker image URI (default: None). If not specified, - a default image for PyTorch will be used. + image_uri (str or PipelineVariable): A Docker image URI (default: None). + If not specified, a default image for PyTorch will be used. If ``framework_version`` or ``py_version`` are ``None``, then ``image_uri`` is required. If ``image_uri`` is also ``None``, then a ``ValueError`` will be raised. @@ -123,7 +123,7 @@ def __init__( to call to create a predictor with an endpoint name and SageMaker ``Session``. If specified, ``deploy()`` returns the result of invoking this function on the created endpoint name. - model_server_workers (int): Optional. The number of worker processes + model_server_workers (int or PipelineVariable): Optional. The number of worker processes used by the inference server. If None, server will use one worker per vCPU. **kwargs: Keyword arguments passed to the superclass @@ -177,43 +177,49 @@ def register( """Creates a model package for creating SageMaker models or listing on Marketplace. Args: - content_types (list): The supported MIME types for the input data. - response_types (list): The supported MIME types for the output data. - inference_instances (list): A list of the instance types that are used to - generate inferences in real-time (default: None). - transform_instances (list): A list of the instance types on which a transformation - job can be run or on which an endpoint can be deployed (default: None). - model_package_name (str): Model Package name, exclusive to `model_package_group_name`, - using `model_package_name` makes the Model Package un-versioned (default: None). - model_package_group_name (str): Model Package Group name, exclusive to - `model_package_name`, using `model_package_group_name` makes the Model Package + content_types (list[str] or list[PipelineVariable]): The supported MIME types + for the input data. + response_types (list[str] or list[PipelineVariable]): The supported MIME types + for the output data. + inference_instances (list[str] or list[PipelineVariable]): A list of the + instance types that are used to generate inferences in real-time (default: None). 
+ transform_instances (list[str] or list[PipelineVariable]): A list of the + instance types on which a transformation job can be run or on which an + endpoint can be deployed (default: None). + model_package_name (str or PipelineVariable): Model Package name, exclusive to + `model_package_group_name`, using `model_package_name` makes the Model Package + un-versioned (default: None). + model_package_group_name (str or PipelineVariable): Model Package Group name, exclusive + to `model_package_name`, using `model_package_group_name` makes the Model Package versioned (default: None). - image_uri (str): Inference image uri for the container. Model class' self.image will - be used if it is None (default: None). + image_uri (str or PipelineVariable): Inference image uri for the container. + Model class' self.image will be used if it is None (default: None). model_metrics (ModelMetrics): ModelMetrics object (default: None). metadata_properties (MetadataProperties): MetadataProperties object (default: None). marketplace_cert (bool): A boolean value indicating if the Model Package is certified for AWS Marketplace (default: False). - approval_status (str): Model Approval Status, values can be "Approved", "Rejected", - or "PendingManualApproval" (default: "PendingManualApproval"). + approval_status (str or PipelineVariable): Model Approval Status, values can be + "Approved", "Rejected", or "PendingManualApproval" + (default: "PendingManualApproval"). description (str): Model Package description (default: None). drift_check_baselines (DriftCheckBaselines): DriftCheckBaselines object (default: None). - customer_metadata_properties (dict[str, str]): A dictionary of key-value paired - metadata properties (default: None). - domain (str): Domain values can be "COMPUTER_VISION", "NATURAL_LANGUAGE_PROCESSING", - "MACHINE_LEARNING" (default: None). - sample_payload_url (str): The S3 path where the sample payload is stored + customer_metadata_properties (dict[str, str] or dict[str, PipelineVariable]): + A dictionary of key-value paired metadata properties (default: None). + domain (str or PipelineVariable): Domain values can be "COMPUTER_VISION", + "NATURAL_LANGUAGE_PROCESSING", "MACHINE_LEARNING" (default: None). + sample_payload_url (str or PipelineVariable): The S3 path where the sample payload + is stored (default: None). + task (str or PipelineVariable): Task values which are supported by Inference Recommender + are "FILL_MASK", "IMAGE_CLASSIFICATION", "OBJECT_DETECTION", "TEXT_GENERATION", + "IMAGE_SEGMENTATION", "CLASSIFICATION", "REGRESSION", "OTHER" (default: None). + framework (str or PipelineVariable): Machine learning framework of the model package + container image (default: None). + framework_version (str or PipelineVariable): Framework version of the Model Package + Container Image (default: None). + nearest_model_name (str or PipelineVariable): Name of a pre-trained machine learning + benchmarked by Amazon SageMaker Inference Recommender (default: None). + data_input_configuration (str or PipelineVariable): Input object for the model (default: None). - task (str): Task values which are supported by Inference Recommender are "FILL_MASK", - "IMAGE_CLASSIFICATION", "OBJECT_DETECTION", "TEXT_GENERATION", "IMAGE_SEGMENTATION", - "CLASSIFICATION", "REGRESSION", "OTHER" (default: None). - framework (str): Machine learning framework of the model package container image - (default: None). - framework_version (str): Framework version of the Model Package Container Image - (default: None). 
- nearest_model_name (str): Name of a pre-trained machine learning benchmarked by - Amazon SageMaker Inference Recommender (default: None). - data_input_configuration (str): Input object for the model (default: None). Returns: A `sagemaker.model.ModelPackage` instance. diff --git a/src/sagemaker/rl/estimator.py b/src/sagemaker/rl/estimator.py index b004dd87b8..b07897584f 100644 --- a/src/sagemaker/rl/estimator.py +++ b/src/sagemaker/rl/estimator.py @@ -106,7 +106,7 @@ def __init__( homepage: https://github.com/aws/sagemaker-python-sdk Args: - entry_point (str): Path (absolute or relative) to the Python source + entry_point (str or PipelineVariable): Path (absolute or relative) to the Python source file which should be executed as the entry point to training. If ``source_dir`` is specified, then ``entry_point`` must point to a file located at the root of ``source_dir``. @@ -117,9 +117,9 @@ def __init__( framework (sagemaker.rl.RLFramework): Framework (MXNet or TensorFlow) you want to be used as a toolkit backed for reinforcement learning training. - source_dir (str): Path (absolute, relative or an S3 URI) to a directory - with any other training source code dependencies aside from the entry - point file (default: None). If ``source_dir`` is an S3 URI, it must + source_dir (str or PipelineVariable): Path (absolute, relative or an S3 URI) + to a directory with any other training source code dependencies aside from + the entry point file (default: None). If ``source_dir`` is an S3 URI, it must point to a tar.gz file. Structure within this directory are preserved when training on Amazon SageMaker. hyperparameters (dict): Hyperparameters that will be used for diff --git a/src/sagemaker/sklearn/estimator.py b/src/sagemaker/sklearn/estimator.py index e13fbb764c..787056f250 100644 --- a/src/sagemaker/sklearn/estimator.py +++ b/src/sagemaker/sklearn/estimator.py @@ -67,7 +67,7 @@ def __init__( the project home-page: https://github.com/aws/sagemaker-python-sdk Args: - entry_point (str): Path (absolute or relative) to the Python source + entry_point (str or PipelineVariable): Path (absolute or relative) to the Python source file which should be executed as the entry point to training. If ``source_dir`` is specified, then ``entry_point`` must point to a file located at the root of ``source_dir``. @@ -79,8 +79,8 @@ def __init__( model training code (default: 'py3'). Currently, 'py3' is the only supported version. If ``None`` is passed in, ``image_uri`` must be provided. - source_dir (str): Path (absolute, relative or an S3 URI) to a directory - with any other training source code dependencies aside from the entry + source_dir (str or PipelineVariable): Path (absolute, relative or an S3 URI) to + a directory with any other training source code dependencies aside from the entry point file (default: None). If ``source_dir`` is an S3 URI, it must point to a tar.gz file. Structure within this directory are preserved when training on Amazon SageMaker. diff --git a/src/sagemaker/sklearn/model.py b/src/sagemaker/sklearn/model.py index 5bb469991a..1a5129dd15 100644 --- a/src/sagemaker/sklearn/model.py +++ b/src/sagemaker/sklearn/model.py @@ -90,7 +90,7 @@ def __init__( """Initialize an SKLearnModel. Args: - model_data (str): The S3 location of a SageMaker model data + model_data (str or PipelineVariable): The S3 location of a SageMaker model data ``.tar.gz`` file. role (str): An AWS IAM role (either name or full ARN). 
The Amazon SageMaker training jobs and APIs that create Amazon SageMaker @@ -108,8 +108,8 @@ def __init__( model training code (default: 'py3'). Currently, 'py3' is the only supported version. If ``None`` is passed in, ``image_uri`` must be provided. - image_uri (str): A Docker image URI (default: None). If not specified, - a default image for Scikit-learn will be used. + image_uri (str or PipelineVariable): A Docker image URI (default: None). + If not specified, a default image for Scikit-learn will be used. If ``framework_version`` or ``py_version`` are ``None``, then ``image_uri`` is required. If ``image_uri`` is also ``None``, then a ``ValueError`` will be raised. @@ -117,7 +117,7 @@ def __init__( to call to create a predictor with an endpoint name and SageMaker ``Session``. If specified, ``deploy()`` returns the result of invoking this function on the created endpoint name. - model_server_workers (int): Optional. The number of worker processes + model_server_workers (int or PipelineVariable): Optional. The number of worker processes used by the inference server. If None, server will use one worker per vCPU. **kwargs: Keyword arguments passed to the ``FrameworkModel`` @@ -170,43 +170,49 @@ def register( """Creates a model package for creating SageMaker models or listing on Marketplace. Args: - content_types (list): The supported MIME types for the input data. - response_types (list): The supported MIME types for the output data. - inference_instances (list): A list of the instance types that are used to - generate inferences in real-time (default: None). - transform_instances (list): A list of the instance types on which a transformation - job can be run or on which an endpoint can be deployed (default: None). - model_package_name (str): Model Package name, exclusive to `model_package_group_name`, - using `model_package_name` makes the Model Package un-versioned (default: None). - model_package_group_name (str): Model Package Group name, exclusive to - `model_package_name`, using `model_package_group_name` makes the Model Package - versioned (default: None). - image_uri (str): Inference image uri for the container. Model class' self.image will - be used if it is None (default: None). + content_types (list[str] or list[PipelineVariable]): The supported MIME types + for the input data. + response_types (list[str] or list[PipelineVariable]): The supported MIME types + for the output data. + inference_instances (list[str] or list[PipelineVariable]): A list of the instance + types that are used to generate inferences in real-time (default: None). + transform_instances (list[str] or list[PipelineVariable]): A list of the instance types + on which a transformation job can be run or on which an endpoint can be deployed + (default: None). + model_package_name (str or PipelineVariable): Model Package name, exclusive to + `model_package_group_name`, using `model_package_name` makes the Model Package + un-versioned (default: None). + model_package_group_name (str or PipelineVariable): Model Package Group name, + exclusive to `model_package_name`, using `model_package_group_name` makes the + Model Package versioned (default: None). + image_uri (str or PipelineVariable): Inference image uri for the container. Model class' + self.image will be used if it is None (default: None). model_metrics (ModelMetrics): ModelMetrics object (default: None). metadata_properties (MetadataProperties): MetadataProperties object (default: None). 
marketplace_cert (bool): A boolean value indicating if the Model Package is certified for AWS Marketplace (default: False). - approval_status (str): Model Approval Status, values can be "Approved", "Rejected", - or "PendingManualApproval" (default: "PendingManualApproval"). + approval_status (str or PipelineVariable): Model Approval Status, values can be + "Approved", "Rejected", or "PendingManualApproval" + (default: "PendingManualApproval"). description (str): Model Package description (default: None). drift_check_baselines (DriftCheckBaselines): DriftCheckBaselines object (default: None). - customer_metadata_properties (dict[str, str]): A dictionary of key-value paired - metadata properties (default: None). - domain (str): Domain values can be "COMPUTER_VISION", "NATURAL_LANGUAGE_PROCESSING", - "MACHINE_LEARNING" (default: None). - sample_payload_url (str): The S3 path where the sample payload is stored - (default: None). - task (str): Task values which are supported by Inference Recommender are "FILL_MASK", - "IMAGE_CLASSIFICATION", "OBJECT_DETECTION", "TEXT_GENERATION", "IMAGE_SEGMENTATION", - "CLASSIFICATION", "REGRESSION", "OTHER" (default: None). - framework (str): Machine learning framework of the model package container image - (default: None). - framework_version (str): Framework version of the Model Package Container Image + customer_metadata_properties (dict[str, str] or dict[str, PipelineVariable]): + A dictionary of key-value paired metadata properties (default: None). + domain (str or PipelineVariable): Domain values can be "COMPUTER_VISION", + "NATURAL_LANGUAGE_PROCESSING", "MACHINE_LEARNING" (default: None). + sample_payload_url (str or PipelineVariable): The S3 path where the sample payload + is stored (default: None). + task (str or PipelineVariable): Task values which are supported by Inference Recommender + are "FILL_MASK", "IMAGE_CLASSIFICATION", "OBJECT_DETECTION", "TEXT_GENERATION", + "IMAGE_SEGMENTATION", "CLASSIFICATION", "REGRESSION", "OTHER" (default: None). + framework (str or PipelineVariable): Machine learning framework of the model package + container image (default: None). + framework_version (str or PipelineVariable): Framework version of the Model Package + Container Image (default: None). + nearest_model_name (str or PipelineVariable): Name of a pre-trained machine learning + benchmarked by Amazon SageMaker Inference Recommender (default: None). + data_input_configuration (str or PipelineVariable): Input object for the model (default: None). - nearest_model_name (str): Name of a pre-trained machine learning benchmarked by - Amazon SageMaker Inference Recommender (default: None). - data_input_configuration (str): Input object for the model (default: None). Returns: A `sagemaker.model.ModelPackage` instance. diff --git a/src/sagemaker/spark/processing.py b/src/sagemaker/spark/processing.py index 90f6a3d8ae..6f27df98db 100644 --- a/src/sagemaker/spark/processing.py +++ b/src/sagemaker/spark/processing.py @@ -710,16 +710,16 @@ def __init__( to access training data and model artifacts. After the endpoint is created, the inference code might use the IAM role, if it needs to access an AWS resource. - instance_type (str): Type of EC2 instance to use for + instance_type (str or PipelineVariable): Type of EC2 instance to use for processing, for example, 'ml.c4.xlarge'. - instance_count (int): The number of instances to run + instance_count (int or PipelineVariable): The number of instances to run the Processing job with. Defaults to 1. 
- volume_size_in_gb (int): Size in GB of the EBS volume to + volume_size_in_gb (int or PipelineVariable): Size in GB of the EBS volume to use for storing data during processing (default: 30). - volume_kms_key (str): A KMS key for the processing + volume_kms_key (str or PipelineVariable): A KMS key for the processing volume. - output_kms_key (str): The KMS key id for all ProcessingOutputs. - max_runtime_in_seconds (int): Timeout in seconds. + output_kms_key (str or PipelineVariable): The KMS key id for all ProcessingOutputs. + max_runtime_in_seconds (int or PipelineVariable): Timeout in seconds. After this amount of time Amazon SageMaker terminates the job regardless of its current status. base_job_name (str): Prefix for processing name. If not specified, @@ -729,8 +729,10 @@ def __init__( manages interactions with Amazon SageMaker APIs and any other AWS services needed. If not specified, the processor creates one using the default AWS configuration chain. - env (dict): Environment variables to be passed to the processing job. - tags ([dict]): List of tags to be passed to the processing job. + env (dict[str, str] or dict[str, PipelineVariable]): Environment variables to + be passed to the processing job. + tags (list[dict[str, str] or list[dict[str, PipelineVariable]]): List of tags to + be passed to the processing job. network_config (sagemaker.network.NetworkConfig): A NetworkConfig object that configures network isolation, encryption of inter-container traffic, security group IDs, and subnets. @@ -844,20 +846,20 @@ def run( Args: submit_app (str): Path (local or S3) to Python file to submit to Spark as the primary application - submit_py_files (list[str]): List of paths (local or S3) to provide for - `spark-submit --py-files` option - submit_jars (list[str]): List of paths (local or S3) to provide for - `spark-submit --jars` option - submit_files (list[str]): List of paths (local or S3) to provide for - `spark-submit --files` option + submit_py_files (list[str] or list[PipelineVariable]): List of paths (local or S3) + to provide for `spark-submit --py-files` option + submit_jars (list[str] or list[PipelineVariable]): List of paths (local or S3) + to provide for `spark-submit --jars` option + submit_files (list[str] or list[PipelineVariable]): List of paths (local or S3) + to provide for `spark-submit --files` option inputs (list[:class:`~sagemaker.processing.ProcessingInput`]): Input files for the processing job. These must be provided as :class:`~sagemaker.processing.ProcessingInput` objects (default: None). outputs (list[:class:`~sagemaker.processing.ProcessingOutput`]): Outputs for the processing job. These can be specified as either path strings or :class:`~sagemaker.processing.ProcessingOutput` objects (default: None). - arguments (list[str]): A list of string arguments to be passed to a - processing job (default: None). + arguments (list[str] or list[PipelineVariable]): A list of string arguments to + be passed to a processing job (default: None). wait (bool): Whether the call should wait until the job completes (default: True). logs (bool): Whether to show the logs produced by the job. Only meaningful when wait is True (default: True). @@ -877,8 +879,8 @@ def run( configuration (list[dict] or dict): Configuration for Hadoop, Spark, or Hive. List or dictionary of EMR-style classifications. https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-configure-apps.html - spark_event_logs_s3_uri (str): S3 path where spark application events will - be published to. 
+ spark_event_logs_s3_uri (str or PipelineVariable): S3 path where spark application + events will be published to. kms_key (str): The ARN of the KMS key that is used to encrypt the user code file (default: None). """ @@ -967,16 +969,16 @@ def __init__( to access training data and model artifacts. After the endpoint is created, the inference code might use the IAM role, if it needs to access an AWS resource. - instance_type (str): Type of EC2 instance to use for + instance_type (str or PipelineVariable): Type of EC2 instance to use for processing, for example, 'ml.c4.xlarge'. - instance_count (int): The number of instances to run + instance_count (int or PipelineVariable): The number of instances to run the Processing job with. Defaults to 1. - volume_size_in_gb (int): Size in GB of the EBS volume to + volume_size_in_gb (int or PipelineVariable): Size in GB of the EBS volume to use for storing data during processing (default: 30). - volume_kms_key (str): A KMS key for the processing + volume_kms_key (str or PipelineVariable): A KMS key for the processing volume. - output_kms_key (str): The KMS key id for all ProcessingOutputs. - max_runtime_in_seconds (int): Timeout in seconds. + output_kms_key (str or PipelineVariable): The KMS key id for all ProcessingOutputs. + max_runtime_in_seconds (int or PipelineVariable): Timeout in seconds. After this amount of time Amazon SageMaker terminates the job regardless of its current status. base_job_name (str): Prefix for processing name. If not specified, @@ -986,8 +988,10 @@ def __init__( manages interactions with Amazon SageMaker APIs and any other AWS services needed. If not specified, the processor creates one using the default AWS configuration chain. - env (dict): Environment variables to be passed to the processing job. - tags ([dict]): List of tags to be passed to the processing job. + env (dict[str, str] or dict[str, PipelineVariable]): Environment variables to + be passed to the processing job. + tags (list[dict[str, str] or list[dict[str, PipelineVariable]]): List of tags to + be passed to the processing job. network_config (sagemaker.network.NetworkConfig): A NetworkConfig object that configures network isolation, encryption of inter-container traffic, security group IDs, and subnets. @@ -1101,20 +1105,20 @@ def run( Args: submit_app (str): Path (local or S3) to Jar file to submit to Spark as the primary application - submit_class (str): Java class reference to submit to Spark as the primary - application - submit_jars (list[str]): List of paths (local or S3) to provide for - `spark-submit --jars` option - submit_files (list[str]): List of paths (local or S3) to provide for - `spark-submit --files` option + submit_class (str or PipelineVariable): Java class reference to submit to Spark + as the primary application + submit_jars (list[str] or list[PipelineVariable]): List of paths (local or S3) + to provide for `spark-submit --jars` option + submit_files (list[str] or list[PipelineVariable]): List of paths (local or S3) + to provide for `spark-submit --files` option inputs (list[:class:`~sagemaker.processing.ProcessingInput`]): Input files for the processing job. These must be provided as :class:`~sagemaker.processing.ProcessingInput` objects (default: None). outputs (list[:class:`~sagemaker.processing.ProcessingOutput`]): Outputs for the processing job. These can be specified as either path strings or :class:`~sagemaker.processing.ProcessingOutput` objects (default: None). 
- arguments (list[str]): A list of string arguments to be passed to a - processing job (default: None). + arguments (list[str] or list[PipelineVariable]): A list of string arguments to + be passed to a processing job (default: None). wait (bool): Whether the call should wait until the job completes (default: True). logs (bool): Whether to show the logs produced by the job. Only meaningful when wait is True (default: True). @@ -1134,8 +1138,8 @@ def run( configuration (list[dict] or dict): Configuration for Hadoop, Spark, or Hive. List or dictionary of EMR-style classifications. https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-configure-apps.html - spark_event_logs_s3_uri (str): S3 path where spark application events will - be published to. + spark_event_logs_s3_uri (str or PipelineVariable): S3 path where spark application + events will be published to. kms_key (str): The ARN of the KMS key that is used to encrypt the user code file (default: None). """ diff --git a/src/sagemaker/sparkml/model.py b/src/sagemaker/sparkml/model.py index 527cae0957..2fdea591d6 100644 --- a/src/sagemaker/sparkml/model.py +++ b/src/sagemaker/sparkml/model.py @@ -85,7 +85,7 @@ def __init__( """Initialize a SparkMLModel. Args: - model_data (str): The S3 location of a SageMaker model data + model_data (str or PipelineVariable): The S3 location of a SageMaker model data ``.tar.gz`` file. For SparkML, this will be the output that has been produced by the Spark job after serializing the Model via MLeap. diff --git a/src/sagemaker/tensorflow/estimator.py b/src/sagemaker/tensorflow/estimator.py index 9533f475a1..a2507e2bc2 100644 --- a/src/sagemaker/tensorflow/estimator.py +++ b/src/sagemaker/tensorflow/estimator.py @@ -60,10 +60,10 @@ def __init__( training code. Defaults to ``None``. Required unless ``image_uri`` is provided. List of supported versions: https://github.com/aws/sagemaker-python-sdk#tensorflow-sagemaker-estimators. - model_dir (str): S3 location where the checkpoint data and models can be exported to - during training (default: None). It will be passed in the training script as one of - the command line arguments. If not specified, one is provided based on - your training configuration: + model_dir (str or PipelineVariable): S3 location where the checkpoint data and models + can be exported to during training (default: None). It will be passed in the + training script as one of the command line arguments. If not specified, + one is provided based on your training configuration: * *distributed training with SMDistributed or MPI with Horovod* - ``/opt/ml/model`` * *single-machine training or distributed training without MPI* - \ @@ -73,9 +73,10 @@ def __init__( To disable having ``model_dir`` passed to your training script, set ``model_dir=False``. - image_uri (str): If specified, the estimator will use this image for training and - hosting, instead of selecting the appropriate SageMaker official image based on - framework_version and py_version. It can be an ECR url or dockerhub image and tag. + image_uri (str or PipelineVariable): If specified, the estimator will use this image + for training and hosting, instead of selecting the appropriate SageMaker official + image based on framework_version and py_version. + It can be an ECR url or dockerhub image and tag. 
Examples: 123.dkr.ecr.us-west-2.amazonaws.com/my-custom-image:1.0 diff --git a/src/sagemaker/tensorflow/model.py b/src/sagemaker/tensorflow/model.py index 82885995b7..b59ce360db 100644 --- a/src/sagemaker/tensorflow/model.py +++ b/src/sagemaker/tensorflow/model.py @@ -142,7 +142,7 @@ def __init__( """Initialize a Model. Args: - model_data (str): The S3 location of a SageMaker model data + model_data (str or PipelineVariable): The S3 location of a SageMaker model data ``.tar.gz`` file. role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and APIs that create Amazon SageMaker @@ -153,8 +153,8 @@ def __init__( file which should be executed as the entry point to model hosting. If ``source_dir`` is specified, then ``entry_point`` must point to a file located at the root of ``source_dir``. - image_uri (str): A Docker image URI (default: None). If not specified, - a default image for TensorFlow Serving will be used. + image_uri (str or PipelineVariable): A Docker image URI (default: None). + If not specified, a default image for TensorFlow Serving will be used. If ``framework_version`` is ``None``, then ``image_uri`` is required. If ``image_uri`` is also ``None``, then a ``ValueError`` will be raised. @@ -222,43 +222,49 @@ def register( """Creates a model package for creating SageMaker models or listing on Marketplace. Args: - content_types (list): The supported MIME types for the input data. - response_types (list): The supported MIME types for the output data. - inference_instances (list): A list of the instance types that are used to - generate inferences in real-time (default: None). - transform_instances (list): A list of the instance types on which a transformation - job can be run or on which an endpoint can be deployed (default: None). - model_package_name (str): Model Package name, exclusive to `model_package_group_name`, - using `model_package_name` makes the Model Package un-versioned (default: None). - model_package_group_name (str): Model Package Group name, exclusive to - `model_package_name`, using `model_package_group_name` makes the Model Package - versioned (default: None). - image_uri (str): Inference image uri for the container. Model class' self.image will - be used if it is None (default: None). + content_types (list[str] or list[PipelineVariable]): The supported MIME types + for the input data. + response_types (list[str] or list[PipelineVariable]): The supported MIME types + for the output data. + inference_instances (list[str] or list[PipelineVariable]): A list of the instance + types that are used to generate inferences in real-time (default: None). + transform_instances (list[str] or list[PipelineVariable]): A list of the instance + types on which a transformation job can be run or on which an endpoint can + be deployed (default: None). + model_package_name (str or PipelineVariable): Model Package name, exclusive to + `model_package_group_name`, using `model_package_name` makes the Model Package + un-versioned (default: None). + model_package_group_name (str or PipelineVariable): Model Package Group name, + exclusive to `model_package_name`, using `model_package_group_name` makes the + Model Package versioned (default: None). + image_uri (str or PipelineVariable): Inference image uri for the container. Model class' + self.image will be used if it is None (default: None). model_metrics (ModelMetrics): ModelMetrics object (default: None). metadata_properties (MetadataProperties): MetadataProperties object (default: None). 
marketplace_cert (bool): A boolean value indicating if the Model Package is certified for AWS Marketplace (default: False). - approval_status (str): Model Approval Status, values can be "Approved", "Rejected", - or "PendingManualApproval" (default: "PendingManualApproval"). + approval_status (str or PipelineVariable): Model Approval Status, values can be + "Approved", "Rejected", or "PendingManualApproval" + (default: "PendingManualApproval"). description (str): Model Package description (default: None). drift_check_baselines (DriftCheckBaselines): DriftCheckBaselines object (default: None). - customer_metadata_properties (dict[str, str]): A dictionary of key-value paired - metadata properties (default: None). - domain (str): Domain values can be "COMPUTER_VISION", "NATURAL_LANGUAGE_PROCESSING", - "MACHINE_LEARNING" (default: None). - sample_payload_url (str): The S3 path where the sample payload is stored + customer_metadata_properties (dict[str, str] or dict[str, PipelineVariable]): + A dictionary of key-value paired metadata properties (default: None). + domain (str or PipelineVariable): Domain values can be "COMPUTER_VISION", + "NATURAL_LANGUAGE_PROCESSING", "MACHINE_LEARNING" (default: None). + sample_payload_url (str or PipelineVariable): The S3 path where the sample payload + is stored (default: None). + task (str or PipelineVariable): Task values which are supported by Inference Recommender + are "FILL_MASK", "IMAGE_CLASSIFICATION", "OBJECT_DETECTION", "TEXT_GENERATION", + "IMAGE_SEGMENTATION", "CLASSIFICATION", "REGRESSION", "OTHER" (default: None). + framework (str or PipelineVariable): Machine learning framework of the model package + container image (default: None). + framework_version (str or PipelineVariable): Framework version of the Model Package + Container Image (default: None). + nearest_model_name (str or PipelineVariable): Name of a pre-trained machine learning + benchmarked by Amazon SageMaker Inference Recommender (default: None). + data_input_configuration (str or PipelineVariable): Input object for the model (default: None). - task (str): Task values which are supported by Inference Recommender are "FILL_MASK", - "IMAGE_CLASSIFICATION", "OBJECT_DETECTION", "TEXT_GENERATION", "IMAGE_SEGMENTATION", - "CLASSIFICATION", "REGRESSION", "OTHER" (default: None). - framework (str): Machine learning framework of the model package container image - (default: None). - framework_version (str): Framework version of the Model Package Container Image - (default: None). - nearest_model_name (str): Name of a pre-trained machine learning benchmarked by - Amazon SageMaker Inference Recommender (default: None). - data_input_configuration (str): Input object for the model (default: None). Returns: A `sagemaker.model.ModelPackage` instance. diff --git a/src/sagemaker/transformer.py b/src/sagemaker/transformer.py index 6df56ad154..6b1f51f9d4 100644 --- a/src/sagemaker/transformer.py +++ b/src/sagemaker/transformer.py @@ -51,32 +51,32 @@ def __init__( """Initialize a ``Transformer``. Args: - model_name (str): Name of the SageMaker model being used for the - transform job. - instance_count (int): Number of EC2 instances to use. - instance_type (str): Type of EC2 instance to use, for example, + model_name (str or PipelineVariable): Name of the SageMaker model being + used for the transform job. + instance_count (int or PipelineVariable): Number of EC2 instances to use. + instance_type (str or PipelineVariable): Type of EC2 instance to use, for example, 'ml.c4.xlarge'. 
- strategy (str): The strategy used to decide how to batch records in - a single request (default: None). Valid values: 'MultiRecord' + strategy (str or PipelineVariable): The strategy used to decide how to batch records + in a single request (default: None). Valid values: 'MultiRecord' and 'SingleRecord'. - assemble_with (str): How the output is assembled (default: None). + assemble_with (str or PipelineVariable): How the output is assembled (default: None). Valid values: 'Line' or 'None'. - output_path (str): S3 location for saving the transform result. If + output_path (str or PipelineVariable): S3 location for saving the transform result. If not specified, results are stored to a default bucket. - output_kms_key (str): Optional. KMS key ID for encrypting the + output_kms_key (str or PipelineVariable): Optional. KMS key ID for encrypting the transform output (default: None). - accept (str): The accept header passed by the client to + accept (str or PipelineVariable): The accept header passed by the client to the inference endpoint. If it is supported by the endpoint, it will be the format of the batch transform output. - max_concurrent_transforms (int): The maximum number of HTTP requests + max_concurrent_transforms (int or PipelineVariable): The maximum number of HTTP requests to be made to each individual transform container at one time. - max_payload (int): Maximum size of the payload in a single HTTP + max_payload (int or PipelineVariable): Maximum size of the payload in a single HTTP request to the container in MB. - tags (list[dict]): List of tags for labeling a transform job - (default: None). For more, see the SageMaker API documentation for - `Tag `_. - env (dict): Environment variables to be set for use during the - transform job (default: None). + tags (list[dict[str, str] or list[dict[str, PipelineVariable]]): List of tags for + labeling a transform job (default: None). For more, see the SageMaker API + documentation for `Tag `_. + env (dict[str, str] or dict[str, PipelineVariable]): Environment variables to be set + for use during the transform job (default: None). base_transform_job_name (str): Prefix for the transform job when the :meth:`~sagemaker.transformer.Transformer.transform` method launches. If not specified, a default prefix will be generated @@ -86,8 +86,8 @@ def __init__( manages interactions with Amazon SageMaker APIs and any other AWS services needed. If not specified, the estimator creates one using the default AWS configuration chain. - volume_kms_key (str): Optional. KMS key ID for encrypting the volume - attached to the ML compute instance (default: None). + volume_kms_key (str or PipelineVariable): Optional. KMS key ID for encrypting + the volume attached to the ML compute instance (default: None). """ self.model_name = model_name self.strategy = strategy @@ -133,8 +133,8 @@ def transform( """Start a new transform job. Args: - data (str): Input data location in S3. - data_type (str): What the S3 location defines (default: 'S3Prefix'). + data (str or PipelineVariable): Input data location in S3. + data_type (str or PipelineVariable): What the S3 location defines (default: 'S3Prefix'). Valid values: * 'S3Prefix' - the S3 URI defines a key name prefix. All objects with this prefix @@ -143,15 +143,15 @@ def transform( * 'ManifestFile' - the S3 URI points to a single manifest file listing each S3 object to use as an input for the transform job. - content_type (str): MIME type of the input data (default: None). 
- compression_type (str): Compression type of the input data, if + content_type (str or PipelineVariable): MIME type of the input data (default: None). + compression_type (str or PipelineVariable): Compression type of the input data, if compressed (default: None). Valid values: 'Gzip', None. - split_type (str): The record delimiter for the input object + split_type (str or PipelineVariable): The record delimiter for the input object (default: 'None'). Valid values: 'None', 'Line', 'RecordIO', and 'TFRecord'. job_name (str): job name (default: None). If not specified, one will be generated. - input_filter (str): A JSONPath to select a portion of the input to + input_filter (str or PipelineVariable): A JSONPath to select a portion of the input to pass to the algorithm container for inference. If you omit the field, it gets the value '$', representing the entire input. For CSV data, each row is taken as a JSON array, @@ -164,13 +164,13 @@ def transform( `CreateTransformJob `_. Some examples: "$[1:]", "$.features" (default: None). - output_filter (str): A JSONPath to select a portion of the + output_filter (str or PipelineVariable): A JSONPath to select a portion of the joined/original output to return as the output. For more information, see the SageMaker API documentation for `CreateTransformJob `_. Some examples: "$[1:]", "$.prediction" (default: None). - join_source (str): The source of data to be joined to the transform + join_source (str or PipelineVariable): The source of data to be joined to the transform output. It can be set to 'Input' meaning the entire input record will be joined to the inference result. You can use OutputFilter to select the useful portion before uploading to S3. (default: @@ -189,8 +189,8 @@ def transform( * Both `ExperimentName` and `TrialName` will be ignored if the Transformer instance is built with :class:`~sagemaker.workflow.pipeline_context.PipelineSession`. However, the value of `TrialComponentDisplayName` is honored for display in Studio. - model_client_config (dict[str, str]): Model configuration. - Dictionary contains two optional keys, + model_client_config (dict[str, str] or dict[str, PipelineVariable]): Model + configuration. Dictionary contains two optional keys, 'InvocationsTimeoutInSeconds', and 'InvocationsMaxRetries'. (default: ``None``). wait (bool): Whether the call should wait until the job completes diff --git a/src/sagemaker/tuner.py b/src/sagemaker/tuner.py index 0440cee3b8..ddf64faa02 100644 --- a/src/sagemaker/tuner.py +++ b/src/sagemaker/tuner.py @@ -106,8 +106,8 @@ def __init__( Args: warm_start_type (sagemaker.tuner.WarmStartTypes): This should be one of the supported warm start types in WarmStartType - parents (set{str}): Set of parent tuning jobs which will be used to - warm start the new tuning job. + parents (set[str] or set[PipelineVariable]): Set of parent tuning jobs which + will be used to warm start the new tuning job. """ if warm_start_type not in list(WarmStartTypes): @@ -237,7 +237,7 @@ def __init__( that has been initialized with the desired configuration. There does not need to be a training job associated with this instance. - objective_metric_name (str): Name of the metric for evaluating + objective_metric_name (str or PipelineVariable): Name of the metric for evaluating training jobs. hyperparameter_ranges (dict[str, sagemaker.parameter.ParameterRange]): Dictionary of parameter ranges. 
These parameter ranges can be one @@ -245,24 +245,24 @@ def __init__( the dictionary are the names of the hyperparameter, and the values are the appropriate parameter range class to represent the range. - metric_definitions (list[dict]): A list of dictionaries that defines - the metric(s) used to evaluate the training jobs (default: + metric_definitions (list[dict[str, str] or list[dict[str, PipelineVariable]]): A list of + dictionaries that defines the metric(s) used to evaluate the training jobs (default: None). Each dictionary contains two keys: 'Name' for the name of the metric, and 'Regex' for the regular expression used to extract the metric from the logs. This should be defined only for hyperparameter tuning jobs that don't use an Amazon algorithm. - strategy (str): Strategy to be used for hyperparameter estimations + strategy (str or PipelineVariable): Strategy to be used for hyperparameter estimations (default: 'Bayesian'). - objective_type (str): The type of the objective metric for + objective_type (str or PipelineVariable): The type of the objective metric for evaluating training jobs. This value can be either 'Minimize' or 'Maximize' (default: 'Maximize'). - max_jobs (int): Maximum total number of training jobs to start for + max_jobs (int or PipelineVariable): Maximum total number of training jobs to start for the hyperparameter tuning job (default: 1). - max_parallel_jobs (int): Maximum number of parallel training jobs to + max_parallel_jobs (int or PipelineVariable): Maximum number of parallel training jobs to start (default: 1). - tags (list[dict]): List of tags for labeling the tuning job - (default: None). For more, see + tags (list[dict[str, str] or list[dict[str, PipelineVariable]]): List of tags for + labeling the tuning job (default: None). For more, see https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html. base_tuning_job_name (str): Prefix for the hyperparameter tuning job name when the :meth:`~sagemaker.tuner.HyperparameterTuner.fit` @@ -272,7 +272,7 @@ def __init__( warm_start_config (sagemaker.tuner.WarmStartConfig): A ``WarmStartConfig`` object that has been initialized with the configuration defining the nature of warm start tuning job. - early_stopping_type (str): Specifies whether early stopping is + early_stopping_type (str or PipelineVariable): Specifies whether early stopping is enabled for the job. Can be either 'Auto' or 'Off' (default: 'Off'). If set to 'Off', early stopping will not be attempted. If set to 'Auto', early stopping of some training jobs may diff --git a/src/sagemaker/workflow/entities.py b/src/sagemaker/workflow/entities.py index 5272cfded6..e648037749 100644 --- a/src/sagemaker/workflow/entities.py +++ b/src/sagemaker/workflow/entities.py @@ -62,7 +62,12 @@ def expr(self) -> RequestType: class PipelineVariable(Expression): """Base object for pipeline variables - PipelineVariables must implement the expr property. + PipelineVariable subclasses must implement the expr property. Its subclasses include: + :class:`~sagemaker.workflow.parameters.Parameter`, + :class:`~sagemaker.workflow.properties.Properties`, + :class:`~sagemaker.workflow.functions.Join`, + :class:`~sagemaker.workflow.functions.JsonGet`, + :class:`~sagemaker.workflow.execution_variables.ExecutionVariable`. 
""" def __add__(self, other: Union[Expression, PrimitiveType]): diff --git a/src/sagemaker/xgboost/estimator.py b/src/sagemaker/xgboost/estimator.py index f6f0005f1f..b59d2dbf2b 100644 --- a/src/sagemaker/xgboost/estimator.py +++ b/src/sagemaker/xgboost/estimator.py @@ -69,13 +69,14 @@ def __init__( https://github.com/aws/sagemaker-python-sdk Args: - entry_point (str): Path (absolute or relative) to the Python source file which should - be executed as the entry point to training. If ``source_dir`` is specified, - then ``entry_point`` must point to a file located at the root of ``source_dir``. + entry_point (str or PipelineVariable): Path (absolute or relative) to + the Python source file which should be executed as the entry point to training. + If ``source_dir`` is specified, then ``entry_point`` must point to + a file located at the root of ``source_dir``. framework_version (str): XGBoost version you want to use for executing your model training code. - source_dir (str): Path (absolute, relative or an S3 URI) to a directory - with any other training source code dependencies aside from the entry + source_dir (str or PipelineVariable): Path (absolute, relative or an S3 URI) to + a directory with any other training source code dependencies aside from the entry point file (default: None). If ``source_dir`` is an S3 URI, it must point to a tar.gz file. Structure within this directory are preserved when training on Amazon SageMaker. diff --git a/src/sagemaker/xgboost/model.py b/src/sagemaker/xgboost/model.py index 5279c07c50..7207d76e3d 100644 --- a/src/sagemaker/xgboost/model.py +++ b/src/sagemaker/xgboost/model.py @@ -89,7 +89,8 @@ def __init__( """Initialize an XGBoostModel. Args: - model_data (str): The S3 location of a SageMaker model data ``.tar.gz`` file. + model_data (str or PipelineVariable): The S3 location of a SageMaker model data + ``.tar.gz`` file. role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and APIs that create Amazon SageMaker endpoints use this role to access training data and model artifacts. After the endpoint is created, the inference @@ -97,8 +98,8 @@ def __init__( entry_point (str): Path (absolute or relative) to the Python source file which should be executed as the entry point to model hosting. If ``source_dir`` is specified, then ``entry_point`` must point to a file located at the root of ``source_dir``. - image_uri (str): A Docker image URI (default: None). If not specified, - a default image for XGBoost is be used. + image_uri (str or PipelineVariable): A Docker image URI (default: None). + If not specified, a default image for XGBoost is be used. py_version (str): Python version you want to use for executing your model training code (default: 'py3'). framework_version (str): XGBoost version you want to use for executing your model @@ -107,8 +108,8 @@ def __init__( a predictor with an endpoint name and SageMaker ``Session``. If specified, ``deploy()`` returns the result of invoking this function on the created endpoint name. - model_server_workers (int): Optional. The number of worker processes used by the - inference server. If None, server will use one worker per vCPU. + model_server_workers (int or PipelineVariable): Optional. The number of worker processes + used by the inference server. If None, server will use one worker per vCPU. **kwargs: Keyword arguments passed to the superclass :class:`~sagemaker.model.FrameworkModel` and, subsequently, its superclass :class:`~sagemaker.model.Model`. 
@@ -157,43 +158,49 @@ def register( """Creates a model package for creating SageMaker models or listing on Marketplace. Args: - content_types (list): The supported MIME types for the input data. - response_types (list): The supported MIME types for the output data. - inference_instances (list): A list of the instance types that are used to - generate inferences in real-time. - transform_instances (list): A list of the instance types on which a transformation - job can be run or on which an endpoint can be deployed. - model_package_name (str): Model Package name, exclusive to `model_package_group_name`, - using `model_package_name` makes the Model Package un-versioned (default: None). - model_package_group_name (str): Model Package Group name, exclusive to - `model_package_name`, using `model_package_group_name` makes the Model Package - versioned (default: None). - image_uri (str): Inference image uri for the container. Model class' self.image will - be used if it is None (default: None). + content_types (list[str] or list[PipelineVariable]): The supported MIME types for + the input data. + response_types (list[str] or list[PipelineVariable]): The supported MIME types for + the output data. + inference_instances (list[str] or list[PipelineVariable]): A list of the instance + types that are used to generate inferences in real-time. + transform_instances (list[str] or list[PipelineVariable]): A list of the instance + types on which a transformation job can be run or on which an endpoint can + be deployed. + model_package_name (str or PipelineVariable): Model Package name, exclusive to + `model_package_group_name`, using `model_package_name` makes the Model Package + un-versioned (default: None). + model_package_group_name (str or PipelineVariable): Model Package Group name, + exclusive to `model_package_name`, using `model_package_group_name` makes the + Model Package versioned (default: None). + image_uri (str or PipelineVariable): Inference image uri for the container. Model class' + self.image will be used if it is None (default: None). model_metrics (ModelMetrics): ModelMetrics object (default: None). metadata_properties (MetadataProperties): MetadataProperties (default: None). marketplace_cert (bool): A boolean value indicating if the Model Package is certified for AWS Marketplace (default: False). - approval_status (str): Model Approval Status, values can be "Approved", "Rejected", - or "PendingManualApproval" (default: "PendingManualApproval"). + approval_status (str or PipelineVariable): Model Approval Status, values can be + "Approved", "Rejected", or "PendingManualApproval" + (default: "PendingManualApproval"). description (str): Model Package description (default: None). drift_check_baselines (DriftCheckBaselines): DriftCheckBaselines object (default: None). - customer_metadata_properties (dict[str, str]): A dictionary of key-value paired - metadata properties (default: None). - domain (str): Domain values can be "COMPUTER_VISION", "NATURAL_LANGUAGE_PROCESSING", - "MACHINE_LEARNING" (default: None). - sample_payload_url (str): The S3 path where the sample payload is stored + customer_metadata_properties (dict[str, str] or dict[str, PipelineVariable]): + A dictionary of key-value paired metadata properties (default: None). + domain (str or PipelineVariable): Domain values can be "COMPUTER_VISION", + "NATURAL_LANGUAGE_PROCESSING", "MACHINE_LEARNING" (default: None). + sample_payload_url (str or PipelineVariable): The S3 path where the sample payload + is stored (default: None). 
+ task (str or PipelineVariable): Task values which are supported by Inference Recommender + are "FILL_MASK", "IMAGE_CLASSIFICATION", "OBJECT_DETECTION", "TEXT_GENERATION", + "IMAGE_SEGMENTATION", "CLASSIFICATION", "REGRESSION", "OTHER" (default: None). + framework (str or PipelineVariable): Machine learning framework of the model package + container image (default: None). + framework_version (str or PipelineVariable): Framework version of the Model Package + Container Image (default: None). + nearest_model_name (str or PipelineVariable): Name of a pre-trained machine learning + benchmarked by Amazon SageMaker Inference Recommender (default: None). + data_input_configuration (str or PipelineVariable): Input object for the model (default: None). - task (str): Task values which are supported by Inference Recommender are "FILL_MASK", - "IMAGE_CLASSIFICATION", "OBJECT_DETECTION", "TEXT_GENERATION", "IMAGE_SEGMENTATION", - "CLASSIFICATION", "REGRESSION", "OTHER" (default: None). - framework (str): Machine learning framework of the model package container image - (default: None). - framework_version (str): Framework version of the Model Package Container Image - (default: None). - nearest_model_name (str): Name of a pre-trained machine learning benchmarked by - Amazon SageMaker Inference Recommender (default: None). - data_input_configuration (str): Input object for the model (default: None). Returns: str: A string of SageMaker Model Package ARN.
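The `register()` signatures updated above accept pipeline variables as well, which matters mainly when the model is bound to a `PipelineSession`. Below is a hedged sketch of that pattern; the model package group name, instance types, role ARN, and framework version are placeholders, and `SKLearnModel` with `ModelStep` is used only as one plausible combination rather than anything prescribed by this changeset.

# Hedged sketch: registering a model as a pipeline step, with pipeline
# variables supplied where the register() docstrings allow
# "str or PipelineVariable". Names, ARNs and instance types are placeholders.
from sagemaker.sklearn.model import SKLearnModel
from sagemaker.workflow.parameters import ParameterString
from sagemaker.workflow.pipeline import Pipeline
from sagemaker.workflow.pipeline_context import PipelineSession
from sagemaker.workflow.model_step import ModelStep

pipeline_session = PipelineSession()

approval_status = ParameterString(
    name="ApprovalStatus", default_value="PendingManualApproval"
)
model_data = ParameterString(name="ModelDataUrl")

model = SKLearnModel(
    model_data=model_data,                                # PipelineVariable instead of str
    role="arn:aws:iam::111122223333:role/SageMakerRole",  # placeholder
    entry_point="inference.py",                           # hypothetical local script
    framework_version="1.0-1",                            # placeholder version
    sagemaker_session=pipeline_session,
)

# With a PipelineSession, register() does not call the API immediately; it
# returns step arguments that a ModelStep turns into a pipeline step.
register_args = model.register(
    content_types=["text/csv"],
    response_types=["text/csv"],
    inference_instances=["ml.m5.xlarge"],
    transform_instances=["ml.m5.xlarge"],
    model_package_group_name="my-model-group",  # placeholder
    approval_status=approval_status,            # PipelineVariable
)

step_register = ModelStep(name="RegisterModel", step_args=register_args)

pipeline = Pipeline(
    name="register-example",
    parameters=[model_data, approval_status],
    steps=[step_register],
    sagemaker_session=pipeline_session,
)
# pipeline.upsert(role_arn=...) followed by pipeline.start() would create and run it.

With a regular `Session` instead of a `PipelineSession`, the same `register()` call creates the model package immediately and returns a `sagemaker.model.ModelPackage` (or, for the XGBoost variant above, the model package ARN string) rather than step arguments.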