@@ -26,35 +26,8 @@ class RedshiftDatasetDefinition(ApiObject):
2626 """DatasetDefinition for Redshift.
2727
2828 With this input, SQL queries will be executed using Redshift to generate datasets to S3.
29-
30- Parameters:
31- cluster_id (str, default=None): The Redshift cluster Identifier.
32- database (str, default=None):
33- The name of the Redshift database used in Redshift query execution.
34- db_user (str, default=None): The database user name used in Redshift query execution.
35- query_string (str, default=None): The SQL query statements to be executed.
36- cluster_role_arn (str, default=None): The IAM role attached to your Redshift cluster that
37- Amazon SageMaker uses to generate datasets.
38- output_s3_uri (str, default=None): The location in Amazon S3 where the Redshift query
39- results are stored.
40- kms_key_id (str, default=None): The AWS Key Management Service (AWS KMS) key that Amazon
41- SageMaker uses to encrypt data from a Redshift execution.
42- output_format (str, default=None): The data storage format for Redshift query results.
43- Valid options are "PARQUET", "CSV"
44- output_compression (str, default=None): The compression used for Redshift query results.
45- Valid options are "None", "GZIP", "SNAPPY", "ZSTD", "BZIP2"
4629 """
4730
48- cluster_id = None
49- database = None
50- db_user = None
51- query_string = None
52- cluster_role_arn = None
53- output_s3_uri = None
54- kms_key_id = None
55- output_format = None
56- output_compression = None
57-
5831 def __init__ (
5932 self ,
6033 cluster_id = None ,
@@ -67,7 +40,25 @@ def __init__(
6740 output_format = None ,
6841 output_compression = None ,
6942 ):
70- """Initialize RedshiftDatasetDefinition."""
43+ """Initialize RedshiftDatasetDefinition.
44+
45+ Args:
46+ cluster_id (str, default=None): The Redshift cluster identifier.
47+ database (str, default=None):
48+ The name of the Redshift database used in Redshift query execution.
49+ db_user (str, default=None): The database user name used in Redshift query execution.
50+ query_string (str, default=None): The SQL query statements to be executed.
51+ cluster_role_arn (str, default=None): The IAM role attached to your Redshift cluster that
52+ Amazon SageMaker uses to generate datasets.
53+ output_s3_uri (str, default=None): The location in Amazon S3 where the Redshift query
54+ results are stored.
55+ kms_key_id (str, default=None): The AWS Key Management Service (AWS KMS) key that Amazon
56+ SageMaker uses to encrypt data from a Redshift execution.
57+ output_format (str, default=None): The data storage format for Redshift query results.
58+ Valid options are "PARQUET", "CSV"
59+ output_compression (str, default=None): The compression used for Redshift query results.
60+ Valid options are "None", "GZIP", "SNAPPY", "ZSTD", "BZIP2"
61+ """
7162 super (RedshiftDatasetDefinition , self ).__init__ (
7263 cluster_id = cluster_id ,
7364 database = database ,
@@ -85,32 +76,8 @@ class AthenaDatasetDefinition(ApiObject):
8576 """DatasetDefinition for Athena.
8677
8778 With this input, SQL queries will be executed using Athena to generate datasets to S3.
88-
89- Parameters:
90- catalog (str, default=None): The name of the data catalog used in Athena query execution.
91- database (str, default=None): The name of the database used in the Athena query execution.
92- query_string (str, default=None): The SQL query statements, to be executed.
93- output_s3_uri (str, default=None):
94- The location in Amazon S3 where Athena query results are stored.
95- work_group (str, default=None):
96- The name of the workgroup in which the Athena query is being started.
97- kms_key_id (str, default=None): The AWS Key Management Service (AWS KMS) key that Amazon
98- SageMaker uses to encrypt data generated from an Athena query execution.
99- output_format (str, default=None): The data storage format for Athena query results.
100- Valid options are "PARQUET", "ORC", "AVRO", "JSON", "TEXTFILE"
101- output_compression (str, default=None): The compression used for Athena query results.
102- Valid options are "GZIP", "SNAPPY", "ZLIB"
10379 """
10480
105- catalog = None
106- database = None
107- query_string = None
108- output_s3_uri = None
109- work_group = None
110- kms_key_id = None
111- output_format = None
112- output_compression = None
113-
11481 def __init__ (
11582 self ,
11683 catalog = None ,
@@ -122,7 +89,23 @@ def __init__(
12289 output_format = None ,
12390 output_compression = None ,
12491 ):
125- """Initialize AthenaDatasetDefinition."""
92+ """Initialize AthenaDatasetDefinition.
93+
94+ Args:
95+ catalog (str, default=None): The name of the data catalog used in Athena query execution.
96+ database (str, default=None): The name of the database used in the Athena query execution.
97+ query_string (str, default=None): The SQL query statements to be executed.
98+ output_s3_uri (str, default=None):
99+ The location in Amazon S3 where Athena query results are stored.
100+ work_group (str, default=None):
101+ The name of the workgroup in which the Athena query is being started.
102+ kms_key_id (str, default=None): The AWS Key Management Service (AWS KMS) key that Amazon
103+ SageMaker uses to encrypt data generated from an Athena query execution.
104+ output_format (str, default=None): The data storage format for Athena query results.
105+ Valid options are "PARQUET", "ORC", "AVRO", "JSON", "TEXTFILE"
106+ output_compression (str, default=None): The compression used for Athena query results.
107+ Valid options are "GZIP", "SNAPPY", "ZLIB"
108+ """
126109 super (AthenaDatasetDefinition , self ).__init__ (
127110 catalog = catalog ,
128111 database = database ,
@@ -136,40 +119,13 @@ def __init__(
136119
137120
138121class DatasetDefinition (ApiObject ):
139- """DatasetDefinition input.
140-
141- Parameters:
142- data_distribution_type (str, default="ShardedByS3Key"):
143- Whether the generated dataset is FullyReplicated or ShardedByS3Key (default).
144- input_mode (str, default="File"):
145- Whether to use File or Pipe input mode. In File (default) mode, Amazon
146- SageMaker copies the data from the input source onto the local Amazon Elastic Block
147- Store (Amazon EBS) volumes before starting your training algorithm. This is the most
148- commonly used input mode. In Pipe mode, Amazon SageMaker streams input data from the
149- source directly to your algorithm without using the EBS volume.
150- local_path (str, default=None):
151- The local path where you want Amazon SageMaker to download the Dataset
152- Definition inputs to run a processing job. LocalPath is an absolute path to the input
153- data. This is a required parameter when `AppManaged` is False (default).
154- redshift_dataset_definition
155- (:class:`~sagemaker.dataset_definition.inputs.RedshiftDatasetDefinition`,default=None):
156- Configuration for Redshift Dataset Definition input.
157- athena_dataset_definition
158- (:class:`~sagemaker.dataset_definition.inputs.AthenaDatasetDefinition`, default=None):
159- Configuration for Athena Dataset Definition input.
160- """
122+ """DatasetDefinition input."""
161123
162124 _custom_boto_types = {
163125 "redshift_dataset_definition" : (RedshiftDatasetDefinition , True ),
164126 "athena_dataset_definition" : (AthenaDatasetDefinition , True ),
165127 }
166128
167- data_distribution_type = "ShardedByS3Key"
168- input_mode = "File"
169- local_path = None
170- redshift_dataset_definition = None
171- athena_dataset_definition = None
172-
173129 def __init__ (
174130 self ,
175131 data_distribution_type = "ShardedByS3Key" ,
@@ -178,7 +134,28 @@ def __init__(
178134 redshift_dataset_definition = None ,
179135 athena_dataset_definition = None ,
180136 ):
181- """Initialize DatasetDefinition."""
137+ """Initialize DatasetDefinition.
138+
139+ Args:
140+ data_distribution_type (str, default="ShardedByS3Key"):
141+ Whether the generated dataset is FullyReplicated or ShardedByS3Key (default).
142+ input_mode (str, default="File"):
143+ Whether to use File or Pipe input mode. In File (default) mode, Amazon
144+ SageMaker copies the data from the input source onto the local Amazon Elastic Block
145+ Store (Amazon EBS) volumes before starting your training algorithm. This is the most
146+ commonly used input mode. In Pipe mode, Amazon SageMaker streams input data from the
147+ source directly to your algorithm without using the EBS volume.
148+ local_path (str, default=None):
149+ The local path where you want Amazon SageMaker to download the Dataset
150+ Definition inputs to run a processing job. LocalPath is an absolute path to the input
151+ data. This is a required parameter when `AppManaged` is False (default).
152+ redshift_dataset_definition
153+ (:class:`~sagemaker.dataset_definition.inputs.RedshiftDatasetDefinition`, default=None):
154+ Configuration for Redshift Dataset Definition input.
155+ athena_dataset_definition
156+ (:class:`~sagemaker.dataset_definition.inputs.AthenaDatasetDefinition`, default=None):
157+ Configuration for Athena Dataset Definition input.
158+ """
182159 super (DatasetDefinition , self ).__init__ (
183160 data_distribution_type = data_distribution_type ,
184161 input_mode = input_mode ,
@@ -196,26 +173,8 @@ class S3Input(ApiObject):
196173 Note: Strong consistency is not guaranteed if S3Prefix is provided here.
197174 S3 list operations are not strongly consistent.
198175 Use ManifestFile if strong consistency is required.
199-
200- Parameters:
201- s3_uri (str, default=None): the path to a specific S3 object or a S3 prefix
202- local_path (str, default=None):
203- the path to a local directory. If not provided, skips data download
204- by SageMaker platform.
205- s3_data_type (str, default="S3Prefix"): Valid options are "ManifestFile" or "S3Prefix".
206- s3_input_mode (str, default="File"): Valid options are "Pipe" or "File".
207- s3_data_distribution_type (str, default="FullyReplicated"):
208- Valid options are "FullyReplicated" or "ShardedByS3Key".
209- s3_compression_type (str, default=None): Valid options are "None" or "Gzip".
210176 """
211177
212- s3_uri = None
213- local_path = None
214- s3_data_type = "S3Prefix"
215- s3_input_mode = "File"
216- s3_data_distribution_type = "FullyReplicated"
217- s3_compression_type = None
218-
219178 def __init__ (
220179 self ,
221180 s3_uri = None ,
@@ -225,7 +184,19 @@ def __init__(
225184 s3_data_distribution_type = "FullyReplicated" ,
226185 s3_compression_type = None ,
227186 ):
228- """Initialize S3Input."""
187+ """Initialize S3Input.
188+
189+ Args:
190+ s3_uri (str, default=None): the path to a specific S3 object or an S3 prefix
191+ local_path (str, default=None):
192+ the path to a local directory. If not provided, skips data download
193+ by SageMaker platform.
194+ s3_data_type (str, default="S3Prefix"): Valid options are "ManifestFile" or "S3Prefix".
195+ s3_input_mode (str, default="File"): Valid options are "Pipe" or "File".
196+ s3_data_distribution_type (str, default="FullyReplicated"):
197+ Valid options are "FullyReplicated" or "ShardedByS3Key".
198+ s3_compression_type (str, default=None): Valid options are "None" or "Gzip".
199+ """
229200 super (S3Input , self ).__init__ (
230201 s3_uri = s3_uri ,
231202 local_path = local_path ,
0 commit comments