Skip to content

Commit 876fde1

Browse files
fix: remove input dataset definition class attributes
1 parent 996c310 commit 876fde1

File tree

1 file changed

+72
-101
lines changed

1 file changed

+72
-101
lines changed

src/sagemaker/dataset_definition/inputs.py

Lines changed: 72 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -26,35 +26,8 @@ class RedshiftDatasetDefinition(ApiObject):
2626
"""DatasetDefinition for Redshift.
2727
2828
With this input, SQL queries will be executed using Redshift to generate datasets to S3.
29-
30-
Parameters:
31-
cluster_id (str, default=None): The Redshift cluster Identifier.
32-
database (str, default=None):
33-
The name of the Redshift database used in Redshift query execution.
34-
db_user (str, default=None): The database user name used in Redshift query execution.
35-
query_string (str, default=None): The SQL query statements to be executed.
36-
cluster_role_arn (str, default=None): The IAM role attached to your Redshift cluster that
37-
Amazon SageMaker uses to generate datasets.
38-
output_s3_uri (str, default=None): The location in Amazon S3 where the Redshift query
39-
results are stored.
40-
kms_key_id (str, default=None): The AWS Key Management Service (AWS KMS) key that Amazon
41-
SageMaker uses to encrypt data from a Redshift execution.
42-
output_format (str, default=None): The data storage format for Redshift query results.
43-
Valid options are "PARQUET", "CSV"
44-
output_compression (str, default=None): The compression used for Redshift query results.
45-
Valid options are "None", "GZIP", "SNAPPY", "ZSTD", "BZIP2"
4629
"""
4730

48-
cluster_id = None
49-
database = None
50-
db_user = None
51-
query_string = None
52-
cluster_role_arn = None
53-
output_s3_uri = None
54-
kms_key_id = None
55-
output_format = None
56-
output_compression = None
57-
5831
def __init__(
5932
self,
6033
cluster_id=None,
@@ -67,7 +40,25 @@ def __init__(
6740
output_format=None,
6841
output_compression=None,
6942
):
70-
"""Initialize RedshiftDatasetDefinition."""
43+
"""Initialize RedshiftDatasetDefinition.
44+
45+
Args:
46+
cluster_id (str, default=None): The Redshift cluster Identifier.
47+
database (str, default=None):
48+
The name of the Redshift database used in Redshift query execution.
49+
db_user (str, default=None): The database user name used in Redshift query execution.
50+
query_string (str, default=None): The SQL query statements to be executed.
51+
cluster_role_arn (str, default=None): The IAM role attached to your Redshift cluster that
52+
Amazon SageMaker uses to generate datasets.
53+
output_s3_uri (str, default=None): The location in Amazon S3 where the Redshift query
54+
results are stored.
55+
kms_key_id (str, default=None): The AWS Key Management Service (AWS KMS) key that Amazon
56+
SageMaker uses to encrypt data from a Redshift execution.
57+
output_format (str, default=None): The data storage format for Redshift query results.
58+
Valid options are "PARQUET", "CSV"
59+
output_compression (str, default=None): The compression used for Redshift query results.
60+
Valid options are "None", "GZIP", "SNAPPY", "ZSTD", "BZIP2"
61+
"""
7162
super(RedshiftDatasetDefinition, self).__init__(
7263
cluster_id=cluster_id,
7364
database=database,
@@ -85,32 +76,8 @@ class AthenaDatasetDefinition(ApiObject):
8576
"""DatasetDefinition for Athena.
8677
8778
With this input, SQL queries will be executed using Athena to generate datasets to S3.
88-
89-
Parameters:
90-
catalog (str, default=None): The name of the data catalog used in Athena query execution.
91-
database (str, default=None): The name of the database used in the Athena query execution.
92-
query_string (str, default=None): The SQL query statements, to be executed.
93-
output_s3_uri (str, default=None):
94-
The location in Amazon S3 where Athena query results are stored.
95-
work_group (str, default=None):
96-
The name of the workgroup in which the Athena query is being started.
97-
kms_key_id (str, default=None): The AWS Key Management Service (AWS KMS) key that Amazon
98-
SageMaker uses to encrypt data generated from an Athena query execution.
99-
output_format (str, default=None): The data storage format for Athena query results.
100-
Valid options are "PARQUET", "ORC", "AVRO", "JSON", "TEXTFILE"
101-
output_compression (str, default=None): The compression used for Athena query results.
102-
Valid options are "GZIP", "SNAPPY", "ZLIB"
10379
"""
10480

105-
catalog = None
106-
database = None
107-
query_string = None
108-
output_s3_uri = None
109-
work_group = None
110-
kms_key_id = None
111-
output_format = None
112-
output_compression = None
113-
11481
def __init__(
11582
self,
11683
catalog=None,
@@ -122,7 +89,23 @@ def __init__(
12289
output_format=None,
12390
output_compression=None,
12491
):
125-
"""Initialize AthenaDatasetDefinition."""
92+
"""Initialize AthenaDatasetDefinition.
93+
94+
Args:
95+
catalog (str, default=None): The name of the data catalog used in Athena query execution.
96+
database (str, default=None): The name of the database used in the Athena query execution.
97+
query_string (str, default=None): The SQL query statements to be executed.
98+
output_s3_uri (str, default=None):
99+
The location in Amazon S3 where Athena query results are stored.
100+
work_group (str, default=None):
101+
The name of the workgroup in which the Athena query is being started.
102+
kms_key_id (str, default=None): The AWS Key Management Service (AWS KMS) key that Amazon
103+
SageMaker uses to encrypt data generated from an Athena query execution.
104+
output_format (str, default=None): The data storage format for Athena query results.
105+
Valid options are "PARQUET", "ORC", "AVRO", "JSON", "TEXTFILE"
106+
output_compression (str, default=None): The compression used for Athena query results.
107+
Valid options are "GZIP", "SNAPPY", "ZLIB"
108+
"""
126109
super(AthenaDatasetDefinition, self).__init__(
127110
catalog=catalog,
128111
database=database,
@@ -136,40 +119,13 @@ def __init__(
136119

137120

138121
class DatasetDefinition(ApiObject):
139-
"""DatasetDefinition input.
140-
141-
Parameters:
142-
data_distribution_type (str, default="ShardedByS3Key"):
143-
Whether the generated dataset is FullyReplicated or ShardedByS3Key (default).
144-
input_mode (str, default="File"):
145-
Whether to use File or Pipe input mode. In File (default) mode, Amazon
146-
SageMaker copies the data from the input source onto the local Amazon Elastic Block
147-
Store (Amazon EBS) volumes before starting your training algorithm. This is the most
148-
commonly used input mode. In Pipe mode, Amazon SageMaker streams input data from the
149-
source directly to your algorithm without using the EBS volume.
150-
local_path (str, default=None):
151-
The local path where you want Amazon SageMaker to download the Dataset
152-
Definition inputs to run a processing job. LocalPath is an absolute path to the input
153-
data. This is a required parameter when `AppManaged` is False (default).
154-
redshift_dataset_definition
155-
(:class:`~sagemaker.dataset_definition.inputs.RedshiftDatasetDefinition`,default=None):
156-
Configuration for Redshift Dataset Definition input.
157-
athena_dataset_definition
158-
(:class:`~sagemaker.dataset_definition.inputs.AthenaDatasetDefinition`, default=None):
159-
Configuration for Athena Dataset Definition input.
160-
"""
122+
"""DatasetDefinition input."""
161123

162124
_custom_boto_types = {
163125
"redshift_dataset_definition": (RedshiftDatasetDefinition, True),
164126
"athena_dataset_definition": (AthenaDatasetDefinition, True),
165127
}
166128

167-
data_distribution_type = "ShardedByS3Key"
168-
input_mode = "File"
169-
local_path = None
170-
redshift_dataset_definition = None
171-
athena_dataset_definition = None
172-
173129
def __init__(
174130
self,
175131
data_distribution_type="ShardedByS3Key",
@@ -178,7 +134,28 @@ def __init__(
178134
redshift_dataset_definition=None,
179135
athena_dataset_definition=None,
180136
):
181-
"""Initialize DatasetDefinition."""
137+
"""Initialize DatasetDefinition.
138+
139+
Args:
140+
data_distribution_type (str, default="ShardedByS3Key"):
141+
Whether the generated dataset is FullyReplicated or ShardedByS3Key (default).
142+
input_mode (str, default="File"):
143+
Whether to use File or Pipe input mode. In File (default) mode, Amazon
144+
SageMaker copies the data from the input source onto the local Amazon Elastic Block
145+
Store (Amazon EBS) volumes before starting your training algorithm. This is the most
146+
commonly used input mode. In Pipe mode, Amazon SageMaker streams input data from the
147+
source directly to your algorithm without using the EBS volume.
148+
local_path (str, default=None):
149+
The local path where you want Amazon SageMaker to download the Dataset
150+
Definition inputs to run a processing job. LocalPath is an absolute path to the input
151+
data. This is a required parameter when `AppManaged` is False (default).
152+
redshift_dataset_definition
153+
(:class:`~sagemaker.dataset_definition.inputs.RedshiftDatasetDefinition`, default=None):
154+
Configuration for Redshift Dataset Definition input.
155+
athena_dataset_definition
156+
(:class:`~sagemaker.dataset_definition.inputs.AthenaDatasetDefinition`, default=None):
157+
Configuration for Athena Dataset Definition input.
158+
"""
182159
super(DatasetDefinition, self).__init__(
183160
data_distribution_type=data_distribution_type,
184161
input_mode=input_mode,
@@ -196,26 +173,8 @@ class S3Input(ApiObject):
196173
Note: Strong consistency is not guaranteed if S3Prefix is provided here.
197174
S3 list operations are not strongly consistent.
198175
Use ManifestFile if strong consistency is required.
199-
200-
Parameters:
201-
s3_uri (str, default=None): the path to a specific S3 object or a S3 prefix
202-
local_path (str, default=None):
203-
the path to a local directory. If not provided, skips data download
204-
by SageMaker platform.
205-
s3_data_type (str, default="S3Prefix"): Valid options are "ManifestFile" or "S3Prefix".
206-
s3_input_mode (str, default="File"): Valid options are "Pipe" or "File".
207-
s3_data_distribution_type (str, default="FullyReplicated"):
208-
Valid options are "FullyReplicated" or "ShardedByS3Key".
209-
s3_compression_type (str, default=None): Valid options are "None" or "Gzip".
210176
"""
211177

212-
s3_uri = None
213-
local_path = None
214-
s3_data_type = "S3Prefix"
215-
s3_input_mode = "File"
216-
s3_data_distribution_type = "FullyReplicated"
217-
s3_compression_type = None
218-
219178
def __init__(
220179
self,
221180
s3_uri=None,
@@ -225,7 +184,19 @@ def __init__(
225184
s3_data_distribution_type="FullyReplicated",
226185
s3_compression_type=None,
227186
):
228-
"""Initialize S3Input."""
187+
"""Initialize S3Input.
188+
189+
Args:
190+
s3_uri (str, default=None): the path to a specific S3 object or a S3 prefix
191+
local_path (str, default=None):
192+
the path to a local directory. If not provided, skips data download
193+
by SageMaker platform.
194+
s3_data_type (str, default="S3Prefix"): Valid options are "ManifestFile" or "S3Prefix".
195+
s3_input_mode (str, default="File"): Valid options are "Pipe" or "File".
196+
s3_data_distribution_type (str, default="FullyReplicated"):
197+
Valid options are "FullyReplicated" or "ShardedByS3Key".
198+
s3_compression_type (str, default=None): Valid options are "None" or "Gzip".
199+
"""
229200
super(S3Input, self).__init__(
230201
s3_uri=s3_uri,
231202
local_path=local_path,

0 commit comments

Comments
 (0)