2222import tempfile
2323from abc import ABC , abstractmethod
2424from sagemaker import image_uris , s3 , utils
25- from sagemaker .deprecations import deprecation_warning
2625from sagemaker .processing import ProcessingInput , ProcessingOutput , Processor
2726
2827logger = logging .getLogger (__name__ )
2928
3029
31- @deprecation_warning (
32- msg = "s3_data_distribution_type parameter will no longer be supported. Everything else will"
33- " remain as is" ,
34- date = "15 Mar 2022" ,
35- )
3630class DataConfig :
3731 """Config object related to configurations of the input and output dataset."""
3832
@@ -64,8 +58,8 @@ def __init__(
6458 dataset format is JSONLines.
6559 dataset_type (str): Format of the dataset. Valid values are "text/csv" for CSV,
6660 "application/jsonlines" for JSONLines, and "application/x-parquet" for Parquet.
67- s3_data_distribution_type (str): Deprecated. Only valid option is "FullyReplicated".
68- Any other value is ignored .
61+ s3_data_distribution_type (str): Valid options are "FullyReplicated" or
62+ "ShardedByS3Key" .
6963 s3_compression_type (str): Valid options are "None" or "Gzip".
7064 joinsource (str): The name or index of the column in the dataset that acts as an
7165 identifier column (for instance, while performing a join). This column is only
@@ -86,13 +80,7 @@ def __init__(
8680 self .s3_data_input_path = s3_data_input_path
8781 self .s3_output_path = s3_output_path
8882 self .s3_analysis_config_output_path = s3_analysis_config_output_path
89- if s3_data_distribution_type != "FullyReplicated" :
90- logger .warning (
91- "s3_data_distribution_type parameter, set to %s, is being ignored. Only"
92- " valid option is FullyReplicated" ,
93- s3_data_distribution_type ,
94- )
95- self .s3_data_distribution_type = "FullyReplicated"
83+ self .s3_data_distribution_type = s3_data_distribution_type
9684 self .s3_compression_type = s3_compression_type
9785 self .label = label
9886 self .headers = headers
0 commit comments