From d979d1b5d9957920859576d882cee387f8da41ab Mon Sep 17 00:00:00 2001 From: Ruban Hussain Date: Tue, 9 May 2023 11:25:32 -0700 Subject: [PATCH] change: SDK Defaults - switch from config printing to logging, avoid logging ARNs by default, and respect logging setup by the user --- src/sagemaker/config/config.py | 11 +- src/sagemaker/config/config_utils.py | 199 +++++++++++++++++++++++++++ src/sagemaker/session.py | 53 +------ src/sagemaker/utils.py | 86 +++--------- 4 files changed, 232 insertions(+), 117 deletions(-) create mode 100644 src/sagemaker/config/config_utils.py diff --git a/src/sagemaker/config/config.py b/src/sagemaker/config/config.py index 0d7f863820..ce994f2520 100644 --- a/src/sagemaker/config/config.py +++ b/src/sagemaker/config/config.py @@ -19,7 +19,6 @@ from __future__ import absolute_import import pathlib -import logging import os from typing import List import boto3 @@ -29,8 +28,9 @@ from botocore.utils import merge_dicts from six.moves.urllib.parse import urlparse from sagemaker.config.config_schema import SAGEMAKER_PYTHON_SDK_CONFIG_SCHEMA +from sagemaker.config.config_utils import get_sagemaker_config_logger -logger = logging.getLogger("sagemaker") +logger = get_sagemaker_config_logger() _APP_NAME = "sagemaker" # The default config file location of the Administrator provided config file. This path can be @@ -122,6 +122,9 @@ def load_sagemaker_config(additional_config_paths: List[str] = None, s3_resource if config_from_file: validate_sagemaker_config(config_from_file) merge_dicts(merged_config, config_from_file) + logger.info("Fetched defaults config from location: %s", file_path) + else: + logger.debug("Fetched defaults config from location: %s, but it was empty", file_path) return merged_config @@ -148,7 +151,7 @@ def _load_config_from_file(file_path: str) -> dict: f"Unable to load the config file from the location: {file_path}" f"Provide a valid file path" ) - logger.debug("Fetching config file from the path: %s", file_path) + logger.debug("Fetching defaults config from location: %s", file_path) return yaml.safe_load(open(inferred_file_path, "r")) @@ -164,7 +167,7 @@ def _load_config_from_s3(s3_uri, s3_resource_for_config) -> dict: ) s3_resource_for_config = boto_session.resource("s3", region_name=boto_region_name) - logger.debug("Fetching config file from the S3 URI: %s", s3_uri) + logger.debug("Fetching defaults config from location: %s", s3_uri) inferred_s3_uri = _get_inferred_s3_uri(s3_uri, s3_resource_for_config) parsed_url = urlparse(inferred_s3_uri) bucket, key_prefix = parsed_url.netloc, parsed_url.path.lstrip("/") diff --git a/src/sagemaker/config/config_utils.py b/src/sagemaker/config/config_utils.py new file mode 100644 index 0000000000..4801b3a0cc --- /dev/null +++ b/src/sagemaker/config/config_utils.py @@ -0,0 +1,199 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +"""This file contains util functions for the sagemaker Defaults Config. + +These utils may be used inside or outside the config module. +""" +from __future__ import absolute_import + +import logging +import sys + + +def get_sagemaker_config_logger(): + """Return a logger with the name 'sagemaker.config' + + If the logger to be returned has no level or handlers set, this will get level and handler + attributes. (So if the SDK user has setup loggers in a certain way, that setup will not be + changed by this function.) It is safe to make repeat calls to this function. + """ + sagemaker_config_logger = logging.getLogger("sagemaker.config") + sagemaker_logger = logging.getLogger("sagemaker") + + if sagemaker_config_logger.level == logging.NOTSET: + sagemaker_config_logger.setLevel(logging.INFO) + + # check sagemaker_logger here as well, so that if handlers were set for the parent logger + # already, we dont change behavior for the child logger + if len(sagemaker_config_logger.handlers) == 0 and len(sagemaker_logger.handlers) == 0: + # use sys.stdout so logs dont show up with a red background in a notebook + handler = logging.StreamHandler(sys.stdout) + + formatter = logging.Formatter("%(name)s %(levelname)-4s - %(message)s") + handler.setFormatter(formatter) + sagemaker_config_logger.addHandler(handler) + + # if a handler is being added, we dont want the root handler to also process the same events + sagemaker_config_logger.propagate = False + + return sagemaker_config_logger + + +def _log_sagemaker_config_single_substitution(source_value, config_value, config_key_path: str): + """Informs the SDK user whether a config value was present and automatically substituted + + Args: + source_value: The value that will be used if it exists. Usually, this is user-provided + input to a Class or to a session.py method, or None if no input was provided. + config_value: The value fetched from sagemaker_config. If it exists, this is the value that + will be used if direct_input is None. + config_key_path: A string denoting the path of keys that point to the config value in the + sagemaker_config. + + Returns: + None. Logs information to the "sagemaker.config" logger. + """ + logger = get_sagemaker_config_logger() + + if config_value is not None: + + if source_value is None: + # Sagemaker Config value is going to be used. By default the user should know about + # this scenario because the behavior they expect could change because of a new config + # value being injected in. + # However, it may not be safe to log ARNs to stdout by default. We can include more + # diagnostic info if the user enabled DEBUG logs though. + if logger.isEnabledFor(logging.DEBUG): + logger.debug( + "Applied value\n config key = %s\n config value that will be used = %s", + config_key_path, + config_value, + ) + else: + logger.info( + "Applied value from config key = %s", + config_key_path, + ) + + # The cases below here are logged as just debug statements because this info can be useful + # when debugging the config, but should not affect current behavior with/without the config. + + elif source_value is not None and config_value == source_value: + # Sagemaker Config had a value defined that is NOT going to be used here. + # Either (1) the config value was already fetched and applied earlier, or + # (2) the user happened to pass in the same value. + logger.debug( + ( + "Skipped value\n" + " config key = %s\n" + " config value = %s\n" + " source value that will be used = %s" + ), + config_key_path, + config_value, + source_value, + ) + elif source_value is not None and config_value != source_value: + # Sagemaker Config had a value defined that is NOT going to be used + # and the config value has not already been applied earlier (we know because the values + # are different). + logger.debug( + ( + "Skipped value\n" + " config key = %s\n" + " config value = %s\n" + " source value that will be used = %s", + ), + config_key_path, + config_value, + source_value, + ) + else: + # nothing was specified in the config and nothing is being automatically applied + logger.debug("Skipped value because no value defined\n config key = %s", config_key_path) + + +def _log_sagemaker_config_merge( + source_value=None, + config_value=None, + merged_source_and_config_value=None, + config_key_path: str = None, +): + """Informs the SDK user whether a config value was present and automatically substituted + + Args: + source_value: The dict or object that would be used if no default values existed. Usually, + this is user-provided input to a Class or to a session.py method, or None if no input + was provided. + config_value: The dict or object fetched from sagemaker_config. If it exists, this is the + value that will be used if source_value is None. + merged_source_and_config_value: The value that results from the merging of source_value and + original_config_value. This will be the value used. + config_key_path: A string denoting the path of keys that point to the config value in the + sagemaker_config. + + Returns: + None. Logs information to the "sagemaker.config" logger. + """ + logger = get_sagemaker_config_logger() + + if config_value: + + if source_value != merged_source_and_config_value: + # Sagemaker Config value(s) were used and affected the final object/dictionary. By + # default the user should know about this scenario because the behavior they expect + # could change because of new config values being injected in. + # However, it may not be safe to log ARNs to stdout by default. We can include more + # diagnostic info if the user enabled DEBUG logs though. + if logger.isEnabledFor(logging.DEBUG): + logger.debug( + ( + "Applied value(s)\n" + " config key = %s\n" + " config value = %s\n" + " source value = %s\n" + " combined value that will be used = %s" + ), + config_key_path, + config_value, + source_value, + merged_source_and_config_value, + ) + else: + logger.info( + "Applied value(s) from config key = %s", + config_key_path, + ) + + # The cases below here are logged as just debug statements because this info can be useful + # when debugging the config, but should not affect current behavior with/without the config. + + else: + # Sagemaker Config had a value defined that is NOT going to be used here. + # Either (1) the config value was already fetched and applied earlier, or + # (2) the user happened to pass in the same values. + logger.debug( + ( + "Skipped value(s)\n" + " config key = %s\n" + " config value = %s\n" + " source value that will be used = %s" + ), + config_key_path, + config_value, + merged_source_and_config_value, + ) + + else: + # nothing was specified in the config and nothing is being automatically applied + logger.debug("Skipped value because no value defined\n config key = %s", config_key_path) diff --git a/src/sagemaker/session.py b/src/sagemaker/session.py index c7fc6c1120..a42b8b8138 100644 --- a/src/sagemaker/session.py +++ b/src/sagemaker/session.py @@ -95,6 +95,7 @@ SESSION_DEFAULT_S3_BUCKET_PATH, SESSION_DEFAULT_S3_OBJECT_KEY_PREFIX_PATH, ) +from sagemaker.config.config_utils import _log_sagemaker_config_merge from sagemaker.deprecations import deprecated_class from sagemaker.inputs import ShuffleConfig, TrainingInput, BatchDataCaptureConfig from sagemaker.user_agent import prepend_user_agent @@ -606,47 +607,6 @@ def _create_s3_bucket_if_it_does_not_exist(self, bucket_name, region): else: raise - def _print_message_on_sagemaker_config_usage( - self, direct_input, config_value, config_path: str - ): - """Informs the SDK user whether a config value was present and automatically substituted - - Args: - direct_input: the value that would be used if no sagemaker_config or default values - existed. Usually this will be user-provided input to a Class or to a - session.py method, or None if no input was provided. - config_value: the value fetched from sagemaker_config. This is usually the value that - will be used if direct_input is None. - config_path: a string denoting the path of keys that point to the config value in the - sagemaker_config - - Returns: - No output (just prints information) - """ - - if config_value is not None: - - if direct_input is not None and config_value != direct_input: - # Sagemaker Config had a value defined that is NOT going to be used - # and the config value has not already been applied earlier - print( - "[Sagemaker Config - skipped value]\n", - "config key = {}\n".format(config_path), - "config value = {}\n".format(config_value), - "specified value that will be used = {}\n".format(direct_input), - ) - - elif direct_input is None: - # Sagemaker Config value is going to be used - print( - "[Sagemaker Config - applied value]\n", - "config key = {}\n".format(config_path), - "config value that will be used = {}\n".format(config_value), - ) - - # There is no print statement needed if nothing was specified in the config and nothing is - # being automatically applied - def _append_sagemaker_config_tags(self, tags: list, config_path_to_tags: str): """Appends tags specified in the sagemaker_config to the given list of tags. @@ -677,12 +637,11 @@ def _append_sagemaker_config_tags(self, tags: list, config_path_to_tags: str): # user-provided tags. all_tags.append(config_tag) - print( - "[Sagemaker Config - applied value]\n", - "config key = {}\n".format(config_path_to_tags), - "config value = {}\n".format(config_tags), - "source value = {}\n".format(tags), - "combined value that will be used = {}\n".format(all_tags), + _log_sagemaker_config_merge( + source_value=tags, + config_value=config_tags, + merged_source_and_config_value=all_tags, + config_key_path=config_path_to_tags, ) return all_tags diff --git a/src/sagemaker/utils.py b/src/sagemaker/utils.py index 109837641d..97d87f206f 100644 --- a/src/sagemaker/utils.py +++ b/src/sagemaker/utils.py @@ -38,6 +38,10 @@ from sagemaker import deprecations from sagemaker.config import validate_sagemaker_config +from sagemaker.config.config_utils import ( + _log_sagemaker_config_single_substitution, + _log_sagemaker_config_merge, +) from sagemaker.session_settings import SessionSettings from sagemaker.workflow import is_pipeline_variable, is_pipeline_parameter_string @@ -1069,7 +1073,7 @@ def resolve_value_from_config( config_value = ( get_sagemaker_config_value(sagemaker_session, config_path) if config_path else None ) - _print_message_on_sagemaker_config_usage(direct_input, config_value, config_path) + _log_sagemaker_config_single_substitution(direct_input, config_value, config_path) if direct_input is not None: return direct_input @@ -1100,46 +1104,6 @@ def get_sagemaker_config_value(sagemaker_session, key): return copy.deepcopy(config_value) -def _print_message_on_sagemaker_config_usage(direct_input, config_value, config_path: str): - """Informs the SDK user whether a config value was present and automatically substituted - - Args: - direct_input: The value that is used if no default values exist. Usually, - this is user-provided input to a Class or to a session.py method, or None if no input - was provided. - config_value: The value fetched from sagemaker_config. This is usually the value that - will be used if direct_input is None. - config_path: A string denoting the path of keys that point to the config value in the - sagemaker_config. - - Returns: - None. Prints information. - """ - - if config_value is not None: - - if direct_input is not None and config_value != direct_input: - # Sagemaker Config had a value defined that is NOT going to be used - # and the config value has not already been applied earlier - print( - "[Sagemaker Config - skipped value]\n", - "config key = {}\n".format(config_path), - "config value = {}\n".format(config_value), - "specified value that will be used = {}\n".format(direct_input), - ) - - elif direct_input is None: - # Sagemaker Config value is going to be used - print( - "[Sagemaker Config - applied value]\n", - "config key = {}\n".format(config_path), - "config value that will be used = {}\n".format(config_value), - ) - - # There is no print statement needed if nothing was specified in the config and nothing is - # being automatically applied - - def resolve_class_attribute_from_config( clazz: Optional[type], instance: Optional[object], @@ -1206,7 +1170,7 @@ def resolve_class_attribute_from_config( elif default_value is not None: setattr(instance, attribute, default_value) - _print_message_on_sagemaker_config_usage(current_value, config_value, config_path) + _log_sagemaker_config_single_substitution(current_value, config_value, config_path) return instance @@ -1259,7 +1223,7 @@ def resolve_nested_dict_value_from_config( elif default_value is not None: dictionary = set_nested_value(dictionary, nested_keys, default_value) - _print_message_on_sagemaker_config_usage(current_nested_value, config_value, config_path) + _log_sagemaker_config_single_substitution(current_nested_value, config_value, config_path) return dictionary @@ -1324,14 +1288,12 @@ def update_list_of_dicts_with_values_from_config( continue input_list[i] = dict_from_config - if unmodified_inputs_from_config: - print( - "[Sagemaker Config - applied value]\n", - "config key = {}\n".format(config_key_path), - "config value = {}\n".format(unmodified_inputs_from_config), - "source value = {}\n".format(inputs_copy), - "combined value that will be used = {}\n".format(input_list), - ) + _log_sagemaker_config_merge( + source_value=inputs_copy, + config_value=unmodified_inputs_from_config, + merged_source_and_config_value=input_list, + config_key_path=config_key_path, + ) def _validate_required_paths_in_a_dict(source_dict, required_key_paths: List[str] = None) -> bool: @@ -1392,19 +1354,11 @@ def update_nested_dictionary_with_values_from_config( # Don't need to print because no config value was used or defined return source_dict - if source_dict == inferred_config_dict: - # We didn't use any values from the config, but we should print if any of the config - # values were defined - _print_message_on_sagemaker_config_usage( - source_dict, original_config_dict_value, config_key_path - ) - else: - # Something from the config was merged in - print( - "[Sagemaker Config - applied value]\n", - "config key = {}\n".format(config_key_path), - "config value = {}\n".format(original_config_dict_value), - "source value = {}\n".format(source_dict), - "combined value that will be used = {}\n".format(inferred_config_dict), - ) + _log_sagemaker_config_merge( + source_value=source_dict, + config_value=original_config_dict_value, + merged_source_and_config_value=inferred_config_dict, + config_key_path=config_key_path, + ) + return inferred_config_dict