From 9f15206abfaf9514fcedce8b35975ece0d947b35 Mon Sep 17 00:00:00 2001 From: chiragvp-aws Date: Thu, 2 Oct 2025 20:17:34 +0000 Subject: [PATCH 1/2] feature: Added condition to allow eval recipe. --- src/sagemaker/pytorch/estimator.py | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/src/sagemaker/pytorch/estimator.py b/src/sagemaker/pytorch/estimator.py index 9e2f0f0dd4..dc58e0185d 100644 --- a/src/sagemaker/pytorch/estimator.py +++ b/src/sagemaker/pytorch/estimator.py @@ -162,6 +162,21 @@ def _is_nova_recipe(recipe): return bool(has_nova_model) or bool(has_distillation) +def _is_eval_recipe(recipe): + """Check if the recipe is an eval recipe. + + An eval recipe is identified by: + 1. Having an evaluation section + + Args: + recipe (OmegaConf): The loaded recipe configuration + + Returns: + bool: True if the recipe is an eval recipe, False otherwise + """ + # Check for a non-empty evaluation section + eval_config = recipe.get("evaluation", {}) + return bool(eval_config) def _recipe_initialize_args(source_dir): """Initialize the arguments dictionary for recipe setup. 
@@ -949,7 +964,7 @@ def _device_validate_and_get_type(kwargs, recipe): if "instance_type" not in kwargs: raise ValueError("Must pass instance type to estimator when using training recipes.") - if not _is_nova_recipe(recipe) and "trainer" not in recipe: + if not _is_nova_recipe(recipe) and "trainer" not in recipe and not _is_eval_recipe(recipe): raise ValueError("Supplied recipe does not contain required field trainer.") instance_type = kwargs["instance_type"].split(".")[1] @@ -973,7 +988,7 @@ def _device_handle_instance_count(kwargs, recipe): """ # Check if instance_count is already provided in kwargs - is_nova = _is_nova_recipe(recipe) + is_nova_or_eval = _is_nova_recipe(recipe) or _is_eval_recipe(recipe) if "instance_count" in kwargs: # Warn if there are conflicting configurations in the recipe if "num_nodes" in recipe.get("trainer", {}): @@ -981,7 +996,7 @@ def _device_handle_instance_count(kwargs, recipe): "Using instance_count argument to estimator to set number " "of nodes. Ignoring trainer -> num_nodes in recipe." ) - if is_nova and "replicas" in recipe.get("run", {}): + if is_nova_or_eval and "replicas" in recipe.get("run", {}): logger.warning( "Using instance_count argument to estimator to set number " "of nodes. Ignoring run -> replicas in recipe." 
@@ -993,7 +1008,7 @@ def _device_handle_instance_count(kwargs, recipe): kwargs["instance_count"] = recipe["trainer"]["num_nodes"] return - if is_nova and "run" in recipe and "replicas" in recipe["run"]: + if is_nova_or_eval and "run" in recipe and "replicas" in recipe["run"]: kwargs["instance_count"] = recipe["run"]["replicas"] return From 7005516c0f0560776eb01b112aef91732d502a80 Mon Sep 17 00:00:00 2001 From: chiragvp-aws Date: Fri, 3 Oct 2025 16:41:59 +0000 Subject: [PATCH 2/2] fix: codestyle issue --- src/sagemaker/pytorch/estimator.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/sagemaker/pytorch/estimator.py b/src/sagemaker/pytorch/estimator.py index dc58e0185d..611f6757b0 100644 --- a/src/sagemaker/pytorch/estimator.py +++ b/src/sagemaker/pytorch/estimator.py @@ -162,6 +162,7 @@ def _is_nova_recipe(recipe): return bool(has_nova_model) or bool(has_distillation) + def _is_eval_recipe(recipe): """Check if the recipe is an eval recipe. @@ -178,6 +179,7 @@ def _is_eval_recipe(recipe): eval_config = recipe.get("evaluation", {}) return bool(eval_config) + def _recipe_initialize_args(source_dir): """Initialize the arguments dictionary for recipe setup. @@ -964,7 +966,7 @@ def _device_validate_and_get_type(kwargs, recipe): if "instance_type" not in kwargs: raise ValueError("Must pass instance type to estimator when using training recipes.") - if not _is_nova_recipe(recipe) and "trainer" not in recipe and not _is_eval_recipe(recipe): + if not _is_nova_recipe(recipe) and "trainer" not in recipe and not _is_eval_recipe(recipe): raise ValueError("Supplied recipe does not contain required field trainer.") instance_type = kwargs["instance_type"].split(".")[1]