From a07a962c8bccb690a2da6c92a4198140fdc96158 Mon Sep 17 00:00:00 2001 From: Payton Staub Date: Tue, 9 Nov 2021 10:55:22 -0800 Subject: [PATCH 1/3] fix: Prevent repack_model script from referencing nonexistent directories --- src/sagemaker/workflow/_repack_model.py | 31 +++++++++++++++---------- 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/src/sagemaker/workflow/_repack_model.py b/src/sagemaker/workflow/_repack_model.py index 60b74d66c7..c8e64fccd0 100644 --- a/src/sagemaker/workflow/_repack_model.py +++ b/src/sagemaker/workflow/_repack_model.py @@ -62,15 +62,15 @@ def repack(inference_script, model_archive, dependencies=None, source_dir=None): with tarfile.open(name=local_path, mode="r:gz") as tf: tf.extractall(path=src_dir) - # copy the custom inference script to code/ - entry_point = os.path.join("/opt/ml/code", inference_script) - shutil.copy2(entry_point, os.path.join(src_dir, "code", inference_script)) - - # copy source_dir to code/ if source_dir: + # copy /opt/ml/code to code/ if os.path.exists(code_dir): shutil.rmtree(code_dir) - shutil.copytree(source_dir, code_dir) + shutil.copytree("/opt/ml/code", code_dir) + else: + # copy the custom inference script to code/ + entry_point = os.path.join("/opt/ml/code", inference_script) + shutil.copy2(entry_point, os.path.join(code_dir, inference_script)) # copy any dependencies to code/lib/ if dependencies: @@ -79,13 +79,20 @@ def repack(inference_script, model_archive, dependencies=None, source_dir=None): lib_dir = os.path.join(code_dir, "lib") if not os.path.exists(lib_dir): os.mkdir(lib_dir) - if os.path.isdir(actual_dependency_path): - shutil.copytree( - actual_dependency_path, - os.path.join(lib_dir, os.path.basename(actual_dependency_path)), - ) - else: + if os.path.isfile(actual_dependency_path): shutil.copy2(actual_dependency_path, lib_dir) + else: + if os.path.exists(lib_dir): + shutil.rmtree(lib_dir) + # a directory is in the dependencies. we have no choice but to copy + # all of /opt/ml/code into the lib dir because the original directory + # was flattened by the SDK training job upload.. + shutil.copytree("/opt/ml/code", lib_dir) + break + # shutil.copytree( + # actual_dependency_path, + # os.path.join(lib_dir, os.path.basename(actual_dependency_path)), + # ) # copy the "src" dir, which includes the previous training job's model and the # custom inference script, to the output of this training job From 120f1871b5ac3976e25fb7ec791438e6bc305f32 Mon Sep 17 00:00:00 2001 From: Payton Staub Date: Tue, 9 Nov 2021 11:19:35 -0800 Subject: [PATCH 2/3] Update unit tests --- tests/unit/sagemaker/workflow/test_repack_model_script.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/sagemaker/workflow/test_repack_model_script.py b/tests/unit/sagemaker/workflow/test_repack_model_script.py index 67c8231dcc..69c9e7b740 100644 --- a/tests/unit/sagemaker/workflow/test_repack_model_script.py +++ b/tests/unit/sagemaker/workflow/test_repack_model_script.py @@ -94,7 +94,7 @@ def test_repack_with_dependencies(tmp): _repack_model.repack( inference_script="inference.py", model_archive=model_tar_name, - dependencies=["dependencies/a", "bb", "dependencies/some/dir"], + dependencies="dependencies/a bb dependencies/some/dir", ) # /opt/ml/model should now have the original model and the inference script @@ -145,7 +145,7 @@ def test_repack_with_source_dir_and_dependencies(tmp): _repack_model.repack( inference_script="inference.py", model_archive=model_tar_name, - dependencies=["dependencies/a", "bb", "dependencies/some/dir"], + dependencies="dependencies/a bb dependencies/some/dir", source_dir="sourcedir", ) From 2ba3604a358d3ba918e7d6d7458e332de8d6e520 Mon Sep 17 00:00:00 2001 From: Payton Staub Date: Tue, 9 Nov 2021 11:21:06 -0800 Subject: [PATCH 3/3] Remove comment --- src/sagemaker/workflow/_repack_model.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/sagemaker/workflow/_repack_model.py b/src/sagemaker/workflow/_repack_model.py index c8e64fccd0..6ce7e41831 100644 --- a/src/sagemaker/workflow/_repack_model.py +++ b/src/sagemaker/workflow/_repack_model.py @@ -84,15 +84,11 @@ def repack(inference_script, model_archive, dependencies=None, source_dir=None): else: if os.path.exists(lib_dir): shutil.rmtree(lib_dir) - # a directory is in the dependencies. we have no choice but to copy + # a directory is in the dependencies. we have to copy # all of /opt/ml/code into the lib dir because the original directory # was flattened by the SDK training job upload.. shutil.copytree("/opt/ml/code", lib_dir) break - # shutil.copytree( - # actual_dependency_path, - # os.path.join(lib_dir, os.path.basename(actual_dependency_path)), - # ) # copy the "src" dir, which includes the previous training job's model and the # custom inference script, to the output of this training job