From 10d27c517e202e17fb61582cd1631b0c37032fa2 Mon Sep 17 00:00:00 2001
From: Yue Tu <yuetu@amazon.com>
Date: Wed, 5 Jun 2019 17:15:39 -0700
Subject: [PATCH 01/19] add git_config and validate method

---
 src/sagemaker/estimator.py   | 55 +++++++++++++++++++++++++++++++++---
 src/sagemaker/fw_utils.py    | 22 +++++++++++++++
 tests/unit/test_estimator.py | 13 +++++++++
 tests/unit/test_fw_utils.py  | 14 +++++++++
 4 files changed, 100 insertions(+), 4 deletions(-)

diff --git a/src/sagemaker/estimator.py b/src/sagemaker/estimator.py
index 6ff4d7b06e..b8f5abd524 100644
--- a/src/sagemaker/estimator.py
+++ b/src/sagemaker/estimator.py
@@ -15,6 +15,8 @@
 import json
 import logging
 import os
+import subprocess
+import tempfile
 import warnings
 from abc import ABCMeta
 from abc import abstractmethod
@@ -24,7 +26,7 @@
 import sagemaker
 from sagemaker.analytics import TrainingJobAnalytics
 from sagemaker.fw_utils import (create_image_uri, tar_and_upload_dir, parse_s3_url, UploadedCode,
-                                validate_source_dir)
+                                validate_source_dir, validate_git_config)
 from sagemaker.job import _Job
 from sagemaker.local import LocalSession
 from sagemaker.model import Model, NEO_ALLOWED_TARGET_INSTANCE_FAMILY, NEO_ALLOWED_FRAMEWORKS
@@ -771,13 +773,17 @@ class Framework(EstimatorBase):
     MPI_NUM_PROCESSES_PER_HOST = 'sagemaker_mpi_num_of_processes_per_host'
     MPI_CUSTOM_MPI_OPTIONS = 'sagemaker_mpi_custom_mpi_options'
 
-    def __init__(self, entry_point, source_dir=None, hyperparameters=None, enable_cloudwatch_metrics=False,
-                 container_log_level=logging.INFO, code_location=None, image_name=None, dependencies=None, **kwargs):
+    def __init__(self, entry_point, git_config=None, source_dir=None, hyperparameters=None,
+                 enable_cloudwatch_metrics=False, container_log_level=logging.INFO, code_location=None,
+                 image_name=None, dependencies=None, **kwargs):
         """Base class initializer. Subclasses which override ``__init__`` should invoke ``super()``
 
         Args:
-            entry_point (str): Path (absolute or relative) to the local Python source file which should be executed
+            entry_point (str): Path (absolute or relative) to either: 1. the local Python source file if git_support
+                is False 2. the Python source file in Git repo if git_support is True, which should be executed
                 as the entry point to training. This should be compatible with either Python 2.7 or Python 3.5.
+            git_config (dict[str, str]): Git configurations used for cloning files, including 'repo', 'branch'
+                and 'commit' for now (default: None).
             source_dir (str): Path (absolute or relative) to a directory with any other training
                 source code dependencies aside from the entry point file (default: None). Structure within this
                 directory are preserved when training on Amazon SageMaker.
@@ -815,9 +821,11 @@ def __init__(self, entry_point, source_dir=None, hyperparameters=None, enable_cl
             **kwargs: Additional kwargs passed to the ``EstimatorBase`` constructor.
         """
         super(Framework, self).__init__(**kwargs)
+
         if entry_point.startswith('s3://'):
             raise ValueError('Invalid entry point script: {}. Must be a path to a local file.'.format(entry_point))
         self.entry_point = entry_point
+        self.git_config = git_config
         self.source_dir = source_dir
         self.dependencies = dependencies or []
         if enable_cloudwatch_metrics:
@@ -830,6 +838,45 @@ def __init__(self, entry_point, source_dir=None, hyperparameters=None, enable_cl
 
         self._hyperparameters = hyperparameters or {}
 
+    def _git_clone_code(self):
+        """Git clone repo containing the training scripts.
+
+        This method also validate ``git_config``.
+        Set ``entry_point`` and ``source_dir`` to the right file or directory in the repo cloned.
+
+
+        """
+        validate_git_config(self.git_config)
+        # create a temporary directory to store the cloned repo
+        repo_dir = tempfile.mkdtemp()
+        try:
+            subprocess.check_call(['git', 'clone', self.git_config['repo'], repo_dir])
+        except subprocess.CalledProcessError:
+            raise ValueError('Failed to clone git repo.')
+
+        # checkout the specified branch and commit
+        os.chdir(repo_dir)
+        try:
+            subprocess.check_call(['git', 'checkout', self.git_config['branch']])
+        except subprocess.CalledProcessError:
+            raise ValueError('Failed to checkout the required branch.')
+        try:
+            subprocess.check_call(['git', 'checkout', self.git_config['commit']])
+        except subprocess.CalledProcessError:
+            raise ValueError('Failed to checkout the required commit.')
+
+        # check if the cloned repo contains entry point and source dir; if so, set ``entry_point`` and
+        # ``source_dir`` to the paths to local file system.
+        if not os.path.isfile(os.path.join(repo_dir, self.entry_point)):
+            raise ValueError('Entry point does not exist in the repo.')
+        else:
+            self.entry_point = os.path.join(repo_dir, self.entry_point)
+        if self.source_dir:
+            if not os.path.isdir(os.path.join(repo_dir, self.source_dir)):
+                raise ValueError('Source does not exist in the repo.')
+            else:
+                self.source_dir = os.path.join(repo_dir, self.source_dir)
+
     def _prepare_for_training(self, job_name=None):
         """Set hyperparameters needed for training. This method will also validate ``source_dir``.
 
diff --git a/src/sagemaker/fw_utils.py b/src/sagemaker/fw_utils.py
index f186fb8780..05ed4a894a 100644
--- a/src/sagemaker/fw_utils.py
+++ b/src/sagemaker/fw_utils.py
@@ -126,6 +126,28 @@ def _accelerator_type_valid_for_framework(framework, accelerator_type=None, opti
     return True
 
 
+def validate_git_config(git_config):
+    """check if a git_config param is valid
+
+    Args:
+        git_config (dict[str, str]): Git configurations used for cloning files, including 'repo', 'branch',
+            and 'commit' for now.
+
+    Raises:
+        ValueError: If:
+            1. git_config has no key 'repo'
+            2. git_config['repo'] is in the wrong format.
+    """
+    if 'repo' not in git_config:
+        raise ValueError('Please provide a repo for git_config.')
+    codecommit_url = git_config['repo'].startswith('https://git-codecommit') \
+        or git_config['repo'].startswith('ssh://git-codecommit')
+    github_url = git_config['repo'].startswith('https://github') \
+        or git_config['repo'].startswith('git@github')
+    if not codecommit_url and not github_url:
+        raise ValueError('Please provide a valid git repo url.')
+
+
 def validate_source_dir(script, directory):
     """Validate that the source directory exists and it contains the user script
 
diff --git a/tests/unit/test_estimator.py b/tests/unit/test_estimator.py
index c5d7a9f426..a3e34396e2 100644
--- a/tests/unit/test_estimator.py
+++ b/tests/unit/test_estimator.py
@@ -596,6 +596,19 @@ def test_prepare_for_training_force_name_generation(strftime, sagemaker_session)
     assert JOB_NAME == fw._current_job_name
 
 
+def test_git_clone_code_succeed(sagemaker_session):
+    git_config = {'repo': 'https://github.com/GaryTu1020/python-sdk-testing.git',
+                  'branch': 'branch1',
+                  'commit': 'aea6f3acef9619f77f94772d9d654f041e16bf49'}
+    fw = DummyFramework(entry_point='source_dir/entry_point', git_config=git_config,
+                        source_dir='source_dir', role=ROLE, sagemaker_session=sagemaker_session,
+                        train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
+                        enable_cloudwatch_metrics=True)
+    fw._git_clone_code()
+    assert os.path.isfile(fw.entry_point)
+    assert os.path.isdir(fw.source_dir)
+
+
 @patch('time.strftime', return_value=TIMESTAMP)
 def test_init_with_source_dir_s3(strftime, sagemaker_session):
     fw = DummyFramework(entry_point=SCRIPT_PATH, source_dir='s3://location', role=ROLE,
diff --git a/tests/unit/test_fw_utils.py b/tests/unit/test_fw_utils.py
index ed5e97b631..3a598fb0fc 100644
--- a/tests/unit/test_fw_utils.py
+++ b/tests/unit/test_fw_utils.py
@@ -120,6 +120,20 @@ def test_create_image_uri_local_sagemaker_notebook_accelerator():
     assert image_uri == '520713654638.dkr.ecr.mars-south-3.amazonaws.com/sagemaker-mxnet-eia:1.0rc-gpu-py3'
 
 
+def test_validate_git_config_repo_not_provided():
+    git_config = {'branch': 'master', 'username': 'User1', 'password': 'passw0rd'}
+    with pytest.raises(ValueError) as error:
+        fw_utils.validate_git_config(git_config)
+    assert 'Please provide a repo for git_config.' in str(error)
+
+
+def test_validate_git_config_bad_repo_url():
+    git_config = {'repo': 'hhttps://github.com/user/repo.git', 'branch': 'master', 'password': 'passw0rd'}
+    with pytest.raises(ValueError) as error:
+        fw_utils.validate_git_config(git_config)
+    assert 'Please provide a valid git repo url.' in str(error)
+
+
 def test_invalid_accelerator():
     error_message = '{} is not a valid SageMaker Elastic Inference accelerator type.'.format(MOCK_ACCELERATOR)
     # accelerator type is missing 'ml.' prefix

From 6b78ed4be5beed9196f090d303b69d66d10a5960 Mon Sep 17 00:00:00 2001
From: Yue Tu <yuetu@amazon.com>
Date: Thu, 6 Jun 2019 11:37:48 -0700
Subject: [PATCH 02/19] modify the order of git_config, add tests

---
 src/sagemaker/estimator.py   |  6 ++--
 tests/unit/test_estimator.py | 62 ++++++++++++++++++++++++++++++++++++
 2 files changed, 65 insertions(+), 3 deletions(-)

diff --git a/src/sagemaker/estimator.py b/src/sagemaker/estimator.py
index b8f5abd524..5b88304a20 100644
--- a/src/sagemaker/estimator.py
+++ b/src/sagemaker/estimator.py
@@ -773,9 +773,9 @@ class Framework(EstimatorBase):
     MPI_NUM_PROCESSES_PER_HOST = 'sagemaker_mpi_num_of_processes_per_host'
     MPI_CUSTOM_MPI_OPTIONS = 'sagemaker_mpi_custom_mpi_options'
 
-    def __init__(self, entry_point, git_config=None, source_dir=None, hyperparameters=None,
+    def __init__(self, entry_point, source_dir=None, hyperparameters=None,
                  enable_cloudwatch_metrics=False, container_log_level=logging.INFO, code_location=None,
-                 image_name=None, dependencies=None, **kwargs):
+                 image_name=None, dependencies=None, git_config=None, **kwargs):
         """Base class initializer. Subclasses which override ``__init__`` should invoke ``super()``
 
         Args:
@@ -873,7 +873,7 @@ def _git_clone_code(self):
             self.entry_point = os.path.join(repo_dir, self.entry_point)
         if self.source_dir:
             if not os.path.isdir(os.path.join(repo_dir, self.source_dir)):
-                raise ValueError('Source does not exist in the repo.')
+                raise ValueError('Source directory does not exist in the repo.')
             else:
                 self.source_dir = os.path.join(repo_dir, self.source_dir)
 
diff --git a/tests/unit/test_estimator.py b/tests/unit/test_estimator.py
index a3e34396e2..ac374a1da5 100644
--- a/tests/unit/test_estimator.py
+++ b/tests/unit/test_estimator.py
@@ -609,6 +609,68 @@ def test_git_clone_code_succeed(sagemaker_session):
     assert os.path.isdir(fw.source_dir)
 
 
+def test_git_clone_code_clone_fail(sagemaker_session):
+    git_config = {'repo': 'https://github.com/GaryTu1020/no-such-repo.git', 'branch': 'master'}
+    fw = DummyFramework(entry_point='entry_point', git_config=git_config, role=ROLE,
+                        sagemaker_session=sagemaker_session, train_instance_count=INSTANCE_COUNT,
+                        train_instance_type=INSTANCE_TYPE, enable_cloudwatch_metrics=True)
+    with pytest.raises(ValueError) as error:
+        fw._git_clone_code()
+    assert 'Failed to clone git repo.' in str(error)
+
+
+def test_git_clone_code_branch_not_exist(sagemaker_session):
+    git_config = {'repo': 'https://github.com/GaryTu1020/python-sdk-testing.git',
+                  'branch': 'branch-that-does-not-exist',
+                  'commit': 'aea6f3acef9619f77f94772d9d654f041e16bf49'}
+    fw = DummyFramework(entry_point='source_dir/entry_point', git_config=git_config,
+                        source_dir='source_dir', role=ROLE, sagemaker_session=sagemaker_session,
+                        train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
+                        enable_cloudwatch_metrics=True)
+    with pytest.raises(ValueError) as error:
+        fw._git_clone_code()
+    assert 'Failed to checkout the required branch.' in str(error)
+
+
+def test_git_clone_code_commit_not_exist(sagemaker_session):
+    git_config = {'repo': 'https://github.com/GaryTu1020/python-sdk-testing.git',
+                  'branch': 'master',
+                  'commit': 'commit-sha-that-does-not-exist'}
+    fw = DummyFramework(entry_point='source_dir/entry_point', git_config=git_config,
+                        source_dir='source_dir', role=ROLE, sagemaker_session=sagemaker_session,
+                        train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
+                        enable_cloudwatch_metrics=True)
+    with pytest.raises(ValueError) as error:
+        fw._git_clone_code()
+    assert 'Failed to checkout the required commit.' in str(error)
+
+
+def test_git_clone_code_entry_point_not_exist(sagemaker_session):
+    git_config = {'repo': 'https://github.com/GaryTu1020/python-sdk-testing.git',
+                  'branch': 'branch1',
+                  'commit': 'aea6f3acef9619f77f94772d9d654f041e16bf49'}
+    fw = DummyFramework(entry_point='entry_point', git_config=git_config,
+                        source_dir='source_dir', role=ROLE, sagemaker_session=sagemaker_session,
+                        train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
+                        enable_cloudwatch_metrics=True)
+    with pytest.raises(ValueError) as error:
+        fw._git_clone_code()
+    assert 'Entry point does not exist in the repo.' in str(error)
+
+
+def test_git_clone_code_source_dir_not_exist(sagemaker_session):
+    git_config = {'repo': 'https://github.com/GaryTu1020/python-sdk-testing.git',
+                  'branch': 'branch1',
+                  'commit': 'aea6f3acef9619f77f94772d9d654f041e16bf49'}
+    fw = DummyFramework(entry_point='source_dir/entry_point', git_config=git_config,
+                        source_dir='source_dir_that_does_not_exist', role=ROLE,
+                        sagemaker_session=sagemaker_session, train_instance_count=INSTANCE_COUNT,
+                        train_instance_type=INSTANCE_TYPE, enable_cloudwatch_metrics=True)
+    with pytest.raises(ValueError) as error:
+        fw._git_clone_code()
+    assert 'Source directory does not exist in the repo.' in str(error)
+
+
 @patch('time.strftime', return_value=TIMESTAMP)
 def test_init_with_source_dir_s3(strftime, sagemaker_session):
     fw = DummyFramework(entry_point=SCRIPT_PATH, source_dir='s3://location', role=ROLE,

From e59bb7922b80f82849d96a3425ac1021ffc414f4 Mon Sep 17 00:00:00 2001
From: Yue Tu <yuetu@amazon.com>
Date: Sat, 8 Jun 2019 00:44:17 -0700
Subject: [PATCH 03/19] move validate_git_config, add integ test

---
 src/sagemaker/estimator.py   | 134 +++++++++++++++++++++++++++--------
 src/sagemaker/fw_utils.py    |  22 ------
 tests/integ/test_git.py      |  56 +++++++++++++++
 tests/unit/test_estimator.py |  92 ++++++++++++++++++------
 tests/unit/test_fw_utils.py  |  14 ----
 5 files changed, 229 insertions(+), 89 deletions(-)
 create mode 100644 tests/integ/test_git.py

diff --git a/src/sagemaker/estimator.py b/src/sagemaker/estimator.py
index 5b88304a20..be3f541446 100644
--- a/src/sagemaker/estimator.py
+++ b/src/sagemaker/estimator.py
@@ -26,7 +26,7 @@
 import sagemaker
 from sagemaker.analytics import TrainingJobAnalytics
 from sagemaker.fw_utils import (create_image_uri, tar_and_upload_dir, parse_s3_url, UploadedCode,
-                                validate_source_dir, validate_git_config)
+                                validate_source_dir)
 from sagemaker.job import _Job
 from sagemaker.local import LocalSession
 from sagemaker.model import Model, NEO_ALLOWED_TARGET_INSTANCE_FAMILY, NEO_ALLOWED_FRAMEWORKS
@@ -779,14 +779,25 @@ def __init__(self, entry_point, source_dir=None, hyperparameters=None,
         """Base class initializer. Subclasses which override ``__init__`` should invoke ``super()``
 
         Args:
-            entry_point (str): Path (absolute or relative) to either: 1. the local Python source file if git_support
-                is False 2. the Python source file in Git repo if git_support is True, which should be executed
-                as the entry point to training. This should be compatible with either Python 2.7 or Python 3.5.
+            entry_point (str): Path (absolute or relative) to either: 1. the local Python source file if git_config
+                is not provided. 2. the Python source file in Git repo if git_config is provided, which should be
+                executed as the entry point to training. This should be compatible with either Python 2.7 or Python 3.5.
             git_config (dict[str, str]): Git configurations used for cloning files, including 'repo', 'branch'
-                and 'commit' for now (default: None).
+                and 'commit' (default: None).
+                'branch' and 'commit' are optional. If 'branch' is not specified, 'master' branch will be used. If
+                'commit' is not specified, the latest commit in the required branch will be used.
+                Example:
+
+                    The following config:
+                    >>> git_config = {'repo': 'https://github.com/GaryTu1020/python-sdk-testing.git',
+                    >>>               'branch': 'master',
+                    >>>               'commit': 'aea6f3acef9619f77f94772d9d654f041e16bf49'}
+                    results in cloning the repo specified in 'repo', then checkout the 'master' branch, and checkout
+                    the specified commit.
             source_dir (str): Path (absolute or relative) to a directory with any other training
-                source code dependencies aside from the entry point file (default: None). Structure within this
-                directory are preserved when training on Amazon SageMaker.
+                source code dependencies aside from the entry point file (default: None). If git_config is not provided,
+                this should be a local path; if git_config is provided, this should be a path in the Git repo.
+                Structure within this directory are preserved when training on Amazon SageMaker.
             dependencies (list[str]): A list of paths to directories (absolute or relative) with
                 any additional libraries that will be exported to the container (default: []).
                 The library folders will be copied to SageMaker in the same folder where the entrypoint is copied.
@@ -838,44 +849,107 @@ def __init__(self, entry_point, source_dir=None, hyperparameters=None,
 
         self._hyperparameters = hyperparameters or {}
 
-    def _git_clone_code(self):
-        """Git clone repo containing the training scripts.
+    def fit(self, inputs=None, wait=True, logs=True, job_name=None):
+        """Train a model using the input training dataset. If gif_config provided, the method does git clone first.
 
-        This method also validate ``git_config``.
-        Set ``entry_point`` and ``source_dir`` to the right file or directory in the repo cloned.
+        The API calls the Amazon SageMaker CreateTrainingJob API to start model training.
+        The API uses configuration you provided to create the estimator and the
+        specified input training data to send the CreatingTrainingJob request to Amazon SageMaker.
+
+        This is a synchronous operation. After the model training successfully completes,
+        you can call the ``deploy()`` method to host the model using the Amazon SageMaker hosting services.
+
+        Args:
+            inputs (str or dict or sagemaker.session.s3_input): Information about the training data.
+                This can be one of three types:
 
+                * (str) the S3 location where training data is saved.
 
+                * (dict[str, str] or dict[str, sagemaker.session.s3_input]) If using multiple channels for
+                    training data, you can specify a dict mapping channel names
+                    to strings or :func:`~sagemaker.session.s3_input` objects.
+                * (sagemaker.session.s3_input) - channel configuration for S3 data sources that can provide
+                    additional information as well as the path to the training dataset.
+                    See :func:`sagemaker.session.s3_input` for full details.
+            wait (bool): Whether the call should wait until the job completes (default: True).
+            logs (bool): Whether to show the logs produced by the job.
+                Only meaningful when wait is True (default: True).
+            job_name (str): Training job name. If not specified, the estimator generates a default job name,
+                based on the training image name and current timestamp.
         """
-        validate_git_config(self.git_config)
+        if self.git_config:
+            self._git_clone_code()
+        super(Framework, self).fit(inputs=inputs, wait=wait, logs=logs, job_name=job_name)
+
+    def _git_clone_code(self):
+        """Git clone repo containing the training scripts. This method also validate ``git_config``,
+        and set ``entry_point`` and ``source_dir`` to the right file or directory in the repo cloned.
+
+        Raises:
+            CalledProcessError: If 1. failed to clone git repo
+                                   2. failed to checkout the required branch
+                                   3. failed to checkout the required commit
+            ValueError: If 1. entry point specified does not exist in the repo
+                           2. source dir specified does not exist in the repo
+        """
+        self._validate_git_config()
         # create a temporary directory to store the cloned repo
         repo_dir = tempfile.mkdtemp()
         try:
             subprocess.check_call(['git', 'clone', self.git_config['repo'], repo_dir])
         except subprocess.CalledProcessError:
-            raise ValueError('Failed to clone git repo.')
+            raise subprocess.CalledProcessError(1, cmd='git clone {} {}'.format(self.git_config['repo'], repo_dir))
 
-        # checkout the specified branch and commit
-        os.chdir(repo_dir)
-        try:
-            subprocess.check_call(['git', 'checkout', self.git_config['branch']])
-        except subprocess.CalledProcessError:
-            raise ValueError('Failed to checkout the required branch.')
-        try:
-            subprocess.check_call(['git', 'checkout', self.git_config['commit']])
-        except subprocess.CalledProcessError:
-            raise ValueError('Failed to checkout the required commit.')
+        self._checkout_branch_and_commit(repo_dir)
 
         # check if the cloned repo contains entry point and source dir; if so, set ``entry_point`` and
         # ``source_dir`` to the paths to local file system.
-        if not os.path.isfile(os.path.join(repo_dir, self.entry_point)):
-            raise ValueError('Entry point does not exist in the repo.')
-        else:
-            self.entry_point = os.path.join(repo_dir, self.entry_point)
         if self.source_dir:
-            if not os.path.isdir(os.path.join(repo_dir, self.source_dir)):
-                raise ValueError('Source directory does not exist in the repo.')
-            else:
+            if os.path.isdir(os.path.join(repo_dir, self.source_dir)):
                 self.source_dir = os.path.join(repo_dir, self.source_dir)
+                os.chdir(self.source_dir)
+            else:
+                raise ValueError('Source directory does not exist in the repo.')
+            if not os.path.isfile(os.path.join(self.source_dir, self.entry_point)):
+                raise ValueError('Entry point does not exist in the repo.')
+
+    def _checkout_branch_and_commit(self, repo_dir):
+        """Enter the directory where the repo is cloned, and  checkout the required branch and commit.
+
+        Args:
+            repo_dir: the directory where the repo is cloned
+
+        Raises:
+            ValueError: If 1. entry point specified does not exist in the repo
+                           2. source dir specified does not exist in the repo
+        """
+        os.chdir(repo_dir)
+        if 'branch' in self.git_config:
+            try:
+                subprocess.check_call(['git', 'checkout', self.git_config['branch']])
+            except subprocess.CalledProcessError:
+                raise subprocess.CalledProcessError(1, cmd='git checkout {}'.format(self.git_config['branch']))
+        if 'commit' in self.git_config:
+            try:
+                subprocess.check_call(['git', 'checkout', self.git_config['commit']])
+            except subprocess.CalledProcessError:
+                raise subprocess.CalledProcessError(1, cmd='git checkout {}'.format(self.git_config['commit']))
+
+    def _validate_git_config(self):
+        """check if a git_config param is valid
+
+        Raises:
+            ValueError: If:
+                1. git_config has no key 'repo'
+                2. git_config['repo'] is in the wrong format.
+        """
+        if 'repo' not in self.git_config:
+            raise ValueError('Please provide a repo for git_config.')
+        repo = self.git_config['repo']
+        codecommit_url = repo.startswith('https://git-codecommit') or repo.startswith('ssh://git-codecommit')
+        github_url = repo.startswith('https://github') or repo.startswith('git@github')
+        if not codecommit_url and not github_url:
+            raise ValueError('Please provide a valid git repo url.')
 
     def _prepare_for_training(self, job_name=None):
         """Set hyperparameters needed for training. This method will also validate ``source_dir``.
diff --git a/src/sagemaker/fw_utils.py b/src/sagemaker/fw_utils.py
index 05ed4a894a..f186fb8780 100644
--- a/src/sagemaker/fw_utils.py
+++ b/src/sagemaker/fw_utils.py
@@ -126,28 +126,6 @@ def _accelerator_type_valid_for_framework(framework, accelerator_type=None, opti
     return True
 
 
-def validate_git_config(git_config):
-    """check if a git_config param is valid
-
-    Args:
-        git_config (dict[str, str]): Git configurations used for cloning files, including 'repo', 'branch',
-            and 'commit' for now.
-
-    Raises:
-        ValueError: If:
-            1. git_config has no key 'repo'
-            2. git_config['repo'] is in the wrong format.
-    """
-    if 'repo' not in git_config:
-        raise ValueError('Please provide a repo for git_config.')
-    codecommit_url = git_config['repo'].startswith('https://git-codecommit') \
-        or git_config['repo'].startswith('ssh://git-codecommit')
-    github_url = git_config['repo'].startswith('https://github') \
-        or git_config['repo'].startswith('git@github')
-    if not codecommit_url and not github_url:
-        raise ValueError('Please provide a valid git repo url.')
-
-
 def validate_source_dir(script, directory):
     """Validate that the source directory exists and it contains the user script
 
diff --git a/tests/integ/test_git.py b/tests/integ/test_git.py
new file mode 100644
index 0000000000..b7454ed428
--- /dev/null
+++ b/tests/integ/test_git.py
@@ -0,0 +1,56 @@
+# Copyright 2017-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+from __future__ import absolute_import
+
+import os
+
+import numpy
+
+from sagemaker.pytorch.estimator import PyTorch
+from sagemaker.pytorch.model import PyTorchModel
+from sagemaker.utils import sagemaker_timestamp
+from tests.integ import DATA_DIR, PYTHON_VERSION, TRAINING_DEFAULT_TIMEOUT_MINUTES
+from tests.integ.timeout import timeout, timeout_and_delete_endpoint_by_name
+
+
+def test_git_support_with_pytorch(sagemaker_local_session):
+    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
+        script_path = 'mnist.py'
+        data_path = os.path.join(DATA_DIR, 'pytorch_mnist')
+        git_config = {'repo': 'https://github.com/GaryTu1020/python-sdk-testing.git',
+                      'branch': 'branch1',
+                      'commit': '1867259a76ee740b99ce7ab00d6a32b582c85e06'}
+        pytorch = PyTorch(entry_point=script_path, role='SageMakerRole', source_dir='pytorch',
+                          framework_version=PyTorch.LATEST_VERSION, py_version=PYTHON_VERSION,
+                          train_instance_count=1, train_instance_type='ml.c4.xlarge',
+                          sagemaker_session=sagemaker_local_session, git_config=git_config)
+
+        train_input = pytorch.sagemaker_session.upload_data(path=os.path.join(data_path, 'training'),
+                                                            key_prefix='integ-test-data/pytorch_mnist/training')
+        pytorch.fit({'training': train_input})
+
+    files = [file for file in os.listdir(pytorch.source_dir)]
+    assert files == ['some-file', 'mnist.py']
+
+    endpoint_name = 'test-git_support-with-pytorch-{}'.format(sagemaker_timestamp())
+
+    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_local_session):
+        desc = sagemaker_local_session.sagemaker_client.describe_training_job(pytorch.latest_training_job.name)
+        model_data = desc['ModelArtifacts']['S3ModelArtifacts']
+        model = PyTorchModel(model_data, 'SageMakerRole', entry_point=script_path,
+                             sagemaker_session=sagemaker_local_session)
+        predictor = model.deploy(1, 'ml.m4.xlarge', endpoint_name=endpoint_name)
+
+        data = numpy.zeros(shape=(1, 1, 28, 28))
+        result = predictor.predict(data)
+        assert result is not None
diff --git a/tests/unit/test_estimator.py b/tests/unit/test_estimator.py
index ac374a1da5..381240e4d9 100644
--- a/tests/unit/test_estimator.py
+++ b/tests/unit/test_estimator.py
@@ -15,6 +15,7 @@
 import logging
 import json
 import os
+import subprocess
 from time import sleep
 
 import pytest
@@ -596,78 +597,123 @@ def test_prepare_for_training_force_name_generation(strftime, sagemaker_session)
     assert JOB_NAME == fw._current_job_name
 
 
-def test_git_clone_code_succeed(sagemaker_session):
+def test_git_support_with_branch_and_commit_succeed(sagemaker_session):
     git_config = {'repo': 'https://github.com/GaryTu1020/python-sdk-testing.git',
                   'branch': 'branch1',
                   'commit': 'aea6f3acef9619f77f94772d9d654f041e16bf49'}
-    fw = DummyFramework(entry_point='source_dir/entry_point', git_config=git_config,
+    fw = DummyFramework(entry_point='entry_point', git_config=git_config,
+                        source_dir='source_dir', role=ROLE, sagemaker_session=sagemaker_session,
+                        train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
+                        enable_cloudwatch_metrics=True)
+    fw.fit()
+    assert os.path.isfile(fw.entry_point)
+    assert os.path.isdir(fw.source_dir)
+
+
+def test_git_support_with_branch_succeed(sagemaker_session):
+    git_config = {'repo': 'https://github.com/GaryTu1020/python-sdk-testing.git',
+                  'branch': 'branch1'}
+    fw = DummyFramework(entry_point='entry_point', git_config=git_config,
                         source_dir='source_dir', role=ROLE, sagemaker_session=sagemaker_session,
                         train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
                         enable_cloudwatch_metrics=True)
-    fw._git_clone_code()
+    fw.fit()
     assert os.path.isfile(fw.entry_point)
     assert os.path.isdir(fw.source_dir)
 
 
-def test_git_clone_code_clone_fail(sagemaker_session):
+def test_git_support_without_branch_and_commit_succeed(sagemaker_session):
+    git_config = {'repo': 'https://github.com/GaryTu1020/python-sdk-testing.git'}
+    fw = DummyFramework(entry_point='entry_point', git_config=git_config,
+                        role=ROLE, sagemaker_session=sagemaker_session,
+                        train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
+                        enable_cloudwatch_metrics=True)
+    fw.fit()
+    assert os.path.isfile(fw.entry_point)
+
+
+def test_git_support_repo_not_provided(sagemaker_session):
+    git_config = {'branch': 'master',
+                  'commit': 'aea6f3acef9619f77f94772d9d654f041e16bf49'}
+    fw = DummyFramework(entry_point='entry_point', git_config=git_config,
+                        source_dir='source_dir', role=ROLE,
+                        sagemaker_session=sagemaker_session, train_instance_count=INSTANCE_COUNT,
+                        train_instance_type=INSTANCE_TYPE, enable_cloudwatch_metrics=True)
+    with pytest.raises(ValueError) as error:
+        fw.fit()
+    assert 'Please provide a repo for git_config.' in str(error)
+
+
+def test_git_support_bad_repo_url_format(sagemaker_session):
+    git_config = {'repo': 'hhttps://github.com/user/repo.git', 'branch': 'master', 'password': 'passw0rd'}
+    fw = DummyFramework(entry_point='entry_point', git_config=git_config,
+                        source_dir='source_dir', role=ROLE,
+                        sagemaker_session=sagemaker_session, train_instance_count=INSTANCE_COUNT,
+                        train_instance_type=INSTANCE_TYPE, enable_cloudwatch_metrics=True)
+    with pytest.raises(ValueError) as error:
+        fw.fit()
+    assert 'Please provide a valid git repo url.' in str(error)
+
+
+def test_git_support_git_clone_fail(sagemaker_session):
     git_config = {'repo': 'https://github.com/GaryTu1020/no-such-repo.git', 'branch': 'master'}
     fw = DummyFramework(entry_point='entry_point', git_config=git_config, role=ROLE,
                         sagemaker_session=sagemaker_session, train_instance_count=INSTANCE_COUNT,
                         train_instance_type=INSTANCE_TYPE, enable_cloudwatch_metrics=True)
-    with pytest.raises(ValueError) as error:
-        fw._git_clone_code()
-    assert 'Failed to clone git repo.' in str(error)
+    with pytest.raises(subprocess.CalledProcessError) as error:
+        fw.fit()
+    assert "Command 'git clone" in str(error)
 
 
-def test_git_clone_code_branch_not_exist(sagemaker_session):
+def test_git_support_branch_not_exist(sagemaker_session):
     git_config = {'repo': 'https://github.com/GaryTu1020/python-sdk-testing.git',
                   'branch': 'branch-that-does-not-exist',
                   'commit': 'aea6f3acef9619f77f94772d9d654f041e16bf49'}
-    fw = DummyFramework(entry_point='source_dir/entry_point', git_config=git_config,
+    fw = DummyFramework(entry_point='entry_point', git_config=git_config,
                         source_dir='source_dir', role=ROLE, sagemaker_session=sagemaker_session,
                         train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
                         enable_cloudwatch_metrics=True)
-    with pytest.raises(ValueError) as error:
-        fw._git_clone_code()
-    assert 'Failed to checkout the required branch.' in str(error)
+    with pytest.raises(subprocess.CalledProcessError) as error:
+        fw.fit()
+    assert "Command 'git checkout" in str(error)
 
 
-def test_git_clone_code_commit_not_exist(sagemaker_session):
+def test_git_support_commit_not_exist(sagemaker_session):
     git_config = {'repo': 'https://github.com/GaryTu1020/python-sdk-testing.git',
                   'branch': 'master',
                   'commit': 'commit-sha-that-does-not-exist'}
-    fw = DummyFramework(entry_point='source_dir/entry_point', git_config=git_config,
+    fw = DummyFramework(entry_point='entry_point', git_config=git_config,
                         source_dir='source_dir', role=ROLE, sagemaker_session=sagemaker_session,
                         train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
                         enable_cloudwatch_metrics=True)
-    with pytest.raises(ValueError) as error:
-        fw._git_clone_code()
-    assert 'Failed to checkout the required commit.' in str(error)
+    with pytest.raises(subprocess.CalledProcessError) as error:
+        fw.fit()
+    assert "Command 'git checkout" in str(error)
 
 
-def test_git_clone_code_entry_point_not_exist(sagemaker_session):
+def test_git_support_entry_point_not_exist(sagemaker_session):
     git_config = {'repo': 'https://github.com/GaryTu1020/python-sdk-testing.git',
                   'branch': 'branch1',
                   'commit': 'aea6f3acef9619f77f94772d9d654f041e16bf49'}
-    fw = DummyFramework(entry_point='entry_point', git_config=git_config,
+    fw = DummyFramework(entry_point='entry_point_that_does_not_exist', git_config=git_config,
                         source_dir='source_dir', role=ROLE, sagemaker_session=sagemaker_session,
                         train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
                         enable_cloudwatch_metrics=True)
     with pytest.raises(ValueError) as error:
-        fw._git_clone_code()
+        fw.fit()
     assert 'Entry point does not exist in the repo.' in str(error)
 
 
-def test_git_clone_code_source_dir_not_exist(sagemaker_session):
+def test_git_support_source_dir_not_exist(sagemaker_session):
     git_config = {'repo': 'https://github.com/GaryTu1020/python-sdk-testing.git',
                   'branch': 'branch1',
                   'commit': 'aea6f3acef9619f77f94772d9d654f041e16bf49'}
-    fw = DummyFramework(entry_point='source_dir/entry_point', git_config=git_config,
+    fw = DummyFramework(entry_point='entry_point', git_config=git_config,
                         source_dir='source_dir_that_does_not_exist', role=ROLE,
                         sagemaker_session=sagemaker_session, train_instance_count=INSTANCE_COUNT,
                         train_instance_type=INSTANCE_TYPE, enable_cloudwatch_metrics=True)
     with pytest.raises(ValueError) as error:
-        fw._git_clone_code()
+        fw.fit()
     assert 'Source directory does not exist in the repo.' in str(error)
 
 
diff --git a/tests/unit/test_fw_utils.py b/tests/unit/test_fw_utils.py
index 3a598fb0fc..ed5e97b631 100644
--- a/tests/unit/test_fw_utils.py
+++ b/tests/unit/test_fw_utils.py
@@ -120,20 +120,6 @@ def test_create_image_uri_local_sagemaker_notebook_accelerator():
     assert image_uri == '520713654638.dkr.ecr.mars-south-3.amazonaws.com/sagemaker-mxnet-eia:1.0rc-gpu-py3'
 
 
-def test_validate_git_config_repo_not_provided():
-    git_config = {'branch': 'master', 'username': 'User1', 'password': 'passw0rd'}
-    with pytest.raises(ValueError) as error:
-        fw_utils.validate_git_config(git_config)
-    assert 'Please provide a repo for git_config.' in str(error)
-
-
-def test_validate_git_config_bad_repo_url():
-    git_config = {'repo': 'hhttps://github.com/user/repo.git', 'branch': 'master', 'password': 'passw0rd'}
-    with pytest.raises(ValueError) as error:
-        fw_utils.validate_git_config(git_config)
-    assert 'Please provide a valid git repo url.' in str(error)
-
-
 def test_invalid_accelerator():
     error_message = '{} is not a valid SageMaker Elastic Inference accelerator type.'.format(MOCK_ACCELERATOR)
     # accelerator type is missing 'ml.' prefix

From 7808faa22babdfe0f78b88878a21c5d5202498e6 Mon Sep 17 00:00:00 2001
From: Yue Tu <yuetu@amazon.com>
Date: Mon, 10 Jun 2019 15:06:05 -0700
Subject: [PATCH 04/19] modify location _git_clone_code called

---
 src/sagemaker/estimator.py   | 75 ++++++++++++++++++------------------
 tests/unit/test_estimator.py |  4 +-
 2 files changed, 39 insertions(+), 40 deletions(-)

diff --git a/src/sagemaker/estimator.py b/src/sagemaker/estimator.py
index be3f541446..41e34c623e 100644
--- a/src/sagemaker/estimator.py
+++ b/src/sagemaker/estimator.py
@@ -779,9 +779,21 @@ def __init__(self, entry_point, source_dir=None, hyperparameters=None,
         """Base class initializer. Subclasses which override ``__init__`` should invoke ``super()``
 
         Args:
-            entry_point (str): Path (absolute or relative) to either: 1. the local Python source file if git_config
-                is not provided. 2. the Python source file in Git repo if git_config is provided, which should be
-                executed as the entry point to training. This should be compatible with either Python 2.7 or Python 3.5.
+            entry_point (str): Path (absolute or relative) to the local Python source file which should be executed
+                as the entry point to training. This should be compatible with either Python 2.7 or Python 3.5.
+                If 'git_config' is provided, 'entry_point' should be a relative location to the Python source file in
+                the Git repo.
+                Example:
+
+                    If the following is the structure of a Git repo:
+
+                    >>> |----- README.md
+                    >>> |----- src
+                    >>>         |----- train.py
+                    >>>         |----- test.py
+
+                    and we need 'train.py' as entry point, and 'source_dir' is not provided, then 'entry_point' should
+                    be 'src/train.py'.
             git_config (dict[str, str]): Git configurations used for cloning files, including 'repo', 'branch'
                 and 'commit' (default: None).
                 'branch' and 'commit' are optional. If 'branch' is not specified, 'master' branch will be used. If
@@ -789,15 +801,28 @@ def __init__(self, entry_point, source_dir=None, hyperparameters=None,
                 Example:
 
                     The following config:
+
                     >>> git_config = {'repo': 'https://github.com/GaryTu1020/python-sdk-testing.git',
                     >>>               'branch': 'master',
                     >>>               'commit': 'aea6f3acef9619f77f94772d9d654f041e16bf49'}
+
                     results in cloning the repo specified in 'repo', then checkout the 'master' branch, and checkout
                     the specified commit.
             source_dir (str): Path (absolute or relative) to a directory with any other training
-                source code dependencies aside from the entry point file (default: None). If git_config is not provided,
-                this should be a local path; if git_config is provided, this should be a path in the Git repo.
-                Structure within this directory are preserved when training on Amazon SageMaker.
+                source code dependencies aside from the entry point file (default: None). Structure within this
+                directory are preserved when training on Amazon SageMaker. If 'git_config' is provided,
+                source_dir should be a relative location to a directory in the Git repo.
+                Example:
+
+                    If the following is the structure of a Git repo:
+
+                    >>> |----- README.md
+                    >>> |----- src
+                    >>>         |----- train.py
+                    >>>         |----- test.py
+
+                    and we need 'train.py' as entry point and 'src' as source dir, then 'entry_point' should be
+                    'train.py', 'source_dir' should be 'src'.
             dependencies (list[str]): A list of paths to directories (absolute or relative) with
                 any additional libraries that will be exported to the container (default: []).
                 The library folders will be copied to SageMaker in the same folder where the entrypoint is copied.
@@ -849,38 +874,6 @@ def __init__(self, entry_point, source_dir=None, hyperparameters=None,
 
         self._hyperparameters = hyperparameters or {}
 
-    def fit(self, inputs=None, wait=True, logs=True, job_name=None):
-        """Train a model using the input training dataset. If gif_config provided, the method does git clone first.
-
-        The API calls the Amazon SageMaker CreateTrainingJob API to start model training.
-        The API uses configuration you provided to create the estimator and the
-        specified input training data to send the CreatingTrainingJob request to Amazon SageMaker.
-
-        This is a synchronous operation. After the model training successfully completes,
-        you can call the ``deploy()`` method to host the model using the Amazon SageMaker hosting services.
-
-        Args:
-            inputs (str or dict or sagemaker.session.s3_input): Information about the training data.
-                This can be one of three types:
-
-                * (str) the S3 location where training data is saved.
-
-                * (dict[str, str] or dict[str, sagemaker.session.s3_input]) If using multiple channels for
-                    training data, you can specify a dict mapping channel names
-                    to strings or :func:`~sagemaker.session.s3_input` objects.
-                * (sagemaker.session.s3_input) - channel configuration for S3 data sources that can provide
-                    additional information as well as the path to the training dataset.
-                    See :func:`sagemaker.session.s3_input` for full details.
-            wait (bool): Whether the call should wait until the job completes (default: True).
-            logs (bool): Whether to show the logs produced by the job.
-                Only meaningful when wait is True (default: True).
-            job_name (str): Training job name. If not specified, the estimator generates a default job name,
-                based on the training image name and current timestamp.
-        """
-        if self.git_config:
-            self._git_clone_code()
-        super(Framework, self).fit(inputs=inputs, wait=wait, logs=logs, job_name=job_name)
-
     def _git_clone_code(self):
         """Git clone repo containing the training scripts. This method also validate ``git_config``,
         and set ``entry_point`` and ``source_dir`` to the right file or directory in the repo cloned.
@@ -912,6 +905,9 @@ def _git_clone_code(self):
                 raise ValueError('Source directory does not exist in the repo.')
             if not os.path.isfile(os.path.join(self.source_dir, self.entry_point)):
                 raise ValueError('Entry point does not exist in the repo.')
+        else:
+            if not os.path.isfile(os.path.join(repo_dir, self.entry_point)):
+                raise ValueError('Entry point does not exist in the repo.')
 
     def _checkout_branch_and_commit(self, repo_dir):
         """Enter the directory where the repo is cloned, and  checkout the required branch and commit.
@@ -960,6 +956,9 @@ def _prepare_for_training(self, job_name=None):
         """
         super(Framework, self)._prepare_for_training(job_name=job_name)
 
+        if self.git_config:
+            self._git_clone_code()
+
         # validate source dir will raise a ValueError if there is something wrong with the
         # source directory. We are intentionally not handling it because this is a critical error.
         if self.source_dir and not self.source_dir.lower().startswith('s3://'):
diff --git a/tests/unit/test_estimator.py b/tests/unit/test_estimator.py
index 381240e4d9..274b8afcf2 100644
--- a/tests/unit/test_estimator.py
+++ b/tests/unit/test_estimator.py
@@ -695,8 +695,8 @@ def test_git_support_entry_point_not_exist(sagemaker_session):
     git_config = {'repo': 'https://github.com/GaryTu1020/python-sdk-testing.git',
                   'branch': 'branch1',
                   'commit': 'aea6f3acef9619f77f94772d9d654f041e16bf49'}
-    fw = DummyFramework(entry_point='entry_point_that_does_not_exist', git_config=git_config,
-                        source_dir='source_dir', role=ROLE, sagemaker_session=sagemaker_session,
+    fw = DummyFramework(entry_point='entry_point', git_config=git_config,
+                        role=ROLE, sagemaker_session=sagemaker_session,
                         train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
                         enable_cloudwatch_metrics=True)
     with pytest.raises(ValueError) as error:

From f3978505153f615fc07f49f36bb179c1976d8361 Mon Sep 17 00:00:00 2001
From: GaryTu1020 <45720913+GaryTu1020@users.noreply.github.com>
Date: Wed, 12 Jun 2019 16:13:29 -0700
Subject: [PATCH 05/19] Update doc/overview.rst

Co-Authored-By: Marcio Vinicius dos Santos <mvs.usp@gmail.com>
---
 doc/overview.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/overview.rst b/doc/overview.rst
index adf7daa612..74e53338f9 100644
--- a/doc/overview.rst
+++ b/doc/overview.rst
@@ -86,7 +86,7 @@ For more `information <https://boto3.amazonaws.com/v1/documentation/api/latest/r
 
 Git Support
 ~~~~~~~~~~~
-The SageMaker Python SDK has Git support. If you have your training scripts in your GitHub repository, you can
+If you have your training scripts in your GitHub repository, you can
 use them directly wihtout the trouble to download them to local machine. Git support can be enabled simply by
 providing ``git_config`` parameter when initializing an estimator. If git support is enabled, then ``entry_point``,
 ``source_dir`` and  ``dependencies`` should all be relative paths in the Git repo. Git support works for frameworks

From 5b8d6849d76a56a41f9d4782be90f3d90b0d4dfc Mon Sep 17 00:00:00 2001
From: GaryTu1020 <45720913+GaryTu1020@users.noreply.github.com>
Date: Wed, 12 Jun 2019 16:14:37 -0700
Subject: [PATCH 06/19] Update doc/overview.rst

Co-Authored-By: Marcio Vinicius dos Santos <mvs.usp@gmail.com>
---
 doc/overview.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/overview.rst b/doc/overview.rst
index 74e53338f9..ca9312d2cd 100644
--- a/doc/overview.rst
+++ b/doc/overview.rst
@@ -87,7 +87,7 @@ For more `information <https://boto3.amazonaws.com/v1/documentation/api/latest/r
 Git Support
 ~~~~~~~~~~~
 If you have your training scripts in your GitHub repository, you can
-use them directly wihtout the trouble to download them to local machine. Git support can be enabled simply by
+use them directly without the trouble to download them to local machine. Git support can be enabled simply by
 providing ``git_config`` parameter when initializing an estimator. If git support is enabled, then ``entry_point``,
 ``source_dir`` and  ``dependencies`` should all be relative paths in the Git repo. Git support works for frameworks
 images (TensorFlow, MXNet, Chainer, PyTorch, and Scikit-Learn). Note that if you decided to

From a9e293204e1c0a939dbd669028fb5a27bce061d7 Mon Sep 17 00:00:00 2001
From: Yue Tu <yuetu@amazon.com>
Date: Thu, 13 Jun 2019 15:08:11 -0700
Subject: [PATCH 07/19] add more integ tests

---
 doc/overview.rst             | 68 ++++++++++++++++++-------------
 src/sagemaker/estimator.py   | 68 +++++++++++++------------------
 tests/integ/test_git.py      | 79 ++++++++++++++++++++++++------------
 tests/unit/test_estimator.py | 18 ++++----
 4 files changed, 130 insertions(+), 103 deletions(-)

diff --git a/doc/overview.rst b/doc/overview.rst
index adf7daa612..6ae3c3f16c 100644
--- a/doc/overview.rst
+++ b/doc/overview.rst
@@ -86,53 +86,63 @@ For more `information <https://boto3.amazonaws.com/v1/documentation/api/latest/r
 
 Git Support
 ~~~~~~~~~~~
-The SageMaker Python SDK has Git support. If you have your training scripts in your GitHub repository, you can
-use them directly wihtout the trouble to download them to local machine. Git support can be enabled simply by
-providing ``git_config`` parameter when initializing an estimator. If git support is enabled, then ``entry_point``,
-``source_dir`` and  ``dependencies`` should all be relative paths in the Git repo. Git support works for frameworks
-images (TensorFlow, MXNet, Chainer, PyTorch, and Scikit-Learn). Note that if you decided to
-use Git support, then everything you need for ``entry_point``, ``source_dir`` and ``dependencies`` should be in
-a single Git repo.
+If you have your training scripts in your GitHub repository, you can use them directly without the trouble to download
+them to local machine. Git support can be enabled simply by providing ``git_config`` parameter when initializing an
+estimator. If git support is enabled, then ``entry_point``, ``source_dir`` and  ``dependencies`` should all be relative
+paths in the Git repo. Note that if you decided to use Git support, then everything you need for ``entry_point``,
+``source_dir`` and ``dependencies`` should be in a single Git repo.
 
-Here is an example:
+Here are ways to specify ``git_config``:
 
 .. code:: python
 
-        import sagemaker
-
         # Specifies the git_config parameter
         git_config = {'repo': 'https://github.com/GaryTu1020/python-sdk-testing.git',
                       'branch': 'branch1',
                       'commit': '4893e528afa4a790331e1b5286954f073b0f14a2'}
 
-        # Configures a PyTorch Estimator (no training happens here)
+        # Alternatively, you can also specify git_config by providing only 'repo' and 'branch'.
+        # If this is the case, the latest commit in the branch will be used.
+        git_config = {'repo': 'https://github.com/GaryTu1020/python-sdk-testing.git',
+                      'branch': 'branch1'}
+
+        # Only providing 'repo' is also allowed. If this is the case, latest commit in
+        # 'master' branch will be used.
+        git_config = {'repo': 'https://github.com/GaryTu1020/python-sdk-testing.git'
+
+The following are some examples to define estimators with Git support:
+
+.. code:: python
+
+        # In this example, the source directory 'pytorch' contains the entry point 'mnist.py' and other source code.
+        # and it is  relative path inside the Git repo.
         pytorch_estimator = PyTorch(entry_point='mnist.py',
                                     role='SageMakerRole',
                                     source_dir='pytorch',
                                     git_config=git_config,
-                                    framework_version=PyTorch.LATEST_VERSION,
-                                    py_version=PYTHON_VERSION,
                                     train_instance_count=1,
                                     train_instance_type='ml.c4.xlarge',
                                     sagemaker_session=sagemaker_local_session)
 
-        # Starts a SageMaker training job and waits until completion
-        pytorch_estimator.fit('s3://my_bucket/my_training_data/')
-
-        # Deploys the model that was generated by fit() to an existing SageMaker endpoint
-        mxnet_predictor = mxnet_estimator.deploy(initial_instance_count=1,
-                                                 instance_type='ml.p2.xlarge',
-                                                 update_endpoint=True,
-                                                 endpoint_name='existing-endpoint')
-
-        # Serializes data and makes a prediction request to the SageMaker endpoint
-        response = mxnet_predictor.predict(data)
-
-        # Tears down the SageMaker endpoint and endpoint configuration
-        mxnet_predictor.delete_endpoint()
+        # In this example, the entry point 'mnist.py' is all we need for source code.
+        # We need to specify the path to it in the Git repo.
+        mx_estimator = MXNet(entry_point='mxnet/mnist.py',
+                                    role='SageMakerRole',
+                                    git_config=git_config,
+                                    train_instance_count=1,
+                                    train_instance_type='ml.c4.xlarge',
+                                    sagemaker_session=sagemaker_local_session)
 
-        # Deletes the SageMaker model
-        mxnet_predictor.delete_model()
+        # In this example, besides entry point and other source code in source directory, we still need some
+        # dependencies for the training job. Dependencies should also be paths inside the Git repo.
+        pytorch_estimator = PyTorch(entry_point='mnist.py',
+                                    role='SageMakerRole',
+                                    source_dir='pytorch',
+                                    dependencies=['dep.py', 'foo/bar.py'],
+                                    git_config=git_config,
+                                    train_instance_count=1,
+                                    train_instance_type='ml.c4.xlarge',
+                                    sagemaker_session=sagemaker_local_session)
 
 When Git support is enabled, users can still use local mode in the same way.
 
diff --git a/src/sagemaker/estimator.py b/src/sagemaker/estimator.py
index c900e3e0c8..d734378f70 100644
--- a/src/sagemaker/estimator.py
+++ b/src/sagemaker/estimator.py
@@ -823,7 +823,8 @@ def __init__(self, entry_point, source_dir=None, hyperparameters=None,
                     >>>         |----- train.py
                     >>>         |----- test.py
 
-                    You can assign entry_point='train.py', source_dir='src'.
+                    and you need 'train.py' as entry point and 'test.py' as training source code as well, you can
+                    assign entry_point='train.py', source_dir='src'.
             dependencies (list[str]): A list of paths to directories (absolute or relative) with
                 any additional libraries that will be exported to the container (default: []).
                 The library folders will be copied to SageMaker in the same folder where the entrypoint is copied.
@@ -876,8 +877,8 @@ def __init__(self, entry_point, source_dir=None, hyperparameters=None,
         self._hyperparameters = hyperparameters or {}
 
     def _git_clone_code(self):
-        """Git clone repo containing the training scripts. This method also validate ``git_config``,
-        and set ``entry_point`` and ``source_dir`` to the right file or directory in the repo cloned.
+        """Git clone repo containing the training scripts and enter that directory. This method also validate
+        ``git_config``, and set ``entry_point`` and ``source_dir`` to the right file or directory in the repo cloned.
 
         Raises:
             CalledProcessError: If 1. failed to clone git repo
@@ -889,29 +890,39 @@ def _git_clone_code(self):
         self._validate_git_config()
         # create a temporary directory to store the cloned repo
         repo_dir = tempfile.mkdtemp()
-        try:
-            subprocess.check_call(['git', 'clone', self.git_config['repo'], repo_dir])
-        except subprocess.CalledProcessError:
-            raise subprocess.CalledProcessError(1, cmd='git clone {} {}'.format(self.git_config['repo'], repo_dir))
+        # try:
+        subprocess.check_call(['git', 'clone', self.git_config['repo'], repo_dir])
+        # except subprocess.CalledProcessError:
+        #    raise subprocess.CalledProcessError(1, cmd='git clone {} {}'.format(self.git_config['repo'], repo_dir))
 
         self._checkout_branch_and_commit(repo_dir)
 
-        # check if the cloned repo contains entry point and source dir; if so, set ``entry_point`` and
-        # ``source_dir`` to the paths to local file system.
+        # check if the cloned repo contains entry point, source directory and dependencies
         if self.source_dir:
-            if os.path.isdir(os.path.join(repo_dir, self.source_dir)):
-                self.source_dir = os.path.join(repo_dir, self.source_dir)
-                os.chdir(self.source_dir)
-            else:
+            if not os.path.isdir(os.path.join(repo_dir, self.source_dir)):
                 raise ValueError('Source directory does not exist in the repo.')
-            if not os.path.isfile(os.path.join(self.source_dir, self.entry_point)):
+            if not os.path.isfile(os.path.join(repo_dir, self.source_dir, self.entry_point)):
                 raise ValueError('Entry point does not exist in the repo.')
+            self.source_dir = os.path.join(repo_dir, self.source_dir)
         else:
             if not os.path.isfile(os.path.join(repo_dir, self.entry_point)):
                 raise ValueError('Entry point does not exist in the repo.')
+            self.entry_point = os.path.join(repo_dir, self.entry_point)
+        dependencies = []
         for path in self.dependencies:
-            if not os.path.isdir(os.path.join(repo_dir, path)):
+            if not os.path.exists(os.path.join(repo_dir, path)):
                 raise ValueError('Dependency {} does not exist in the repo.'.format(path))
+            dependencies.append(os.path.join(repo_dir, path))
+        self.dependencies = dependencies
+
+    def _validate_git_config(self):
+        """check if a git_config param is valid
+
+        Raises:
+            ValueError: If 'git_config' has no key 'repo'
+        """
+        if 'repo' not in self.git_config:
+            raise ValueError('Please provide a repo for git_config.')
 
     def _checkout_branch_and_commit(self, repo_dir):
         """Enter the directory where the repo is cloned, and  checkout the required branch and commit.
@@ -923,33 +934,10 @@ def _checkout_branch_and_commit(self, repo_dir):
             ValueError: If 1. entry point specified does not exist in the repo
                            2. source dir specified does not exist in the repo
         """
-        os.chdir(repo_dir)
         if 'branch' in self.git_config:
-            try:
-                subprocess.check_call(['git', 'checkout', self.git_config['branch']])
-            except subprocess.CalledProcessError:
-                raise subprocess.CalledProcessError(1, cmd='git checkout {}'.format(self.git_config['branch']))
+            subprocess.check_call(['git', 'checkout', self.git_config['branch']], cwd=str(repo_dir))
         if 'commit' in self.git_config:
-            try:
-                subprocess.check_call(['git', 'checkout', self.git_config['commit']])
-            except subprocess.CalledProcessError:
-                raise subprocess.CalledProcessError(1, cmd='git checkout {}'.format(self.git_config['commit']))
-
-    def _validate_git_config(self):
-        """check if a git_config param is valid
-
-        Raises:
-            ValueError: If:
-                1. git_config has no key 'repo'
-                2. git_config['repo'] is in the wrong format.
-        """
-        if 'repo' not in self.git_config:
-            raise ValueError('Please provide a repo for git_config.')
-        repo = self.git_config['repo']
-        codecommit_url = repo.startswith('https://git-codecommit') or repo.startswith('ssh://git-codecommit')
-        github_url = repo.startswith('https://github') or repo.startswith('git@github')
-        if not codecommit_url and not github_url:
-            raise ValueError('Please provide a valid git repo url.')
+            subprocess.check_call(['git', 'checkout', self.git_config['commit']], cwd=str(repo_dir))
 
     def _prepare_for_training(self, job_name=None):
         """Set hyperparameters needed for training. This method will also validate ``source_dir``.
diff --git a/tests/integ/test_git.py b/tests/integ/test_git.py
index f0fe79e21c..4e27b1418d 100644
--- a/tests/integ/test_git.py
+++ b/tests/integ/test_git.py
@@ -16,42 +16,69 @@
 
 import numpy
 
+from sagemaker.mxnet.estimator import MXNet
+from sagemaker.mxnet.model import MXNetModel
 from sagemaker.pytorch.estimator import PyTorch
 from sagemaker.pytorch.model import PyTorchModel
 from sagemaker.utils import sagemaker_timestamp
-from tests.integ import DATA_DIR, PYTHON_VERSION, TRAINING_DEFAULT_TIMEOUT_MINUTES
-from tests.integ.timeout import timeout, timeout_and_delete_endpoint_by_name
+from tests.integ import DATA_DIR, PYTHON_VERSION
+
 GIT_REPO = 'https://github.com/GaryTu1020/python-sdk-testing.git'
 BRANCH = 'branch1'
-COMMIT = '4893e528afa4a790331e1b5286954f073b0f14a2'
+COMMIT = 'b61c450200d6a309c8d24ac14b8adddc405acc56'
 
 
 def test_git_support_with_pytorch(sagemaker_local_session):
-    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
-        script_path = 'mnist.py'
-        data_path = os.path.join(DATA_DIR, 'pytorch_mnist')
-        git_config = {'repo': GIT_REPO, 'branch': BRANCH, 'commit': COMMIT}
-        pytorch = PyTorch(entry_point=script_path, role='SageMakerRole', source_dir='pytorch',
-                          framework_version=PyTorch.LATEST_VERSION, py_version=PYTHON_VERSION,
-                          train_instance_count=1, train_instance_type='ml.c4.xlarge',
-                          sagemaker_session=sagemaker_local_session, git_config=git_config)
-
-        train_input = pytorch.sagemaker_session.upload_data(path=os.path.join(data_path, 'training'),
+    script_path = 'mnist.py'
+    data_path = os.path.join(DATA_DIR, 'pytorch_mnist')
+    git_config = {'repo': GIT_REPO, 'branch': BRANCH, 'commit': COMMIT}
+    pytorch = PyTorch(entry_point=script_path, role='SageMakerRole', source_dir='pytorch',
+                      framework_version=PyTorch.LATEST_VERSION, py_version=PYTHON_VERSION,
+                      train_instance_count=1, train_instance_type='local',
+                      sagemaker_session=sagemaker_local_session, git_config=git_config)
+
+    train_input = pytorch.sagemaker_session.upload_data(path=os.path.join(data_path, 'training'),
                                                             key_prefix='integ-test-data/pytorch_mnist/training')
-        pytorch.fit({'training': train_input})
+    pytorch.fit({'training': train_input})
+
+    predictor = pytorch.deploy(initial_instance_count=1, instance_type='local')
+
+    data = numpy.zeros(shape=(1, 1, 28, 28)).astype(numpy.float32)
+    result = predictor.predict(data)
+    assert result is not None
+
+
+def test_git_support_with_mxnet(sagemaker_local_session, mxnet_full_version):
+    script_path = 'mnist.py'
+    data_path = os.path.join(DATA_DIR, 'mxnet_mnist')
+    git_config = {'repo': GIT_REPO, 'branch': BRANCH, 'commit': COMMIT}
+    dependencies = ['foo/bar.py']
+    mx = MXNet(entry_point=script_path, role='SageMakerRole',
+               source_dir='mxnet', dependencies=dependencies,
+               framework_version=MXNet.LATEST_VERSION, py_version=PYTHON_VERSION,
+               train_instance_count=1, train_instance_type='local',
+               sagemaker_session=sagemaker_local_session, git_config=git_config)
+
+    train_input = mx.sagemaker_session.upload_data(path=os.path.join(data_path, 'train'),
+                                                   key_prefix='integ-test-data/mxnet_mnist/train')
+    test_input = mx.sagemaker_session.upload_data(path=os.path.join(data_path, 'test'),
+                                                   key_prefix='integ-test-data/mxnet_mnist/test')
+
+    mx.fit({'train': train_input, 'test': test_input})
 
-    files = [file for file in os.listdir(pytorch.source_dir)]
-    assert files == ['some-file', 'mnist.py']
+    files = [file for file in os.listdir(mx.source_dir)]
+    assert files == ['some_file', 'mnist.py']
 
-    endpoint_name = 'test-git_support-with-pytorch-{}'.format(sagemaker_timestamp())
+    endpoint_name = 'test-git_support-with-mxnet-{}'.format(sagemaker_timestamp())
 
-    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_local_session):
-        desc = sagemaker_local_session.sagemaker_client.describe_training_job(pytorch.latest_training_job.name)
-        model_data = desc['ModelArtifacts']['S3ModelArtifacts']
-        model = PyTorchModel(model_data, 'SageMakerRole', entry_point=script_path,
-                             sagemaker_session=sagemaker_local_session)
-        predictor = model.deploy(1, 'ml.m4.xlarge', endpoint_name=endpoint_name)
+    script_abs_path = os.path.join(mx.source_dir, script_path)
+    desc = sagemaker_local_session.sagemaker_client.describe_training_job(mx.latest_training_job.name)
+    model_data = desc['ModelArtifacts']['S3ModelArtifacts']
+    model = MXNetModel(model_data, 'SageMakerRole', entry_point=script_abs_path,
+                       py_version=PYTHON_VERSION, sagemaker_session=sagemaker_local_session,
+                       framework_version=mxnet_full_version)
+    predictor = model.deploy(1, 'local', endpoint_name=endpoint_name)
 
-        data = numpy.zeros(shape=(1, 1, 28, 28))
-        result = predictor.predict(data)
-        assert result is not None
+    data = numpy.zeros(shape=(1, 1, 28, 28))
+    result = predictor.predict(data)
+    assert result is not None
diff --git a/tests/unit/test_estimator.py b/tests/unit/test_estimator.py
index 85728b66ed..9458806a21 100644
--- a/tests/unit/test_estimator.py
+++ b/tests/unit/test_estimator.py
@@ -50,7 +50,7 @@
 OUTPUT_PATH = 's3://bucket/prefix'
 GIT_REPO = 'https://github.com/GaryTu1020/python-sdk-testing.git'
 BRANCH = 'branch1'
-COMMIT = '4893e528afa4a790331e1b5286954f073b0f14a2'
+COMMIT = 'b61c450200d6a309c8d24ac14b8adddc405acc56'
 
 DESCRIBE_TRAINING_JOB_RESULT = {
     'ModelArtifacts': {
@@ -607,7 +607,7 @@ def test_git_support_with_branch_and_commit_succeed(sagemaker_session):
                         train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
                         enable_cloudwatch_metrics=True)
     fw.fit()
-    assert os.path.isfile(fw.entry_point)
+    assert os.path.isfile(os.path.join(fw.source_dir, fw.entry_point))
     assert os.path.isdir(fw.source_dir)
 
 
@@ -618,7 +618,7 @@ def test_git_support_with_branch_succeed(sagemaker_session):
                         train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
                         enable_cloudwatch_metrics=True)
     fw.fit()
-    assert os.path.isfile(fw.entry_point)
+    assert os.path.isfile(os.path.join(fw.source_dir, fw.entry_point))
     assert os.path.isdir(fw.source_dir)
 
 
@@ -630,6 +630,8 @@ def test_git_support_with_dependencies_succeed(sagemaker_session):
                         enable_cloudwatch_metrics=True)
     fw.fit()
     assert os.path.isfile(fw.entry_point)
+    for item in fw.dependencies:
+        assert os.path.exists(item)
 
 
 def test_git_support_without_branch_and_commit_succeed(sagemaker_session):
@@ -659,9 +661,9 @@ def test_git_support_bad_repo_url_format(sagemaker_session):
                         source_dir='source_dir', role=ROLE,
                         sagemaker_session=sagemaker_session, train_instance_count=INSTANCE_COUNT,
                         train_instance_type=INSTANCE_TYPE, enable_cloudwatch_metrics=True)
-    with pytest.raises(ValueError) as error:
+    with pytest.raises(subprocess.CalledProcessError) as error:
         fw.fit()
-    assert 'Please provide a valid git repo url.' in str(error)
+    assert 'returned non-zero exit status' in str(error)
 
 
 def test_git_support_git_clone_fail(sagemaker_session):
@@ -671,7 +673,7 @@ def test_git_support_git_clone_fail(sagemaker_session):
                         train_instance_type=INSTANCE_TYPE, enable_cloudwatch_metrics=True)
     with pytest.raises(subprocess.CalledProcessError) as error:
         fw.fit()
-    assert "Command 'git clone" in str(error)
+    assert 'returned non-zero exit status' in str(error)
 
 
 def test_git_support_branch_not_exist(sagemaker_session):
@@ -684,7 +686,7 @@ def test_git_support_branch_not_exist(sagemaker_session):
                         enable_cloudwatch_metrics=True)
     with pytest.raises(subprocess.CalledProcessError) as error:
         fw.fit()
-    assert "Command 'git checkout" in str(error)
+    assert 'returned non-zero exit status' in str(error)
 
 
 def test_git_support_commit_not_exist(sagemaker_session):
@@ -697,7 +699,7 @@ def test_git_support_commit_not_exist(sagemaker_session):
                         enable_cloudwatch_metrics=True)
     with pytest.raises(subprocess.CalledProcessError) as error:
         fw.fit()
-    assert "Command 'git checkout" in str(error)
+    assert 'returned non-zero exit status' in str(error)
 
 
 def test_git_support_entry_point_not_exist(sagemaker_session):

From 241ac92206c487bf47d3050b5aed20a4d959c271 Mon Sep 17 00:00:00 2001
From: Yue Tu <yuetu@amazon.com>
Date: Fri, 14 Jun 2019 17:27:40 -0700
Subject: [PATCH 08/19] write unit tests for git_utils

---
 src/sagemaker/estimator.py   |  72 ++------------
 src/sagemaker/git_utils.py   | 104 ++++++++++++++++++++
 tests/integ/test_git.py      |   5 +-
 tests/unit/test_estimator.py |  70 ++++++++------
 tests/unit/test_git_utils.py | 177 +++++++++++++++++++++++++++++++++++
 5 files changed, 331 insertions(+), 97 deletions(-)
 create mode 100644 src/sagemaker/git_utils.py
 create mode 100644 tests/unit/test_git_utils.py

diff --git a/src/sagemaker/estimator.py b/src/sagemaker/estimator.py
index d734378f70..f2e40e91a4 100644
--- a/src/sagemaker/estimator.py
+++ b/src/sagemaker/estimator.py
@@ -15,8 +15,6 @@
 import json
 import logging
 import os
-import subprocess
-import tempfile
 import warnings
 from abc import ABCMeta
 from abc import abstractmethod
@@ -24,6 +22,7 @@
 from six import string_types
 
 import sagemaker
+import sagemaker.git_utils
 from sagemaker.analytics import TrainingJobAnalytics
 from sagemaker.fw_utils import (create_image_uri, tar_and_upload_dir, parse_s3_url, UploadedCode,
                                 validate_source_dir)
@@ -876,69 +875,6 @@ def __init__(self, entry_point, source_dir=None, hyperparameters=None,
 
         self._hyperparameters = hyperparameters or {}
 
-    def _git_clone_code(self):
-        """Git clone repo containing the training scripts and enter that directory. This method also validate
-        ``git_config``, and set ``entry_point`` and ``source_dir`` to the right file or directory in the repo cloned.
-
-        Raises:
-            CalledProcessError: If 1. failed to clone git repo
-                                   2. failed to checkout the required branch
-                                   3. failed to checkout the required commit
-            ValueError: If 1. entry point specified does not exist in the repo
-                           2. source dir specified does not exist in the repo
-        """
-        self._validate_git_config()
-        # create a temporary directory to store the cloned repo
-        repo_dir = tempfile.mkdtemp()
-        # try:
-        subprocess.check_call(['git', 'clone', self.git_config['repo'], repo_dir])
-        # except subprocess.CalledProcessError:
-        #    raise subprocess.CalledProcessError(1, cmd='git clone {} {}'.format(self.git_config['repo'], repo_dir))
-
-        self._checkout_branch_and_commit(repo_dir)
-
-        # check if the cloned repo contains entry point, source directory and dependencies
-        if self.source_dir:
-            if not os.path.isdir(os.path.join(repo_dir, self.source_dir)):
-                raise ValueError('Source directory does not exist in the repo.')
-            if not os.path.isfile(os.path.join(repo_dir, self.source_dir, self.entry_point)):
-                raise ValueError('Entry point does not exist in the repo.')
-            self.source_dir = os.path.join(repo_dir, self.source_dir)
-        else:
-            if not os.path.isfile(os.path.join(repo_dir, self.entry_point)):
-                raise ValueError('Entry point does not exist in the repo.')
-            self.entry_point = os.path.join(repo_dir, self.entry_point)
-        dependencies = []
-        for path in self.dependencies:
-            if not os.path.exists(os.path.join(repo_dir, path)):
-                raise ValueError('Dependency {} does not exist in the repo.'.format(path))
-            dependencies.append(os.path.join(repo_dir, path))
-        self.dependencies = dependencies
-
-    def _validate_git_config(self):
-        """check if a git_config param is valid
-
-        Raises:
-            ValueError: If 'git_config' has no key 'repo'
-        """
-        if 'repo' not in self.git_config:
-            raise ValueError('Please provide a repo for git_config.')
-
-    def _checkout_branch_and_commit(self, repo_dir):
-        """Enter the directory where the repo is cloned, and  checkout the required branch and commit.
-
-        Args:
-            repo_dir: the directory where the repo is cloned
-
-        Raises:
-            ValueError: If 1. entry point specified does not exist in the repo
-                           2. source dir specified does not exist in the repo
-        """
-        if 'branch' in self.git_config:
-            subprocess.check_call(['git', 'checkout', self.git_config['branch']], cwd=str(repo_dir))
-        if 'commit' in self.git_config:
-            subprocess.check_call(['git', 'checkout', self.git_config['commit']], cwd=str(repo_dir))
-
     def _prepare_for_training(self, job_name=None):
         """Set hyperparameters needed for training. This method will also validate ``source_dir``.
 
@@ -949,7 +885,11 @@ def _prepare_for_training(self, job_name=None):
         super(Framework, self)._prepare_for_training(job_name=job_name)
 
         if self.git_config:
-            self._git_clone_code()
+            updates = sagemaker.git_utils.git_clone_repo(self.git_config, self.entry_point,
+                                                         self.source_dir, self.dependencies)
+            self.entry_point = updates['entry_point']
+            self.source_dir = updates['source_dir']
+            self.dependencies = updates['dependencies']
 
         # validate source dir will raise a ValueError if there is something wrong with the
         # source directory. We are intentionally not handling it because this is a critical error.
diff --git a/src/sagemaker/git_utils.py b/src/sagemaker/git_utils.py
new file mode 100644
index 0000000000..7051300db3
--- /dev/null
+++ b/src/sagemaker/git_utils.py
@@ -0,0 +1,104 @@
+# Copyright 2017-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+from __future__ import absolute_import
+
+import os
+import subprocess
+import tempfile
+
+
+def git_clone_repo(git_config, entry_point, source_dir=None, dependencies=None):
+    """Git clone repo containing the training code and serving code. This method also validate ``git_config``,
+    and set ``entry_point``, ``source_dir`` and ``dependencies`` to the right file or directory in the repo cloned.
+
+    Args:
+        git_config (dict[str, str]): Git configurations used for cloning files, including ``repo``, ``branch``
+            and ``commit``. ``branch`` and ``commit`` are optional. If ``branch`` is not specified, master branch
+            will be used. If ``commit`` is not specified, the latest commit in the required branch will be used.
+        entry_point (str): A relative location to the Python source file which should be executed as the entry point
+            to training or model hosting in the Git repo.
+        source_dir (str): A relative location to a directory with other training or model hosting source code
+            dependencies aside from the entry point file in the Git repo (default: None). Structure within this
+            directory are preserved when training on Amazon SageMaker.
+        dependencies (list[str]): A list of relative locations to directories with any additional libraries that will
+            be exported to the container in the Git repo (default: []).
+
+    Raises:
+        CalledProcessError: If 1. failed to clone git repo
+                               2. failed to checkout the required branch
+                               3. failed to checkout the required commit
+        ValueError: If 1. entry point specified does not exist in the repo
+                       2. source dir specified does not exist in the repo
+
+    Returns:
+        dict: A dict that contains the updated values of entry_point, source_dir and dependencies
+    """
+    _validate_git_config(git_config)
+    repo_dir = tempfile.mkdtemp()
+    subprocess.check_call(['git', 'clone', git_config['repo'], repo_dir])
+
+    _checkout_branch_and_commit(git_config, repo_dir)
+
+    ret = {'entry_point': entry_point, 'source_dir': source_dir, 'dependencies': dependencies}
+    # check if the cloned repo contains entry point, source directory and dependencies
+    if source_dir:
+        if not os.path.isdir(os.path.join(repo_dir, source_dir)):
+            raise ValueError('Source directory does not exist in the repo.')
+        if not os.path.isfile(os.path.join(repo_dir, source_dir, entry_point)):
+            raise ValueError('Entry point does not exist in the repo.')
+        ret['source_dir'] = os.path.join(repo_dir, source_dir)
+    else:
+        if not os.path.isfile(os.path.join(repo_dir, entry_point)):
+            raise ValueError('Entry point does not exist in the repo.')
+        ret['entry_point'] = os.path.join(repo_dir, entry_point)
+
+    ret['dependencies'] = []
+    for path in dependencies:
+        if not os.path.exists(os.path.join(repo_dir, path)):
+            raise ValueError('Dependency {} does not exist in the repo.'.format(path))
+        ret['dependencies'].append(os.path.join(repo_dir, path))
+    return ret
+
+
+def _validate_git_config(git_config):
+    """check if a git_config param is valid
+
+    Args:
+        git_config ((dict[str, str]): Git configurations used for cloning files, including ``repo``, ``branch``
+            and ``commit``.
+
+    Raises:
+        ValueError: If:
+            1. git_config has no key 'repo'
+            2. git_config['repo'] is in the wrong format.
+    """
+    if 'repo' not in git_config:
+        raise ValueError('Please provide a repo for git_config.')
+
+
+def _checkout_branch_and_commit(git_config, repo_dir):
+    """Checkout the required branch and commit.
+
+    Args:
+        git_config: (dict[str, str]): Git configurations used for cloning files, including ``repo``, ``branch``
+            and ``commit``.
+        repo_dir (str): the directory where the repo is cloned
+
+    Raises:
+        ValueError: If 1. entry point specified does not exist in the repo
+                       2. source dir specified does not exist in the repo
+    """
+    if 'branch' in git_config:
+        subprocess.check_call(args=['git', 'checkout', git_config['branch']], cwd=str(repo_dir))
+    if 'commit' in git_config:
+        subprocess.check_call(args=['git', 'checkout', git_config['commit']], cwd=str(repo_dir))
diff --git a/tests/integ/test_git.py b/tests/integ/test_git.py
index 4e27b1418d..e3c3f17c47 100644
--- a/tests/integ/test_git.py
+++ b/tests/integ/test_git.py
@@ -19,7 +19,6 @@
 from sagemaker.mxnet.estimator import MXNet
 from sagemaker.mxnet.model import MXNetModel
 from sagemaker.pytorch.estimator import PyTorch
-from sagemaker.pytorch.model import PyTorchModel
 from sagemaker.utils import sagemaker_timestamp
 from tests.integ import DATA_DIR, PYTHON_VERSION
 
@@ -38,7 +37,7 @@ def test_git_support_with_pytorch(sagemaker_local_session):
                       sagemaker_session=sagemaker_local_session, git_config=git_config)
 
     train_input = pytorch.sagemaker_session.upload_data(path=os.path.join(data_path, 'training'),
-                                                            key_prefix='integ-test-data/pytorch_mnist/training')
+                                                        key_prefix='integ-test-data/pytorch_mnist/training')
     pytorch.fit({'training': train_input})
 
     predictor = pytorch.deploy(initial_instance_count=1, instance_type='local')
@@ -62,7 +61,7 @@ def test_git_support_with_mxnet(sagemaker_local_session, mxnet_full_version):
     train_input = mx.sagemaker_session.upload_data(path=os.path.join(data_path, 'train'),
                                                    key_prefix='integ-test-data/mxnet_mnist/train')
     test_input = mx.sagemaker_session.upload_data(path=os.path.join(data_path, 'test'),
-                                                   key_prefix='integ-test-data/mxnet_mnist/test')
+                                                  key_prefix='integ-test-data/mxnet_mnist/test')
 
     mx.fit({'train': train_input, 'test': test_input})
 
diff --git a/tests/unit/test_estimator.py b/tests/unit/test_estimator.py
index 9458806a21..f5c5a9d88b 100644
--- a/tests/unit/test_estimator.py
+++ b/tests/unit/test_estimator.py
@@ -600,48 +600,54 @@ def test_prepare_for_training_force_name_generation(strftime, sagemaker_session)
     assert JOB_NAME == fw._current_job_name
 
 
-def test_git_support_with_branch_and_commit_succeed(sagemaker_session):
-    git_config = {'repo': GIT_REPO, 'branch': BRANCH, 'commit': COMMIT}
-    fw = DummyFramework(entry_point='entry_point', git_config=git_config,
-                        source_dir='source_dir', role=ROLE, sagemaker_session=sagemaker_session,
-                        train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
-                        enable_cloudwatch_metrics=True)
+@patch('sagemaker.git_utils.git_clone_repo')
+def test_git_support_with_branch_and_commit_succeed(git_clone_repo, sagemaker_session):
+    git_clone_repo.side_effect = lambda git_config, entry_point, source_dir=None, dependencies=None: {
+        'entry_point': '/tmp/repo_dir/entry_point', 'source_dir': None, 'dependencies': None}
+    git_conf = {'repo': GIT_REPO, 'branch': BRANCH, 'commit': COMMIT}
+
+    fw = DummyFramework(entry_point='entry_point', git_config=git_conf, role=ROLE,
+                        sagemaker_session=sagemaker_session, train_instance_count=INSTANCE_COUNT,
+                        train_instance_type=INSTANCE_TYPE, enable_cloudwatch_metrics=True)
     fw.fit()
-    assert os.path.isfile(os.path.join(fw.source_dir, fw.entry_point))
-    assert os.path.isdir(fw.source_dir)
 
 
-def test_git_support_with_branch_succeed(sagemaker_session):
-    git_config = {'repo': GIT_REPO, 'branch': BRANCH}
-    fw = DummyFramework(entry_point='entry_point', git_config=git_config,
-                        source_dir='source_dir', role=ROLE, sagemaker_session=sagemaker_session,
+@patch('sagemaker.git_utils.git_clone_repo')
+def test_git_support_with_branch_succeed(git_clone_repo, sagemaker_session):
+    git_clone_repo.side_effect = lambda git_config, entry_point, source_dir, dependencies=None: {
+        'entry_point': '/tmp/repo_dir/source_dir/entry_point', 'source_dir': None, 'dependencies': None}
+    git_conf = {'repo': GIT_REPO, 'branch': BRANCH}
+    fw = DummyFramework(entry_point='entry_point', git_config=git_conf,
+                        role=ROLE, sagemaker_session=sagemaker_session,
                         train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
                         enable_cloudwatch_metrics=True)
     fw.fit()
-    assert os.path.isfile(os.path.join(fw.source_dir, fw.entry_point))
-    assert os.path.isdir(fw.source_dir)
 
 
-def test_git_support_with_dependencies_succeed(sagemaker_session):
-    git_config = {'repo': GIT_REPO, 'branch': BRANCH, 'commit': COMMIT}
-    fw = DummyFramework(entry_point='source_dir/entry_point', git_config=git_config,
+@patch('sagemaker.git_utils.git_clone_repo')
+def test_git_support_with_dependencies_succeed(git_clone_repo, sagemaker_session):
+    git_clone_repo.side_effect = lambda git_config, entry_point, source_dir, dependencies: {
+        'entry_point': '/tmp/repo_dir/source_dir/entry_point',
+        'source_dir': None,
+        'dependencies': ['/tmp/repo_dir/foo', '/tmp/repo_dir/foo/bar']}
+    git_conf = {'repo': GIT_REPO, 'branch': BRANCH, 'commit': COMMIT}
+    fw = DummyFramework(entry_point='source_dir/entry_point', git_config=git_conf,
                         dependencies=['foo', 'foo/bar'], role=ROLE, sagemaker_session=sagemaker_session,
                         train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
                         enable_cloudwatch_metrics=True)
     fw.fit()
-    assert os.path.isfile(fw.entry_point)
-    for item in fw.dependencies:
-        assert os.path.exists(item)
 
 
-def test_git_support_without_branch_and_commit_succeed(sagemaker_session):
-    git_config = {'repo': GIT_REPO}
-    fw = DummyFramework(entry_point='source_dir/entry_point', git_config=git_config,
+@patch('sagemaker.git_utils.git_clone_repo')
+def test_git_support_without_branch_and_commit_succeed(git_clone_repo, sagemaker_session):
+    git_clone_repo.side_effect = lambda git_config, entry_point, source_dir, dependencies=None: {
+        'entry_point': '/tmp/repo_dir/source_dir/entry_point', 'source_dir': None, 'dependencies': None}
+    git_conf = {'repo': GIT_REPO}
+    fw = DummyFramework(entry_point='source_dir/entry_point', git_config=git_conf,
                         role=ROLE, sagemaker_session=sagemaker_session,
                         train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
                         enable_cloudwatch_metrics=True)
     fw.fit()
-    assert os.path.isfile(fw.entry_point)
 
 
 def test_git_support_repo_not_provided(sagemaker_session):
@@ -676,12 +682,14 @@ def test_git_support_git_clone_fail(sagemaker_session):
     assert 'returned non-zero exit status' in str(error)
 
 
+@patch('sagemaker.git_utils.git_clone_repo',
+       side_effect=subprocess.CalledProcessError(returncode=1, cmd='git checkout branch-that-does-not-exist'))
 def test_git_support_branch_not_exist(sagemaker_session):
     git_config = {'repo': GIT_REPO,
                   'branch': 'branch-that-does-not-exist',
                   'commit': COMMIT}
     fw = DummyFramework(entry_point='entry_point', git_config=git_config,
-                        source_dir='source_dir', role=ROLE, sagemaker_session=sagemaker_session,
+                        role=ROLE, sagemaker_session=sagemaker_session,
                         train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
                         enable_cloudwatch_metrics=True)
     with pytest.raises(subprocess.CalledProcessError) as error:
@@ -689,12 +697,14 @@ def test_git_support_branch_not_exist(sagemaker_session):
     assert 'returned non-zero exit status' in str(error)
 
 
+@patch('sagemaker.git_utils.git_clone_repo',
+       side_effect=subprocess.CalledProcessError(returncode=1, cmd='commit-sha-that-does-not-exist'))
 def test_git_support_commit_not_exist(sagemaker_session):
     git_config = {'repo': GIT_REPO,
                   'branch': BRANCH,
                   'commit': 'commit-sha-that-does-not-exist'}
     fw = DummyFramework(entry_point='entry_point', git_config=git_config,
-                        source_dir='source_dir', role=ROLE, sagemaker_session=sagemaker_session,
+                        role=ROLE, sagemaker_session=sagemaker_session,
                         train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
                         enable_cloudwatch_metrics=True)
     with pytest.raises(subprocess.CalledProcessError) as error:
@@ -702,6 +712,8 @@ def test_git_support_commit_not_exist(sagemaker_session):
     assert 'returned non-zero exit status' in str(error)
 
 
+@patch('sagemaker.git_utils.git_clone_repo',
+       side_effect=ValueError('Entry point does not exist in the repo.'))
 def test_git_support_entry_point_not_exist(sagemaker_session):
     git_config = {'repo': GIT_REPO, 'branch': BRANCH, 'commit': COMMIT}
     fw = DummyFramework(entry_point='entry_point_that_does_not_exist', git_config=git_config,
@@ -713,6 +725,8 @@ def test_git_support_entry_point_not_exist(sagemaker_session):
     assert 'Entry point does not exist in the repo.' in str(error)
 
 
+@patch('sagemaker.git_utils.git_clone_repo',
+       side_effect=ValueError('Source directory does not exist in the repo.'))
 def test_git_support_source_dir_not_exist(sagemaker_session):
     git_config = {'repo': GIT_REPO, 'branch': BRANCH, 'commit': COMMIT}
     fw = DummyFramework(entry_point='entry_point', git_config=git_config,
@@ -724,6 +738,8 @@ def test_git_support_source_dir_not_exist(sagemaker_session):
     assert 'Source directory does not exist in the repo.' in str(error)
 
 
+@patch('sagemaker.git_utils.git_clone_repo',
+       side_effect=ValueError('Dependency no-such-dir does not exist in the repo.'))
 def test_git_support_dependencies_not_exist(sagemaker_session):
     git_config = {'repo': GIT_REPO, 'branch': BRANCH, 'commit': COMMIT}
     fw = DummyFramework(entry_point='entry_point', git_config=git_config,
@@ -1414,5 +1430,3 @@ def test_encryption_flag_in_non_vpc_mode_invalid(sagemaker_session):
         estimator.fit()
     assert '"EnableInterContainerTrafficEncryption" and "VpcConfig" must be provided together' in str(error)
 
-
-#################################################################################
diff --git a/tests/unit/test_git_utils.py b/tests/unit/test_git_utils.py
new file mode 100644
index 0000000000..dce38175e0
--- /dev/null
+++ b/tests/unit/test_git_utils.py
@@ -0,0 +1,177 @@
+# Copyright 2017-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+from __future__ import absolute_import
+
+import pytest
+import subprocess
+from mock import patch
+
+from sagemaker import git_utils
+
+REPO_DIR = '/tmp/repo_dir'
+GIT_REPO = 'https://github.com/GaryTu1020/python-sdk-testing.git'
+BRANCH = 'branch1'
+COMMIT = 'b61c450200d6a309c8d24ac14b8adddc405acc56'
+
+
+@patch('subprocess.check_call')
+@patch('tempfile.mkdtemp', return_value=REPO_DIR)
+@patch('sagemaker.git_utils._validate_git_config')
+@patch('sagemaker.git_utils._checkout_branch_and_commit')
+@patch('os.path.isfile', return_value=True)
+@patch('os.path.isdir', return_value=True)
+@patch('os.path.exists', return_value=True)
+def test_git_clone_repo_succeed(exists, isdir, isfile, checkout_branch_and_commit,
+                                validate_git_config, mkdtemp, check_call):
+    git_config = {'repo': GIT_REPO, 'branch': BRANCH, 'commit': COMMIT}
+    entry_point = 'entry_point'
+    source_dir = 'source_dir'
+    dependencies = ['foo', 'bar']
+    ret = git_utils.git_clone_repo(git_config, entry_point, source_dir, dependencies)
+    validate_git_config.assert_called_with(git_config)
+    check_call.assert_called_with(['git', 'clone', git_config['repo'], REPO_DIR])
+    mkdtemp.assert_called_once()
+    checkout_branch_and_commit.assert_called_with(git_config, REPO_DIR)
+    assert ret['entry_point'] == 'entry_point'
+    assert ret['source_dir'] == '/tmp/repo_dir/source_dir'
+    assert ret['dependencies'] == ['/tmp/repo_dir/foo', '/tmp/repo_dir/bar']
+
+
+@patch('subprocess.check_call')
+@patch('tempfile.mkdtemp', return_value=REPO_DIR)
+@patch('sagemaker.git_utils._validate_git_config',
+       side_effect=ValueError('Please provide a repo for git_config.'))
+@patch('sagemaker.git_utils._checkout_branch_and_commit')
+@patch('os.path.isfile', return_value=True)
+@patch('os.path.isdir', return_value=True)
+@patch('os.path.exists', return_value=True)
+def test_git_clone_repo_repo_not_provided(exists, isdir, isfile, checkout_branch_and_commit,
+                                          validate_git_config, mkdtemp, check_call):
+    git_config = {'branch': BRANCH, 'commit': COMMIT}
+    entry_point = 'entry_point_that_does_not_exist'
+    source_dir = 'source_dir'
+    dependencies = ['foo', 'bar']
+    with pytest.raises(ValueError) as error:
+        git_utils.git_clone_repo(git_config, entry_point, source_dir, dependencies)
+    assert 'Please provide a repo for git_config.' in str(error)
+
+
+@patch('subprocess.check_call',
+       side_effect=subprocess.CalledProcessError(returncode=1, cmd='git clone {} {}'.format(GIT_REPO, REPO_DIR)))
+@patch('tempfile.mkdtemp', return_value=REPO_DIR)
+@patch('sagemaker.git_utils._validate_git_config')
+@patch('sagemaker.git_utils._checkout_branch_and_commit')
+@patch('os.path.isfile', return_value=True)
+@patch('os.path.isdir', return_value=True)
+@patch('os.path.exists', return_value=True)
+def test_git_clone_repo_clone_fail(exists, isdir, isfile, checkout_branch_and_commit,
+                                   validate_git_config, mkdtemp, check_call):
+    git_config = {'repo': GIT_REPO, 'branch': BRANCH, 'commit': COMMIT}
+    entry_point = 'entry_point'
+    source_dir = 'source_dir'
+    dependencies = ['foo', 'bar']
+    with pytest.raises(subprocess.CalledProcessError) as error:
+        git_utils.git_clone_repo(git_config, entry_point, source_dir, dependencies)
+    assert 'returned non-zero exit status' in str(error)
+
+
+@patch('subprocess.check_call')
+@patch('tempfile.mkdtemp', return_value=REPO_DIR)
+@patch('sagemaker.git_utils._validate_git_config')
+@patch('sagemaker.git_utils._checkout_branch_and_commit',
+       side_effect=subprocess.CalledProcessError(returncode=1, cmd='git checkout {}'.format(BRANCH)))
+@patch('os.path.isfile', return_value=True)
+@patch('os.path.isdir', return_value=True)
+@patch('os.path.exists', return_value=True)
+def test_git_clone_repo_branch_not_exist(exists, isdir, isfile, checkout_branch_and_commit,
+                                         validate_git_config, mkdtemp, check_call):
+    git_config = {'repo': GIT_REPO, 'branch': BRANCH, 'commit': COMMIT}
+    entry_point = 'entry_point'
+    source_dir = 'source_dir'
+    dependencies = ['foo', 'bar']
+    with pytest.raises(subprocess.CalledProcessError) as error:
+        git_utils.git_clone_repo(git_config, entry_point, source_dir, dependencies)
+    assert 'returned non-zero exit status' in str(error)
+
+
+@patch('subprocess.check_call')
+@patch('tempfile.mkdtemp', return_value=REPO_DIR)
+@patch('sagemaker.git_utils._validate_git_config')
+@patch('sagemaker.git_utils._checkout_branch_and_commit',
+       side_effect=subprocess.CalledProcessError(returncode=1, cmd='git checkout {}'.format(COMMIT)))
+@patch('os.path.isfile', return_value=True)
+@patch('os.path.isdir', return_value=True)
+@patch('os.path.exists', return_value=True)
+def test_git_clone_repo_commit_not_exist(exists, isdir, isfile, checkout_branch_and_commit,
+                                         validate_git_config, mkdtemp, check_call):
+    git_config = {'repo': GIT_REPO, 'branch': BRANCH, 'commit': COMMIT}
+    entry_point = 'entry_point'
+    source_dir = 'source_dir'
+    dependencies = ['foo', 'bar']
+    with pytest.raises(subprocess.CalledProcessError) as error:
+        git_utils.git_clone_repo(git_config, entry_point, source_dir, dependencies)
+    assert 'returned non-zero exit status' in str(error)
+
+
+@patch('subprocess.check_call')
+@patch('tempfile.mkdtemp', return_value=REPO_DIR)
+@patch('sagemaker.git_utils._validate_git_config')
+@patch('sagemaker.git_utils._checkout_branch_and_commit')
+@patch('os.path.isfile', return_value=False)
+@patch('os.path.isdir', return_value=True)
+@patch('os.path.exists', return_value=True)
+def test_git_clone_repo_entry_point_not_exist(exists, isdir, isfile, checkout_branch_and_commit,
+                                              validate_git_config, mkdtemp, check_call):
+    git_config = {'repo': GIT_REPO, 'branch': BRANCH, 'commit': COMMIT}
+    entry_point = 'entry_point_that_does_not_exist'
+    source_dir = 'source_dir'
+    dependencies = ['foo', 'bar']
+    with pytest.raises(ValueError) as error:
+        git_utils.git_clone_repo(git_config, entry_point, source_dir, dependencies)
+    assert 'Entry point does not exist in the repo.' in str(error)
+
+
+@patch('subprocess.check_call')
+@patch('tempfile.mkdtemp', return_value=REPO_DIR)
+@patch('sagemaker.git_utils._validate_git_config')
+@patch('sagemaker.git_utils._checkout_branch_and_commit')
+@patch('os.path.isfile', return_value=True)
+@patch('os.path.isdir', return_value=False)
+@patch('os.path.exists', return_value=True)
+def test_git_clone_repo_source_dir_not_exist(exists, isdir, isfile, checkout_branch_and_commit,
+                                             validate_git_config, mkdtemp, check_call):
+    git_config = {'repo': GIT_REPO, 'branch': BRANCH, 'commit': COMMIT}
+    entry_point = 'entry_point'
+    source_dir = 'source_dir_that_does_not_exist'
+    dependencies = ['foo', 'bar']
+    with pytest.raises(ValueError) as error:
+        git_utils.git_clone_repo(git_config, entry_point, source_dir, dependencies)
+    assert 'Source directory does not exist in the repo.' in str(error)
+
+
+@patch('subprocess.check_call')
+@patch('tempfile.mkdtemp', return_value=REPO_DIR)
+@patch('sagemaker.git_utils._validate_git_config')
+@patch('sagemaker.git_utils._checkout_branch_and_commit')
+@patch('os.path.isfile', return_value=True)
+@patch('os.path.isdir', return_value=True)
+@patch('os.path.exists', side_effect=[True, False])
+def test_git_clone_repo_dependencies_not_exist(exists, isdir, isfile, checkout_branch_and_commit,
+                                               validate_git_config, mkdtemp, check_call):
+    git_config = {'repo': GIT_REPO, 'branch': BRANCH, 'commit': COMMIT}
+    entry_point = 'entry_point'
+    source_dir = 'source_dir'
+    dependencies = ['foo', 'dep_that_does_not_exist']
+    with pytest.raises(ValueError) as error:
+        git_utils.git_clone_repo(git_config, entry_point, source_dir, dependencies)
+    assert 'does not exist in the repo.' in str(error)

From c39c344494dc5c9347026c8a16af113f0f5fcc75 Mon Sep 17 00:00:00 2001
From: Yue Tu <yuetu@amazon.com>
Date: Fri, 14 Jun 2019 17:41:25 -0700
Subject: [PATCH 09/19] delete a line

---
 tests/unit/test_estimator.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/unit/test_estimator.py b/tests/unit/test_estimator.py
index f5c5a9d88b..6e9b559392 100644
--- a/tests/unit/test_estimator.py
+++ b/tests/unit/test_estimator.py
@@ -1429,4 +1429,3 @@ def test_encryption_flag_in_non_vpc_mode_invalid(sagemaker_session):
                               encrypt_inter_container_traffic=True)
         estimator.fit()
     assert '"EnableInterContainerTrafficEncryption" and "VpcConfig" must be provided together' in str(error)
-

From 2b1622bf555c877e0e3c11fb19a37829a8b0e206 Mon Sep 17 00:00:00 2001
From: Yue Tu <yuetu@amazon.com>
Date: Mon, 17 Jun 2019 11:42:59 -0700
Subject: [PATCH 10/19] add assertion to some test functions

---
 src/sagemaker/estimator.py   |  6 +++---
 tests/unit/test_estimator.py | 33 ++++++++++++++++++++-------------
 2 files changed, 23 insertions(+), 16 deletions(-)

diff --git a/src/sagemaker/estimator.py b/src/sagemaker/estimator.py
index f2e40e91a4..e0a46fde2b 100644
--- a/src/sagemaker/estimator.py
+++ b/src/sagemaker/estimator.py
@@ -22,7 +22,7 @@
 from six import string_types
 
 import sagemaker
-import sagemaker.git_utils
+from sagemaker import git_utils
 from sagemaker.analytics import TrainingJobAnalytics
 from sagemaker.fw_utils import (create_image_uri, tar_and_upload_dir, parse_s3_url, UploadedCode,
                                 validate_source_dir)
@@ -885,8 +885,8 @@ def _prepare_for_training(self, job_name=None):
         super(Framework, self)._prepare_for_training(job_name=job_name)
 
         if self.git_config:
-            updates = sagemaker.git_utils.git_clone_repo(self.git_config, self.entry_point,
-                                                         self.source_dir, self.dependencies)
+            updates = git_utils.git_clone_repo(self.git_config, self.entry_point,
+                                               self.source_dir, self.dependencies)
             self.entry_point = updates['entry_point']
             self.source_dir = updates['source_dir']
             self.dependencies = updates['dependencies']
diff --git a/tests/unit/test_estimator.py b/tests/unit/test_estimator.py
index 6e9b559392..dd921e2cc4 100644
--- a/tests/unit/test_estimator.py
+++ b/tests/unit/test_estimator.py
@@ -602,52 +602,59 @@ def test_prepare_for_training_force_name_generation(strftime, sagemaker_session)
 
 @patch('sagemaker.git_utils.git_clone_repo')
 def test_git_support_with_branch_and_commit_succeed(git_clone_repo, sagemaker_session):
-    git_clone_repo.side_effect = lambda git_config, entry_point, source_dir=None, dependencies=None: {
+    git_clone_repo.side_effect = lambda gitconfig, entrypoint, source_dir=None, dependencies=None: {
         'entry_point': '/tmp/repo_dir/entry_point', 'source_dir': None, 'dependencies': None}
-    git_conf = {'repo': GIT_REPO, 'branch': BRANCH, 'commit': COMMIT}
-
-    fw = DummyFramework(entry_point='entry_point', git_config=git_conf, role=ROLE,
+    git_config = {'repo': GIT_REPO, 'branch': BRANCH, 'commit': COMMIT}
+    entry_point = 'entry_point'
+    fw = DummyFramework(entry_point=entry_point, git_config=git_config, role=ROLE,
                         sagemaker_session=sagemaker_session, train_instance_count=INSTANCE_COUNT,
                         train_instance_type=INSTANCE_TYPE, enable_cloudwatch_metrics=True)
     fw.fit()
+    git_clone_repo.assert_called_once_with(git_config, entry_point, None, [])
 
 
 @patch('sagemaker.git_utils.git_clone_repo')
 def test_git_support_with_branch_succeed(git_clone_repo, sagemaker_session):
-    git_clone_repo.side_effect = lambda git_config, entry_point, source_dir, dependencies=None: {
+    git_clone_repo.side_effect = lambda gitconfig, entrypoint, source_dir, dependencies=None: {
         'entry_point': '/tmp/repo_dir/source_dir/entry_point', 'source_dir': None, 'dependencies': None}
-    git_conf = {'repo': GIT_REPO, 'branch': BRANCH}
-    fw = DummyFramework(entry_point='entry_point', git_config=git_conf,
+    git_config = {'repo': GIT_REPO, 'branch': BRANCH}
+    entry_point = 'entry_point'
+    fw = DummyFramework(entry_point=entry_point, git_config=git_config,
                         role=ROLE, sagemaker_session=sagemaker_session,
                         train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
                         enable_cloudwatch_metrics=True)
     fw.fit()
+    git_clone_repo.assert_called_once_with(git_config, entry_point, None, [])
 
 
 @patch('sagemaker.git_utils.git_clone_repo')
 def test_git_support_with_dependencies_succeed(git_clone_repo, sagemaker_session):
-    git_clone_repo.side_effect = lambda git_config, entry_point, source_dir, dependencies: {
+    git_clone_repo.side_effect = lambda gitconfig, entrypoint, source_dir, dependencies: {
         'entry_point': '/tmp/repo_dir/source_dir/entry_point',
         'source_dir': None,
         'dependencies': ['/tmp/repo_dir/foo', '/tmp/repo_dir/foo/bar']}
-    git_conf = {'repo': GIT_REPO, 'branch': BRANCH, 'commit': COMMIT}
-    fw = DummyFramework(entry_point='source_dir/entry_point', git_config=git_conf,
+    git_config = {'repo': GIT_REPO, 'branch': BRANCH, 'commit': COMMIT}
+    entry_point = 'source_dir/entry_point'
+    fw = DummyFramework(entry_point=entry_point, git_config=git_config,
                         dependencies=['foo', 'foo/bar'], role=ROLE, sagemaker_session=sagemaker_session,
                         train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
                         enable_cloudwatch_metrics=True)
     fw.fit()
+    git_clone_repo.assert_called_once_with(git_config, entry_point, None, ['foo', 'foo/bar'])
 
 
 @patch('sagemaker.git_utils.git_clone_repo')
 def test_git_support_without_branch_and_commit_succeed(git_clone_repo, sagemaker_session):
-    git_clone_repo.side_effect = lambda git_config, entry_point, source_dir, dependencies=None: {
+    git_clone_repo.side_effect = lambda gitconfig, entrypoint, source_dir, dependencies=None: {
         'entry_point': '/tmp/repo_dir/source_dir/entry_point', 'source_dir': None, 'dependencies': None}
-    git_conf = {'repo': GIT_REPO}
-    fw = DummyFramework(entry_point='source_dir/entry_point', git_config=git_conf,
+    git_config = {'repo': GIT_REPO}
+    entry_point = 'source_dir/entry_point'
+    fw = DummyFramework(entry_point=entry_point, git_config=git_config,
                         role=ROLE, sagemaker_session=sagemaker_session,
                         train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
                         enable_cloudwatch_metrics=True)
     fw.fit()
+    git_clone_repo.assert_called_once_with(git_config, entry_point, None, [])
 
 
 def test_git_support_repo_not_provided(sagemaker_session):

From 28a5c5888900fd75649aa286e48f726218005c8e Mon Sep 17 00:00:00 2001
From: Yue Tu <yuetu@amazon.com>
Date: Mon, 17 Jun 2019 13:38:54 -0700
Subject: [PATCH 11/19] remove deploy part in test_git

---
 tests/integ/test_git.py      | 20 +++-----------------
 tests/unit/test_git_utils.py |  6 +++---
 2 files changed, 6 insertions(+), 20 deletions(-)

diff --git a/tests/integ/test_git.py b/tests/integ/test_git.py
index aaed1f4b7e..c6c4d009cd 100644
--- a/tests/integ/test_git.py
+++ b/tests/integ/test_git.py
@@ -22,9 +22,9 @@
 from sagemaker.utils import sagemaker_timestamp
 from tests.integ import DATA_DIR, PYTHON_VERSION
 
-GIT_REPO = 'https://github.com/GaryTu1020/python-sdk-testing.git'
-BRANCH = 'branch1'
-COMMIT = 'b61c450200d6a309c8d24ac14b8adddc405acc56'
+GIT_REPO = 'https://github.com/GaryTu1020/sagemaker-python-sdk.git'
+BRANCH = 'git_support_testing'
+COMMIT = 'b8724a04ee00cb74c12c1b9a0c79d4f065c3801d'
 
 
 def test_git_support_with_pytorch(sagemaker_local_session):
@@ -67,17 +67,3 @@ def test_git_support_with_mxnet(sagemaker_local_session, mxnet_full_version):
 
     files = [file for file in os.listdir(mx.source_dir)]
     assert 'some_file' in files and 'mnist.py' in files
-
-    endpoint_name = 'test-git_support-with-mxnet-{}'.format(sagemaker_timestamp())
-
-    script_abs_path = os.path.join(mx.source_dir, script_path)
-    desc = sagemaker_local_session.sagemaker_client.describe_training_job(mx.latest_training_job.name)
-    model_data = desc['ModelArtifacts']['S3ModelArtifacts']
-    model = MXNetModel(model_data, 'SageMakerRole', entry_point=script_abs_path,
-                       py_version=PYTHON_VERSION, sagemaker_session=sagemaker_local_session,
-                       framework_version=mxnet_full_version)
-    predictor = model.deploy(1, 'local', endpoint_name=endpoint_name)
-
-    data = numpy.zeros(shape=(1, 1, 28, 28))
-    result = predictor.predict(data)
-    assert result is not None
diff --git a/tests/unit/test_git_utils.py b/tests/unit/test_git_utils.py
index dce38175e0..2fb60c76ea 100644
--- a/tests/unit/test_git_utils.py
+++ b/tests/unit/test_git_utils.py
@@ -19,9 +19,9 @@
 from sagemaker import git_utils
 
 REPO_DIR = '/tmp/repo_dir'
-GIT_REPO = 'https://github.com/GaryTu1020/python-sdk-testing.git'
-BRANCH = 'branch1'
-COMMIT = 'b61c450200d6a309c8d24ac14b8adddc405acc56'
+GIT_REPO = 'https://github.com/GaryTu1020/sagemaker-python-sdk.git'
+BRANCH = 'git_support_testing'
+COMMIT = 'b8724a04ee00cb74c12c1b9a0c79d4f065c3801d'
 
 
 @patch('subprocess.check_call')

From 0797060a2bbea49184fbddcc00f421afe77f537f Mon Sep 17 00:00:00 2001
From: Yue Tu <yuetu@amazon.com>
Date: Mon, 17 Jun 2019 14:51:28 -0700
Subject: [PATCH 12/19] change testing git repo

---
 tests/integ/test_git.py      | 8 +++-----
 tests/unit/test_git_utils.py | 6 +++---
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/tests/integ/test_git.py b/tests/integ/test_git.py
index c6c4d009cd..645a3c613a 100644
--- a/tests/integ/test_git.py
+++ b/tests/integ/test_git.py
@@ -17,14 +17,12 @@
 import numpy
 
 from sagemaker.mxnet.estimator import MXNet
-from sagemaker.mxnet.model import MXNetModel
 from sagemaker.pytorch.estimator import PyTorch
-from sagemaker.utils import sagemaker_timestamp
 from tests.integ import DATA_DIR, PYTHON_VERSION
 
-GIT_REPO = 'https://github.com/GaryTu1020/sagemaker-python-sdk.git'
-BRANCH = 'git_support_testing'
-COMMIT = 'b8724a04ee00cb74c12c1b9a0c79d4f065c3801d'
+GIT_REPO = 'https://github.com/aws/sagemaker-python-sdk.git'
+BRANCH = 'test-branch-git-config'
+COMMIT = '329bfcf884482002c05ff7f44f62599ebc9f445a'
 
 
 def test_git_support_with_pytorch(sagemaker_local_session):
diff --git a/tests/unit/test_git_utils.py b/tests/unit/test_git_utils.py
index 2fb60c76ea..2630b23d4b 100644
--- a/tests/unit/test_git_utils.py
+++ b/tests/unit/test_git_utils.py
@@ -19,9 +19,9 @@
 from sagemaker import git_utils
 
 REPO_DIR = '/tmp/repo_dir'
-GIT_REPO = 'https://github.com/GaryTu1020/sagemaker-python-sdk.git'
-BRANCH = 'git_support_testing'
-COMMIT = 'b8724a04ee00cb74c12c1b9a0c79d4f065c3801d'
+GIT_REPO = 'https://github.com/aws/sagemaker-python-sdk.git'
+BRANCH = 'test-branch-git-config'
+COMMIT = '329bfcf884482002c05ff7f44f62599ebc9f445a'
 
 
 @patch('subprocess.check_call')

From e2e5c207d5911c7945aa5fd07fe5f2c94b8379d9 Mon Sep 17 00:00:00 2001
From: Yue Tu <yuetu@amazon.com>
Date: Mon, 17 Jun 2019 16:58:03 -0700
Subject: [PATCH 13/19] change the testing repo

---
 src/sagemaker/estimator.py   | 6 +++---
 tests/unit/test_estimator.py | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/sagemaker/estimator.py b/src/sagemaker/estimator.py
index e0a46fde2b..fa0cf5789e 100644
--- a/src/sagemaker/estimator.py
+++ b/src/sagemaker/estimator.py
@@ -803,9 +803,9 @@ def __init__(self, entry_point, source_dir=None, hyperparameters=None,
 
                     The following config:
 
-                    >>> git_config = {'repo': 'https://github.com/GaryTu1020/python-sdk-testing.git',
-                    >>>               'branch': 'master',
-                    >>>               'commit': 'aea6f3acef9619f77f94772d9d654f041e16bf49'}
+                    >>> git_config = {'repo': 'https://github.com/aws/sagemaker-python-sdk.git',
+                    >>>               'branch': 'test-branch-git-config',
+                    >>>               'commit': '329bfcf884482002c05ff7f44f62599ebc9f445a'}
 
                     results in cloning the repo specified in 'repo', then checkout the 'master' branch, and checkout
                     the specified commit.
diff --git a/tests/unit/test_estimator.py b/tests/unit/test_estimator.py
index dd921e2cc4..6ae5c016de 100644
--- a/tests/unit/test_estimator.py
+++ b/tests/unit/test_estimator.py
@@ -48,9 +48,9 @@
 JOB_NAME = '{}-{}'.format(IMAGE_NAME, TIMESTAMP)
 TAGS = [{'Name': 'some-tag', 'Value': 'value-for-tag'}]
 OUTPUT_PATH = 's3://bucket/prefix'
-GIT_REPO = 'https://github.com/GaryTu1020/python-sdk-testing.git'
-BRANCH = 'branch1'
-COMMIT = 'b61c450200d6a309c8d24ac14b8adddc405acc56'
+GIT_REPO = 'https://github.com/aws/sagemaker-python-sdk.git'
+BRANCH = 'test-branch-git-config'
+COMMIT = '329bfcf884482002c05ff7f44f62599ebc9f445a'
 
 DESCRIBE_TRAINING_JOB_RESULT = {
     'ModelArtifacts': {

From c6daa5dbb7590507cbbbbc588d0f912b5ad75e74 Mon Sep 17 00:00:00 2001
From: Yue Tu <yuetu@amazon.com>
Date: Tue, 18 Jun 2019 14:35:59 -0700
Subject: [PATCH 14/19] correct an error message

---
 tests/unit/test_estimator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/unit/test_estimator.py b/tests/unit/test_estimator.py
index 6ae5c016de..1495307307 100644
--- a/tests/unit/test_estimator.py
+++ b/tests/unit/test_estimator.py
@@ -705,7 +705,7 @@ def test_git_support_branch_not_exist(sagemaker_session):
 
 
 @patch('sagemaker.git_utils.git_clone_repo',
-       side_effect=subprocess.CalledProcessError(returncode=1, cmd='commit-sha-that-does-not-exist'))
+       side_effect=subprocess.CalledProcessError(returncode=1, cmd='git checkout commit-sha-that-does-not-exist'))
 def test_git_support_commit_not_exist(sagemaker_session):
     git_config = {'repo': GIT_REPO,
                   'branch': BRANCH,

From e5bd806e30644a3572007fe3cc72a795ae6e8675 Mon Sep 17 00:00:00 2001
From: Yue Tu <yuetu@amazon.com>
Date: Tue, 18 Jun 2019 16:17:01 -0700
Subject: [PATCH 15/19] stop patching private methods

---
 .python-version              |  1 +
 tests/integ/test_git.py      | 36 ++++++++++++++---------
 tests/unit/test_git_utils.py | 57 +++++++++++-------------------------
 3 files changed, 40 insertions(+), 54 deletions(-)
 create mode 100644 .python-version

diff --git a/.python-version b/.python-version
new file mode 100644
index 0000000000..424e1794de
--- /dev/null
+++ b/.python-version
@@ -0,0 +1 @@
+3.6.8
diff --git a/tests/integ/test_git.py b/tests/integ/test_git.py
index 645a3c613a..859b19f4d7 100644
--- a/tests/integ/test_git.py
+++ b/tests/integ/test_git.py
@@ -34,15 +34,16 @@ def test_git_support_with_pytorch(sagemaker_local_session):
                       train_instance_count=1, train_instance_type='local',
                       sagemaker_session=sagemaker_local_session, git_config=git_config)
 
-    train_input = pytorch.sagemaker_session.upload_data(path=os.path.join(data_path, 'training'),
-                                                        key_prefix='integ-test-data/pytorch_mnist/training')
-    pytorch.fit({'training': train_input})
+    pytorch.fit({'training': 'file://' + os.path.join(data_path, 'training')})
 
-    predictor = pytorch.deploy(initial_instance_count=1, instance_type='local')
+    try:
+        predictor = pytorch.deploy(initial_instance_count=1, instance_type='local')
 
-    data = numpy.zeros(shape=(1, 1, 28, 28)).astype(numpy.float32)
-    result = predictor.predict(data)
-    assert result is not None
+        data = numpy.zeros(shape=(1, 1, 28, 28)).astype(numpy.float32)
+        result = predictor.predict(data)
+        assert result is not None
+    finally:
+        predictor.delete_endpoint()
 
 
 def test_git_support_with_mxnet(sagemaker_local_session, mxnet_full_version):
@@ -56,12 +57,19 @@ def test_git_support_with_mxnet(sagemaker_local_session, mxnet_full_version):
                train_instance_count=1, train_instance_type='local',
                sagemaker_session=sagemaker_local_session, git_config=git_config)
 
-    train_input = mx.sagemaker_session.upload_data(path=os.path.join(data_path, 'train'),
-                                                   key_prefix='integ-test-data/mxnet_mnist/train')
-    test_input = mx.sagemaker_session.upload_data(path=os.path.join(data_path, 'test'),
-                                                  key_prefix='integ-test-data/mxnet_mnist/test')
-
-    mx.fit({'train': train_input, 'test': test_input})
+    mx.fit({'train': 'file://' + os.path.join(data_path, 'train'),
+            'test': 'file://' + os.path.join(data_path, 'test')})
 
     files = [file for file in os.listdir(mx.source_dir)]
-    assert 'some_file' in files and 'mnist.py' in files
+    assert 'some_file' in files
+    assert 'mnist.py' in files
+    assert os.path.exists(mx.dependencies[0])
+
+    try:
+        predictor = mx.deploy(initial_instance_count=1, instance_type='local')
+
+        data = numpy.zeros(shape=(1, 1, 28, 28))
+        result = predictor.predict(data)
+        assert result is not None
+    finally:
+        predictor.delete_endpoint()
diff --git a/tests/unit/test_git_utils.py b/tests/unit/test_git_utils.py
index 2630b23d4b..3343adc5bd 100644
--- a/tests/unit/test_git_utils.py
+++ b/tests/unit/test_git_utils.py
@@ -26,22 +26,19 @@
 
 @patch('subprocess.check_call')
 @patch('tempfile.mkdtemp', return_value=REPO_DIR)
-@patch('sagemaker.git_utils._validate_git_config')
-@patch('sagemaker.git_utils._checkout_branch_and_commit')
 @patch('os.path.isfile', return_value=True)
 @patch('os.path.isdir', return_value=True)
 @patch('os.path.exists', return_value=True)
-def test_git_clone_repo_succeed(exists, isdir, isfile, checkout_branch_and_commit,
-                                validate_git_config, mkdtemp, check_call):
+def test_git_clone_repo_succeed(exists, isdir, isfile, mkdtemp, check_call):
     git_config = {'repo': GIT_REPO, 'branch': BRANCH, 'commit': COMMIT}
     entry_point = 'entry_point'
     source_dir = 'source_dir'
     dependencies = ['foo', 'bar']
     ret = git_utils.git_clone_repo(git_config, entry_point, source_dir, dependencies)
-    validate_git_config.assert_called_with(git_config)
-    check_call.assert_called_with(['git', 'clone', git_config['repo'], REPO_DIR])
+    check_call.assert_any_call(['git', 'clone', git_config['repo'], REPO_DIR])
+    check_call.assert_any_call(args=['git', 'checkout', BRANCH], cwd=REPO_DIR)
+    check_call.assert_any_call(args=['git', 'checkout', COMMIT], cwd=REPO_DIR)
     mkdtemp.assert_called_once()
-    checkout_branch_and_commit.assert_called_with(git_config, REPO_DIR)
     assert ret['entry_point'] == 'entry_point'
     assert ret['source_dir'] == '/tmp/repo_dir/source_dir'
     assert ret['dependencies'] == ['/tmp/repo_dir/foo', '/tmp/repo_dir/bar']
@@ -49,14 +46,10 @@ def test_git_clone_repo_succeed(exists, isdir, isfile, checkout_branch_and_commi
 
 @patch('subprocess.check_call')
 @patch('tempfile.mkdtemp', return_value=REPO_DIR)
-@patch('sagemaker.git_utils._validate_git_config',
-       side_effect=ValueError('Please provide a repo for git_config.'))
-@patch('sagemaker.git_utils._checkout_branch_and_commit')
 @patch('os.path.isfile', return_value=True)
 @patch('os.path.isdir', return_value=True)
 @patch('os.path.exists', return_value=True)
-def test_git_clone_repo_repo_not_provided(exists, isdir, isfile, checkout_branch_and_commit,
-                                          validate_git_config, mkdtemp, check_call):
+def test_git_clone_repo_repo_not_provided(exists, isdir, isfile, mkdtemp, check_call):
     git_config = {'branch': BRANCH, 'commit': COMMIT}
     entry_point = 'entry_point_that_does_not_exist'
     source_dir = 'source_dir'
@@ -69,13 +62,10 @@ def test_git_clone_repo_repo_not_provided(exists, isdir, isfile, checkout_branch
 @patch('subprocess.check_call',
        side_effect=subprocess.CalledProcessError(returncode=1, cmd='git clone {} {}'.format(GIT_REPO, REPO_DIR)))
 @patch('tempfile.mkdtemp', return_value=REPO_DIR)
-@patch('sagemaker.git_utils._validate_git_config')
-@patch('sagemaker.git_utils._checkout_branch_and_commit')
 @patch('os.path.isfile', return_value=True)
 @patch('os.path.isdir', return_value=True)
 @patch('os.path.exists', return_value=True)
-def test_git_clone_repo_clone_fail(exists, isdir, isfile, checkout_branch_and_commit,
-                                   validate_git_config, mkdtemp, check_call):
+def test_git_clone_repo_clone_fail(exists, isdir, isfile, mkdtemp, check_call):
     git_config = {'repo': GIT_REPO, 'branch': BRANCH, 'commit': COMMIT}
     entry_point = 'entry_point'
     source_dir = 'source_dir'
@@ -85,16 +75,14 @@ def test_git_clone_repo_clone_fail(exists, isdir, isfile, checkout_branch_and_co
     assert 'returned non-zero exit status' in str(error)
 
 
-@patch('subprocess.check_call')
+@patch('subprocess.check_call',
+       side_effect=[True,
+                    subprocess.CalledProcessError(returncode=1, cmd='git checkout banana')])
 @patch('tempfile.mkdtemp', return_value=REPO_DIR)
-@patch('sagemaker.git_utils._validate_git_config')
-@patch('sagemaker.git_utils._checkout_branch_and_commit',
-       side_effect=subprocess.CalledProcessError(returncode=1, cmd='git checkout {}'.format(BRANCH)))
 @patch('os.path.isfile', return_value=True)
 @patch('os.path.isdir', return_value=True)
 @patch('os.path.exists', return_value=True)
-def test_git_clone_repo_branch_not_exist(exists, isdir, isfile, checkout_branch_and_commit,
-                                         validate_git_config, mkdtemp, check_call):
+def test_git_clone_repo_branch_not_exist(exists, isdir, isfile, mkdtemp, check_call):
     git_config = {'repo': GIT_REPO, 'branch': BRANCH, 'commit': COMMIT}
     entry_point = 'entry_point'
     source_dir = 'source_dir'
@@ -104,16 +92,14 @@ def test_git_clone_repo_branch_not_exist(exists, isdir, isfile, checkout_branch_
     assert 'returned non-zero exit status' in str(error)
 
 
-@patch('subprocess.check_call')
+@patch('subprocess.check_call',
+       side_effect=[True, True,
+                    subprocess.CalledProcessError(returncode=1, cmd='git checkout {}'.format(COMMIT))])
 @patch('tempfile.mkdtemp', return_value=REPO_DIR)
-@patch('sagemaker.git_utils._validate_git_config')
-@patch('sagemaker.git_utils._checkout_branch_and_commit',
-       side_effect=subprocess.CalledProcessError(returncode=1, cmd='git checkout {}'.format(COMMIT)))
 @patch('os.path.isfile', return_value=True)
 @patch('os.path.isdir', return_value=True)
 @patch('os.path.exists', return_value=True)
-def test_git_clone_repo_commit_not_exist(exists, isdir, isfile, checkout_branch_and_commit,
-                                         validate_git_config, mkdtemp, check_call):
+def test_git_clone_repo_commit_not_exist(exists, isdir, isfile, mkdtemp, check_call):
     git_config = {'repo': GIT_REPO, 'branch': BRANCH, 'commit': COMMIT}
     entry_point = 'entry_point'
     source_dir = 'source_dir'
@@ -125,13 +111,10 @@ def test_git_clone_repo_commit_not_exist(exists, isdir, isfile, checkout_branch_
 
 @patch('subprocess.check_call')
 @patch('tempfile.mkdtemp', return_value=REPO_DIR)
-@patch('sagemaker.git_utils._validate_git_config')
-@patch('sagemaker.git_utils._checkout_branch_and_commit')
 @patch('os.path.isfile', return_value=False)
 @patch('os.path.isdir', return_value=True)
 @patch('os.path.exists', return_value=True)
-def test_git_clone_repo_entry_point_not_exist(exists, isdir, isfile, checkout_branch_and_commit,
-                                              validate_git_config, mkdtemp, check_call):
+def test_git_clone_repo_entry_point_not_exist(exists, isdir, isfile, mkdtemp, check_call):
     git_config = {'repo': GIT_REPO, 'branch': BRANCH, 'commit': COMMIT}
     entry_point = 'entry_point_that_does_not_exist'
     source_dir = 'source_dir'
@@ -143,13 +126,10 @@ def test_git_clone_repo_entry_point_not_exist(exists, isdir, isfile, checkout_br
 
 @patch('subprocess.check_call')
 @patch('tempfile.mkdtemp', return_value=REPO_DIR)
-@patch('sagemaker.git_utils._validate_git_config')
-@patch('sagemaker.git_utils._checkout_branch_and_commit')
 @patch('os.path.isfile', return_value=True)
 @patch('os.path.isdir', return_value=False)
 @patch('os.path.exists', return_value=True)
-def test_git_clone_repo_source_dir_not_exist(exists, isdir, isfile, checkout_branch_and_commit,
-                                             validate_git_config, mkdtemp, check_call):
+def test_git_clone_repo_source_dir_not_exist(exists, isdir, isfile, mkdtemp, check_call):
     git_config = {'repo': GIT_REPO, 'branch': BRANCH, 'commit': COMMIT}
     entry_point = 'entry_point'
     source_dir = 'source_dir_that_does_not_exist'
@@ -161,13 +141,10 @@ def test_git_clone_repo_source_dir_not_exist(exists, isdir, isfile, checkout_bra
 
 @patch('subprocess.check_call')
 @patch('tempfile.mkdtemp', return_value=REPO_DIR)
-@patch('sagemaker.git_utils._validate_git_config')
-@patch('sagemaker.git_utils._checkout_branch_and_commit')
 @patch('os.path.isfile', return_value=True)
 @patch('os.path.isdir', return_value=True)
 @patch('os.path.exists', side_effect=[True, False])
-def test_git_clone_repo_dependencies_not_exist(exists, isdir, isfile, checkout_branch_and_commit,
-                                               validate_git_config, mkdtemp, check_call):
+def test_git_clone_repo_dependencies_not_exist(exists, isdir, isfile, mkdtemp, check_call):
     git_config = {'repo': GIT_REPO, 'branch': BRANCH, 'commit': COMMIT}
     entry_point = 'entry_point'
     source_dir = 'source_dir'

From 2af9b245add7e67b55a990a34f2ba5bebc3ab1fa Mon Sep 17 00:00:00 2001
From: Yue Tu <yuetu@amazon.com>
Date: Wed, 19 Jun 2019 13:24:23 -0700
Subject: [PATCH 16/19] slight change to overview.rst

---
 doc/overview.rst | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/doc/overview.rst b/doc/overview.rst
index aa3ee3da38..a2885f8c9b 100644
--- a/doc/overview.rst
+++ b/doc/overview.rst
@@ -126,10 +126,10 @@ The following are some examples to define estimators with Git support:
         # In this example, the entry point 'mnist.py' is all we need for source code.
         # We need to specify the path to it in the Git repo.
         mx_estimator = MXNet(entry_point='mxnet/mnist.py',
-                                    role='SageMakerRole',
-                                    git_config=git_config,
-                                    train_instance_count=1,
-                                    train_instance_type='ml.c4.xlarge')
+                             role='SageMakerRole',
+                             git_config=git_config,
+                             train_instance_count=1,
+                             train_instance_type='ml.c4.xlarge')
 
         # In this example, besides entry point and other source code in source directory, we still need some
         # dependencies for the training job. Dependencies should also be paths inside the Git repo.

From b102563e594dfcdf2be98715d9a3dd80295582a8 Mon Sep 17 00:00:00 2001
From: Yue Tu <yuetu@amazon.com>
Date: Wed, 19 Jun 2019 13:48:57 -0700
Subject: [PATCH 17/19] add a comment for lock

---
 tests/integ/test_git.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/integ/test_git.py b/tests/integ/test_git.py
index d4ab2436bc..71c6c1a321 100644
--- a/tests/integ/test_git.py
+++ b/tests/integ/test_git.py
@@ -25,6 +25,8 @@
 GIT_REPO = 'https://github.com/aws/sagemaker-python-sdk.git'
 BRANCH = 'test-branch-git-config'
 COMMIT = '329bfcf884482002c05ff7f44f62599ebc9f445a'
+
+# endpoint tests all use the same port, so we use this lock to prevent concurrent execution
 LOCK_PATH = os.path.join(tempfile.gettempdir(), 'sagemaker_test_git_lock')
 
 

From b6e75d026d10e34456a8d8059e613ab1dee0bbce Mon Sep 17 00:00:00 2001
From: Yue Tu <yuetu@amazon.com>
Date: Fri, 21 Jun 2019 16:14:30 -0700
Subject: [PATCH 18/19] merge with master

---
 src/sagemaker/estimator.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/src/sagemaker/estimator.py b/src/sagemaker/estimator.py
index 945eed3bb5..3d6ba1c15c 100644
--- a/src/sagemaker/estimator.py
+++ b/src/sagemaker/estimator.py
@@ -780,7 +780,7 @@ class Framework(EstimatorBase):
 
     def __init__(self, entry_point, source_dir=None, hyperparameters=None, enable_cloudwatch_metrics=False,
                  container_log_level=logging.INFO, code_location=None, image_name=None, dependencies=None,
-                 enable_network_isolation=False, **kwargs):
+                 enable_network_isolation=False, git_config=False, **kwargs):
         """Base class initializer. Subclasses which override ``__init__`` should invoke ``super()``
 
         Args:
@@ -814,7 +814,6 @@ def __init__(self, entry_point, source_dir=None, hyperparameters=None, enable_cl
                     the specified commit.
             source_dir (str): Path (absolute or relative) to a directory with any other training
                 source code dependencies aside from the entry point file (default: None). Structure within this
-<<<<<<< HEAD
                 directory are preserved when training on Amazon SageMaker. If 'git_config' is provided,
                 source_dir should be a relative location to a directory in the Git repo.
                 Example:
@@ -828,8 +827,6 @@ def __init__(self, entry_point, source_dir=None, hyperparameters=None, enable_cl
 
                     and you need 'train.py' as entry point and 'test.py' as training source code as well, you can
                     assign entry_point='train.py', source_dir='src'.
-=======
-                directory are preserved when training on Amazon SageMaker.
             hyperparameters (dict): Hyperparameters that will be used for training (default: None).
                 The hyperparameters are made accessible as a dict[str, str] to the training code on SageMaker.
                 For convenience, this accepts other types for keys and values, but ``str()`` will be called
@@ -845,7 +842,6 @@ def __init__(self, entry_point, source_dir=None, hyperparameters=None, enable_cl
             image_name (str): An alternate image name to use instead of the official Sagemaker image
                 for the framework. This is useful to run one of the Sagemaker supported frameworks
                 with an image containing custom dependencies.
->>>>>>> f1d34ad4073f8d856ef9c596b491f8a4cd8ef31f
             dependencies (list[str]): A list of paths to directories (absolute or relative) with
                 any additional libraries that will be exported to the container (default: []).
                 The library folders will be copied to SageMaker in the same folder where the entrypoint is copied.

From 3621bd484affa1bd80b9c6fbd53f2bd3d3754796 Mon Sep 17 00:00:00 2001
From: Yue Tu <yuetu@amazon.com>
Date: Fri, 21 Jun 2019 16:19:40 -0700
Subject: [PATCH 19/19] merge with master

---
 src/sagemaker/estimator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/sagemaker/estimator.py b/src/sagemaker/estimator.py
index 3d6ba1c15c..915e74c916 100644
--- a/src/sagemaker/estimator.py
+++ b/src/sagemaker/estimator.py
@@ -780,7 +780,7 @@ class Framework(EstimatorBase):
 
     def __init__(self, entry_point, source_dir=None, hyperparameters=None, enable_cloudwatch_metrics=False,
                  container_log_level=logging.INFO, code_location=None, image_name=None, dependencies=None,
-                 enable_network_isolation=False, git_config=False, **kwargs):
+                 enable_network_isolation=False, git_config=None, **kwargs):
         """Base class initializer. Subclasses which override ``__init__`` should invoke ``super()``
 
         Args: