53 changes: 40 additions & 13 deletions bin/docker-image-tool.sh
@@ -41,6 +41,18 @@ function image_ref {
echo "$image"
}

function docker_push {
  # Push the given image only if it exists locally; otherwise skip it so an
  # unbuilt binding image does not fail the whole push.
  local image_name="$1"
  if [ ! -z "$(docker images -q "$(image_ref "${image_name}")")" ]; then
    docker push "$(image_ref "${image_name}")"
    if [ $? -ne 0 ]; then
      error "Failed to push $image_name Docker image."
    fi
  else
    echo "$(image_ref "${image_name}") image not found. Skipping push for this image."
  fi
}
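For context, a sketch of the behavior this helper gives `push` when only the JVM image has been built; the repository, tag, and interleaved output shown are illustrative:

```bash
# Build only the JVM image, then push. The missing binding images are
# reported and skipped instead of failing the whole push (illustrative output).
$ ./bin/docker-image-tool.sh -r docker.io/myrepo -t v2.3.0 build
$ ./bin/docker-image-tool.sh -r docker.io/myrepo -t v2.3.0 push
docker.io/myrepo/spark-py:v2.3.0 image not found. Skipping push for this image.
docker.io/myrepo/spark-r:v2.3.0 image not found. Skipping push for this image.
```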

function build {
  local BUILD_ARGS
  local IMG_PATH
@@ -72,26 +84,36 @@ function build {
    base_img=$(image_ref spark)
  )
  local BASEDOCKERFILE=${BASEDOCKERFILE:-"$IMG_PATH/spark/Dockerfile"}
  local PYDOCKERFILE=${PYDOCKERFILE:-"$IMG_PATH/spark/bindings/python/Dockerfile"}
  local RDOCKERFILE=${RDOCKERFILE:-"$IMG_PATH/spark/bindings/R/Dockerfile"}
  local PYDOCKERFILE=${PYDOCKERFILE:-false}
  local RDOCKERFILE=${RDOCKERFILE:-false}

  docker build $NOCACHEARG "${BUILD_ARGS[@]}" \
    -t $(image_ref spark) \
    -f "$BASEDOCKERFILE" .

  docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
    -t $(image_ref spark-py) \
    -f "$PYDOCKERFILE" .
  if [ "${PYDOCKERFILE}" != "false" ]; then
    docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
      -t $(image_ref spark-py) \
      -f "$PYDOCKERFILE" .
    if [ $? -ne 0 ]; then
      error "Failed to build PySpark Docker image, please refer to Docker build output for details."
    fi
  fi

  docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
    -t $(image_ref spark-r) \
    -f "$RDOCKERFILE" .
  if [ "${RDOCKERFILE}" != "false" ]; then
    docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
      -t $(image_ref spark-r) \
      -f "$RDOCKERFILE" .
    if [ $? -ne 0 ]; then
      error "Failed to build SparkR Docker image, please refer to Docker build output for details."
    fi
  fi
}

function push {
  docker push "$(image_ref spark)"
  docker push "$(image_ref spark-py)"
  docker push "$(image_ref spark-r)"
  docker_push "spark"
  docker_push "spark-py"
  docker_push "spark-r"
}

function usage {
@@ -106,8 +128,10 @@ Commands:

Options:
  -f file               Dockerfile to build for JVM based Jobs. By default builds the Dockerfile shipped with Spark.
  -p file               Dockerfile to build for PySpark Jobs. Builds Python dependencies and ships with Spark.
  -R file               Dockerfile to build for SparkR Jobs. Builds R dependencies and ships with Spark.
  -p file               (Optional) Dockerfile to build for PySpark Jobs. Builds Python dependencies and ships with Spark.
                        Skips building PySpark docker image if not specified.
  -R file               (Optional) Dockerfile to build for SparkR Jobs. Builds R dependencies and ships with Spark.
                        Skips building SparkR docker image if not specified.
  -r repo               Repository address.
  -t tag                Tag to apply to the built image, or to identify the image to be pushed.
  -m                    Use minikube's Docker daemon.
@@ -127,6 +151,9 @@ Examples:
  - Build image in minikube with tag "testing"
    $0 -m -t testing build

  - Build PySpark docker image
    $0 -r docker.io/myrepo -t v2.3.0 -p kubernetes/dockerfiles/spark/bindings/python/Dockerfile build

  - Build and push image with tag "v2.3.0" to docker.io/myrepo
    $0 -r docker.io/myrepo -t v2.3.0 build
    $0 -r docker.io/myrepo -t v2.3.0 push
12 changes: 12 additions & 0 deletions docs/running-on-kubernetes.md
@@ -87,6 +87,18 @@
$ ./bin/docker-image-tool.sh -r <repo> -t my-tag build
$ ./bin/docker-image-tool.sh -r <repo> -t my-tag push
```

By default `bin/docker-image-tool.sh` builds the Docker image for running JVM jobs. You need to opt in to build
the additional language binding Docker images.

Example usage:
```bash
# To build additional PySpark docker image
$ ./bin/docker-image-tool.sh -r <repo> -t my-tag -p ./kubernetes/dockerfiles/spark/bindings/python/Dockerfile build

# To build additional SparkR docker image
$ ./bin/docker-image-tool.sh -r <repo> -t my-tag -R ./kubernetes/dockerfiles/spark/bindings/R/Dockerfile build
```
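The two flags can also be combined, so a single `build` produces all three images before one `push`; a sketch using the same placeholders as above:

```bash
# Build the JVM, PySpark, and SparkR images in one invocation, then push them
$ ./bin/docker-image-tool.sh -r <repo> -t my-tag \
    -p ./kubernetes/dockerfiles/spark/bindings/python/Dockerfile \
    -R ./kubernetes/dockerfiles/spark/bindings/R/Dockerfile build
$ ./bin/docker-image-tool.sh -r <repo> -t my-tag push
```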

## Cluster Mode

To launch Spark Pi in cluster mode,
Expand Down
@@ -71,19 +71,42 @@ if [[ $IMAGE_TAG == "N/A" ]];
then
  IMAGE_TAG=$(uuidgen);
  cd $UNPACKED_SPARK_TGZ
  if [[ $DEPLOY_MODE == cloud ]] ;
  then
    $UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -r $IMAGE_REPO -t $IMAGE_TAG build
    if [[ $IMAGE_REPO == gcr.io* ]] ;
    then
      gcloud docker -- push $IMAGE_REPO/spark:$IMAGE_TAG
    else
      $UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -r $IMAGE_REPO -t $IMAGE_TAG push
    fi
  else
    # -m option for minikube.
    $UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -m -r $IMAGE_REPO -t $IMAGE_TAG build
  fi

  # Build PySpark image
  LANGUAGE_BINDING_BUILD_ARGS="-p $UNPACKED_SPARK_TGZ/kubernetes/dockerfiles/spark/bindings/python/Dockerfile"

  # Build SparkR image
  LANGUAGE_BINDING_BUILD_ARGS="$LANGUAGE_BINDING_BUILD_ARGS -R $UNPACKED_SPARK_TGZ/kubernetes/dockerfiles/spark/bindings/R/Dockerfile"

  case $DEPLOY_MODE in
    cloud)
      # Build images
      $UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -r $IMAGE_REPO -t $IMAGE_TAG $LANGUAGE_BINDING_BUILD_ARGS build

      # Push images appropriately
      if [[ $IMAGE_REPO == gcr.io* ]] ;
      then
        gcloud docker -- push $IMAGE_REPO/spark:$IMAGE_TAG
      else
        $UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -r $IMAGE_REPO -t $IMAGE_TAG push
      fi
      ;;

    docker-for-desktop)
      # Only need to build; this places the images in the local Docker repo,
      # which is all Docker for Desktop needs, so there is no push step.
      $UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -r $IMAGE_REPO -t $IMAGE_TAG $LANGUAGE_BINDING_BUILD_ARGS build
      ;;

    minikube)
      # Only need to build; with the -m option the images are built directly
      # using the minikube Docker daemon, so there is no push step.
      $UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -m -r $IMAGE_REPO -t $IMAGE_TAG $LANGUAGE_BINDING_BUILD_ARGS build
      ;;
    *)
      echo "Unrecognized deploy mode $DEPLOY_MODE" && exit 1
      ;;
  esac
  cd -
fi
