vanzin closed pull request #21279: [SPARK-24219][k8s] Improve the docker building script to avoid copying everything under examples to docker image
URL: https://github.com/apache/spark/pull/21279
This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/bin/docker-image-tool.sh b/bin/docker-image-tool.sh index f090240065bf1..7ded77426340b 100755 --- a/bin/docker-image-tool.sh +++ b/bin/docker-image-tool.sh @@ -44,15 +44,37 @@ function image_ref { function build { local BUILD_ARGS local IMG_PATH + local TMPFOLDER if [ ! -f "$SPARK_HOME/RELEASE" ]; then # Set image build arguments accordingly if this is a source repo and not a distribution archive. + local JARS="${SPARK_HOME}/assembly/target/scala-${SPARK_SCALA_VERSION}/jars" + TMPFOLDER=`mktemp -q -d examples.XXXXXX` + if [ $? -ne 0 ]; then + ehco "Cannot create temp folder, exiting..." + exit 1 + fi + + mkdir -p "${TMPFOLDER}/jars" + cp "${SPARK_HOME}"/examples/target/scala*/jars/* "${TMPFOLDER}/jars" + for f in "${TMPFOLDER}"/jars/*; do + name=$(basename "$f") + if [ -f "${JARS}/${name}" ]; then + rm "${TMPFOLDER}/jars/${name}" + fi + done + + mkdir -p "${TMPFOLDER}/src/main" + cp -r "${SPARK_HOME}/examples/src/main" "${TMPFOLDER}/src" + IMG_PATH=resource-managers/kubernetes/docker/src/main/dockerfiles BUILD_ARGS=( --build-arg img_path=$IMG_PATH --build-arg spark_jars=assembly/target/scala-$SPARK_SCALA_VERSION/jars + --build-arg + spark_examples=$TMPFOLDER ) else # Not passed as an argument to docker, but used to validate the Spark directory. @@ -69,6 +91,10 @@ function build { docker build "${BUILD_ARGS[@]}" \ -t $(image_ref spark) \ -f "$DOCKERFILE" . 
+ + if [ -d "${TMPFOLDER}" ]; then + rm -fr "${TMPFOLDER}" + fi } function push { diff --git a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile index 9badf8556afc3..198f14f2955d1 100644 --- a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile +++ b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile @@ -19,6 +19,7 @@ FROM openjdk:8-alpine ARG spark_jars=jars ARG img_path=kubernetes/dockerfiles +ARG spark_examples=examples # Before building the docker image, first build and make a Spark distribution following # the instructions in http://spark.apache.org/docs/latest/building-spark.html. @@ -41,7 +42,7 @@ COPY ${spark_jars} /opt/spark/jars COPY bin /opt/spark/bin COPY sbin /opt/spark/sbin COPY ${img_path}/spark/entrypoint.sh /opt/ -COPY examples /opt/spark/examples +COPY ${spark_examples} /opt/spark/examples COPY data /opt/spark/data ENV SPARK_HOME /opt/spark ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org