This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new 6c5fcac  [SPARK-35373][BUILD] Check Maven artifact checksum in build/mvn
6c5fcac is described below

commit 6c5fcac6b787d01ebf3d9f53410db2c894ab9abd
Author: Sean Owen <sro...@gmail.com>
AuthorDate: Thu May 13 09:06:57 2021 -0500

    [SPARK-35373][BUILD] Check Maven artifact checksum in build/mvn

    ### What changes were proposed in this pull request?

    `./build/mvn` now downloads the .sha512 checksum of Maven artifacts it downloads, and checks the checksum after download.

    ### Why are the changes needed?

    This ensures the integrity of the Maven artifact during a user's build, which may come from several non-ASF mirrors.

    ### Does this PR introduce _any_ user-facing change?

    Should not affect anything about Spark per se, just the build.

    ### How was this patch tested?

    Manual testing wherein I forced Maven/Scala download, verified checksums are downloaded and checked, and verified it fails on error with a corrupted checksum.

    Closes #32505 from srowen/SPARK-35373.

    Authored-by: Sean Owen <sro...@gmail.com>
    Signed-off-by: Sean Owen <sro...@gmail.com>
---
 build/mvn | 90 +++++++++++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 65 insertions(+), 25 deletions(-)

diff --git a/build/mvn b/build/mvn
index 4e53a16..9e63cc2 100755
--- a/build/mvn
+++ b/build/mvn
@@ -26,36 +26,67 @@ _COMPILE_JVM_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g"

 # Installs any application tarball given a URL, the expected tarball name,
 # and, optionally, a checkable binary path to determine if the binary has
-# already been installed
-## Arg1 - URL
-## Arg2 - Tarball Name
-## Arg3 - Checkable Binary
+# already been installed. Arguments:
+#   1 - Mirror host
+#   2 - URL path on host
+#   3 - URL query string
+#   4 - checksum suffix
+#   5 - Tarball Name
+#   6 - Checkable Binary
 install_app() {
-  local remote_tarball="$1"
-  local local_tarball="${_DIR}/$2"
-  local binary="${_DIR}/$3"
+  local mirror_host="$1"
+  local url_path="$2"
+  local url_query="$3"
+  local checksum_suffix="$4"
+  local local_tarball="${_DIR}/$5"
+  local binary="${_DIR}/$6"
+  local remote_tarball="${mirror_host}/${url_path}${url_query}"
+  local local_checksum="${local_tarball}.${checksum_suffix}"
+  local remote_checksum="https://archive.apache.org/dist/${url_path}.${checksum_suffix}"

   local curl_opts="--silent --show-error -L"
   local wget_opts="--no-verbose"

-  if [ -z "$3" -o ! -f "$binary" ]; then
+  if [ ! -f "$binary" ]; then
     # check if we already have the tarball
     # check if we have curl installed
     # download application
-    [ ! -f "${local_tarball}" ] && [ $(command -v curl) ] && \
-      echo "exec: curl ${curl_opts} ${remote_tarball}" 1>&2 && \
+    if [ ! -f "${local_tarball}" -a $(command -v curl) ]; then
+      echo "exec: curl ${curl_opts} ${remote_tarball}" 1>&2
       curl ${curl_opts} "${remote_tarball}" > "${local_tarball}"
+      if [ ! -z "${checksum_suffix}" ]; then
+        echo "exec: curl ${curl_opts} ${remote_checksum}" 1>&2
+        curl ${curl_opts} "${remote_checksum}" > "${local_checksum}"
+      fi
+    fi
     # if the file still doesn't exist, lets try `wget` and cross our fingers
-    [ ! -f "${local_tarball}" ] && [ $(command -v wget) ] && \
-      echo "exec: wget ${wget_opts} ${remote_tarball}" 1>&2 && \
+    if [ ! -f "${local_tarball}" -a $(command -v wget) ]; then
+      echo "exec: wget ${wget_opts} ${remote_tarball}" 1>&2
       wget ${wget_opts} -O "${local_tarball}" "${remote_tarball}"
+      if [ ! -z "${checksum_suffix}" ]; then
+        echo "exec: wget ${wget_opts} ${remote_checksum}" 1>&2
+        wget ${wget_opts} -O "${local_checksum}" "${remote_checksum}"
+      fi
+    fi
     # if both were unsuccessful, exit
-    [ ! -f "${local_tarball}" ] && \
-      echo -n "ERROR: Cannot download $2 with cURL or wget; " && \
-      echo "please install manually and try again." && \
+    if [ ! -f "${local_tarball}" ]; then
+      echo -n "ERROR: Cannot download ${remote_tarball} with cURL or wget; please install manually and try again."
       exit 2
-    cd "${_DIR}" && tar -xzf "$2"
-    rm -rf "$local_tarball"
+    fi
+    # Checksum may not have been specified; don't check if doesn't exist
+    if [ -f "${local_checksum}" ]; then
+      echo "  ${local_tarball}" >> ${local_checksum} # two spaces + file are important!
+      # Assuming SHA512 here for now
+      echo "Veryfing checksum from ${local_checksum}" 1>&2
+      if ! shasum -a 512 -q -c "${local_checksum}" ; then
+        echo "Bad checksum from ${remote_checksum}"
+        exit 2
+      fi
+    fi
+
+    cd "${_DIR}" && tar -xzf "${local_tarball}"
+    rm -rf "${local_tarball}"
+    rm -f "${local_checksum}"
   fi
 }

@@ -71,21 +102,26 @@ install_mvn() {
     local MVN_DETECTED_VERSION="$(mvn --version | head -n1 | awk '{print $3}')"
   fi
   if [ $(version $MVN_DETECTED_VERSION) -lt $(version $MVN_VERSION) ]; then
-    local FILE_PATH="maven/maven-3/${MVN_VERSION}/binaries/apache-maven-${MVN_VERSION}-bin.tar.gz"
+    local MVN_TARBALL="apache-maven-${MVN_VERSION}-bin.tar.gz"
+    local FILE_PATH="maven/maven-3/${MVN_VERSION}/binaries/${MVN_TARBALL}"
     local APACHE_MIRROR=${APACHE_MIRROR:-'https://www.apache.org/dyn/closer.lua'}
-    local MIRROR_URL="${APACHE_MIRROR}/${FILE_PATH}?action=download"
+    local MIRROR_URL_QUERY="?action=download"

     if [ $(command -v curl) ]; then
-      if ! curl -L --output /dev/null --silent --head --fail "${MIRROR_URL}" ; then
+      if ! curl -L --output /dev/null --silent --head --fail "${APACHE_MIRROR}/${FILE_PATH}${MIRROR_URL_QUERY}" ; then
         # Fall back to archive.apache.org for older Maven
         echo "Falling back to archive.apache.org to download Maven"
-        MIRROR_URL="https://archive.apache.org/dist/${FILE_PATH}"
+        APACHE_MIRROR="https://archive.apache.org/dist"
+        MIRROR_URL_QUERY=""
       fi
     fi

     install_app \
-      "${MIRROR_URL}" \
-      "apache-maven-${MVN_VERSION}-bin.tar.gz" \
+      "${APACHE_MIRROR}" \
+      "${FILE_PATH}" \
+      "${MIRROR_URL_QUERY}" \
+      "sha512" \
+      "${MVN_TARBALL}" \
       "apache-maven-${MVN_VERSION}/bin/mvn"

     MVN_BIN="${_DIR}/apache-maven-${MVN_VERSION}/bin/mvn"
@@ -101,10 +137,14 @@ install_scala() {
   local scala_version=`grep "scala.version" "${_DIR}/../pom.xml" | grep ${scala_binary_version} | head -n1 | awk -F '[<>]' '{print $3}'`
   local scala_bin="${_DIR}/scala-${scala_version}/bin/scala"
   local TYPESAFE_MIRROR=${TYPESAFE_MIRROR:-https://downloads.lightbend.com}
+  local SCALA_TARBALL="scala-${scala_version}.tgz"

   install_app \
-    "${TYPESAFE_MIRROR}/scala/${scala_version}/scala-${scala_version}.tgz" \
-    "scala-${scala_version}.tgz" \
+    "${TYPESAFE_MIRROR}" \
+    "scala/${scala_version}/${SCALA_TARBALL}" \
+    "" \
+    "" \
+    ${SCALA_TARBALL} \
     "scala-${scala_version}/bin/scala"

   SCALA_COMPILER="$(cd "$(dirname "${scala_bin}")/../lib" && pwd)/scala-compiler.jar"

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org