This is an automated email from the ASF dual-hosted git repository.

srowen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 706f91e  [SPARK-35373][BUILD] Check Maven artifact checksum in build/mvn
706f91e is described below

commit 706f91e5e0ae3c6aa156232546f815840f977201
Author: Sean Owen <sro...@gmail.com>
AuthorDate: Thu May 13 09:06:57 2021 -0500

    [SPARK-35373][BUILD] Check Maven artifact checksum in build/mvn
    
    ### What changes were proposed in this pull request?
    
    `./build/mvn` now downloads the .sha512 checksum of each Maven artifact it downloads, and verifies the artifact against that checksum after download.
    
    ### Why are the changes needed?
    
    This ensures the integrity of the Maven artifact downloaded during a user's build, since that artifact may come from any of several non-ASF mirrors.
    
    ### Does this PR introduce _any_ user-facing change?
    
    Should not affect anything about Spark per se, just the build.
    
    ### How was this patch tested?
    
    Manual testing wherein I forced the Maven/Scala download, verified that checksums are downloaded and checked, and verified that the build fails with an error when the checksum is corrupted.
    
    Closes #32505 from srowen/SPARK-35373.
    
    Authored-by: Sean Owen <sro...@gmail.com>
    Signed-off-by: Sean Owen <sro...@gmail.com>
---
 build/mvn | 90 +++++++++++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 65 insertions(+), 25 deletions(-)

diff --git a/build/mvn b/build/mvn
index f33dedc..9c9ea4b 100755
--- a/build/mvn
+++ b/build/mvn
@@ -26,14 +26,23 @@ _COMPILE_JVM_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g"
 
 # Installs any application tarball given a URL, the expected tarball name,
 # and, optionally, a checkable binary path to determine if the binary has
-# already been installed
-## Arg1 - URL
-## Arg2 - Tarball Name
-## Arg3 - Checkable Binary
+# already been installed. Arguments:
+# 1 - Mirror host
+# 2 - URL path on host
+# 3 - URL query string
+# 4 - checksum suffix
+# 5 - Tarball Name
+# 6 - Checkable Binary
 install_app() {
-  local remote_tarball="$1"
-  local local_tarball="${_DIR}/$2"
-  local binary="${_DIR}/$3"
+  local mirror_host="$1"
+  local url_path="$2"
+  local url_query="$3"
+  local checksum_suffix="$4"
+  local local_tarball="${_DIR}/$5"
+  local binary="${_DIR}/$6"
+  local remote_tarball="${mirror_host}/${url_path}${url_query}"
+  local local_checksum="${local_tarball}.${checksum_suffix}"
+  local 
remote_checksum="https://archive.apache.org/dist/${url_path}.${checksum_suffix}";
 
   # setup `curl` and `wget` silent options if we're running on Jenkins
   local curl_opts="-L"
@@ -46,24 +55,46 @@ install_app() {
     wget_opts="--progress=bar:force ${wget_opts}"
   fi
 
-  if [ -z "$3" -o ! -f "$binary" ]; then
+  if [ ! -f "$binary" ]; then
     # check if we already have the tarball
     # check if we have curl installed
     # download application
-    [ ! -f "${local_tarball}" ] && [ $(command -v curl) ] && \
-      echo "exec: curl ${curl_opts} ${remote_tarball}" 1>&2 && \
+    if [ ! -f "${local_tarball}" -a $(command -v curl) ]; then
+      echo "exec: curl ${curl_opts} ${remote_tarball}" 1>&2
       curl ${curl_opts} "${remote_tarball}" > "${local_tarball}"
+      if [ ! -z "${checksum_suffix}" ]; then
+        echo "exec: curl ${curl_opts} ${remote_checksum}" 1>&2
+        curl ${curl_opts} "${remote_checksum}" > "${local_checksum}"
+      fi
+    fi
     # if the file still doesn't exist, lets try `wget` and cross our fingers
-    [ ! -f "${local_tarball}" ] && [ $(command -v wget) ] && \
-      echo "exec: wget ${wget_opts} ${remote_tarball}" 1>&2 && \
+    if [ ! -f "${local_tarball}" -a $(command -v wget) ]; then
+      echo "exec: wget ${wget_opts} ${remote_tarball}" 1>&2
       wget ${wget_opts} -O "${local_tarball}" "${remote_tarball}"
+      if [ ! -z "${checksum_suffix}" ]; then
+        echo "exec: wget ${wget_opts} ${remote_checksum}" 1>&2
+        wget ${wget_opts} -O "${local_checksum}" "${remote_checksum}"
+      fi
+    fi
     # if both were unsuccessful, exit
-    [ ! -f "${local_tarball}" ] && \
-      echo -n "ERROR: Cannot download $2 with cURL or wget; " && \
-      echo "please install manually and try again." && \
+    if [ ! -f "${local_tarball}" ]; then
+      echo -n "ERROR: Cannot download ${remote_tarball} with cURL or wget; 
please install manually and try again."
       exit 2
-    cd "${_DIR}" && tar -xzf "$2"
-    rm -rf "$local_tarball"
+    fi
+    # Checksum may not have been specified; don't check if doesn't exist
+    if [ -f "${local_checksum}" ]; then
+      echo "  ${local_tarball}" >> ${local_checksum} # two spaces + file are 
important!
+      # Assuming SHA512 here for now
+      echo "Veryfing checksum from ${local_checksum}" 1>&2
+      if ! shasum -a 512 -q -c "${local_checksum}" ; then
+        echo "Bad checksum from ${remote_checksum}"
+        exit 2
+      fi
+    fi
+
+    cd "${_DIR}" && tar -xzf "${local_tarball}"
+    rm -rf "${local_tarball}"
+    rm -f "${local_checksum}"
   fi
 }
 
@@ -79,21 +110,26 @@ install_mvn() {
     local MVN_DETECTED_VERSION="$(mvn --version | head -n1 | awk '{print $3}')"
   fi
   if [ $(version $MVN_DETECTED_VERSION) -lt $(version $MVN_VERSION) ]; then
-    local 
FILE_PATH="maven/maven-3/${MVN_VERSION}/binaries/apache-maven-${MVN_VERSION}-bin.tar.gz"
+    local MVN_TARBALL="apache-maven-${MVN_VERSION}-bin.tar.gz"
+    local FILE_PATH="maven/maven-3/${MVN_VERSION}/binaries/${MVN_TARBALL}"
     local 
APACHE_MIRROR=${APACHE_MIRROR:-'https://www.apache.org/dyn/closer.lua'}
-    local MIRROR_URL="${APACHE_MIRROR}/${FILE_PATH}?action=download"
+    local MIRROR_URL_QUERY="?action=download"
 
     if [ $(command -v curl) ]; then
-      if ! curl -L --output /dev/null --silent --head --fail "${MIRROR_URL}" ; 
then
+      if ! curl -L --output /dev/null --silent --head --fail 
"${APACHE_MIRROR}/${FILE_PATH}${MIRROR_URL_QUERY}" ; then
         # Fall back to archive.apache.org for older Maven
         echo "Falling back to archive.apache.org to download Maven"
-        MIRROR_URL="https://archive.apache.org/dist/${FILE_PATH}";
+        APACHE_MIRROR="https://archive.apache.org/dist";
+        MIRROR_URL_QUERY=""
       fi
     fi
 
     install_app \
-      "${MIRROR_URL}" \
-      "apache-maven-${MVN_VERSION}-bin.tar.gz" \
+      "${APACHE_MIRROR}" \
+      "${FILE_PATH}" \
+      "${MIRROR_URL_QUERY}" \
+      "sha512" \
+      "${MVN_TARBALL}" \
       "apache-maven-${MVN_VERSION}/bin/mvn"
 
     MVN_BIN="${_DIR}/apache-maven-${MVN_VERSION}/bin/mvn"
@@ -130,10 +166,14 @@ install_scala() {
   local scala_version=`grep "scala.version" "${_DIR}/../pom.xml" | grep 
${scala_binary_version} | head -n1 | awk -F '[<>]' '{print $3}'`
   local scala_bin="${_DIR}/scala-${scala_version}/bin/scala"
   local TYPESAFE_MIRROR=${TYPESAFE_MIRROR:-https://downloads.lightbend.com}
+  local SCALA_TARBALL="scala-${scala_version}.tgz"
 
   install_app \
-    "${TYPESAFE_MIRROR}/scala/${scala_version}/scala-${scala_version}.tgz" \
-    "scala-${scala_version}.tgz" \
+    "${TYPESAFE_MIRROR}" \
+    "scala/${scala_version}/${SCALA_TARBALL}" \
+    "" \
+    "" \
+    ${SCALA_TARBALL} \
     "scala-${scala_version}/bin/scala"
 
   SCALA_COMPILER="$(cd "$(dirname "${scala_bin}")/../lib" && 
pwd)/scala-compiler.jar"

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to