This is an automated email from the ASF dual-hosted git repository.
yuanzhou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new a1db38290a [INFRA] Switch archive.a.o to closer.lua to avoid abuse
while fetch spark resources (#8881)
a1db38290a is described below
commit a1db38290a84ac481cd5ae16ee3c9cdd3f7529c0
Author: Kent Yao <[email protected]>
AuthorDate: Wed Mar 5 04:54:12 2025 +0800
[INFRA] Switch archive.a.o to closer.lua to avoid abuse while fetch spark
resources (#8881)
The archive.a.o has a threshold for daily traffic and a threshold for
weekly total download; continuous abuse might get an automatic ban.
---
.github/workflows/docker_image.yml | 1 +
.github/workflows/util/install_spark_resources.sh | 110 +++++++++++++---------
2 files changed, 66 insertions(+), 45 deletions(-)
diff --git a/.github/workflows/docker_image.yml
b/.github/workflows/docker_image.yml
index 727dc3cd05..15c4774053 100644
--- a/.github/workflows/docker_image.yml
+++ b/.github/workflows/docker_image.yml
@@ -21,6 +21,7 @@ on:
- main
paths:
- '.github/workflows/docker_image.yml'
+ - '.github/workflows/util/install_spark_resources.sh'
- 'dev/docker/Dockerfile.centos7-static-build'
- 'dev/docker/Dockerfile.centos8-dynamic-build'
- 'dev/docker/Dockerfile.centos8-dynamic-build-jdk11'
diff --git a/.github/workflows/util/install_spark_resources.sh
b/.github/workflows/util/install_spark_resources.sh
index ad454f601a..cf502a210d 100755
--- a/.github/workflows/util/install_spark_resources.sh
+++ b/.github/workflows/util/install_spark_resources.sh
@@ -19,72 +19,92 @@
set -e
+# Installs Spark binary and source releases with:
+# 1 - spark version
+# 2 - hadoop version
+# 3 - scala version
+function install_spark() {
+ local spark_version="$1"
+ local hadoop_version="$2"
+ local scala_version="$3"
+ local spark_version_short=$(echo "${spark_version}" | cut -d '.' -f 1,2 | tr
-d '.')
+ local scala_suffix=$([ "${scala_version}" == '2.13' ] && echo '-scala-2.13'
|| echo '')
+ local mirror_host='https://www.apache.org/dyn/closer.lua/'
+ local url_query='?action=download'
+ local checksum_suffix='sha512'
+ local url_path="spark/spark-${spark_version}/"
+ local
local_binary="spark-${spark_version}-bin-hadoop${hadoop_version}${scala_suffix}.tgz"
+ local local_binary_checksum="${local_binary}.${checksum_suffix}"
+ local local_source="spark-${spark_version}.tgz"
+ local local_source_checksum="${local_source}.${checksum_suffix}"
+ local remote_binary="${mirror_host}${url_path}${local_binary}${url_query}"
+ local
remote_binary_checksum="${mirror_host}${url_path}${local_binary_checksum}${url_query}"
+ local remote_source="${mirror_host}${url_path}${local_source}${url_query}"
+ local
remote_source_checksum="${mirror_host}${url_path}${local_source_checksum}${url_query}"
+ local wget_opts="--no-verbose"
+
+ wget ${wget_opts} -O "${local_binary}" "${remote_binary}"
+ wget ${wget_opts} -O "${local_source}" "${remote_source}"
+
+ # Checksum may not have been specified; don't check if doesn't exist
+ if [ "$(command -v shasum)" ]; then
+ wget ${wget_opts} -O "${local_binary_checksum}" "${remote_binary_checksum}"
+ if ! shasum -a 512 -c "${local_binary_checksum}" > /dev/null ; then
+ echo "Bad checksum from ${remote_binary_checksum}"
+ rm -f "${local_binary_checksum}"
+ exit 2
+ fi
+ rm -f "${local_binary_checksum}"
+
+ wget ${wget_opts} -O "${local_source_checksum}" "${remote_source_checksum}"
+ if ! shasum -a 512 -c "${local_source_checksum}" > /dev/null ; then
+ echo "Bad checksum from ${remote_source_checksum}"
+ rm -f "${local_source_checksum}"
+ exit 2
+ fi
+ rm -f "${local_source_checksum}"
+ else
+ echo "Skipping checksum because shasum is not installed." 1>&2
+ fi
+
+ tar --strip-components=1 -xf "${local_binary}"
spark-"${spark_version}"-bin-hadoop"${hadoop_version}"/jars/
+ mkdir -p
${INSTALL_DIR}/shims/spark"${spark_version_short}"/spark_home/assembly/target/scala-"${scala_version}"
+ mv jars
${INSTALL_DIR}/shims/spark"${spark_version_short}"/spark_home/assembly/target/scala-"${scala_version}"
+
+ tar --strip-components=1 -xf "${local_source}"
spark-"${spark_version}"/sql/core/src/test/resources/
+ mkdir -p shims/spark"${spark_version_short}${scala_suffix}"/spark_home/
+ mv sql shims/spark"${spark_version_short}${scala_suffix}"/spark_home/
+
+ rm -rf "${local_binary}"
+ rm -rf "${local_source}"
+}
+
INSTALL_DIR=/opt/
case "$1" in
3.2)
# Spark-3.2
cd ${INSTALL_DIR} && \
- wget -nv
https://archive.apache.org/dist/spark/spark-3.2.2/spark-3.2.2-bin-hadoop3.2.tgz
&& \
- tar --strip-components=1 -xf spark-3.2.2-bin-hadoop3.2.tgz
spark-3.2.2-bin-hadoop3.2/jars/ && \
- rm -rf spark-3.2.2-bin-hadoop3.2.tgz && \
- mkdir -p
${INSTALL_DIR}/shims/spark32/spark_home/assembly/target/scala-2.12 && \
- mv jars ${INSTALL_DIR}/shims/spark32/spark_home/assembly/target/scala-2.12
&& \
- wget -nv https://github.com/apache/spark/archive/refs/tags/v3.2.2.tar.gz
&& \
- tar --strip-components=1 -xf v3.2.2.tar.gz
spark-3.2.2/sql/core/src/test/resources/ && \
- mkdir -p shims/spark32/spark_home/ && \
- mv sql shims/spark32/spark_home/
+ install_spark "3.2.2" "3.2" "2.12"
;;
3.3)
# Spark-3.3
cd ${INSTALL_DIR} && \
- wget -nv
https://archive.apache.org/dist/spark/spark-3.3.1/spark-3.3.1-bin-hadoop3.tgz
&& \
- tar --strip-components=1 -xf spark-3.3.1-bin-hadoop3.tgz
spark-3.3.1-bin-hadoop3/jars/ && \
- rm -rf spark-3.3.1-bin-hadoop3.tgz && \
- mkdir -p
${INSTALL_DIR}/shims/spark33/spark_home/assembly/target/scala-2.12 && \
- mv jars ${INSTALL_DIR}/shims/spark33/spark_home/assembly/target/scala-2.12
&& \
- wget -nv https://github.com/apache/spark/archive/refs/tags/v3.3.1.tar.gz
&& \
- tar --strip-components=1 -xf v3.3.1.tar.gz
spark-3.3.1/sql/core/src/test/resources/ && \
- mkdir -p shims/spark33/spark_home/ && \
- mv sql shims/spark33/spark_home/
+ install_spark "3.3.1" "3" "2.12"
;;
3.4)
# Spark-3.4
cd ${INSTALL_DIR} && \
- wget -nv
https://archive.apache.org/dist/spark/spark-3.4.4/spark-3.4.4-bin-hadoop3.tgz
&& \
- tar --strip-components=1 -xf spark-3.4.4-bin-hadoop3.tgz
spark-3.4.4-bin-hadoop3/jars/ && \
- rm -rf spark-3.4.4-bin-hadoop3.tgz && \
- mkdir -p
${INSTALL_DIR}/shims/spark34/spark_home/assembly/target/scala-2.12 && \
- mv jars ${INSTALL_DIR}/shims/spark34/spark_home/assembly/target/scala-2.12
&& \
- wget -nv https://github.com/apache/spark/archive/refs/tags/v3.4.4.tar.gz
&& \
- tar --strip-components=1 -xf v3.4.4.tar.gz
spark-3.4.4/sql/core/src/test/resources/ && \
- mkdir -p shims/spark34/spark_home/ && \
- mv sql shims/spark34/spark_home/
+ install_spark "3.4.4" "3" "2.12"
;;
3.5)
# Spark-3.5
cd ${INSTALL_DIR} && \
- wget -nv
https://archive.apache.org/dist/spark/spark-3.5.2/spark-3.5.2-bin-hadoop3.tgz
&& \
- tar --strip-components=1 -xf spark-3.5.2-bin-hadoop3.tgz
spark-3.5.2-bin-hadoop3/jars/ && \
- rm -rf spark-3.5.2-bin-hadoop3.tgz && \
- mkdir -p
${INSTALL_DIR}/shims/spark35/spark_home/assembly/target/scala-2.12 && \
- mv jars ${INSTALL_DIR}/shims/spark35/spark_home/assembly/target/scala-2.12
&& \
- wget -nv https://github.com/apache/spark/archive/refs/tags/v3.5.2.tar.gz
&& \
- tar --strip-components=1 -xf v3.5.2.tar.gz
spark-3.5.2/sql/core/src/test/resources/ && \
- mkdir -p shims/spark35/spark_home/ && \
- mv sql shims/spark35/spark_home/
+ install_spark "3.5.2" "3" "2.12"
;;
3.5-scala2.13)
# Spark-3.5, scala 2.13
cd ${INSTALL_DIR} && \
- wget -nv
https://archive.apache.org/dist/spark/spark-3.5.2/spark-3.5.2-bin-hadoop3.tgz
&& \
- tar --strip-components=1 -xf spark-3.5.2-bin-hadoop3.tgz
spark-3.5.2-bin-hadoop3/jars/ && \
- rm -rf spark-3.5.2-bin-hadoop3.tgz && \
- mkdir -p
${INSTALL_DIR}/shims/spark35-scala2.13/spark_home/assembly/target/scala-2.13 &&
\
- mv jars
${INSTALL_DIR}/shims/spark35-scala2.13/spark_home/assembly/target/scala-2.13 &&
\
- wget -nv https://github.com/apache/spark/archive/refs/tags/v3.5.2.tar.gz
&& \
- tar --strip-components=1 -xf v3.5.2.tar.gz
spark-3.5.2/sql/core/src/test/resources/ && \
- mkdir -p shims/spark35-scala2.13/spark_home/ && \
- mv sql shims/spark35-scala2.13/spark_home/
+ install_spark "3.5.2" "3" "2.13"
;;
*)
echo "Spark version is expected to be specified."
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]