This is an automated email from the ASF dual-hosted git repository.

wangdan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pegasus.git


The following commit(s) were added to refs/heads/master by this push:
     new f7f608e0e feat(thirdparty): bump Hadoop to 3.3.6 (#2037)
f7f608e0e is described below

commit f7f608e0ef143cd537df86b5cee49415a1eb00e6
Author: Yingchun Lai <[email protected]>
AuthorDate: Wed Sep 25 19:35:04 2024 +0800

    feat(thirdparty): bump Hadoop to 3.3.6 (#2037)
---
 .../rebuild_thirdparty_if_needed/action.yaml       |  6 ++-
 .github/actions/upload_artifact/action.yaml        |  2 -
 .licenserc.yaml                                    |  1 +
 {build_tools => admin_tools}/clear_zk.sh           |  0
 {build_tools => admin_tools}/download_hadoop.sh    | 21 ++++++--
 {build_tools => admin_tools}/download_package.sh   | 52 +++++++++++--------
 {build_tools => admin_tools}/download_zk.sh        |  0
 {build_tools => admin_tools}/start_zk.sh           |  0
 {build_tools => admin_tools}/stop_zk.sh            |  0
 build_tools/pack_server.sh                         | 54 ++++++++++++--------
 build_tools/pack_tools.sh                          |  7 +--
 docker/thirdparties-bin/Dockerfile                 |  4 +-
 run.sh                                             | 17 ++++---
 src/sample/run.sh                                  |  2 +-
 src/test/function_test/recovery/test_recovery.cpp  |  2 +
 thirdparty/CMakeLists.txt                          | 59 +++++++++++++++-------
 thirdparty/fix_hdfs_native_client.patch            | 32 ++++++++++++
 17 files changed, 173 insertions(+), 86 deletions(-)

diff --git a/.github/actions/rebuild_thirdparty_if_needed/action.yaml b/.github/actions/rebuild_thirdparty_if_needed/action.yaml
index 838ac3f61..d0ebc3fbf 100644
--- a/.github/actions/rebuild_thirdparty_if_needed/action.yaml
+++ b/.github/actions/rebuild_thirdparty_if_needed/action.yaml
@@ -42,8 +42,10 @@ runs:
         cmake --build build/ -j $(nproc)
         rm -rf build/Build build/Download/[a-y]* build/Source/[a-g]* build/Source/[i-q]* build/Source/[s-z]*
         find ./ -name '*CMakeFiles*' -type d -exec rm -rf "{}" +
-        ../build_tools/download_hadoop.sh hadoop-bin
-        ../build_tools/download_zk.sh zookeeper-bin
+        ../admin_tools/download_hadoop.sh hadoop-bin
+        ../admin_tools/download_zk.sh zookeeper-bin
         rm -rf hadoop-bin/share/doc
         rm -rf zookeeper-bin/docs
+        mv hadoop-bin ..
+        mv zookeeper-bin ..
       shell: bash
diff --git a/.github/actions/upload_artifact/action.yaml b/.github/actions/upload_artifact/action.yaml
index 0fb6975d7..a58606f9e 100644
--- a/.github/actions/upload_artifact/action.yaml
+++ b/.github/actions/upload_artifact/action.yaml
@@ -21,8 +21,6 @@ runs:
   steps:
     - name: Tar files
       run: |
-        mv thirdparty/hadoop-bin ./
-        mv thirdparty/zookeeper-bin ./
         rm -rf thirdparty
         # The following operations are tricky, these directories and files don't exist if not build with '--test'.
         # When build binaries for client tests, it's not needed to add '--test'.
diff --git a/.licenserc.yaml b/.licenserc.yaml
index 49ab4b54a..839b467d8 100644
--- a/.licenserc.yaml
+++ b/.licenserc.yaml
@@ -68,6 +68,7 @@ header:
     - 'src/replica/duplication/test/log.1.0.handle_real_private_log2'
     - 'src/replica/duplication/test/log.1.0.all_loaded_are_write_empties'
     # Used for patches for thirdparties.
+    - 'thirdparty/fix_hdfs_native_client.patch'
     - 'thirdparty/fix_jemalloc_for_m1_on_macos.patch'
     - 'thirdparty/fix_libevent_for_macos.patch'
     - 'thirdparty/fix_rocksdb-cmake-PORTABLE-option.patch'
diff --git a/build_tools/clear_zk.sh b/admin_tools/clear_zk.sh
similarity index 100%
rename from build_tools/clear_zk.sh
rename to admin_tools/clear_zk.sh
diff --git a/build_tools/download_hadoop.sh b/admin_tools/download_hadoop.sh
similarity index 54%
rename from build_tools/download_hadoop.sh
rename to admin_tools/download_hadoop.sh
index 13fef962c..d90fd24f4 100755
--- a/build_tools/download_hadoop.sh
+++ b/admin_tools/download_hadoop.sh
@@ -19,13 +19,24 @@
 
 set -e
 
-CWD=$(cd $(dirname $0) && pwd)
+CWD=$(cd "$(dirname "$0")" && pwd)
 
 if [ $# -ge 1 ]; then
     HADOOP_BIN_PATH=$1
 fi
 
-HADOOP_VERSION=2.8.4
-HADOOP_DIR_NAME=hadoop-${HADOOP_VERSION}
-HADOOP_PACKAGE_MD5="b30b409bb69185003b3babd1504ba224"
-${CWD}/download_package.sh ${HADOOP_DIR_NAME} ${HADOOP_PACKAGE_MD5} ${HADOOP_BIN_PATH}
+HADOOP_VERSION="hadoop-3.3.6"
+arch_output=$(arch)
+if [ "$arch_output"x == "aarch64"x ]; then
+  HADOOP_PACKAGE_MD5="369f899194a920e0d1c3c3bc1718b3b5"
+  HADOOP_BASE_NAME=${HADOOP_VERSION}-"$(arch)"
+else
+    if [ "$arch_output"x != "x86_64"x ]; then
+        echo "WARNING: unrecognized CPU architecture '$arch_output', use 'x86_64' as default"
+    fi
+    HADOOP_PACKAGE_MD5="1cbe1214299cd3bd282d33d3934b5cbd"
+    HADOOP_BASE_NAME=${HADOOP_VERSION}
+fi
+
+DOWNLOAD_BASE_URL="https://mirrors.aliyun.com/apache/hadoop/common/${HADOOP_VERSION}/";
+"${CWD}"/download_package.sh "${HADOOP_BASE_NAME}" ${HADOOP_PACKAGE_MD5} "${HADOOP_BIN_PATH}" ${DOWNLOAD_BASE_URL} "${HADOOP_VERSION}"
diff --git a/build_tools/download_package.sh b/admin_tools/download_package.sh
similarity index 56%
rename from build_tools/download_package.sh
rename to admin_tools/download_package.sh
index 538393402..77e813eee 100755
--- a/build_tools/download_package.sh
+++ b/admin_tools/download_package.sh
@@ -21,59 +21,69 @@ set -e
 
 if [ $# -lt 2 ]; then
     echo "Invalid arguments !"
-    echo "USAGE: $0 <DIR_NAME> <PACKAGE_MD5> [TARGET_PATH]"
+    echo "USAGE: $0 <PACKAGE_BASE_NAME> <PACKAGE_MD5> [TARGET_PATH]"
     exit 1
 fi
 
-DIR_NAME=$1
+PACKAGE_BASE_NAME=$1
 PACKAGE_MD5=$2
 
 if [ $# -lt 3 ]; then
-    echo "TARGET_PATH is not provided, thus do not try to download ${DIR_NAME}"
+    echo "TARGET_PATH is not provided, thus do not try to download ${PACKAGE_BASE_NAME}"
     exit 0
 fi
 
 TARGET_PATH=$3
-if [ -d ${TARGET_PATH} ]; then
-    echo "TARGET_PATH ${TARGET_PATH} has existed, thus do not try to download ${DIR_NAME}"
+if [ -d "${TARGET_PATH}" ]; then
+    echo "TARGET_PATH ${TARGET_PATH} has existed, thus do not try to download ${PACKAGE_BASE_NAME}"
     exit 0
 fi
 
-PACKAGE_NAME=${DIR_NAME}.tar.gz
-if [ ! -f ${PACKAGE_NAME} ]; then
-    echo "Downloading ${DIR_NAME}..."
+DEFAULT_DOWNLOAD_BASE_URL="https://pegasus-thirdparty-package.oss-cn-beijing.aliyuncs.com/";
+if [ $# -ge 4 ]; then
+    DEFAULT_DOWNLOAD_BASE_URL=$4
+fi
+
+DIR_NAME=${PACKAGE_BASE_NAME}
+if [ $# -ge 5 ]; then
+    DIR_NAME=$5
+fi
+
+PACKAGE_NAME=${PACKAGE_BASE_NAME}.tar.gz
+if [ ! -f "${PACKAGE_NAME}" ]; then
+    echo "Downloading ${PACKAGE_NAME} ..."
 
-    DOWNLOAD_URL="https://pegasus-thirdparty-package.oss-cn-beijing.aliyuncs.com/${PACKAGE_NAME}";
-    if ! wget -T 10 -t 5 ${DOWNLOAD_URL}; then
-        echo "ERROR: download ${DIR_NAME} failed"
+    DOWNLOAD_URL=${DEFAULT_DOWNLOAD_BASE_URL}${PACKAGE_NAME}
+    if ! wget -q -T 10 -t 5 "${DOWNLOAD_URL}"; then
+        echo "ERROR: download ${PACKAGE_NAME} failed"
         exit 1
     fi
 
-    if [ `md5sum ${PACKAGE_NAME} | awk '{print$1}'` != ${PACKAGE_MD5} ]; then
+    if [ "$(md5sum "${PACKAGE_NAME}" | awk '{print$1}')" != "${PACKAGE_MD5}" ]; then
         echo "Check file ${PACKAGE_NAME} md5sum failed!"
         exit 1
     fi
 fi
 
-rm -rf ${DIR_NAME}
+rm -rf "${DIR_NAME}"
 
-echo "Decompressing ${DIR_NAME}..."
-if ! tar xf ${PACKAGE_NAME}; then
-    echo "ERROR: decompress ${DIR_NAME} failed"
-    rm -f ${PACKAGE_NAME}
+echo "Decompressing ${PACKAGE_NAME} ..."
+if ! tar xf "${PACKAGE_NAME}"; then
+    echo "ERROR: decompress ${PACKAGE_NAME} failed"
+    rm -f "${PACKAGE_NAME}"
     exit 1
 fi
 
-rm -f ${PACKAGE_NAME}
+rm -f "${PACKAGE_NAME}"
 
-if [ ! -d ${DIR_NAME} ]; then
+if [ ! -d "${DIR_NAME}" ]; then
     echo "ERROR: ${DIR_NAME} does not exist"
     exit 1
 fi
 
-if [ -d ${TARGET_PATH} ]; then
+if [ -d "${TARGET_PATH}" ]; then
     echo "TARGET_PATH ${TARGET_PATH} has been generated, which means it and ${DIR_NAME} are the same dir thus do not do mv any more"
     exit 0
 fi
 
-mv ${DIR_NAME} ${TARGET_PATH}
+mv "${DIR_NAME}" "${TARGET_PATH}"
diff --git a/build_tools/download_zk.sh b/admin_tools/download_zk.sh
similarity index 100%
rename from build_tools/download_zk.sh
rename to admin_tools/download_zk.sh
diff --git a/build_tools/start_zk.sh b/admin_tools/start_zk.sh
similarity index 100%
rename from build_tools/start_zk.sh
rename to admin_tools/start_zk.sh
diff --git a/build_tools/stop_zk.sh b/admin_tools/stop_zk.sh
similarity index 100%
rename from build_tools/stop_zk.sh
rename to admin_tools/stop_zk.sh
diff --git a/build_tools/pack_server.sh b/build_tools/pack_server.sh
index 46afc3e8f..53ee67bf3 100755
--- a/build_tools/pack_server.sh
+++ b/build_tools/pack_server.sh
@@ -148,30 +148,40 @@ pack_server_lib crypto $separate_servers
 pack_server_lib ssl $separate_servers
 
 # Pack hadoop-related files.
-# If you want to use hdfs service to backup/restore/bulkload pegasus tables,
-# you need to set env ${HADOOP_HOME}, edit ${HADOOP_HOME}/etc/hadoop/core-site.xml,
-# and specify the keytab file.
-if [ -n "$HADOOP_HOME" ] && [ -n "$keytab_file" ]; then
-    mkdir -p ${pack}/hadoop
-    copy_file $keytab_file ${pack}/hadoop
-    copy_file ${HADOOP_HOME}/etc/hadoop/core-site.xml ${pack}/hadoop
-    if [ -d $HADOOP_HOME/share/hadoop ]; then
-        for f in ${HADOOP_HOME}/share/hadoop/common/lib/*.jar; do
-            copy_file $f ${pack}/hadoop
-        done
-        for f in ${HADOOP_HOME}/share/hadoop/common/*.jar; do
-            copy_file $f ${pack}/hadoop
-        done
-        for f in ${HADOOP_HOME}/share/hadoop/hdfs/lib/*.jar; do
-            copy_file $f ${pack}/hadoop
-        done
-        for f in ${HADOOP_HOME}/share/hadoop/hdfs/*.jar; do
-            copy_file $f ${pack}/hadoop
-        done
+# If you want to use hdfs service to backup/restore/bulkload pegasus tables, you need to
+# set env ${HADOOP_HOME} to the proper directory where contains Hadoop *.jar files.
+if [ -n "$HADOOP_HOME" ]; then
+    # Verify one of the jars.
+    arch_output=$(arch)
+    if [ "$arch_output"x == "aarch64"x ]; then
+        HDFS_JAR_MD5="fcc09dbed936cd8673918774cc3ead6b"
+    else
+      if [ "$arch_output"x != "x86_64"x ]; then
+          echo "WARNING: unrecognized CPU architecture '$arch_output', use 'x86_64' as default"
+      fi
+      HDFS_JAR_MD5="f67f3a5613c885e1622b1056fd94262b"
     fi
+    HDFS_JAR=${HADOOP_HOME}/share/hadoop/hdfs/hadoop-hdfs-3.3.6.jar
+    if [ "$(md5sum "${HDFS_JAR}" | awk '{print$1}')" != "${HDFS_JAR_MD5}" ]; then
+        echo "check file ${HDFS_JAR} md5sum failed!"
+        exit 1
+    fi
+    # Pack the jars.
+    mkdir -p ${pack}/hadoop
+    for f in ${HADOOP_HOME}/share/hadoop/common/lib/*.jar; do
+        copy_file $f ${pack}/hadoop
+    done
+    for f in ${HADOOP_HOME}/share/hadoop/common/*.jar; do
+        copy_file $f ${pack}/hadoop
+    done
+    for f in ${HADOOP_HOME}/share/hadoop/hdfs/lib/*.jar; do
+        copy_file $f ${pack}/hadoop
+    done
+    for f in ${HADOOP_HOME}/share/hadoop/hdfs/*.jar; do
+        copy_file $f ${pack}/hadoop
+    done
 else
-    echo "Couldn't find env ${HADOOP_HOME} or no valid keytab file was specified,
-          hadoop-related files were not packed."
+    echo "Couldn't find env HADOOP_HOME, hadoop-related files were not packed."
 fi
 
 DISTRIB_ID=$(cat /etc/*-release | grep DISTRIB_ID | awk -F'=' '{print $2}')
diff --git a/build_tools/pack_tools.sh b/build_tools/pack_tools.sh
index 04ae6c2c1..3ca195d0f 100755
--- a/build_tools/pack_tools.sh
+++ b/build_tools/pack_tools.sh
@@ -156,13 +156,10 @@ chmod -x ${pack}/lib/*
 
 mkdir -p ${pack}/admin_tools
 copy_file ./admin_tools/* ${pack}/admin_tools/
+copy_file ./admin_tools/download_*.sh ${pack}/admin_tools/
+copy_file ./admin_tools/*_zk.sh ${pack}/admin_tools/
 chmod +x ${pack}/admin_tools/*.sh
 
-mkdir -p ${pack}/build_tools
-copy_file ./build_tools/download_*.sh ${pack}/build_tools/
-copy_file ./build_tools/*_zk.sh ${pack}/build_tools/
-chmod +x ${pack}/build_tools/*.sh
-
 mkdir -p ${pack}/src/server
 copy_file ./src/server/*.ini ${pack}/src/server/
 
diff --git a/docker/thirdparties-bin/Dockerfile b/docker/thirdparties-bin/Dockerfile
index f080da3f0..100da6d87 100644
--- a/docker/thirdparties-bin/Dockerfile
+++ b/docker/thirdparties-bin/Dockerfile
@@ -35,8 +35,8 @@ RUN git clone --depth=1 --branch=${GITHUB_BRANCH} ${GITHUB_REPOSITORY_URL} \
     && unzip /root/thirdparties-src.zip -d . \
     && cmake -DCMAKE_BUILD_TYPE=Release -DROCKSDB_PORTABLE=${ROCKSDB_PORTABLE} -DUSE_JEMALLOC=${USE_JEMALLOC} -B build/ . \
     && cmake --build build/ -j $(($(nproc)/2+1)) \
-    && ../build_tools/download_hadoop.sh ${HADOOP_BIN_PATH} \
-    && ../build_tools/download_zk.sh ${ZOOKEEPER_BIN_PATH} \
+    && ../admin_tools/download_hadoop.sh ${HADOOP_BIN_PATH} \
+    && ../admin_tools/download_zk.sh ${ZOOKEEPER_BIN_PATH} \
     && zip -r ~/thirdparties-bin.zip output/ build/Source/rocksdb/cmake build/Source/http-parser build/Source/hadoop build/Download/zookeeper ${HADOOP_BIN_PATH} ${ZOOKEEPER_BIN_PATH} \
     && cd ~ \
     && rm -rf incubator-pegasus;
diff --git a/run.sh b/run.sh
index f01b9c022..fd358f25f 100755
--- a/run.sh
+++ b/run.sh
@@ -28,12 +28,13 @@ export REPORT_DIR="$ROOT/test_report"
 export THIRDPARTY_ROOT=${PEGASUS_THIRDPARTY_ROOT:-"$ROOT/thirdparty"}
 ARCH_TYPE=''
 arch_output=$(arch)
-if [ "$arch_output"x == "x86_64"x ]; then
-    ARCH_TYPE="amd64"
-elif [ "$arch_output"x == "aarch64"x ]; then
+if [ "$arch_output"x == "aarch64"x ]; then
     ARCH_TYPE="aarch64"
 else
-    echo "WARNING: unsupported CPU architecture '$arch_output', use 'x86_64' as default"
+    if [ "$arch_output"x != "x86_64"x ]; then
+        echo "WARNING: unrecognized CPU architecture '$arch_output', use 'x86_64' as default"
+    fi
+    ARCH_TYPE="amd64"
 fi
 export LD_LIBRARY_PATH=${JAVA_HOME}/jre/lib/${ARCH_TYPE}:${JAVA_HOME}/jre/lib/${ARCH_TYPE}/server:${BUILD_LATEST_DIR}/output/lib:${THIRDPARTY_ROOT}/output/lib:${LD_LIBRARY_PATH}
 # Disable AddressSanitizerOneDefinitionRuleViolation, see https://github.com/google/sanitizers/issues/1017 for details.
@@ -656,7 +657,7 @@ function run_start_zk()
         fi
     fi
 
-    INSTALL_DIR="$INSTALL_DIR" PORT="$PORT" $ROOT/build_tools/start_zk.sh
+    INSTALL_DIR="$INSTALL_DIR" PORT="$PORT" $ROOT/admin_tools/start_zk.sh
 }
 
 #####################
@@ -693,7 +694,7 @@ function run_stop_zk()
         esac
         shift
     done
-    INSTALL_DIR="$INSTALL_DIR" $ROOT/build_tools/stop_zk.sh
+    INSTALL_DIR="$INSTALL_DIR" $ROOT/admin_tools/stop_zk.sh
 }
 
 #####################
@@ -730,7 +731,7 @@ function run_clear_zk()
         esac
         shift
     done
-    INSTALL_DIR="$INSTALL_DIR" $ROOT/build_tools/clear_zk.sh
+    INSTALL_DIR="$INSTALL_DIR" $ROOT/admin_tools/clear_zk.sh
 }
 
 #####################
@@ -2105,6 +2106,8 @@ case $cmd in
         ;;
     pack_server)
         shift
+        # source the config_hdfs.sh to get the HADOOP_HOME.
+        source "${ROOT}"/admin_tools/config_hdfs.sh
         PEGASUS_ROOT=$ROOT ./build_tools/pack_server.sh $*
         ;;
     pack_client)
diff --git a/src/sample/run.sh b/src/sample/run.sh
index 5da1a9168..d5f7f5e74 100755
--- a/src/sample/run.sh
+++ b/src/sample/run.sh
@@ -33,7 +33,7 @@ if [ "$arch_output"x == "x86_64"x ]; then
 elif [ "$arch_output"x == "aarch64"x ]; then
     ARCH_TYPE="aarch64"
 else
-    echo "WARNING: unsupported CPU architecture '$arch_output', use 'x86_64' as default"
+    echo "WARNING: unrecognized CPU architecture '$arch_output', use 'x86_64' as default"
 fi
 export LD_LIBRARY_PATH=${JAVA_HOME}/jre/lib/${ARCH_TYPE}:${JAVA_HOME}/jre/lib/${ARCH_TYPE}/server:${PEGASUS_THIRDPARTY_ROOT}/output/lib:$(pwd)/../../lib:${LD_LIBRARY_PATH}
 
diff --git a/src/test/function_test/recovery/test_recovery.cpp b/src/test/function_test/recovery/test_recovery.cpp
index 33d1b0adf..89123960b 100644
--- a/src/test/function_test/recovery/test_recovery.cpp
+++ b/src/test/function_test/recovery/test_recovery.cpp
@@ -252,6 +252,8 @@ TEST_F(recovery_test, recovery)
         auto nodes = get_rpc_host_port_list({34801, 34802, 34803});
         ASSERT_EQ(dsn::ERR_OK, ddl_client_->do_recovery(nodes, 30, false, false, std::string()));
 
+        ASSERT_NO_FATAL_FAILURE(wait_table_healthy(table_name_));
+
         // then wait the apps to ready
         ASSERT_EQ(dsn::ERR_OK,
                   ddl_client_->create_app(table_name_, "pegasus", partition_count_, 3, {}, false));
diff --git a/thirdparty/CMakeLists.txt b/thirdparty/CMakeLists.txt
index 1c5ac59a7..ecdf8af5c 100644
--- a/thirdparty/CMakeLists.txt
+++ b/thirdparty/CMakeLists.txt
@@ -119,17 +119,51 @@ ExternalProject_Add(gperftools
         DOWNLOAD_NO_PROGRESS true
 )
 
+ExternalProject_Add(abseil
+        URL ${OSS_URL_PREFIX}/abseil-20230802.1.zip
+        https://github.com/abseil/abseil-cpp/archive/refs/tags/20230802.1.zip
+        URL_MD5 5c6193dbc82834f8e762c6a28c9cc615
+        CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${TP_OUTPUT}
+        -DCMAKE_POSITION_INDEPENDENT_CODE=ON
+        -DABSL_FIND_GOOGLETEST=OFF
+        -DCMAKE_CXX_STANDARD=17
+        DOWNLOAD_EXTRACT_TIMESTAMP true
+        DOWNLOAD_NO_PROGRESS true
+)
+
+ExternalProject_Add(protobuf
+        URL https://github.com/protocolbuffers/protobuf/archive/refs/tags/v27.0.tar.gz
+        URL_MD5 c96aaf02c8acea549d65bb7b2d549bf6
+        CMAKE_ARGS -DCMAKE_BUILD_TYPE=release
+        -Dprotobuf_BUILD_TESTS=OFF
+        -Dprotobuf_BUILD_PROTOC_BINARIES=ON
+        -Dprotobuf_BUILD_LIBUPB=ON
+        -Dprotobuf_ABSL_PROVIDER=package
+        -DBUILD_SHARED_LIBS=ON
+        -DBUILD_SHARED_HDFSPP=ON
+        -DHDFSPP_LIBRARY_ONLY=ON
+        -DCMAKE_POSITION_INDEPENDENT_CODE=ON
+        -DCMAKE_CXX_STANDARD=17
+        -DABSL_ROOT_DIR=${TP_OUTPUT}
+        -DCMAKE_INSTALL_PREFIX=${TP_OUTPUT}
+        -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
+        -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
+        DEPENDS abseil
+        DOWNLOAD_EXTRACT_TIMESTAMP true
+        DOWNLOAD_NO_PROGRESS true
+)
+
 set(HDFS_CLIENT_DIR "hadoop-hdfs-project/hadoop-hdfs-native-client")
 ExternalProject_Add(hadoop
-        URL ${OSS_URL_PREFIX}/hadoop-release-2.8.4.tar.gz
-        https://github.com/apache/hadoop/archive/refs/tags/rel/release-2.8.4.tar.gz
-        URL_MD5 a1be737d4bff14923689619ab6545a96
-        PATCH_COMMAND ""
+        URL https://mirrors.aliyun.com/apache/hadoop/common/hadoop-3.3.6/hadoop-3.3.6-src.tar.gz
+        URL_MD5 285c07d8ad2c837c8ee04a4fa49c73cd
+        PATCH_COMMAND patch -p1 < ${TP_DIR}/fix_hdfs_native_client.patch
         COMMAND cd ${HDFS_CLIENT_DIR} && mvn package -Pdist,native -DskipTests -Dmaven.javadoc.skip=true -Dtar
-        COMMAND cd ${HDFS_CLIENT_DIR} && cp -R target/hadoop-hdfs-native-client-2.8.4/include/. ${TP_OUTPUT}/include/hdfs && cp -R target/hadoop-hdfs-native-client-2.8.4/lib/native/. ${TP_OUTPUT}/lib
+        COMMAND cd ${HDFS_CLIENT_DIR} && cp -R target/hadoop-hdfs-native-client-3.3.6/include/. ${TP_OUTPUT}/include/hdfs && cp -R target/hadoop-hdfs-native-client-3.3.6/lib/native/. ${TP_OUTPUT}/lib
         CONFIGURE_COMMAND ""
         BUILD_COMMAND ""
         INSTALL_COMMAND ""
+        DEPENDS protobuf
         DOWNLOAD_EXTRACT_TIMESTAMP true
         DOWNLOAD_NO_PROGRESS true
 )
@@ -305,18 +339,6 @@ ExternalProject_Add(nlohmann_json
         DOWNLOAD_NO_PROGRESS true
 )
 
-ExternalProject_Add(abseil
-        URL ${OSS_URL_PREFIX}/abseil-20230802.1.zip
-        https://github.com/abseil/abseil-cpp/archive/refs/tags/20230802.1.zip
-        URL_MD5 5c6193dbc82834f8e762c6a28c9cc615
-        CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${TP_OUTPUT}
-        -DCMAKE_POSITION_INDEPENDENT_CODE=ON
-        -DABSL_FIND_GOOGLETEST=OFF
-        -DCMAKE_CXX_STANDARD=17
-        DOWNLOAD_EXTRACT_TIMESTAMP true
-        DOWNLOAD_NO_PROGRESS true
-)
-
 ExternalProject_Add(s2geometry
         URL ${OSS_URL_PREFIX}/s2geometry-0.10.0.tar.gz
         https://github.com/google/s2geometry/archive/refs/tags/v0.10.0.tar.gz
@@ -359,8 +381,7 @@ set(SNAPPY_OPTIONS
         -DSNAPPY_FUZZING_BUILD=OFF
         -DSNAPPY_INSTALL=ON)
 execute_process(COMMAND arch OUTPUT_VARIABLE ARCH_NAME OUTPUT_STRIP_TRAILING_WHITESPACE)
-message(STATUS "ARCH_NAME = ${ARCH_NAME}")
-if (ARCH_NAME EQUAL "x86_64")
+if (ARCH_NAME STREQUAL "x86_64")
     set(SNAPPY_OPTIONS
             ${SNAPPY_OPTIONS}
             -DSNAPPY_REQUIRE_AVX=ON
diff --git a/thirdparty/fix_hdfs_native_client.patch b/thirdparty/fix_hdfs_native_client.patch
new file mode 100644
index 000000000..a7cd932f0
--- /dev/null
+++ b/thirdparty/fix_hdfs_native_client.patch
@@ -0,0 +1,32 @@
+diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/CMakeLists.txt b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/CMakeLists.txt
+index 24ec297aa27b..e77c38435bba 100644
+--- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/CMakeLists.txt
++++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/CMakeLists.txt
+@@ -152,24 +152,13 @@ add_subdirectory(main/native/libhdfs-examples)
+
+ # Temporary fix to disable Libhdfs++ build on older systems that do not support thread_local
+ include(CheckCXXSourceCompiles)
+-unset (THREAD_LOCAL_SUPPORTED CACHE)
+ set (CMAKE_REQUIRED_DEFINITIONS "-std=c++11")
+ set (CMAKE_REQUIRED_LIBRARIES ${CMAKE_THREAD_LIBS_INIT})
+-check_cxx_source_compiles(
+-    "#include <thread>
+-    int main(void) {
+-      thread_local int s;
+-      return 0;
+-    }"
+-    THREAD_LOCAL_SUPPORTED)
+-if (THREAD_LOCAL_SUPPORTED)
++if (REQUIRE_LIBHDFSPP)
+     add_subdirectory(main/native/libhdfspp)
+ else()
+-    message(WARNING
+-    "WARNING: Libhdfs++ library was not built because the required feature thread_local storage \
+-    is not supported by your compiler. Known compilers that support this feature: GCC 4.8+, Visual Studio 2015+, \
+-    Clang (community version 3.3+), Clang (version for Xcode 8+ and iOS 9+).")
+-endif (THREAD_LOCAL_SUPPORTED)
++    message(WARNING "WARNING: Libhdfs++ library was not built because the REQUIRE_LIBHDFSPP is not enabled.")
++endif (REQUIRE_LIBHDFSPP)
+
+ if(REQUIRE_LIBWEBHDFS)
+     add_subdirectory(contrib/libwebhdfs)
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to