This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 109f2e074ca [feature] Treat JuiceFS (jfs://) as HDFS-compatible in
FE/BE (#61031)
109f2e074ca is described below
commit 109f2e074ca2e7452f501b8fcb42517d00c8ce99
Author: Chenjunwei <[email protected]>
AuthorDate: Fri Mar 20 13:11:50 2026 +0800
[feature] Treat JuiceFS (jfs://) as HDFS-compatible in FE/BE (#61031)
## Summary
- Treat `jfs://` (JuiceFS) as HDFS-compatible in FE and BE code paths.
- Add regression case for JuiceFS HMS catalog read.
- Keep docker/startup-script local changes out of this PR.
## FE Changes
- Add `jfs` handling to storage schema/type mapping and HDFS properties
flow.
- Update resource/resource-manager related checks to recognize JuiceFS
as HDFS-like.
- Fix `CreateResourceCommandTest` lambda effectively-final compilation
issue.
## BE Changes
- Add `jfs://` recognition in file factory and paimon Doris file system
integration.
## Regression
- Add suite:
-
`regression-test/suites/external_table_p0/refactor_storage_param/test_jfs_hms_catalog_read.groovy`
## Local Validation
- Built FE/BE with the above changes.
- Local end-to-end smoke verified for catalog read/write:
- create catalog -> create db/table with `jfs://...` location -> insert
-> select
- readback rows: `101`, `202`
---------
Co-authored-by: Claude Opus 4.6 <[email protected]>
---
be/src/common/config.cpp | 35 ++++
be/src/common/config.h | 3 +
be/src/format/table/paimon_doris_file_system.cpp | 82 ++++++----
be/src/format/table/paimon_doris_file_system.h | 13 +-
be/src/gen_cpp/CMakeLists.txt | 2 +-
be/src/io/file_factory.h | 1 +
.../format/table/paimon_doris_file_system_test.cpp | 59 +++++++
bin/start_be.sh | 8 +
bin/start_fe.sh | 8 +
build.sh | 135 +++++++++++++---
.../docker-compose/hive/hadoop-hive-3x.env.tpl | 3 +-
.../docker-compose/hive/hadoop-hive.env.tpl | 3 +-
.../docker-compose/hive/hive-2x_settings.env | 6 +
.../docker-compose/hive/hive-3x.yaml.tpl | 1 +
.../docker-compose/hive/hive-3x_settings.env | 7 +-
.../docker-compose/hive/scripts/hive-metastore.sh | 14 +-
docker/thirdparties/run-thirdparties-docker.sh | 178 +++++++++++++++++++++
.../java/org/apache/doris/analysis/BrokerDesc.java | 2 +-
.../org/apache/doris/analysis/OutFileClause.java | 6 +-
.../java/org/apache/doris/catalog/Resource.java | 3 +
.../java/org/apache/doris/catalog/ResourceMgr.java | 3 +-
.../property/storage/HdfsProperties.java | 5 +-
.../java/org/apache/doris/fs/SchemaTypeMapper.java | 5 +-
.../apache/doris/common/util/LocationPathTest.java | 7 +-
.../property/storage/HdfsPropertiesTest.java | 17 ++
.../property/storage/HdfsPropertiesUtilsTest.java | 9 +-
.../plans/commands/CreateResourceCommandTest.java | 34 ++--
.../pipeline/external/conf/regression-conf.groovy | 9 ++
.../test_jfs_hms_catalog_read.groovy | 141 ++++++++++++++++
thirdparty/build-thirdparty.sh | 11 ++
thirdparty/juicefs-helpers.sh | 84 ++++++++++
thirdparty/vars.sh | 7 +
32 files changed, 814 insertions(+), 87 deletions(-)
diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index c94022cb016..54a65f7802a 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -1525,6 +1525,41 @@ DEFINE_mInt64(hive_sink_max_file_size, "1073741824"); //
1GB
/** Iceberg sink configurations **/
DEFINE_mInt64(iceberg_sink_max_file_size, "1073741824"); // 1GB
+// URI scheme to Doris file type mappings used by paimon-cpp DorisFileSystem.
+// Each entry uses the format "<scheme>=<file_type>", and file_type must be
one of:
+// local, hdfs, s3, http, broker.
+DEFINE_Strings(paimon_file_system_scheme_mappings,
+ "file=local,hdfs=hdfs,viewfs=hdfs,local=hdfs,jfs=hdfs,"
+ "s3=s3,s3a=s3,s3n=s3,oss=s3,obs=s3,cos=s3,cosn=s3,gs=s3,"
+ "abfs=s3,abfss=s3,wasb=s3,wasbs=s3,http=http,https=http,"
+ "ofs=broker,gfs=broker");
+DEFINE_Validator(paimon_file_system_scheme_mappings,
+ ([](const std::vector<std::string>& mappings) -> bool {
+ doris::StringCaseUnorderedSet seen_schemes;
+ static const doris::StringCaseUnorderedSet
supported_types = {
+ "local", "hdfs", "s3", "http", "broker"};
+ for (const auto& raw_entry : mappings) {
+ std::string_view entry = doris::trim(raw_entry);
+ size_t separator = entry.find('=');
+ if (separator == std::string_view::npos) {
+ return false;
+ }
+ std::string scheme =
std::string(doris::trim(entry.substr(0, separator)));
+ std::string file_type =
+
std::string(doris::trim(entry.substr(separator + 1)));
+ if (scheme.empty() || file_type.empty()) {
+ return false;
+ }
+ if (supported_types.find(file_type) ==
supported_types.end()) {
+ return false;
+ }
+ if (!seen_schemes.insert(scheme).second) {
+ return false;
+ }
+ }
+ return true;
+ }));
+
DEFINE_mInt32(thrift_client_open_num_tries, "1");
DEFINE_Bool(enable_index_compaction, "false");
diff --git a/be/src/common/config.h b/be/src/common/config.h
index afcd85a7a39..07272806481 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -1617,6 +1617,9 @@ DECLARE_mInt64(hive_sink_max_file_size);
/** Iceberg sink configurations **/
DECLARE_mInt64(iceberg_sink_max_file_size);
+/** Paimon file system configurations **/
+DECLARE_Strings(paimon_file_system_scheme_mappings);
+
// Number of open tries, default 1 means only try to open once.
// Retry the Open num_retries time waiting 100 milliseconds between retries.
DECLARE_mInt32(thrift_client_open_num_tries);
diff --git a/be/src/format/table/paimon_doris_file_system.cpp
b/be/src/format/table/paimon_doris_file_system.cpp
index 999b6c684a5..bd303b5e26a 100644
--- a/be/src/format/table/paimon_doris_file_system.cpp
+++ b/be/src/format/table/paimon_doris_file_system.cpp
@@ -19,8 +19,6 @@
#include <gen_cpp/Types_types.h>
-#include <algorithm>
-#include <cctype>
#include <map>
#include <memory>
#include <mutex>
@@ -29,6 +27,7 @@
#include <utility>
#include <vector>
+#include "common/config.h"
#include "common/status.h"
#include "io/file_factory.h"
#include "io/fs/file_reader.h"
@@ -40,6 +39,7 @@
#include "paimon/fs/file_system_factory.h"
#include "paimon/result.h"
#include "paimon/status.h"
+#include "util/string_util.h"
namespace paimon {
@@ -48,12 +48,6 @@ struct ParsedUri {
std::string authority;
};
-std::string to_lower(std::string value) {
- std::ranges::transform(value, value.begin(),
- [](unsigned char c) { return
static_cast<char>(std::tolower(c)); });
- return value;
-}
-
ParsedUri parse_uri(const std::string& path) {
ParsedUri parsed;
size_t scheme_pos = path.find("://");
@@ -65,7 +59,7 @@ ParsedUri parse_uri(const std::string& path) {
if (scheme_pos == std::string::npos || scheme_pos == 0) {
return parsed;
}
- parsed.scheme = to_lower(path.substr(0, scheme_pos));
+ parsed.scheme = doris::to_lower(path.substr(0, scheme_pos));
size_t authority_start = scheme_pos + delim_len;
if (authority_start >= path.size() || path[authority_start] == '/') {
return parsed;
@@ -79,38 +73,58 @@ ParsedUri parse_uri(const std::string& path) {
return parsed;
}
-bool is_s3_scheme(const std::string& scheme) {
- return scheme == "s3" || scheme == "s3a" || scheme == "s3n" || scheme ==
"oss" ||
- scheme == "obs" || scheme == "cos" || scheme == "cosn" || scheme ==
"gs" ||
- scheme == "abfs" || scheme == "abfss" || scheme == "wasb" || scheme
== "wasbs";
-}
-
-bool is_hdfs_scheme(const std::string& scheme) {
- return scheme == "hdfs" || scheme == "viewfs" || scheme == "local";
+bool parse_scheme_mapping_target(std::string_view raw_target,
doris::TFileType::type* type) {
+ std::string target = doris::to_lower(std::string(doris::trim(raw_target)));
+ if (target == "local") {
+ *type = doris::TFileType::FILE_LOCAL;
+ return true;
+ }
+ if (target == "hdfs") {
+ *type = doris::TFileType::FILE_HDFS;
+ return true;
+ }
+ if (target == "s3") {
+ *type = doris::TFileType::FILE_S3;
+ return true;
+ }
+ if (target == "http") {
+ *type = doris::TFileType::FILE_HTTP;
+ return true;
+ }
+ if (target == "broker") {
+ *type = doris::TFileType::FILE_BROKER;
+ return true;
+ }
+ return false;
}
-bool is_http_scheme(const std::string& scheme) {
- return scheme == "http" || scheme == "https";
+bool parse_scheme_mapping_entry(std::string_view raw_entry, std::string*
scheme,
+ doris::TFileType::type* type) {
+ size_t separator = raw_entry.find('=');
+ if (separator == std::string_view::npos) {
+ return false;
+ }
+ *scheme = doris::to_lower(std::string(doris::trim(raw_entry.substr(0,
separator))));
+ if (scheme->empty()) {
+ return false;
+ }
+ return parse_scheme_mapping_target(raw_entry.substr(separator + 1), type);
}
doris::TFileType::type map_scheme_to_file_type(const std::string& scheme) {
if (scheme.empty()) {
return doris::TFileType::FILE_HDFS;
}
- if (scheme == "file") {
- return doris::TFileType::FILE_LOCAL;
- }
- if (is_hdfs_scheme(scheme)) {
- return doris::TFileType::FILE_HDFS;
- }
- if (is_s3_scheme(scheme)) {
- return doris::TFileType::FILE_S3;
- }
- if (is_http_scheme(scheme)) {
- return doris::TFileType::FILE_HTTP;
- }
- if (scheme == "ofs" || scheme == "gfs" || scheme == "jfs") {
- return doris::TFileType::FILE_BROKER;
+ std::string normalized_scheme = doris::to_lower(scheme);
+ for (const auto& mapping_entry :
doris::config::paimon_file_system_scheme_mappings) {
+ std::string configured_scheme;
+ doris::TFileType::type configured_type;
+ if (!parse_scheme_mapping_entry(mapping_entry, &configured_scheme,
&configured_type)) {
+ continue;
+ }
+ if (configured_scheme == normalized_scheme) {
+ return configured_type;
+ }
}
return doris::TFileType::FILE_HDFS;
}
@@ -149,7 +163,7 @@ std::string normalize_path_for_type(const std::string&
path, const std::string&
if (type == doris::TFileType::FILE_LOCAL) {
return normalize_local_path(path);
}
- if (type == doris::TFileType::FILE_S3 && scheme != "s3" &&
!is_http_scheme(scheme)) {
+ if (type == doris::TFileType::FILE_S3 && scheme != "s3") {
return replace_scheme(path, "s3");
}
return path;
diff --git a/be/src/format/table/paimon_doris_file_system.h
b/be/src/format/table/paimon_doris_file_system.h
index 1ed2bd8822d..561e7aeac30 100644
--- a/be/src/format/table/paimon_doris_file_system.h
+++ b/be/src/format/table/paimon_doris_file_system.h
@@ -17,9 +17,20 @@
#pragma once
+#include <gen_cpp/Types_types.h>
+
+#include <string>
+
+namespace paimon {
+
+// Visible for tests: maps a URI scheme to the Doris file type used by
paimon-cpp.
+doris::TFileType::type map_scheme_to_file_type(const std::string& scheme);
+
+} // namespace paimon
+
namespace doris {
// Force-link helper so the paimon-cpp file system factory registration is
kept.
void register_paimon_doris_file_system();
-} // namespace doris
\ No newline at end of file
+} // namespace doris
diff --git a/be/src/gen_cpp/CMakeLists.txt b/be/src/gen_cpp/CMakeLists.txt
index e91b5e29c14..c4463f75520 100644
--- a/be/src/gen_cpp/CMakeLists.txt
+++ b/be/src/gen_cpp/CMakeLists.txt
@@ -23,6 +23,7 @@ file(GLOB SRC_FILES CONFIGURE_DEPENDS
${GEN_CPP_DIR}/*.cpp
${GEN_CPP_DIR}/*.cc
)
+list(FILTER SRC_FILES EXCLUDE REGEX "_server\\.skeleton\\.cpp$")
add_compile_options(-Wno-return-type)
@@ -43,4 +44,3 @@ endif()
#add_dependencies(DorisGen thrift-cpp)
#add_dependencies(Opcode function)
-
diff --git a/be/src/io/file_factory.h b/be/src/io/file_factory.h
index 4966137508d..7d662e4fdde 100644
--- a/be/src/io/file_factory.h
+++ b/be/src/io/file_factory.h
@@ -119,6 +119,7 @@ public:
case TStorageBackendType::BROKER:
return TFileType::FILE_BROKER;
case TStorageBackendType::HDFS:
+ case TStorageBackendType::JFS:
return TFileType::FILE_HDFS;
default:
return ResultError(Status::FatalError("not match type to convert,
from type:{}", type));
diff --git a/be/test/format/table/paimon_doris_file_system_test.cpp
b/be/test/format/table/paimon_doris_file_system_test.cpp
new file mode 100644
index 00000000000..a2032dff818
--- /dev/null
+++ b/be/test/format/table/paimon_doris_file_system_test.cpp
@@ -0,0 +1,59 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format/table/paimon_doris_file_system.h"
+
+#include <gtest/gtest.h>
+
+#include <vector>
+
+#include "common/config.h"
+
+namespace doris {
+
+class PaimonDorisFileSystemTest : public testing::Test {
+protected:
+ void SetUp() override { saved_mappings_ =
config::paimon_file_system_scheme_mappings; }
+
+ void TearDown() override { config::paimon_file_system_scheme_mappings =
saved_mappings_; }
+
+ std::vector<std::string> saved_mappings_;
+};
+
+TEST_F(PaimonDorisFileSystemTest, UsesDefaultSchemeMappings) {
+ EXPECT_EQ(TFileType::FILE_LOCAL, paimon::map_scheme_to_file_type("file"));
+ EXPECT_EQ(TFileType::FILE_HDFS, paimon::map_scheme_to_file_type("jfs"));
+ EXPECT_EQ(TFileType::FILE_S3, paimon::map_scheme_to_file_type("s3a"));
+ EXPECT_EQ(TFileType::FILE_S3, paimon::map_scheme_to_file_type("gs"));
+ EXPECT_EQ(TFileType::FILE_HTTP, paimon::map_scheme_to_file_type("https"));
+ EXPECT_EQ(TFileType::FILE_BROKER, paimon::map_scheme_to_file_type("ofs"));
+ EXPECT_EQ(TFileType::FILE_HDFS,
paimon::map_scheme_to_file_type("unknown"));
+}
+
+TEST_F(PaimonDorisFileSystemTest, AllowsOverridingSchemeMappingsFromConfig) {
+ config::paimon_file_system_scheme_mappings = {"file=local", "jfs = s3",
"gs = hdfs",
+ "custom-http = http",
"custom-broker = broker"};
+
+ EXPECT_EQ(TFileType::FILE_LOCAL, paimon::map_scheme_to_file_type("file"));
+ EXPECT_EQ(TFileType::FILE_S3, paimon::map_scheme_to_file_type("JFS"));
+ EXPECT_EQ(TFileType::FILE_HDFS, paimon::map_scheme_to_file_type("gs"));
+ EXPECT_EQ(TFileType::FILE_HTTP,
paimon::map_scheme_to_file_type("custom-http"));
+ EXPECT_EQ(TFileType::FILE_BROKER,
paimon::map_scheme_to_file_type("custom-broker"));
+ EXPECT_EQ(TFileType::FILE_HDFS,
paimon::map_scheme_to_file_type("still-unknown"));
+}
+
+} // namespace doris
diff --git a/bin/start_be.sh b/bin/start_be.sh
index 36b490494d4..2727b1b1ad2 100755
--- a/bin/start_be.sh
+++ b/bin/start_be.sh
@@ -254,6 +254,14 @@ if [[ -d "${DORIS_HOME}/lib/java_extensions/jindofs" ]];
then
done
fi
+# add juicefs
+# should be after jars in lib/hadoop_hdfs/, or it will override the hadoop jars
in lib/hadoop_hdfs
+if [[ -d "${DORIS_HOME}/lib/java_extensions/juicefs" ]]; then
+ for f in "${DORIS_HOME}/lib/java_extensions/juicefs"/*.jar; do
+ DORIS_CLASSPATH="${DORIS_CLASSPATH}:${f}"
+ done
+fi
+
# add custom_libs to CLASSPATH
# ATTN, custom_libs is deprecated, use plugins/java_extensions
if [[ -d "${DORIS_HOME}/custom_lib" ]]; then
diff --git a/bin/start_fe.sh b/bin/start_fe.sh
index edf84001b80..bb2dea383f9 100755
--- a/bin/start_fe.sh
+++ b/bin/start_fe.sh
@@ -368,6 +368,14 @@ if [[ -d "${DORIS_HOME}/lib/jindofs" ]]; then
done
fi
+# add juicefs
+# should be after jars in lib/, or it will override the hadoop jars in lib/
+if [[ -d "${DORIS_HOME}/lib/juicefs" ]]; then
+ for f in "${DORIS_HOME}/lib/juicefs"/*.jar; do
+ CLASSPATH="${CLASSPATH}:${f}"
+ done
+fi
+
# add plugins/java_extensions to CLASSPATH
if [[ -d "${DORIS_HOME}/plugins/java_extensions" ]]; then
for f in "${DORIS_HOME}/plugins/java_extensions"/*.jar; do
diff --git a/build.sh b/build.sh
index 34cb15bb6e1..37e13774f97 100755
--- a/build.sh
+++ b/build.sh
@@ -80,6 +80,7 @@ Usage: $0 <options>
DISABLE_BE_JAVA_EXTENSIONS If set DISABLE_BE_JAVA_EXTENSIONS=ON, we will
do not build binary with java-udf,hadoop-hudi-scanner,jdbc-scanner and so on
Default is OFF.
DISABLE_JAVA_CHECK_STYLE If set DISABLE_JAVA_CHECK_STYLE=ON, it will
skip style check of java code in FE.
DISABLE_BUILD_AZURE If set DISABLE_BUILD_AZURE=ON, it will not
build azure into BE.
+ DISABLE_BUILD_JUICEFS If set DISABLE_BUILD_JUICEFS=ON, it will skip
packaging juicefs-hadoop jar into FE/BE output.
Eg.
$0 build all
@@ -143,6 +144,45 @@ function copy_common_files() {
cp -r -p "${DORIS_HOME}/dist/licenses" "$1/"
}
+. "${DORIS_HOME}/thirdparty/juicefs-helpers.sh"
+
+find_juicefs_hadoop_jar() {
+ juicefs_find_hadoop_jar_by_globs \
+ "${DORIS_THIRDPARTY}/installed/juicefs_libs/juicefs-hadoop-[0-9]*.jar"
\
+ "${DORIS_THIRDPARTY}/src/juicefs-hadoop-[0-9]*.jar" \
+
"${DORIS_HOME}/thirdparty/installed/juicefs_libs/juicefs-hadoop-[0-9]*.jar" \
+ "${DORIS_HOME}/thirdparty/src/juicefs-hadoop-[0-9]*.jar"
+}
+
+detect_juicefs_version() {
+ local juicefs_jar
+ juicefs_jar=$(find_juicefs_hadoop_jar || true)
+ juicefs_detect_hadoop_version "${juicefs_jar}" "${JUICEFS_DEFAULT_VERSION}"
+}
+
+download_juicefs_hadoop_jar() {
+ local juicefs_version="$1"
+ local cache_dir="${DORIS_HOME}/thirdparty/installed/juicefs_libs"
+ juicefs_download_hadoop_jar_to_cache "${juicefs_version}" "${cache_dir}"
+}
+
+copy_juicefs_hadoop_jar() {
+ local target_dir="$1"
+ local source_jar=""
+ source_jar=$(find_juicefs_hadoop_jar || true)
+ if [[ -z "${source_jar}" ]]; then
+ local juicefs_version
+ juicefs_version=$(detect_juicefs_version)
+ source_jar=$(download_juicefs_hadoop_jar "${juicefs_version}" || true)
+ fi
+ if [[ -z "${source_jar}" ]]; then
+ echo "WARN: skip copying juicefs-hadoop jar, not found in thirdparty
and download failed"
+ return 0
+ fi
+ cp -r -p "${source_jar}" "${target_dir}/"
+ echo "Copy JuiceFS Hadoop jar to ${target_dir}: $(basename
"${source_jar}")"
+}
+
if ! OPTS="$(getopt \
-n "$0" \
-o '' \
@@ -506,6 +546,12 @@ else
BUILD_JINDOFS='ON'
fi
+if [[ "$(echo "${DISABLE_BUILD_JUICEFS}" | tr '[:lower:]' '[:upper:]')" ==
"ON" ]]; then
+ BUILD_JUICEFS='OFF'
+else
+ BUILD_JUICEFS='ON'
+fi
+
if [[ -z "${ENABLE_INJECTION_POINT}" ]]; then
ENABLE_INJECTION_POINT='OFF'
fi
@@ -554,6 +600,7 @@ echo "Get params:
BUILD_BE_JAVA_EXTENSIONS -- ${BUILD_BE_JAVA_EXTENSIONS}
BUILD_BE_CDC_CLIENT -- ${BUILD_BE_CDC_CLIENT}
BUILD_HIVE_UDF -- ${BUILD_HIVE_UDF}
+ BUILD_JUICEFS -- ${BUILD_JUICEFS}
PARALLEL -- ${PARALLEL}
CLEAN -- ${CLEAN}
GLIBC_COMPATIBILITY -- ${GLIBC_COMPATIBILITY}
@@ -776,6 +823,57 @@ function build_ui() {
cp -r "${ui_dist}"/* "${DORIS_HOME}/fe/fe-core/src/main/resources/static"/
}
+function build_fe_modules() {
+ local thread_count="${FE_MAVEN_THREADS:-1C}"
+ local retry_thread_count="${FE_MAVEN_RETRY_THREADS:-1}"
+ local log_file
+ local -a dependency_mvn_opts=()
+ local -a extra_mvn_opts=()
+ local -a user_settings_opts=()
+ local -a mvn_cmd=(
+ "${MVN_CMD}"
+ package
+ -pl
+ "${FE_MODULES}"
+ -am
+ -Dskip.doc=true
+ -DskipTests
+ )
+
+ if [[ "${DISABLE_JAVA_CHECK_STYLE}" = "ON" ]]; then
+ mvn_cmd+=("-Dcheckstyle.skip=true")
+ fi
+ if [[ -n "${MVN_OPT}" ]]; then
+ # shellcheck disable=SC2206
+ extra_mvn_opts=(${MVN_OPT})
+ fi
+ if [[ "${BUILD_OBS_DEPENDENCIES}" -eq 0 ]]; then
+ dependency_mvn_opts+=("-Dobs.dependency.scope=provided")
+ fi
+ if [[ "${BUILD_COS_DEPENDENCIES}" -eq 0 ]]; then
+ dependency_mvn_opts+=("-Dcos.dependency.scope=provided")
+ fi
+ if [[ -n "${USER_SETTINGS_MVN_REPO}" && -f "${USER_SETTINGS_MVN_REPO}" ]];
then
+ user_settings_opts=(-gs "${USER_SETTINGS_MVN_REPO}")
+ fi
+
+ mvn_cmd+=("${extra_mvn_opts[@]}" "${dependency_mvn_opts[@]}"
"${user_settings_opts[@]}" -T "${thread_count}")
+ log_file="$(mktemp)"
+ if "${mvn_cmd[@]}" 2>&1 | tee "${log_file}"; then
+ rm -f "${log_file}"
+ return 0
+ fi
+ if [[ "${thread_count}" != "${retry_thread_count}" ]] && grep -Fq "Could
not acquire lock(s)" "${log_file}"; then
+ echo "FE Maven build hit Maven resolver lock contention. Retrying with
-T ${retry_thread_count}."
+ mvn_cmd=("${mvn_cmd[@]:0:${#mvn_cmd[@]}-2}" -T "${retry_thread_count}")
+ "${mvn_cmd[@]}"
+ rm -f "${log_file}"
+ return 0
+ fi
+ rm -f "${log_file}"
+ return 1
+}
+
# FE UI must be built before building FE
if [[ "${BUILD_FE}" -eq 1 ]]; then
if [[ "${BUILD_UI}" -eq 1 ]]; then
@@ -790,28 +888,7 @@ if [[ "${FE_MODULES}" != '' ]]; then
if [[ "${CLEAN}" -eq 1 ]]; then
clean_fe
fi
- DEPENDENCIES_MVN_OPTS=" "
- if [[ "${BUILD_OBS_DEPENDENCIES}" -eq 0 ]]; then
- DEPENDENCIES_MVN_OPTS+=" -Dobs.dependency.scope=provided "
- fi
- if [[ "${BUILD_COS_DEPENDENCIES}" -eq 0 ]]; then
- DEPENDENCIES_MVN_OPTS+=" -Dcos.dependency.scope=provided "
- fi
-
- if [[ "${DISABLE_JAVA_CHECK_STYLE}" = "ON" ]]; then
- # Allowed user customer set env param USER_SETTINGS_MVN_REPO means
settings.xml file path
- if [[ -n ${USER_SETTINGS_MVN_REPO} && -f ${USER_SETTINGS_MVN_REPO} ]];
then
- "${MVN_CMD}" package -pl ${FE_MODULES:+${FE_MODULES}} -am
-Dskip.doc=true -DskipTests -Dcheckstyle.skip=true ${MVN_OPT:+${MVN_OPT}}
${DEPENDENCIES_MVN_OPTS} -gs "${USER_SETTINGS_MVN_REPO}" -T 1C
- else
- "${MVN_CMD}" package -pl ${FE_MODULES:+${FE_MODULES}} -am
-Dskip.doc=true -DskipTests -Dcheckstyle.skip=true ${MVN_OPT:+${MVN_OPT}}
${DEPENDENCIES_MVN_OPTS} -T 1C
- fi
- else
- if [[ -n ${USER_SETTINGS_MVN_REPO} && -f ${USER_SETTINGS_MVN_REPO} ]];
then
- "${MVN_CMD}" package -pl ${FE_MODULES:+${FE_MODULES}} -am
-Dskip.doc=true -DskipTests ${MVN_OPT:+${MVN_OPT}} ${DEPENDENCIES_MVN_OPTS}
-gs "${USER_SETTINGS_MVN_REPO}" -T 1C
- else
- "${MVN_CMD}" package -pl ${FE_MODULES:+${FE_MODULES}} -am
-Dskip.doc=true -DskipTests ${MVN_OPT:+${MVN_OPT}} ${DEPENDENCIES_MVN_OPTS} -T
1C
- fi
- fi
+ build_fe_modules
cd "${DORIS_HOME}"
fi
@@ -833,6 +910,9 @@ if [[ "${BUILD_FE}" -eq 1 ]]; then
if [[ "${BUILD_JINDOFS}" == "ON" ]]; then
install -d "${DORIS_OUTPUT}/fe/lib/jindofs"
fi
+ if [[ "${BUILD_JUICEFS}" == "ON" ]]; then
+ install -d "${DORIS_OUTPUT}/fe/lib/juicefs"
+ fi
cp -r -p "${DORIS_HOME}/fe/fe-core/target/lib"/* "${DORIS_OUTPUT}/fe/lib"/
cp -r -p "${DORIS_HOME}/fe/fe-core/target/doris-fe.jar"
"${DORIS_OUTPUT}/fe/lib"/
if [[ "${WITH_TDE_DIR}" != "" ]]; then
@@ -853,6 +933,11 @@ if [[ "${BUILD_FE}" -eq 1 ]]; then
fi
fi
+ # copy juicefs hadoop client jar
+ if [[ "${BUILD_JUICEFS}" == "ON" ]]; then
+ copy_juicefs_hadoop_jar "${DORIS_OUTPUT}/fe/lib/juicefs"
+ fi
+
cp -r -p "${DORIS_HOME}/minidump" "${DORIS_OUTPUT}/fe"/
cp -r -p "${DORIS_HOME}/webroot/static" "${DORIS_OUTPUT}/fe/webroot"/
@@ -1038,6 +1123,12 @@ EOF
cp -r -p
"${DORIS_THIRDPARTY}"/installed/jindofs_libs/jindo-sdk-[0-9]*.jar
"${DORIS_OUTPUT}/be/lib/java_extensions/jindofs"/
fi
fi
+ if [[ "${BUILD_JUICEFS}" == "ON" ]]; then
+ install -d "${DORIS_OUTPUT}/be/lib/java_extensions/juicefs"/
+
+ # copy juicefs hadoop client jar
+ copy_juicefs_hadoop_jar
"${DORIS_OUTPUT}/be/lib/java_extensions/juicefs"
+ fi
cp -r -p "${DORIS_THIRDPARTY}/installed/webroot"/*
"${DORIS_OUTPUT}/be/www"/
copy_common_files "${DORIS_OUTPUT}/be/"
diff --git a/docker/thirdparties/docker-compose/hive/hadoop-hive-3x.env.tpl
b/docker/thirdparties/docker-compose/hive/hadoop-hive-3x.env.tpl
index 5698c84ff2b..a5676e8aed9 100644
--- a/docker/thirdparties/docker-compose/hive/hadoop-hive-3x.env.tpl
+++ b/docker/thirdparties/docker-compose/hive/hadoop-hive-3x.env.tpl
@@ -22,6 +22,8 @@
HIVE_SITE_CONF_hive_metastore_transactional_event_listeners=org.apache.hive.hcat
HIVE_SITE_CONF_hive_stats_column_autogather=false
HIVE_SITE_CONF_fs_s3_impl=org.apache.hadoop.fs.s3a.S3AFileSystem
HIVE_SITE_CONF_fs_s3a_impl=org.apache.hadoop.fs.s3a.S3AFileSystem
+HIVE_SITE_CONF_fs_jfs_impl=io.juicefs.JuiceFileSystem
+HIVE_SITE_CONF_juicefs_cluster_meta=${JFS_CLUSTER_META}
HIVE_SITE_CONF_fs_s3a_access_key=${AWSAk}
HIVE_SITE_CONF_fs_s3a_secret_key=${AWSSk}
HIVE_SITE_CONF_fs_s3a_endpoint=${AWSEndpoint}
@@ -48,4 +50,3 @@
HIVE_SITE_CONF_fs_gs_auth_service_account_private_key_id=${GCSAccountPrivateKeyI
HIVE_SITE_CONF_fs_gs_auth_service_account_private_key=${GCSAccountPrivateKey}
HIVE_SITE_CONF_fs_gs_proxy_address=${GCSProxyAddress}
enablePaimonHms=${enablePaimonHms}
-
diff --git a/docker/thirdparties/docker-compose/hive/hadoop-hive.env.tpl
b/docker/thirdparties/docker-compose/hive/hadoop-hive.env.tpl
index ecbf735216b..d48d497bafa 100644
--- a/docker/thirdparties/docker-compose/hive/hadoop-hive.env.tpl
+++ b/docker/thirdparties/docker-compose/hive/hadoop-hive.env.tpl
@@ -32,6 +32,8 @@ HIVE_SITE_CONF_hive_stats_column_autogather=false
HIVE_SITE_CONF_hive_exec_parallel=true
CORE_CONF_fs_defaultFS=hdfs://${IP_HOST}:${FS_PORT}
+CORE_CONF_fs_jfs_impl=io.juicefs.JuiceFileSystem
+CORE_CONF_juicefs_cluster_meta=${JFS_CLUSTER_META}
CORE_CONF_hadoop_http_staticuser_user=root
CORE_CONF_hadoop_proxyuser_hue_hosts=*
CORE_CONF_hadoop_proxyuser_hue_groups=*
@@ -62,4 +64,3 @@ HADOOP_HEAPSIZE=4096
NEED_LOAD_DATA=${NEED_LOAD_DATA}
LOAD_PARALLEL=${LOAD_PARALLEL}
-
diff --git a/docker/thirdparties/docker-compose/hive/hive-2x_settings.env
b/docker/thirdparties/docker-compose/hive/hive-2x_settings.env
index 9045bb91683..d076b9dbb5b 100644
--- a/docker/thirdparties/docker-compose/hive/hive-2x_settings.env
+++ b/docker/thirdparties/docker-compose/hive/hive-2x_settings.env
@@ -24,3 +24,9 @@ export FS_PORT=8020 # should be same as hive2HmsPort in
regression-conf.groovy
export HMS_PORT=9083 # should be same as hive2HmsPort in regression-conf.groovy
export HS_PORT=10000 # should be same as hive2ServerPort in
regression-conf.groovy
export PG_PORT=5432 # should be same as hive2PgPort in regression-conf.groovy
+
+# JuiceFS metadata endpoint for property `juicefs.cluster.meta`.
+# CI can override this env, e.g.:
+# export JFS_CLUSTER_META="mysql://user:pwd@(127.0.0.1:3316)/juicefs_meta"
+# default to mysql_57 (3316) because external pipeline always starts mysql,
but not redis.
+export
JFS_CLUSTER_META="${JFS_CLUSTER_META:-mysql://root:123456@(127.0.0.1:3316)/juicefs_meta}"
diff --git a/docker/thirdparties/docker-compose/hive/hive-3x.yaml.tpl
b/docker/thirdparties/docker-compose/hive/hive-3x.yaml.tpl
index 5118b6bd65d..d6e4b1cfba5 100644
--- a/docker/thirdparties/docker-compose/hive/hive-3x.yaml.tpl
+++ b/docker/thirdparties/docker-compose/hive/hive-3x.yaml.tpl
@@ -92,6 +92,7 @@ services:
- "${HMS_PORT}"
volumes:
- ./scripts:/mnt/scripts
+ - /tmp/jfs-bucket:/tmp/jfs-bucket
depends_on:
hive-metastore-postgresql:
condition: service_healthy
diff --git a/docker/thirdparties/docker-compose/hive/hive-3x_settings.env
b/docker/thirdparties/docker-compose/hive/hive-3x_settings.env
index bf2bc4424f8..b0af3bc172b 100644
--- a/docker/thirdparties/docker-compose/hive/hive-3x_settings.env
+++ b/docker/thirdparties/docker-compose/hive/hive-3x_settings.env
@@ -25,6 +25,11 @@ export HMS_PORT=9383 # should be same as hive3HmsPort in
regression-conf.groovy
export HS_PORT=13000 # should be same as hive3ServerPort in
regression-conf.groovy
export PG_PORT=5732 # should be same as hive3PgPort in regression-conf.groovy
+# JuiceFS metadata endpoint for property `juicefs.cluster.meta`.
+# CI can override this env, e.g.:
+# export JFS_CLUSTER_META="mysql://user:pwd@(127.0.0.1:3316)/juicefs_meta"
+export
JFS_CLUSTER_META="${JFS_CLUSTER_META:-mysql://root:123456@(127.0.0.1:3316)/juicefs_meta}"
+
# prepare for paimon hms test,control load paimon hms data or not
export enablePaimonHms="false"
# hms on s3/oss/obs/cos
@@ -44,4 +49,4 @@ export GCSProjectId=""
export GCSAccountEmail=""
export GCSAccountPrivateKeyId=""
export GCSAccountPrivateKey=""
-export GCSProxyAddress=""
\ No newline at end of file
+export GCSProxyAddress=""
diff --git a/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh
b/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh
index d2ac9fa17a1..69d5af071b7 100755
--- a/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh
+++ b/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh
@@ -27,9 +27,21 @@ for file in "${AUX_LIB}"/*.tar.gz; do
done
ls "${AUX_LIB}/"
-# copy auxiliary jars to hive lib, avoid jars copy
+# Keep existing behavior for Hive metastore classpath.
cp -r "${AUX_LIB}"/* /opt/hive/lib/
+# Add JuiceFS jar into Hadoop classpath for `hadoop fs jfs://...`.
+shopt -s nullglob
+juicefs_jars=("${AUX_LIB}"/juicefs-hadoop-*.jar)
+if (( ${#juicefs_jars[@]} > 0 )); then
+ for target in /opt/hadoop-3.2.1/share/hadoop/common/lib
/opt/hadoop/share/hadoop/common/lib; do
+ if [[ -d "${target}" ]]; then
+ cp -f "${juicefs_jars[@]}" "${target}"/
+ fi
+ done
+fi
+shopt -u nullglob
+
# start metastore
nohup /opt/hive/bin/hive --service metastore &
diff --git a/docker/thirdparties/run-thirdparties-docker.sh
b/docker/thirdparties/run-thirdparties-docker.sh
index 7e3c4a53879..b4a98e27fc8 100755
--- a/docker/thirdparties/run-thirdparties-docker.sh
+++ b/docker/thirdparties/run-thirdparties-docker.sh
@@ -230,6 +230,167 @@ reserve_ports() {
fi
}
+JFS_META_FORMATTED=0
+DORIS_ROOT="$(cd "${ROOT}/../.." &>/dev/null && pwd)"
+. "${DORIS_ROOT}/thirdparty/juicefs-helpers.sh"
+JUICEFS_LOCAL_BIN="${DORIS_ROOT}/thirdparty/installed/juicefs_bin/juicefs"
+
+find_juicefs_hadoop_jar() {
+ local -a jar_globs=(
+
"${DORIS_ROOT}/thirdparty/installed/juicefs_libs/juicefs-hadoop-[0-9]*.jar"
+ "${DORIS_ROOT}/output/fe/lib/juicefs/juicefs-hadoop-[0-9]*.jar"
+
"${DORIS_ROOT}/output/be/lib/java_extensions/juicefs/juicefs-hadoop-[0-9]*.jar"
+
"${DORIS_ROOT}/../../../clusterEnv/*/Cluster*/fe/lib/juicefs/juicefs-hadoop-[0-9]*.jar"
+
"${DORIS_ROOT}/../../../clusterEnv/*/Cluster*/be/lib/java_extensions/juicefs/juicefs-hadoop-[0-9]*.jar"
+
"/mnt/ssd01/pipline/OpenSourceDoris/clusterEnv/*/Cluster*/fe/lib/juicefs/juicefs-hadoop-[0-9]*.jar"
+
"/mnt/ssd01/pipline/OpenSourceDoris/clusterEnv/*/Cluster*/be/lib/java_extensions/juicefs/juicefs-hadoop-[0-9]*.jar"
+ )
+ juicefs_find_hadoop_jar_by_globs "${jar_globs[@]}"
+}
+
+detect_juicefs_version() {
+ local juicefs_jar
+ juicefs_jar=$(find_juicefs_hadoop_jar || true)
+ juicefs_detect_hadoop_version "${juicefs_jar}" "${JUICEFS_DEFAULT_VERSION}"
+}
+
+download_juicefs_hadoop_jar() {
+ local juicefs_version="$1"
+ local cache_dir="${DORIS_ROOT}/thirdparty/installed/juicefs_libs"
+ juicefs_download_hadoop_jar_to_cache "${juicefs_version}" "${cache_dir}"
+}
+
+install_juicefs_cli() {
+ local juicefs_version="$1"
+ local cache_dir="${DORIS_ROOT}/thirdparty/installed/juicefs_bin"
+ local archive_name="juicefs-${juicefs_version}-linux-amd64.tar.gz"
+ local
download_url="https://github.com/juicedata/juicefs/releases/download/v${juicefs_version}/${archive_name}"
+ local tmp_dir
+ local extracted_bin
+
+ mkdir -p "${cache_dir}"
+ tmp_dir=$(mktemp -d "${cache_dir}/tmp.XXXXXX")
+
+ echo "Downloading JuiceFS CLI ${juicefs_version} from ${download_url}" >&2
+ if ! curl -fL --retry 3 --retry-delay 2 -o "${tmp_dir}/${archive_name}"
"${download_url}"; then
+ rm -rf "${tmp_dir}"
+ echo "ERROR: failed to download JuiceFS CLI from ${download_url}" >&2
+ return 1
+ fi
+
+ tar -xzf "${tmp_dir}/${archive_name}" -C "${tmp_dir}"
+ extracted_bin=$(find "${tmp_dir}" -maxdepth 2 -type f -name juicefs | head
-n 1)
+ if [[ -z "${extracted_bin}" ]]; then
+ rm -rf "${tmp_dir}"
+ echo "ERROR: failed to locate extracted JuiceFS CLI in
${archive_name}" >&2
+ return 1
+ fi
+
+ install -m 0755 "${extracted_bin}" "${JUICEFS_LOCAL_BIN}"
+ rm -rf "${tmp_dir}"
+}
+
+resolve_juicefs_cli() {
+ local juicefs_version
+
+ if command -v juicefs >/dev/null 2>&1; then
+ command -v juicefs
+ return 0
+ fi
+
+ if [[ -x "${JUICEFS_LOCAL_BIN}" ]]; then
+ echo "${JUICEFS_LOCAL_BIN}"
+ return 0
+ fi
+
+ juicefs_version=$(detect_juicefs_version)
+ install_juicefs_cli "${juicefs_version}" || return 1
+ echo "${JUICEFS_LOCAL_BIN}"
+}
+
+ensure_juicefs_meta_database() {
+ local jfs_meta="$1"
+ local meta_db
+ local mysql_container
+
+ if [[ "${jfs_meta}" != *"@(127.0.0.1:3316)/"* && "${jfs_meta}" !=
*"@(localhost:3316)/"* ]]; then
+ return 0
+ fi
+
+ meta_db="${jfs_meta##*/}"
+ meta_db="${meta_db%%\?*}"
+
+ if command -v mysql >/dev/null 2>&1; then
+ mysql -h127.0.0.1 -P3316 -uroot -p123456 -e "CREATE DATABASE IF NOT
EXISTS \`${meta_db}\`;"
+ return 0
+ fi
+
+ mysql_container=$(sudo docker ps --format '{{.Names}}' | grep -E
"(^|-)${CONTAINER_UID}mysql_57(-[0-9]+)?$" | head -n 1 || true)
+ if [[ -n "${mysql_container}" ]]; then
+ sudo docker exec "${mysql_container}" \
+ mysql -uroot -p123456 -e "CREATE DATABASE IF NOT EXISTS
\`${meta_db}\`;"
+ fi
+}
+
+run_juicefs_cli() {
+ local juicefs_cli
+ juicefs_cli=$(resolve_juicefs_cli)
+ "${juicefs_cli}" "$@"
+}
+
+ensure_juicefs_hadoop_jar_for_hive() {
+ local auxlib_dir="${ROOT}/docker-compose/hive/scripts/auxlib"
+ local source_jar
+ local juicefs_version
+
+ source_jar=$(find_juicefs_hadoop_jar || true)
+ if [[ -z "${source_jar}" ]]; then
+ juicefs_version=$(detect_juicefs_version)
+ source_jar=$(download_juicefs_hadoop_jar "${juicefs_version}" || true)
+ fi
+
+ if [[ -z "${source_jar}" ]]; then
+ echo "WARN: skip syncing juicefs-hadoop jar for hive, not found and
download failed."
+ return 0
+ fi
+
+ mkdir -p "${auxlib_dir}"
+ cp -f "${source_jar}" "${auxlib_dir}/"
+ echo "Synced JuiceFS Hadoop jar to hive auxlib: $(basename
"${source_jar}")"
+}
+
+prepare_juicefs_meta_for_hive() {
+ local jfs_meta="$1"
+ local jfs_cluster_name="${2:-cluster}"
+ if [[ -z "${jfs_meta}" || "${jfs_meta}" != mysql://* ]]; then
+ return 0
+ fi
+ if [[ "${JFS_META_FORMATTED}" -eq 1 ]]; then
+ return 0
+ fi
+
+ local bucket_dir="${JFS_BUCKET_DIR:-/tmp/jfs-bucket}"
+ sudo mkdir -p "${bucket_dir}"
+ sudo chmod 777 "${bucket_dir}"
+
+ # For local mysql_57 metadata DSN, ensure metadata database exists.
+ ensure_juicefs_meta_database "${jfs_meta}"
+
+ if run_juicefs_cli status "${jfs_meta}" >/dev/null 2>&1; then
+ echo "JuiceFS metadata is already formatted."
+ JFS_META_FORMATTED=1
+ return 0
+ fi
+
+ if ! run_juicefs_cli \
+ format --storage file --bucket "${bucket_dir}" "${jfs_meta}"
"${jfs_cluster_name}"; then
+ # If format reports conflict on rerun, verify by status and continue.
+ run_juicefs_cli status "${jfs_meta}" >/dev/null
+ fi
+
+ JFS_META_FORMATTED=1
+}
+
start_es() {
# elasticsearch
cp "${ROOT}"/docker-compose/elasticsearch/es.yaml.tpl
"${ROOT}"/docker-compose/elasticsearch/es.yaml
@@ -603,6 +764,12 @@ if [[ $need_prepare_hive_data -eq 1 ]]; then
bash "${ROOT}/docker-compose/hive/scripts/prepare-hive-data.sh"
fi
+if [[ "${STOP}" -ne 1 ]]; then
+ if [[ "${RUN_HIVE2}" -eq 1 ]] || [[ "${RUN_HIVE3}" -eq 1 ]]; then
+ ensure_juicefs_hadoop_jar_for_hive
+ fi
+fi
+
declare -A pids
if [[ "${RUN_ES}" -eq 1 ]]; then
@@ -723,6 +890,17 @@ for compose in "${!pids[@]}"; do
fi
done
+if [[ "${STOP}" -ne 1 ]]; then
+ if [[ "${RUN_HIVE2}" -eq 1 ]]; then
+ . "${ROOT}"/docker-compose/hive/hive-2x_settings.env
+ prepare_juicefs_meta_for_hive "${JFS_CLUSTER_META}" "cluster"
+ fi
+ if [[ "${RUN_HIVE3}" -eq 1 ]]; then
+ . "${ROOT}"/docker-compose/hive/hive-3x_settings.env
+ prepare_juicefs_meta_for_hive "${JFS_CLUSTER_META}" "cluster"
+ fi
+fi
+
echo "docker started"
sudo docker ps -a --format "{{.ID}} | {{.Image}} | {{.Status}}"
echo "all dockers started successfully"
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/BrokerDesc.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/BrokerDesc.java
index ede0706a22e..02f8fe532cd 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/BrokerDesc.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/BrokerDesc.java
@@ -137,12 +137,12 @@ public class BrokerDesc extends StorageDesc implements
Writable {
case S3:
return TFileType.FILE_S3;
case HDFS:
+ case JFS:
return TFileType.FILE_HDFS;
case STREAM:
return TFileType.FILE_STREAM;
case BROKER:
case OFS:
- case JFS:
default:
return TFileType.FILE_BROKER;
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java
index a3f479ff01d..d92c7311a3d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java
@@ -608,8 +608,9 @@ public class OutFileClause {
* - Centralize HDFS URI parsing logic
* - Add validation in FE to reject incomplete or malformed configs
*/
- if (null != brokerDesc.getStorageType() && brokerDesc.getStorageType()
- .equals(StorageBackend.StorageType.HDFS)) {
+ if (null != brokerDesc.getStorageType() && (brokerDesc.getStorageType()
+ .equals(StorageBackend.StorageType.HDFS)
+ ||
brokerDesc.getStorageType().equals(StorageBackend.StorageType.JFS))) {
String defaultFs =
HdfsPropertiesUtils.extractDefaultFsFromPath(filePath);
brokerDesc.getBackendConfigProperties().put(HdfsProperties.HDFS_DEFAULT_FS_NAME,
defaultFs);
}
@@ -767,4 +768,3 @@ public class OutFileClause {
}
}
-
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Resource.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/Resource.java
index de0c5542b3e..db0659c4b08 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Resource.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Resource.java
@@ -61,6 +61,9 @@ public abstract class Resource implements Writable,
GsonPostProcessable {
AI;
public static ResourceType fromString(String resourceType) {
+ if ("jfs".equalsIgnoreCase(resourceType) ||
"juicefs".equalsIgnoreCase(resourceType)) {
+ return HDFS;
+ }
for (ResourceType type : ResourceType.values()) {
if (type.name().equalsIgnoreCase(resourceType)) {
return type;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/ResourceMgr.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/ResourceMgr.java
index 81ce3b3009b..1135a8a35fa 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/ResourceMgr.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/ResourceMgr.java
@@ -75,7 +75,8 @@ public class ResourceMgr implements Writable {
public void createResource(CreateResourceCommand command) throws
DdlException {
CreateResourceInfo info = command.getInfo();
if (info.getResourceType() == ResourceType.UNKNOWN) {
- throw new DdlException("Only support SPARK, ODBC_CATALOG ,JDBC,
S3_COOLDOWN, S3, HDFS and HMS resource.");
+ throw new DdlException(
+ "Only support SPARK, ODBC_CATALOG, JDBC, S3_COOLDOWN, S3,
HDFS(JFS/JUICEFS), and HMS resource.");
}
Resource resource = Resource.fromCommand(command);
if (createResource(resource, info.isIfNotExists())) {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/HdfsProperties.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/HdfsProperties.java
index ad696c1eb4f..91249ad98d6 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/HdfsProperties.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/HdfsProperties.java
@@ -88,7 +88,7 @@ public class HdfsProperties extends HdfsCompatibleProperties {
private static final String DFS_NAME_SERVICES_KEY = "dfs.nameservices";
- private static final Set<String> supportSchema = ImmutableSet.of("hdfs",
"viewfs");
+ private static final Set<String> supportSchema = ImmutableSet.of("hdfs",
"viewfs", "jfs");
/**
* The final HDFS configuration map that determines the effective settings.
@@ -143,7 +143,8 @@ public class HdfsProperties extends
HdfsCompatibleProperties {
}
userOverriddenHdfsConfig = new HashMap<>();
origProps.forEach((key, value) -> {
- if (key.startsWith("hadoop.") || key.startsWith("dfs.") ||
key.startsWith("fs.")) {
+ if (key.startsWith("hadoop.") || key.startsWith("dfs.") ||
key.startsWith("fs.")
+ || key.startsWith("juicefs.")) {
userOverriddenHdfsConfig.put(key, value);
}
});
diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/SchemaTypeMapper.java
b/fe/fe-core/src/main/java/org/apache/doris/fs/SchemaTypeMapper.java
index 0686f977d4d..1b6404a0b41 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/fs/SchemaTypeMapper.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/fs/SchemaTypeMapper.java
@@ -58,7 +58,9 @@ public enum SchemaTypeMapper {
//todo Support for this type is planned but not yet implemented.
OFS("ofs", StorageProperties.Type.BROKER, FileSystemType.OFS,
TFileType.FILE_BROKER),
GFS("gfs", StorageProperties.Type.BROKER, FileSystemType.HDFS,
TFileType.FILE_BROKER),
- JFS("jfs", StorageProperties.Type.BROKER, FileSystemType.JFS,
TFileType.FILE_BROKER),
+ // JuiceFS is accessed via its Hadoop-compatible FileSystem implementation in Doris,
+ // so it should follow the HDFS-compatible path.
+ JFS("jfs", StorageProperties.Type.HDFS, FileSystemType.HDFS,
TFileType.FILE_HDFS),
VIEWFS("viewfs", StorageProperties.Type.HDFS, FileSystemType.HDFS,
TFileType.FILE_HDFS),
FILE("file", StorageProperties.Type.LOCAL, FileSystemType.FILE,
TFileType.FILE_LOCAL),
OSS_HDFS("oss", StorageProperties.Type.OSS_HDFS, FileSystemType.HDFS,
TFileType.FILE_HDFS),
@@ -158,4 +160,3 @@ public enum SchemaTypeMapper {
return SCHEMA_TO_FILE_TYPE_MAP.get(schema.toLowerCase());
}
}
-
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/common/util/LocationPathTest.java
b/fe/fe-core/src/test/java/org/apache/doris/common/util/LocationPathTest.java
index 170b0380874..1e5d404a7f6 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/common/util/LocationPathTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/common/util/LocationPathTest.java
@@ -103,16 +103,17 @@ public class LocationPathTest {
}
@Test
- public void testJFSLocationConvert() {
+ public void testJfsLocationConvertAsHdfsCompatible() {
LocationPath locationPath = LocationPath.of("jfs://test.com");
// FE
Assertions.assertTrue(locationPath.getNormalizedLocation().startsWith("jfs://"));
// BE
String loc = locationPath.toStorageLocation().toString();
Assertions.assertTrue(loc.startsWith("jfs://"));
- Assertions.assertEquals(FileSystemType.JFS,
locationPath.getFileSystemType());
+ // JFS is treated as Hadoop-compatible, so Doris maps it to HDFS file
system type.
+ Assertions.assertEquals(FileSystemType.HDFS,
locationPath.getFileSystemType());
Assertions.assertEquals("jfs://test.com",
locationPath.getFsIdentifier());
- Assertions.assertEquals(TFileType.FILE_BROKER,
locationPath.getTFileTypeForBE());
+ Assertions.assertEquals(TFileType.FILE_HDFS,
locationPath.getTFileTypeForBE());
}
@Disabled("not support in master")
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/HdfsPropertiesTest.java
b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/HdfsPropertiesTest.java
index f50cceaf0dd..f0a4e73231f 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/HdfsPropertiesTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/HdfsPropertiesTest.java
@@ -41,6 +41,8 @@ public class HdfsPropertiesTest {
Map<String, String> simpleHdfsProperties = new HashMap<>();
simpleHdfsProperties.put("uri", "hdfs://test/1.orc");
Assertions.assertEquals(HdfsProperties.class,
StorageProperties.createPrimary(simpleHdfsProperties).getClass());
+ simpleHdfsProperties.put("uri", "jfs://test/1.orc");
+ Assertions.assertEquals(HdfsProperties.class,
StorageProperties.createPrimary(simpleHdfsProperties).getClass());
Map<String, String> origProps = createBaseHdfsProperties();
List<StorageProperties> storageProperties =
StorageProperties.createAll(origProps);
HdfsProperties hdfsProperties = (HdfsProperties)
storageProperties.get(0);
@@ -151,6 +153,21 @@ public class HdfsPropertiesTest {
}
+ @Test
+ public void testPassThroughJuicefsProperties() throws UserException {
+ Map<String, String> origProps = new HashMap<>();
+ origProps.put("hdfs.authentication.type", "simple");
+ origProps.put("fs.defaultFS", "jfs://cluster");
+ origProps.put("fs.jfs.impl", "io.juicefs.JuiceFileSystem");
+ origProps.put("juicefs.cluster.meta", "redis://127.0.0.1:6379/1");
+
+ StorageProperties properties =
StorageProperties.createAll(origProps).get(0);
+ Assertions.assertEquals(HdfsProperties.class, properties.getClass());
+ Map<String, String> beProperties =
properties.getBackendConfigProperties();
+ Assertions.assertEquals("redis://127.0.0.1:6379/1",
beProperties.get("juicefs.cluster.meta"));
+ Assertions.assertEquals("jfs://cluster",
beProperties.get("fs.defaultFS"));
+ }
+
// Helper methods to reduce code duplication
private Map<String, String> createBaseHdfsProperties() {
Map<String, String> origProps = Maps.newHashMap();
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/HdfsPropertiesUtilsTest.java
b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/HdfsPropertiesUtilsTest.java
index e54b55186f6..589aebacd57 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/HdfsPropertiesUtilsTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/HdfsPropertiesUtilsTest.java
@@ -30,7 +30,7 @@ import java.util.Set;
public class HdfsPropertiesUtilsTest {
- private static final Set<String> supportSchema = ImmutableSet.of("hdfs",
"viewfs");
+ private static final Set<String> supportSchema = ImmutableSet.of("hdfs",
"viewfs", "jfs");
@Test
public void testCheckLoadPropsAndReturnUri_success() throws Exception {
@@ -69,6 +69,13 @@ public class HdfsPropertiesUtilsTest {
Assertions.assertEquals("viewfs://cluster/user/test", result);
}
+ @Test
+ public void testConvertUrlToFilePath_jfs() throws Exception {
+ String uri = "jfs://cluster/user/test";
+ String result = HdfsPropertiesUtils.convertUrlToFilePath(uri, "",
supportSchema);
+ Assertions.assertEquals("jfs://cluster/user/test", result);
+ }
+
@Test
public void testConvertUrlToFilePath_invalidSchema() {
String uri = "s3://bucket/file.txt";
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/CreateResourceCommandTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/CreateResourceCommandTest.java
index 954d9e90745..e822474d5ce 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/CreateResourceCommandTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/CreateResourceCommandTest.java
@@ -46,27 +46,37 @@ public class CreateResourceCommandTest extends
TestWithFeService {
};
// test validate normal
- ImmutableMap<String, String> properties = ImmutableMap.of("type",
"es", "host", "http://127.0.0.1:29200");
- CreateResourceInfo info = new CreateResourceInfo(true, false, "test",
properties);
- CreateResourceCommand createResourceCommand = new
CreateResourceCommand(info);
- Assertions.assertDoesNotThrow(() ->
createResourceCommand.getInfo().validate());
+ final ImmutableMap<String, String> esProperties =
+ ImmutableMap.of("type", "es", "host",
"http://127.0.0.1:29200");
+ final CreateResourceInfo esInfo = new CreateResourceInfo(true, false,
"test", esProperties);
+ final CreateResourceCommand esCommand = new
CreateResourceCommand(esInfo);
+ Assertions.assertDoesNotThrow(() -> esCommand.getInfo().validate());
+
+ // jfs/juicefs should be treated as HDFS-compatible resource type
+ final ImmutableMap<String, String> jfsProperties =
+ ImmutableMap.of("type", "jfs", "fs.defaultFS",
"jfs://cluster");
+ final CreateResourceInfo jfsInfo = new CreateResourceInfo(true, false,
"test_jfs", jfsProperties);
+ final CreateResourceCommand jfsCommand = new
CreateResourceCommand(jfsInfo);
+ Assertions.assertDoesNotThrow(() -> jfsCommand.getInfo().validate());
// test validate abnormal
// test properties
- info = new CreateResourceInfo(false, false, "test", null);
- CreateResourceCommand createResourceCommand1 = new
CreateResourceCommand(info);
+ final CreateResourceInfo nullPropertiesInfo = new
CreateResourceInfo(false, false, "test", null);
+ final CreateResourceCommand createResourceCommand1 = new
CreateResourceCommand(nullPropertiesInfo);
Assertions.assertThrows(AnalysisException.class, () ->
createResourceCommand1.getInfo().validate());
// test resource type
- properties = ImmutableMap.of("host", "http://127.0.0.1:29200");
- info = new CreateResourceInfo(false, false, "test", properties);
- CreateResourceCommand createResourceCommand2 = new
CreateResourceCommand(info);
+ final ImmutableMap<String, String> noTypeProperties =
ImmutableMap.of("host", "http://127.0.0.1:29200");
+ final CreateResourceInfo noTypeInfo = new CreateResourceInfo(false,
false, "test", noTypeProperties);
+ final CreateResourceCommand createResourceCommand2 = new
CreateResourceCommand(noTypeInfo);
Assertions.assertThrows(AnalysisException.class, () ->
createResourceCommand2.getInfo().validate());
// test unsupported resource type
- properties = ImmutableMap.of("type", "flink", "host",
"http://127.0.0.1:29200");
- info = new CreateResourceInfo(false, false, "test", properties);
- CreateResourceCommand createResourceCommand3 = new
CreateResourceCommand(info);
+ final ImmutableMap<String, String> unsupportedTypeProperties =
+ ImmutableMap.of("type", "flink", "host",
"http://127.0.0.1:29200");
+ final CreateResourceInfo unsupportedTypeInfo =
+ new CreateResourceInfo(false, false, "test",
unsupportedTypeProperties);
+ final CreateResourceCommand createResourceCommand3 = new
CreateResourceCommand(unsupportedTypeInfo);
Assertions.assertThrows(AnalysisException.class, () ->
createResourceCommand3.getInfo().validate());
}
diff --git a/regression-test/pipeline/external/conf/regression-conf.groovy
b/regression-test/pipeline/external/conf/regression-conf.groovy
index 3350a767a5b..281085bce96 100644
--- a/regression-test/pipeline/external/conf/regression-conf.groovy
+++ b/regression-test/pipeline/external/conf/regression-conf.groovy
@@ -98,6 +98,15 @@ hdfsUser = "doris-test"
hdfsPasswd = ""
brokerName = "broker_name"
+// for JuiceFS (Hadoop-compatible) regression cases
+// first step: enable case execution in external pipeline
+enableJfsTest=true
+jfsFs = "jfs://cluster"
+jfsImpl = "io.juicefs.JuiceFileSystem"
+jfsMeta = "mysql://root:123456@(127.0.0.1:3316)/juicefs_meta"
+jfsHiveMetastoreUris = "thrift://127.0.0.1:9383"
+jfsHadoopUser = "root"
+
// broker load test config
enableBrokerLoad=true
diff --git
a/regression-test/suites/external_table_p0/refactor_storage_param/test_jfs_hms_catalog_read.groovy
b/regression-test/suites/external_table_p0/refactor_storage_param/test_jfs_hms_catalog_read.groovy
new file mode 100644
index 00000000000..c87e0e82697
--- /dev/null
+++
b/regression-test/suites/external_table_p0/refactor_storage_param/test_jfs_hms_catalog_read.groovy
@@ -0,0 +1,141 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_jfs_hms_catalog_read", "p0,external") {
+ String enableJfs = context.config.otherConfigs.get("enableJfsTest")
+ if (enableJfs == null || !enableJfs.equalsIgnoreCase("true")) {
+ logger.info("disable JFS test.")
+ return
+ }
+
+ String enableHive = context.config.otherConfigs.get("enableHiveTest")
+ if (enableHive == null || !enableHive.equalsIgnoreCase("true")) {
+ logger.info("disable Hive test.")
+ return
+ }
+
+ String jfsFs = context.config.otherConfigs.get("jfsFs")
+ if (jfsFs == null || jfsFs.trim().isEmpty()) {
+ logger.info("skip JFS test because jfsFs is empty.")
+ return
+ }
+
+ String jfsImpl = context.config.otherConfigs.get("jfsImpl")
+ if (jfsImpl == null || jfsImpl.trim().isEmpty()) {
+ jfsImpl = "io.juicefs.JuiceFileSystem"
+ }
+ String jfsMeta = context.config.otherConfigs.get("jfsMeta")
+ if (jfsMeta == null || jfsMeta.trim().isEmpty()) {
+ throw new IllegalStateException("jfsMeta must be configured for JFS
data IO regression")
+ }
+ String jfsCluster = jfsFs.replaceFirst("^jfs://", "")
+ int slashPos = jfsCluster.indexOf("/")
+ if (slashPos > 0) {
+ jfsCluster = jfsCluster.substring(0, slashPos)
+ }
+ String jfsMetaProperty = ",\n 'juicefs.${jfsCluster}.meta'
= '${jfsMeta}'"
+
+ String hdfsUser = context.config.otherConfigs.get("jfsHadoopUser")
+ if (hdfsUser == null || hdfsUser.trim().isEmpty()) {
+ hdfsUser = context.config.otherConfigs.get("hdfsUser")
+ }
+ if (hdfsUser == null || hdfsUser.trim().isEmpty()) {
+ hdfsUser = "root"
+ }
+
+ String hmsUris = context.config.otherConfigs.get("jfsHiveMetastoreUris")
+ if (hmsUris == null || hmsUris.trim().isEmpty()) {
+ String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+ String hmsPort = context.config.otherConfigs.get("hive3HmsPort")
+ if (hmsPort == null || hmsPort.trim().isEmpty()) {
+ hmsPort = context.config.otherConfigs.get("hive2HmsPort")
+ }
+ if (externalEnvIp == null || externalEnvIp.trim().isEmpty()
+ || hmsPort == null || hmsPort.trim().isEmpty()) {
+ logger.info("skip JFS test because jfsHiveMetastoreUris is empty
and fallback externalEnvIp/hmsPort is invalid.")
+ return
+ }
+ hmsUris = "thrift://${externalEnvIp}:${hmsPort}"
+ }
+ String catalogName = "test_jfs_hms_catalog_read"
+ String dbName = "test_jfs_hms_catalog_read_db"
+ String tableName = "test_jfs_hms_catalog_read_tbl"
+ String jfsDbBasePath = context.config.otherConfigs.get("jfsDbBasePath")
+ if (jfsDbBasePath == null || jfsDbBasePath.trim().isEmpty()) {
+ jfsDbBasePath = "${jfsFs}/doris_jfs/${hdfsUser}"
+ }
+ jfsDbBasePath = jfsDbBasePath.replaceAll('/+$', '')
+ String jfsStagingDir = context.config.otherConfigs.get("jfsStagingDir")
+ if (jfsStagingDir == null || jfsStagingDir.trim().isEmpty()) {
+ jfsStagingDir = "${jfsDbBasePath}/.doris_staging"
+ }
+ jfsStagingDir = jfsStagingDir.replaceAll('/+$', '')
+ String dbLocation = "${jfsDbBasePath}/${dbName}"
+
+ sql """drop catalog if exists ${catalogName}"""
+
+ try {
+ sql """
+ CREATE CATALOG ${catalogName} PROPERTIES (
+ 'type' = 'hms',
+ 'hive.metastore.uris' = '${hmsUris}',
+ 'fs.defaultFS' = '${jfsFs}',
+ 'fs.jfs.impl' = '${jfsImpl}',
+ 'hadoop.username' = '${hdfsUser}',
+ 'hive.staging_dir' = '${jfsStagingDir}'
+ ${jfsMetaProperty}
+ );
+ """
+
+ sql """switch ${catalogName}"""
+ def dbs = sql """show databases"""
+ assertTrue(dbs.size() > 0)
+
+ def hasDb = sql """show databases like '${dbName}'"""
+ if (hasDb.size() > 0) {
+ sql """drop table if exists `${dbName}`.`${tableName}`"""
+ sql """drop database if exists `${dbName}`"""
+ }
+ sql """
+ create database `${dbName}`
+ properties('location'='${dbLocation}')
+ """
+ sql """use `${dbName}`"""
+ sql """
+ CREATE TABLE `${tableName}` (
+ `id` INT,
+ `name` STRING
+ ) ENGINE=hive
+ PROPERTIES (
+ 'file_format'='parquet'
+ )
+ """
+ sql """insert into `${tableName}` values (1, 'jfs_1'), (2, 'jfs_2')"""
+
+ def cnt = sql """select count(*) from `${tableName}`"""
+ assertEquals("2", cnt[0][0].toString())
+
+ def rows = sql """select * from `${tableName}` order by id"""
+ assertTrue(rows.size() == 2)
+ assertEquals("1", rows[0][0].toString())
+ assertEquals("jfs_1", rows[0][1].toString())
+ assertEquals("2", rows[1][0].toString())
+ assertEquals("jfs_2", rows[1][1].toString())
+ } finally {
+ sql """switch internal"""
+ }
+}
diff --git a/thirdparty/build-thirdparty.sh b/thirdparty/build-thirdparty.sh
index d858596d1bd..793ff84cf38 100755
--- a/thirdparty/build-thirdparty.sh
+++ b/thirdparty/build-thirdparty.sh
@@ -1977,6 +1977,15 @@ build_jindofs() {
cp -r ${TP_SOURCE_DIR}/${JINDOFS_SOURCE}/*
"${TP_INSTALL_DIR}/jindofs_libs/"
}
+# juicefs
+build_juicefs() {
+ check_if_archive_exist "${JUICEFS_NAME}"
+
+ rm -rf "${TP_INSTALL_DIR}/juicefs_libs/"
+ mkdir -p "${TP_INSTALL_DIR}/juicefs_libs/"
+ cp -r "${TP_SOURCE_DIR}/${JUICEFS_NAME}" "${TP_INSTALL_DIR}/juicefs_libs/"
+}
+
# pugixml
build_pugixml() {
check_if_source_exist "${PUGIXML_SOURCE}"
@@ -2081,6 +2090,7 @@ build_paimon_cpp() {
if [[ "${#packages[@]}" -eq 0 ]]; then
packages=(
jindofs
+ juicefs
odbc
openssl
libevent
@@ -2248,6 +2258,7 @@ cleanup_package_source() {
dragonbox) src_var="DRAGONBOX_SOURCE" ;;
icu) src_var="ICU_SOURCE" ;;
jindofs) src_var="JINDOFS_SOURCE" ;;
+ juicefs) src_var="JUICEFS_SOURCE" ;;
pugixml) src_var="PUGIXML_SOURCE" ;;
paimon_cpp) src_var="PAIMON_CPP_SOURCE" ;;
aws_sdk) src_var="AWS_SDK_SOURCE" ;;
diff --git a/thirdparty/juicefs-helpers.sh b/thirdparty/juicefs-helpers.sh
new file mode 100644
index 00000000000..8ea41189ec3
--- /dev/null
+++ b/thirdparty/juicefs-helpers.sh
@@ -0,0 +1,84 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Shared JuiceFS helper functions used by build and docker scripts.
+
+JUICEFS_DEFAULT_VERSION="${JUICEFS_DEFAULT_VERSION:-1.3.1}"
+JUICEFS_HADOOP_MAVEN_REPO="${JUICEFS_HADOOP_MAVEN_REPO:-https://repo1.maven.org/maven2/io/juicefs/juicefs-hadoop}"
+
+juicefs_find_hadoop_jar_by_globs() {
+ local jar_glob=""
+ local matched_jar=""
+ for jar_glob in "$@"; do
+ matched_jar=$(compgen -G "${jar_glob}" | head -n 1 || true)
+ if [[ -n "${matched_jar}" ]]; then
+ echo "${matched_jar}"
+ return 0
+ fi
+ done
+ return 1
+}
+
+juicefs_detect_hadoop_version() {
+ local juicefs_jar="$1"
+ local default_version="${2:-${JUICEFS_DEFAULT_VERSION}}"
+ if [[ -z "${juicefs_jar}" ]]; then
+ echo "${default_version}"
+ return 0
+ fi
+ juicefs_jar=$(basename "${juicefs_jar}")
+ juicefs_jar=${juicefs_jar#juicefs-hadoop-}
+ echo "${juicefs_jar%.jar}"
+}
+
+juicefs_hadoop_jar_download_url() {
+ local juicefs_version="$1"
+ local jar_name="juicefs-hadoop-${juicefs_version}.jar"
+ echo "${JUICEFS_HADOOP_MAVEN_REPO}/${juicefs_version}/${jar_name}"
+}
+
+juicefs_download_hadoop_jar_to_cache() {
+ local juicefs_version="$1"
+ local cache_dir="$2"
+ local jar_name="juicefs-hadoop-${juicefs_version}.jar"
+ local target_jar="${cache_dir}/${jar_name}"
+ local download_url
+ download_url=$(juicefs_hadoop_jar_download_url "${juicefs_version}")
+
+ mkdir -p "${cache_dir}"
+ if [[ -s "${target_jar}" ]]; then
+ echo "${target_jar}"
+ return 0
+ fi
+
+ echo "Downloading JuiceFS Hadoop jar ${juicefs_version} from
${download_url}" >&2
+ if command -v curl >/dev/null 2>&1; then
+ if curl -fL --retry 3 --retry-delay 2 --connect-timeout 10 -o
"${target_jar}" "${download_url}"; then
+ echo "${target_jar}"
+ return 0
+ fi
+ elif command -v wget >/dev/null 2>&1; then
+ if wget -q "${download_url}" -O "${target_jar}"; then
+ echo "${target_jar}"
+ return 0
+ fi
+ fi
+
+ rm -f "${target_jar}"
+ return 1
+}
diff --git a/thirdparty/vars.sh b/thirdparty/vars.sh
index 92b4427f179..4fe49dd5168 100644
--- a/thirdparty/vars.sh
+++ b/thirdparty/vars.sh
@@ -553,6 +553,12 @@ JINDOFS_NAME=jindofs-6.10.4-libs-0.1.tar.gz
JINDOFS_SOURCE=jindofs-6.10.4-libs-0.1
JINDOFS_MD5SUM="bd30b4c5fe97c4367eeb3bb228b317d9"
+# juicefs
+JUICEFS_DOWNLOAD="https://repo1.maven.org/maven2/io/juicefs/juicefs-hadoop/1.3.1/juicefs-hadoop-1.3.1.jar"
+JUICEFS_NAME=juicefs-hadoop-1.3.1.jar
+JUICEFS_SOURCE=
+JUICEFS_MD5SUM="f374dfbfbdc4b83417cfea78a6728c54"
+
# pugixml
PUGIXML_DOWNLOAD="https://github.com/zeux/pugixml/releases/download/v1.15/pugixml-1.15.tar.gz"
PUGIXML_NAME=pugixml-1.15.tar.gz
@@ -647,6 +653,7 @@ export TP_ARCHIVES=(
'DRAGONBOX'
'ICU'
'JINDOFS'
+ 'JUICEFS'
'PUGIXML'
'PAIMON_CPP'
)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]