Copilot commented on code in PR #60676:
URL: https://github.com/apache/doris/pull/60676#discussion_r2792056363
##########
be/CMakeLists.txt:
##########
@@ -545,6 +556,89 @@ set(COMMON_THIRDPARTY
${COMMON_THIRDPARTY}
)
+if (ENABLE_PAIMON_CPP)
+ if (NOT PAIMON_HOME)
+ set(_paimon_default_home "${THIRDPARTY_DIR}/paimon-cpp")
+ if (EXISTS
"${_paimon_default_home}/lib/cmake/Paimon/PaimonConfig.cmake")
+ set(PAIMON_HOME "${_paimon_default_home}" CACHE PATH "" FORCE)
+ endif()
+ unset(_paimon_default_home)
+ endif()
+ if (NOT PAIMON_HOME)
+ message(FATAL_ERROR "ENABLE_PAIMON_CPP=ON but PAIMON_HOME is not set")
+ endif()
Review Comment:
`ENABLE_PAIMON_CPP` is defaulted to ON, but if
`${THIRDPARTY_DIR}/paimon-cpp` is not present and `PAIMON_HOME` isn’t set, the
build hard-fails (`FATAL_ERROR`). In this repo `thirdparty/paimon-cpp` does not
exist, so a default build will break. Consider defaulting this option to OFF
(or auto-disabling when the config isn’t found) and only failing when the user
explicitly enables it.
##########
be/CMakeLists.txt:
##########
@@ -545,6 +556,89 @@ set(COMMON_THIRDPARTY
${COMMON_THIRDPARTY}
)
+if (ENABLE_PAIMON_CPP)
+ if (NOT PAIMON_HOME)
+ set(_paimon_default_home "${THIRDPARTY_DIR}/paimon-cpp")
+ if (EXISTS
"${_paimon_default_home}/lib/cmake/Paimon/PaimonConfig.cmake")
+ set(PAIMON_HOME "${_paimon_default_home}" CACHE PATH "" FORCE)
+ endif()
+ unset(_paimon_default_home)
+ endif()
+ if (NOT PAIMON_HOME)
+ message(FATAL_ERROR "ENABLE_PAIMON_CPP=ON but PAIMON_HOME is not set")
+ endif()
+ set(Paimon_DIR "${PAIMON_HOME}/lib/cmake/Paimon" CACHE PATH "" FORCE)
+ find_package(Paimon CONFIG REQUIRED NO_DEFAULT_PATH)
+ include_directories("${PAIMON_HOME}/include")
+ # Link glog after paimon static libs to satisfy RawLog__ in static link.
+ list(REMOVE_ITEM COMMON_THIRDPARTY glog)
+
+ # Paimon static library dependencies
+ set(paimon_fmt_lib "${PAIMON_HOME}/lib64/paimon_deps/libfmtd.a")
+ if (NOT EXISTS "${paimon_fmt_lib}")
+ set(paimon_fmt_lib "${PAIMON_HOME}/lib64/paimon_deps/libfmt.a")
+ endif()
+ add_library(paimon_fmt STATIC IMPORTED)
+ set_target_properties(paimon_fmt PROPERTIES IMPORTED_LOCATION
"${paimon_fmt_lib}")
+ set(paimon_tbb_lib "${PAIMON_HOME}/lib64/paimon_deps/libtbb_debug.a")
+ if (NOT EXISTS "${paimon_tbb_lib}")
+ set(paimon_tbb_lib "${PAIMON_HOME}/lib64/paimon_deps/libtbb.a")
+ endif()
+ add_library(paimon_tbb STATIC IMPORTED)
+ set_target_properties(paimon_tbb PROPERTIES IMPORTED_LOCATION
"${paimon_tbb_lib}")
+ add_library(paimon_roaring STATIC IMPORTED)
+ set_target_properties(paimon_roaring PROPERTIES IMPORTED_LOCATION
"${PAIMON_HOME}/lib64/paimon_deps/libroaring_bitmap.a")
+ add_library(paimon_xxhash STATIC IMPORTED)
+ set_target_properties(paimon_xxhash PROPERTIES IMPORTED_LOCATION
"${PAIMON_HOME}/lib64/paimon_deps/libxxhash.a")
+ add_library(paimon_arrow_dataset STATIC IMPORTED)
+ set_target_properties(paimon_arrow_dataset PROPERTIES IMPORTED_LOCATION
"${PAIMON_HOME}/lib64/paimon_deps/libarrow_dataset.a")
+ add_library(paimon_arrow_acero STATIC IMPORTED)
+ set_target_properties(paimon_arrow_acero PROPERTIES IMPORTED_LOCATION
"${PAIMON_HOME}/lib64/paimon_deps/libarrow_acero.a")
+ add_library(paimon_arrow STATIC IMPORTED)
+ set_target_properties(paimon_arrow PROPERTIES IMPORTED_LOCATION
"${PAIMON_HOME}/lib64/paimon_deps/libarrow.a")
+ set(paimon_arrow_bundled_deps_lib
"${PAIMON_HOME}/lib64/paimon_deps/libarrow_bundled_dependencies.a")
+ if (EXISTS "${paimon_arrow_bundled_deps_lib}")
+ add_library(paimon_arrow_bundled_dependencies STATIC IMPORTED)
+ set_target_properties(paimon_arrow_bundled_dependencies PROPERTIES
IMPORTED_LOCATION "${paimon_arrow_bundled_deps_lib}")
+ endif()
+ add_library(paimon_parquet STATIC IMPORTED)
+ set_target_properties(paimon_parquet PROPERTIES IMPORTED_LOCATION
"${PAIMON_HOME}/lib64/paimon_deps/libparquet.a")
+ # Always use paimon-cpp bundled ORC to avoid conflicts with Doris ORC.
+ add_library(paimon_orc STATIC IMPORTED)
+ set_target_properties(paimon_orc PROPERTIES IMPORTED_LOCATION
"${PAIMON_HOME}/lib64/paimon_deps/liborc.a")
+
Review Comment:
The paimon-cpp linkage setup hardcodes a
`${PAIMON_HOME}/lib64/paimon_deps/...` layout and creates a number of imported
static libs manually. This is brittle across platforms/install layouts (lib vs
lib64, debug vs release naming) and can easily break packaging. Prefer
consuming targets exported by `PaimonConfig.cmake` (and its transitive deps),
or at least probe both `lib` and `lib64` and fail with a clearer message
showing which paths were tried.
##########
fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonUtils.java:
##########
@@ -37,9 +38,14 @@ public static List<String> getFieldNames(RowType rowType) {
public static <T> T deserialize(String encodedStr) {
try {
- return InstantiationUtil.deserializeObject(
-
DECODER.decode(encodedStr.getBytes(java.nio.charset.StandardCharsets.UTF_8)),
- PaimonUtils.class.getClassLoader());
+ byte[] decoded;
+ try {
+ decoded =
URL_DECODER.decode(encodedStr.getBytes(java.nio.charset.StandardCharsets.UTF_8));
+ } catch (IllegalArgumentException e) {
+ // Fallback to standard Base64 for splits encoded by native
Paimon serialization.
Review Comment:
The fallback comment is inaccurate: decoding with standard Base64 doesn’t
make the payload compatible with `InstantiationUtil.deserializeObject`. That
method still requires Java-serialized bytes, not “native Paimon serialization”.
Please reword the comment to reflect that this fallback only supports
alternative Base64 alphabets/encodings for the *same* Java-serialized payload.
```suggestion
// Fallback to standard Base64 for splits encoded with a
non-URL-safe Base64 alphabet.
```
##########
fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java:
##########
@@ -1295,13 +1297,14 @@ public void checkQuerySlotCount(String slotCnt) {
public enum IgnoreSplitType {
NONE,
IGNORE_JNI,
- IGNORE_NATIVE
+ IGNORE_NATIVE,
+ IGNORE_PAIMON_CPP
}
public static final String IGNORE_SPLIT_TYPE = "ignore_split_type";
@VariableMgr.VarAttr(name = IGNORE_SPLIT_TYPE,
checker = "checkIgnoreSplitType",
- options = {"NONE", "IGNORE_JNI", "IGNORE_NATIVE"},
+ options = {"NONE", "IGNORE_JNI", "IGNORE_NATIVE",
"IGNORE_PAIMON_CPP"},
description = {"忽略指定类型的 split", "Ignore splits of the specified
type"})
Review Comment:
`IGNORE_PAIMON_CPP` was added to `IgnoreSplitType` and exposed as an option,
but there’s no corresponding handling in the split generation logic (e.g.,
`PaimonScanNode.getSplits` only checks `IGNORE_NATIVE` and `IGNORE_JNI`).
As-is, setting `ignore_split_type=IGNORE_PAIMON_CPP` will have no effect.
Either implement the behavior (skip paimon-cpp/JNI-style splits when paimon-cpp
is enabled) or remove the option to avoid misleading users.
##########
be/src/vec/exec/scan/file_scanner.cpp:
##########
@@ -67,7 +67,9 @@
#include "vec/exec/format/table/iceberg_reader.h"
#include "vec/exec/format/table/lakesoul_jni_reader.h"
#include "vec/exec/format/table/max_compute_jni_reader.h"
+#include "vec/exec/format/table/paimon_cpp_reader.h"
#include "vec/exec/format/table/paimon_jni_reader.h"
+#include "vec/exec/format/table/paimon_predicate_converter.h"
Review Comment:
Even with `ENABLE_PAIMON_CPP=OFF`, the current build still compiles all
`be/src/vec/**/*.cpp` via `file(GLOB_RECURSE VEC_FILES *.cpp)`, and
`file_scanner.cpp` now unconditionally includes paimon-cpp headers. This means
disabling the option won’t actually prevent compilation failures when
paimon-cpp headers/libs aren’t available. To make the option effective, gate
the includes/implementation with a compile definition (e.g., `#ifdef
ENABLE_PAIMON_CPP`) and/or exclude the paimon-cpp source files from the Vec
target when the option is OFF.
```suggestion
#ifdef ENABLE_PAIMON_CPP
#include "vec/exec/format/table/paimon_cpp_reader.h"
#include "vec/exec/format/table/paimon_predicate_converter.h"
#endif
#include "vec/exec/format/table/paimon_jni_reader.h"
```
##########
fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java:
##########
@@ -217,9 +217,19 @@ private void setPaimonParams(TFileRangeDesc rangeDesc,
PaimonSplit paimonSplit)
String fileFormat = getFileFormat(paimonSplit.getPathString());
if (split != null) {
- // use jni reader
+ // use jni reader or paimon-cpp reader
rangeDesc.setFormatType(TFileFormatType.FORMAT_JNI);
- fileDesc.setPaimonSplit(PaimonUtil.encodeObjectToString(split));
+ // Use Paimon native serialization for paimon-cpp reader
+ if (sessionVariable.isEnablePaimonCppReader() && split instanceof
DataSplit) {
+
fileDesc.setPaimonSplit(PaimonUtil.encodeDataSplitToString((DataSplit) split));
+ } else {
+
fileDesc.setPaimonSplit(PaimonUtil.encodeObjectToString(split));
+ }
+ // Set table location for paimon-cpp reader
+ String tableLocation = source.getTableLocation();
+ if (tableLocation != null) {
Review Comment:
When `enable_paimon_cpp_reader` is on, BE will error out if `paimon_table`
isn’t set. Here `tableLocation` can still be null/empty (e.g.,
non-FileStoreTable without a `path` option), in which case the query will fail
later with a generic BE internal error. Consider validating `tableLocation`
when paimon-cpp is enabled and throwing a clear FE-side exception (or ensuring
a non-empty location is always populated).
```suggestion
if (sessionVariable.isEnablePaimonCppReader() && split
instanceof DataSplit
&& (tableLocation == null || tableLocation.isEmpty())) {
throw new RuntimeException(
"Paimon table location is empty while paimon-cpp
reader is enabled. "
+ "Please ensure the Paimon table path is configured
or disable paimon-cpp reader.");
}
if (tableLocation != null && !tableLocation.isEmpty()) {
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]