This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 2b1803ae4d6 [fix](paimon-cpp) deduplicate Arrow linking to fix SIGSEGV in FilterRowGroupsByPredicate (#60883)
2b1803ae4d6 is described below

commit 2b1803ae4d63a25780479c44aa45c657fc28a40f
Author: Chenjunwei <[email protected]>
AuthorDate: Mon Mar 9 12:41:12 2026 +0800

    [fix](paimon-cpp) deduplicate Arrow linking to fix SIGSEGV in FilterRowGroupsByPredicate (#60883)
    
    ## Proposed changes
    
    ### Problem
    
    When `ENABLE_PAIMON_CPP` is ON, both Doris's own `libarrow.a` and
    paimon-cpp's `libarrow.a` are linked into `doris_be`, causing **3698
    duplicate global symbols**. This leads to **SIGSEGV crashes** in
    `paimon::parquet::ParquetFileBatchReader::FilterRowGroupsByPredicate`
    when `libarrow_dataset.a` resolves arrow core calls to the wrong copy
    (compiled with different feature flags).
    
    Both are Arrow 17.0.0 but compiled with different options:
    
    | Feature | Doris Arrow | paimon Arrow |
    |---|---|---|
    | COMPUTE | OFF | **ON** |
    | DATASET | OFF | **ON** |
    | ACERO | OFF | **ON** |
    | FILESYSTEM | OFF | **ON** |
    | FLIGHT | **ON** | OFF |
    | FLIGHT_SQL | **ON** | OFF |
    | PARQUET | ON | ON |
    
    ### Crash Stack
    
    ```
    SIGSEGV invalid permissions for mapped object
     → std::string::basic_string(char const*, ...)
     → paimon::ToPaimonStatus(arrow::Status const&)
     → paimon::parquet::ParquetFileBatchReader::FilterRowGroupsByPredicate(...)
    ```
    
    ### Root Cause
    
    Inside `-Wl,--start-group ... --end-group`, the linker may resolve
    symbols from `libarrow_dataset.a` (paimon's) to Doris's `libarrow.a`,
    which was compiled without COMPUTE/FILESYSTEM modules. The internal
    object memory layout differs, causing `arrow::Status` and other objects
    to trigger illegal memory access when passed across library boundaries.
    
    ### Fix
    
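    Link exactly one Arrow stack into `doris_be`.
    
    The diff below does this by building Doris's own Arrow with
    COMPUTE/DATASET/ACERO/FILESYSTEM, so `arrow`, `arrow_dataset` and
    `arrow_acero` all come from `COMMON_THIRDPARTY` via `thirdparty.cmake`
    and paimon-cpp reuses them; the `paimon_deps` candidate search in
    `be/CMakeLists.txt` is removed (the "Plan B" named in the code comment).
    
    The approach originally described in this message was different: when
    the `paimon_deps` Arrow stack is selected (because Doris lacks
    `libarrow_dataset.a` / `libarrow_acero.a`), remove Doris's `arrow` from
    `COMMON_THIRDPARTY`, relying on paimon's `libarrow.a` being a
    **superset** of Doris's version (same 17.0.0, with additional modules
    enabled) that provides all symbols needed by Doris's
    `libarrow_flight.a` / `libarrow_flight_sql.a`. That is not what the
    diff implements.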
    
    ### Impact
    
    - Only build files changed: `be/CMakeLists.txt` and
      `be/cmake/thirdparty.cmake` (11 insertions, 82 deletions).
    - No C++/Java business code changes.
    - No impact when `ENABLE_PAIMON_CPP=OFF`.
    
    ## Types of changes
    
    - [x] Bug fix (non-breaking change which fixes an issue)
---
 be/CMakeLists.txt         | 91 +++++------------------------------------------
 be/cmake/thirdparty.cmake |  2 ++
 2 files changed, 11 insertions(+), 82 deletions(-)

diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt
index 5adbd1dd1f1..550bf10d77a 100644
--- a/be/CMakeLists.txt
+++ b/be/CMakeLists.txt
@@ -623,77 +623,16 @@ if (BUILD_BENCHMARK)
 endif()
 
 set(PAIMON_FACTORY_REGISTRY_LIBS)
-set(PAIMON_ARROW_CORE_LIB)
-set(PAIMON_ARROW_FILESYSTEM_LIB)
-set(PAIMON_ARROW_DATASET_LIB)
-set(PAIMON_ARROW_ACERO_LIB)
 if (ENABLE_PAIMON_CPP)
-    set(_paimon_arrow_core_candidates
-        ${THIRDPARTY_DIR}/paimon-cpp/lib64/paimon_deps/libarrow.a
-        ${THIRDPARTY_DIR}/lib64/libarrow.a
-        ${THIRDPARTY_DIR}/lib/libarrow.a
-    )
-    foreach(_paimon_arrow_core_candidate IN LISTS _paimon_arrow_core_candidates)
-        if (EXISTS "${_paimon_arrow_core_candidate}")
-            add_library(paimon_arrow_core STATIC IMPORTED)
-            set_target_properties(paimon_arrow_core PROPERTIES
-                IMPORTED_LOCATION ${_paimon_arrow_core_candidate})
-            set(PAIMON_ARROW_CORE_LIB paimon_arrow_core)
-            break()
-        endif()
-    endforeach()
-    set(_paimon_arrow_filesystem_candidates
-        ${THIRDPARTY_DIR}/paimon-cpp/lib64/paimon_deps/libarrow_filesystem.a
-        ${THIRDPARTY_DIR}/lib64/libarrow_filesystem.a
-        ${THIRDPARTY_DIR}/lib/libarrow_filesystem.a
-    )
-    foreach(_paimon_arrow_filesystem_candidate IN LISTS _paimon_arrow_filesystem_candidates)
-        if (EXISTS "${_paimon_arrow_filesystem_candidate}")
-            add_library(paimon_arrow_filesystem STATIC IMPORTED)
-            set_target_properties(paimon_arrow_filesystem PROPERTIES
-                IMPORTED_LOCATION ${_paimon_arrow_filesystem_candidate})
-            set(PAIMON_ARROW_FILESYSTEM_LIB paimon_arrow_filesystem)
-            break()
-        endif()
-    endforeach()
-    set(_paimon_arrow_dataset_candidates
-        ${THIRDPARTY_DIR}/paimon-cpp/lib64/paimon_deps/libarrow_dataset.a
-        ${THIRDPARTY_DIR}/lib64/libarrow_dataset.a
-        ${THIRDPARTY_DIR}/lib/libarrow_dataset.a
-    )
-    foreach(_paimon_arrow_dataset_candidate IN LISTS _paimon_arrow_dataset_candidates)
-        if (EXISTS "${_paimon_arrow_dataset_candidate}")
-            add_library(paimon_arrow_dataset STATIC IMPORTED)
-            set_target_properties(paimon_arrow_dataset PROPERTIES
-                IMPORTED_LOCATION ${_paimon_arrow_dataset_candidate})
-            set(PAIMON_ARROW_DATASET_LIB paimon_arrow_dataset)
-            break()
-        endif()
-    endforeach()
-    set(_paimon_arrow_acero_candidates
-        ${THIRDPARTY_DIR}/paimon-cpp/lib64/paimon_deps/libarrow_acero.a
-        ${THIRDPARTY_DIR}/lib64/libarrow_acero.a
-        ${THIRDPARTY_DIR}/lib/libarrow_acero.a
-    )
-    foreach(_paimon_arrow_acero_candidate IN LISTS _paimon_arrow_acero_candidates)
-        if (EXISTS "${_paimon_arrow_acero_candidate}")
-            add_library(paimon_arrow_acero STATIC IMPORTED)
-            set_target_properties(paimon_arrow_acero PROPERTIES
-                IMPORTED_LOCATION ${_paimon_arrow_acero_candidate})
-            set(PAIMON_ARROW_ACERO_LIB paimon_arrow_acero)
-            break()
-        endif()
-    endforeach()
-    if (PAIMON_ARROW_DATASET_LIB)
-        # paimon_parquet_file_format depends on Arrow Dataset symbols.
-        # Force-link it only when arrow_dataset is available.
-        set(PAIMON_FACTORY_REGISTRY_LIBS
-            paimon_parquet_file_format
-        )
-        list(REMOVE_ITEM COMMON_THIRDPARTY ${PAIMON_FACTORY_REGISTRY_LIBS})
-    else()
-        message(STATUS "Paimon C++: libarrow_dataset.a not found, keep paimon_parquet_file_format as regular static lib")
-    endif()
+    # Plan B: Doris Arrow is now built with COMPUTE/DATASET/ACERO/FILESYSTEM,
+    # so arrow, arrow_dataset, arrow_acero are all in COMMON_THIRDPARTY via
+    # thirdparty.cmake.  paimon-cpp reuses the same Arrow (no paimon_deps).
+    # No dual-stack selection needed — single Arrow for everything.
+
+    # paimon_parquet_file_format depends on Arrow Dataset symbols.
+    # Force-link it with --whole-archive so its factory registration runs.
+    set(PAIMON_FACTORY_REGISTRY_LIBS paimon_parquet_file_format)
+    list(REMOVE_ITEM COMMON_THIRDPARTY ${PAIMON_FACTORY_REGISTRY_LIBS})
 endif()
 
 set(DORIS_DEPENDENCIES
@@ -720,18 +659,6 @@ if (ENABLE_PAIMON_CPP)
             ${PAIMON_FACTORY_REGISTRY_LIBS}
             -Wl,--no-whole-archive)
     endif()
-    if (PAIMON_ARROW_CORE_LIB)
-        set(DORIS_DEPENDENCIES ${DORIS_DEPENDENCIES} ${PAIMON_ARROW_CORE_LIB})
-    endif()
-    if (PAIMON_ARROW_FILESYSTEM_LIB)
-        set(DORIS_DEPENDENCIES ${DORIS_DEPENDENCIES} ${PAIMON_ARROW_FILESYSTEM_LIB})
-    endif()
-    if (PAIMON_ARROW_DATASET_LIB)
-        set(DORIS_DEPENDENCIES ${DORIS_DEPENDENCIES} ${PAIMON_ARROW_DATASET_LIB})
-    endif()
-    if (PAIMON_ARROW_ACERO_LIB)
-        set(DORIS_DEPENDENCIES ${DORIS_DEPENDENCIES} ${PAIMON_ARROW_ACERO_LIB})
-    endif()
 
     # paimon-cpp internal dependencies (renamed with _paimon suffix)
     # These must come after paimon libraries to resolve symbols.
diff --git a/be/cmake/thirdparty.cmake b/be/cmake/thirdparty.cmake
index 441ebe8dc73..227f81411f1 100644
--- a/be/cmake/thirdparty.cmake
+++ b/be/cmake/thirdparty.cmake
@@ -106,6 +106,8 @@ add_thirdparty(zstd LIB64)
 add_thirdparty(arrow LIB64)
 add_thirdparty(arrow_flight LIB64)
 add_thirdparty(arrow_flight_sql LIB64)
+add_thirdparty(arrow_dataset LIB64)
+add_thirdparty(arrow_acero LIB64)
 add_thirdparty(parquet LIB64)
 add_thirdparty(brpc LIB64)
 add_thirdparty(rocksdb)


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to