This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 16f2dda352c [fix](paimon) infer manifest format from split file format
in cpp reader (#60795)
16f2dda352c is described below
commit 16f2dda352c8b4c573e413e6597067547c379a30
Author: Chenjunwei <[email protected]>
AuthorDate: Sun Mar 8 05:03:39 2026 +0800
[fix](paimon) infer manifest format from split file format in cpp reader
(#60795)
## Problem
Follow-up to #60676.
When FE does not pass full table options in scan ranges, paimon-cpp may
default manifest.format to avro.
In environments where no avro FileFormatFactory is available, PaimonCppReader
initialization can then fail with:
Could not find a FileFormatFactory implementation class for format avro.
## Solution
In PaimonCppReader::_build_options, if the split-level file_format is set and
non-empty, backfill each of the following options that is missing or empty:
- set file.format from split file_format
- set manifest.format from split file_format
This keeps paimon-cpp format resolution consistent with the actual split
format and avoids unintended avro fallback.
## Verification
- Incremental BE build succeeded for doris_be target.
- BE change scope is limited to
be/src/vec/exec/format/table/paimon_cpp_reader.cpp; a regression test
(test_paimon_cpp_reader.groovy) is added alongside it.
---
be/src/vec/exec/format/table/paimon_cpp_reader.cpp | 17 ++++++
.../paimon/test_paimon_cpp_reader.groovy | 70 ++++++++++++++++++++++
2 files changed, 87 insertions(+)
diff --git a/be/src/vec/exec/format/table/paimon_cpp_reader.cpp
b/be/src/vec/exec/format/table/paimon_cpp_reader.cpp
index 0bb89d0c73e..756c87c32f0 100644
--- a/be/src/vec/exec/format/table/paimon_cpp_reader.cpp
+++ b/be/src/vec/exec/format/table/paimon_cpp_reader.cpp
@@ -311,6 +311,23 @@ std::map<std::string, std::string>
PaimonCppReader::_build_options() const {
copy_if_missing("fs.s3a.region", "AWS_REGION");
copy_if_missing("fs.s3a.path.style.access", "use_path_style");
+ // FE currently does not pass paimon_options in scan ranges.
+ // Backfill file.format/manifest.format from split file_format to avoid
+ // paimon-cpp falling back to default manifest.format=avro.
+ if (_range.__isset.table_format_params &&
_range.table_format_params.__isset.paimon_params &&
+ _range.table_format_params.paimon_params.__isset.file_format &&
+ !_range.table_format_params.paimon_params.file_format.empty()) {
+ const auto& split_file_format =
_range.table_format_params.paimon_params.file_format;
+ auto file_format_it = options.find(paimon::Options::FILE_FORMAT);
+ if (file_format_it == options.end() || file_format_it->second.empty())
{
+ options[paimon::Options::FILE_FORMAT] = split_file_format;
+ }
+ auto manifest_format_it =
options.find(paimon::Options::MANIFEST_FORMAT);
+ if (manifest_format_it == options.end() ||
manifest_format_it->second.empty()) {
+ options[paimon::Options::MANIFEST_FORMAT] = split_file_format;
+ }
+ }
+
options[paimon::Options::FILE_SYSTEM] = "doris";
return options;
}
diff --git
a/regression-test/suites/external_table_p0/paimon/test_paimon_cpp_reader.groovy
b/regression-test/suites/external_table_p0/paimon/test_paimon_cpp_reader.groovy
new file mode 100644
index 00000000000..64ef323add6
--- /dev/null
+++
b/regression-test/suites/external_table_p0/paimon/test_paimon_cpp_reader.groovy
@@ -0,0 +1,70 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_paimon_cpp_reader", "p0,external") {
+ String enabled = context.config.otherConfigs.get("enablePaimonTest")
+ if (enabled == null || !enabled.equalsIgnoreCase("true")) {
+ logger.info("disabled paimon test")
+ return
+ }
+
+ String catalogName = "test_paimon_cpp_reader"
+ String hdfsPort = context.config.otherConfigs.get("hive2HdfsPort")
+ String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+
+ try {
+ sql """drop catalog if exists ${catalogName}"""
+ sql """create catalog if not exists ${catalogName} properties (
+ "type" = "paimon",
+ "paimon.catalog.type" = "filesystem",
+ "warehouse" =
"hdfs://${externalEnvIp}:${hdfsPort}/user/doris/paimon1"
+ );"""
+ sql """switch ${catalogName}"""
+ sql """use db1"""
+ // Do not force JNI; keep default selection behavior.
+ sql """set force_jni_scanner=false"""
+
+ def testQueries = [
+ """select c1 from complex_all order by c1""",
+ """select c1 from complex_all where c1 >= 2 order by c1""",
+ """select * from all_table order by c1""",
+ """select * from all_table_with_parquet where c13 like '13%'
order by c1""",
+ """select * from complex_tab order by c1""",
+ """select c3['a_test'], c3['b_test'], c3['bbb'], c3['ccc']
from complex_tab order by c3['a_test'], c3['b_test']""",
+ """select array_max(c2) c from complex_tab order by c""",
+ """select c20[0] c from complex_all order by c""",
+ """select * from deletion_vector_orc""",
+ """select * from deletion_vector_parquet"""
+ ]
+
+ // Default path is JNI when enable_paimon_cpp_reader=false.
+ sql """set enable_paimon_cpp_reader=false"""
+ def jniResults = testQueries.collect { query -> sql(query) }
+
+ sql """set enable_paimon_cpp_reader=true"""
+ def cppResults = testQueries.collect { query -> sql(query) }
+
+ assertTrue(cppResults[0].size() > 0)
+ for (int i = 0; i < testQueries.size(); i++) {
+ assertEquals(jniResults[i].toString(), cppResults[i].toString())
+ }
+ } finally {
+ sql """set enable_paimon_cpp_reader=false"""
+ sql """set force_jni_scanner=false"""
+ sql """drop catalog if exists ${catalogName}"""
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]