This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.1 by this push:
new e79c08eda21 branch-4.1: [fix](be) Correct Arrow UTF8/String size limit
#63137 (#63148)
e79c08eda21 is described below
commit e79c08eda21d4599caa79966c3ccd4a742cc7661
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Tue May 12 22:12:33 2026 +0800
branch-4.1: [fix](be) Correct Arrow UTF8/String size limit #63137 (#63148)
Cherry-picked from #63137
Co-authored-by: Jerry Hu <[email protected]>
---
be/src/format/arrow/arrow_row_batch.h | 2 +-
.../data/arrow_flight_sql_p0/test_select.out | 8 ++++-
.../suites/arrow_flight_sql_p0/test_select.groovy | 36 +++++++++++++++++++++-
3 files changed, 43 insertions(+), 3 deletions(-)
diff --git a/be/src/format/arrow/arrow_row_batch.h
b/be/src/format/arrow/arrow_row_batch.h
index 3c572e18aa7..4c1a29ac2e6 100644
--- a/be/src/format/arrow/arrow_row_batch.h
+++ b/be/src/format/arrow/arrow_row_batch.h
@@ -38,7 +38,7 @@ class Schema;
namespace doris {
-constexpr size_t MAX_ARROW_UTF8 = (1ULL << 21); // 2G
+constexpr size_t MAX_ARROW_UTF8 = (1ULL << 31); // 2G
class RowDescriptor;
diff --git a/regression-test/data/arrow_flight_sql_p0/test_select.out
b/regression-test/data/arrow_flight_sql_p0/test_select.out
index f2f4b86bbf5..62888cd3dfc 100644
--- a/regression-test/data/arrow_flight_sql_p0/test_select.out
+++ b/regression-test/data/arrow_flight_sql_p0/test_select.out
@@ -5,4 +5,10 @@
-- !arrow_flight_sql_datetime --
333 plsql333 2024-07-21 12:00:00.123456 2024-07-21 12:00:00.0
222 plsql222 2024-07-20 12:00:00.123456 2024-07-20 12:00:00.0
-111 plsql111 2024-07-19 12:00:00.123456 2024-07-19 12:00:00.0
\ No newline at end of file
+111 plsql111 2024-07-19 12:00:00.123456 2024-07-19 12:00:00.0
+
+-- !arrow_flight_sql_jsonb --
+1 {"k1":1,"k2":"v2"}
+2 [1,2,{"nested":true}]
+3 \N
+
diff --git a/regression-test/suites/arrow_flight_sql_p0/test_select.groovy
b/regression-test/suites/arrow_flight_sql_p0/test_select.groovy
index 950fb4af7e9..85f119fc2c3 100644
--- a/regression-test/suites/arrow_flight_sql_p0/test_select.groovy
+++ b/regression-test/suites/arrow_flight_sql_p0/test_select.groovy
@@ -26,7 +26,7 @@ suite("test_select", "arrow_flight_sql") {
sql """INSERT INTO ${tableName} VALUES(222, "plsql222")"""
sql """INSERT INTO ${tableName} VALUES(333, "plsql333")"""
sql """INSERT INTO ${tableName} VALUES(111, "plsql333")"""
-
+
qt_arrow_flight_sql "select sum(id) as a, count(1) as b from ${tableName}"
tableName = "test_select_datetime"
@@ -40,4 +40,38 @@ suite("test_select", "arrow_flight_sql") {
sql """INSERT INTO ${tableName} VALUES(333, "plsql333","2024-07-21
12:00:00.123456","2024-07-21 12:00:00")"""
qt_arrow_flight_sql_datetime "select * from ${tableName} order by id desc"
+
+ tableName = "test_select_jsonb"
+ sql "DROP TABLE IF EXISTS ${tableName}"
+ sql """
+ create table ${tableName} (id int, payload jsonb) DUPLICATE key(`id`)
distributed by hash (`id`) buckets 4
+ properties ("replication_num"="1");
+ """
+ sql """
+ INSERT INTO ${tableName} VALUES
+ (1, '{"k1": 1, "k2": "v2"}'),
+ (2, '[1, 2, {"nested": true}]'),
+ (3, NULL)
+ """
+
+ qt_arrow_flight_sql_jsonb "select id, payload from ${tableName} order by
id"
+
+ def largeJsonValueSize = 2100000
+ sql """
+ INSERT INTO ${tableName}
+ SELECT 4, CAST(CONCAT('{"large":"', REPEAT('x',
${largeJsonValueSize}), '"}') AS JSONB)
+ """
+
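+ // This row exceeds the old, incorrect MAX_ARROW_UTF8 (1 << 21 = 2097152 bytes), which exercised JSONB -> LargeString
serialization; it is far below the corrected limit of 1 << 31 bytes.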
+ def largeJsonbResult = arrow_flight_sql """
+ select payload, length(cast(payload as string)) from ${tableName}
where id = 4
+ """
+ assertEquals(1, largeJsonbResult.size())
+ assertEquals(2, largeJsonbResult[0].size())
+ def expectedLargeJsonbSize = largeJsonValueSize + '{"large":""}'.length()
+ def largeJsonb = largeJsonbResult[0][0].toString()
+ assertEquals(expectedLargeJsonbSize, largeJsonb.length())
+ assertEquals(expectedLargeJsonbSize, (largeJsonbResult[0][1] as
Number).intValue())
+ assertTrue(largeJsonb.startsWith('{"large":"'))
+ assertTrue(largeJsonb.endsWith('"}'))
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org