This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.1 by this push:
     new e79c08eda21 branch-4.1: [fix](be) Correct Arrow UTF8/String size limit #63137 (#63148)
e79c08eda21 is described below

commit e79c08eda21d4599caa79966c3ccd4a742cc7661
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Tue May 12 22:12:33 2026 +0800

    branch-4.1: [fix](be) Correct Arrow UTF8/String size limit #63137 (#63148)
    
    Cherry-picked from #63137
    
    Co-authored-by: Jerry Hu <[email protected]>
---
 be/src/format/arrow/arrow_row_batch.h              |  2 +-
 .../data/arrow_flight_sql_p0/test_select.out       |  8 ++++-
 .../suites/arrow_flight_sql_p0/test_select.groovy  | 36 +++++++++++++++++++++-
 3 files changed, 43 insertions(+), 3 deletions(-)

diff --git a/be/src/format/arrow/arrow_row_batch.h b/be/src/format/arrow/arrow_row_batch.h
index 3c572e18aa7..4c1a29ac2e6 100644
--- a/be/src/format/arrow/arrow_row_batch.h
+++ b/be/src/format/arrow/arrow_row_batch.h
@@ -38,7 +38,7 @@ class Schema;
 
 namespace doris {
 
-constexpr size_t MAX_ARROW_UTF8 = (1ULL << 21); // 2G
+constexpr size_t MAX_ARROW_UTF8 = (1ULL << 31); // 2G
 
 class RowDescriptor;
 
diff --git a/regression-test/data/arrow_flight_sql_p0/test_select.out b/regression-test/data/arrow_flight_sql_p0/test_select.out
index f2f4b86bbf5..62888cd3dfc 100644
--- a/regression-test/data/arrow_flight_sql_p0/test_select.out
+++ b/regression-test/data/arrow_flight_sql_p0/test_select.out
@@ -5,4 +5,10 @@
 -- !arrow_flight_sql_datetime --
 333    plsql333        2024-07-21 12:00:00.123456      2024-07-21 12:00:00.0
 222    plsql222        2024-07-20 12:00:00.123456      2024-07-20 12:00:00.0
-111    plsql111        2024-07-19 12:00:00.123456      2024-07-19 12:00:00.0
\ No newline at end of file
+111    plsql111        2024-07-19 12:00:00.123456      2024-07-19 12:00:00.0
+
+-- !arrow_flight_sql_jsonb --
+1      {"k1":1,"k2":"v2"}
+2      [1,2,{"nested":true}]
+3      \N
+
diff --git a/regression-test/suites/arrow_flight_sql_p0/test_select.groovy b/regression-test/suites/arrow_flight_sql_p0/test_select.groovy
index 950fb4af7e9..85f119fc2c3 100644
--- a/regression-test/suites/arrow_flight_sql_p0/test_select.groovy
+++ b/regression-test/suites/arrow_flight_sql_p0/test_select.groovy
@@ -26,7 +26,7 @@ suite("test_select", "arrow_flight_sql") {
     sql """INSERT INTO ${tableName} VALUES(222, "plsql222")"""
     sql """INSERT INTO ${tableName} VALUES(333, "plsql333")"""
     sql """INSERT INTO ${tableName} VALUES(111, "plsql333")"""
-    
+
     qt_arrow_flight_sql "select sum(id) as a, count(1) as b from ${tableName}"
 
     tableName = "test_select_datetime"
@@ -40,4 +40,38 @@ suite("test_select", "arrow_flight_sql") {
     sql """INSERT INTO ${tableName} VALUES(333, "plsql333","2024-07-21 12:00:00.123456","2024-07-21 12:00:00")"""
 
     qt_arrow_flight_sql_datetime "select * from ${tableName} order by id desc"
+
+    tableName = "test_select_jsonb"
+    sql "DROP TABLE IF EXISTS ${tableName}"
+    sql """
+        create table ${tableName} (id int, payload jsonb) DUPLICATE key(`id`) distributed by hash (`id`) buckets 4
+        properties ("replication_num"="1");
+        """
+    sql """
+        INSERT INTO ${tableName} VALUES
+            (1, '{"k1": 1, "k2": "v2"}'),
+            (2, '[1, 2, {"nested": true}]'),
+            (3, NULL)
+        """
+
+    qt_arrow_flight_sql_jsonb "select id, payload from ${tableName} order by id"
+
+    def largeJsonValueSize = 2100000
+    sql """
+        INSERT INTO ${tableName}
+        SELECT 4, CAST(CONCAT('{"large":"', REPEAT('x', ${largeJsonValueSize}), '"}') AS JSONB)
+        """
+
+    // This row exceeds MAX_ARROW_UTF8 and exercises JSONB -> LargeString serialization.
+    def largeJsonbResult = arrow_flight_sql """
+        select payload, length(cast(payload as string)) from ${tableName} where id = 4
+        """
+    assertEquals(1, largeJsonbResult.size())
+    assertEquals(2, largeJsonbResult[0].size())
+    def expectedLargeJsonbSize = largeJsonValueSize + '{"large":""}'.length()
+    def largeJsonb = largeJsonbResult[0][0].toString()
+    assertEquals(expectedLargeJsonbSize, largeJsonb.length())
+    assertEquals(expectedLargeJsonbSize, (largeJsonbResult[0][1] as Number).intValue())
+    assertTrue(largeJsonb.startsWith('{"large":"'))
+    assertTrue(largeJsonb.endsWith('"}'))
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to