This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new ca858fd4a87 [fix](be) Correct Arrow UTF8/String size limit (#63137)
ca858fd4a87 is described below
commit ca858fd4a8752d25cfef15a09302022f7cfa2a92
Author: Jerry Hu <[email protected]>
AuthorDate: Mon May 11 21:40:16 2026 +0800
[fix](be) Correct Arrow UTF8/String size limit (#63137)
Issue Number: None
Related PR: None
Problem Summary: Fix the Arrow UTF8 max size constant so it matches the
documented 2G limit: MAX_ARROW_UTF8 was defined as 1ULL << 21 (2 MiB) and is
now 1ULL << 31 (2 GiB).
None
- Test: No need to test (header constant fix only); note that the commit
nevertheless extends the arrow_flight_sql_p0/test_select regression suite
- Behavior changed: Yes (Arrow UTF8 size limit now matches the intended
2G threshold)
- Does this need documentation: No
### What problem does this PR solve?
Issue Number: close #xxx
Related PR: #xxx
Problem Summary:
### Release note
None
### Check List (For Author)
- Test <!-- At least one of them must be included. -->
- [ ] Regression test
- [ ] Unit Test
- [ ] Manual test (add detailed scripts or steps below)
- [ ] No need to test or manual test. Explain why:
- [ ] This is a refactor/code format and no logic has been changed.
- [ ] Previous test can cover this change.
- [ ] No code files have been changed.
- [ ] Other reason <!-- Add your reason? -->
- Behavior changed:
- [ ] No.
- [ ] Yes. <!-- Explain the behavior change -->
- Does this need documentation?
- [ ] No.
- [ ] Yes. <!-- Add document PR link here. eg:
https://github.com/apache/doris-website/pull/1214 -->
### Check List (For Reviewer who merge this PR)
- [ ] Confirm the release note
- [ ] Confirm test cases
- [ ] Confirm document
- [ ] Add branch pick label <!-- Add branch pick label that this PR
should merge into -->
---
be/src/format/arrow/arrow_row_batch.h | 2 +-
.../data/arrow_flight_sql_p0/test_select.out | 8 ++++-
.../suites/arrow_flight_sql_p0/test_select.groovy | 36 +++++++++++++++++++++-
3 files changed, 43 insertions(+), 3 deletions(-)
diff --git a/be/src/format/arrow/arrow_row_batch.h
b/be/src/format/arrow/arrow_row_batch.h
index e0a37f6bf42..e7b77ed707b 100644
--- a/be/src/format/arrow/arrow_row_batch.h
+++ b/be/src/format/arrow/arrow_row_batch.h
@@ -39,7 +39,7 @@ class Schema;
namespace doris {
-constexpr size_t MAX_ARROW_UTF8 = (1ULL << 21); // 2G
+constexpr size_t MAX_ARROW_UTF8 = (1ULL << 31); // 2G
class RowDescriptor;
diff --git a/regression-test/data/arrow_flight_sql_p0/test_select.out
b/regression-test/data/arrow_flight_sql_p0/test_select.out
index f2f4b86bbf5..62888cd3dfc 100644
--- a/regression-test/data/arrow_flight_sql_p0/test_select.out
+++ b/regression-test/data/arrow_flight_sql_p0/test_select.out
@@ -5,4 +5,10 @@
-- !arrow_flight_sql_datetime --
333 plsql333 2024-07-21 12:00:00.123456 2024-07-21 12:00:00.0
222 plsql222 2024-07-20 12:00:00.123456 2024-07-20 12:00:00.0
-111 plsql111 2024-07-19 12:00:00.123456 2024-07-19 12:00:00.0
\ No newline at end of file
+111 plsql111 2024-07-19 12:00:00.123456 2024-07-19 12:00:00.0
+
+-- !arrow_flight_sql_jsonb --
+1 {"k1":1,"k2":"v2"}
+2 [1,2,{"nested":true}]
+3 \N
+
diff --git a/regression-test/suites/arrow_flight_sql_p0/test_select.groovy
b/regression-test/suites/arrow_flight_sql_p0/test_select.groovy
index 950fb4af7e9..85f119fc2c3 100644
--- a/regression-test/suites/arrow_flight_sql_p0/test_select.groovy
+++ b/regression-test/suites/arrow_flight_sql_p0/test_select.groovy
@@ -26,7 +26,7 @@ suite("test_select", "arrow_flight_sql") {
sql """INSERT INTO ${tableName} VALUES(222, "plsql222")"""
sql """INSERT INTO ${tableName} VALUES(333, "plsql333")"""
sql """INSERT INTO ${tableName} VALUES(111, "plsql333")"""
-
+
qt_arrow_flight_sql "select sum(id) as a, count(1) as b from ${tableName}"
tableName = "test_select_datetime"
@@ -40,4 +40,38 @@ suite("test_select", "arrow_flight_sql") {
sql """INSERT INTO ${tableName} VALUES(333, "plsql333","2024-07-21
12:00:00.123456","2024-07-21 12:00:00")"""
qt_arrow_flight_sql_datetime "select * from ${tableName} order by id desc"
+
+ tableName = "test_select_jsonb"
+ sql "DROP TABLE IF EXISTS ${tableName}"
+ sql """
+ create table ${tableName} (id int, payload jsonb) DUPLICATE key(`id`)
distributed by hash (`id`) buckets 4
+ properties ("replication_num"="1");
+ """
+ sql """
+ INSERT INTO ${tableName} VALUES
+ (1, '{"k1": 1, "k2": "v2"}'),
+ (2, '[1, 2, {"nested": true}]'),
+ (3, NULL)
+ """
+
+ qt_arrow_flight_sql_jsonb "select id, payload from ${tableName} order by
id"
+
+ def largeJsonValueSize = 2100000
+ sql """
+ INSERT INTO ${tableName}
+ SELECT 4, CAST(CONCAT('{"large":"', REPEAT('x',
${largeJsonValueSize}), '"}') AS JSONB)
+ """
+
+ // This row exceeds the old 2 MiB MAX_ARROW_UTF8 (1ULL << 21) but not the corrected
+ // 2G value (1ULL << 31). NOTE(review): confirm it still exercises JSONB -> LargeString.
+ def largeJsonbResult = arrow_flight_sql """
+ select payload, length(cast(payload as string)) from ${tableName}
where id = 4
+ """
+ assertEquals(1, largeJsonbResult.size())
+ assertEquals(2, largeJsonbResult[0].size())
+ def expectedLargeJsonbSize = largeJsonValueSize + '{"large":""}'.length()
+ def largeJsonb = largeJsonbResult[0][0].toString()
+ assertEquals(expectedLargeJsonbSize, largeJsonb.length())
+ assertEquals(expectedLargeJsonbSize, (largeJsonbResult[0][1] as
Number).intValue())
+ assertTrue(largeJsonb.startsWith('{"large":"'))
+ assertTrue(largeJsonb.endsWith('"}'))
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]