This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.1 by this push:
     new f894c5092bd branch-4.1: [fix](be) Fix sliced FixedSizeBinary Arrow string reads #64829 (#64969)
f894c5092bd is described below

commit f894c5092bd17860d486cc7853dffeb8ebc57301
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Tue Jun 30 14:04:37 2026 +0800

    branch-4.1: [fix](be) Fix sliced FixedSizeBinary Arrow string reads #64829 (#64969)
    
    Cherry-picked from #64829
    
    Co-authored-by: Mryange <[email protected]>
---
 .../core/data_type_serde/data_type_jsonb_serde.cpp |  5 +--
 .../data_type_serde/data_type_string_serde.cpp     |  5 +--
 .../data_type_serde/data_type_jsonb_serde_test.cpp | 48 +++++++++++++++++++++-
 .../data_type_serde_string_test.cpp                | 38 +++++++++++++++++
 4 files changed, 89 insertions(+), 7 deletions(-)

diff --git a/be/src/core/data_type_serde/data_type_jsonb_serde.cpp b/be/src/core/data_type_serde/data_type_jsonb_serde.cpp
index 90660dac372..7dc0e4cfd30 100644
--- a/be/src/core/data_type_serde/data_type_jsonb_serde.cpp
+++ b/be/src/core/data_type_serde/data_type_jsonb_serde.cpp
@@ -150,12 +150,11 @@ Status DataTypeJsonbSerDe::read_column_from_arrow(IColumn& column, const arrow::
     } else if (arrow_array->type_id() == arrow::Type::FIXED_SIZE_BINARY) {
         const auto* concrete_array = dynamic_cast<const arrow::FixedSizeBinaryArray*>(arrow_array);
         uint32_t width = concrete_array->byte_width();
-        const auto* array_data = concrete_array->GetValue(start);
 
         JsonBinaryValue value;
-        for (size_t offset_i = 0; offset_i < end - start; ++offset_i) {
+        for (auto offset_i = start; offset_i < end; ++offset_i) {
             if (!concrete_array->IsNull(offset_i)) {
-                const auto* raw_data = array_data + (offset_i * width);
+                const auto* raw_data = concrete_array->GetValue(offset_i);
 
                 RETURN_IF_ERROR(
                         value.from_json_string(reinterpret_cast<const char*>(raw_data), width));
diff --git a/be/src/core/data_type_serde/data_type_string_serde.cpp b/be/src/core/data_type_serde/data_type_string_serde.cpp
index e30fbea7c30..72ee74a3ece 100644
--- a/be/src/core/data_type_serde/data_type_string_serde.cpp
+++ b/be/src/core/data_type_serde/data_type_string_serde.cpp
@@ -282,11 +282,10 @@ Status DataTypeStringSerDeBase<ColumnType>::read_column_from_arrow(
     } else if (arrow_array->type_id() == arrow::Type::FIXED_SIZE_BINARY) {
         const auto* concrete_array = dynamic_cast<const arrow::FixedSizeBinaryArray*>(arrow_array);
         uint32_t width = concrete_array->byte_width();
-        const auto* array_data = concrete_array->GetValue(start);
 
-        for (size_t offset_i = 0; offset_i < end - start; ++offset_i) {
+        for (auto offset_i = start; offset_i < end; ++offset_i) {
             if (!concrete_array->IsNull(offset_i)) {
-                const auto* raw_data = array_data + (offset_i * width);
+                const auto* raw_data = concrete_array->GetValue(offset_i);
                 assert_cast<ColumnType&>(column).insert_data((char*)raw_data, width);
             } else {
                 assert_cast<ColumnType&>(column).insert_default();
diff --git a/be/test/core/data_type_serde/data_type_jsonb_serde_test.cpp b/be/test/core/data_type_serde/data_type_jsonb_serde_test.cpp
index 038f520487b..176369da58f 100644
--- a/be/test/core/data_type_serde/data_type_jsonb_serde_test.cpp
+++ b/be/test/core/data_type_serde/data_type_jsonb_serde_test.cpp
@@ -25,7 +25,9 @@
 #include <lz4/lz4.h>
 #include <streamvbyte.h>
 
+#include <array>
 #include <cstddef>
+#include <cstring>
 #include <iostream>
 #include <limits>
 #include <type_traits>
@@ -275,4 +277,48 @@ TEST_F(DataTypeJsonbSerDeTest, ArrowMemNotAligned) {
     EXPECT_TRUE(st.ok());
 }
 
-} // namespace doris
\ No newline at end of file
+TEST_F(DataTypeJsonbSerDeTest, FixedSizeBinaryReadColumnFromArrowWithNonZeroStart) {
+    constexpr int64_t num_elements = 4;
+    constexpr int byte_width = 7;
+    auto data_buf_result = arrow::AllocateBuffer(num_elements * byte_width);
+    ASSERT_TRUE(data_buf_result.ok());
+    std::shared_ptr<arrow::Buffer> data_buf = std::move(data_buf_result.ValueOrDie());
+
+    auto* data = data_buf->mutable_data();
+    const std::array<std::string, num_elements> values = {"{\"a\":1}", "{\"b\":2}", "{\"c\":3}",
+                                                          "{\"d\":4}"};
+    for (int64_t i = 0; i < num_elements; ++i) {
+        memcpy(data + i * byte_width, values[i].data(), byte_width);
+    }
+
+    auto null_bitmap_result = arrow::AllocateBuffer(1);
+    ASSERT_TRUE(null_bitmap_result.ok());
+    std::shared_ptr<arrow::Buffer> null_bitmap = std::move(null_bitmap_result.ValueOrDie());
+    memset(null_bitmap->mutable_data(), 0, null_bitmap->size());
+    arrow::bit_util::ClearBit(null_bitmap->mutable_data(), 0);
+    arrow::bit_util::SetBit(null_bitmap->mutable_data(), 1);
+    arrow::bit_util::SetBit(null_bitmap->mutable_data(), 2);
+    arrow::bit_util::SetBit(null_bitmap->mutable_data(), 3);
+
+    auto type = std::make_shared<arrow::FixedSizeBinaryType>(byte_width);
+    auto arr = std::make_shared<arrow::FixedSizeBinaryArray>(type, num_elements, data_buf,
+                                                             null_bitmap, 1);
+
+    auto column = ColumnString::create();
+    cctz::time_zone tz;
+    auto st = serde_jsonb->read_column_from_arrow(*column, arr.get(), 1, 4, tz);
+    ASSERT_TRUE(st.ok());
+    ASSERT_EQ(column->size(), 3);
+
+    DataTypeSerDe::FormatOptions options;
+    for (size_t i = 0; i < column->size(); ++i) {
+        auto serialized_column = ColumnString::create();
+        VectorBufferWriter buffer_writer(*serialized_column);
+        st = serde_jsonb->serialize_one_cell_to_json(*column, i, buffer_writer, options);
+        ASSERT_TRUE(st.ok());
+        buffer_writer.commit();
+        EXPECT_EQ(serialized_column->get_data_at(0).to_string(), values[i + 1]);
+    }
+}
+
+} // namespace doris
diff --git a/be/test/core/data_type_serde/data_type_serde_string_test.cpp b/be/test/core/data_type_serde/data_type_serde_string_test.cpp
index 3708145e391..0cdd20a94de 100644
--- a/be/test/core/data_type_serde/data_type_serde_string_test.cpp
+++ b/be/test/core/data_type_serde/data_type_serde_string_test.cpp
@@ -23,7 +23,9 @@
 #include <lz4/lz4.h>
 #include <streamvbyte.h>
 
+#include <array>
 #include <cstddef>
+#include <cstring>
 #include <iostream>
 #include <limits>
 #include <type_traits>
@@ -319,4 +321,40 @@ TEST_F(DataTypeStringSerDeTest, ArrowMemNotAlignedNestedArr) {
     EXPECT_TRUE(st.ok());
 }
 
+TEST_F(DataTypeStringSerDeTest, FixedSizeBinaryReadColumnFromArrowWithNonZeroStart) {
+    constexpr int64_t num_elements = 4;
+    constexpr int byte_width = 4;
+    auto data_buf_result = arrow::AllocateBuffer(num_elements * byte_width);
+    ASSERT_TRUE(data_buf_result.ok());
+    std::shared_ptr<arrow::Buffer> data_buf = std::move(data_buf_result.ValueOrDie());
+
+    auto* data = data_buf->mutable_data();
+    const std::array<std::string, num_elements> values = {"aaaa", "bbbb", "cccc", "dddd"};
+    for (int64_t i = 0; i < num_elements; ++i) {
+        memcpy(data + i * byte_width, values[i].data(), byte_width);
+    }
+
+    auto null_bitmap_result = arrow::AllocateBuffer(1);
+    ASSERT_TRUE(null_bitmap_result.ok());
+    std::shared_ptr<arrow::Buffer> null_bitmap = std::move(null_bitmap_result.ValueOrDie());
+    memset(null_bitmap->mutable_data(), 0, null_bitmap->size());
+    arrow::bit_util::ClearBit(null_bitmap->mutable_data(), 0);
+    arrow::bit_util::SetBit(null_bitmap->mutable_data(), 1);
+    arrow::bit_util::SetBit(null_bitmap->mutable_data(), 2);
+    arrow::bit_util::SetBit(null_bitmap->mutable_data(), 3);
+
+    auto type = std::make_shared<arrow::FixedSizeBinaryType>(byte_width);
+    auto arr = std::make_shared<arrow::FixedSizeBinaryArray>(type, num_elements, data_buf,
+                                                             null_bitmap, 1);
+
+    auto column = ColumnString::create();
+    cctz::time_zone tz;
+    auto st = serde_str->read_column_from_arrow(*column, arr.get(), 1, 4, tz);
+    ASSERT_TRUE(st.ok());
+    ASSERT_EQ(column->size(), 3);
+    EXPECT_EQ(column->get_data_at(0).to_string(), "bbbb");
+    EXPECT_EQ(column->get_data_at(1).to_string(), "cccc");
+    EXPECT_EQ(column->get_data_at(2).to_string(), "dddd");
+}
+
 } // namespace doris


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to