This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.1 by this push:
new f894c5092bd branch-4.1: [fix](be) Fix sliced FixedSizeBinary Arrow string reads #64829 (#64969)
f894c5092bd is described below
commit f894c5092bd17860d486cc7853dffeb8ebc57301
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Tue Jun 30 14:04:37 2026 +0800
branch-4.1: [fix](be) Fix sliced FixedSizeBinary Arrow string reads #64829 (#64969)
Cherry-picked from #64829
Co-authored-by: Mryange <[email protected]>
---
.../core/data_type_serde/data_type_jsonb_serde.cpp | 5 +--
.../data_type_serde/data_type_string_serde.cpp | 5 +--
.../data_type_serde/data_type_jsonb_serde_test.cpp | 48 +++++++++++++++++++++-
.../data_type_serde_string_test.cpp | 38 +++++++++++++++++
4 files changed, 89 insertions(+), 7 deletions(-)
diff --git a/be/src/core/data_type_serde/data_type_jsonb_serde.cpp b/be/src/core/data_type_serde/data_type_jsonb_serde.cpp
index 90660dac372..7dc0e4cfd30 100644
--- a/be/src/core/data_type_serde/data_type_jsonb_serde.cpp
+++ b/be/src/core/data_type_serde/data_type_jsonb_serde.cpp
@@ -150,12 +150,11 @@ Status DataTypeJsonbSerDe::read_column_from_arrow(IColumn& column, const arrow::
} else if (arrow_array->type_id() == arrow::Type::FIXED_SIZE_BINARY) {
const auto* concrete_array = dynamic_cast<const
arrow::FixedSizeBinaryArray*>(arrow_array);
uint32_t width = concrete_array->byte_width();
- const auto* array_data = concrete_array->GetValue(start);
JsonBinaryValue value;
- for (size_t offset_i = 0; offset_i < end - start; ++offset_i) {
+ for (auto offset_i = start; offset_i < end; ++offset_i) {
if (!concrete_array->IsNull(offset_i)) {
- const auto* raw_data = array_data + (offset_i * width);
+ const auto* raw_data = concrete_array->GetValue(offset_i);
RETURN_IF_ERROR(
value.from_json_string(reinterpret_cast<const
char*>(raw_data), width));
diff --git a/be/src/core/data_type_serde/data_type_string_serde.cpp b/be/src/core/data_type_serde/data_type_string_serde.cpp
index e30fbea7c30..72ee74a3ece 100644
--- a/be/src/core/data_type_serde/data_type_string_serde.cpp
+++ b/be/src/core/data_type_serde/data_type_string_serde.cpp
@@ -282,11 +282,10 @@ Status DataTypeStringSerDeBase<ColumnType>::read_column_from_arrow(
} else if (arrow_array->type_id() == arrow::Type::FIXED_SIZE_BINARY) {
const auto* concrete_array = dynamic_cast<const
arrow::FixedSizeBinaryArray*>(arrow_array);
uint32_t width = concrete_array->byte_width();
- const auto* array_data = concrete_array->GetValue(start);
- for (size_t offset_i = 0; offset_i < end - start; ++offset_i) {
+ for (auto offset_i = start; offset_i < end; ++offset_i) {
if (!concrete_array->IsNull(offset_i)) {
- const auto* raw_data = array_data + (offset_i * width);
+ const auto* raw_data = concrete_array->GetValue(offset_i);
assert_cast<ColumnType&>(column).insert_data((char*)raw_data,
width);
} else {
assert_cast<ColumnType&>(column).insert_default();
diff --git a/be/test/core/data_type_serde/data_type_jsonb_serde_test.cpp b/be/test/core/data_type_serde/data_type_jsonb_serde_test.cpp
index 038f520487b..176369da58f 100644
--- a/be/test/core/data_type_serde/data_type_jsonb_serde_test.cpp
+++ b/be/test/core/data_type_serde/data_type_jsonb_serde_test.cpp
@@ -25,7 +25,9 @@
#include <lz4/lz4.h>
#include <streamvbyte.h>
+#include <array>
#include <cstddef>
+#include <cstring>
#include <iostream>
#include <limits>
#include <type_traits>
@@ -275,4 +277,48 @@ TEST_F(DataTypeJsonbSerDeTest, ArrowMemNotAligned) {
EXPECT_TRUE(st.ok());
}
-} // namespace doris
\ No newline at end of file
+TEST_F(DataTypeJsonbSerDeTest,
FixedSizeBinaryReadColumnFromArrowWithNonZeroStart) {
+ constexpr int64_t num_elements = 4;
+ constexpr int byte_width = 7;
+ auto data_buf_result = arrow::AllocateBuffer(num_elements * byte_width);
+ ASSERT_TRUE(data_buf_result.ok());
+ std::shared_ptr<arrow::Buffer> data_buf =
std::move(data_buf_result.ValueOrDie());
+
+ auto* data = data_buf->mutable_data();
+ const std::array<std::string, num_elements> values = {"{\"a\":1}",
"{\"b\":2}", "{\"c\":3}",
+ "{\"d\":4}"};
+ for (int64_t i = 0; i < num_elements; ++i) {
+ memcpy(data + i * byte_width, values[i].data(), byte_width);
+ }
+
+ auto null_bitmap_result = arrow::AllocateBuffer(1);
+ ASSERT_TRUE(null_bitmap_result.ok());
+ std::shared_ptr<arrow::Buffer> null_bitmap =
std::move(null_bitmap_result.ValueOrDie());
+ memset(null_bitmap->mutable_data(), 0, null_bitmap->size());
+ arrow::bit_util::ClearBit(null_bitmap->mutable_data(), 0);
+ arrow::bit_util::SetBit(null_bitmap->mutable_data(), 1);
+ arrow::bit_util::SetBit(null_bitmap->mutable_data(), 2);
+ arrow::bit_util::SetBit(null_bitmap->mutable_data(), 3);
+
+ auto type = std::make_shared<arrow::FixedSizeBinaryType>(byte_width);
+ auto arr = std::make_shared<arrow::FixedSizeBinaryArray>(type,
num_elements, data_buf,
+ null_bitmap, 1);
+
+ auto column = ColumnString::create();
+ cctz::time_zone tz;
+ auto st = serde_jsonb->read_column_from_arrow(*column, arr.get(), 1, 4,
tz);
+ ASSERT_TRUE(st.ok());
+ ASSERT_EQ(column->size(), 3);
+
+ DataTypeSerDe::FormatOptions options;
+ for (size_t i = 0; i < column->size(); ++i) {
+ auto serialized_column = ColumnString::create();
+ VectorBufferWriter buffer_writer(*serialized_column);
+ st = serde_jsonb->serialize_one_cell_to_json(*column, i,
buffer_writer, options);
+ ASSERT_TRUE(st.ok());
+ buffer_writer.commit();
+ EXPECT_EQ(serialized_column->get_data_at(0).to_string(), values[i +
1]);
+ }
+}
+
+} // namespace doris
diff --git a/be/test/core/data_type_serde/data_type_serde_string_test.cpp b/be/test/core/data_type_serde/data_type_serde_string_test.cpp
index 3708145e391..0cdd20a94de 100644
--- a/be/test/core/data_type_serde/data_type_serde_string_test.cpp
+++ b/be/test/core/data_type_serde/data_type_serde_string_test.cpp
@@ -23,7 +23,9 @@
#include <lz4/lz4.h>
#include <streamvbyte.h>
+#include <array>
#include <cstddef>
+#include <cstring>
#include <iostream>
#include <limits>
#include <type_traits>
@@ -319,4 +321,40 @@ TEST_F(DataTypeStringSerDeTest, ArrowMemNotAlignedNestedArr) {
EXPECT_TRUE(st.ok());
}
+TEST_F(DataTypeStringSerDeTest,
FixedSizeBinaryReadColumnFromArrowWithNonZeroStart) {
+ constexpr int64_t num_elements = 4;
+ constexpr int byte_width = 4;
+ auto data_buf_result = arrow::AllocateBuffer(num_elements * byte_width);
+ ASSERT_TRUE(data_buf_result.ok());
+ std::shared_ptr<arrow::Buffer> data_buf =
std::move(data_buf_result.ValueOrDie());
+
+ auto* data = data_buf->mutable_data();
+ const std::array<std::string, num_elements> values = {"aaaa", "bbbb",
"cccc", "dddd"};
+ for (int64_t i = 0; i < num_elements; ++i) {
+ memcpy(data + i * byte_width, values[i].data(), byte_width);
+ }
+
+ auto null_bitmap_result = arrow::AllocateBuffer(1);
+ ASSERT_TRUE(null_bitmap_result.ok());
+ std::shared_ptr<arrow::Buffer> null_bitmap =
std::move(null_bitmap_result.ValueOrDie());
+ memset(null_bitmap->mutable_data(), 0, null_bitmap->size());
+ arrow::bit_util::ClearBit(null_bitmap->mutable_data(), 0);
+ arrow::bit_util::SetBit(null_bitmap->mutable_data(), 1);
+ arrow::bit_util::SetBit(null_bitmap->mutable_data(), 2);
+ arrow::bit_util::SetBit(null_bitmap->mutable_data(), 3);
+
+ auto type = std::make_shared<arrow::FixedSizeBinaryType>(byte_width);
+ auto arr = std::make_shared<arrow::FixedSizeBinaryArray>(type,
num_elements, data_buf,
+ null_bitmap, 1);
+
+ auto column = ColumnString::create();
+ cctz::time_zone tz;
+ auto st = serde_str->read_column_from_arrow(*column, arr.get(), 1, 4, tz);
+ ASSERT_TRUE(st.ok());
+ ASSERT_EQ(column->size(), 3);
+ EXPECT_EQ(column->get_data_at(0).to_string(), "bbbb");
+ EXPECT_EQ(column->get_data_at(1).to_string(), "cccc");
+ EXPECT_EQ(column->get_data_at(2).to_string(), "dddd");
+}
+
} // namespace doris
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]