This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 3b0175d190 [FIX](map)fix map offsets set next_array_item_rowid (#23251)
3b0175d190 is described below
commit 3b0175d190c85f55edf6bf502720a1abdff9e0fc
Author: amory <[email protected]>
AuthorDate: Mon Aug 21 18:01:10 2023 +0800
[FIX](map)fix map offsets set next_array_item_rowid (#23251)
---
be/src/olap/rowset/segment_v2/column_reader.cpp | 26 +++++++++++++++++++------
be/src/olap/rowset/segment_v2/column_writer.cpp | 20 ++++++++++---------
2 files changed, 31 insertions(+), 15 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp
b/be/src/olap/rowset/segment_v2/column_reader.cpp
index 03f29a6451..30d593ba80 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/column_reader.cpp
@@ -675,6 +675,7 @@ Status MapFileColumnIterator::next_batch(size_t* n,
vectorized::MutableColumnPtr
auto& column_offsets =
static_cast<vectorized::ColumnArray::ColumnOffsets&>(*column_offsets_ptr);
RETURN_IF_ERROR(_offsets_iterator->_calculate_offsets(start,
column_offsets));
+ DCHECK(column_offsets.get_data().back() >= column_offsets.get_data()[start
- 1]);
size_t num_items =
column_offsets.get_data().back() - column_offsets.get_data()[start
- 1]; // -1 is valid
auto key_ptr = column_map->get_keys().assume_mutable();
@@ -809,20 +810,33 @@ Status
OffsetFileColumnIterator::_peek_one_offset(ordinal_t* offset) {
return Status::OK();
}
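+/**
+ * Rebase the offsets read from storage onto the in-memory column offsets.
+ *
+ * first_storage_offset, read from the page, should be smaller than
+ * next_storage_offset, which is obtained here by calling _peek_one_offset on the page.
+ * first_column_offset is kept in the in-memory column data and is in a different
+ * dimension from first_storage_offset and next_storage_offset.
+ * e.g. step 1. read page: first_storage_offset = 16382
+ *      step 2. read the page below with _peek_one_offset(&next_storage_offset):
+ *              next_storage_offset = 16387
+ *      step 3. first_column_offset = 126, which is taken from the column offsets
+ * Looping over the elements of the column offsets, we can then walk from
+ * first_storage_offset to next_storage_offset one by one and fill offsets_data
+ * in the in-memory column offsets.
+ * @param start index of the first newly read offset in column_offsets;
+ *              column_offsets[start - 1] is used as first_column_offset
+ * @param column_offsets in-memory offsets column, rewritten in place from start onwards
+ * @return the error from _peek_one_offset if it fails, otherwise Status::OK()
+ */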
Status OffsetFileColumnIterator::_calculate_offsets(
ssize_t start, vectorized::ColumnArray::ColumnOffsets& column_offsets)
{
- ordinal_t last_offset = 0;
- RETURN_IF_ERROR(_peek_one_offset(&last_offset));
+ ordinal_t next_storage_offset = 0;
+ RETURN_IF_ERROR(_peek_one_offset(&next_storage_offset));
// calculate real offsets
auto& offsets_data = column_offsets.get_data();
- ordinal_t first_offset = offsets_data[start - 1]; // -1 is valid
- ordinal_t first_ord = offsets_data[start];
+ ordinal_t first_column_offset = offsets_data[start - 1]; // -1 is valid
+ ordinal_t first_storage_offset = offsets_data[start];
for (ssize_t i = start; i < offsets_data.size() - 1; ++i) {
- offsets_data[i] = first_offset + (offsets_data[i + 1] - first_ord);
+ offsets_data[i] = first_column_offset + (offsets_data[i + 1] -
first_storage_offset);
}
// last offset
- offsets_data[offsets_data.size() - 1] = first_offset + (last_offset -
first_ord);
+ offsets_data[offsets_data.size() - 1] =
+ first_column_offset + (next_storage_offset - first_storage_offset);
return Status::OK();
}
diff --git a/be/src/olap/rowset/segment_v2/column_writer.cpp
b/be/src/olap/rowset/segment_v2/column_writer.cpp
index 4ddd7e2c6b..dcb0f89858 100644
--- a/be/src/olap/rowset/segment_v2/column_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/column_writer.cpp
@@ -1083,17 +1083,19 @@ Status MapColumnWriter::append_data(const uint8_t**
ptr, size_t num_rows) {
size_t element_cnt = size_t((unsigned long)(*data_ptr));
auto offset_data = *(data_ptr + 1);
const uint8_t* offsets_ptr = (const uint8_t*)offset_data;
- RETURN_IF_ERROR(_offsets_writer->append_data(&offsets_ptr, num_rows));
- if (element_cnt == 0) {
- return Status::OK();
- }
- for (size_t i = 0; i < 2; ++i) {
- auto data = *(data_ptr + 2 + i);
- auto nested_null_map = *(data_ptr + 2 + 2 + i);
- RETURN_IF_ERROR(_kv_writers[i]->append(reinterpret_cast<const
uint8_t*>(nested_null_map),
- reinterpret_cast<const
void*>(data), element_cnt));
+ if (element_cnt > 0) {
+ for (size_t i = 0; i < 2; ++i) {
+ auto data = *(data_ptr + 2 + i);
+ auto nested_null_map = *(data_ptr + 2 + 2 + i);
+ RETURN_IF_ERROR(
+ _kv_writers[i]->append(reinterpret_cast<const
uint8_t*>(nested_null_map),
+ reinterpret_cast<const
void*>(data), element_cnt));
+ }
}
+ // Keep this order: the offsets writer must append its data (and flush
+ // next_array_item_ordinal) only after the kv_writers have appended theirs,
+ // because _kv_writers[0]->get_next_rowid() is used to set
+ // next_array_item_ordinal in the offset page footer.
+ RETURN_IF_ERROR(_offsets_writer->append_data(&offsets_ptr, num_rows));
return Status::OK();
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]