This is an automated email from the ASF dual-hosted git repository.
zhangchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 0aa57d159e [Fix](Partial update) Fix wrong position using in segment
writer (#22782)
0aa57d159e is described below
commit 0aa57d159ed9d8706ae0031ea5124c01e0ca7b90
Author: abmdocrt <[email protected]>
AuthorDate: Wed Aug 16 19:31:06 2023 +0800
[Fix](Partial update) Fix wrong position using in segment writer (#22782)
---
be/src/olap/rowset/segment_v2/segment_writer.cpp | 41 ++++++++++++++++--------
be/src/olap/rowset/segment_v2/segment_writer.h | 2 +-
2 files changed, 28 insertions(+), 15 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp
b/be/src/olap/rowset/segment_v2/segment_writer.cpp
index 896a38493d..b88f63dd83 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp
@@ -347,7 +347,10 @@ Status
SegmentWriter::append_block_with_partial_content(const vectorized::Block*
// write including columns
std::vector<vectorized::IOlapColumnDataAccessor*> key_columns;
vectorized::IOlapColumnDataAccessor* seq_column = nullptr;
+ size_t segment_start_pos;
for (auto cid : including_cids) {
+ // here we get segment column row num before append data.
+ segment_start_pos = _column_writers[cid]->get_next_rowid();
// olap data convertor alway start from id = 0
auto converted_result = _olap_data_convertor->convert_column_data(cid);
if (converted_result.first != Status::OK()) {
@@ -374,7 +377,7 @@ Status
SegmentWriter::append_block_with_partial_content(const vectorized::Block*
delete_sign_column != nullptr) {
auto& delete_sign_col =
reinterpret_cast<const
vectorized::ColumnInt8&>(*(delete_sign_column->column));
- if (delete_sign_col.size() == num_rows) {
+ if (delete_sign_col.size() >= row_pos + num_rows) {
delete_sign_column_data = delete_sign_col.get_data().data();
}
}
@@ -388,10 +391,18 @@ Status
SegmentWriter::append_block_with_partial_content(const vectorized::Block*
// locate rows in base data
int64_t num_rows_filtered = 0;
- for (size_t pos = row_pos; pos < row_pos + num_rows; pos++) {
- std::string key = _full_encode_keys(key_columns, pos);
+ for (size_t block_pos = row_pos; block_pos < row_pos + num_rows;
block_pos++) {
+ // block segment
+ // 2 -> 0
+ // 3 -> 1
+ // 4 -> 2
+ // 5 -> 3
+ // here row_pos = 2, num_rows = 4.
+ size_t delta_pos = block_pos - row_pos;
+ size_t segment_pos = segment_start_pos + delta_pos;
+ std::string key = _full_encode_keys(key_columns, delta_pos);
if (have_input_seq_column) {
- _encode_seq_column(seq_column, pos, &key);
+ _encode_seq_column(seq_column, delta_pos, &key);
}
// If the table have sequence column, and the include-cids don't
contain the sequence
// column, we need to update the primary key index builder at the end
of this method.
@@ -411,7 +422,7 @@ Status
SegmentWriter::append_block_with_partial_content(const vectorized::Block*
++num_rows_filtered;
// delete the invalid newly inserted row
_mow_context->delete_bitmap->add({_opts.rowset_ctx->rowset_id,
_segment_id, 0},
- pos);
+ segment_pos);
}
if (!_tablet_schema->can_insert_new_rows_in_partial_update()) {
@@ -431,20 +442,21 @@ Status
SegmentWriter::append_block_with_partial_content(const vectorized::Block*
// if the delete sign is marked, it means that the value columns of
the row
// will not be read. So we don't need to read the missing values from
the previous rows.
// But we still need to mark the previous row on delete bitmap
- if (delete_sign_column_data != nullptr && delete_sign_column_data[pos
- row_pos] != 0) {
+ if (delete_sign_column_data != nullptr &&
delete_sign_column_data[block_pos] != 0) {
has_default_or_nullable = true;
use_default_or_null_flag.emplace_back(true);
} else {
// partial update should not contain invisible columns
use_default_or_null_flag.emplace_back(false);
_rsid_to_rowset.emplace(rowset->rowset_id(), rowset);
- _tablet->prepare_to_read(loc, pos, &_rssid_to_rid);
+ _tablet->prepare_to_read(loc, segment_pos, &_rssid_to_rid);
}
if (st.is<ALREADY_EXIST>()) {
// although we need to mark delete current row, we still need to
read missing columns
// for this row, we need to ensure that each column is aligned
- _mow_context->delete_bitmap->add({_opts.rowset_ctx->rowset_id,
_segment_id, 0}, pos);
+ _mow_context->delete_bitmap->add({_opts.rowset_ctx->rowset_id,
_segment_id, 0},
+ segment_pos);
} else {
_mow_context->delete_bitmap->add({loc.rowset_id, loc.segment_id,
0}, loc.row_id);
}
@@ -459,7 +471,7 @@ Status
SegmentWriter::append_block_with_partial_content(const vectorized::Block*
// read and fill block
auto mutable_full_columns = full_block.mutate_columns();
RETURN_IF_ERROR(fill_missing_columns(mutable_full_columns,
use_default_or_null_flag,
- has_default_or_nullable));
+ has_default_or_nullable,
segment_start_pos));
// row column should be filled here
if (_tablet_schema->store_row_column()) {
// convert block to row store format
@@ -500,9 +512,9 @@ Status
SegmentWriter::append_block_with_partial_content(const vectorized::Block*
"index builder num rows: {}",
_num_rows_written, row_pos,
_primary_key_index_builder->num_rows());
}
- for (size_t pos = row_pos; pos < row_pos + num_rows; pos++) {
- std::string key = _full_encode_keys(key_columns, pos);
- _encode_seq_column(seq_column, pos, &key);
+ for (size_t block_pos = row_pos; block_pos < row_pos + num_rows;
block_pos++) {
+ std::string key = _full_encode_keys(key_columns, block_pos -
row_pos);
+ _encode_seq_column(seq_column, block_pos - row_pos, &key);
RETURN_IF_ERROR(_primary_key_index_builder->add_item(key));
}
}
@@ -517,7 +529,8 @@ Status
SegmentWriter::append_block_with_partial_content(const vectorized::Block*
Status SegmentWriter::fill_missing_columns(vectorized::MutableColumns&
mutable_full_columns,
const std::vector<bool>&
use_default_or_null_flag,
- bool has_default_or_nullable) {
+ bool has_default_or_nullable,
+ const size_t& segment_start_pos) {
// create old value columns
auto old_value_block = _tablet_schema->create_missing_columns_block();
std::vector<uint32_t> cids_missing = _tablet_schema->get_missing_cids();
@@ -596,7 +609,7 @@ Status
SegmentWriter::fill_missing_columns(vectorized::MutableColumns& mutable_f
}
continue;
}
- auto pos_in_old_block = read_index[idx];
+ auto pos_in_old_block = read_index[idx + segment_start_pos];
for (auto i = 0; i < cids_missing.size(); ++i) {
mutable_full_columns[cids_missing[i]]->insert_from(
*old_value_block.get_columns_with_type_and_name()[i].column.get(),
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.h
b/be/src/olap/rowset/segment_v2/segment_writer.h
index 04314e99a8..3d67c869ab 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.h
+++ b/be/src/olap/rowset/segment_v2/segment_writer.h
@@ -131,7 +131,7 @@ public:
void set_mow_context(std::shared_ptr<MowContext> mow_context);
Status fill_missing_columns(vectorized::MutableColumns&
mutable_full_columns,
const std::vector<bool>&
use_default_or_null_flag,
- bool has_default_or_nullable);
+ bool has_default_or_nullable, const size_t&
segment_start_pos);
private:
DISALLOW_COPY_AND_ASSIGN(SegmentWriter);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]