github-actions[bot] commented on code in PR #63001:
URL: https://github.com/apache/doris/pull/63001#discussion_r3275054668
##########
be/src/core/column/column_map.cpp:
##########
@@ -518,59 +546,84 @@ void ColumnMap::insert_range_from_ignore_overflow(const
IColumn& src, size_t sta
}
ColumnPtr ColumnMap::filter(const Filter& filt, ssize_t result_size_hint)
const {
- auto k_arr =
- ColumnArray::create(keys_column->assume_mutable(),
offsets_column->assume_mutable())
- ->filter(filt, result_size_hint);
- auto v_arr =
- ColumnArray::create(values_column->assume_mutable(),
offsets_column->assume_mutable())
- ->filter(filt, result_size_hint);
+ auto k_arr = ColumnArray::create(static_cast<const
IColumn::Ptr&>(keys_column),
+ static_cast<const
IColumn::Ptr&>(offsets_column))
+ ->filter(filt, result_size_hint);
+ auto v_arr = ColumnArray::create(static_cast<const
IColumn::Ptr&>(values_column),
+ static_cast<const
IColumn::Ptr&>(offsets_column))
+ ->filter(filt, result_size_hint);
return ColumnMap::create(assert_cast<const
ColumnArray&>(*k_arr).get_data_ptr(),
assert_cast<const
ColumnArray&>(*v_arr).get_data_ptr(),
assert_cast<const
ColumnArray&>(*k_arr).get_offsets_ptr());
}
size_t ColumnMap::filter(const Filter& filter) {
- MutableColumnPtr copied_off = offsets_column->clone_empty();
- copied_off->insert_range_from(*offsets_column, 0, offsets_column->size());
- ColumnArray::create(keys_column->assume_mutable(),
offsets_column->assume_mutable())
- ->filter(filter);
- ColumnArray::create(values_column->assume_mutable(),
copied_off->assume_mutable())
- ->filter(filter);
- return get_offsets().size();
+ // Move subcolumns out of this ColumnMap to get exclusive ownership, then
write back.
+ auto keys_mut =
IColumn::mutate(std::move(static_cast<IColumn::Ptr&>(keys_column)));
+ auto offsets_mut =
IColumn::mutate(std::move(static_cast<IColumn::Ptr&>(offsets_column)));
+ auto values_mut =
IColumn::mutate(std::move(static_cast<IColumn::Ptr&>(values_column)));
+ // Clone offsets for values (both keys and values share the same offsets
structure)
+ MutableColumnPtr copied_off = offsets_mut->clone_empty();
+ copied_off->insert_range_from(*offsets_mut, 0, offsets_mut->size());
+ auto k_arr = ColumnArray::create(std::move(keys_mut),
std::move(offsets_mut));
+ k_arr->filter(filter);
+ auto v_arr = ColumnArray::create(std::move(values_mut),
std::move(copied_off));
+ v_arr->filter(filter);
+ // Put filtered subcolumns back
+ static_cast<IColumn::Ptr&>(keys_column) = k_arr->get_data_ptr();
+ static_cast<IColumn::Ptr&>(offsets_column) = k_arr->get_offsets_ptr();
+ static_cast<IColumn::Ptr&>(values_column) = v_arr->get_data_ptr();
+ // Use const access to avoid assume_mutable_ref() on the just-written-back
offsets_column
+ // (k_arr still holds a ref, so use_count > 1 until k_arr goes out of
scope)
+ return static_cast<const IColumn::Ptr&>(offsets_column)->size();
}
MutableColumnPtr ColumnMap::permute(const Permutation& perm, size_t limit)
const {
- // Make a temp column array
- auto k_arr =
- ColumnArray::create(keys_column->assume_mutable(),
offsets_column->assume_mutable())
- ->permute(perm, limit);
- auto v_arr =
- ColumnArray::create(values_column->assume_mutable(),
offsets_column->assume_mutable())
- ->permute(perm, limit);
+ auto k_arr = ColumnArray::create(static_cast<const
IColumn::Ptr&>(keys_column),
+ static_cast<const
IColumn::Ptr&>(offsets_column))
+ ->permute(perm, limit);
+ auto v_arr = ColumnArray::create(static_cast<const
IColumn::Ptr&>(values_column),
+ static_cast<const
IColumn::Ptr&>(offsets_column))
+ ->permute(perm, limit);
return ColumnMap::create(assert_cast<const
ColumnArray&>(*k_arr).get_data_ptr(),
assert_cast<const
ColumnArray&>(*v_arr).get_data_ptr(),
assert_cast<const
ColumnArray&>(*k_arr).get_offsets_ptr());
}
Status ColumnMap::deduplicate_keys(bool recursive) {
- const auto inner_rows = keys_column->size();
- const auto rows = offsets_column->size();
+ const IColumn& ck = *static_cast<const IColumn::Ptr&>(keys_column);
+ const IColumn& co = *static_cast<const IColumn::Ptr&>(offsets_column);
+ const auto inner_rows = ck.size();
+ const auto rows = co.size();
if (recursive) {
- auto values_column_ = values_column;
- if (values_column_->is_nullable()) {
- values_column_ =
(assert_cast<ColumnNullable&>(*values_column)).get_nested_column_ptr();
- }
-
- if (auto* values_map =
check_and_get_column<ColumnMap>(values_column_.get())) {
- RETURN_IF_ERROR(values_map->deduplicate_keys(recursive));
+ const auto& values_ptr = static_cast<const
IColumn::Ptr&>(values_column);
+ if (const auto* nullable_values =
check_and_get_column<ColumnNullable>(values_ptr.get())) {
+ if
(check_and_get_column<ColumnMap>(nullable_values->get_nested_column_ptr().get()))
{
+ auto values_mut =
+
IColumn::mutate(std::move(static_cast<IColumn::Ptr&>(values_column)));
+ auto& nullable_values_mut =
assert_cast<ColumnNullable&>(*values_mut);
Review Comment:
This recursive path now moves the live `values_column` out of the parent map
before the nested `deduplicate_keys()` call succeeds. If the nested map returns
an error, or if the nullable/nested replacement work throws, the
`RETURN_IF_ERROR` exits with the parent `ColumnMap` still holding a moved-from
values subcolumn. This is distinct from the earlier recursive COW-bypass thread
because the current code detaches the value column but still lacks a
restore/commit guard. Please detach from a copied owner or install a scoped
restore, and only commit `values_column` after the recursive deduplication and
nested replacement have completed successfully.
##########
be/src/format/parquet/vparquet_column_reader.cpp:
##########
@@ -550,6 +548,10 @@ Status ScalarColumnReader<IN_COLLECTION,
OFFSET_INDEX>::read_column_data(
ColumnPtr resolved_column =
_converter->get_physical_column(_field_schema->physical_type,
_field_schema->data_type,
doris_column, type,
is_dict_filter);
+ if (_converter->read_directly_into_dst_logical_column()) {
+ DCHECK_EQ(resolved_column.get(), doris_column.get());
+ resolved_column = std::move(doris_column);
+ }
Review Comment:
The direct-read path moves the caller-owned `doris_column` into
`resolved_column` before the normal scalar page-reading path runs. The existing
thread covers the nested `_read_nested_column()` failure, but the non-nested
path below can also return before `_converter->convert()` restores the column:
`parse_page_header()`, `load_page_data_idempotent()`, `_skip_values()`,
`_read_values()`, or `next_page()` can fail on corrupt input or decode errors.
That leaves the caller's reusable `doris_column` moved-from. Please add a
scoped restore for this direct-read ownership transfer, or defer moving out of
`doris_column` until after all error-producing page reads complete.
##########
be/src/core/column/column_map.cpp:
##########
@@ -636,8 +689,12 @@ Status ColumnMap::deduplicate_keys(bool recursive) {
if (has_duplicated_key) {
offsets_column = std::move(new_offsets);
- keys_column->filter(filter);
Review Comment:
The duplicate-key in-place update commits `offsets_column` before the
key/value filtering is guaranteed to finish. If detaching
`keys_column`/`values_column` or either `filter(filter)` call throws, the map
is left partially updated: offsets describe the deduplicated shape while one or
both child columns are still unfiltered or moved-from. Please build all three
replacement columns in temporaries and publish offsets/keys/values together
only after the throwing work succeeds, or use a scoped restore guard for the
original subcolumns.
##########
be/src/format/jni/jni_data_bridge.cpp:
##########
@@ -105,44 +105,55 @@ Status JniDataBridge::fill_column(TableMetaAddress&
address, ColumnPtr& doris_co
// org.apache.doris.common.jni.vec.ColumnType.Type#UNSUPPORTED will
set column address as 0
return Status::InternalError("Unsupported type {} in java side",
data_type->get_name());
}
+ auto mutable_doris_column = IColumn::mutate(std::move(doris_column));
MutableColumnPtr data_column;
- if (doris_column->is_nullable()) {
- auto* nullable_column =
-
reinterpret_cast<ColumnNullable*>(doris_column->assume_mutable().get());
+ if (mutable_doris_column->is_nullable()) {
+ auto* nullable_column =
assert_cast<ColumnNullable*>(mutable_doris_column.get());
data_column = nullable_column->get_nested_column_ptr();
NullMap& null_map = nullable_column->get_null_map_data();
size_t origin_size = null_map.size();
null_map.resize(origin_size + num_rows);
memcpy(null_map.data() + origin_size,
static_cast<bool*>(null_map_ptr), num_rows);
} else {
- data_column = doris_column->assume_mutable();
+ data_column = mutable_doris_column->get_ptr();
}
// Date and DateTime are deprecated and not supported.
+ Status status = Status::OK();
switch (logical_type) {
-#define DISPATCH(TYPE_INDEX, COLUMN_TYPE, CPP_TYPE) \
- case TYPE_INDEX: \
- return _fill_fixed_length_column<COLUMN_TYPE, CPP_TYPE>( \
- data_column,
reinterpret_cast<CPP_TYPE*>(address.next_meta_as_ptr()), num_rows);
+#define DISPATCH(TYPE_INDEX, COLUMN_TYPE, CPP_TYPE)
\
+ case TYPE_INDEX: {
\
+ auto* data = reinterpret_cast<CPP_TYPE*>(address.next_meta_as_ptr());
\
+ status = _fill_fixed_length_column<COLUMN_TYPE, CPP_TYPE>(data_column,
data, num_rows); \
+ break;
\
+ }
FOR_FIXED_LENGTH_TYPES(DISPATCH)
#undef DISPATCH
case PrimitiveType::TYPE_STRING:
[[fallthrough]];
case PrimitiveType::TYPE_CHAR:
[[fallthrough]];
case PrimitiveType::TYPE_VARCHAR:
- return _fill_string_column(address, data_column, num_rows);
+ status = _fill_string_column(address, data_column, num_rows);
+ break;
case PrimitiveType::TYPE_ARRAY:
- return _fill_array_column(address, data_column, data_type, num_rows);
+ status = _fill_array_column(address, data_column, data_type, num_rows);
+ break;
case PrimitiveType::TYPE_MAP:
- return _fill_map_column(address, data_column, data_type, num_rows);
+ status = _fill_map_column(address, data_column, data_type, num_rows);
+ break;
case PrimitiveType::TYPE_STRUCT:
- return _fill_struct_column(address, data_column, data_type, num_rows);
+ status = _fill_struct_column(address, data_column, data_type,
num_rows);
+ break;
case PrimitiveType::TYPE_VARBINARY:
- return _fill_varbinary_column(address, data_column, num_rows);
+ status = _fill_varbinary_column(address, data_column, num_rows);
+ break;
default:
- return Status::InvalidArgument("Unsupported type {} in jni scanner",
data_type->get_name());
+ status = Status::InvalidArgument("Unsupported type {} in jni scanner",
+ data_type->get_name());
+ break;
}
- return Status::OK();
+ doris_column = std::move(mutable_doris_column);
+ return status;
Review Comment:
`fill_column()` writes the mutated column back even when `status` is non-OK,
after the nullable branch may already have appended `num_rows` null-map
entries. For example, an unsupported nullable JNI type reaches the default case
with the null map extended but no nested data appended, then line 155 commits
that inconsistent nullable column before returning the error to the caller.
Please only assign `doris_column` back on success, or restore both the nested
column and null map sizes on every error path.
##########
be/src/exec/common/data_gen_functions/vnumbers_tvf.cpp:
##########
@@ -49,7 +49,7 @@ Status VNumbersTVF::get_next(RuntimeState* state, Block*
block, bool* eos) {
// now only support one column for tvf numbers
for (int i = 0; i < _slot_num; ++i) {
if (mem_reuse) {
- columns[i] =
std::move(*(block->get_by_position(i).column)).mutate();
+ columns[i] =
IColumn::mutate(std::move(block->get_by_position(i).column));
} else {
Review Comment:
With `mem_reuse`, `get_next()` moves each live output column out of `block`
and only restores all columns at the end. Allocation failures from
`insert_many_vals()` or `insert_range_of_integer()` leave the reusable block
slots moved-from. Please use `block->mutate_columns_scoped()` for this append
path, or otherwise restore the moved columns on every throwing exit.
##########
be/src/exec/operator/streaming_aggregation_operator.cpp:
##########
@@ -330,8 +330,11 @@ Status
StreamingAggLocalState::_pre_agg_with_serialized_key(doris::Block* in_blo
in_block->get_by_position(result_column_id).column =
in_block->get_by_position(result_column_id)
.column->convert_to_full_column_if_const();
+ auto mutable_column =
+
IColumn::mutate(std::move(in_block->get_by_position(result_column_id).column));
+ mutable_column->replace_float_special_values();
Review Comment:
This steals the expression result column from `in_block` before
`replace_float_special_values()` is guaranteed to finish. If the COW detach has
to clone and throws, or if normalizing a nested/variant column throws, the
input block slot remains moved-from. Please mutate through a scoped restore or
from a copied `ColumnPtr`, and assign back only after normalization succeeds.
##########
be/src/storage/segment/vertical_segment_writer.cpp:
##########
@@ -92,6 +92,14 @@ inline std::string
vertical_segment_writer_mem_tracker_name(uint32_t segment_id)
return "VerticalSegmentWriter:Segment-" + std::to_string(segment_id);
}
+static ColumnBitmap* get_mutable_skip_bitmap_column(Block* block, size_t
skip_bitmap_col_idx) {
+ auto skip_bitmap_column =
+
IColumn::mutate(std::move(block->get_by_position(skip_bitmap_col_idx).column));
+ auto* skip_bitmap_column_ptr =
assert_cast<ColumnBitmap*>(skip_bitmap_column.get());
Review Comment:
This helper repeats the skip-bitmap ownership issue in the segment writer
path: the block slot is consumed before the COW detach succeeds, so an
allocation failure during `IColumn::mutate()` leaves the skip-bitmap column
moved-from. Please avoid moving from the block until the replacement is ready,
or guard and restore the original slot on failure.
##########
be/src/exec/operator/hashjoin_build_sink.cpp:
##########
@@ -576,7 +576,9 @@ Status
HashJoinBuildSinkLocalState::process_build_block(RuntimeState* state, Blo
for (auto& data : block) {
data.column =
std::move(*data.column).mutate()->convert_column_if_overflow();
if (p._need_finalize_variant_column) {
- std::move(*data.column).mutate()->finalize();
+ auto mutable_column = IColumn::mutate(std::move(data.column));
+ mutable_column->finalize();
Review Comment:
When variant finalization is enabled, this moves the live build-block column
out before `finalize()` succeeds. If finalizing a variant column throws, the
build block is left with an empty/moved-from slot. Please finalize a detached
temporary from a copied owner or use a scoped restore, and replace
`data.column` only after finalization succeeds.
##########
be/src/storage/iterator/block_reader.cpp:
##########
@@ -580,9 +582,10 @@ Status BlockReader::_unique_key_next_block(Block* block,
bool* eof) {
LOG(WARNING) << "tablet_id: " << tablet()->tablet_id() << " delete
sign idx "
<< delete_sign_idx
<< " not invalid, skip filter delete in base
compaction";
+ target_columns_guard.restore();
return Status::OK();
}
- MutableColumnPtr delete_filter_column =
(*std::move(_delete_filter_column)).mutate();
+ auto delete_filter_column =
IColumn::mutate(std::move(_delete_filter_column));
reinterpret_cast<ColumnUInt8*>(delete_filter_column.get())->resize(target_block_row);
Review Comment:
This moves `_delete_filter_column` out before resizing and filtering are
guaranteed to finish. If `resize()`, `block->insert()`, or
`Block::filter_block()` fails, the reader's reusable filter column has already
been consumed and the output block may also retain the temporary filter column.
Please protect `_delete_filter_column` with a restore guard, and only commit
the moved filter column after filtering succeeds.
##########
be/src/storage/partial_update_info.cpp:
##########
@@ -39,6 +40,18 @@
#include "storage/utils.h"
namespace doris {
+namespace {
+
+ColumnBitmap* get_mutable_skip_bitmap_column(Block* block, size_t
skip_bitmap_col_idx) {
+ auto skip_bitmap_column =
+
IColumn::mutate(std::move(block->get_by_position(skip_bitmap_col_idx).column));
+ auto* skip_bitmap_column_ptr =
assert_cast<ColumnBitmap*>(skip_bitmap_column.get());
Review Comment:
`get_mutable_skip_bitmap_column()` moves the live skip-bitmap slot out of
the block before `IColumn::mutate()` is guaranteed to succeed. If COW cloning
throws/OOMs, the block slot remains moved-from and the caller cannot safely
continue unwinding or reuse the block. Please detach from a copied `ColumnPtr`
and replace the slot only after success, or install a restore guard before
consuming the slot.
##########
be/src/exec/operator/streaming_aggregation_operator.cpp:
##########
@@ -462,7 +466,7 @@ Status
StreamingAggLocalState::_get_results_with_serialized_key(RuntimeState* st
MutableColumns key_columns;
for (int i = 0; i < key_size; ++i) {
if (mem_reuse) {
-
key_columns.emplace_back(std::move(*block->get_by_position(i).column).mutate());
+
key_columns.emplace_back(IColumn::mutate(std::move(block->get_by_position(i).column)));
} else {
Review Comment:
In the `mem_reuse` path this moves key columns out of the caller-provided
output block, then performs hash-table iteration and key/value
insertion/serialization before `block->set_columns()` restores the slots at the
end. Any exception from `insert_keys_into_columns()`, nullable null-key
insertion, value-column detach, or `serialize_to_column()` leaves the reusable
output block with moved-from columns. This is the streaming aggregation source
path, distinct from the already-commented aggregation source paths; it needs
the same scoped restore/release pattern.
##########
be/src/storage/iterator/vertical_block_reader.cpp:
##########
@@ -483,12 +482,14 @@ Status VerticalBlockReader::_unique_key_next_block(Block*
block, bool* eof) {
// delete sign column must store in last column of the block
int delete_sign_idx = block->columns() - 1;
DCHECK(delete_sign_idx > 0);
- auto target_columns = block->mutate_columns();
- MutableColumnPtr delete_filter_column =
(*std::move(_delete_filter_column)).mutate();
-
reinterpret_cast<ColumnUInt8*>(delete_filter_column.get())->resize(block_rows);
-
- auto* __restrict filter_data =
-
reinterpret_cast<ColumnUInt8*>(delete_filter_column.get())->get_data().data();
+ auto target_columns_guard = block->mutate_columns_scoped();
+ auto& target_columns = target_columns_guard.mutable_columns();
+ auto delete_filter_column =
IColumn::mutate(std::move(_delete_filter_column));
+ auto* delete_filter_data_column =
Review Comment:
This vertical reader path has the same live-member ownership gap for
`_delete_filter_column`: it is moved out before `resize()`, appending the
temporary filter column, and `Block::filter_block()` can complete successfully.
Any failure leaves the reusable reader state with `_delete_filter_column`
consumed. Please restore the member on all exits or detach from a copied
pointer and commit only after the filter succeeds.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]