github-actions[bot] commented on code in PR #63001:
URL: https://github.com/apache/doris/pull/63001#discussion_r3275054668


##########
be/src/core/column/column_map.cpp:
##########
@@ -518,59 +546,84 @@ void ColumnMap::insert_range_from_ignore_overflow(const 
IColumn& src, size_t sta
 }
 
 ColumnPtr ColumnMap::filter(const Filter& filt, ssize_t result_size_hint) 
const {
-    auto k_arr =
-            ColumnArray::create(keys_column->assume_mutable(), 
offsets_column->assume_mutable())
-                    ->filter(filt, result_size_hint);
-    auto v_arr =
-            ColumnArray::create(values_column->assume_mutable(), 
offsets_column->assume_mutable())
-                    ->filter(filt, result_size_hint);
+    auto k_arr = ColumnArray::create(static_cast<const 
IColumn::Ptr&>(keys_column),
+                                     static_cast<const 
IColumn::Ptr&>(offsets_column))
+                         ->filter(filt, result_size_hint);
+    auto v_arr = ColumnArray::create(static_cast<const 
IColumn::Ptr&>(values_column),
+                                     static_cast<const 
IColumn::Ptr&>(offsets_column))
+                         ->filter(filt, result_size_hint);
     return ColumnMap::create(assert_cast<const 
ColumnArray&>(*k_arr).get_data_ptr(),
                              assert_cast<const 
ColumnArray&>(*v_arr).get_data_ptr(),
                              assert_cast<const 
ColumnArray&>(*k_arr).get_offsets_ptr());
 }
 
 size_t ColumnMap::filter(const Filter& filter) {
-    MutableColumnPtr copied_off = offsets_column->clone_empty();
-    copied_off->insert_range_from(*offsets_column, 0, offsets_column->size());
-    ColumnArray::create(keys_column->assume_mutable(), 
offsets_column->assume_mutable())
-            ->filter(filter);
-    ColumnArray::create(values_column->assume_mutable(), 
copied_off->assume_mutable())
-            ->filter(filter);
-    return get_offsets().size();
+    // Move subcolumns out of this ColumnMap to get exclusive ownership, then 
write back.
+    auto keys_mut = 
IColumn::mutate(std::move(static_cast<IColumn::Ptr&>(keys_column)));
+    auto offsets_mut = 
IColumn::mutate(std::move(static_cast<IColumn::Ptr&>(offsets_column)));
+    auto values_mut = 
IColumn::mutate(std::move(static_cast<IColumn::Ptr&>(values_column)));
+    // Clone offsets for values (both keys and values share the same offsets 
structure)
+    MutableColumnPtr copied_off = offsets_mut->clone_empty();
+    copied_off->insert_range_from(*offsets_mut, 0, offsets_mut->size());
+    auto k_arr = ColumnArray::create(std::move(keys_mut), 
std::move(offsets_mut));
+    k_arr->filter(filter);
+    auto v_arr = ColumnArray::create(std::move(values_mut), 
std::move(copied_off));
+    v_arr->filter(filter);
+    // Put filtered subcolumns back
+    static_cast<IColumn::Ptr&>(keys_column) = k_arr->get_data_ptr();
+    static_cast<IColumn::Ptr&>(offsets_column) = k_arr->get_offsets_ptr();
+    static_cast<IColumn::Ptr&>(values_column) = v_arr->get_data_ptr();
+    // Use const access to avoid assume_mutable_ref() on the just-written-back 
offsets_column
+    // (k_arr still holds a ref, so use_count > 1 until k_arr goes out of 
scope)
+    return static_cast<const IColumn::Ptr&>(offsets_column)->size();
 }
 
 MutableColumnPtr ColumnMap::permute(const Permutation& perm, size_t limit) 
const {
-    // Make a temp column array
-    auto k_arr =
-            ColumnArray::create(keys_column->assume_mutable(), 
offsets_column->assume_mutable())
-                    ->permute(perm, limit);
-    auto v_arr =
-            ColumnArray::create(values_column->assume_mutable(), 
offsets_column->assume_mutable())
-                    ->permute(perm, limit);
+    auto k_arr = ColumnArray::create(static_cast<const 
IColumn::Ptr&>(keys_column),
+                                     static_cast<const 
IColumn::Ptr&>(offsets_column))
+                         ->permute(perm, limit);
+    auto v_arr = ColumnArray::create(static_cast<const 
IColumn::Ptr&>(values_column),
+                                     static_cast<const 
IColumn::Ptr&>(offsets_column))
+                         ->permute(perm, limit);
 
     return ColumnMap::create(assert_cast<const 
ColumnArray&>(*k_arr).get_data_ptr(),
                              assert_cast<const 
ColumnArray&>(*v_arr).get_data_ptr(),
                              assert_cast<const 
ColumnArray&>(*k_arr).get_offsets_ptr());
 }
 
 Status ColumnMap::deduplicate_keys(bool recursive) {
-    const auto inner_rows = keys_column->size();
-    const auto rows = offsets_column->size();
+    const IColumn& ck = *static_cast<const IColumn::Ptr&>(keys_column);
+    const IColumn& co = *static_cast<const IColumn::Ptr&>(offsets_column);
+    const auto inner_rows = ck.size();
+    const auto rows = co.size();
 
     if (recursive) {
-        auto values_column_ = values_column;
-        if (values_column_->is_nullable()) {
-            values_column_ = 
(assert_cast<ColumnNullable&>(*values_column)).get_nested_column_ptr();
-        }
-
-        if (auto* values_map = 
check_and_get_column<ColumnMap>(values_column_.get())) {
-            RETURN_IF_ERROR(values_map->deduplicate_keys(recursive));
+        const auto& values_ptr = static_cast<const 
IColumn::Ptr&>(values_column);
+        if (const auto* nullable_values = 
check_and_get_column<ColumnNullable>(values_ptr.get())) {
+            if 
(check_and_get_column<ColumnMap>(nullable_values->get_nested_column_ptr().get()))
 {
+                auto values_mut =
+                        
IColumn::mutate(std::move(static_cast<IColumn::Ptr&>(values_column)));
+                auto& nullable_values_mut = 
assert_cast<ColumnNullable&>(*values_mut);

Review Comment:
   This recursive path now moves the live `values_column` out of the parent map 
before the nested `deduplicate_keys()` call succeeds. If the nested map returns 
an error, or if the nullable/nested replacement work throws, the 
`RETURN_IF_ERROR` exits with the parent `ColumnMap` still holding a moved-from 
values subcolumn. This is distinct from the earlier recursive COW-bypass thread 
because the current code detaches the value column but still lacks a 
restore/commit guard. Please detach from a copied owner or install a scoped 
restore, and only commit `values_column` after the recursive deduplication and 
nested replacement have completed successfully.



##########
be/src/format/parquet/vparquet_column_reader.cpp:
##########
@@ -550,6 +548,10 @@ Status ScalarColumnReader<IN_COLLECTION, 
OFFSET_INDEX>::read_column_data(
     ColumnPtr resolved_column =
             _converter->get_physical_column(_field_schema->physical_type, 
_field_schema->data_type,
                                             doris_column, type, 
is_dict_filter);
+    if (_converter->read_directly_into_dst_logical_column()) {
+        DCHECK_EQ(resolved_column.get(), doris_column.get());
+        resolved_column = std::move(doris_column);
+    }

Review Comment:
   The direct-read path moves the caller-owned `doris_column` into 
`resolved_column` before the normal scalar page-reading path runs. The existing 
thread covers the nested `_read_nested_column()` failure, but the non-nested 
path below can also return before `_converter->convert()` restores the column: 
`parse_page_header()`, `load_page_data_idempotent()`, `_skip_values()`, 
`_read_values()`, or `next_page()` can fail on corrupt input or decode errors. 
That leaves the caller's reusable `doris_column` moved-from. Please add a 
scoped restore for this direct-read ownership transfer, or defer moving out of 
`doris_column` until after all error-producing page reads complete.



##########
be/src/core/column/column_map.cpp:
##########
@@ -636,8 +689,12 @@ Status ColumnMap::deduplicate_keys(bool recursive) {
 
     if (has_duplicated_key) {
         offsets_column = std::move(new_offsets);
-        keys_column->filter(filter);

Review Comment:
   The duplicate-key in-place update commits `offsets_column` before the 
key/value filtering is guaranteed to finish. If detaching 
`keys_column`/`values_column` or either `filter(filter)` call throws, the map 
is left partially updated: offsets describe the deduplicated shape while one or 
both child columns are still unfiltered or moved-from. Please build all three 
replacement columns in temporaries and publish offsets/keys/values together 
only after the throwing work succeeds, or use a scoped restore guard for the 
original subcolumns.



##########
be/src/format/jni/jni_data_bridge.cpp:
##########
@@ -105,44 +105,55 @@ Status JniDataBridge::fill_column(TableMetaAddress& 
address, ColumnPtr& doris_co
         // org.apache.doris.common.jni.vec.ColumnType.Type#UNSUPPORTED will 
set column address as 0
         return Status::InternalError("Unsupported type {} in java side", 
data_type->get_name());
     }
+    auto mutable_doris_column = IColumn::mutate(std::move(doris_column));
     MutableColumnPtr data_column;
-    if (doris_column->is_nullable()) {
-        auto* nullable_column =
-                
reinterpret_cast<ColumnNullable*>(doris_column->assume_mutable().get());
+    if (mutable_doris_column->is_nullable()) {
+        auto* nullable_column = 
assert_cast<ColumnNullable*>(mutable_doris_column.get());
         data_column = nullable_column->get_nested_column_ptr();
         NullMap& null_map = nullable_column->get_null_map_data();
         size_t origin_size = null_map.size();
         null_map.resize(origin_size + num_rows);
         memcpy(null_map.data() + origin_size, 
static_cast<bool*>(null_map_ptr), num_rows);
     } else {
-        data_column = doris_column->assume_mutable();
+        data_column = mutable_doris_column->get_ptr();
     }
     // Date and DateTime are deprecated and not supported.
+    Status status = Status::OK();
     switch (logical_type) {
-#define DISPATCH(TYPE_INDEX, COLUMN_TYPE, CPP_TYPE)              \
-    case TYPE_INDEX:                                             \
-        return _fill_fixed_length_column<COLUMN_TYPE, CPP_TYPE>( \
-                data_column, 
reinterpret_cast<CPP_TYPE*>(address.next_meta_as_ptr()), num_rows);
+#define DISPATCH(TYPE_INDEX, COLUMN_TYPE, CPP_TYPE)                            
                 \
+    case TYPE_INDEX: {                                                         
                 \
+        auto* data = reinterpret_cast<CPP_TYPE*>(address.next_meta_as_ptr());  
                 \
+        status = _fill_fixed_length_column<COLUMN_TYPE, CPP_TYPE>(data_column, 
data, num_rows); \
+        break;                                                                 
                 \
+    }
         FOR_FIXED_LENGTH_TYPES(DISPATCH)
 #undef DISPATCH
     case PrimitiveType::TYPE_STRING:
         [[fallthrough]];
     case PrimitiveType::TYPE_CHAR:
         [[fallthrough]];
     case PrimitiveType::TYPE_VARCHAR:
-        return _fill_string_column(address, data_column, num_rows);
+        status = _fill_string_column(address, data_column, num_rows);
+        break;
     case PrimitiveType::TYPE_ARRAY:
-        return _fill_array_column(address, data_column, data_type, num_rows);
+        status = _fill_array_column(address, data_column, data_type, num_rows);
+        break;
     case PrimitiveType::TYPE_MAP:
-        return _fill_map_column(address, data_column, data_type, num_rows);
+        status = _fill_map_column(address, data_column, data_type, num_rows);
+        break;
     case PrimitiveType::TYPE_STRUCT:
-        return _fill_struct_column(address, data_column, data_type, num_rows);
+        status = _fill_struct_column(address, data_column, data_type, 
num_rows);
+        break;
     case PrimitiveType::TYPE_VARBINARY:
-        return _fill_varbinary_column(address, data_column, num_rows);
+        status = _fill_varbinary_column(address, data_column, num_rows);
+        break;
     default:
-        return Status::InvalidArgument("Unsupported type {} in jni scanner", 
data_type->get_name());
+        status = Status::InvalidArgument("Unsupported type {} in jni scanner",
+                                         data_type->get_name());
+        break;
     }
-    return Status::OK();
+    doris_column = std::move(mutable_doris_column);
+    return status;

Review Comment:
   `fill_column()` writes the mutated column back even when `status` is non-OK, 
after the nullable branch may already have appended `num_rows` null-map 
entries. For example, an unsupported nullable JNI type reaches the default case 
with the null map extended but no nested data appended, then line 155 commits 
that inconsistent nullable column before returning the error to the caller. 
Please only assign `doris_column` back on success, or restore both the nested 
column and null map sizes on every error path.



##########
be/src/exec/common/data_gen_functions/vnumbers_tvf.cpp:
##########
@@ -49,7 +49,7 @@ Status VNumbersTVF::get_next(RuntimeState* state, Block* 
block, bool* eos) {
     // now only support one column for tvf numbers
     for (int i = 0; i < _slot_num; ++i) {
         if (mem_reuse) {
-            columns[i] = 
std::move(*(block->get_by_position(i).column)).mutate();
+            columns[i] = 
IColumn::mutate(std::move(block->get_by_position(i).column));
         } else {

Review Comment:
   With `mem_reuse`, `get_next()` moves each live output column out of `block` 
and only restores all columns at the end. Allocation failures from 
`insert_many_vals()` or `insert_range_of_integer()` leave the reusable block 
slots moved-from. Please use `block->mutate_columns_scoped()` for this append 
path, or otherwise restore the moved columns on every throwing exit.



##########
be/src/exec/operator/streaming_aggregation_operator.cpp:
##########
@@ -330,8 +330,11 @@ Status 
StreamingAggLocalState::_pre_agg_with_serialized_key(doris::Block* in_blo
             in_block->get_by_position(result_column_id).column =
                     in_block->get_by_position(result_column_id)
                             .column->convert_to_full_column_if_const();
+            auto mutable_column =
+                    
IColumn::mutate(std::move(in_block->get_by_position(result_column_id).column));
+            mutable_column->replace_float_special_values();

Review Comment:
   This steals the expression result column from `in_block` before 
`replace_float_special_values()` is guaranteed to finish. If the COW detach has 
to clone and throws, or if normalizing a nested/variant column throws, the 
input block slot remains moved-from. Please mutate through a scoped restore or 
from a copied `ColumnPtr`, and assign back only after normalization succeeds.



##########
be/src/storage/segment/vertical_segment_writer.cpp:
##########
@@ -92,6 +92,14 @@ inline std::string 
vertical_segment_writer_mem_tracker_name(uint32_t segment_id)
     return "VerticalSegmentWriter:Segment-" + std::to_string(segment_id);
 }
 
+static ColumnBitmap* get_mutable_skip_bitmap_column(Block* block, size_t 
skip_bitmap_col_idx) {
+    auto skip_bitmap_column =
+            
IColumn::mutate(std::move(block->get_by_position(skip_bitmap_col_idx).column));
+    auto* skip_bitmap_column_ptr = 
assert_cast<ColumnBitmap*>(skip_bitmap_column.get());

Review Comment:
   This helper repeats the skip-bitmap ownership issue in the segment writer 
path: the block slot is consumed before the COW detach succeeds, so an 
allocation failure during `IColumn::mutate()` leaves the skip-bitmap column 
moved-from. Please avoid moving from the block until the replacement is ready, 
or guard and restore the original slot on failure.



##########
be/src/exec/operator/hashjoin_build_sink.cpp:
##########
@@ -576,7 +576,9 @@ Status 
HashJoinBuildSinkLocalState::process_build_block(RuntimeState* state, Blo
     for (auto& data : block) {
         data.column = 
std::move(*data.column).mutate()->convert_column_if_overflow();
         if (p._need_finalize_variant_column) {
-            std::move(*data.column).mutate()->finalize();
+            auto mutable_column = IColumn::mutate(std::move(data.column));
+            mutable_column->finalize();

Review Comment:
   When variant finalization is enabled, this moves the live build-block column 
out before `finalize()` succeeds. If finalizing a variant column throws, the 
build block is left with an empty/moved-from slot. Please finalize a detached 
temporary from a copied owner or use a scoped restore, and replace 
`data.column` only after finalization succeeds.



##########
be/src/storage/iterator/block_reader.cpp:
##########
@@ -580,9 +582,10 @@ Status BlockReader::_unique_key_next_block(Block* block, 
bool* eof) {
             LOG(WARNING) << "tablet_id: " << tablet()->tablet_id() << " delete 
sign idx "
                          << delete_sign_idx
                          << " not invalid, skip filter delete in base 
compaction";
+            target_columns_guard.restore();
             return Status::OK();
         }
-        MutableColumnPtr delete_filter_column = 
(*std::move(_delete_filter_column)).mutate();
+        auto delete_filter_column = 
IColumn::mutate(std::move(_delete_filter_column));
         
reinterpret_cast<ColumnUInt8*>(delete_filter_column.get())->resize(target_block_row);

Review Comment:
   This moves `_delete_filter_column` out before resizing and filtering are 
guaranteed to finish. If `resize()`, `block->insert()`, or 
`Block::filter_block()` fails, the reader's reusable filter column has already 
been consumed and the output block may also retain the temporary filter column. 
Please protect `_delete_filter_column` with a restore guard, and only commit 
the moved filter column after filtering succeeds.



##########
be/src/storage/partial_update_info.cpp:
##########
@@ -39,6 +40,18 @@
 #include "storage/utils.h"
 
 namespace doris {
+namespace {
+
+ColumnBitmap* get_mutable_skip_bitmap_column(Block* block, size_t 
skip_bitmap_col_idx) {
+    auto skip_bitmap_column =
+            
IColumn::mutate(std::move(block->get_by_position(skip_bitmap_col_idx).column));
+    auto* skip_bitmap_column_ptr = 
assert_cast<ColumnBitmap*>(skip_bitmap_column.get());

Review Comment:
   `get_mutable_skip_bitmap_column()` moves the live skip-bitmap slot out of 
the block before `IColumn::mutate()` is guaranteed to succeed. If COW cloning 
throws/OOMs, the block slot remains moved-from and the caller cannot safely 
continue unwinding or reuse the block. Please detach from a copied `ColumnPtr` 
and replace the slot only after success, or install a restore guard before 
consuming the slot.



##########
be/src/exec/operator/streaming_aggregation_operator.cpp:
##########
@@ -462,7 +466,7 @@ Status 
StreamingAggLocalState::_get_results_with_serialized_key(RuntimeState* st
     MutableColumns key_columns;
     for (int i = 0; i < key_size; ++i) {
         if (mem_reuse) {
-            
key_columns.emplace_back(std::move(*block->get_by_position(i).column).mutate());
+            
key_columns.emplace_back(IColumn::mutate(std::move(block->get_by_position(i).column)));
         } else {

Review Comment:
   In the `mem_reuse` path this moves key columns out of the caller-provided 
output block, then performs hash-table iteration and key/value 
insertion/serialization before `block->set_columns()` restores the slots at the 
end. Any exception from `insert_keys_into_columns()`, nullable null-key 
insertion, value-column detach, or `serialize_to_column()` leaves the reusable 
output block with moved-from columns. This is the streaming aggregation source 
path, distinct from the already-commented aggregation source paths; it needs 
the same scoped restore/release pattern.



##########
be/src/storage/iterator/vertical_block_reader.cpp:
##########
@@ -483,12 +482,14 @@ Status VerticalBlockReader::_unique_key_next_block(Block* 
block, bool* eof) {
             // delete sign column must store in last column of the block
             int delete_sign_idx = block->columns() - 1;
             DCHECK(delete_sign_idx > 0);
-            auto target_columns = block->mutate_columns();
-            MutableColumnPtr delete_filter_column = 
(*std::move(_delete_filter_column)).mutate();
-            
reinterpret_cast<ColumnUInt8*>(delete_filter_column.get())->resize(block_rows);
-
-            auto* __restrict filter_data =
-                    
reinterpret_cast<ColumnUInt8*>(delete_filter_column.get())->get_data().data();
+            auto target_columns_guard = block->mutate_columns_scoped();
+            auto& target_columns = target_columns_guard.mutable_columns();
+            auto delete_filter_column = 
IColumn::mutate(std::move(_delete_filter_column));
+            auto* delete_filter_data_column =

Review Comment:
   This vertical reader path has the same live-member ownership gap for 
`_delete_filter_column`: it is moved out before `resize()`, appending the 
temporary filter column, and `Block::filter_block()` can complete successfully. 
Any failure leaves the reusable reader state with `_delete_filter_column` 
consumed. Please restore the member on all exits or detach from a copied 
pointer and commit only after the filter succeeds.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to