github-actions[bot] commented on code in PR #63001:
URL: https://github.com/apache/doris/pull/63001#discussion_r3210117214
##########
be/src/core/column/column_array.cpp:
##########
@@ -98,6 +100,21 @@ ColumnArray::ColumnArray(MutableColumnPtr&& nested_column)
: data(std::move(nest
offsets = ColumnOffsets::create();
}
Review Comment:
This shared-column constructor skips the invariants enforced by the mutable
constructor above: `offsets_column` must actually be `ColumnOffset64`, and when
offsets are non-empty the nested data size must match `offsets.back()`. Public
callers such as `ColumnArray::create(const ColumnPtr&, const ColumnPtr&)` now
route here, so an invalid array can be constructed and later
`offset_at`/`size_at`, filtering, or serialization can read inconsistent
offsets. Please keep this path shared, but still perform the same const-only,
offsets-type, and data-size validation using const access.
##########
be/src/core/column/column_map.cpp:
##########
@@ -518,59 +518,82 @@ void ColumnMap::insert_range_from_ignore_overflow(const
IColumn& src, size_t sta
}
ColumnPtr ColumnMap::filter(const Filter& filt, ssize_t result_size_hint)
const {
- auto k_arr =
- ColumnArray::create(keys_column->assume_mutable(),
offsets_column->assume_mutable())
- ->filter(filt, result_size_hint);
- auto v_arr =
- ColumnArray::create(values_column->assume_mutable(),
offsets_column->assume_mutable())
- ->filter(filt, result_size_hint);
+ // For const filter we must clone subcolumns so the original ColumnMap
remains intact.
+ // IColumn::mutate(copy) clones if use_count>1, returns self if exclusive.
+ auto offsets_mut =
IColumn::mutate(static_cast<IColumn::Ptr>(offsets_column));
+ MutableColumnPtr offsets_copy = offsets_mut->clone_empty();
+ offsets_copy->insert_range_from(*offsets_mut, 0, offsets_mut->size());
+ auto k_arr =
ColumnArray::create(IColumn::mutate(static_cast<IColumn::Ptr>(keys_column)),
+ std::move(offsets_mut))
+ ->filter(filt, result_size_hint);
+ auto v_arr =
ColumnArray::create(IColumn::mutate(static_cast<IColumn::Ptr>(values_column)),
+ std::move(offsets_copy))
+ ->filter(filt, result_size_hint);
return ColumnMap::create(assert_cast<const
ColumnArray&>(*k_arr).get_data_ptr(),
assert_cast<const
ColumnArray&>(*v_arr).get_data_ptr(),
assert_cast<const
ColumnArray&>(*k_arr).get_offsets_ptr());
}
size_t ColumnMap::filter(const Filter& filter) {
- MutableColumnPtr copied_off = offsets_column->clone_empty();
- copied_off->insert_range_from(*offsets_column, 0, offsets_column->size());
- ColumnArray::create(keys_column->assume_mutable(),
offsets_column->assume_mutable())
- ->filter(filter);
- ColumnArray::create(values_column->assume_mutable(),
copied_off->assume_mutable())
- ->filter(filter);
- return get_offsets().size();
+ // Move subcolumns out of this ColumnMap to get exclusive ownership, then
write back.
+ auto keys_mut =
IColumn::mutate(std::move(static_cast<IColumn::Ptr&>(keys_column)));
+ auto offsets_mut =
IColumn::mutate(std::move(static_cast<IColumn::Ptr&>(offsets_column)));
+ auto values_mut =
IColumn::mutate(std::move(static_cast<IColumn::Ptr&>(values_column)));
+ // Clone offsets for values (both keys and values share the same offsets
structure)
+ MutableColumnPtr copied_off = offsets_mut->clone_empty();
+ copied_off->insert_range_from(*offsets_mut, 0, offsets_mut->size());
+ auto k_arr = ColumnArray::create(std::move(keys_mut),
std::move(offsets_mut));
+ k_arr->filter(filter);
+ auto v_arr = ColumnArray::create(std::move(values_mut),
std::move(copied_off));
+ v_arr->filter(filter);
+ // Put filtered subcolumns back
+ static_cast<IColumn::Ptr&>(keys_column) = k_arr->get_data_ptr();
+ static_cast<IColumn::Ptr&>(offsets_column) = k_arr->get_offsets_ptr();
+ static_cast<IColumn::Ptr&>(values_column) = v_arr->get_data_ptr();
+ // Use const access to avoid assume_mutable_ref() on the just-written-back
offsets_column
+ // (k_arr still holds a ref, so use_count > 1 until k_arr goes out of
scope)
+ return static_cast<const IColumn::Ptr&>(offsets_column)->size();
}
MutableColumnPtr ColumnMap::permute(const Permutation& perm, size_t limit)
const {
- // Make a temp column array
- auto k_arr =
- ColumnArray::create(keys_column->assume_mutable(),
offsets_column->assume_mutable())
- ->permute(perm, limit);
- auto v_arr =
- ColumnArray::create(values_column->assume_mutable(),
offsets_column->assume_mutable())
- ->permute(perm, limit);
+ // Const permute: clone subcolumns so the original ColumnMap remains
intact.
+ auto offsets_mut =
IColumn::mutate(static_cast<IColumn::Ptr>(offsets_column));
+ MutableColumnPtr offsets_copy = offsets_mut->clone_empty();
+ offsets_copy->insert_range_from(*offsets_mut, 0, offsets_mut->size());
+ auto k_arr =
ColumnArray::create(IColumn::mutate(static_cast<IColumn::Ptr>(keys_column)),
+ std::move(offsets_mut))
+ ->permute(perm, limit);
+ auto v_arr =
ColumnArray::create(IColumn::mutate(static_cast<IColumn::Ptr>(values_column)),
+ std::move(offsets_copy))
+ ->permute(perm, limit);
return ColumnMap::create(assert_cast<const
ColumnArray&>(*k_arr).get_data_ptr(),
assert_cast<const
ColumnArray&>(*v_arr).get_data_ptr(),
assert_cast<const
ColumnArray&>(*k_arr).get_offsets_ptr());
}
Status ColumnMap::deduplicate_keys(bool recursive) {
- const auto inner_rows = keys_column->size();
- const auto rows = offsets_column->size();
+ const IColumn& ck = *static_cast<const IColumn::Ptr&>(keys_column);
+ const IColumn& co = *static_cast<const IColumn::Ptr&>(offsets_column);
+ const auto inner_rows = ck.size();
+ const auto rows = co.size();
if (recursive) {
- auto values_column_ = values_column;
+ const IColumn::Ptr& values_ptr = static_cast<const
IColumn::Ptr&>(values_column);
+ IColumn::Ptr values_column_ = values_ptr;
if (values_column_->is_nullable()) {
- values_column_ =
(assert_cast<ColumnNullable&>(*values_column)).get_nested_column_ptr();
+ values_column_ =
+ assert_cast<const
ColumnNullable&>(*values_column_).get_nested_column_ptr();
}
if (auto* values_map =
check_and_get_column<ColumnMap>(values_column_.get())) {
- RETURN_IF_ERROR(values_map->deduplicate_keys(recursive));
+
RETURN_IF_ERROR(const_cast<ColumnMap*>(values_map)->deduplicate_keys(recursive));
}
}
Review Comment:
This recursive mutation bypasses COW by const-casting the nested `ColumnMap`
reached through `values_column`. If the map value subcolumn is shared,
`deduplicate_keys(true)` can move/filter the nested map's subcolumns in place
and mutate all aliases of that nested map. This is distinct from the top-level
filtering below, which correctly detaches with
`IColumn::mutate(std::move(...))`; the recursive value path should similarly
detach the nullable/value subcolumn and write the deduplicated nested map back
into `values_column` before continuing.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]