This is an automated email from the ASF dual-hosted git repository.

Mryange pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 2b4e9061c7b [refine](function) avoid unnecessary COW column clones 
(#64627)
2b4e9061c7b is described below

commit 2b4e9061c7b949108a605d0ee92bbd2e7dd78bdc
Author: Mryange <[email protected]>
AuthorDate: Tue Jun 23 14:07:57 2026 +0800

    [refine](function) avoid unnecessary COW column clones (#64627)
    
    ### What problem does this PR solve?
    
    Some function implementations cloned nullable null maps, array offsets,
    or pass-through columns even though the result only needs to share
    immutable column data. This change reuses those COW subcolumns directly
    in non-mutating paths and keeps explicit clones for paths that modify
    result data.
    
    
    ### Release note
    
    None
---
 be/src/exec/common/util.hpp                        | 23 ++++++++++++++--------
 .../exprs/function/array/function_array_element.h  |  5 ++---
 .../function/array/function_array_enumerate.cpp    |  3 +--
 .../array/function_array_enumerate_uniq.cpp        |  5 ++---
 .../exprs/function/array/function_array_exists.cpp | 12 ++++-------
 .../exprs/function/array/function_array_sortby.cpp | 13 ++++++------
 be/src/exprs/function/cast/cast_to_variant.h       |  4 +---
 be/src/exprs/function/function.cpp                 |  2 +-
 .../function_date_or_datetime_to_string.cpp        |  6 ++----
 be/src/exprs/function/function_nullables.cpp       |  6 +++---
 be/src/exprs/function/function_variant_element.cpp |  3 +--
 be/src/exprs/runtime_filter_expr.cpp               |  2 +-
 be/src/exprs/vtopn_pred.h                          |  2 +-
 .../function/cast/function_variant_cast_test.cpp   | 12 ++++++-----
 14 files changed, 47 insertions(+), 51 deletions(-)

diff --git a/be/src/exec/common/util.hpp b/be/src/exec/common/util.hpp
index 013d0a4f036..a975c0df900 100644
--- a/be/src/exec/common/util.hpp
+++ b/be/src/exec/common/util.hpp
@@ -243,29 +243,36 @@ inline ColumnPtr create_always_true_column(size_t size, 
bool is_nullable) {
 }
 
 // change null element to true element
-inline void change_null_to_true(MutableColumnPtr column, ColumnPtr argument = 
nullptr) {
+inline ColumnPtr change_null_to_true(ColumnPtr&& column, const ColumnPtr& 
argument = nullptr) {
     size_t rows = column->size();
     if (is_column_const(*column)) {
-        change_null_to_true(
-                
assert_cast<ColumnConst*>(column.get())->get_data_column_ptr()->assert_mutable());
-    } else if (column->has_null()) {
-        auto* nullable = assert_cast<ColumnNullable*>(column.get());
+        auto nested_column = assert_cast<const 
ColumnConst*>(column.get())->get_data_column_ptr();
+        auto nested = change_null_to_true(std::move(nested_column));
+        return ColumnConst::create(std::move(nested), rows);
+    }
+
+    auto mutable_column = IColumn::mutate(std::move(column));
+    if (auto* nullable = 
check_and_get_column<ColumnNullable>(*mutable_column)) {
         auto* __restrict data = 
assert_cast<ColumnUInt8*>(nullable->get_nested_column_ptr().get())
                                         ->get_data()
                                         .data();
-        const NullMap& null_map = nullable->get_null_map_data();
+        NullMap& null_map = nullable->get_null_map_data();
         for (size_t i = 0; i < rows; ++i) {
             data[i] |= null_map[i];
         }
         nullable->fill_false_to_nullmap(rows);
-    } else if (argument && argument->has_null()) {
+        return mutable_column;
+    }
+
+    if (argument && argument->has_null()) {
         const auto* __restrict null_map =
                 assert_cast<const 
ColumnNullable*>(argument.get())->get_null_map_data().data();
-        auto* __restrict data = 
assert_cast<ColumnUInt8*>(column.get())->get_data().data();
+        auto* __restrict data = 
assert_cast<ColumnUInt8*>(mutable_column.get())->get_data().data();
         for (size_t i = 0; i < rows; ++i) {
             data[i] |= null_map[i];
         }
     }
+    return mutable_column;
 }
 
 inline size_t calculate_false_number(ColumnPtr column) {
diff --git a/be/src/exprs/function/array/function_array_element.h 
b/be/src/exprs/function/array/function_array_element.h
index 1ea9f6227af..9b93680846b 100644
--- a/be/src/exprs/function/array/function_array_element.h
+++ b/be/src/exprs/function/array/function_array_element.h
@@ -262,9 +262,8 @@ private:
                 res_null_map[i] |= outer[i];
             }
         }
-        block.replace_by_position(
-                result, 
ColumnNullable::create(res_nested->clone_resized(input_rows_count),
-                                               std::move(res_null_column)));
+        block.replace_by_position(result,
+                                  ColumnNullable::create(res_nested, 
std::move(res_null_column)));
         return Status::OK();
     }
 
diff --git a/be/src/exprs/function/array/function_array_enumerate.cpp 
b/be/src/exprs/function/array/function_array_enumerate.cpp
index 7d82da40ddf..7b57df93deb 100644
--- a/be/src/exprs/function/array/function_array_enumerate.cpp
+++ b/be/src/exprs/function/array/function_array_enumerate.cpp
@@ -106,8 +106,7 @@ public:
         ColumnPtr res_column =
                 ColumnArray::create(std::move(nested_column), 
array->get_offsets_ptr());
         if (const auto* nullable = 
check_and_get_column<ColumnNullable>(left_column.get())) {
-            res_column = ColumnNullable::create(
-                    res_column, 
nullable->get_null_map_column().clone_resized(nullable->size()));
+            res_column = ColumnNullable::create(res_column, 
nullable->get_null_map_column_ptr());
         }
         block.replace_by_position(result, std::move(res_column));
         return Status::OK();
diff --git a/be/src/exprs/function/array/function_array_enumerate_uniq.cpp 
b/be/src/exprs/function/array/function_array_enumerate_uniq.cpp
index bff8758352e..c91b3a420b3 100644
--- a/be/src/exprs/function/array/function_array_enumerate_uniq.cpp
+++ b/be/src/exprs/function/array/function_array_enumerate_uniq.cpp
@@ -198,9 +198,8 @@ public:
             auto left_column =
                     
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
             if (const auto* nullable = 
check_and_get_column<ColumnNullable>(left_column.get())) {
-                res_column = ColumnNullable::create(
-                        res_column,
-                        
nullable->get_null_map_column().clone_resized(nullable->size()));
+                res_column =
+                        ColumnNullable::create(res_column, 
nullable->get_null_map_column_ptr());
             }
         }
 
diff --git a/be/src/exprs/function/array/function_array_exists.cpp 
b/be/src/exprs/function/array/function_array_exists.cpp
index ffa74d24e8d..3ae69836270 100644
--- a/be/src/exprs/function/array/function_array_exists.cpp
+++ b/be/src/exprs/function/array/function_array_exists.cpp
@@ -68,19 +68,16 @@ public:
         const auto first_column =
                 
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
         const ColumnArray& first_col_array = assert_cast<const 
ColumnArray&>(*first_column);
-        const auto& first_off_data = first_col_array.get_offsets_column();
-
         const auto& nested_nullable_column =
                 assert_cast<const 
ColumnNullable&>(*first_col_array.get_data_ptr());
         const auto nested_column = 
nested_nullable_column.get_nested_column_ptr();
         const size_t nested_column_size = nested_column->size();
-        MutableColumnPtr result_null_map =
-                
nested_nullable_column.get_null_map_column_ptr()->clone_resized(nested_column_size);
+        ColumnPtr result_null_map = 
nested_nullable_column.get_null_map_column_ptr();
 
         // 2. compute result
         auto result_column = ColumnUInt8::create(nested_column_size, 0);
         auto* __restrict result_column_data = result_column->get_data().data();
-        MutableColumnPtr result_offset_column = 
first_off_data.clone_resized(first_off_data.size());
+        ColumnPtr result_offset_column = first_col_array.get_offsets_ptr();
         const auto* __restrict nested_column_data =
                 assert_cast<const 
ColumnUInt8&>(*nested_column).get_data().data();
 
@@ -89,9 +86,8 @@ public:
         }
 
         ColumnPtr result_nullalble_column =
-                ColumnNullable::create(std::move(result_column), 
std::move(result_null_map));
-        ColumnPtr column_array =
-                ColumnArray::create(result_nullalble_column, 
std::move(result_offset_column));
+                ColumnNullable::create(result_column->get_ptr(), 
result_null_map);
+        ColumnPtr column_array = ColumnArray::create(result_nullalble_column, 
result_offset_column);
         block.replace_by_position(result, column_array);
         return Status::OK();
     }
diff --git a/be/src/exprs/function/array/function_array_sortby.cpp 
b/be/src/exprs/function/array/function_array_sortby.cpp
index 8b8f3589d96..ad8cfb09697 100644
--- a/be/src/exprs/function/array/function_array_sortby.cpp
+++ b/be/src/exprs/function/array/function_array_sortby.cpp
@@ -89,13 +89,12 @@ public:
                 assert_cast<const 
ColumnNullable&>(key_column_array.get_data());
 
         auto result_data_column = src_nested_nullable_column.clone_empty();
-        auto result_offset_column =
-                
src_column_array.get_offsets_column().clone_resized(input_rows_count);
-        MutableColumnPtr result_nullmap = nullptr;
+        ColumnPtr result_offset_column = src_column_array.get_offsets_ptr();
+        ColumnPtr result_nullmap = nullptr;
         const ColumnUInt8::Container* src_null_map_data = nullptr;
         if (argument_nullmap[0]) {
             const auto& src_column_nullmap = assert_cast<const 
ColumnUInt8&>(*argument_nullmap[0]);
-            result_nullmap = 
src_column_nullmap.clone_resized(input_rows_count);
+            result_nullmap = argument_nullmap[0];
             src_null_map_data = &(src_column_nullmap.get_data());
         }
         const ColumnUInt8::Container* key_null_map_data = nullptr;
@@ -151,11 +150,11 @@ public:
             block.replace_by_position(
                     result,
                     
ColumnNullable::create(ColumnArray::create(std::move(result_data_column),
-                                                               
std::move(result_offset_column)),
-                                           std::move(result_nullmap)));
+                                                               
result_offset_column),
+                                           result_nullmap));
         } else {
             block.replace_by_position(result, 
ColumnArray::create(std::move(result_data_column),
-                                                                  
std::move(result_offset_column)));
+                                                                  
result_offset_column));
         }
         return Status::OK();
     }
diff --git a/be/src/exprs/function/cast/cast_to_variant.h 
b/be/src/exprs/function/cast/cast_to_variant.h
index 0efc29047b1..bfef6b2918a 100644
--- a/be/src/exprs/function/cast/cast_to_variant.h
+++ b/be/src/exprs/function/cast/cast_to_variant.h
@@ -46,10 +46,8 @@ inline Status cast_from_variant_impl(FunctionContext* 
context, Block& block,
         auto finalized_variant = variant->clone_finalized();
         variant = assert_cast<const ColumnVariant*>(finalized_variant.get());
         if (nullable != nullptr) {
-            auto cloned_null_map =
-                    
nullable->get_null_map_column_ptr()->clone_resized(input_rows_count);
             finalized_input_column = 
ColumnNullable::create(std::move(finalized_variant),
-                                                            
std::move(cloned_null_map));
+                                                            
nullable->get_null_map_column_ptr());
         } else {
             finalized_input_column = std::move(finalized_variant);
         }
diff --git a/be/src/exprs/function/function.cpp 
b/be/src/exprs/function/function.cpp
index 590eb638292..6d33b1caa4a 100644
--- a/be/src/exprs/function/function.cpp
+++ b/be/src/exprs/function/function.cpp
@@ -62,7 +62,7 @@ ColumnPtr wrap_in_nullable(const ColumnPtr& src, const Block& 
block, const Colum
         if (auto nullable = cast_to_column<ColumnNullable>(elem.column); 
nullable->has_null()) {
             const ColumnPtr& null_map_column = 
nullable->get_null_map_column_ptr();
             if (!result_null_map_column) { // NOLINT(bugprone-use-after-move)
-                result_null_map_column = 
null_map_column->clone_resized(input_rows_count);
+                result_null_map_column = null_map_column;
                 continue;
             }
 
diff --git a/be/src/exprs/function/function_date_or_datetime_to_string.cpp 
b/be/src/exprs/function/function_date_or_datetime_to_string.cpp
index d808d806307..c7b240df961 100644
--- a/be/src/exprs/function/function_date_or_datetime_to_string.cpp
+++ b/be/src/exprs/function/function_date_or_datetime_to_string.cpp
@@ -166,10 +166,8 @@ public:
         if (null_map) {
             const auto* nullable_col = assert_cast<const 
ColumnNullable*>(source_col.get());
             block.replace_by_position(
-                    result,
-                    ColumnNullable::create(std::move(col_res),
-                                           
nullable_col->get_null_map_column_ptr()->clone_resized(
-                                                   input_rows_count)));
+                    result, ColumnNullable::create(std::move(col_res),
+                                                   
nullable_col->get_null_map_column_ptr()));
         } else {
             block.replace_by_position(result, std::move(col_res));
         }
diff --git a/be/src/exprs/function/function_nullables.cpp 
b/be/src/exprs/function/function_nullables.cpp
index 902d5ebe2c1..99a9a427195 100644
--- a/be/src/exprs/function/function_nullables.cpp
+++ b/be/src/exprs/function/function_nullables.cpp
@@ -59,7 +59,7 @@ public:
             block.replace_by_position(
                     result, ColumnNullable::create(col, 
ColumnBool::create(input_rows_count, 0)));
         } else { // column is ColumnNullable
-            block.replace_by_position(result, 
col->clone_resized(input_rows_count));
+            block.replace_by_position(result, col);
         }
         return Status::OK();
     }
@@ -93,9 +93,9 @@ public:
                         data.name);
             }
             const ColumnPtr& nest_col = col_null->get_nested_column_ptr();
-            block.replace_by_position(result, 
nest_col->clone_resized(nest_col->size()));
+            block.replace_by_position(result, nest_col);
         } else {
-            block.replace_by_position(result, 
data.column->clone_resized(input_rows_count));
+            block.replace_by_position(result, data.column);
         }
         return Status::OK();
     }
diff --git a/be/src/exprs/function/function_variant_element.cpp 
b/be/src/exprs/function/function_variant_element.cpp
index 012f11e80c9..d045eaa1126 100644
--- a/be/src/exprs/function/function_variant_element.cpp
+++ b/be/src/exprs/function/function_variant_element.cpp
@@ -92,8 +92,7 @@ public:
         }
         if (var.is_scalar_variant() && is_column_nullable(*var.get_root())) {
             const auto* nullable = assert_cast<const 
ColumnNullable*>(var.get_root().get());
-            return ColumnNullable::create(
-                    col, 
nullable->get_null_map_column_ptr()->clone_resized(col->size()));
+            return ColumnNullable::create(col, 
nullable->get_null_map_column_ptr());
         }
         return make_nullable(col);
     }
diff --git a/be/src/exprs/runtime_filter_expr.cpp 
b/be/src/exprs/runtime_filter_expr.cpp
index e335e8ed4da..8544c809206 100644
--- a/be/src/exprs/runtime_filter_expr.cpp
+++ b/be/src/exprs/runtime_filter_expr.cpp
@@ -121,7 +121,7 @@ Status RuntimeFilterExpr::execute_filter(VExprContext* 
context, const Block* blo
     // bloom filter will handle null aware inside itself
     if (_null_aware && TExprNodeType::BLOOM_PRED != node_type()) {
         DCHECK(arg_column);
-        change_null_to_true(filter_column->assert_mutable(), arg_column);
+        filter_column = change_null_to_true(std::move(filter_column), 
arg_column);
     }
 
     if (const auto* const_column = 
check_and_get_column<ColumnConst>(*filter_column)) {
diff --git a/be/src/exprs/vtopn_pred.h b/be/src/exprs/vtopn_pred.h
index 526787bccc6..94887588f53 100644
--- a/be/src/exprs/vtopn_pred.h
+++ b/be/src/exprs/vtopn_pred.h
@@ -118,7 +118,7 @@ public:
         result_column = 
std::move(temp_block.get_by_position(num_columns_without_result).column);
         if (is_nullable() && _predicate->nulls_first()) {
             // null values ​​are always not filtered
-            change_null_to_true(result_column->assert_mutable());
+            result_column = change_null_to_true(std::move(result_column));
         }
         DCHECK_EQ(result_column->size(), count);
         return Status::OK();
diff --git a/be/test/exprs/function/cast/function_variant_cast_test.cpp 
b/be/test/exprs/function/cast/function_variant_cast_test.cpp
index 51034ad6e03..7576bc6cab9 100644
--- a/be/test/exprs/function/cast/function_variant_cast_test.cpp
+++ b/be/test/exprs/function/cast/function_variant_cast_test.cpp
@@ -523,15 +523,15 @@ TEST(FunctionVariantCast, CastFromVariantWithEmptyRoot) {
                   
"{\"v\":{\"a\":20,\"b\":\"20\",\"c\":20,\"e\":\"50\",\"f\":20}}");
     }
 
-    // Test case 5: nullable source null-map is preserved after the nested 
string cast is limited
-    // to input_rows_count.
+    // Test case 5: nullable source null-map is preserved.
     {
         auto variant_col = construct_basic_varint_column();
         variant_col->finalize();
-        auto null_map = ColumnUInt8::create(variant_col->size(), 0);
+        auto single_variant_col = variant_col->cut(0, 1);
+        auto null_map = ColumnUInt8::create(single_variant_col->size(), 0);
         null_map->get_data()[0] = 1;
         auto nullable_variant_col =
-                ColumnNullable::create(std::move(variant_col), 
std::move(null_map));
+                ColumnNullable::create(std::move(single_variant_col), 
std::move(null_map));
 
         auto nullable_string_type = 
make_nullable(std::make_shared<DataTypeString>());
         auto variant_type = std::make_shared<DataTypeVariant>();
@@ -550,7 +550,9 @@ TEST(FunctionVariantCast, CastFromVariantWithEmptyRoot) {
         block.insert({nullptr, nullable_string_type, "result"});
         RuntimeState state;
         auto ctx = FunctionContext::create_context(&state, {}, {});
-        ASSERT_TRUE(function->execute(ctx.get(), block, {0}, result_column, 
1).ok());
+        ASSERT_TRUE(function->execute(ctx.get(), block, {0}, result_column,
+                                      nullable_variant_col->size())
+                            .ok());
 
         auto result_col = block.get_by_position(result_column).column;
         ASSERT_NE(result_col.get(), nullptr);


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to