This is an automated email from the ASF dual-hosted git repository.

Mryange pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new e23b4030540 [refine](column) Make filter_by_selector const (#65047)
e23b4030540 is described below

commit e23b4030540182544a00cdbdc6742e9f140ef730
Author: Mryange <[email protected]>
AuthorDate: Wed Jul 1 23:45:58 2026 +0800

    [refine](column) Make filter_by_selector const (#65047)
    
    ### What problem does this PR solve?
    
    
    `filter_by_selector` reads from the source column and writes selected
    rows into a destination column, so the source column should be accessed
    through a const interface. Root cause: the interface was non-const, and
    `ColumnNullable::filter_by_selector` wrote the destination nested column
    through a `const_cast` on the destination nullable internals. This
    change makes the interface const, updates the supported column
    implementations, removes the nullable `const_cast`, and keeps
    `ColumnDictionary` read-only by using a local temporary `StringRef`
    buffer.
    
    
    ### Release note
    
    None
---
 be/src/core/column/column.h                    |  3 ++-
 be/src/core/column/column_decimal.h            |  3 ++-
 be/src/core/column/column_dictionary.h         | 17 ++++++++++-------
 be/src/core/column/column_nullable.cpp         | 10 ++++------
 be/src/core/column/column_nullable.h           |  3 ++-
 be/src/core/column/column_string.cpp           |  3 ++-
 be/src/core/column/column_string.h             |  3 ++-
 be/src/core/column/column_vector.h             |  3 ++-
 be/test/core/column/column_dictionary_test.cpp | 12 ++++++------
 be/test/core/column/column_nullable_test.cpp   |  3 ++-
 be/test/core/column/column_string_test.cpp     |  4 ++--
 11 files changed, 36 insertions(+), 28 deletions(-)

diff --git a/be/src/core/column/column.h b/be/src/core/column/column.h
index 88dcbfdaabe..730bb206e69 100644
--- a/be/src/core/column/column.h
+++ b/be/src/core/column/column.h
@@ -462,7 +462,8 @@ public:
      *  // nullable -> predict_column
      *  // string (dictionary) -> column_dictionary
      */
-    virtual Status filter_by_selector(const uint16_t* sel, size_t sel_size, 
IColumn* col_ptr) {
+    virtual Status filter_by_selector(const uint16_t* sel, size_t sel_size,
+                                      IColumn* col_ptr) const {
         throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
                                "Method filter_by_selector is not supported for 
{}, only "
                                "column_nullable, column_dictionary and 
predict_column support",
diff --git a/be/src/core/column/column_decimal.h 
b/be/src/core/column/column_decimal.h
index 66039dc2f0e..0f2d96f7e34 100644
--- a/be/src/core/column/column_decimal.h
+++ b/be/src/core/column/column_decimal.h
@@ -148,7 +148,8 @@ public:
         memset(data.data() + old_size, 0, length * sizeof(data[0]));
     }
 
-    Status filter_by_selector(const uint16_t* sel, size_t sel_size, IColumn* 
col_ptr) override {
+    Status filter_by_selector(const uint16_t* sel, size_t sel_size,
+                              IColumn* col_ptr) const override {
         Self* output = assert_cast<Self*>(col_ptr);
         auto& res_data = output->get_data();
         DCHECK(res_data.empty())
diff --git a/be/src/core/column/column_dictionary.h 
b/be/src/core/column/column_dictionary.h
index 57d6ab66e8e..7c60c7613e4 100644
--- a/be/src/core/column/column_dictionary.h
+++ b/be/src/core/column/column_dictionary.h
@@ -152,19 +152,23 @@ public:
                                "permute not supported in ColumnDictionary");
     }
 
-    Status filter_by_selector(const uint16_t* sel, size_t sel_size, IColumn* 
col_ptr) override {
+    Status filter_by_selector(const uint16_t* sel, size_t sel_size,
+                              IColumn* col_ptr) const override {
         auto* res_col = assert_cast<ColumnString*>(col_ptr);
-        _strings.resize(sel_size);
+        if (sel_size == 0) {
+            return Status::OK();
+        }
+        std::vector<StringRef> strings(sel_size);
         size_t length = 0;
         for (size_t i = 0; i != sel_size; ++i) {
-            auto& value = _dict.get_value(_codes[sel[i]]);
-            _strings[i].data = value.data;
-            _strings[i].size = value.size;
+            const auto& value = _dict.get_value(_codes[sel[i]]);
+            strings[i].data = value.data;
+            strings[i].size = value.size;
             length += value.size;
         }
         res_col->get_offsets().reserve(sel_size + 
res_col->get_offsets().size());
         res_col->get_chars().reserve(length + res_col->get_chars().size());
-        res_col->insert_many_strings_without_reserve(_strings.data(), 
sel_size);
+        res_col->insert_many_strings_without_reserve(strings.data(), sel_size);
         return Status::OK();
     }
 
@@ -454,7 +458,6 @@ private:
     Dictionary _dict;
     Container _codes;
     std::pair<RowsetId, uint32_t> _rowset_segment_id;
-    std::vector<StringRef> _strings;
 };
 
 } // namespace doris
diff --git a/be/src/core/column/column_nullable.cpp 
b/be/src/core/column/column_nullable.cpp
index cc9a5f686df..ed51d4e7a42 100644
--- a/be/src/core/column/column_nullable.cpp
+++ b/be/src/core/column/column_nullable.cpp
@@ -448,17 +448,15 @@ size_t ColumnNullable::filter(const Filter& filter) {
     return data_result_size;
 }
 
-Status ColumnNullable::filter_by_selector(const uint16_t* sel, size_t 
sel_size, IColumn* col_ptr) {
+Status ColumnNullable::filter_by_selector(const uint16_t* sel, size_t sel_size,
+                                          IColumn* col_ptr) const {
     auto* nullable_col_ptr = assert_cast<ColumnNullable*>(col_ptr);
-    // Access the nested column via const path to avoid assert_mutable_ref 
(which requires
-    // exclusive ownership). The output col_ptr was just created, so its 
nested column is exclusive.
-    auto nest_col_raw = const_cast<IColumn*>(
-            static_cast<const 
IColumn::WrappedPtr&>(nullable_col_ptr->_nested_column).get());
+    auto nested_column = nullable_col_ptr->get_nested_column_ptr();
 
     /// `get_null_map_data` will set `_need_update_has_null` to true
     auto& res_nullmap = nullable_col_ptr->get_null_map_data();
 
-    RETURN_IF_ERROR(get_nested_column().filter_by_selector(sel, sel_size, 
nest_col_raw));
+    RETURN_IF_ERROR(get_nested_column().filter_by_selector(sel, sel_size, 
nested_column.get()));
     DCHECK(res_nullmap.empty());
     res_nullmap.resize(sel_size);
     auto& cur_nullmap = get_null_map_column().get_data();
diff --git a/be/src/core/column/column_nullable.h 
b/be/src/core/column/column_nullable.h
index 563d5e7011d..14a764c4aca 100644
--- a/be/src/core/column/column_nullable.h
+++ b/be/src/core/column/column_nullable.h
@@ -213,7 +213,8 @@ public:
 
     size_t filter(const Filter& filter) override;
 
-    Status filter_by_selector(const uint16_t* sel, size_t sel_size, IColumn* 
col_ptr) override;
+    Status filter_by_selector(const uint16_t* sel, size_t sel_size,
+                              IColumn* col_ptr) const override;
     MutableColumnPtr permute(const Permutation& perm, size_t limit) const 
override;
     //    ColumnPtr index(const IColumn & indexes, size_t limit) const 
override;
     int compare_at(size_t n, size_t m, const IColumn& rhs_, int 
null_direction_hint) const override;
diff --git a/be/src/core/column/column_string.cpp 
b/be/src/core/column/column_string.cpp
index 399c0952b89..a2e128d0d79 100644
--- a/be/src/core/column/column_string.cpp
+++ b/be/src/core/column/column_string.cpp
@@ -365,7 +365,8 @@ size_t ColumnStr<T>::filter(const IColumn::Filter& filter) {
 }
 
 template <typename T>
-Status ColumnStr<T>::filter_by_selector(const uint16_t* sel, size_t sel_size, 
IColumn* col_ptr) {
+Status ColumnStr<T>::filter_by_selector(const uint16_t* sel, size_t sel_size,
+                                        IColumn* col_ptr) const {
     if constexpr (std::is_same_v<UInt32, T>) {
         auto* col = static_cast<ColumnStr<T>*>(col_ptr);
         Chars& res_chars = col->chars;
diff --git a/be/src/core/column/column_string.h 
b/be/src/core/column/column_string.h
index 7bae712f849..9ca55caa9c6 100644
--- a/be/src/core/column/column_string.h
+++ b/be/src/core/column/column_string.h
@@ -502,7 +502,8 @@ public:
     ColumnPtr filter(const IColumn::Filter& filt, ssize_t result_size_hint) 
const override;
     size_t filter(const IColumn::Filter& filter) override;
 
-    Status filter_by_selector(const uint16_t* sel, size_t sel_size, IColumn* 
col_ptr) override;
+    Status filter_by_selector(const uint16_t* sel, size_t sel_size,
+                              IColumn* col_ptr) const override;
 
     MutableColumnPtr permute(const IColumn::Permutation& perm, size_t limit) 
const override;
 
diff --git a/be/src/core/column/column_vector.h 
b/be/src/core/column/column_vector.h
index 90038e2da89..4f3a7a6c46f 100644
--- a/be/src/core/column/column_vector.h
+++ b/be/src/core/column/column_vector.h
@@ -274,7 +274,8 @@ public:
 
     void insert_value(const value_type value) { data.push_back(value); }
 
-    Status filter_by_selector(const uint16_t* sel, size_t sel_size, IColumn* 
col_ptr) override {
+    Status filter_by_selector(const uint16_t* sel, size_t sel_size,
+                              IColumn* col_ptr) const override {
         Self* output = assert_cast<Self*>(col_ptr);
         auto& res_data = output->get_data();
         DCHECK(res_data.empty())
diff --git a/be/test/core/column/column_dictionary_test.cpp 
b/be/test/core/column/column_dictionary_test.cpp
index e802eae7541..389485e2181 100644
--- a/be/test/core/column/column_dictionary_test.cpp
+++ b/be/test/core/column/column_dictionary_test.cpp
@@ -248,8 +248,9 @@ TEST_F(ColumnDictionaryTest, permute) {
 }
 TEST_F(ColumnDictionaryTest, filter_by_selector) {
     auto test_func = [&](const auto& source_column) {
-        auto src_size = source_column->size();
-        const auto& codes_data = source_column->get_data();
+        const auto& source = *source_column;
+        auto src_size = source.size();
+        const auto& codes_data = source.get_data();
         EXPECT_TRUE(src_size <= UINT16_MAX);
 
         auto target_column = ColumnString::create();
@@ -262,13 +263,12 @@ TEST_F(ColumnDictionaryTest, filter_by_selector) {
         size_t sel_size = src_size / 2;
         indices.resize(sel_size);
 
-        auto status =
-                source_column->filter_by_selector(indices.data(), sel_size, 
target_column.get());
+        auto status = source.filter_by_selector(indices.data(), sel_size, 
target_column.get());
         EXPECT_TRUE(status.ok());
         EXPECT_EQ(target_column->size(), sel_size);
         for (size_t i = 0; i != sel_size; ++i) {
             auto real_data = target_column->get_data_at(i);
-            auto expect_data = 
source_column->get_value(codes_data[indices[i]]);
+            auto expect_data = source.get_value(codes_data[indices[i]]);
             if (real_data != expect_data) {
                 std::cout << "index: " << i << ", real_data: " << 
real_data.to_string()
                           << "\nexpect_data: " << expect_data.to_string() << 
std::endl;
@@ -364,4 +364,4 @@ std::vector<StringRef> ColumnDictionaryTest::dict_array;
 ColumnDictI32::MutablePtr ColumnDictionaryTest::column_dict_char;
 ColumnDictI32::MutablePtr ColumnDictionaryTest::column_dict_varchar;
 ColumnDictI32::MutablePtr ColumnDictionaryTest::column_dict_str;
-} // namespace doris
\ No newline at end of file
+} // namespace doris
diff --git a/be/test/core/column/column_nullable_test.cpp 
b/be/test/core/column/column_nullable_test.cpp
index 96db41296ef..77f167c9ea8 100644
--- a/be/test/core/column/column_nullable_test.cpp
+++ b/be/test/core/column/column_nullable_test.cpp
@@ -113,7 +113,8 @@ TEST(ColumnNullableTest, PredicateTest) {
     EXPECT_FALSE(null_dst->has_null());
 
     uint16_t selector[] = {5, 8}; // both null
-    EXPECT_EQ(nullable_pred->filter_by_selector(selector, 2, null_dst.get()), 
Status::OK());
+    const IColumn& nullable_src = *nullable_pred;
+    EXPECT_EQ(nullable_src.filter_by_selector(selector, 2, null_dst.get()), 
Status::OK());
     // filter_by_selector must announce to update has_null to make below right.
     EXPECT_TRUE(null_dst->has_null());
 }
diff --git a/be/test/core/column/column_string_test.cpp 
b/be/test/core/column/column_string_test.cpp
index 6d29b555367..37de6fe56e2 100644
--- a/be/test/core/column/column_string_test.cpp
+++ b/be/test/core/column/column_string_test.cpp
@@ -978,8 +978,8 @@ TEST_F(ColumnStringTest, filter_by_selector) {
         }
         std::cout << std::endl;
 
-        auto status =
-                source_column->filter_by_selector(indices.data(), sel_size, 
target_column.get());
+        const auto& source = *source_column;
+        auto status = source.filter_by_selector(indices.data(), sel_size, 
target_column.get());
         EXPECT_TRUE(status.ok());
         EXPECT_EQ(target_column->size(), sel_size);
         for (size_t i = 0; i != sel_size; ++i) {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to