This is an automated email from the ASF dual-hosted git repository.
Gabriel39 pushed a commit to branch refact_reader_branch
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/refact_reader_branch by this
push:
new 2539b0a4a7b [test](be) Add DeletePredicate unit tests (#63455)
2539b0a4a7b is described below
commit 2539b0a4a7b1996017c5c5b000366a3374b9892d
Author: Gabriel <[email protected]>
AuthorDate: Thu May 21 12:33:58 2026 +0800
[test](be) Add DeletePredicate unit tests (#63455)
---
be/src/format/reader/expr/delete_predicate.cpp | 50 ++++++-
be/src/format/reader/expr/delete_predicate.h | 5 +-
.../format/reader/expr/delete_predicate_test.cpp | 155 +++++++++++++++++++++
be/test/format/reader/expr/table_expr_test.cpp | 5 +-
4 files changed, 206 insertions(+), 9 deletions(-)
diff --git a/be/src/format/reader/expr/delete_predicate.cpp
b/be/src/format/reader/expr/delete_predicate.cpp
index 8a4ac54102f..d1ca03a5201 100644
--- a/be/src/format/reader/expr/delete_predicate.cpp
+++ b/be/src/format/reader/expr/delete_predicate.cpp
@@ -60,13 +60,53 @@ void DeletePredicate::close(VExprContext* context,
FunctionContext::FunctionStat
VExpr::close(context, scope);
}
-Status DeletePredicate::execute_column_impl(VExprContext* context, const
Block* block,
- const Selector* selector, size_t
count,
- ColumnPtr& result_column) const {
+/**
+ * DeletePredicate is derived from one of 2 cases:
+ * 1. A list of row IDs identifying the deleted rows. (e.g. Delete rows with row_id in (1,
2, 3))
+ * 2. A bit vector indicating whether each row is deleted or not. (e.g. Bit
vector[0,1,0,0,1] indicates row 1 and row 4 are deleted)
+ *
+ * So DeletePredicate should have exactly 1 child expr, which is the slot of
the row ID.
+ * Row IDs should be generated by the file reader as a virtual column in `block`.
+ **/
+Status DeletePredicate::execute(VExprContext* context, Block* block, int*
result_column_id) const {
+ if (block->empty()) {
+ return Status::OK();
+ }
DCHECK(_open_finished || block == nullptr);
+ if (_children.size() != 1) {
+ return Status::InternalError(fmt::format(
+ "DeletePredicate should have exactly 1 child expr, but got
{}", _children.size()));
+ }
+ int slot = -1;
+ RETURN_IF_ERROR(_children[0]->execute(context, block, &slot));
+ const auto count = block->rows();
+ auto res_col = ColumnBool::create(block->rows(), 0);
+ const auto& row_ids =
+ assert_cast<const
ColumnInt64&>(*block->get_by_position(slot).column).get_data();
+ DCHECK_EQ(row_ids.size(), count);
+ if (_deleted_rows.empty()) {
+ block->insert({std::move(res_col), std::make_shared<DataTypeBool>(),
expr_name()});
+ *result_column_id = block->get_columns().size() - 1;
+ return Status::OK();
+ }
+ const int64_t* delete_rows = _deleted_rows.data();
+ const int64_t* delete_rows_end = delete_rows + _deleted_rows.size();
+ const int64_t* start_pos = std::lower_bound(delete_rows, delete_rows_end,
row_ids[0]);
+ int64_t start_index = start_pos - delete_rows;
+ const int64_t* end_pos = std::upper_bound(start_pos, delete_rows_end,
row_ids[count - 1]);
+ const int64_t end_index = end_pos - delete_rows;
- static_cast<void>(_deleted_rows.size());
- // TODO: implement delete predicate logic here, currently we just return a
column with all 0 (false)
+ while (start_index < end_index) {
+ int64_t delete_row = delete_rows[start_index];
+ if (const auto it = std::ranges::lower_bound(row_ids, delete_row);
+ it != row_ids.end() && *it == delete_row) {
+ const size_t index = it - row_ids.begin();
+ res_col->get_data()[index] = true;
+ }
+ ++start_index;
+ }
+ block->insert({std::move(res_col), std::make_shared<DataTypeBool>(),
expr_name()});
+ *result_column_id = block->get_columns().size() - 1;
return Status::OK();
}
diff --git a/be/src/format/reader/expr/delete_predicate.h
b/be/src/format/reader/expr/delete_predicate.h
index feb8093ea5c..3a95c31d8bf 100644
--- a/be/src/format/reader/expr/delete_predicate.h
+++ b/be/src/format/reader/expr/delete_predicate.h
@@ -40,8 +40,11 @@ class DeletePredicate final : public VExpr {
public:
DeletePredicate(const std::vector<int64_t>& deleted_rows);
~DeletePredicate() override = default;
+ Status execute(VExprContext* context, Block* block, int* result_column_id)
const override;
Status execute_column_impl(VExprContext* context, const Block* block,
const Selector* selector,
- size_t count, ColumnPtr& result_column) const
override;
+ size_t count, ColumnPtr& result_column) const
override {
+ return Status::InternalError("Not implement
DeletePredicate::execute_column_impl");
+ }
Status prepare(RuntimeState* state, const RowDescriptor& desc,
VExprContext* context) override;
Status open(RuntimeState* state, VExprContext* context,
FunctionContext::FunctionStateScope scope) override;
diff --git a/be/test/format/reader/expr/delete_predicate_test.cpp
b/be/test/format/reader/expr/delete_predicate_test.cpp
new file mode 100644
index 00000000000..9d9f7387a22
--- /dev/null
+++ b/be/test/format/reader/expr/delete_predicate_test.cpp
@@ -0,0 +1,155 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format/reader/expr/delete_predicate.h"
+
+#include <gtest/gtest.h>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "common/status.h"
+#include "core/block/block.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type_number.h"
+#include "exprs/vexpr_context.h"
+#include "runtime/descriptors.h"
+#include "testutil/mock/mock_slot_ref.h"
+
+namespace doris {
+
+class DeletePredicateTest : public testing::Test {
+protected:
+ static Block make_block(const std::vector<int64_t>& row_ids) {
+ auto column = ColumnInt64::create();
+ for (auto row_id : row_ids) {
+ column->insert_value(row_id);
+ }
+
+ Block block;
+ block.insert({std::move(column), std::make_shared<DataTypeInt64>(),
"row_id"});
+ return block;
+ }
+
+ static std::vector<UInt8> result_column_data(const Block& block, int
result_column_id) {
+ const auto& result_column =
+ assert_cast<const
ColumnBool&>(*block.get_by_position(result_column_id).column);
+ return {result_column.get_data().begin(),
result_column.get_data().end()};
+ }
+
+ static Status execute_delete_predicate(const std::vector<int64_t>&
deleted_rows, Block* block,
+ int* result_column_id) {
+ auto delete_predicate =
std::make_shared<DeletePredicate>(deleted_rows);
+ delete_predicate->_open_finished = true;
+ delete_predicate->add_child(
+ std::make_shared<MockSlotRef>(0,
std::make_shared<DataTypeInt64>()));
+
+ VExprContext context(delete_predicate);
+ return delete_predicate->execute(&context, block, result_column_id);
+ }
+};
+
+TEST_F(DeletePredicateTest, MatchDeletedRowsInInputRange) {
+ const std::vector<int64_t> deleted_rows {-3, 1, 4, 8, 12, 20};
+ auto block = make_block({0, 1, 2, 3, 4, 5, 8, 12});
+
+ int result_column_id = -1;
+ auto status = execute_delete_predicate(deleted_rows, &block,
&result_column_id);
+ ASSERT_TRUE(status.ok()) << status;
+
+ EXPECT_EQ(result_column_id, 1);
+ EXPECT_EQ(result_column_data(block, result_column_id),
+ std::vector<UInt8>({0, 1, 0, 0, 1, 0, 1, 1}));
+}
+
+TEST_F(DeletePredicateTest, EmptyDeletedRowsReturnAllFalse) {
+ const std::vector<int64_t> deleted_rows;
+ auto block = make_block({1, 2, 3});
+
+ int result_column_id = -1;
+ auto status = execute_delete_predicate(deleted_rows, &block,
&result_column_id);
+ ASSERT_TRUE(status.ok()) << status;
+
+ EXPECT_EQ(result_column_data(block, result_column_id),
std::vector<UInt8>({0, 0, 0}));
+}
+
+TEST_F(DeletePredicateTest, DeletedRowsOutsideInputRangeReturnAllFalse) {
+ const std::vector<int64_t> deleted_rows {-10, -1, 10, 11};
+ auto block = make_block({1, 2, 3});
+
+ int result_column_id = -1;
+ auto status = execute_delete_predicate(deleted_rows, &block,
&result_column_id);
+ ASSERT_TRUE(status.ok()) << status;
+
+ EXPECT_EQ(result_column_data(block, result_column_id),
std::vector<UInt8>({0, 0, 0}));
+}
+
+TEST_F(DeletePredicateTest, EmptyBlockDoesNotAppendResultColumn) {
+ const std::vector<int64_t> deleted_rows {1, 2, 3};
+ Block block;
+
+ int result_column_id = -1;
+ auto status = execute_delete_predicate(deleted_rows, &block,
&result_column_id);
+ ASSERT_TRUE(status.ok()) << status;
+
+ EXPECT_EQ(block.columns(), 0);
+ EXPECT_EQ(result_column_id, -1);
+}
+
+TEST_F(DeletePredicateTest, MissingRowIdChildReturnsError) {
+ const std::vector<int64_t> deleted_rows {1};
+ auto block = make_block({1});
+ auto delete_predicate = std::make_shared<DeletePredicate>(deleted_rows);
+ delete_predicate->_open_finished = true;
+ VExprContext context(delete_predicate);
+
+ int result_column_id = -1;
+ auto status = delete_predicate->execute(&context, &block,
&result_column_id);
+ ASSERT_FALSE(status.ok());
+ EXPECT_NE(status.to_string().find("exactly 1 child expr"),
std::string::npos);
+}
+
+TEST_F(DeletePredicateTest, ExecuteColumnImplReturnsError) {
+ const std::vector<int64_t> deleted_rows {1};
+ DeletePredicate delete_predicate(deleted_rows);
+ VExprContext context(std::make_shared<DeletePredicate>(deleted_rows));
+ ColumnPtr result_column;
+
+ auto status =
+ delete_predicate.execute_column_impl(&context, nullptr, nullptr,
0, result_column);
+ ASSERT_FALSE(status.ok());
+ EXPECT_NE(status.to_string().find("DeletePredicate::execute_column_impl"),
std::string::npos);
+}
+
+TEST_F(DeletePredicateTest, LifecycleAndDebugString) {
+ const std::vector<int64_t> deleted_rows {1};
+ DeletePredicate delete_predicate(deleted_rows);
+ VExprContext context(std::make_shared<DeletePredicate>(deleted_rows));
+ RowDescriptor row_desc;
+
+ auto status = delete_predicate.prepare(nullptr, row_desc, &context);
+ ASSERT_TRUE(status.ok()) << status;
+ EXPECT_EQ(delete_predicate.expr_name(), "DeletePredicate");
+ EXPECT_EQ(delete_predicate.debug_string(), "DeletePredicate");
+
+ status = delete_predicate.open(nullptr, &context,
FunctionContext::THREAD_LOCAL);
+ ASSERT_TRUE(status.ok()) << status;
+ delete_predicate.close(&context, FunctionContext::THREAD_LOCAL);
+}
+
+} // namespace doris
diff --git a/be/test/format/reader/expr/table_expr_test.cpp
b/be/test/format/reader/expr/table_expr_test.cpp
index df41c1482e3..dd831071483 100644
--- a/be/test/format/reader/expr/table_expr_test.cpp
+++ b/be/test/format/reader/expr/table_expr_test.cpp
@@ -15,9 +15,6 @@
// specific language governing permissions and limitations
// under the License.
-#include "format/reader/expr/literal.h"
-#include "format/reader/expr/slot_ref.h"
-
#include <gtest/gtest.h>
#include <memory>
@@ -28,6 +25,8 @@
#include "core/data_type/data_type_number.h"
#include "core/data_type/primitive_type.h"
#include "core/field.h"
+#include "format/reader/expr/literal.h"
+#include "format/reader/expr/slot_ref.h"
#include "runtime/descriptors.h"
#include "testutil/column_helper.h"
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]