This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new 4b15b1f2637 [fix](orc) check all the cases before build_search_argument (#44615) (#44801)
4b15b1f2637 is described below

commit 4b15b1f2637ed9191e0bb1bed81f313c2648d830
Author: Mingyu Chen (Rayner) <morning...@163.com>
AuthorDate: Sat Nov 30 09:17:56 2024 +0800

    [fix](orc) check all the cases before build_search_argument (#44615) (#44801)
    
    cherry-pick #44615
    
    Co-authored-by: Socrates <suyit...@selectdb.com>
---
 be/src/vec/exec/format/orc/vorc_reader.cpp | 176 ++++++++++++++---------------
 be/src/vec/exec/format/orc/vorc_reader.h   |  18 ++-
 be/test/vec/exec/orc_reader_test.cpp       |  57 +++++-----
 3 files changed, 131 insertions(+), 120 deletions(-)

diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp 
b/be/src/vec/exec/format/orc/vorc_reader.cpp
index 63516afdddb..9ab673c759d 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.cpp
+++ b/be/src/vec/exec/format/orc/vorc_reader.cpp
@@ -33,6 +33,7 @@
 #include <map>
 #include <ostream>
 #include <tuple>
+#include <utility>
 
 #include "cctz/civil_time.h"
 #include "cctz/time_zone.h"
@@ -567,12 +568,14 @@ std::tuple<bool, orc::Literal> 
convert_to_orc_literal(const orc::Type* type,
 
 std::tuple<bool, orc::Literal, orc::PredicateDataType> 
OrcReader::_make_orc_literal(
         const VSlotRef* slot_ref, const VLiteral* literal) {
+    
DCHECK(_col_name_to_file_col_name_low_case.contains(slot_ref->expr_name()));
     auto file_col_name_low_case = 
_col_name_to_file_col_name_low_case[slot_ref->expr_name()];
     if (!_type_map.contains(file_col_name_low_case)) {
         // TODO: this is for acid table
         LOG(WARNING) << "Column " << slot_ref->expr_name() << " not found in 
_type_map";
         return std::make_tuple(false, orc::Literal(false), 
orc::PredicateDataType::LONG);
     }
+    DCHECK(_type_map.contains(file_col_name_low_case));
     const auto* orc_type = _type_map[file_col_name_low_case];
     if (!TYPEKIND_TO_PREDICATE_TYPE.contains(orc_type->getKind())) {
         LOG(WARNING) << "Unsupported Push Down Orc Type [TypeKind=" << 
orc_type->getKind() << "]";
@@ -624,15 +627,37 @@ std::tuple<bool, orc::Literal, orc::PredicateDataType> 
OrcReader::_make_orc_lite
     }
 }
 
-// check if the slot of expr can be pushed down to orc reader
+// check if the slot of expr can be pushed down to orc reader and make orc predicate type
 bool OrcReader::_check_slot_can_push_down(const VExprSPtr& expr) {
     if (!expr->children()[0]->is_slot_ref()) {
         return false;
     }
     const auto* slot_ref = static_cast<const 
VSlotRef*>(expr->children()[0].get());
     // check if the slot exists in orc file and not partition column
-    return _col_name_to_file_col_name.contains(slot_ref->expr_name()) &&
-           
!_lazy_read_ctx.predicate_partition_columns.contains(slot_ref->expr_name());
+    if (!_col_name_to_file_col_name.contains(slot_ref->expr_name()) ||
+        
_lazy_read_ctx.predicate_partition_columns.contains(slot_ref->expr_name())) {
+        return false;
+    }
+    auto [valid, _, predicate_type] = _make_orc_literal(slot_ref, nullptr);
+    if (valid) {
+        _vslot_ref_to_orc_predicate_data_type[slot_ref] = predicate_type;
+    }
+    return valid;
+}
+
+// check if the literal of expr can be pushed down to orc reader and make orc literal
+bool OrcReader::_check_literal_can_push_down(const VExprSPtr& expr, uint16_t 
child_id) {
+    if (!expr->children()[child_id]->is_literal()) {
+        return false;
+    }
+    // the slot has been checked in _check_slot_can_push_down before calling this function
+    const auto* slot_ref = static_cast<const 
VSlotRef*>(expr->children()[0].get());
+    const auto* literal = static_cast<const 
VLiteral*>(expr->children()[child_id].get());
+    auto [valid, orc_literal, _] = _make_orc_literal(slot_ref, literal);
+    if (valid) {
+        _vliteral_to_orc_literal.insert(std::make_pair(literal, orc_literal));
+    }
+    return valid;
 }
 
 // check if there are rest children of expr can be pushed down to orc reader
@@ -642,7 +667,7 @@ bool OrcReader::_check_rest_children_can_push_down(const 
VExprSPtr& expr) {
     }
 
     for (size_t i = 1; i < expr->children().size(); ++i) {
-        if (!expr->children()[i]->is_literal()) {
+        if (!_check_literal_can_push_down(expr, i)) {
             return false;
         }
     }
@@ -651,7 +676,10 @@ bool OrcReader::_check_rest_children_can_push_down(const 
VExprSPtr& expr) {
 
 // check if the expr can be pushed down to orc reader
 bool OrcReader::_check_expr_can_push_down(const VExprSPtr& expr) {
-    DCHECK(expr != nullptr);
+    if (expr == nullptr) {
+        return false;
+    }
+
     switch (expr->op()) {
     case TExprOpcode::COMPOUND_AND:
         // at least one child can be pushed down
@@ -693,198 +721,167 @@ bool OrcReader::_check_expr_can_push_down(const 
VExprSPtr& expr) {
     }
 }
 
-bool OrcReader::_build_less_than(const VExprSPtr& expr,
+void OrcReader::_build_less_than(const VExprSPtr& expr,
                                  std::unique_ptr<orc::SearchArgumentBuilder>& 
builder) {
     DCHECK(expr->children().size() == 2);
     DCHECK(expr->children()[0]->is_slot_ref());
     DCHECK(expr->children()[1]->is_literal());
     const auto* slot_ref = static_cast<const 
VSlotRef*>(expr->children()[0].get());
     const auto* literal = static_cast<const 
VLiteral*>(expr->children()[1].get());
-    auto [valid, orc_literal, predicate_type] = _make_orc_literal(slot_ref, 
literal);
-    if (!valid) {
-        return false;
-    }
+    DCHECK(_vslot_ref_to_orc_predicate_data_type.contains(slot_ref));
+    auto predicate_type = _vslot_ref_to_orc_predicate_data_type[slot_ref];
+    DCHECK(_vliteral_to_orc_literal.contains(literal));
+    auto orc_literal = _vliteral_to_orc_literal.find(literal)->second;
     builder->lessThan(slot_ref->expr_name(), predicate_type, orc_literal);
-    return true;
 }
 
-bool OrcReader::_build_less_than_equals(const VExprSPtr& expr,
+void OrcReader::_build_less_than_equals(const VExprSPtr& expr,
                                         
std::unique_ptr<orc::SearchArgumentBuilder>& builder) {
     DCHECK(expr->children().size() == 2);
     DCHECK(expr->children()[0]->is_slot_ref());
     DCHECK(expr->children()[1]->is_literal());
     const auto* slot_ref = static_cast<const 
VSlotRef*>(expr->children()[0].get());
     const auto* literal = static_cast<const 
VLiteral*>(expr->children()[1].get());
-    auto [valid, orc_literal, predicate_type] = _make_orc_literal(slot_ref, 
literal);
-    if (!valid) {
-        return false;
-    }
+    DCHECK(_vslot_ref_to_orc_predicate_data_type.contains(slot_ref));
+    auto predicate_type = _vslot_ref_to_orc_predicate_data_type[slot_ref];
+    DCHECK(_vliteral_to_orc_literal.contains(literal));
+    auto orc_literal = _vliteral_to_orc_literal.find(literal)->second;
     builder->lessThanEquals(slot_ref->expr_name(), predicate_type, 
orc_literal);
-    return true;
 }
 
-bool OrcReader::_build_equals(const VExprSPtr& expr,
+void OrcReader::_build_equals(const VExprSPtr& expr,
                               std::unique_ptr<orc::SearchArgumentBuilder>& 
builder) {
     DCHECK(expr->children().size() == 2);
     DCHECK(expr->children()[0]->is_slot_ref());
     DCHECK(expr->children()[1]->is_literal());
     const auto* slot_ref = static_cast<const 
VSlotRef*>(expr->children()[0].get());
     const auto* literal = static_cast<const 
VLiteral*>(expr->children()[1].get());
-    auto [valid, orc_literal, predicate_type] = _make_orc_literal(slot_ref, 
literal);
-    if (!valid) {
-        return false;
-    }
+    DCHECK(_vslot_ref_to_orc_predicate_data_type.contains(slot_ref));
+    auto predicate_type = _vslot_ref_to_orc_predicate_data_type[slot_ref];
+    DCHECK(_vliteral_to_orc_literal.contains(literal));
+    auto orc_literal = _vliteral_to_orc_literal.find(literal)->second;
     builder->equals(slot_ref->expr_name(), predicate_type, orc_literal);
-    return true;
 }
 
-bool OrcReader::_build_filter_in(const VExprSPtr& expr,
+void OrcReader::_build_filter_in(const VExprSPtr& expr,
                                  std::unique_ptr<orc::SearchArgumentBuilder>& 
builder) {
     DCHECK(expr->children().size() >= 2);
     DCHECK(expr->children()[0]->is_slot_ref());
     const auto* slot_ref = static_cast<const 
VSlotRef*>(expr->children()[0].get());
     std::vector<orc::Literal> literals;
-    orc::PredicateDataType predicate_type = orc::PredicateDataType::LONG;
+    DCHECK(_vslot_ref_to_orc_predicate_data_type.contains(slot_ref));
+    orc::PredicateDataType predicate_type = 
_vslot_ref_to_orc_predicate_data_type[slot_ref];
     for (size_t i = 1; i < expr->children().size(); ++i) {
         DCHECK(expr->children()[i]->is_literal());
         const auto* literal = static_cast<const 
VLiteral*>(expr->children()[i].get());
-        auto [valid, orc_literal, type] = _make_orc_literal(slot_ref, literal);
-        if (!valid) {
-            return false;
-        }
+        DCHECK(_vliteral_to_orc_literal.contains(literal));
+        auto orc_literal = _vliteral_to_orc_literal.find(literal)->second;
         literals.emplace_back(orc_literal);
-        predicate_type = type;
     }
     DCHECK(!literals.empty());
     builder->in(slot_ref->expr_name(), predicate_type, literals);
-    return true;
 }
 
-bool OrcReader::_build_is_null(const VExprSPtr& expr,
+void OrcReader::_build_is_null(const VExprSPtr& expr,
                                std::unique_ptr<orc::SearchArgumentBuilder>& 
builder) {
     DCHECK(expr->children().size() == 1);
     DCHECK(expr->children()[0]->is_slot_ref());
     const auto* slot_ref = static_cast<const 
VSlotRef*>(expr->children()[0].get());
-    auto [valid, _, predicate_type] = _make_orc_literal(slot_ref, nullptr);
+    DCHECK(_vslot_ref_to_orc_predicate_data_type.contains(slot_ref));
+    auto predicate_type = _vslot_ref_to_orc_predicate_data_type[slot_ref];
     builder->isNull(slot_ref->expr_name(), predicate_type);
-    return true;
 }
 
 bool OrcReader::_build_search_argument(const VExprSPtr& expr,
                                        
std::unique_ptr<orc::SearchArgumentBuilder>& builder) {
-    if (expr == nullptr) {
-        return false;
-    }
-
-    // if expr can not be pushed down, skip it and continue to next expr
+    // OPTIMIZE: check expr only once
     if (!_check_expr_can_push_down(expr)) {
         return false;
     }
-
     switch (expr->op()) {
     case TExprOpcode::COMPOUND_AND: {
-        bool at_least_one_can_push_down = false;
         builder->startAnd();
+        bool at_least_one_can_push_down = false;
         for (const auto& child : expr->children()) {
             if (_build_search_argument(child, builder)) {
                 at_least_one_can_push_down = true;
             }
         }
-        if (!at_least_one_can_push_down) {
-            // if all exprs can not be pushed down, builder->end() will throw 
exception
-            return false;
-        }
+        DCHECK(at_least_one_can_push_down);
         builder->end();
         break;
     }
-    case TExprOpcode::COMPOUND_OR:
+    case TExprOpcode::COMPOUND_OR: {
         builder->startOr();
+        bool all_can_push_down = true;
         for (const auto& child : expr->children()) {
             if (!_build_search_argument(child, builder)) {
-                return false;
+                all_can_push_down = false;
             }
         }
+        DCHECK(all_can_push_down);
         builder->end();
         break;
-    case TExprOpcode::COMPOUND_NOT:
-        builder->startNot();
+    }
+    case TExprOpcode::COMPOUND_NOT: {
         DCHECK_EQ(expr->children().size(), 1);
-        if (!_build_search_argument(expr->children()[0], builder)) {
-            return false;
-        }
+        builder->startNot();
+        auto res = _build_search_argument(expr->children()[0], builder);
+        DCHECK(res);
         builder->end();
         break;
+    }
     case TExprOpcode::GE:
         builder->startNot();
-        if (!_build_less_than(expr, builder)) {
-            return false;
-        }
+        _build_less_than(expr, builder);
         builder->end();
         break;
     case TExprOpcode::GT:
         builder->startNot();
-        if (!_build_less_than_equals(expr, builder)) {
-            return false;
-        }
+        _build_less_than_equals(expr, builder);
         builder->end();
         break;
     case TExprOpcode::LE:
-        if (!_build_less_than_equals(expr, builder)) {
-            return false;
-        }
+        _build_less_than_equals(expr, builder);
         break;
     case TExprOpcode::LT:
-        if (!_build_less_than(expr, builder)) {
-            return false;
-        }
+        _build_less_than(expr, builder);
         break;
     case TExprOpcode::EQ:
-        if (!_build_equals(expr, builder)) {
-            return false;
-        }
+        _build_equals(expr, builder);
         break;
     case TExprOpcode::NE:
         builder->startNot();
-        if (!_build_equals(expr, builder)) {
-            return false;
-        }
+        _build_equals(expr, builder);
         builder->end();
         break;
     case TExprOpcode::FILTER_IN:
-        if (!_build_filter_in(expr, builder)) {
-            return false;
-        }
+        _build_filter_in(expr, builder);
         break;
     case TExprOpcode::FILTER_NOT_IN:
         builder->startNot();
-        if (!_build_filter_in(expr, builder)) {
-            return false;
-        }
+        _build_filter_in(expr, builder);
         builder->end();
         break;
     // is null and is not null is represented as function call
-    case TExprOpcode::INVALID_OPCODE: {
+    case TExprOpcode::INVALID_OPCODE:
         DCHECK(expr->node_type() == TExprNodeType::FUNCTION_CALL);
         if (expr->fn().name.function_name == "is_null_pred") {
-            if (!_build_is_null(expr, builder)) {
-                return false;
-            }
+            _build_is_null(expr, builder);
         } else if (expr->fn().name.function_name == "is_not_null_pred") {
             builder->startNot();
-            if (!_build_is_null(expr, builder)) {
-                return false;
-            }
+            _build_is_null(expr, builder);
             builder->end();
         } else {
+            // should not reach here, because _check_expr_can_push_down has already checked
             __builtin_unreachable();
         }
         break;
-    }
-    default: {
+
+    default:
         // should not reach here, because _check_expr_can_push_down has already checked
         __builtin_unreachable();
     }
-    }
     return true;
 }
 
@@ -898,6 +895,8 @@ bool OrcReader::_init_search_argument(const 
VExprContextSPtrs& conjuncts) {
     bool at_least_one_can_push_down = false;
     builder->startAnd();
     for (const auto& expr_ctx : conjuncts) {
+        _vslot_ref_to_orc_predicate_data_type.clear();
+        _vliteral_to_orc_literal.clear();
         if (_build_search_argument(expr_ctx->root(), builder)) {
             at_least_one_can_push_down = true;
         }
@@ -943,7 +942,7 @@ Status OrcReader::set_fill_columns(
                     visit_slot(child.get());
                 }
             } else if (VInPredicate* in_predicate = 
typeid_cast<VInPredicate*>(filter_impl)) {
-                if (in_predicate->children().size() > 0) {
+                if (!in_predicate->children().empty()) {
                     visit_slot(in_predicate->children()[0].get());
                 }
             } else {
@@ -1179,7 +1178,8 @@ Status OrcReader::_fill_partition_columns(
         if (num_deserialized != rows) {
             return Status::InternalError(
                     "Failed to fill partition column: {}={} ."
-                    "Number of rows expected to be written : {}, number of 
rows actually written : "
+                    "Number of rows expected to be written : {}, number of 
rows actually "
+                    "written : "
                     "{}",
                     slot_desc->col_name(), value, num_deserialized, rows);
         }
diff --git a/be/src/vec/exec/format/orc/vorc_reader.h 
b/be/src/vec/exec/format/orc/vorc_reader.h
index 8c73957e79e..0dd19077bcf 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.h
+++ b/be/src/vec/exec/format/orc/vorc_reader.h
@@ -41,6 +41,7 @@
 #include "orc/Reader.hh"
 #include "orc/Type.hh"
 #include "orc/Vector.hh"
+#include "orc/sargs/Literal.hh"
 #include "runtime/types.h"
 #include "util/runtime_profile.h"
 #include "vec/aggregate_functions/aggregate_function.h"
@@ -288,23 +289,27 @@ private:
                         bool* is_hive1_orc);
     static bool _check_acid_schema(const orc::Type& type);
     static const orc::Type& _remove_acid(const orc::Type& type);
+
+    // functions for building search argument until _init_search_argument
     std::tuple<bool, orc::Literal, orc::PredicateDataType> _make_orc_literal(
             const VSlotRef* slot_ref, const VLiteral* literal);
     bool _check_slot_can_push_down(const VExprSPtr& expr);
+    bool _check_literal_can_push_down(const VExprSPtr& expr, uint16_t 
child_id);
     bool _check_rest_children_can_push_down(const VExprSPtr& expr);
     bool _check_expr_can_push_down(const VExprSPtr& expr);
-    bool _build_less_than(const VExprSPtr& expr,
+    void _build_less_than(const VExprSPtr& expr,
                           std::unique_ptr<orc::SearchArgumentBuilder>& 
builder);
-    bool _build_less_than_equals(const VExprSPtr& expr,
+    void _build_less_than_equals(const VExprSPtr& expr,
                                  std::unique_ptr<orc::SearchArgumentBuilder>& 
builder);
-    bool _build_equals(const VExprSPtr& expr, 
std::unique_ptr<orc::SearchArgumentBuilder>& builder);
-    bool _build_filter_in(const VExprSPtr& expr,
+    void _build_equals(const VExprSPtr& expr, 
std::unique_ptr<orc::SearchArgumentBuilder>& builder);
+    void _build_filter_in(const VExprSPtr& expr,
                           std::unique_ptr<orc::SearchArgumentBuilder>& 
builder);
-    bool _build_is_null(const VExprSPtr& expr,
+    void _build_is_null(const VExprSPtr& expr,
                         std::unique_ptr<orc::SearchArgumentBuilder>& builder);
     bool _build_search_argument(const VExprSPtr& expr,
                                 std::unique_ptr<orc::SearchArgumentBuilder>& 
builder);
     bool _init_search_argument(const VExprContextSPtrs& conjuncts);
+
     void _init_bloom_filter(
             std::unordered_map<std::string, ColumnValueRangeType>* 
colname_to_value_range);
     void _init_system_properties();
@@ -644,6 +649,9 @@ private:
     std::unordered_map<std::string, std::string> _table_col_to_file_col;
     //support iceberg position delete .
     std::vector<int64_t>* _position_delete_ordered_rowids = nullptr;
+    std::unordered_map<const VSlotRef*, orc::PredicateDataType>
+            _vslot_ref_to_orc_predicate_data_type;
+    std::unordered_map<const VLiteral*, orc::Literal> _vliteral_to_orc_literal;
 };
 
 class ORCFileInputStream : public orc::InputStream, public ProfileCollector {
diff --git a/be/test/vec/exec/orc_reader_test.cpp 
b/be/test/vec/exec/orc_reader_test.cpp
index ec5bd9b519d..f2bba434368 100644
--- a/be/test/vec/exec/orc_reader_test.cpp
+++ b/be/test/vec/exec/orc_reader_test.cpp
@@ -94,33 +94,34 @@ private:
 
 TEST_F(OrcReaderTest, test_build_search_argument) {
     ExecEnv::GetInstance()->set_orc_memory_pool(new ORCMemoryPool());
-    std::
-            vector<std::string>
-                    exprs =
-                            {
-                                    // select count(o_orderkey) from 
tpch1_orc.orders where o_orderkey < 100 or o_orderkey > 5999900 or o_orderkey 
in (1000000, 2000000, 3000000);
-                                    
R"|({"1":{"lst":["rec",13,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]}
 [...]
-                                    // select count(o_orderkey) from 
tpch1_orc.orders where o_orderkey is null or (o_orderkey between 100 and 1000 
and o_orderkey not in (200, 300, 400));
-                                    
R"|({"1":{"lst":["rec",16,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":20},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"4":{"i32":1},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"is_null_pred"}}},"2":{"i32":0},"3":{"lst":["rec",1,{"1":{"lst":["rec",1,{"1":{"i32"
 [...]
-                                    // select count(o_orderkey) from 
tpch1_orc.orders where o_orderkey is null or (o_orderkey between 1000000 and 
1200000 and o_orderkey != 1100000);
-                                    
R"|({"1":{"lst":["rec",14,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":20},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"4":{"i32":1},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"is_null_pred"}}},"2":{"i32":0},"3":{"lst":["rec",1,{"1":{"lst":["rec",1,{"1":{"i32"
 [...]
-                                    // SELECT count(o_orderkey) FROM 
tpch1_orc.orders WHERE o_orderkey IN (1000000, 2000000, 3000000) OR 
(o_orderdate >= '1994-01-01' AND o_orderdate <= '1994-12-31');
-                                    
R"|({"1":{"lst":["rec",13,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":11},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":5},"4":{"i32":4},"11":{"rec":{"1":{"tf":0}}},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},
 [...]
-                                    // select count(o_orderkey) from 
tpch1_orc.orders where o_orderkey < 2 or (o_comment like '%delayed%' and 
o_orderpriority = '1-URGENT');
-                                    
R"|({"1":{"lst":["rec",11,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":11},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"lt"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"
 [...]
-                                    // select count(o_orderkey) from 
tpch1_orc.orders where o_orderkey < 1 + 1;
-                                    
R"|({"1":{"lst":["rec",3,{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":11},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"lt"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",
 [...]
-                                    // SELECT count(o_orderkey) FROM 
tpch1_orc.orders WHERE o_comment LIKE '%delayed%' OR o_orderpriority = 
'1-URGENT';
-                                    
R"|({"1":{"lst":["rec",7,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":20},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"like"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{
 [...]
-                                    // select count(o_orderkey) from 
tpch1_orc.orders where o_orderkey between 1 and 100 or random() > 0.5;
-                                    
R"|({"1":{"lst":["rec",11,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":2},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]}
 [...]
-                                    // select count(o_orderkey) from 
tpch1_orc.orders where lower(o_orderpriority) = '1-urgent';
-                                    
R"|({"1":{"lst":["rec",4,{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":9},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"eq"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":15},"2":{"i32":65533}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":15},"2":{"i32":65533}}}}]},"3":{"i64":
 [...]
-                                    // select count(o_orderkey) from 
tpch1_orc.orders where o_orderkey * 2 < 60;
-                                    
R"|({"1":{"lst":["rec",5,{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":11},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"lt"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":6}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":6}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",
 [...]
-                                    // select count(o_orderkey) from 
tpch1_orc.orders where o_orderdate is not null;
-                                    
R"|({"1":{"lst":["rec",4,{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":9},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"eq"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":27},"3":{"i32":18},"4":{"i32":0}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":27},"3":{"i32":18},"4":{"i3
 [...]
-                            };
+    std::vector<std::string>
+            exprs =
+                    {
+                            // select count(o_orderkey) from tpch1_orc.orders 
where o_orderkey < 100 or o_orderkey > 5999900 or o_orderkey in (1000000, 
2000000, 3000000);
+                            
R"|({"1":{"lst":["rec",13,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i
 [...]
+                            // select count(o_orderkey) from tpch1_orc.orders 
where o_orderkey is null or (o_orderkey between 100 and 1000 and o_orderkey not 
in (200, 300, 400));
+                            
R"|({"1":{"lst":["rec",16,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":20},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"4":{"i32":1},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"is_null_pred"}}},"2":{"i32":0},"3":{"lst":["rec",1,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":
 [...]
+                            // select count(o_orderkey) from tpch1_orc.orders 
where o_orderkey is null or (o_orderkey between 1000000 and 1200000 and 
o_orderkey != 1100000);
+                            
R"|({"1":{"lst":["rec",14,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":20},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"4":{"i32":1},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"is_null_pred"}}},"2":{"i32":0},"3":{"lst":["rec",1,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":
 [...]
+                            // SELECT count(o_orderkey) FROM tpch1_orc.orders 
WHERE o_orderkey IN (1000000, 2000000, 3000000) OR (o_orderdate >= '1994-01-01' 
AND o_orderdate <= '1994-12-31');
+                            
R"|({"1":{"lst":["rec",13,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":11},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":5},"4":{"i32":4},"11":{"rec":{"1":{"tf":0}}},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"re
 [...]
+                            // select count(o_orderkey) from tpch1_orc.orders 
where o_orderkey < 2 or (o_comment like '%delayed%' and o_orderpriority = 
'1-URGENT');
+                            
R"|({"1":{"lst":["rec",11,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":11},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"lt"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},
 [...]
+                            // select count(o_orderkey) from tpch1_orc.orders 
where o_orderkey < 2 or (o_totalprice < 173665.47 and o_custkey >= 36901);
+                            
R"|({"1":{"lst":["rec",11,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":11},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"lt"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},
 [...]
+                            // select count(o_orderkey) from tpch1_orc.orders 
where o_orderkey < 1 + 1;
+                            
R"|({"1":{"lst":["rec",3,{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":11},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"lt"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{
 [...]
+                            // SELECT count(o_orderkey) FROM tpch1_orc.orders 
WHERE o_comment LIKE '%delayed%' OR o_orderpriority = '1-URGENT';
+                            
R"|({"1":{"lst":["rec",7,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":20},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"like"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"
 [...]
+                            // select count(o_orderkey) from tpch1_orc.orders 
where o_orderkey between 1 and 100 or random() > 0.5;
+                            
R"|({"1":{"lst":["rec",11,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":2},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i
 [...]
+                            // select count(o_orderkey) from tpch1_orc.orders 
where lower(o_orderpriority) = '1-urgent';
+                            
R"|({"1":{"lst":["rec",4,{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":9},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"eq"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":15},"2":{"i32":65533}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":15},"2":{"i32":65533}}}}]},"3":{"i64":-1}}]},"
 [...]
+                            // select count(o_orderkey) from tpch1_orc.orders 
where o_orderkey * 2 < 60;
+                            
R"|({"1":{"lst":["rec",5,{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":11},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"lt"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":6}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":6}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{
 [...]
+                            // select count(o_orderkey) from tpch1_orc.orders 
where o_orderdate is not null;
+                            
R"|({"1":{"lst":["rec",4,{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":9},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"eq"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":27},"3":{"i32":18},"4":{"i32":0}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":27},"3":{"i32":18},"4":{"i32":0}}}}
 [...]
+                    };
     std::vector<std::string> result_search_arguments = {
             "leaf-0 = (o_orderkey < 100), leaf-1 = (o_orderkey <= 5999900), 
leaf-2 "
             "= (o_orderkey "
@@ -139,6 +140,8 @@ TEST_F(OrcReaderTest, test_build_search_argument) {
             "(or leaf-0 leaf-2))",
             "leaf-0 = (o_orderkey < 2), leaf-1 = (o_orderpriority = 1-URGENT), 
expr = (or leaf-0 "
             "leaf-1)",
+            "leaf-0 = (o_orderkey < 2), leaf-1 = (o_custkey < 36901), expr = 
(or leaf-0 (not "
+            "leaf-1))",
             "leaf-0 = (o_orderkey < 2), expr = leaf-0",
             CANNOT_PUSH_DOWN_ERROR,
             CANNOT_PUSH_DOWN_ERROR,


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org


Reply via email to