HappenLee commented on code in PR #63389:
URL: https://github.com/apache/doris/pull/63389#discussion_r3371127414


##########
be/src/exprs/function/function_string.cpp:
##########
@@ -1341,8 +1347,118 @@ using FunctionCrc32 = FunctionUnaryToType<Crc32Impl, 
NameCrc32>;
 using FunctionStringUTF8Length = FunctionUnaryToType<StringUtf8LengthImpl, 
NameStringUtf8Length>;
 using FunctionStringSpace = FunctionUnaryToType<StringSpace, NameStringSpace>;
 using FunctionIsValidUTF8 = FunctionUnaryToType<IsValidUTF8Impl, 
NameIsValidUTF8>;
-using FunctionStringStartsWith =
-        FunctionBinaryToType<DataTypeString, DataTypeString, 
StringStartsWithImpl, NameStartsWith>;
+
+namespace {
+
+ZoneMapFilterResult unsupported_starts_with_zonemap(const ZoneMapEvalContext& 
ctx) {
+    ++ctx.stats.unsupported_expr_count;
+    return ZoneMapFilterResult::kUnsupported;
+}
+
+bool field_less_for_starts_with_zonemap(const Field& lhs, const Field& rhs) {
+    return (lhs <=> rhs) == std::strong_ordering::less;
+}
+
+Field string_field_for_starts_with_zonemap(std::string_view value) {
+    return Field::create_field<TYPE_STRING>(std::string(value));
+}
+
+std::optional<std::string> 
next_prefix_for_starts_with_zonemap(std::string_view prefix) {
+    // ZoneMap string bounds are compared by bytewise Field ordering. For 
starts_with(s, p),
+    // the safe upper bound is the next byte string after p: p <= s < 
next_prefix(p).
+    std::string upper(prefix);
+    for (auto i = static_cast<int64_t>(upper.size()) - 1; i >= 0; --i) {
+        auto byte = static_cast<unsigned char>(upper[i]);
+        if (byte != std::numeric_limits<unsigned char>::max()) {
+            upper[i] = static_cast<char>(byte + 1);
+            upper.resize(i + 1);
+            return upper;
+        }
+    }
+    return std::nullopt;
+}
+
+bool supports_starts_with_zonemap_slot_type(const DataTypePtr& data_type) {
+    if (data_type == nullptr) {
+        return false;
+    }
+    auto primitive_type = remove_nullable(data_type)->get_primitive_type();
+    return primitive_type != TYPE_CHAR && is_string_type(primitive_type);
+}
+
+ZoneMapFilterResult evaluate_starts_with_zonemap(const ZoneMapEvalContext& ctx,
+                                                 const VExprSPtrs& arguments) {
+    auto slot_literal = expr_zonemap::extract_slot_and_literal(arguments);
+    if (!slot_literal.has_value() || slot_literal->literal_on_left) {
+        return unsupported_starts_with_zonemap(ctx);
+    }
+    if (slot_literal->literal.is_null()) {
+        return ZoneMapFilterResult::kNoMatch;
+    }
+    const auto* slot_type = ctx.data_type(slot_literal->slot_index);
+    if (slot_type == nullptr || *slot_type == nullptr || 
slot_literal->literal_type == nullptr) {
+        return unsupported_starts_with_zonemap(ctx);
+    }
+    if (!supports_starts_with_zonemap_slot_type(*slot_type) ||
+        
!is_string_type(remove_nullable(slot_literal->literal_type)->get_primitive_type()))
 {
+        ++ctx.stats.type_mismatch_count;
+        return unsupported_starts_with_zonemap(ctx);
+    }
+    auto zone_map_ref = expr_zonemap::fetch_zone_map(ctx, 
slot_literal->slot_index);
+    if (zone_map_ref == nullptr) {
+        return ZoneMapFilterResult::kUnsupported;
+    }
+    const auto& zone_map = *zone_map_ref;
+    if (!zone_map.has_not_null) {
+        return ZoneMapFilterResult::kNoMatch;
+    }
+    if (!expr_zonemap::range_stats_usable_for_zonemap(zone_map, *slot_type)) {
+        return unsupported_starts_with_zonemap(ctx);
+    }
+
+    const auto prefix = slot_literal->literal.as_string_view();
+    if (prefix.empty()) {
+        return ZoneMapFilterResult::kMayMatch;
+    }
+    auto lower = string_field_for_starts_with_zonemap(prefix);
+    if (field_less_for_starts_with_zonemap(zone_map.max_value, lower)) {
+        return ZoneMapFilterResult::kNoMatch;
+    }
+    auto upper_prefix = next_prefix_for_starts_with_zonemap(prefix);
+    if (upper_prefix.has_value() &&
+        !field_less_for_starts_with_zonemap(zone_map.min_value,
+                                            
string_field_for_starts_with_zonemap(*upper_prefix))) {
+        return ZoneMapFilterResult::kNoMatch;
+    }
+    return ZoneMapFilterResult::kMayMatch;
+}
+
+bool can_evaluate_starts_with_zonemap(const VExprSPtrs& arguments) {
+    auto slot_literal = expr_zonemap::extract_slot_and_literal(arguments);
+    if (!slot_literal.has_value() || slot_literal->literal_on_left) {
+        return false;
+    }
+    return supports_starts_with_zonemap_slot_type(slot_literal->slot_type) &&
+           slot_literal->literal_type != nullptr &&
+           
is_string_type(remove_nullable(slot_literal->literal_type)->get_primitive_type());
+}
+
+} // namespace
+
+class FunctionStringStartsWith : public FunctionBinaryToType<DataTypeString, 
DataTypeString,
+                                                             
StringStartsWithImpl, NameStartsWith> {
+public:
+    static FunctionPtr create() { return 
std::make_shared<FunctionStringStartsWith>(); }
+    ZoneMapFilterResult evaluate_zonemap_filter(const ZoneMapEvalContext& ctx,
+                                                const VExprSPtrs& arguments) 
const override {
+        return evaluate_starts_with_zonemap(ctx, arguments);
+    }
+
+    bool can_evaluate_zonemap_filter(const VExprSPtrs& arguments) const 
override {
+        return can_evaluate_starts_with_zonemap(arguments);

Review Comment:
   为什么不直接在这里实现,而要冗余地多调用一次函数呢?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to