This is an automated email from the ASF dual-hosted git repository.

gabriellee pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new f112af0fd29 [pick](branch-2.1) pick #41555 #41592 #38204 (#41781)
f112af0fd29 is described below

commit f112af0fd29a1b8a9c4247a33b9b7556d822554d
Author: Gabriel <[email protected]>
AuthorDate: Mon Oct 14 14:05:08 2024 +0800

    [pick](branch-2.1) pick #41555 #41592 #38204 (#41781)
    
    pick #41555 #41592 #38204
---
 be/src/pipeline/exec/aggregation_sink_operator.h   |  2 +-
 be/src/pipeline/exec/analytic_sink_operator.h      |  2 +-
 .../exec/distinct_streaming_aggregation_operator.h |  2 +-
 be/src/pipeline/exec/hashjoin_build_sink.h         |  2 +-
 be/src/pipeline/exec/hashjoin_probe_operator.h     |  2 +-
 be/src/pipeline/exec/operator.h                    | 11 ++--
 .../exec/partitioned_hash_join_probe_operator.h    |  2 +-
 .../exec/partitioned_hash_join_sink_operator.h     |  2 +-
 be/src/pipeline/exec/schema_scan_operator.cpp      |  3 +
 be/src/pipeline/exec/sort_sink_operator.h          |  2 +-
 be/src/pipeline/exec/union_sink_operator.h         |  6 ++
 be/src/pipeline/exec/union_source_operator.h       |  5 ++
 be/src/pipeline/pipeline_x/operator.h              |  4 --
 .../pipeline_x/pipeline_x_fragment_context.cpp     | 43 +++++++------
 be/src/vec/functions/function_string.cpp           | 28 ++++++++-
 .../doris/catalog/BuiltinScalarFunctions.java      |  2 +
 .../trees/expressions/functions/scalar/Crc32.java  | 71 ++++++++++++++++++++++
 .../expressions/visitor/ScalarFunctionVisitor.java |  5 ++
 gensrc/script/doris_builtins_functions.py          |  2 +
 .../string_functions/test_string_function_like.out | 12 ++++
 .../test_string_function_like.groovy               |  4 ++
 21 files changed, 174 insertions(+), 38 deletions(-)

diff --git a/be/src/pipeline/exec/aggregation_sink_operator.h 
b/be/src/pipeline/exec/aggregation_sink_operator.h
index de1f26057ff..e082d803bcb 100644
--- a/be/src/pipeline/exec/aggregation_sink_operator.h
+++ b/be/src/pipeline/exec/aggregation_sink_operator.h
@@ -164,7 +164,7 @@ public:
                            ? DataDistribution(ExchangeType::PASSTHROUGH)
                            : 
DataSinkOperatorX<AggSinkLocalState>::required_data_distribution();
         }
-        return _is_colocate && _require_bucket_distribution && 
!_followed_by_shuffled_join
+        return _is_colocate && _require_bucket_distribution && 
!_followed_by_shuffled_operator
                        ? DataDistribution(ExchangeType::BUCKET_HASH_SHUFFLE, 
_partition_exprs)
                        : DataDistribution(ExchangeType::HASH_SHUFFLE, 
_partition_exprs);
     }
diff --git a/be/src/pipeline/exec/analytic_sink_operator.h 
b/be/src/pipeline/exec/analytic_sink_operator.h
index eb65414206c..0269ba15be0 100644
--- a/be/src/pipeline/exec/analytic_sink_operator.h
+++ b/be/src/pipeline/exec/analytic_sink_operator.h
@@ -102,7 +102,7 @@ public:
         if (_partition_by_eq_expr_ctxs.empty()) {
             return {ExchangeType::PASSTHROUGH};
         } else if (_order_by_eq_expr_ctxs.empty()) {
-            return _is_colocate && _require_bucket_distribution && 
!_followed_by_shuffled_join
+            return _is_colocate && _require_bucket_distribution && 
!_followed_by_shuffled_operator
                            ? 
DataDistribution(ExchangeType::BUCKET_HASH_SHUFFLE, _partition_exprs)
                            : DataDistribution(ExchangeType::HASH_SHUFFLE, 
_partition_exprs);
         }
diff --git a/be/src/pipeline/exec/distinct_streaming_aggregation_operator.h 
b/be/src/pipeline/exec/distinct_streaming_aggregation_operator.h
index e2129925846..f661f773102 100644
--- a/be/src/pipeline/exec/distinct_streaming_aggregation_operator.h
+++ b/be/src/pipeline/exec/distinct_streaming_aggregation_operator.h
@@ -107,7 +107,7 @@ public:
 
     DataDistribution required_data_distribution() const override {
         if (_needs_finalize || (!_probe_expr_ctxs.empty() && 
!_is_streaming_preagg)) {
-            return _is_colocate && _require_bucket_distribution && 
!_followed_by_shuffled_join
+            return _is_colocate && _require_bucket_distribution && 
!_followed_by_shuffled_operator
                            ? 
DataDistribution(ExchangeType::BUCKET_HASH_SHUFFLE, _partition_exprs)
                            : DataDistribution(ExchangeType::HASH_SHUFFLE, 
_partition_exprs);
         }
diff --git a/be/src/pipeline/exec/hashjoin_build_sink.h 
b/be/src/pipeline/exec/hashjoin_build_sink.h
index dd9383987ec..3b55dc5e44d 100644
--- a/be/src/pipeline/exec/hashjoin_build_sink.h
+++ b/be/src/pipeline/exec/hashjoin_build_sink.h
@@ -167,7 +167,7 @@ public:
     bool require_shuffled_data_distribution() const override {
         return _join_op != TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN && 
!_is_broadcast_join;
     }
-    bool is_shuffled_hash_join() const override {
+    bool is_shuffled_operator() const override {
         return _join_distribution == TJoinDistributionType::PARTITIONED;
     }
     bool require_data_distribution() const override {
diff --git a/be/src/pipeline/exec/hashjoin_probe_operator.h 
b/be/src/pipeline/exec/hashjoin_probe_operator.h
index f4b1a2c491f..3c75a8d10de 100644
--- a/be/src/pipeline/exec/hashjoin_probe_operator.h
+++ b/be/src/pipeline/exec/hashjoin_probe_operator.h
@@ -178,7 +178,7 @@ public:
     bool require_shuffled_data_distribution() const override {
         return _join_op != TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN && 
!_is_broadcast_join;
     }
-    bool is_shuffled_hash_join() const override {
+    bool is_shuffled_operator() const override {
         return _join_distribution == TJoinDistributionType::PARTITIONED;
     }
     bool require_data_distribution() const override {
diff --git a/be/src/pipeline/exec/operator.h b/be/src/pipeline/exec/operator.h
index 067790f873f..34beba600fc 100644
--- a/be/src/pipeline/exec/operator.h
+++ b/be/src/pipeline/exec/operator.h
@@ -254,10 +254,13 @@ public:
 
     virtual Status revoke_memory(RuntimeState* state) { return Status::OK(); }
     [[nodiscard]] virtual bool require_data_distribution() const { return 
false; }
-    [[nodiscard]] bool followed_by_shuffled_join() const { return 
_followed_by_shuffled_join; }
-    void set_followed_by_shuffled_join(bool followed_by_shuffled_join) {
-        _followed_by_shuffled_join = followed_by_shuffled_join;
+    [[nodiscard]] bool followed_by_shuffled_operator() const {
+        return _followed_by_shuffled_operator;
     }
+    void set_followed_by_shuffled_operator(bool followed_by_shuffled_operator) {
+        _followed_by_shuffled_operator = followed_by_shuffled_operator;
+    }
+    [[nodiscard]] virtual bool is_shuffled_operator() const { return false; }
     [[nodiscard]] virtual bool require_shuffled_data_distribution() const { 
return false; }
 
 protected:
@@ -268,7 +271,7 @@ protected:
     OperatorXPtr _child_x = nullptr;
 
     bool _is_closed;
-    bool _followed_by_shuffled_join = false;
+    bool _followed_by_shuffled_operator = false;
 };
 
 /**
diff --git a/be/src/pipeline/exec/partitioned_hash_join_probe_operator.h 
b/be/src/pipeline/exec/partitioned_hash_join_probe_operator.h
index b19537741a8..04320177a20 100644
--- a/be/src/pipeline/exec/partitioned_hash_join_probe_operator.h
+++ b/be/src/pipeline/exec/partitioned_hash_join_probe_operator.h
@@ -172,7 +172,7 @@ public:
     bool require_shuffled_data_distribution() const override {
         return _join_op != TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN;
     }
-    bool is_shuffled_hash_join() const override {
+    bool is_shuffled_operator() const override {
         return _join_distribution == TJoinDistributionType::PARTITIONED;
     }
 
diff --git a/be/src/pipeline/exec/partitioned_hash_join_sink_operator.h 
b/be/src/pipeline/exec/partitioned_hash_join_sink_operator.h
index f3b36086799..2b69c143369 100644
--- a/be/src/pipeline/exec/partitioned_hash_join_sink_operator.h
+++ b/be/src/pipeline/exec/partitioned_hash_join_sink_operator.h
@@ -122,7 +122,7 @@ public:
     bool require_shuffled_data_distribution() const override {
         return _join_op != TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN;
     }
-    bool is_shuffled_hash_join() const override {
+    bool is_shuffled_operator() const override {
         return _join_distribution == TJoinDistributionType::PARTITIONED;
     }
 
diff --git a/be/src/pipeline/exec/schema_scan_operator.cpp 
b/be/src/pipeline/exec/schema_scan_operator.cpp
index d5353655ab0..73e54d52be2 100644
--- a/be/src/pipeline/exec/schema_scan_operator.cpp
+++ b/be/src/pipeline/exec/schema_scan_operator.cpp
@@ -285,6 +285,9 @@ Status SchemaScanOperatorX::get_block(RuntimeState* state, 
vectorized::Block* bl
     } while (block->rows() == 0 && !*eos);
 
     local_state.reached_limit(block, eos);
+    if (*eos) {
+        local_state._finish_dependency->set_always_ready();
+    }
     return Status::OK();
 }
 
diff --git a/be/src/pipeline/exec/sort_sink_operator.h 
b/be/src/pipeline/exec/sort_sink_operator.h
index ae4823c55d0..d13c7abc44b 100644
--- a/be/src/pipeline/exec/sort_sink_operator.h
+++ b/be/src/pipeline/exec/sort_sink_operator.h
@@ -87,7 +87,7 @@ public:
     Status sink(RuntimeState* state, vectorized::Block* in_block, bool eos) 
override;
     DataDistribution required_data_distribution() const override {
         if (_is_analytic_sort) {
-            return _is_colocate && _require_bucket_distribution && 
!_followed_by_shuffled_join
+            return _is_colocate && _require_bucket_distribution && 
!_followed_by_shuffled_operator
                            ? 
DataDistribution(ExchangeType::BUCKET_HASH_SHUFFLE, _partition_exprs)
                            : DataDistribution(ExchangeType::HASH_SHUFFLE, 
_partition_exprs);
         } else if (_merge_by_exchange) {
diff --git a/be/src/pipeline/exec/union_sink_operator.h 
b/be/src/pipeline/exec/union_sink_operator.h
index 97b704078c6..b2c1c414314 100644
--- a/be/src/pipeline/exec/union_sink_operator.h
+++ b/be/src/pipeline/exec/union_sink_operator.h
@@ -125,6 +125,12 @@ public:
         }
     }
 
+    bool require_shuffled_data_distribution() const override {
+        return _followed_by_shuffled_operator;
+    }
+
+    bool is_shuffled_operator() const override { return _followed_by_shuffled_operator; }
+
 private:
     int _get_first_materialized_child_idx() const { return 
_first_materialized_child_idx; }
 
diff --git a/be/src/pipeline/exec/union_source_operator.h 
b/be/src/pipeline/exec/union_source_operator.h
index 60530521ec0..0c29374e690 100644
--- a/be/src/pipeline/exec/union_source_operator.h
+++ b/be/src/pipeline/exec/union_source_operator.h
@@ -135,6 +135,11 @@ public:
         return Status::OK();
     }
     [[nodiscard]] int get_child_count() const { return _child_size; }
+    bool require_shuffled_data_distribution() const override {
+        return _followed_by_shuffled_operator;
+    }
+
+    bool is_shuffled_operator() const override { return _followed_by_shuffled_operator; }
 
 private:
     bool _has_data(RuntimeState* state) const {
diff --git a/be/src/pipeline/pipeline_x/operator.h 
b/be/src/pipeline/pipeline_x/operator.h
index 72f47b576f6..e32176a08d1 100644
--- a/be/src/pipeline/pipeline_x/operator.h
+++ b/be/src/pipeline/pipeline_x/operator.h
@@ -231,8 +231,6 @@ public:
 
     [[nodiscard]] virtual bool can_terminate_early(RuntimeState* state) { 
return false; }
 
-    [[nodiscard]] virtual bool is_shuffled_hash_join() const { return false; }
-
     bool can_read() override {
         LOG(FATAL) << "should not reach here!";
         return false;
@@ -627,8 +625,6 @@ public:
                        : DataDistribution(ExchangeType::NOOP);
     }
 
-    [[nodiscard]] virtual bool is_shuffled_hash_join() const { return false; }
-
     Status close(RuntimeState* state) override {
         return Status::InternalError("Should not reach here!");
     }
diff --git a/be/src/pipeline/pipeline_x/pipeline_x_fragment_context.cpp 
b/be/src/pipeline/pipeline_x/pipeline_x_fragment_context.cpp
index ac527ed8e69..a3dff107f1b 100644
--- a/be/src/pipeline/pipeline_x/pipeline_x_fragment_context.cpp
+++ b/be/src/pipeline/pipeline_x/pipeline_x_fragment_context.cpp
@@ -771,7 +771,7 @@ Status PipelineXFragmentContext::_create_tree_helper(
         ObjectPool* pool, const std::vector<TPlanNode>& tnodes,
         const doris::TPipelineFragmentParams& request, const DescriptorTbl& 
descs,
         OperatorXPtr parent, int* node_idx, OperatorXPtr* root, PipelinePtr& 
cur_pipe,
-        int child_idx, const bool followed_by_shuffled_join) {
+        int child_idx, const bool followed_by_shuffled_operator) {
     // propagate error case
     if (*node_idx >= tnodes.size()) {
         // TODO: print thrift msg
@@ -782,11 +782,11 @@ Status PipelineXFragmentContext::_create_tree_helper(
     const TPlanNode& tnode = tnodes[*node_idx];
 
     int num_children = tnodes[*node_idx].num_children;
-    bool current_followed_by_shuffled_join = followed_by_shuffled_join;
+    bool current_followed_by_shuffled_operator = followed_by_shuffled_operator;
     OperatorXPtr op = nullptr;
     RETURN_IF_ERROR(_create_operator(pool, tnodes[*node_idx], request, descs, 
op, cur_pipe,
                                      parent == nullptr ? -1 : 
parent->node_id(), child_idx,
-                                     followed_by_shuffled_join));
+                                     followed_by_shuffled_operator));
 
     // assert(parent != nullptr || (node_idx == 0 && root_expr != nullptr));
     if (parent != nullptr) {
@@ -797,7 +797,7 @@ Status PipelineXFragmentContext::_create_tree_helper(
     }
 
     /**
-     * `ExchangeType::HASH_SHUFFLE` should be used if an operator is followed by a shuffled hash join.
+     * `ExchangeType::HASH_SHUFFLE` should be used if an operator is followed by a shuffled operator (shuffled hash join, union operator followed by co-located operators).
      *
      * For plan:
      * LocalExchange(id=0) -> Aggregation(id=1) -> ShuffledHashJoin(id=2)
@@ -811,8 +811,8 @@ Status PipelineXFragmentContext::_create_tree_helper(
             cur_pipe->operator_xs().empty()
                     ? cur_pipe->sink_x()->require_shuffled_data_distribution()
                     : op->require_shuffled_data_distribution();
-    current_followed_by_shuffled_join =
-            (followed_by_shuffled_join || op->is_shuffled_hash_join()) &&
+    current_followed_by_shuffled_operator =
+            (followed_by_shuffled_operator || op->is_shuffled_operator()) &&
             require_shuffled_data_distribution;
 
     cur_pipe->_name.push_back('-');
@@ -823,7 +823,7 @@ Status PipelineXFragmentContext::_create_tree_helper(
     for (int i = 0; i < num_children; i++) {
         ++*node_idx;
         RETURN_IF_ERROR(_create_tree_helper(pool, tnodes, request, descs, op, 
node_idx, nullptr,
-                                            cur_pipe, i, 
current_followed_by_shuffled_join));
+                                            cur_pipe, i, 
current_followed_by_shuffled_operator));
 
         // we are expecting a child, but have used all nodes
         // this means we have been given a bad tree and must fail
@@ -865,13 +865,13 @@ Status PipelineXFragmentContext::_add_local_exchange_impl(
      * `bucket_seq_to_instance_idx` is empty if no scan operator is contained 
in this fragment.
      * So co-located operators(e.g. Agg, Analytic) should use `HASH_SHUFFLE` 
instead of `BUCKET_HASH_SHUFFLE`.
      */
-    const bool followed_by_shuffled_join =
-            operator_xs.size() > idx ? 
operator_xs[idx]->followed_by_shuffled_join()
-                                     : 
cur_pipe->sink_x()->followed_by_shuffled_join();
+    const bool followed_by_shuffled_operator =
+            operator_xs.size() > idx ? 
operator_xs[idx]->followed_by_shuffled_operator()
+                                     : 
cur_pipe->sink_x()->followed_by_shuffled_operator();
     const bool should_disable_bucket_shuffle =
             bucket_seq_to_instance_idx.empty() &&
             shuffle_idx_to_instance_idx.find(-1) == 
shuffle_idx_to_instance_idx.end() &&
-            followed_by_shuffled_join;
+            followed_by_shuffled_operator;
     sink.reset(new LocalExchangeSinkOperatorX(
             sink_id, local_exchange_id,
             should_disable_bucket_shuffle ? _total_instances : _num_instances,
@@ -1047,7 +1047,7 @@ Status 
PipelineXFragmentContext::_create_operator(ObjectPool* pool, const TPlanN
                                                   const DescriptorTbl& descs, 
OperatorXPtr& op,
                                                   PipelinePtr& cur_pipe, int 
parent_idx,
                                                   int child_idx,
-                                                  const bool 
followed_by_shuffled_join) {
+                                                  const bool 
followed_by_shuffled_operator) {
     // We directly construct the operator from Thrift because the given array 
is in the order of preorder traversal.
     // Therefore, here we need to use a stack-like structure.
     _pipeline_parent_map.pop(cur_pipe, parent_idx, child_idx);
@@ -1121,7 +1121,7 @@ Status 
PipelineXFragmentContext::_create_operator(ObjectPool* pool, const TPlanN
             op.reset(new DistinctStreamingAggOperatorX(pool, 
next_operator_id(), tnode, descs,
                                                        
_require_bucket_distribution));
             RETURN_IF_ERROR(cur_pipe->add_operator(op));
-            op->set_followed_by_shuffled_join(followed_by_shuffled_join);
+            op->set_followed_by_shuffled_operator(followed_by_shuffled_operator);
             _require_bucket_distribution =
                     _require_bucket_distribution || 
op->require_data_distribution();
         } else if (tnode.agg_node.__isset.use_streaming_preaggregation &&
@@ -1152,7 +1152,7 @@ Status 
PipelineXFragmentContext::_create_operator(ObjectPool* pool, const TPlanN
                 sink.reset(new AggSinkOperatorX(pool, next_sink_operator_id(), 
tnode, descs,
                                                 _require_bucket_distribution));
             }
-            sink->set_followed_by_shuffled_join(followed_by_shuffled_join);
+            
sink->set_followed_by_shuffled_operator(followed_by_shuffled_operator);
             _require_bucket_distribution =
                     _require_bucket_distribution || 
sink->require_data_distribution();
             sink->set_dests_id({op->operator_id()});
@@ -1203,8 +1203,8 @@ Status 
PipelineXFragmentContext::_create_operator(ObjectPool* pool, const TPlanN
 
             _pipeline_parent_map.push(op->node_id(), cur_pipe);
             _pipeline_parent_map.push(op->node_id(), build_side_pipe);
-            sink->set_followed_by_shuffled_join(sink->is_shuffled_hash_join());
-            op->set_followed_by_shuffled_join(op->is_shuffled_hash_join());
+            
sink->set_followed_by_shuffled_operator(sink->is_shuffled_operator());
+            op->set_followed_by_shuffled_operator(op->is_shuffled_operator());
         } else {
             op.reset(new HashJoinProbeOperatorX(pool, tnode, 
next_operator_id(), descs));
             RETURN_IF_ERROR(cur_pipe->add_operator(op));
@@ -1225,8 +1225,8 @@ Status 
PipelineXFragmentContext::_create_operator(ObjectPool* pool, const TPlanN
 
             _pipeline_parent_map.push(op->node_id(), cur_pipe);
             _pipeline_parent_map.push(op->node_id(), build_side_pipe);
-            sink->set_followed_by_shuffled_join(sink->is_shuffled_hash_join());
-            op->set_followed_by_shuffled_join(op->is_shuffled_hash_join());
+            
sink->set_followed_by_shuffled_operator(sink->is_shuffled_operator());
+            op->set_followed_by_shuffled_operator(op->is_shuffled_operator());
         }
         _require_bucket_distribution =
                 _require_bucket_distribution || 
op->require_data_distribution();
@@ -1256,6 +1256,7 @@ Status 
PipelineXFragmentContext::_create_operator(ObjectPool* pool, const TPlanN
     case TPlanNodeType::UNION_NODE: {
         int child_count = tnode.num_children;
         op.reset(new UnionSourceOperatorX(pool, tnode, next_operator_id(), 
descs));
+        op->set_followed_by_shuffled_operator(_require_bucket_distribution);
         RETURN_IF_ERROR(cur_pipe->add_operator(op));
 
         const auto downstream_pipeline_id = cur_pipe->id();
@@ -1298,7 +1299,7 @@ Status 
PipelineXFragmentContext::_create_operator(ObjectPool* pool, const TPlanN
             sink.reset(new SortSinkOperatorX(pool, next_sink_operator_id(), 
tnode, descs,
                                              _require_bucket_distribution));
         }
-        sink->set_followed_by_shuffled_join(followed_by_shuffled_join);
+        sink->set_followed_by_shuffled_operator(followed_by_shuffled_operator);
         _require_bucket_distribution =
                 _require_bucket_distribution || 
sink->require_data_distribution();
         sink->set_dests_id({op->operator_id()});
@@ -1338,7 +1339,7 @@ Status 
PipelineXFragmentContext::_create_operator(ObjectPool* pool, const TPlanN
         DataSinkOperatorXPtr sink;
         sink.reset(new AnalyticSinkOperatorX(pool, next_sink_operator_id(), 
tnode, descs,
                                              _require_bucket_distribution));
-        sink->set_followed_by_shuffled_join(followed_by_shuffled_join);
+        sink->set_followed_by_shuffled_operator(followed_by_shuffled_operator);
         _require_bucket_distribution =
                 _require_bucket_distribution || 
sink->require_data_distribution();
         sink->set_dests_id({op->operator_id()});
@@ -1349,11 +1350,13 @@ Status 
PipelineXFragmentContext::_create_operator(ObjectPool* pool, const TPlanN
     case TPlanNodeType::INTERSECT_NODE: {
         RETURN_IF_ERROR(_build_operators_for_set_operation_node<true>(
                 pool, tnode, descs, op, cur_pipe, parent_idx, child_idx));
+        op->set_followed_by_shuffled_operator(_require_bucket_distribution);
         break;
     }
     case TPlanNodeType::EXCEPT_NODE: {
         RETURN_IF_ERROR(_build_operators_for_set_operation_node<false>(
                 pool, tnode, descs, op, cur_pipe, parent_idx, child_idx));
+        op->set_followed_by_shuffled_operator(_require_bucket_distribution);
         break;
     }
     case TPlanNodeType::REPEAT_NODE: {
diff --git a/be/src/vec/functions/function_string.cpp 
b/be/src/vec/functions/function_string.cpp
index 7ebcac9ca1b..edf43300f94 100644
--- a/be/src/vec/functions/function_string.cpp
+++ b/be/src/vec/functions/function_string.cpp
@@ -82,7 +82,7 @@ struct NameQuoteImpl {
     }
 };
 
-struct NameStringLenght {
+struct NameStringLength {
     static constexpr auto name = "length";
 };
 
@@ -104,6 +104,28 @@ struct StringLengthImpl {
     }
 };
 
+struct NameCrc32 {
+    static constexpr auto name = "crc32";
+};
+
+struct Crc32Impl {
+    using ReturnType = DataTypeInt64;
+    static constexpr auto TYPE_INDEX = TypeIndex::String;
+    using Type = String;
+    using ReturnColumnType = ColumnVector<Int64>;
+
+    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
+                         PaddedPODArray<Int64>& res) {
+        auto size = offsets.size();
+        res.resize(size);
+        for (int i = 0; i < size; ++i) {
+            res[i] = crc32_z(0L, (const unsigned char*)data.data() + offsets[i - 1],
+                             offsets[i] - offsets[i - 1]);
+        }
+        return Status::OK();
+    }
+};
+
 struct NameStringUtf8Length {
     static constexpr auto name = "char_length";
 };
@@ -1073,7 +1095,8 @@ using StringFindInSetImpl = 
StringFunctionImpl<LeftDataType, RightDataType, Find
 
 // ready for regist function
 using FunctionStringASCII = FunctionUnaryToType<StringASCII, NameStringASCII>;
-using FunctionStringLength = FunctionUnaryToType<StringLengthImpl, 
NameStringLenght>;
+using FunctionStringLength = FunctionUnaryToType<StringLengthImpl, 
NameStringLength>;
+using FunctionCrc32 = FunctionUnaryToType<Crc32Impl, NameCrc32>;
 using FunctionStringUTF8Length = FunctionUnaryToType<StringUtf8LengthImpl, 
NameStringUtf8Length>;
 using FunctionStringSpace = FunctionUnaryToType<StringSpace, NameStringSpace>;
 using FunctionStringStartsWith =
@@ -1111,6 +1134,7 @@ using FunctionStringRPad = FunctionStringPad<StringRPad>;
 void register_function_string(SimpleFunctionFactory& factory) {
     factory.register_function<FunctionStringASCII>();
     factory.register_function<FunctionStringLength>();
+    factory.register_function<FunctionCrc32>();
     factory.register_function<FunctionStringUTF8Length>();
     factory.register_function<FunctionStringSpace>();
     factory.register_function<FunctionStringStartsWith>();
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
index 4b87fa37652..f84bda52178 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
@@ -122,6 +122,7 @@ import 
org.apache.doris.nereids.trees.expressions.functions.scalar.Cos;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Cosh;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.CosineDistance;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.CountEqual;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.Crc32;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.CreateMap;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.CreateNamedStruct;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.CreateStruct;
@@ -723,6 +724,7 @@ public class BuiltinScalarFunctions implements 
FunctionHelper {
             scalar(Least.class, "least"),
             scalar(Left.class, "left"),
             scalar(Length.class, "length"),
+            scalar(Crc32.class, "crc32"),
             scalar(Like.class, "like"),
             scalar(Ln.class, "ln"),
             scalar(Locate.class, "locate"),
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Crc32.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Crc32.java
new file mode 100644
index 00000000000..036807062fa
--- /dev/null
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Crc32.java
@@ -0,0 +1,71 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.expressions.functions.scalar;
+
+import org.apache.doris.catalog.FunctionSignature;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import 
org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
+import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable;
+import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression;
+import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
+import org.apache.doris.nereids.types.BigIntType;
+import org.apache.doris.nereids.types.StringType;
+import org.apache.doris.nereids.types.VarcharType;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+
+import java.util.List;
+
+/**
+ * ScalarFunction 'crc32'. This class is generated by GenerateFunction.
+ */
+public class Crc32 extends ScalarFunction
+        implements UnaryExpression, ExplicitlyCastableSignature, PropagateNullable {
+
+    public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
+            FunctionSignature.ret(BigIntType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT),
+            FunctionSignature.ret(BigIntType.INSTANCE).args(StringType.INSTANCE)
+    );
+
+    /**
+     * constructor with 1 argument.
+     */
+    public Crc32(Expression arg) {
+        super("crc32", arg);
+    }
+
+    /**
+     * withChildren.
+     */
+    @Override
+    public Crc32 withChildren(List<Expression> children) {
+        Preconditions.checkArgument(children.size() == 1);
+        return new Crc32(children.get(0));
+    }
+
+    @Override
+    public List<FunctionSignature> getSignatures() {
+        return SIGNATURES;
+    }
+
+    @Override
+    public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
+        return visitor.visitCrc32(this, context);
+    }
+}
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
index a2562baa758..79b8452e1df 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
@@ -129,6 +129,7 @@ import 
org.apache.doris.nereids.trees.expressions.functions.scalar.Cos;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Cosh;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.CosineDistance;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.CountEqual;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.Crc32;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.CreateMap;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.CreateNamedStruct;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.CreateStruct;
@@ -1450,6 +1451,10 @@ public interface ScalarFunctionVisitor<R, C> {
         return visitScalarFunction(length, context);
     }
 
+    default R visitCrc32(Crc32 crc32, C context) {
+        return visitScalarFunction(crc32, context);
+    }
+
     default R visitLike(Like like, C context) {
         return visitStringRegexPredicate(like, context);
     }
diff --git a/gensrc/script/doris_builtins_functions.py 
b/gensrc/script/doris_builtins_functions.py
index 4b5e113faee..a05f6ac8abb 100644
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -1574,6 +1574,7 @@ visible_functions = {
         [['rpad'], 'VARCHAR', ['VARCHAR', 'INT', 'VARCHAR'], 
'ALWAYS_NULLABLE'],
         [['append_trailing_char_if_absent'], 'VARCHAR', ['VARCHAR', 
'VARCHAR'], 'ALWAYS_NULLABLE'],
         [['length'], 'INT', ['VARCHAR'], ''],
+        [['crc32'], 'BIGINT', ['VARCHAR'], ''],
         [['bit_length'], 'INT', ['VARCHAR'], ''],
 
         [['char_length', 'character_length'], 'INT', ['VARCHAR'], ''],
@@ -1639,6 +1640,7 @@ visible_functions = {
         [['rpad'], 'STRING', ['STRING', 'INT', 'STRING'], 'ALWAYS_NULLABLE'],
         [['append_trailing_char_if_absent'], 'STRING', ['STRING', 'STRING'], 
'ALWAYS_NULLABLE'],
         [['length'], 'INT', ['STRING'], ''],
+        [['crc32'], 'BIGINT', ['STRING'], ''],
         [['bit_length'], 'INT', ['STRING'], ''],
 
         [['char_length', 'character_length'], 'INT', ['STRING'], ''],
diff --git 
a/regression-test/data/query_p0/sql_functions/string_functions/test_string_function_like.out
 
b/regression-test/data/query_p0/sql_functions/string_functions/test_string_function_like.out
index 51fbfc68af1..9fcfc2d6ee9 100644
--- 
a/regression-test/data/query_p0/sql_functions/string_functions/test_string_function_like.out
+++ 
b/regression-test/data/query_p0/sql_functions/string_functions/test_string_function_like.out
@@ -245,3 +245,15 @@ bb
 
 -- !sql --
 
+-- !crc32_1 --
+348606243
+
+-- !crc32_2 --
+130583814
+
+-- !crc32_3 --
+2707236321
+
+-- !crc32_4 --
+\N
+
diff --git 
a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_like.groovy
 
b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_like.groovy
index 9c5deed8651..e092526b03c 100644
--- 
a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_like.groovy
+++ 
b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_like.groovy
@@ -90,4 +90,8 @@ suite("test_string_function_like") {
     qt_sql "SELECT k FROM ${tbName} WHERE NOT LIKE(k, \"%\") ORDER BY k;"
 
     // sql "DROP TABLE ${tbName};"
+    qt_crc32_1 "select crc32(\"DORIS\");"
+    qt_crc32_2 "select crc32(\"APACHE DORIS\");"
+    qt_crc32_3 "select crc32(10);"
+    qt_crc32_4 "select crc32(NULL);"
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to