This is an automated email from the ASF dual-hosted git repository.

mbutrovich pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git


The following commit(s) were added to refs/heads/main by this push:
     new 89a3c5f8f chore: move string function handling to new expression registry (#2931)
89a3c5f8f is described below

commit 89a3c5f8f8017509018a51373081c38ed1455b66
Author: Andy Grove <[email protected]>
AuthorDate: Thu Dec 18 05:30:07 2025 -0700

    chore: move string function handling to new expression registry (#2931)
---
 native/core/src/execution/expressions/mod.rs       |   1 +
 native/core/src/execution/expressions/strings.rs   | 100 +++++++++++++++++++++
 native/core/src/execution/planner.rs               |  41 +--------
 .../src/execution/planner/expression_registry.rs   |  16 +++-
 4 files changed, 120 insertions(+), 38 deletions(-)

diff --git a/native/core/src/execution/expressions/mod.rs b/native/core/src/execution/expressions/mod.rs
index 105afd595..a06b41b2c 100644
--- a/native/core/src/execution/expressions/mod.rs
+++ b/native/core/src/execution/expressions/mod.rs
@@ -22,6 +22,7 @@ pub mod bitwise;
 pub mod comparison;
 pub mod logical;
 pub mod nullcheck;
+pub mod strings;
 pub mod subquery;
 
 pub use datafusion_comet_spark_expr::EvalMode;
diff --git a/native/core/src/execution/expressions/strings.rs b/native/core/src/execution/expressions/strings.rs
new file mode 100644
index 000000000..5f4300eb1
--- /dev/null
+++ b/native/core/src/execution/expressions/strings.rs
@@ -0,0 +1,100 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! String expression builders
+
+use std::cmp::max;
+use std::sync::Arc;
+
+use arrow::datatypes::SchemaRef;
+use datafusion::common::ScalarValue;
+use datafusion::physical_expr::expressions::{LikeExpr, Literal};
+use datafusion::physical_expr::PhysicalExpr;
+use datafusion_comet_proto::spark_expression::Expr;
+use datafusion_comet_spark_expr::{RLike, SubstringExpr};
+
+use crate::execution::{
+    expressions::extract_expr,
+    operators::ExecutionError,
+    planner::{expression_registry::ExpressionBuilder, PhysicalPlanner},
+};
+
+/// Builder for Substring expressions
+pub struct SubstringBuilder;
+
+impl ExpressionBuilder for SubstringBuilder {
+    fn build(
+        &self,
+        spark_expr: &Expr,
+        input_schema: SchemaRef,
+        planner: &PhysicalPlanner,
+    ) -> Result<Arc<dyn PhysicalExpr>, ExecutionError> {
+        let expr = extract_expr!(spark_expr, Substring);
+        let child = planner.create_expr(expr.child.as_ref().unwrap(), input_schema)?;
+        // Spark Substring's start is 1-based when start > 0
+        let start = expr.start - i32::from(expr.start > 0);
+        // substring negative len is treated as 0 in Spark
+        let len = max(expr.len, 0);
+
+        Ok(Arc::new(SubstringExpr::new(
+            child,
+            start as i64,
+            len as u64,
+        )))
+    }
+}
+
+/// Builder for Like expressions
+pub struct LikeBuilder;
+
+impl ExpressionBuilder for LikeBuilder {
+    fn build(
+        &self,
+        spark_expr: &Expr,
+        input_schema: SchemaRef,
+        planner: &PhysicalPlanner,
+    ) -> Result<Arc<dyn PhysicalExpr>, ExecutionError> {
+        let expr = extract_expr!(spark_expr, Like);
+        let left = planner.create_expr(expr.left.as_ref().unwrap(), Arc::clone(&input_schema))?;
+        let right = planner.create_expr(expr.right.as_ref().unwrap(), input_schema)?;
+
+        Ok(Arc::new(LikeExpr::new(false, false, left, right)))
+    }
+}
+
+/// Builder for Rlike (regex like) expressions
+pub struct RlikeBuilder;
+
+impl ExpressionBuilder for RlikeBuilder {
+    fn build(
+        &self,
+        spark_expr: &Expr,
+        input_schema: SchemaRef,
+        planner: &PhysicalPlanner,
+    ) -> Result<Arc<dyn PhysicalExpr>, ExecutionError> {
+        let expr = extract_expr!(spark_expr, Rlike);
+        let left = planner.create_expr(expr.left.as_ref().unwrap(), Arc::clone(&input_schema))?;
+        let right = planner.create_expr(expr.right.as_ref().unwrap(), input_schema)?;
+
+        match right.as_any().downcast_ref::<Literal>().unwrap().value() {
+            ScalarValue::Utf8(Some(pattern)) => Ok(Arc::new(RLike::try_new(left, pattern)?)),
+            _ => Err(ExecutionError::GeneralError(
+                "RLike only supports scalar patterns".to_string(),
+            )),
+        }
+    }
+}
diff --git a/native/core/src/execution/planner.rs b/native/core/src/execution/planner.rs
index 67b2523be..a019823b1 100644
--- a/native/core/src/execution/planner.rs
+++ b/native/core/src/execution/planner.rs
@@ -52,7 +52,7 @@ use datafusion::{
     logical_expr::Operator as DataFusionOperator,
     physical_expr::{
         expressions::{
-            in_list, BinaryExpr, CaseExpr, CastExpr, Column, IsNullExpr, LikeExpr,
+            in_list, BinaryExpr, CaseExpr, CastExpr, Column, IsNullExpr,
             Literal as DataFusionLiteral,
         },
         PhysicalExpr, PhysicalSortExpr, ScalarFunctionExpr,
@@ -124,9 +124,9 @@ use datafusion_comet_proto::{
use datafusion_comet_spark_expr::monotonically_increasing_id::MonotonicallyIncreasingId;
 use datafusion_comet_spark_expr::{
    ArrayInsert, Avg, AvgDecimal, Cast, CheckOverflow, Correlation, Covariance, CreateNamedStruct,
-    GetArrayStructFields, GetStructField, IfExpr, ListExtract, NormalizeNaNAndZero, RLike,
-    RandExpr, RandnExpr, SparkCastOptions, Stddev, SubstringExpr, SumDecimal, TimestampTruncExpr,
-    ToJson, UnboundColumn, Variance,
+    GetArrayStructFields, GetStructField, IfExpr, ListExtract, NormalizeNaNAndZero, RandExpr,
+    RandnExpr, SparkCastOptions, Stddev, SumDecimal, TimestampTruncExpr, ToJson, UnboundColumn,
+    Variance,
 };
 use itertools::Itertools;
 use jni::objects::GlobalRef;
@@ -433,39 +433,6 @@ impl PhysicalPlanner {
 
                 Ok(Arc::new(TimestampTruncExpr::new(child, format, timezone)))
             }
-            ExprStruct::Substring(expr) => {
-                let child = self.create_expr(expr.child.as_ref().unwrap(), input_schema)?;
-                // Spark Substring's start is 1-based when start > 0
-                let start = expr.start - i32::from(expr.start > 0);
-                // substring negative len is treated as 0 in Spark
-                let len = max(expr.len, 0);
-
-                Ok(Arc::new(SubstringExpr::new(
-                    child,
-                    start as i64,
-                    len as u64,
-                )))
-            }
-            ExprStruct::Like(expr) => {
-                let left =
-                    self.create_expr(expr.left.as_ref().unwrap(), Arc::clone(&input_schema))?;
-                let right = self.create_expr(expr.right.as_ref().unwrap(), input_schema)?;
-
-                Ok(Arc::new(LikeExpr::new(false, false, left, right)))
-            }
-            ExprStruct::Rlike(expr) => {
-                let left =
-                    self.create_expr(expr.left.as_ref().unwrap(), Arc::clone(&input_schema))?;
-                let right = self.create_expr(expr.right.as_ref().unwrap(), input_schema)?;
-                match right.as_any().downcast_ref::<Literal>().unwrap().value() {
-                    ScalarValue::Utf8(Some(pattern)) => {
-                        Ok(Arc::new(RLike::try_new(left, pattern)?))
-                    }
-                    _ => Err(GeneralError(
-                        "RLike only supports scalar patterns".to_string(),
-                    )),
-                }
-            }
             ExprStruct::CheckOverflow(expr) => {
                let child = self.create_expr(expr.child.as_ref().unwrap(), input_schema)?;
                let data_type = to_arrow_datatype(expr.datatype.as_ref().unwrap());
diff --git a/native/core/src/execution/planner/expression_registry.rs b/native/core/src/execution/planner/expression_registry.rs
index 227484ca8..3321f6118 100644
--- a/native/core/src/execution/planner/expression_registry.rs
+++ b/native/core/src/execution/planner/expression_registry.rs
@@ -177,8 +177,10 @@ impl ExpressionRegistry {
         // Register null check expressions
         self.register_null_check_expressions();
 
+        // Register string expressions
+        self.register_string_expressions();
+
         // TODO: Register other expression categories in future phases
-        // self.register_string_expressions();
         // self.register_temporal_expressions();
         // etc.
     }
@@ -269,6 +271,18 @@ impl ExpressionRegistry {
             .insert(ExpressionType::IsNotNull, Box::new(IsNotNullBuilder));
     }
 
+    /// Register string expression builders
+    fn register_string_expressions(&mut self) {
+        use crate::execution::expressions::strings::*;
+
+        self.builders
+            .insert(ExpressionType::Substring, Box::new(SubstringBuilder));
+        self.builders
+            .insert(ExpressionType::Like, Box::new(LikeBuilder));
+        self.builders
+            .insert(ExpressionType::Rlike, Box::new(RlikeBuilder));
+    }
+
     /// Extract expression type from Spark protobuf expression
    fn get_expression_type(spark_expr: &Expr) -> Result<ExpressionType, ExecutionError> {
         match spark_expr.expr_struct.as_ref() {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to