This is an automated email from the ASF dual-hosted git repository.

philo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 63e83bd30 [CORE] Add support for Spark url_decode function (#5070)
63e83bd30 is described below

commit 63e83bd30f112ae4f305b4decc087153726f94ed
Author: 高阳阳 <gyyco...@gmail.com>
AuthorDate: Fri Mar 22 20:53:45 2024 +0800

    [CORE] Add support for Spark url_decode function (#5070)
---
 .../scala/io/glutenproject/utils/CHExpressionUtil.scala   |  1 +
 .../execution/VeloxFunctionsValidateSuite.scala           | 15 +++++++++++++++
 .../io/glutenproject/expression/ExpressionConverter.scala | 14 ++++++++++++++
 .../io/glutenproject/expression/ExpressionNames.scala     |  1 +
 4 files changed, 31 insertions(+)

diff --git a/backends-clickhouse/src/main/scala/io/glutenproject/utils/CHExpressionUtil.scala b/backends-clickhouse/src/main/scala/io/glutenproject/utils/CHExpressionUtil.scala
index 24555c05c..028e4e9e9 100644
--- a/backends-clickhouse/src/main/scala/io/glutenproject/utils/CHExpressionUtil.scala
+++ b/backends-clickhouse/src/main/scala/io/glutenproject/utils/CHExpressionUtil.scala
@@ -177,6 +177,7 @@ object CHExpressionUtil {
     DATE_FROM_UNIX_DATE -> DefaultValidator(),
     MONOTONICALLY_INCREASING_ID -> DefaultValidator(),
     SPARK_PARTITION_ID -> DefaultValidator(),
+    URL_DECODE -> DefaultValidator(),
     SKEWNESS -> DefaultValidator(),
     BIT_LENGTH -> DefaultValidator()
   )
diff --git a/backends-velox/src/test/scala/io/glutenproject/execution/VeloxFunctionsValidateSuite.scala b/backends-velox/src/test/scala/io/glutenproject/execution/VeloxFunctionsValidateSuite.scala
index 1e9871a6c..ef9c80c4e 100644
--- a/backends-velox/src/test/scala/io/glutenproject/execution/VeloxFunctionsValidateSuite.scala
+++ b/backends-velox/src/test/scala/io/glutenproject/execution/VeloxFunctionsValidateSuite.scala
@@ -458,6 +458,21 @@ class VeloxFunctionsValidateSuite extends VeloxWholeStageTransformerSuite {
     }
   }
 
+  // Verifies that `url_decode` over a parquet-backed column is planned as a
+  // ProjectExecTransformer. NOTE(review): Some("3.4.2") presumably limits which Spark versions run it.
+  testWithSpecifiedSparkVersion("Test url_decode function", Some("3.4.2")) {
+    withTempPath {
+      path =>
+        // One percent-encoded URL is enough input for the decode call.
+        Seq("https%3A%2F%2Fspark.apache.org").toDF("a").write.parquet(path.getCanonicalPath)
+        // Expose the file written above as a temp view; the shared SparkContext log level is left alone.
+        spark.read.parquet(path.getCanonicalPath).createOrReplaceTempView("url_tbl")
+        runQueryAndCompare("select url_decode(a) from url_tbl") {
+          checkOperatorMatch[ProjectExecTransformer]
+        }
+    }
+  }
+
   test("Test hex function") {
     runQueryAndCompare("SELECT hex(l_partkey), hex(l_shipmode) FROM lineitem limit 1") {
       checkOperatorMatch[ProjectExecTransformer]
diff --git a/gluten-core/src/main/scala/io/glutenproject/expression/ExpressionConverter.scala b/gluten-core/src/main/scala/io/glutenproject/expression/ExpressionConverter.scala
index f8f3ead05..4de0ab142 100644
--- a/gluten-core/src/main/scala/io/glutenproject/expression/ExpressionConverter.scala
+++ b/gluten-core/src/main/scala/io/glutenproject/expression/ExpressionConverter.scala
@@ -28,6 +28,7 @@ import org.apache.spark.internal.Logging
 import org.apache.spark.sql.catalyst.{InternalRow, SQLConfHelper}
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode}
+import org.apache.spark.sql.catalyst.expressions.objects.StaticInvoke
 import org.apache.spark.sql.catalyst.optimizer.NormalizeNaNAndZero
 import org.apache.spark.sql.execution.{ScalarSubquery, _}
 import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec
@@ -115,6 +116,19 @@ object ExpressionConverter extends SQLConfHelper with Logging {
         return replaceScalaUDFWithExpressionTransformer(s, attributeSeq, expressionsMap)
       case _ if HiveUDFTransformer.isHiveUDF(expr) =>
         return HiveUDFTransformer.replaceWithExpressionTransformer(expr, attributeSeq)
+      case i: StaticInvoke =>
+        // Only the `decode` method of an object whose name ends in "UrlCodec" is mapped to
+        // URL_DECODE here. Every other static invoke, including other UrlCodec methods, is
+        // left untouched by this case (same outcome as the `case _ =>` branch) instead of
+        // raising a MatchError on an unhandled function name.
+        val objectName = i.staticObject.getName.stripSuffix("$")
+        if (objectName.endsWith("UrlCodec") && i.functionName == "decode") {
+          // Convert the argument once, as a whole expression: Expression.map (TreeNode.map)
+          // would visit every node of the argument's subtree and emit one child per node.
+          val child =
+            replaceWithExpressionTransformerInternal(i.arguments.head, attributeSeq, expressionsMap)
+          return GenericExpressionTransformer(ExpressionNames.URL_DECODE, Seq(child), i)
+        }
       case _ =>
     }
 
diff --git a/shims/common/src/main/scala/io/glutenproject/expression/ExpressionNames.scala b/shims/common/src/main/scala/io/glutenproject/expression/ExpressionNames.scala
index cb9e1ab71..2f3391629 100644
--- a/shims/common/src/main/scala/io/glutenproject/expression/ExpressionNames.scala
+++ b/shims/common/src/main/scala/io/glutenproject/expression/ExpressionNames.scala
@@ -117,6 +117,7 @@ object ExpressionNames {
 
   // URL functions
   final val PARSE_URL = "parse_url"
+  final val URL_DECODE = "url_decode"
 
   // SparkSQL Math functions
   final val ABS = "abs"


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org

Reply via email to