This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 9241b8e8c0df [SPARK-48229][SQL] Add collation support for inputFile expressions
9241b8e8c0df is described below

commit 9241b8e8c0dfe35fbe1631fd440527eb72d88de8
Author: Uros Bojanic <157381213+uros...@users.noreply.github.com>
AuthorDate: Tue May 14 14:08:30 2024 +0800

    [SPARK-48229][SQL] Add collation support for inputFile expressions
    
    ### What changes were proposed in this pull request?
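    Introduce collation awareness for inputFile expressions: input_file_name now reports its result type as the session's default string type (SQLConf.get.defaultStringType) instead of a fixed StringType.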
    
    ### Why are the changes needed?
    Add collation support for inputFile expressions in Spark.
    
    ### Does this PR introduce _any_ user-facing change?
    Yes, the string returned by the inputFile function input_file_name now carries the session's default collation, so users can work with its result as a collated string.
    
    ### How was this patch tested?
    End-to-end SQL tests added to CollationSQLExpressionsSuite.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No.
    
    Closes #46503 from uros-db/input-file-block.
    
    Authored-by: Uros Bojanic <157381213+uros...@users.noreply.github.com>
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
---
 .../spark/sql/catalyst/expressions/inputFileBlock.scala |  5 +++--
 .../apache/spark/sql/CollationSQLExpressionsSuite.scala | 17 +++++++++++++++++
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/inputFileBlock.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/inputFileBlock.scala
index 6cd88367aa9a..65eb995ff32f 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/inputFileBlock.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/inputFileBlock.scala
@@ -21,7 +21,8 @@ import org.apache.spark.rdd.InputFileBlockHolder
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, 
CodeGenerator, ExprCode, FalseLiteral}
 import org.apache.spark.sql.catalyst.expressions.codegen.Block._
-import org.apache.spark.sql.types.{DataType, LongType, StringType}
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.types.{DataType, LongType}
 import org.apache.spark.unsafe.types.UTF8String
 
 // scalastyle:off whitespace.end.of.line
@@ -39,7 +40,7 @@ case class InputFileName() extends LeafExpression with 
Nondeterministic {
 
   override def nullable: Boolean = false
 
-  override def dataType: DataType = StringType
+  override def dataType: DataType = SQLConf.get.defaultStringType
 
   override def prettyName: String = "input_file_name"
 
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/CollationSQLExpressionsSuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/CollationSQLExpressionsSuite.scala
index dd5703d1284a..22b29154cd78 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/CollationSQLExpressionsSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/CollationSQLExpressionsSuite.scala
@@ -1275,6 +1275,23 @@ class CollationSQLExpressionsSuite
     })
   }
 
+  test("Support InputFileName expression with collation") {
+    // Supported collations
+    Seq("UTF8_BINARY", "UTF8_BINARY_LCASE", "UNICODE", 
"UNICODE_CI").foreach(collationName => {
+      val query =
+        s"""
+           |select input_file_name()
+           |""".stripMargin
+      // Result
+      withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collationName) {
+        val testQuery = sql(query)
+        checkAnswer(testQuery, Row(""))
+        val dataType = StringType(collationName)
+        assert(testQuery.schema.fields.head.dataType.sameType(dataType))
+      }
+    })
+  }
+
   // TODO: Add more tests for other SQL expressions
 
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to