cloud-fan commented on code in PR #55219:
URL: https://github.com/apache/spark/pull/55219#discussion_r3124152825


##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParameterHandler.scala:
##########
@@ -107,14 +109,37 @@ object ParameterHandler {
    * @param expr The expression to convert (must be a Literal)
    * @return SQL string representation
    */
-  private def convertToSql(expr: Expression): String = expr match {
-    case lit: Literal => lit.sql
-    case other =>
-      throw new IllegalArgumentException(
-        s"ParameterHandler only accepts resolved Literal expressions. " +
-        s"Received: ${other.getClass.getSimpleName}. " +
-        s"All parameters must be resolved using 
SparkSession.resolveAndValidateParameters " +
-        s"before being passed to the pre-parser.")
+  private def convertToSql(expr: Expression): String = {
+    // Converts an expression to its SQL representation. If the expression's 
type contains collated
+    // types, strips collations from nested literals and wraps the whole 
expression in
+    // CAST to preserve the collation with implicit strength. Without this, 
Literal.sql
+    // produces `'value' COLLATE collationName` which re-parses with explicit 
strength.
+    def toSqlWithImplicitCollation(e: Expression): String = {
+      if (!DataTypeUtils.hasNonDefaultStringCharOrVarcharType(e.dataType)) {
+        e.sql
+      } else {
+        val stripped = e.transform {
+          case lit: Literal
+              if 
DataTypeUtils.hasNonDefaultStringCharOrVarcharType(lit.dataType) =>
+            Literal.create(
+              lit.value, 
SchemaUtils.replaceCollatedStringWithString(lit.dataType))
+        }
+        s"CAST(${stripped.sql} AS ${e.dataType.sql})"
+      }
+    }
+
+    expr match {
+      case lit: Literal if lit.value == null =>
+        lit.sql

Review Comment:
   The null branch isn't really a bypass: for a typed null like `Literal(null, 
StringType(X))`, `lit.sql` itself emits `CAST(NULL AS STRING COLLATE X)` (see 
`Literal.sql`'s `case _ if value == null => s"CAST(NULL AS ${dataType.sql})"`). 
That CAST's child is a NullType literal, so the `Cast` base case in 
`CollationTypeCoercion` returns Default strength — which is the intended 
behavior. For an untyped `Literal(null, NullType)`, `lit.sql` returns `"NULL"` 
→ NullType literal → also Default.
   
   Consider adding a one-liner above this case noting why delegating to 
`lit.sql` already yields Default strength — it saves the next reader the 
round-trip through `Literal.sql` + `collationStrengthBaseCases`.



##########
sql/core/src/test/scala/org/apache/spark/sql/collation/CollationSuite.scala:
##########
@@ -2724,4 +2725,395 @@ class CollationSuite extends DatasourceV2SQLBase with 
AdaptiveSparkPlanHelper {
     }
   }
 
+
+  test("execute immediate parameter with explicit COLLATE has implicit 
strength") {
+    collations.foreach { collation =>
+      checkAnswer(
+        sql(
+          s"""EXECUTE IMMEDIATE
+             | 'SELECT COLLATION(? || "world" COLLATE $collation)'
+             | USING 'hello' COLLATE UNICODE""".stripMargin),
+        Row(s"$fullyQualifiedPrefix$collation"))
+
+      checkAnswer(
+        sql(
+          s"""EXECUTE IMMEDIATE
+             | 'SELECT COLLATION(? || "world" COLLATE $collation)'
+             | USING 'hello' COLLATE UTF8_LCASE""".stripMargin),
+        Row(s"$fullyQualifiedPrefix$collation"))
+
+      checkAnswer(
+        sql(
+          s"""EXECUTE IMMEDIATE 'SELECT ? = "HELLO" COLLATE $collation'
+             | USING 'hello' COLLATE UTF8_LCASE""".stripMargin),
+        Row(collation == "UTF8_LCASE" || collation == "UNICODE_CI"))
+    }
+  }
+
+  test("execute immediate parameter without explicit COLLATE") {
+    checkAnswer(
+      sql(
+        """EXECUTE IMMEDIATE 'SELECT COLLATION(? || "world")'
+          | USING 'hello'""".stripMargin),
+      Row(s"${fullyQualifiedPrefix}UTF8_BINARY"))
+
+    collations.foreach { collation =>
+      checkAnswer(
+        sql(
+          s"""EXECUTE IMMEDIATE
+             | 'SELECT COLLATION(? || "world" COLLATE $collation)'
+             | USING 'hello'""".stripMargin),
+        Row(s"$fullyQualifiedPrefix$collation"))
+    }
+  }
+
+  test("execute immediate parameter implicit vs column collation") {
+    withTable("t") {
+      sql(
+        """CREATE TABLE t (
+          |  lcase_col STRING COLLATE UTF8_LCASE,
+          |  unicode_col STRING COLLATE UNICODE
+          |) USING parquet""".stripMargin)
+      sql("INSERT INTO t VALUES ('hello', 'hello')")
+
+      checkAnswer(
+        sql(
+          """EXECUTE IMMEDIATE
+            | 'SELECT ? = lcase_col FROM t'
+            | USING 'hello' COLLATE UTF8_LCASE""".stripMargin),
+        Row(true))
+
+      checkError(
+        exception = intercept[AnalysisException] {
+          sql(
+            """EXECUTE IMMEDIATE
+              | 'SELECT ? = unicode_col FROM t'
+              | USING 'hello' COLLATE UTF8_LCASE""".stripMargin)
+        },
+        condition = "INDETERMINATE_COLLATION_IN_EXPRESSION",
+        parameters = Map("expr" ->
+          "\"(CAST(hello AS STRING COLLATE UTF8_LCASE) = unicode_col)\""),
+        queryContext = Array(
+          ExpectedContext("EXECUTE IMMEDIATE", "", 7, 21, "? = unicode_col")))
+    }
+  }
+
+  test("execute immediate complex type parameter collation and strength") {
+    withTable("t") {
+      sql(
+        """CREATE TABLE t (
+          |  lcase_col STRING COLLATE UTF8_LCASE,
+          |  unicode_col STRING COLLATE UNICODE
+          |) USING parquet""".stripMargin)
+      sql("INSERT INTO t VALUES ('hello', 'hello')")
+
+      checkAnswer(
+        sql(
+          """EXECUTE IMMEDIATE
+            | 'SELECT ?[0] = lcase_col FROM t'
+            | USING ARRAY('hello' COLLATE UTF8_LCASE)""".stripMargin),
+        Row(true))
+
+      checkError(
+        exception = intercept[AnalysisException] {
+          sql(
+            """EXECUTE IMMEDIATE
+              | 'SELECT ?[0] = unicode_col FROM t'
+              | USING ARRAY('hello' COLLATE UTF8_LCASE)""".stripMargin)
+        },
+        condition = "INDETERMINATE_COLLATION_IN_EXPRESSION",
+        parameters = Map("expr" ->
+          "\"(array(hello)[0] = unicode_col)\""),
+        queryContext = Array(
+          ExpectedContext("EXECUTE IMMEDIATE", "", 7, 24, "?[0] = 
unicode_col")))
+
+      checkAnswer(
+        sql(
+          """EXECUTE IMMEDIATE
+            | 'SELECT element_at(?, 1) = lcase_col FROM t'
+            | USING ARRAY('hello' COLLATE UTF8_LCASE)""".stripMargin),
+        Row(true))
+
+      checkError(
+        exception = intercept[AnalysisException] {
+          sql(
+            """EXECUTE IMMEDIATE
+              | 'SELECT element_at(?, 1) = unicode_col FROM t'
+              | USING ARRAY('hello' COLLATE UTF8_LCASE)""".stripMargin)
+        },
+        condition = "INDETERMINATE_COLLATION_IN_EXPRESSION",
+        parameters = Map("expr" ->
+          "\"(element_at(array(hello), 1) = unicode_col)\""),
+        queryContext = Array(
+          ExpectedContext("EXECUTE IMMEDIATE", "", 7, 36,
+            "element_at(?, 1) = unicode_col")))
+
+      checkAnswer(
+        sql(
+          s"""EXECUTE IMMEDIATE
+             | 'SELECT element_at(?, "key") = lcase_col FROM t'
+             | USING MAP('key', 'hello' COLLATE UTF8_LCASE)""".stripMargin),
+        Row(true))
+
+      checkError(
+        exception = intercept[AnalysisException] {
+          sql(
+            s"""EXECUTE IMMEDIATE
+               | 'SELECT element_at(?, "key") = unicode_col FROM t'
+               | USING MAP('key', 'hello' COLLATE UTF8_LCASE)""".stripMargin)
+        },
+        condition = "INDETERMINATE_COLLATION_IN_EXPRESSION",
+        parameters = Map("expr" ->
+          "\"(element_at(map(key, hello), key) = unicode_col)\""),
+        queryContext = Array(
+          ExpectedContext(
+            "EXECUTE IMMEDIATE", "", 7, 40,
+            """element_at(?, "key") = unicode_col""")))
+
+      checkAnswer(
+        sql(
+          """EXECUTE IMMEDIATE
+            | 'SELECT ?.f1 = lcase_col FROM t'
+            | USING NAMED_STRUCT('f1', 'hello' COLLATE 
UTF8_LCASE)""".stripMargin),
+        Row(true))
+
+      checkError(
+        exception = intercept[AnalysisException] {
+          sql(
+            """EXECUTE IMMEDIATE
+              | 'SELECT ?.f1 = unicode_col FROM t'
+              | USING NAMED_STRUCT('f1', 'hello' COLLATE 
UTF8_LCASE)""".stripMargin)
+        },
+        condition = "INDETERMINATE_COLLATION_IN_EXPRESSION",
+        parameters = Map("expr" ->
+          "\"(named_struct(f1, hello).f1 = unicode_col)\""),
+        queryContext = Array(
+          ExpectedContext("EXECUTE IMMEDIATE", "", 7, 24, "?.f1 = 
unicode_col")))
+    }
+  }
+
+  test("execute immediate complex type parameter with explicit COLLATE") {
+    collations.foreach { collation =>
+      checkAnswer(
+        sql(
+          s"""EXECUTE IMMEDIATE 'SELECT COLLATION(?[0])'
+             | USING ARRAY('hello' COLLATE $collation)""".stripMargin),
+        Row(s"$fullyQualifiedPrefix$collation"))
+
+      checkAnswer(
+        sql(
+          s"""EXECUTE IMMEDIATE 'SELECT COLLATION(element_at(?, "value"))'
+             | USING MAP('value', 'hello' COLLATE $collation)""".stripMargin),
+        Row(s"$fullyQualifiedPrefix$collation"))
+
+      checkAnswer(
+        sql(
+          s"""EXECUTE IMMEDIATE 'SELECT COLLATION(?.f1)'
+             | USING NAMED_STRUCT('f1', 'hello' COLLATE 
$collation)""".stripMargin),
+        Row(s"$fullyQualifiedPrefix$collation"))
+    }
+  }
+
+  test("execute immediate variable parameter preserves collation") {
+    collations.foreach { collation =>
+      withSessionVariable("v1") {
+        sql(s"DECLARE VARIABLE v1 STRING COLLATE $collation DEFAULT 'hello'")
+        checkAnswer(
+          sql("EXECUTE IMMEDIATE 'SELECT COLLATION(?)' USING v1"),
+          Row(s"$fullyQualifiedPrefix$collation"))
+      }
+    }
+  }
+
+  test("execute immediate variable parameter has implicit strength") {
+    collations.foreach { collation =>
+      withSessionVariable("v1") {
+        sql("DECLARE VARIABLE v1 STRING COLLATE UTF8_LCASE DEFAULT 'hello'")
+        checkAnswer(
+          sql(
+            s"""EXECUTE IMMEDIATE 'SELECT ? = "HELLO" COLLATE $collation'
+               | USING v1""".stripMargin),
+          Row(collation == "UTF8_LCASE" || collation == "UNICODE_CI"))
+      }
+    }
+  }
+
+  test("execute immediate variable parameter implicit vs column collation") {
+    withTable("t") {
+      sql(
+        """CREATE TABLE t (
+          |  lcase_col STRING COLLATE UTF8_LCASE,
+          |  unicode_col STRING COLLATE UNICODE
+          |) USING parquet""".stripMargin)
+      sql("INSERT INTO t VALUES ('hello', 'hello')")
+
+      withSessionVariable("v1") {
+        sql("DECLARE VARIABLE v1 STRING COLLATE UTF8_LCASE DEFAULT 'hello'")
+        checkAnswer(
+          sql("EXECUTE IMMEDIATE 'SELECT ? = lcase_col FROM t' USING v1"),
+          Row(true))
+
+        checkError(
+          exception = intercept[AnalysisException] {
+            sql("EXECUTE IMMEDIATE 'SELECT ? = unicode_col FROM t' USING v1")
+          },
+          condition = "INDETERMINATE_COLLATION_IN_EXPRESSION",
+          parameters = Map("expr" ->
+            "\"(CAST(hello AS STRING COLLATE UTF8_LCASE) = unicode_col)\""),
+          queryContext = Array(
+            ExpectedContext("EXECUTE IMMEDIATE", "", 7, 21, "? = 
unicode_col")))
+      }
+    }
+  }
+
+  test("execute immediate two parameters with different collations") {
+    checkError(
+      exception = intercept[AnalysisException] {
+        sql(
+          """EXECUTE IMMEDIATE 'SELECT ? = ?'
+            | USING 'hello' COLLATE UTF8_LCASE, 'hello' COLLATE 
UNICODE""".stripMargin)
+      },
+      condition = "INDETERMINATE_COLLATION_IN_EXPRESSION",
+      parameters = Map("expr" ->
+        "\"(CAST(hello AS STRING COLLATE UTF8_LCASE) = CAST(hello AS STRING 
COLLATE UNICODE))\""),
+      queryContext = Array(
+        ExpectedContext("EXECUTE IMMEDIATE", "", 7, 11, "? = ?")))
+
+    withSessionVariable("v1", "v2") {
+      sql("DECLARE VARIABLE v1 STRING COLLATE UTF8_LCASE DEFAULT 'hello'")
+      sql("DECLARE VARIABLE v2 STRING COLLATE UNICODE DEFAULT 'hello'")
+
+      checkError(
+        exception = intercept[AnalysisException] {
+          sql("EXECUTE IMMEDIATE 'SELECT ? = ?' USING v1, v2")
+        },
+        condition = "INDETERMINATE_COLLATION_IN_EXPRESSION",
+        parameters = Map("expr" ->
+          "\"(CAST(hello AS STRING COLLATE UTF8_LCASE) = CAST(hello AS STRING 
COLLATE UNICODE))\""),
+        queryContext = Array(
+          ExpectedContext("EXECUTE IMMEDIATE", "", 7, 11, "? = ?")))
+    }
+
+    withSessionVariable("v1") {
+      sql("DECLARE VARIABLE v1 STRING COLLATE UNICODE DEFAULT 'hello'")
+
+      checkError(
+        exception = intercept[AnalysisException] {
+          sql(
+            """EXECUTE IMMEDIATE 'SELECT ? = ?'
+              | USING v1, 'hello' COLLATE UTF8_LCASE""".stripMargin)
+        },
+        condition = "INDETERMINATE_COLLATION_IN_EXPRESSION",
+        parameters = Map("expr" ->
+          "\"(CAST(hello AS STRING COLLATE UNICODE) = CAST(hello AS STRING 
COLLATE UTF8_LCASE))\""),
+        queryContext = Array(
+          ExpectedContext("EXECUTE IMMEDIATE", "", 7, 11, "? = ?")))
+    }
+  }
+
+  test("execute immediate null parameter with collation") {
+    checkAnswer(
+      sql(
+        """EXECUTE IMMEDIATE 'SELECT COLLATION(COALESCE(?, "hello"))'
+          | USING NULL""".stripMargin),
+      Row(s"${fullyQualifiedPrefix}UTF8_BINARY"))
+
+    checkAnswer(
+      sql(
+        """EXECUTE IMMEDIATE 'SELECT COALESCE(?, "hello") = "hello"'
+          | USING NULL""".stripMargin),
+      Row(true))
+
+    withSessionVariable("v1") {
+      sql("DECLARE VARIABLE v1 STRING COLLATE UTF8_LCASE")
+      checkAnswer(
+        sql("EXECUTE IMMEDIATE 'SELECT ?, COLLATION(?)' USING v1, v1"),
+        Row(null, s"${fullyQualifiedPrefix}UTF8_LCASE"))
+    }
+
+    checkAnswer(
+      sql(
+        """EXECUTE IMMEDIATE 'SELECT COLLATION(COALESCE(?, "hello"))'
+          | USING CAST(NULL AS STRING COLLATE UNICODE)""".stripMargin),
+      Row("null"))

Review Comment:
   `Row("null")` looks like a typo but is actually correct: `COLLATION()` 
returns the fully-qualified collation name, and `IndeterminateCollation` in 
`CollationFactory.java` has name `"null"`. Here both sides of the COALESCE have 
Default strength (CAST(NULL AS STRING COLLATE UNICODE) via NullType child; 
literal `"hello"`), with different types → 
`getWinningStringType.handleMismatch()` returns `Indeterminate`. Consider a 
brief inline comment so the next reader doesn't double-take.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to