ilicmarkodb commented on code in PR #55219:
URL: https://github.com/apache/spark/pull/55219#discussion_r3160578242
##########
sql/core/src/test/scala/org/apache/spark/sql/collation/CollationSuite.scala:
##########
@@ -2724,4 +2725,395 @@ class CollationSuite extends DatasourceV2SQLBase with
AdaptiveSparkPlanHelper {
}
}
+
+ test("execute immediate parameter with explicit COLLATE has implicit
strength") {
+ collations.foreach { collation =>
+ checkAnswer(
+ sql(
+ s"""EXECUTE IMMEDIATE
+ | 'SELECT COLLATION(? || "world" COLLATE $collation)'
+ | USING 'hello' COLLATE UNICODE""".stripMargin),
+ Row(s"$fullyQualifiedPrefix$collation"))
+
+ checkAnswer(
+ sql(
+ s"""EXECUTE IMMEDIATE
+ | 'SELECT COLLATION(? || "world" COLLATE $collation)'
+ | USING 'hello' COLLATE UTF8_LCASE""".stripMargin),
+ Row(s"$fullyQualifiedPrefix$collation"))
+
+ checkAnswer(
+ sql(
+ s"""EXECUTE IMMEDIATE 'SELECT ? = "HELLO" COLLATE $collation'
+ | USING 'hello' COLLATE UTF8_LCASE""".stripMargin),
+ Row(collation == "UTF8_LCASE" || collation == "UNICODE_CI"))
+ }
+ }
+
+ test("execute immediate parameter without explicit COLLATE") {
+ checkAnswer(
+ sql(
+ """EXECUTE IMMEDIATE 'SELECT COLLATION(? || "world")'
+ | USING 'hello'""".stripMargin),
+ Row(s"${fullyQualifiedPrefix}UTF8_BINARY"))
+
+ collations.foreach { collation =>
+ checkAnswer(
+ sql(
+ s"""EXECUTE IMMEDIATE
+ | 'SELECT COLLATION(? || "world" COLLATE $collation)'
+ | USING 'hello'""".stripMargin),
+ Row(s"$fullyQualifiedPrefix$collation"))
+ }
+ }
+
+ test("execute immediate parameter implicit vs column collation") {
+ withTable("t") {
+ sql(
+ """CREATE TABLE t (
+ | lcase_col STRING COLLATE UTF8_LCASE,
+ | unicode_col STRING COLLATE UNICODE
+ |) USING parquet""".stripMargin)
+ sql("INSERT INTO t VALUES ('hello', 'hello')")
+
+ checkAnswer(
+ sql(
+ """EXECUTE IMMEDIATE
+ | 'SELECT ? = lcase_col FROM t'
+ | USING 'hello' COLLATE UTF8_LCASE""".stripMargin),
+ Row(true))
+
+ checkError(
+ exception = intercept[AnalysisException] {
+ sql(
+ """EXECUTE IMMEDIATE
+ | 'SELECT ? = unicode_col FROM t'
+ | USING 'hello' COLLATE UTF8_LCASE""".stripMargin)
+ },
+ condition = "INDETERMINATE_COLLATION_IN_EXPRESSION",
+ parameters = Map("expr" ->
+ "\"(CAST(hello AS STRING COLLATE UTF8_LCASE) = unicode_col)\""),
+ queryContext = Array(
+ ExpectedContext("EXECUTE IMMEDIATE", "", 7, 21, "? = unicode_col")))
+ }
+ }
+
+ test("execute immediate complex type parameter collation and strength") {
+ withTable("t") {
+ sql(
+ """CREATE TABLE t (
+ | lcase_col STRING COLLATE UTF8_LCASE,
+ | unicode_col STRING COLLATE UNICODE
+ |) USING parquet""".stripMargin)
+ sql("INSERT INTO t VALUES ('hello', 'hello')")
+
+ checkAnswer(
+ sql(
+ """EXECUTE IMMEDIATE
+ | 'SELECT ?[0] = lcase_col FROM t'
+ | USING ARRAY('hello' COLLATE UTF8_LCASE)""".stripMargin),
+ Row(true))
+
+ checkError(
+ exception = intercept[AnalysisException] {
+ sql(
+ """EXECUTE IMMEDIATE
+ | 'SELECT ?[0] = unicode_col FROM t'
+ | USING ARRAY('hello' COLLATE UTF8_LCASE)""".stripMargin)
+ },
+ condition = "INDETERMINATE_COLLATION_IN_EXPRESSION",
+ parameters = Map("expr" ->
+ "\"(array(hello)[0] = unicode_col)\""),
+ queryContext = Array(
+ ExpectedContext("EXECUTE IMMEDIATE", "", 7, 24, "?[0] =
unicode_col")))
+
+ checkAnswer(
+ sql(
+ """EXECUTE IMMEDIATE
+ | 'SELECT element_at(?, 1) = lcase_col FROM t'
+ | USING ARRAY('hello' COLLATE UTF8_LCASE)""".stripMargin),
+ Row(true))
+
+ checkError(
+ exception = intercept[AnalysisException] {
+ sql(
+ """EXECUTE IMMEDIATE
+ | 'SELECT element_at(?, 1) = unicode_col FROM t'
+ | USING ARRAY('hello' COLLATE UTF8_LCASE)""".stripMargin)
+ },
+ condition = "INDETERMINATE_COLLATION_IN_EXPRESSION",
+ parameters = Map("expr" ->
+ "\"(element_at(array(hello), 1) = unicode_col)\""),
+ queryContext = Array(
+ ExpectedContext("EXECUTE IMMEDIATE", "", 7, 36,
+ "element_at(?, 1) = unicode_col")))
+
+ checkAnswer(
+ sql(
+ s"""EXECUTE IMMEDIATE
+ | 'SELECT element_at(?, "key") = lcase_col FROM t'
+ | USING MAP('key', 'hello' COLLATE UTF8_LCASE)""".stripMargin),
+ Row(true))
+
+ checkError(
+ exception = intercept[AnalysisException] {
+ sql(
+ s"""EXECUTE IMMEDIATE
+ | 'SELECT element_at(?, "key") = unicode_col FROM t'
+ | USING MAP('key', 'hello' COLLATE UTF8_LCASE)""".stripMargin)
+ },
+ condition = "INDETERMINATE_COLLATION_IN_EXPRESSION",
+ parameters = Map("expr" ->
+ "\"(element_at(map(key, hello), key) = unicode_col)\""),
+ queryContext = Array(
+ ExpectedContext(
+ "EXECUTE IMMEDIATE", "", 7, 40,
+ """element_at(?, "key") = unicode_col""")))
+
+ checkAnswer(
+ sql(
+ """EXECUTE IMMEDIATE
+ | 'SELECT ?.f1 = lcase_col FROM t'
+ | USING NAMED_STRUCT('f1', 'hello' COLLATE
UTF8_LCASE)""".stripMargin),
+ Row(true))
+
+ checkError(
+ exception = intercept[AnalysisException] {
+ sql(
+ """EXECUTE IMMEDIATE
+ | 'SELECT ?.f1 = unicode_col FROM t'
+ | USING NAMED_STRUCT('f1', 'hello' COLLATE
UTF8_LCASE)""".stripMargin)
+ },
+ condition = "INDETERMINATE_COLLATION_IN_EXPRESSION",
+ parameters = Map("expr" ->
+ "\"(named_struct(f1, hello).f1 = unicode_col)\""),
+ queryContext = Array(
+ ExpectedContext("EXECUTE IMMEDIATE", "", 7, 24, "?.f1 =
unicode_col")))
+ }
+ }
+
+ test("execute immediate complex type parameter with explicit COLLATE") {
+ collations.foreach { collation =>
+ checkAnswer(
+ sql(
+ s"""EXECUTE IMMEDIATE 'SELECT COLLATION(?[0])'
+ | USING ARRAY('hello' COLLATE $collation)""".stripMargin),
+ Row(s"$fullyQualifiedPrefix$collation"))
+
+ checkAnswer(
+ sql(
+ s"""EXECUTE IMMEDIATE 'SELECT COLLATION(element_at(?, "value"))'
+ | USING MAP('value', 'hello' COLLATE $collation)""".stripMargin),
+ Row(s"$fullyQualifiedPrefix$collation"))
+
+ checkAnswer(
+ sql(
+ s"""EXECUTE IMMEDIATE 'SELECT COLLATION(?.f1)'
+ | USING NAMED_STRUCT('f1', 'hello' COLLATE
$collation)""".stripMargin),
+ Row(s"$fullyQualifiedPrefix$collation"))
+ }
+ }
+
+ test("execute immediate variable parameter preserves collation") {
+ collations.foreach { collation =>
+ withSessionVariable("v1") {
+ sql(s"DECLARE VARIABLE v1 STRING COLLATE $collation DEFAULT 'hello'")
+ checkAnswer(
+ sql("EXECUTE IMMEDIATE 'SELECT COLLATION(?)' USING v1"),
+ Row(s"$fullyQualifiedPrefix$collation"))
+ }
+ }
+ }
+
+ test("execute immediate variable parameter has implicit strength") {
+ collations.foreach { collation =>
+ withSessionVariable("v1") {
+ sql("DECLARE VARIABLE v1 STRING COLLATE UTF8_LCASE DEFAULT 'hello'")
+ checkAnswer(
+ sql(
+ s"""EXECUTE IMMEDIATE 'SELECT ? = "HELLO" COLLATE $collation'
+ | USING v1""".stripMargin),
+ Row(collation == "UTF8_LCASE" || collation == "UNICODE_CI"))
+ }
+ }
+ }
+
+ test("execute immediate variable parameter implicit vs column collation") {
+ withTable("t") {
+ sql(
+ """CREATE TABLE t (
+ | lcase_col STRING COLLATE UTF8_LCASE,
+ | unicode_col STRING COLLATE UNICODE
+ |) USING parquet""".stripMargin)
+ sql("INSERT INTO t VALUES ('hello', 'hello')")
+
+ withSessionVariable("v1") {
+ sql("DECLARE VARIABLE v1 STRING COLLATE UTF8_LCASE DEFAULT 'hello'")
+ checkAnswer(
+ sql("EXECUTE IMMEDIATE 'SELECT ? = lcase_col FROM t' USING v1"),
+ Row(true))
+
+ checkError(
+ exception = intercept[AnalysisException] {
+ sql("EXECUTE IMMEDIATE 'SELECT ? = unicode_col FROM t' USING v1")
+ },
+ condition = "INDETERMINATE_COLLATION_IN_EXPRESSION",
+ parameters = Map("expr" ->
+ "\"(CAST(hello AS STRING COLLATE UTF8_LCASE) = unicode_col)\""),
+ queryContext = Array(
+ ExpectedContext("EXECUTE IMMEDIATE", "", 7, 21, "? =
unicode_col")))
+ }
+ }
+ }
+
+ test("execute immediate two parameters with different collations") {
+ checkError(
+ exception = intercept[AnalysisException] {
+ sql(
+ """EXECUTE IMMEDIATE 'SELECT ? = ?'
+ | USING 'hello' COLLATE UTF8_LCASE, 'hello' COLLATE
UNICODE""".stripMargin)
+ },
+ condition = "INDETERMINATE_COLLATION_IN_EXPRESSION",
+ parameters = Map("expr" ->
+ "\"(CAST(hello AS STRING COLLATE UTF8_LCASE) = CAST(hello AS STRING
COLLATE UNICODE))\""),
+ queryContext = Array(
+ ExpectedContext("EXECUTE IMMEDIATE", "", 7, 11, "? = ?")))
+
+ withSessionVariable("v1", "v2") {
+ sql("DECLARE VARIABLE v1 STRING COLLATE UTF8_LCASE DEFAULT 'hello'")
+ sql("DECLARE VARIABLE v2 STRING COLLATE UNICODE DEFAULT 'hello'")
+
+ checkError(
+ exception = intercept[AnalysisException] {
+ sql("EXECUTE IMMEDIATE 'SELECT ? = ?' USING v1, v2")
+ },
+ condition = "INDETERMINATE_COLLATION_IN_EXPRESSION",
+ parameters = Map("expr" ->
+ "\"(CAST(hello AS STRING COLLATE UTF8_LCASE) = CAST(hello AS STRING
COLLATE UNICODE))\""),
+ queryContext = Array(
+ ExpectedContext("EXECUTE IMMEDIATE", "", 7, 11, "? = ?")))
+ }
+
+ withSessionVariable("v1") {
+ sql("DECLARE VARIABLE v1 STRING COLLATE UNICODE DEFAULT 'hello'")
+
+ checkError(
+ exception = intercept[AnalysisException] {
+ sql(
+ """EXECUTE IMMEDIATE 'SELECT ? = ?'
+ | USING v1, 'hello' COLLATE UTF8_LCASE""".stripMargin)
+ },
+ condition = "INDETERMINATE_COLLATION_IN_EXPRESSION",
+ parameters = Map("expr" ->
+ "\"(CAST(hello AS STRING COLLATE UNICODE) = CAST(hello AS STRING
COLLATE UTF8_LCASE))\""),
+ queryContext = Array(
+ ExpectedContext("EXECUTE IMMEDIATE", "", 7, 11, "? = ?")))
+ }
+ }
+
+ test("execute immediate null parameter with collation") {
+ checkAnswer(
+ sql(
+ """EXECUTE IMMEDIATE 'SELECT COLLATION(COALESCE(?, "hello"))'
+ | USING NULL""".stripMargin),
+ Row(s"${fullyQualifiedPrefix}UTF8_BINARY"))
+
+ checkAnswer(
+ sql(
+ """EXECUTE IMMEDIATE 'SELECT COALESCE(?, "hello") = "hello"'
+ | USING NULL""".stripMargin),
+ Row(true))
+
+ withSessionVariable("v1") {
+ sql("DECLARE VARIABLE v1 STRING COLLATE UTF8_LCASE")
+ checkAnswer(
+ sql("EXECUTE IMMEDIATE 'SELECT ?, COLLATION(?)' USING v1, v1"),
+ Row(null, s"${fullyQualifiedPrefix}UTF8_LCASE"))
+ }
+
+ checkAnswer(
+ sql(
+ """EXECUTE IMMEDIATE 'SELECT COLLATION(COALESCE(?, "hello"))'
+ | USING CAST(NULL AS STRING COLLATE UNICODE)""".stripMargin),
+ Row("null"))
Review Comment:
done, thanks!
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParameterHandler.scala:
##########
@@ -107,14 +109,37 @@ object ParameterHandler {
* @param expr The expression to convert (must be a Literal)
* @return SQL string representation
*/
- private def convertToSql(expr: Expression): String = expr match {
- case lit: Literal => lit.sql
- case other =>
- throw new IllegalArgumentException(
- s"ParameterHandler only accepts resolved Literal expressions. " +
- s"Received: ${other.getClass.getSimpleName}. " +
- s"All parameters must be resolved using
SparkSession.resolveAndValidateParameters " +
- s"before being passed to the pre-parser.")
+ private def convertToSql(expr: Expression): String = {
+ // Converts an expression to its SQL representation. If the expression's
type contains collated
+ // types, strips collations from nested literals and wraps the whole
expression in
+ // CAST to preserve the collation with implicit strength. Without this,
Literal.sql
+ // produces `'value' COLLATE collationName` which re-parses with explicit
strength.
+ def toSqlWithImplicitCollation(e: Expression): String = {
+ if (!DataTypeUtils.hasNonDefaultStringCharOrVarcharType(e.dataType)) {
+ e.sql
+ } else {
+ val stripped = e.transform {
+ case lit: Literal
+ if
DataTypeUtils.hasNonDefaultStringCharOrVarcharType(lit.dataType) =>
+ Literal.create(
+ lit.value,
SchemaUtils.replaceCollatedStringWithString(lit.dataType))
+ }
+ s"CAST(${stripped.sql} AS ${e.dataType.sql})"
+ }
+ }
+
+ expr match {
+ case lit: Literal if lit.value == null =>
+ lit.sql
Review Comment:
done, thanks!
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]