uros-db commented on code in PR #47154: URL: https://github.com/apache/spark/pull/47154#discussion_r1700144582
########## sql/core/src/test/scala/org/apache/spark/sql/CollationSQLExpressionsSuite.scala: ########## @@ -1789,44 +1830,135 @@ class CollationSQLExpressionsSuite s"named_struct('f2', collate('$elt', '${t.collationId}')), 'f3', 1)").mkString(",") }.mkString(",") - val tableName = s"t_${t.collationId}_mode_nested_struct" + val tableName = s"t_${t.collationId}_mode_nested_struct1" withTable(tableName) { sql(s"CREATE TABLE ${tableName}(i STRUCT<f1: STRUCT<f2: STRING COLLATE " + t.collationId + ">, f3: INT>) USING parquet") sql(s"INSERT INTO ${tableName} VALUES " + valuesToAdd) val query = s"SELECT lower(mode(i).f1.f2) FROM ${tableName}" - if(t.collationId == "UTF8_LCASE" || - t.collationId == "unicode_ci" || - t.collationId == "unicode") { - // Cannot resolve "mode(i)" due to data type mismatch: - // Input to function mode was a complex type with strings collated on non-binary - // collations, which is not yet supported.. SQLSTATE: 42K09; line 1 pos 13; - val params = Seq(("sqlExpr", "\"mode(i)\""), - ("msg", "The input to the function 'mode' " + - "was a type of binary-unstable type that is not currently supported by mode."), - ("hint", "")).toMap - checkError( - exception = intercept[AnalysisException] { - sql(query) - }, - errorClass = "DATATYPE_MISMATCH.TYPE_CHECK_FAILURE_WITH_HINT", - parameters = params, - queryContext = Array( - ExpectedContext(objectType = "", - objectName = "", - startIndex = 13, - stopIndex = 19, - fragment = "mode(i)") - ) - ) - } else { - checkAnswer(sql(query), Row(t.result)) - } + checkAnswer(sql(query), Row(t.result)) } }) } +// +// test("Support mode for string expression with collated strings in " + +// "recursively nested struct with map with collated keys") { +// case class ModeTestCase[R](collationId: String, bufferValues: Map[String, Long], result: R) +// val testCases = Seq( +// ModeTestCase("utf8_binary", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "{a -> 1}"), +// ModeTestCase("unicode", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "{a -> 
1}") +// ) +// testCases.foreach(t => { +// val valuesToAdd = t.bufferValues.map { case (elt, numRepeats) => +// (0L to numRepeats).map(_ => +// s"named_struct('m1', " + +// s"map(" + +// s"collate(" + +// s"'$elt', '${t.collationId}'" + +// s"), " + +// s"1))").mkString(",") +// }.mkString(",") +// +// val tableName = s"t_${t.collationId}_mode_nested_struct1" +// withTable(tableName) { +// sql(s"CREATE TABLE ${tableName}(i STRUCT<m1: MAP<STRING COLLATE " + +// t.collationId + ", INT>>) USING parquet") +// sql(s"INSERT INTO ${tableName} VALUES " + valuesToAdd) +// val query = s"SELECT lower(cast(mode(i).m1 as string))" + +// s" FROM ${tableName}" +// if (t.collationId == "utf8_binary" || t.collationId == "unicode") { +// checkAnswer(sql(query), Row(t.result)) +// } else { +// checkError( +// exception = intercept[AnalysisException] { +// val testQuery = sql(query) +// testQuery.collect() +// }, +// errorClass = "DATATYPE_MISMATCH.TYPE_CHECK_FAILURE_WITH_HINT", +// parameters = Map.apply(("sqlExpr", "\"mode(i)\""), ("msg", +// "The input to the function 'mode' includes a map with " + +// "keys and/or values which are not binary-stable." 
+ +// " This is not yetsupported by mode."), ("hint", "")) +// ) +// } +// checkAnswer(sql(query), Row(t.result)) +// } +// }) +// } test("Support mode for string expression with collated strings in array complex type") { + case class ModeTestCase[R](collationId: String, bufferValues: Map[String, Long], result: R) + val testCases = Seq( + ModeTestCase("utf8_binary", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "a"), + ModeTestCase("UTF8_LCASE", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b"), + ModeTestCase("unicode", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "a"), + ModeTestCase("unicode_ci", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b") + ) + testCases.foreach(t => { + val valuesToAdd = t.bufferValues.map { case (elt, numRepeats) => + (0L to numRepeats).map(_ => s"array(named_struct('f2', " + + s"collate('$elt', '${t.collationId}'), 'f3', 1))").mkString(",") + }.mkString(",") + + val tableName = s"t_${t.collationId}_mode_nested_struct2" + withTable(tableName) { + sql(s"CREATE TABLE ${tableName}(" + + s"i ARRAY< STRUCT<f2: STRING COLLATE ${t.collationId}, f3: INT>>)" + + s" USING parquet") + sql(s"INSERT INTO ${tableName} VALUES " + valuesToAdd) + val query = s"SELECT lower(element_at(mode(i).f2, 1)) FROM ${tableName}" + checkAnswer(sql(query), Row(t.result)) + } + }) + } + + test("Support mode for string expression with collated strings in 3D array type") { + case class ModeTestCase[R](collationId: String, bufferValues: Map[String, Long], result: R) + val testCases = Seq( + ModeTestCase("utf8_binary", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "a"), + ModeTestCase("UTF8_LCASE", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b"), + ModeTestCase("unicode", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "a"), + ModeTestCase("unicode_ci", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b") + ) + testCases.foreach(t => { + val valuesToAdd = t.bufferValues.map { case (elt, numRepeats) => + (0L to numRepeats).map(_ => + s"array(" + + s"array(" + + s"array(" + + s"collate('$elt', '${t.collationId}')" + + s")" + + s")" + 
+ s")").mkString(",") Review Comment: I see what you were going for here, but I think we can keep it as just: `s"array(array(array(collate('$elt', '${t.collationId}'))))").mkString(",")` ########## sql/core/src/test/scala/org/apache/spark/sql/CollationSQLExpressionsSuite.scala: ########## @@ -1789,44 +1830,135 @@ class CollationSQLExpressionsSuite s"named_struct('f2', collate('$elt', '${t.collationId}')), 'f3', 1)").mkString(",") }.mkString(",") - val tableName = s"t_${t.collationId}_mode_nested_struct" + val tableName = s"t_${t.collationId}_mode_nested_struct1" withTable(tableName) { sql(s"CREATE TABLE ${tableName}(i STRUCT<f1: STRUCT<f2: STRING COLLATE " + t.collationId + ">, f3: INT>) USING parquet") sql(s"INSERT INTO ${tableName} VALUES " + valuesToAdd) val query = s"SELECT lower(mode(i).f1.f2) FROM ${tableName}" - if(t.collationId == "UTF8_LCASE" || - t.collationId == "unicode_ci" || - t.collationId == "unicode") { - // Cannot resolve "mode(i)" due to data type mismatch: - // Input to function mode was a complex type with strings collated on non-binary - // collations, which is not yet supported.. 
SQLSTATE: 42K09; line 1 pos 13; - val params = Seq(("sqlExpr", "\"mode(i)\""), - ("msg", "The input to the function 'mode' " + - "was a type of binary-unstable type that is not currently supported by mode."), - ("hint", "")).toMap - checkError( - exception = intercept[AnalysisException] { - sql(query) - }, - errorClass = "DATATYPE_MISMATCH.TYPE_CHECK_FAILURE_WITH_HINT", - parameters = params, - queryContext = Array( - ExpectedContext(objectType = "", - objectName = "", - startIndex = 13, - stopIndex = 19, - fragment = "mode(i)") - ) - ) - } else { - checkAnswer(sql(query), Row(t.result)) - } + checkAnswer(sql(query), Row(t.result)) } }) } +// +// test("Support mode for string expression with collated strings in " + +// "recursively nested struct with map with collated keys") { +// case class ModeTestCase[R](collationId: String, bufferValues: Map[String, Long], result: R) +// val testCases = Seq( +// ModeTestCase("utf8_binary", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "{a -> 1}"), +// ModeTestCase("unicode", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "{a -> 1}") +// ) +// testCases.foreach(t => { +// val valuesToAdd = t.bufferValues.map { case (elt, numRepeats) => +// (0L to numRepeats).map(_ => +// s"named_struct('m1', " + +// s"map(" + +// s"collate(" + +// s"'$elt', '${t.collationId}'" + +// s"), " + +// s"1))").mkString(",") +// }.mkString(",") +// +// val tableName = s"t_${t.collationId}_mode_nested_struct1" +// withTable(tableName) { +// sql(s"CREATE TABLE ${tableName}(i STRUCT<m1: MAP<STRING COLLATE " + +// t.collationId + ", INT>>) USING parquet") +// sql(s"INSERT INTO ${tableName} VALUES " + valuesToAdd) +// val query = s"SELECT lower(cast(mode(i).m1 as string))" + +// s" FROM ${tableName}" +// if (t.collationId == "utf8_binary" || t.collationId == "unicode") { +// checkAnswer(sql(query), Row(t.result)) +// } else { +// checkError( +// exception = intercept[AnalysisException] { +// val testQuery = sql(query) +// testQuery.collect() +// }, +// errorClass = 
"DATATYPE_MISMATCH.TYPE_CHECK_FAILURE_WITH_HINT", +// parameters = Map.apply(("sqlExpr", "\"mode(i)\""), ("msg", +// "The input to the function 'mode' includes a map with " + +// "keys and/or values which are not binary-stable." + +// " This is not yetsupported by mode."), ("hint", "")) +// ) +// } +// checkAnswer(sql(query), Row(t.result)) +// } +// }) +// } test("Support mode for string expression with collated strings in array complex type") { + case class ModeTestCase[R](collationId: String, bufferValues: Map[String, Long], result: R) + val testCases = Seq( + ModeTestCase("utf8_binary", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "a"), + ModeTestCase("UTF8_LCASE", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b"), + ModeTestCase("unicode", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "a"), + ModeTestCase("unicode_ci", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b") + ) + testCases.foreach(t => { + val valuesToAdd = t.bufferValues.map { case (elt, numRepeats) => + (0L to numRepeats).map(_ => s"array(named_struct('f2', " + + s"collate('$elt', '${t.collationId}'), 'f3', 1))").mkString(",") + }.mkString(",") + + val tableName = s"t_${t.collationId}_mode_nested_struct2" + withTable(tableName) { + sql(s"CREATE TABLE ${tableName}(" + + s"i ARRAY< STRUCT<f2: STRING COLLATE ${t.collationId}, f3: INT>>)" + + s" USING parquet") + sql(s"INSERT INTO ${tableName} VALUES " + valuesToAdd) + val query = s"SELECT lower(element_at(mode(i).f2, 1)) FROM ${tableName}" + checkAnswer(sql(query), Row(t.result)) + } + }) + } + + test("Support mode for string expression with collated strings in 3D array type") { + case class ModeTestCase[R](collationId: String, bufferValues: Map[String, Long], result: R) + val testCases = Seq( + ModeTestCase("utf8_binary", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "a"), + ModeTestCase("UTF8_LCASE", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b"), + ModeTestCase("unicode", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "a"), + ModeTestCase("unicode_ci", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b") + 
) + testCases.foreach(t => { + val valuesToAdd = t.bufferValues.map { case (elt, numRepeats) => + (0L to numRepeats).map(_ => + s"array(" + + s"array(" + + s"array(" + + s"collate('$elt', '${t.collationId}')" + + s")" + + s")" + + s")").mkString(",") + }.mkString(",") + + val tableName = s"t_${t.collationId}_mode_nested_3d_array" + withTable(tableName) { + sql(s"CREATE TABLE ${tableName}(" + + s"i ARRAY<" + + s"ARRAY<" + + s"ARRAY<" + + s"STRING COLLATE ${t.collationId}" + + s">" + + s">" + + s">)" + + s" USING parquet") Review Comment: same here -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org