HyukjinKwon commented on PR #46649: URL: https://github.com/apache/spark/pull/46649#issuecomment-2118744355
For a bit of more context, the test fails as below: ``` org.apache.spark.sql.AnalysisException: [DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "raise_error(USER_RAISED_EXCEPTION, map(errorMessage, 'aa' collate UTF8_BINARY_LCASE))" due to data type mismatch: The second parameter requires the "MAP<STRING, STRING>" type, however "map(errorMessage, 'aa' collate UTF8_BINARY_LCASE)" has the type "MAP<STRING, STRING COLLATE UTF8_BINARY_LCASE>". SQLSTATE: 42K09; line 1 pos 7; 'Project [unresolvedalias(raise_error(cast(USER_RAISED_EXCEPTION as string collate UTF8_BINARY_LCASE), map(errorMessage, aa), NullType))] +- OneRowRelation at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.dataTypeMismatch(package.scala:73) at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis0$7(CheckAnalysis.scala:315) at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis0$7$adapted(CheckAnalysis.scala:302) at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:244) at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$foreachUp$1(TreeNode.scala:243) at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$foreachUp$1$adapted(TreeNode.scala:243) at scala.collection.immutable.Vector.foreach(Vector.scala:2124) at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:243) at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis0$6(CheckAnalysis.scala:302) at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis0$6$adapted(CheckAnalysis.scala:302) at scala.collection.immutable.List.foreach(List.scala:334) at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis0$2(CheckAnalysis.scala:302) at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis0$2$adapted(CheckAnalysis.scala:216) at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:244) at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.checkAnalysis0(CheckAnalysis.scala:216) at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.checkAnalysis0$(CheckAnalysis.scala:198) at org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis0(Analyzer.scala:192) at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.checkAnalysis(CheckAnalysis.scala:190) at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.checkAnalysis$(CheckAnalysis.scala:161) at org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:192) at org.apache.spark.sql.catalyst.analysis.Analyzer.$anonfun$executeAndCheck$1(Analyzer.scala:214) at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.markInAnalyzer(AnalysisHelper.scala:393) at org.apache.spark.sql.catalyst.analysis.Analyzer.executeAndCheck(Analyzer.scala:212) at org.apache.spark.sql.execution.QueryExecution.$anonfun$analyzed$1(QueryExecution.scala:92) at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:138) at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$2(QueryExecution.scala:225) at org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:599) at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:225) at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:923) at org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:224) at org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:92) at org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:89) at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:73) at org.apache.spark.sql.Dataset$.$anonfun$ofRows$3(Dataset.scala:118) at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:923) at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:115) at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:660) at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:923) at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:651) at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:681) at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77) at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.base/java.lang.reflect.Method.invoke(Method.java:568) at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244) at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374) at py4j.Gateway.invoke(Gateway.java:282) at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132) at py4j.commands.CallCommand.execute(CallCommand.java:79) at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182) at py4j.ClientServerConnection.run(ClientServerConnection.java:106) at java.base/java.lang.Thread.run(Thread.java:840) ``` because it fails to add a cast from `MapType(StringType, collated StringType)` to `MapType(StringType, StringType)`. For ANSI, the cast is added here: https://github.com/apache/spark/blob/master/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala#L206-L211 cc @gengliangwang too -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org