[ https://issues.apache.org/jira/browse/SPARK-34723?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17368894#comment-17368894 ]
Andrew Olson commented on SPARK-34723:
--------------------------------------

In case it might be helpful to anyone, the compilation failure's stack trace looks something like this.

{noformat}
[main] ERROR org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator - failed to compile: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 32, Column 84: IDENTIFIER expected instead of '['
org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 32, Column 84: IDENTIFIER expected instead of '['
    at org.codehaus.janino.TokenStreamImpl.read(TokenStreamImpl.java:196)
    at org.codehaus.janino.Parser.read(Parser.java:3705)
    at org.codehaus.janino.Parser.parseQualifiedIdentifier(Parser.java:446)
    at org.codehaus.janino.Parser.parseReferenceType(Parser.java:2569)
    at org.codehaus.janino.Parser.parseType(Parser.java:2549)
    at org.codehaus.janino.Parser.parseFormalParameter(Parser.java:1688)
    at org.codehaus.janino.Parser.parseFormalParameterList(Parser.java:1639)
    at org.codehaus.janino.Parser.parseFormalParameters(Parser.java:1620)
    at org.codehaus.janino.Parser.parseMethodDeclarationRest(Parser.java:1518)
    at org.codehaus.janino.Parser.parseClassBodyDeclaration(Parser.java:1028)
    at org.codehaus.janino.Parser.parseClassBody(Parser.java:841)
    at org.codehaus.janino.Parser.parseClassDeclarationRest(Parser.java:736)
    at org.codehaus.janino.Parser.parseClassBodyDeclaration(Parser.java:941)
    at org.codehaus.janino.ClassBodyEvaluator.cook(ClassBodyEvaluator.java:234)
    at org.codehaus.janino.SimpleCompiler.cook(SimpleCompiler.java:205)
    at org.codehaus.commons.compiler.Cookable.cook(Cookable.java:80)
    at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.org$apache$spark$sql$catalyst$expressions$codegen$CodeGenerator$$doCompile(CodeGenerator.scala:1403)
    at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:1500)
    at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:1497)
    at org.sparkproject.guava.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3599)
    at org.sparkproject.guava.cache.LocalCache$Segment.loadSync(LocalCache.java:2379)
    at org.sparkproject.guava.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2342)
    at org.sparkproject.guava.cache.LocalCache$Segment.get(LocalCache.java:2257)
    at org.sparkproject.guava.cache.LocalCache.get(LocalCache.java:4000)
    at org.sparkproject.guava.cache.LocalCache.getOrLoad(LocalCache.java:4004)
    at org.sparkproject.guava.cache.LocalCache$LocalLoadingCache.get(LocalCache.java:4874)
    at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.compile(CodeGenerator.scala:1351)
    at org.apache.spark.sql.execution.WholeStageCodegenExec.liftedTree1$1(WholeStageCodegenExec.scala:721)
    at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:720)
    at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:180)
    at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:218)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:215)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:176)
    at org.apache.spark.sql.execution.ProjectExec.doExecute(basicPhysicalOperators.scala:92)
    at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:180)
    at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:218)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:215)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:176)
    at org.apache.spark.sql.execution.SortExec.doExecute(SortExec.scala:112)
    at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:180)
    at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:218)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:215)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:176)
    at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:184)
    at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:188)
    at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:108)
    at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:106)
    at org.apache.spark.sql.execution.command.DataWritingCommandExec.doExecute(commands.scala:131)
    at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:180)
    at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:218)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:215)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:176)
    at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:132)
    at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:131)
    at org.apache.spark.sql.DataFrameWriter.$anonfun$runCommand$1(DataFrameWriter.scala:989)
    at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
    at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
    at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
    at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:772)
    at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
    at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:989)
    at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:438)
    at org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:415)
    at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:293)
    at org.apache.spark.sql.DataFrameWriter.parquet(DataFrameWriter.scala:874)
[main] WARN org.apache.spark.sql.execution.WholeStageCodegenExec - Whole-stage codegen disabled for plan ...
{noformat}

Referenced line of generated code that doesn't compile:

{noformat}
private UTF8String project_subExpr_1(boolean scan_isNull_2, boolean scan_isNull_1, [B scan_value_1, org.apache.spark.unsafe.types.UTF8String scan_value_2) {
{noformat}

Corrected in Spark 3.1.2, it becomes:

{noformat}
private UTF8String project_subExpr_1(org.apache.spark.unsafe.types.UTF8String scan_value_2, boolean scan_isNull_2, boolean scan_isNull_1, byte[] scan_value_1) {
{noformat}

> Correct parameter type for subexpression elimination under whole-stage
> ----------------------------------------------------------------------
>
>                 Key: SPARK-34723
>                 URL: https://issues.apache.org/jira/browse/SPARK-34723
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 3.0.2, 3.1.1, 3.2.0
>            Reporter: L. C. Hsieh
>            Assignee: L. C. Hsieh
>            Priority: Major
>             Fix For: 3.0.3, 3.1.2, 3.2.0
>
>
> There is a bug in subexpression elimination under whole-stage codegen if
> the parameter type is a byte array.
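A note on why the first signature above fails to parse: "[B" is the JVM's runtime name for a byte array (what java.lang.Class#getName returns for byte[].class), which is not valid Java source syntax, so Janino stops at the '[' token exactly as the CompileException reports; the corrected signature uses the source-level spelling "byte[]" instead. A small standalone sketch showing the two spellings (the object name is made up for illustration, and this is not code from the Spark fix itself):

{noformat}
object ByteArrayTypeNames {
  def main(args: Array[String]): Unit = {
    val cls = classOf[Array[Byte]]
    // JVM runtime name: prints "[B" -- the token that appears in the broken signature
    println(cls.getName)
    // Java source-level name: prints "byte[]" -- the spelling the corrected codegen emits
    println(cls.getCanonicalName)
  }
}
{noformat}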
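For anyone on an affected version (3.0.2 or 3.1.1) who wants to check whether their job is exposed before upgrading, below is a minimal sketch of the kind of query that can exercise this code path: a binary column feeding the same expression more than once, so subexpression elimination kicks in under whole-stage codegen, followed by a sort and a parquet write as in the stack trace above. The column names, output path, and the choice of base64 as the repeated expression are arbitrary, and this particular query is not guaranteed to trigger the failure in every environment.

{noformat}
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions._

object SubExprByteArraySketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("SPARK-34723 sketch")
      .master("local[*]")
      .getOrCreate()
    import spark.implicits._

    // A binary column and a string column, mirroring the byte[] / UTF8String
    // parameters of the generated project_subExpr_1 method quoted above.
    val df = Seq(("payload-1", "x"), ("payload-2", "y"))
      .toDF("raw", "tag")
      .withColumn("bin", $"raw".cast("binary"))

    // The same expression over the binary column appears twice, making it a
    // candidate for subexpression elimination inside whole-stage codegen.
    val out = df
      .select(
        when($"tag".isNotNull, base64($"bin")).as("c1"),
        when($"tag" === "x", base64($"bin")).as("c2"))
      .sort("c1")

    // On affected versions a job of this shape can fail codegen compilation as in
    // the stack trace above and fall back to interpreted execution.
    out.write.mode("overwrite").parquet("/tmp/spark-34723-sketch")

    spark.stop()
  }
}
{noformat}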