[ https://issues.apache.org/jira/browse/SPARK-25767?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16655445#comment-16655445 ]
Thomas Brugiere commented on SPARK-25767:
-----------------------------------------

Could you please provide an example? I don't see any Stream in my code.

> Error reported in Spark logs when using the org.apache.spark:spark-sql_2.11:2.3.2 Java library
> -----------------------------------------------------------------------------------------------
>
>                 Key: SPARK-25767
>                 URL: https://issues.apache.org/jira/browse/SPARK-25767
>             Project: Spark
>          Issue Type: Bug
>          Components: Java API
>    Affects Versions: 2.2.0, 2.3.2
>            Reporter: Thomas Brugiere
>            Priority: Major
>         Attachments: fileA.csv, fileB.csv, fileC.csv
>
> Hi,
> Here is a bug I found using spark-sql_2.11:2.2.0. Note that this case was also tested with spark-sql_2.11:2.3.2, and the bug is present there as well.
> This issue is a duplicate of SPARK-25582, which I had to close after an accidental change by another developer (it had been linked to the wrong PR).
> You will find attached three small sample CSV files with the minimal content needed to reproduce the bug.
> A reproducer is below:
> {code:java}
> import org.apache.spark.SparkConf;
> import org.apache.spark.sql.Dataset;
> import org.apache.spark.sql.Row;
> import org.apache.spark.sql.SparkSession;
> import scala.collection.JavaConverters;
> import scala.collection.Seq;
>
> import java.util.Arrays;
>
> public class SparkBug {
>
>     // Convert a Java array to the Scala Seq expected by Dataset.join(right, usingColumns, joinType)
>     private static <T> Seq<T> arrayToSeq(T[] input) {
>         return JavaConverters.asScalaIteratorConverter(Arrays.asList(input).iterator()).asScala().toSeq();
>     }
>
>     public static void main(String[] args) throws Exception {
>         SparkConf conf = new SparkConf().setAppName("SparkBug").setMaster("local");
>         SparkSession sparkSession = SparkSession.builder().config(conf).getOrCreate();
>
>         Dataset<Row> df_a = sparkSession.read().option("header", true).csv("local/fileA.csv").dropDuplicates();
>         Dataset<Row> df_b = sparkSession.read().option("header", true).csv("local/fileB.csv").dropDuplicates();
>         Dataset<Row> df_c = sparkSession.read().option("header", true).csv("local/fileC.csv").dropDuplicates();
>
>         String[] key_join_1 = new String[]{"colA", "colB", "colC", "colD", "colE", "colF"};
>         String[] key_join_2 = new String[]{"colA", "colB", "colC", "colD", "colE"};
>
>         // Two chained left joins on overlapping key sets; the second join triggers the error
>         Dataset<Row> df_inventory_1 = df_a.join(df_b, arrayToSeq(key_join_1), "left");
>         Dataset<Row> df_inventory_2 = df_inventory_1.join(df_c, arrayToSeq(key_join_2), "left");
>
>         df_inventory_2.show();
>     }
> }
> {code}
> When running this code, I see the exception below:
> {code:java}
> 18/10/18 09:25:49 ERROR CodeGenerator: failed to compile: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 202, Column 18: Expression "agg_isNull_28" is not an rvalue
> org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 202, Column 18: Expression "agg_isNull_28" is not an rvalue
> at org.codehaus.janino.UnitCompiler.compileError(UnitCompiler.java:11821)
> at org.codehaus.janino.UnitCompiler.toRvalueOrCompileException(UnitCompiler.java:7170)
> at org.codehaus.janino.UnitCompiler.getConstantValue2(UnitCompiler.java:5332)
> at org.codehaus.janino.UnitCompiler.access$9400(UnitCompiler.java:212)
> at org.codehaus.janino.UnitCompiler$13$1.visitAmbiguousName(UnitCompiler.java:5287)
> at org.codehaus.janino.Java$AmbiguousName.accept(Java.java:4053)
> at org.codehaus.janino.UnitCompiler$13.visitLvalue(UnitCompiler.java:5284)
> at org.codehaus.janino.Java$Lvalue.accept(Java.java:3977)
> at org.codehaus.janino.UnitCompiler.getConstantValue(UnitCompiler.java:5280)
> at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:2391)
> at org.codehaus.janino.UnitCompiler.access$1900(UnitCompiler.java:212)
> at org.codehaus.janino.UnitCompiler$6.visitIfStatement(UnitCompiler.java:1474)
> at org.codehaus.janino.UnitCompiler$6.visitIfStatement(UnitCompiler.java:1466)
> at org.codehaus.janino.Java$IfStatement.accept(Java.java:2926)
> at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:1466)
> at org.codehaus.janino.UnitCompiler.compileStatements(UnitCompiler.java:1546)
> at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:3075)
> at org.codehaus.janino.UnitCompiler.compileDeclaredMethods(UnitCompiler.java:1336)
> at org.codehaus.janino.UnitCompiler.compileDeclaredMethods(UnitCompiler.java:1309)
> at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:799)
> at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:958)
> at org.codehaus.janino.UnitCompiler.access$700(UnitCompiler.java:212)
> at org.codehaus.janino.UnitCompiler$2.visitMemberClassDeclaration(UnitCompiler.java:393)
> at org.codehaus.janino.UnitCompiler$2.visitMemberClassDeclaration(UnitCompiler.java:385)
> at org.codehaus.janino.Java$MemberClassDeclaration.accept(Java.java:1286)
> at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:385)
> at org.codehaus.janino.UnitCompiler.compileDeclaredMemberTypes(UnitCompiler.java:1285)
> at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:825)
> at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:411)
> at org.codehaus.janino.UnitCompiler.access$400(UnitCompiler.java:212)
> at org.codehaus.janino.UnitCompiler$2.visitPackageMemberClassDeclaration(UnitCompiler.java:390)
> at org.codehaus.janino.UnitCompiler$2.visitPackageMemberClassDeclaration(UnitCompiler.java:385)
> at org.codehaus.janino.Java$PackageMemberClassDeclaration.accept(Java.java:1405)
> at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:385)
> at org.codehaus.janino.UnitCompiler.compileUnit(UnitCompiler.java:357)
> at org.codehaus.janino.SimpleCompiler.cook(SimpleCompiler.java:234)
> at org.codehaus.janino.SimpleCompiler.compileToClassLoader(SimpleCompiler.java:446)
> at org.codehaus.janino.ClassBodyEvaluator.compileToClass(ClassBodyEvaluator.java:313)
> at org.codehaus.janino.ClassBodyEvaluator.cook(ClassBodyEvaluator.java:235)
> at org.codehaus.janino.SimpleCompiler.cook(SimpleCompiler.java:204)
> at org.codehaus.commons.compiler.Cookable.cook(Cookable.java:80)
> at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.org$apache$spark$sql$catalyst$expressions$codegen$CodeGenerator$$doCompile(CodeGenerator.scala:1417)
> at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:1493)
> at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:1490)
> at org.spark_project.guava.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3599)
> at org.spark_project.guava.cache.LocalCache$Segment.loadSync(LocalCache.java:2379)
> at org.spark_project.guava.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2342)
> at org.spark_project.guava.cache.LocalCache$Segment.get(LocalCache.java:2257)
> at org.spark_project.guava.cache.LocalCache.get(LocalCache.java:4000)
> at org.spark_project.guava.cache.LocalCache.getOrLoad(LocalCache.java:4004)
> at org.spark_project.guava.cache.LocalCache$LocalLoadingCache.get(LocalCache.java:4874)
> at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.compile(CodeGenerator.scala:1365)
> at org.apache.spark.sql.execution.WholeStageCodegenExec.liftedTree1$1(WholeStageCodegenExec.scala:579)
> at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:578)
> at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
> at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127)
> at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:155)
> at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
> at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152)
> at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127)
> at org.apache.spark.sql.execution.SparkPlan.getByteArrayRdd(SparkPlan.scala:247)
> at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:337)
> at org.apache.spark.sql.execution.CollectLimitExec.executeCollect(limit.scala:38)
> at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collectFromPlan(Dataset.scala:3278)
> at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2489)
> at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2489)
> at org.apache.spark.sql.Dataset$$anonfun$52.apply(Dataset.scala:3259)
> at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:77)
> at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3258)
> at org.apache.spark.sql.Dataset.head(Dataset.scala:2489)
> at org.apache.spark.sql.Dataset.take(Dataset.scala:2703)
> at org.apache.spark.sql.Dataset.showString(Dataset.scala:254)
> at org.apache.spark.sql.Dataset.show(Dataset.scala:723)
> at org.apache.spark.sql.Dataset.show(Dataset.scala:682)
> at org.apache.spark.sql.Dataset.show(Dataset.scala:691)
> at SparkBug.main(SparkBug.java:30)
> {code}
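
For whoever picks this up: the failing expression comes from code generated for one of the WholeStageCodegen stages, so dumping the physical plan before the action runs may help localize which stage emits the bad "agg_isNull_28" reference. A minimal diagnostic sketch, reusing the df_inventory_2 Dataset from the reproducer above:

{code:java}
// Print the parsed, analyzed, optimized and physical plans; the physical plan
// shows which operators are fused into each WholeStageCodegen stage.
df_inventory_2.explain(true);
{code}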
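In case it helps anyone blocked by this while waiting for a fix: a possible mitigation (an assumption on my part, not verified against the attached CSVs) is to disable whole-stage code generation, which skips the Janino compilation path that fails above, at some performance cost. A minimal sketch:

{code:java}
import org.apache.spark.SparkConf;
import org.apache.spark.sql.SparkSession;

public class SparkBugWorkaround {
    public static void main(String[] args) {
        // Assumption: disabling whole-stage codegen sidesteps the generated-Java
        // compilation that fails with "Expression ... is not an rvalue".
        SparkConf conf = new SparkConf()
                .setAppName("SparkBug")
                .setMaster("local")
                .set("spark.sql.codegen.wholeStage", "false");
        SparkSession sparkSession = SparkSession.builder().config(conf).getOrCreate();

        // ... run the same reads and joins as in the reproducer above ...

        sparkSession.stop();
    }
}
{code}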