[ https://issues.apache.org/jira/browse/SPARK-30711?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Dongjoon Hyun updated SPARK-30711: ---------------------------------- Affects Version/s: 2.4.0 2.4.1 2.4.2 2.4.3 > 64KB JVM bytecode limit - janino.InternalCompilerException > ---------------------------------------------------------- > > Key: SPARK-30711 > URL: https://issues.apache.org/jira/browse/SPARK-30711 > Project: Spark > Issue Type: Bug > Components: SQL > Affects Versions: 2.4.0, 2.4.1, 2.4.2, 2.4.3, 2.4.4 > Environment: Windows 10 > Spark 2.4.4 > scalaVersion 2.11.12 > JVM Oracle 1.8.0_221-b11 > Reporter: Frederik Schreiber > Priority: Major > > Exception > {code:java} > ERROR CodeGenerator: failed to compile: > org.codehaus.janino.InternalCompilerException: Compiling "GeneratedClass": > Code of method "processNext()V" of class > "org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage4" > grows beyond 64 KBERROR CodeGenerator: failed to compile: > org.codehaus.janino.InternalCompilerException: Compiling "GeneratedClass": > Code of method "processNext()V" of class > "org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage4" > grows beyond 64 KBorg.codehaus.janino.InternalCompilerException: Compiling > "GeneratedClass": Code of method "processNext()V" of class > "org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage4" > grows beyond 64 KB at > org.codehaus.janino.UnitCompiler.compileUnit(UnitCompiler.java:382) at > org.codehaus.janino.SimpleCompiler.cook(SimpleCompiler.java:237) at > org.codehaus.janino.SimpleCompiler.compileToClassLoader(SimpleCompiler.java:465) > at > org.codehaus.janino.ClassBodyEvaluator.compileToClass(ClassBodyEvaluator.java:313) > at org.codehaus.janino.ClassBodyEvaluator.cook(ClassBodyEvaluator.java:235) > at org.codehaus.janino.SimpleCompiler.cook(SimpleCompiler.java:207) at > org.codehaus.commons.compiler.Cookable.cook(Cookable.java:80) at > org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.org$apache$spark$sql$catalyst$expressions$codegen$CodeGenerator$$doCompile(CodeGenerator.scala:1290) > at > org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:1372) > at > org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:1369) > at > org.spark_project.guava.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3599) > at > org.spark_project.guava.cache.LocalCache$Segment.loadSync(LocalCache.java:2379) > at > org.spark_project.guava.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2342) > at > org.spark_project.guava.cache.LocalCache$Segment.get(LocalCache.java:2257) at > org.spark_project.guava.cache.LocalCache.get(LocalCache.java:4000) at > org.spark_project.guava.cache.LocalCache.getOrLoad(LocalCache.java:4004) at > org.spark_project.guava.cache.LocalCache$LocalLoadingCache.get(LocalCache.java:4874) > at > org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.compile(CodeGenerator.scala:1238) > at > org.apache.spark.sql.execution.WholeStageCodegenExec.liftedTree1$1(WholeStageCodegenExec.scala:584) > at > org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:583) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:155) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) > at > org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152) at > org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127) at > org.apache.spark.sql.execution.SparkPlan.getByteArrayRdd(SparkPlan.scala:247) > at > org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:296) > at > org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collectFromPlan(Dataset.scala:3384) > at org.apache.spark.sql.Dataset$$anonfun$collect$1.apply(Dataset.scala:2783) > at org.apache.spark.sql.Dataset$$anonfun$collect$1.apply(Dataset.scala:2783) > at org.apache.spark.sql.Dataset$$anonfun$53.apply(Dataset.scala:3365) at > org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:78) > at > org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125) > at > org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73) > at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3364) at > org.apache.spark.sql.Dataset.collect(Dataset.scala:2783) at > de.sparkbug.janino.SparkJaninoBug$$anonfun$1.apply(SparkJaninoBug.scala:105) > at > de.sparkbug.janino.SparkJaninoBug$$anonfun$1.apply(SparkJaninoBug.scala:12) > at org.scalatest.OutcomeOf$class.outcomeOf(OutcomeOf.scala:85) at > org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104) at > org.scalatest.Transformer.apply(Transformer.scala:22) at > org.scalatest.Transformer.apply(Transformer.scala:20) at > org.scalatest.FunSuiteLike$$anon$1.apply(FunSuiteLike.scala:186) at > org.scalatest.TestSuite$class.withFixture(TestSuite.scala:196) at > org.scalatest.FunSuite.withFixture(FunSuite.scala:1560) at > org.scalatest.FunSuiteLike$class.invokeWithFixture$1(FunSuiteLike.scala:183) > at > org.scalatest.FunSuiteLike$$anonfun$runTest$1.apply(FunSuiteLike.scala:196) > at > org.scalatest.FunSuiteLike$$anonfun$runTest$1.apply(FunSuiteLike.scala:196) > at org.scalatest.SuperEngine.runTestImpl(Engine.scala:286) at > org.scalatest.FunSuiteLike$class.runTest(FunSuiteLike.scala:196) at > org.scalatest.FunSuite.runTest(FunSuite.scala:1560) at > org.scalatest.FunSuiteLike$$anonfun$runTests$1.apply(FunSuiteLike.scala:229) > at > org.scalatest.FunSuiteLike$$anonfun$runTests$1.apply(FunSuiteLike.scala:229) > at > org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:393) > at > org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:381) > at scala.collection.immutable.List.foreach(List.scala:392) at > org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:381) at > org.scalatest.SuperEngine.org$scalatest$SuperEngine$$runTestsInBranch(Engine.scala:376) > at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:458) at > org.scalatest.FunSuiteLike$class.runTests(FunSuiteLike.scala:229) at > org.scalatest.FunSuite.runTests(FunSuite.scala:1560) at > org.scalatest.Suite$class.run(Suite.scala:1124) at > org.scalatest.FunSuite.org$scalatest$FunSuiteLike$$super$run(FunSuite.scala:1560) > at org.scalatest.FunSuiteLike$$anonfun$run$1.apply(FunSuiteLike.scala:233) > at org.scalatest.FunSuiteLike$$anonfun$run$1.apply(FunSuiteLike.scala:233) at > org.scalatest.SuperEngine.runImpl(Engine.scala:518) at > org.scalatest.FunSuiteLike$class.run(FunSuiteLike.scala:233) at > org.scalatest.FunSuite.run(FunSuite.scala:1560) at > org.scalatest.tools.SuiteRunner.run(SuiteRunner.scala:45) at > org.scalatest.tools.Runner$$anonfun$doRunRunRunDaDoRunRun$1.apply(Runner.scala:1349) > at > org.scalatest.tools.Runner$$anonfun$doRunRunRunDaDoRunRun$1.apply(Runner.scala:1343) > at scala.collection.immutable.List.foreach(List.scala:392) at > org.scalatest.tools.Runner$.doRunRunRunDaDoRunRun(Runner.scala:1343) at > org.scalatest.tools.Runner$$anonfun$runOptionallyWithPassFailReporter$2.apply(Runner.scala:1012) > at > org.scalatest.tools.Runner$$anonfun$runOptionallyWithPassFailReporter$2.apply(Runner.scala:1011) > at > org.scalatest.tools.Runner$.withClassLoaderAndDispatchReporter(Runner.scala:1509) > at > org.scalatest.tools.Runner$.runOptionallyWithPassFailReporter(Runner.scala:1011) > at org.scalatest.tools.Runner$.run(Runner.scala:850) at > org.scalatest.tools.Runner.run(Runner.scala) at > org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.runScalaTest2(ScalaTestRunner.java:133) > at > org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.main(ScalaTestRunner.java:27)Caused > by: org.codehaus.janino.InternalCompilerException: Code of method > "processNext()V" of class > "org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage4" > grows beyond 64 KB at > org.codehaus.janino.CodeContext.makeSpace(CodeContext.java:1009){code} > Example code: > {code:java} > package de.sparkbug.janino > import java.sql.Date > import org.apache.spark.sql.functions._ > import org.apache.spark.sql.types._ > import org.apache.spark.sql.{Row, SparkSession} > import org.scalatest.FunSuite > class SparkJaninoBug extends FunSuite { > test("test janino compile bug") { > val spark: SparkSession = SparkSession > .builder() > .appName("Test Spark App") > .master("local[*]") > .getOrCreate() > import spark.implicits._ > val br_schema = StructType(Seq( > StructField("S_ID", IntegerType, nullable = true), > StructField("D_ID", LongType, nullable = true), > StructField("TYPE", StringType, nullable = true), > StructField("RT", StringType, nullable = true), > StructField("BR_ID", StringType, nullable = true), > StructField("I_ID", DateType, nullable = true), > StructField("SG_ID", IntegerType, nullable = true), > StructField("S_ID_MAIN", IntegerType, nullable = true), > StructField("PT_ID", IntegerType, nullable = true), > StructField("C_ID", IntegerType, nullable = true), > StructField("CF_ID", IntegerType, nullable = true), > StructField("P_ID", IntegerType, nullable = true), > StructField("RT_ID", IntegerType, nullable = true), > StructField("BT_ID", IntegerType, nullable = true), > StructField("I_T", StringType, nullable = true), > StructField("A", DoubleType, nullable = true), > StructField("T_A", DoubleType, nullable = true), > StructField("B_S_DT", DateType, nullable = true), > StructField("B_E_DT", DateType, nullable = true), > StructField("B_M_DT", DateType, nullable = true), > StructField("BR_P_A", DoubleType, nullable = true), > StructField("BR_D_A", IntegerType, nullable = true), > StructField("BR_B_D", IntegerType, nullable = true), > StructField("BR_B_A", DoubleType, nullable = true) > )) > val b_data = Seq(Row(111, 2804711765L, "D", "recano", "1017888", > Date.valueOf("2019-11-20"),1,111,1,1,1,2,1,1,"",0.0,0.0,Date.valueOf("2019-10-01"),Date.valueOf("2019-10-31"),Date.valueOf("2019-10-31"),0.0,30,30,0.0)) > val df_b = spark.createDataFrame(spark.sparkContext.parallelize(b_data), > br_schema) > val schema = StructType(Seq( > StructField( "D_ID", LongType, nullable = true), > StructField( "RT_ID", IntegerType, nullable = true), > StructField( "P_ID", IntegerType, nullable = true), > StructField( "BT_ID", IntegerType, nullable = true), > StructField( "CF_ID", IntegerType, nullable = true), > StructField( "B_ID", IntegerType, nullable = true), > StructField( "S_ID", IntegerType, nullable = true), > StructField( "SG_ID", IntegerType, nullable = true), > StructField( "PT_ID", IntegerType, nullable = true), > StructField( "C_ID", IntegerType, nullable = true), > StructField( "N_C_DT", DateType, nullable = true), > StructField( "N_T_DT", DateType, nullable = true), > StructField( "A_B_D", IntegerType, nullable = true), > StructField( "B_P_E", DateType, nullable = true), > StructField( "B_P_S", DateType, nullable = true), > StructField( "A_P_A", DoubleType, nullable = true), > StructField( "A_B_1_D_A", DoubleType, nullable = true), > StructField( "A_C", IntegerType, nullable = true), > StructField( "A_D_A", DoubleType, nullable = true) > )) > val a_data = > Seq(Row(2804711813L,1,2,1,1,1,111,1,1,1,null,null,30,Date.valueOf("2019-10-31"),Date.valueOf("2019-10-01"),0.0,0.0,1,-1.0)) > val df_a = spark.createDataFrame(spark.sparkContext.parallelize(a_data), > schema) > val df = df_b > .join(df_a, List("D_ID", "RT_ID", "P_ID", "BT_ID", "CF_ID", "S_ID", > "SG_ID", "PT_ID", "C_ID"), "outer") > .withColumn("T", lit(null)) > .withColumn("B_C", lit("2")) > .withColumn("A_B_DT", to_date(concat(year(df_b("I_ID")), > month(df_b("I_ID")), lpad($"B_C",2,"0")), "yyyyMMdd")) > .withColumn("B_B_DT", date_sub($"I_ID", 6)) > .withColumn("B_P_E", when($"N_T_DT" > $"A_B_DT" > or $"N_C_DT" > $"A_B_DT", last_day($"B_P_E")).otherwise($"B_P_E")) > .withColumn("A_D_A", when($"RT_ID".isInCollection(Seq(5, 6)), > $"A_D_A").otherwise(datediff($"B_P_E", $"B_P_S") + 1)) > .withColumn("A_I_C", when($"N_C_DT".isNotNull and $"B_P_S" === > $"B_P_E", lit(true)).otherwise(lit(false))) > .withColumn("A_D_A", when($"A_I_C", lit(0)).otherwise($"A_D_A")) > .withColumn("A_D_A", when($"A_D_A" > 30, 30).otherwise($"A_D_A")) > .withColumn("A_P_A", round($"A_B_1_D_A" * $"A_D_A", 7)) > .withColumn("M_T_I", > when($"A_I_C", 5) > .when($"T".isNotNull, 6) > .when($"A_P_A".isNotNull and $"BR_P_A".isNotNull, > when(abs($"A_P_A" - $"BR_P_A") < 0.001, 1).otherwise(2)) > .when($"A_P_A".isNotNull and $"BR_P_A".isNull, 3) > .when($"A_P_A".isNull and $"BR_P_A".isNotNull, 4) > .otherwise(lit(99))) > .withColumn("D_A", when($"M_T_I" === 2, round($"BR_P_A" - $"A_P_A", > 7)).otherwise(lit(null))) > .withColumn("D_D", when($"M_T_I" === 2, round($"BR_D_A" - $"A_D_A", > 7)).otherwise(lit(null))) > .withColumn("RT_ID", when($"RT_ID".isNull, lit(99)).otherwise($"RT_ID")) > df.collect() > } > } > {code} -- This message was sent by Atlassian Jira (v8.3.4#803005) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org