[ https://issues.apache.org/jira/browse/SPARK-24481?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16504965#comment-16504965 ]

Andrew Conegliano commented on SPARK-24481:
-------------------------------------------

Thanks Marco.

Forgot to mention: this error doesn't happen in 2.0.2 or 2.2.0. In 2.3.0, even 
though the error is logged, the code still runs, because Spark falls back and 
disables whole-stage codegen for that plan. The main problem is that in a Spark 
Streaming context the error is logged for every message, so the logs fill the 
disk very quickly.
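
In case it helps anyone hitting the same log flooding, below is a minimal sketch 
of two workarounds, not an official fix. Both are my own assumptions based on the 
behavior above: turning off whole-stage codegen avoids generating the oversized 
method at all, and raising the code generator's log threshold hides the repeated 
ERROR. The logger name is inferred from the "ERROR CodeGenerator" line in the 
trace below.

{code:java}
// Minimal sketch, assuming Spark 2.3.x with log4j 1.x on the driver.

// Workaround 1: disable whole-stage codegen for the session. The >64 KB
// project_doConsume method is then never generated or compiled, so nothing
// is logged per message (at some cost in query performance).
spark.conf.set("spark.sql.codegen.wholeStage", "false")

// Workaround 2: keep codegen (2.3.0 already falls back at runtime) and
// suppress the code generator's ERROR output instead. Note this hides
// *all* compile failures from that class, not just this one.
import org.apache.log4j.{Level, Logger}
Logger
  .getLogger("org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator")
  .setLevel(Level.FATAL)
{code}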

> GeneratedIteratorForCodegenStage1 grows beyond 64 KB
> ----------------------------------------------------
>
>                 Key: SPARK-24481
>                 URL: https://issues.apache.org/jira/browse/SPARK-24481
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 2.3.0
>         Environment: EMR 5.13.0 and Databricks Cloud 4.0
>            Reporter: Andrew Conegliano
>            Priority: Major
>         Attachments: log4j-active(1).log
>
>
> Similar to other "grows beyond 64 KB" errors. Happens with a large case statement:
> {code:java}
> import org.apache.spark.sql.functions._
> import scala.collection.mutable
> import org.apache.spark.sql.Column
> var rdd = sc.parallelize(Array("""{
> "event":
> {
> "timestamp": 1521086591110,
> "event_name": "yu",
> "page":
> {
> "page_url": "https://";,
> "page_name": "es"
> },
> "properties":
> {
> "id": "87",
> "action": "action",
> "navigate_action": "navigate_action"
> }
> }
> }
> """))
> var df = spark.read.json(rdd)
> df = df.select("event.properties.id", "event.timestamp", "event.page.page_url", "event.properties.action", "event.page.page_name", "event.event_name", "event.properties.navigate_action")
>   .toDF("id", "event_time", "url", "action", "page_name", "event_name", "navigation_action")
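> // Build a CASE expression with 300 WHEN branches; this oversized expression is what pushes the generated method past the JVM's 64 KB bytecode limit.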
> var a = "case "
> for(i <- 1 to 300){
>   a = a + s"when action like '$i%' THEN '$i' "
> }
> a = a + " else null end as task_id"
> val expression = expr(a)
> df = df.filter("id is not null and id <> '' and event_time is not null")
> val transformationExpressions: mutable.HashMap[String, Column] = mutable.HashMap(
>   "action" -> expr("coalesce(action, navigation_action) as action"),
>   "task_id" -> expression
> )
> for ((col, expr) <- transformationExpressions)
>   df = df.withColumn(col, expr)
> df = df.filter("(action is not null and action <> '') or (page_name is not null and page_name <> '')")
> df.show
> {code}
>  
> Exception:
> {code:java}
> 18/06/07 01:06:34 ERROR CodeGenerator: failed to compile: org.codehaus.janino.InternalCompilerException: Compiling "GeneratedClass": Code of method "project_doConsume$(Lorg/apache/spark/sql/catalyst/expressions/GeneratedClass$GeneratedIteratorForCodegenStage1;Lorg/apache/spark/sql/catalyst/InternalRow;)V" of class "org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1" grows beyond 64 KB
> org.codehaus.janino.InternalCompilerException: Compiling "GeneratedClass": Code of method "project_doConsume$(Lorg/apache/spark/sql/catalyst/expressions/GeneratedClass$GeneratedIteratorForCodegenStage1;Lorg/apache/spark/sql/catalyst/InternalRow;)V" of class "org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1" grows beyond 64 KB
>       at org.codehaus.janino.UnitCompiler.compileUnit(UnitCompiler.java:361)
>       at org.codehaus.janino.SimpleCompiler.cook(SimpleCompiler.java:234)
>       at org.codehaus.janino.SimpleCompiler.compileToClassLoader(SimpleCompiler.java:446)
>       at org.codehaus.janino.ClassBodyEvaluator.compileToClass(ClassBodyEvaluator.java:313)
>       at org.codehaus.janino.ClassBodyEvaluator.cook(ClassBodyEvaluator.java:235)
>       at org.codehaus.janino.SimpleCompiler.cook(SimpleCompiler.java:204)
>       at org.codehaus.commons.compiler.Cookable.cook(Cookable.java:80)
>       at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.org$apache$spark$sql$catalyst$expressions$codegen$CodeGenerator$$doCompile(CodeGenerator.scala:1444)
>       at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:1523)
>       at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:1520)
>       at com.google.common.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3522)
>       at com.google.common.cache.LocalCache$Segment.loadSync(LocalCache.java:2315)
>       at com.google.common.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2278)
>       at com.google.common.cache.LocalCache$Segment.get(LocalCache.java:2193)
>       at com.google.common.cache.LocalCache.get(LocalCache.java:3932)
>       at com.google.common.cache.LocalCache.getOrLoad(LocalCache.java:3936)
>       at com.google.common.cache.LocalCache$LocalLoadingCache.get(LocalCache.java:4806)
>       at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.compile(CodeGenerator.scala:1392)
>       at org.apache.spark.sql.execution.WholeStageCodegenExec.liftedTree1$1(WholeStageCodegenExec.scala:579)
>       at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:578)
>       at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:135)
>       at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127)
>       at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$3.apply(SparkPlan.scala:167)
>       at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
>       at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:164)
>       at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127)
>       at org.apache.spark.sql.execution.collect.Collector$.collect(Collector.scala:61)
>       at org.apache.spark.sql.execution.collect.Collector$.collect(Collector.scala:70)
>       at org.apache.spark.sql.execution.CollectLimitExec.executeCollectResult(limit.scala:45)
>       at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collectResult(Dataset.scala:2759)
>       at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collectFromPlan(Dataset.scala:3331)
>       at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2488)
>       at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2488)
>       at org.apache.spark.sql.Dataset$$anonfun$53.apply(Dataset.scala:3315)
>       at org.apache.spark.sql.execution.SQLExecution$.withCustomExecutionEnv(SQLExecution.scala:88)
>       at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:124)
>       at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3314)
>       at org.apache.spark.sql.Dataset.head(Dataset.scala:2488)
>       at org.apache.spark.sql.Dataset.take(Dataset.scala:2702)
>       at org.apache.spark.sql.Dataset.showString(Dataset.scala:258)
>       at org.apache.spark.sql.Dataset.show(Dataset.scala:727)
>       at org.apache.spark.sql.Dataset.show(Dataset.scala:686)
>       at org.apache.spark.sql.Dataset.show(Dataset.scala:695)
>       at line7b2cd01e0857498cbfa87d4dfaadb85d46.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-687647945500165:1)
>       at line7b2cd01e0857498cbfa87d4dfaadb85d46.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-687647945500165:51)
>       at line7b2cd01e0857498cbfa87d4dfaadb85d46.$read$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-687647945500165:53)
>       at line7b2cd01e0857498cbfa87d4dfaadb85d46.$read$$iw$$iw$$iw$$iw$$iw.<init>(command-687647945500165:55)
>       at line7b2cd01e0857498cbfa87d4dfaadb85d46.$read$$iw$$iw$$iw$$iw.<init>(command-687647945500165:57)
>       at line7b2cd01e0857498cbfa87d4dfaadb85d46.$read$$iw$$iw$$iw.<init>(command-687647945500165:59)
>       at line7b2cd01e0857498cbfa87d4dfaadb85d46.$read$$iw$$iw.<init>(command-687647945500165:61)
>       at line7b2cd01e0857498cbfa87d4dfaadb85d46.$read$$iw.<init>(command-687647945500165:63)
>       at line7b2cd01e0857498cbfa87d4dfaadb85d46.$read.<init>(command-687647945500165:65)
>       at line7b2cd01e0857498cbfa87d4dfaadb85d46.$read$.<init>(command-687647945500165:69)
>       at line7b2cd01e0857498cbfa87d4dfaadb85d46.$read$.<clinit>(command-687647945500165)
>       at line7b2cd01e0857498cbfa87d4dfaadb85d46.$eval$.$print$lzycompute(<notebook>:7)
>       at line7b2cd01e0857498cbfa87d4dfaadb85d46.$eval$.$print(<notebook>:6)
>       at line7b2cd01e0857498cbfa87d4dfaadb85d46.$eval.$print(<notebook>)
>       at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>       at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>       at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>       at java.lang.reflect.Method.invoke(Method.java:498)
>       at scala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:786)
>       at scala.tools.nsc.interpreter.IMain$Request.loadAndRun(IMain.scala:1047)
>       at scala.tools.nsc.interpreter.IMain$WrappedRequest$$anonfun$loadAndRunReq$1.apply(IMain.scala:638)
>       at scala.tools.nsc.interpreter.IMain$WrappedRequest$$anonfun$loadAndRunReq$1.apply(IMain.scala:637)
>       at scala.reflect.internal.util.ScalaClassLoader$class.asContext(ScalaClassLoader.scala:31)
>       at scala.reflect.internal.util.AbstractFileClassLoader.asContext(AbstractFileClassLoader.scala:19)
>       at scala.tools.nsc.interpreter.IMain$WrappedRequest.loadAndRunReq(IMain.scala:637)
>       at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:569)
>       at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:565)
>       at com.databricks.backend.daemon.driver.DriverILoop.execute(DriverILoop.scala:186)
>       at com.databricks.backend.daemon.driver.ScalaDriverLocal$$anonfun$repl$1.apply$mcV$sp(ScalaDriverLocal.scala:189)
>       at com.databricks.backend.daemon.driver.ScalaDriverLocal$$anonfun$repl$1.apply(ScalaDriverLocal.scala:189)
>       at com.databricks.backend.daemon.driver.ScalaDriverLocal$$anonfun$repl$1.apply(ScalaDriverLocal.scala:189)
>       at com.databricks.backend.daemon.driver.DriverLocal$TrapExitInternal$.trapExit(DriverLocal.scala:500)
>       at com.databricks.backend.daemon.driver.DriverLocal$TrapExit$.apply(DriverLocal.scala:456)
>       at com.databricks.backend.daemon.driver.ScalaDriverLocal.repl(ScalaDriverLocal.scala:189)
>       at com.databricks.backend.daemon.driver.DriverLocal$$anonfun$execute$3.apply(DriverLocal.scala:249)
>       at com.databricks.backend.daemon.driver.DriverLocal$$anonfun$execute$3.apply(DriverLocal.scala:229)
>       at com.databricks.logging.UsageLogging$$anonfun$withAttributionContext$1.apply(UsageLogging.scala:188)
>       at scala.util.DynamicVariable.withValue(DynamicVariable.scala:58)
>       at com.databricks.logging.UsageLogging$class.withAttributionContext(UsageLogging.scala:183)
>       at com.databricks.backend.daemon.driver.DriverLocal.withAttributionContext(DriverLocal.scala:43)
>       at com.databricks.logging.UsageLogging$class.withAttributionTags(UsageLogging.scala:221)
>       at com.databricks.backend.daemon.driver.DriverLocal.withAttributionTags(DriverLocal.scala:43)
>       at com.databricks.backend.daemon.driver.DriverLocal.execute(DriverLocal.scala:229)
>       at com.databricks.backend.daemon.driver.DriverWrapper$$anonfun$tryExecutingCommand$2.apply(DriverWrapper.scala:601)
>       at com.databricks.backend.daemon.driver.DriverWrapper$$anonfun$tryExecutingCommand$2.apply(DriverWrapper.scala:601)
>       at scala.util.Try$.apply(Try.scala:192)
>       at com.databricks.backend.daemon.driver.DriverWrapper.tryExecutingCommand(DriverWrapper.scala:596)
>       at com.databricks.backend.daemon.driver.DriverWrapper.getCommandOutputAndError(DriverWrapper.scala:486)
>       at com.databricks.backend.daemon.driver.DriverWrapper.executeCommand(DriverWrapper.scala:554)
>       at com.databricks.backend.daemon.driver.DriverWrapper.runInnerLoop(DriverWrapper.scala:391)
>       at com.databricks.backend.daemon.driver.DriverWrapper.runInner(DriverWrapper.scala:348)
>       at com.databricks.backend.daemon.driver.DriverWrapper.run(DriverWrapper.scala:215)
>       at java.lang.Thread.run(Thread.java:748){code}
>  
> Log file is attached.


