[ https://issues.apache.org/jira/browse/SPARK-24481?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Andrew Conegliano updated SPARK-24481: -------------------------------------- Description: Similar to other "grows beyond 64 KB" errors. Happens with large case statement: {code:java} import org.apache.spark.sql.functions._ import scala.collection.mutable import org.apache.spark.sql.Column var rdd = sc.parallelize(Array("""{ "event": { "timestamp": 1521086591110, "event_name": "yu", "page": { "page_url": "https://", "page_name": "es" }, "properties": { "id": "87", "action": "action", "navigate_action": "navigate_action" } } } """)) var df = spark.read.json(rdd) df = df.select("event.properties.id","event.timestamp","event.page.page_url","event.properties.action","event.page.page_name","event.event_name","event.properties.navigate_action") .toDF("id","event_time","url","action","page_name","event_name","navigation_action") var a = "case " for(i <- 1 to 300){ a = a + s"when action like '$i%' THEN '$i' " } a = a + " else null end as task_id" val expression = expr(a) df = df.filter("id is not null and id <> '' and event_time is not null") val transformationExpressions: mutable.HashMap[String, Column] = mutable.HashMap( "action" -> expr("coalesce(action, navigation_action) as action"), "task_id" -> expression ) for((col, expr) <- transformationExpressions) df = df.withColumn(col, expr) df = df.filter("(action is not null and action <> '') or (page_name is not null and page_name <> '')") df.show {code} Exception: {code:java} 18/06/07 01:06:34 ERROR CodeGenerator: failed to compile: org.codehaus.janino.InternalCompilerException: Compiling "GeneratedClass": Code of method "project_doConsume$(Lorg/apache/spark/sql/catalyst/expressions/GeneratedClass$GeneratedIteratorForCodegenStage1;Lorg/apache/spark/sql/catalyst/InternalRow;)V" of class "org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1" grows beyond 64 KB org.codehaus.janino.InternalCompilerException: Compiling "GeneratedClass": Code of method "project_doConsume$(Lorg/apache/spark/sql/catalyst/expressions/GeneratedClass$GeneratedIteratorForCodegenStage1;Lorg/apache/spark/sql/catalyst/InternalRow;)V" of class "org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1" grows beyond 64 KB{code} Log file is attached was: Similar to other "grows beyond 64 KB" errors. Happens with large case statement: {code:java} import org.apache.spark.sql.functions._ import scala.collection.mutable import org.apache.spark.sql.Column var rdd = sc.parallelize(Array("""{ "event": { "timestamp": 1521086591110, "event_name": "yu", "page": { "page_url": "https://", "page_name": "es" }, "properties": { "id": "87", "action": "action", "navigate_action": "navigate_action" } } } """)) var df = spark.read.json(rdd) df = df.select("event.properties.id","event.timestamp","event.page.page_url","event.properties.action","event.page.page_name","event.event_name","event.properties.navigate_action") .toDF("id","event_time","url","action","page_name","event_name","navigation_action") var a = "case " for(i <- 1 to 300){ a = a + s"when action like '$i%' THEN '$i' " } a = a + " else null end as task_id" val expression = expr(a) df = df.filter("id is not null and id <> '' and event_time is not null") val transformationExpressions: mutable.HashMap[String, Column] = mutable.HashMap( "action" -> expr("coalesce(action, navigation_action) as action"), "task_id" -> expression ) for((col, expr) <- transformationExpressions) df = df.withColumn(col, expr) df = df.filter("(action is not null and action <> '') or (page_name is not null and page_name <> '')") df.show {code} Log file is attached > GeneratedIteratorForCodegenStage1 grows beyond 64 KB > ---------------------------------------------------- > > Key: SPARK-24481 > URL: https://issues.apache.org/jira/browse/SPARK-24481 > Project: Spark > Issue Type: Bug > Components: SQL > Affects Versions: 2.3.0 > Environment: Emr 5.13.0 and Databricks Cloud 4.0 > Reporter: Andrew Conegliano > Priority: Major > Attachments: log4j-active(1).log > > > Similar to other "grows beyond 64 KB" errors. Happens with large case > statement: > {code:java} > import org.apache.spark.sql.functions._ > import scala.collection.mutable > import org.apache.spark.sql.Column > var rdd = sc.parallelize(Array("""{ > "event": > { > "timestamp": 1521086591110, > "event_name": "yu", > "page": > { > "page_url": "https://", > "page_name": "es" > }, > "properties": > { > "id": "87", > "action": "action", > "navigate_action": "navigate_action" > } > } > } > """)) > var df = spark.read.json(rdd) > df = > df.select("event.properties.id","event.timestamp","event.page.page_url","event.properties.action","event.page.page_name","event.event_name","event.properties.navigate_action") > .toDF("id","event_time","url","action","page_name","event_name","navigation_action") > var a = "case " > for(i <- 1 to 300){ > a = a + s"when action like '$i%' THEN '$i' " > } > a = a + " else null end as task_id" > val expression = expr(a) > df = df.filter("id is not null and id <> '' and event_time is not null") > val transformationExpressions: mutable.HashMap[String, Column] = > mutable.HashMap( > "action" -> expr("coalesce(action, navigation_action) as action"), > "task_id" -> expression > ) > for((col, expr) <- transformationExpressions) > df = df.withColumn(col, expr) > df = df.filter("(action is not null and action <> '') or (page_name is not > null and page_name <> '')") > df.show > {code} > > Exception: > {code:java} > 18/06/07 01:06:34 ERROR CodeGenerator: failed to compile: > org.codehaus.janino.InternalCompilerException: Compiling "GeneratedClass": > Code of method > "project_doConsume$(Lorg/apache/spark/sql/catalyst/expressions/GeneratedClass$GeneratedIteratorForCodegenStage1;Lorg/apache/spark/sql/catalyst/InternalRow;)V" > of class > "org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1" > grows beyond 64 KB > org.codehaus.janino.InternalCompilerException: Compiling "GeneratedClass": > Code of method > "project_doConsume$(Lorg/apache/spark/sql/catalyst/expressions/GeneratedClass$GeneratedIteratorForCodegenStage1;Lorg/apache/spark/sql/catalyst/InternalRow;)V" > of class > "org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1" > grows beyond 64 KB{code} > > Log file is attached -- This message was sent by Atlassian JIRA (v7.6.3#76005) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org