Andrew Conegliano created SPARK-24481: -----------------------------------------
             Summary: GeneratedIteratorForCodegenStage1 grows beyond 64 KB
                 Key: SPARK-24481
                 URL: https://issues.apache.org/jira/browse/SPARK-24481
             Project: Spark
          Issue Type: Bug
          Components: SQL
    Affects Versions: 2.3.0
         Environment: Emr 5.13.0
            Reporter: Andrew Conegliano
         Attachments: log4j-active(1).log

This is similar to other "grows beyond 64 KB" errors. It happens with a large CASE statement:

{code:java}
// Databricks notebook source
import org.apache.spark.sql.functions._
import scala.collection.mutable
import org.apache.spark.sql.Column

var rdd = sc.parallelize(Array("""{ "event": { "timestamp": 1521086591110, "event_name": "yu", "page": { "page_url": "https://", "page_name": "es" }, "properties": { "id": "87", "action": "action", "navigate_action": "navigate_action" } } } """))
var df = spark.read.json(rdd)
df = df.select("event.properties.id","event.timestamp","event.page.page_url","event.properties.action","event.page.page_name","event.event_name","event.properties.navigate_action")
  .toDF("id","event_time","url","action","page_name","event_name","navigation_action")

var a = "case "
for(i <- 1 to 300)
  a = a + s"when action like '$i%' THEN '$i' "
a = a + " else null end as task_id"
val expression = expr(a)

df = df.filter("id is not null and id <> '' and event_time is not null")

val transformationExpressions: mutable.HashMap[String, Column] = mutable.HashMap(
  "action" -> expr("coalesce(action, navigation_action) as action"),
  "task_id" -> expression
)
for((col, expr) <- transformationExpressions)
  df = df.withColumn(col, expr)

df = df.filter("(action is not null and action <> '') or (page_name is not null and page_name <> '')")
df.show
{code}

The log file is attached.

--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org