[ https://issues.apache.org/jira/browse/SPARK-29561?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16960996#comment-16960996 ]
Hyukjin Kwon commented on SPARK-29561:
--------------------------------------

Seems like it was just a memory leak. Does it work when you increase the memory?
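A minimal way to test that suggestion is to re-run the reproduction with a larger driver heap, mirroring the configuration from the report below; the 4g value is an arbitrary illustrative bump, not a verified threshold:

{noformat}
spark.driver.memory = 4g
spark.master = "local"
spark.deploy.mode = "client"
{noformat}

Note that in client/local mode spark.driver.memory must be set before the driver JVM starts, e.g. via spark-submit --driver-memory 4g or in spark-defaults.conf; setting it on the SparkConf inside the application has no effect because the driver is already running.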
> Large Case Statement Code Generation OOM
> ----------------------------------------
>
>                 Key: SPARK-29561
>                 URL: https://issues.apache.org/jira/browse/SPARK-29561
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 2.3.0
>            Reporter: Michael Chen
>            Priority: Major
>         Attachments: apacheSparkCase.sql
>
> Spark Configuration:
> spark.driver.memory = 1g
> spark.master = "local"
> spark.deploy.mode = "client"
>
> Try to execute a case statement with 3000+ branches; the SQL statement is added as an attachment.
> Spark runs for a while before it OOMs:
> {noformat}
> java.lang.OutOfMemoryError: GC overhead limit exceeded
>     at org.apache.spark.ContextCleaner$$anonfun$org$apache$spark$ContextCleaner$$keepCleaning$1.apply$mcV$sp(ContextCleaner.scala:182)
>     at org.apache.spark.util.Utils$.tryOrStopSparkContext(Utils.scala:1320)
>     at org.apache.spark.ContextCleaner.org$apache$spark$ContextCleaner$$keepCleaning(ContextCleaner.scala:178)
>     at org.apache.spark.ContextCleaner$$anon$1.run(ContextCleaner.scala:73)
> 19/10/22 16:19:54 ERROR FileFormatWriter: Aborting job null.
> java.lang.OutOfMemoryError: GC overhead limit exceeded
>     at java.util.HashMap.newNode(HashMap.java:1750)
>     at java.util.HashMap.putVal(HashMap.java:631)
>     at java.util.HashMap.putMapEntries(HashMap.java:515)
>     at java.util.HashMap.putAll(HashMap.java:785)
>     at org.codehaus.janino.UnitCompiler.buildLocalVariableMap(UnitCompiler.java:3345)
>     at org.codehaus.janino.UnitCompiler.access$5000(UnitCompiler.java:212)
>     at org.codehaus.janino.UnitCompiler$8.visitLocalVariableDeclarationStatement(UnitCompiler.java:3230)
>     at org.codehaus.janino.UnitCompiler$8.visitLocalVariableDeclarationStatement(UnitCompiler.java:3198)
>     at org.codehaus.janino.Java$LocalVariableDeclarationStatement.accept(Java.java:3351)
>     at org.codehaus.janino.UnitCompiler.buildLocalVariableMap(UnitCompiler.java:3197)
>     at org.codehaus.janino.UnitCompiler.buildLocalVariableMap(UnitCompiler.java:3254)
>     at org.codehaus.janino.UnitCompiler.access$3900(UnitCompiler.java:212)
>     at org.codehaus.janino.UnitCompiler$8.visitBlock(UnitCompiler.java:3216)
>     at org.codehaus.janino.UnitCompiler$8.visitBlock(UnitCompiler.java:3198)
>     at org.codehaus.janino.Java$Block.accept(Java.java:2756)
>     at org.codehaus.janino.UnitCompiler.buildLocalVariableMap(UnitCompiler.java:3197)
>     at org.codehaus.janino.UnitCompiler.buildLocalVariableMap(UnitCompiler.java:3260)
>     at org.codehaus.janino.UnitCompiler.access$4000(UnitCompiler.java:212)
>     at org.codehaus.janino.UnitCompiler$8.visitDoStatement(UnitCompiler.java:3217)
>     at org.codehaus.janino.UnitCompiler$8.visitDoStatement(UnitCompiler.java:3198)
>     at org.codehaus.janino.Java$DoStatement.accept(Java.java:3304)
>     at org.codehaus.janino.UnitCompiler.buildLocalVariableMap(UnitCompiler.java:3197)
>     at org.codehaus.janino.UnitCompiler.buildLocalVariableMap(UnitCompiler.java:3186)
>     at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:3009)
>     at org.codehaus.janino.UnitCompiler.compileDeclaredMethods(UnitCompiler.java:1336)
>     at org.codehaus.janino.UnitCompiler.compileDeclaredMethods(UnitCompiler.java:1309)
>     at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:799)
>     at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:958)
>     at org.codehaus.janino.UnitCompiler.access$700(UnitCompiler.java:212)
>     at org.codehaus.janino.UnitCompiler$2.visitMemberClassDeclaration(UnitCompiler.java:393)
>     at org.codehaus.janino.UnitCompiler$2.visitMemberClassDeclaration(UnitCompiler.java:385)
>     at org.codehaus.janino.Java$MemberClassDeclaration.accept(Java.java:1286)
> 19/10/22 16:19:54 ERROR Utils: throw uncaught fatal error in thread Spark Context Cleaner
> java.lang.OutOfMemoryError: GC overhead limit exceeded
>     at org.apache.spark.ContextCleaner$$anonfun$org$apache$spark$ContextCleaner$$keepCleaning$1.apply$mcV$sp(ContextCleaner.scala:182)
>     at org.apache.spark.util.Utils$.tryOrStopSparkContext(Utils.scala:1320)
>     at org.apache.spark.ContextCleaner.org$apache$spark$ContextCleaner$$keepCleaning(ContextCleaner.scala:178)
>     at org.apache.spark.ContextCleaner$$anon$1.run(ContextCleaner.scala:73)
> {noformat}
> Generated code looks like:
> {noformat}
> /* 029 */   private void project_doConsume(InternalRow scan_row, UTF8String project_expr_0, boolean project_exprIsNull_0) throws java.io.IOException {
> /* 030 */     byte project_caseWhenResultState = -1;
> /* 031 */     do {
> /* 032 */       boolean project_isNull1 = true;
> /* 033 */       boolean project_value1 = false;
> /* 034 */
> /* 035 */       boolean project_isNull2 = project_exprIsNull_0;
> /* 036 */       int project_value2 = -1;
> /* 037 */       if (!project_exprIsNull_0) {
> /* 038 */         UTF8String.IntWrapper project_intWrapper = new UTF8String.IntWrapper();
> /* 039 */         if (project_expr_0.toInt(project_intWrapper)) {
> /* 040 */           project_value2 = project_intWrapper.value;
> /* 041 */         } else {
> /* 042 */           project_isNull2 = true;
> /* 043 */         }
> /* 044 */         project_intWrapper = null;
> /* 045 */
> /* 046 */       }
> {noformat}
> ... bunch of lines ...
> {noformat}
>               if (!project_isNull15002) {
> /* 78048 */     project_isNull15001 = false; // resultCode could change nullability.
> /* 78049 */     project_value15001 = project_value15002 == 3000;
> /* 78050 */
> /* 78051 */   }
> /* 78052 */   if (!project_isNull15001 && project_value15001) {
> /* 78053 */     project_caseWhenResultState = (byte)(false ? 1 : 0);
> /* 78054 */     project_project_value = -3000;
> /* 78055 */     continue;
> /* 78056 */   }
> /* 78057 */
> /* 78058 */     } while (false);
> /* 78059 */     // TRUE if any condition is met and the result is null, or no any condition is met.
> /* 78060 */     final boolean project_isNull = (project_caseWhenResultState != 0);
> /* 78061 */     project_mutableStateArray2[0].zeroOutNullBytes();
> /* 78062 */
> /* 78063 */     if (project_isNull) {
> /* 78064 */       project_mutableStateArray2[0].setNullAt(0);
> /* 78065 */     } else {
> /* 78066 */       project_mutableStateArray2[0].write(0, project_project_value);
> /* 78067 */     }
> /* 78068 */     append(project_mutableStateArray[0]);
> /* 78069 */
> /* 78070 */   }
> /* 78071 */
> /* 78072 */   protected void processNext() throws java.io.IOException {
> /* 78073 */     while (scan_mutableStateArray[0].hasNext()) {
> /* 78074 */       InternalRow scan_row = (InternalRow) scan_mutableStateArray[0].next();
> /* 78075 */       ((org.apache.spark.sql.execution.metric.SQLMetric) references[0] /* numOutputRows */).add(1);
> /* 78076 */       boolean scan_isNull = scan_row.isNullAt(0);
> /* 78077 */       UTF8String scan_value = scan_isNull ? null : (scan_row.getUTF8String(0));
> /* 78078 */
> /* 78079 */       project_doConsume(scan_row, scan_value, scan_isNull);
> /* 78080 */       if (shouldStop()) return;
> /* 78081 */     }
> /* 78082 */   }
> /* 78083 */
> /* 78084 */ }
> {noformat}
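For readers without the attachment: a statement of the shape described above can be generated programmatically. The sketch below is hypothetical, not the attached apacheSparkCase.sql; the table name t, the column name c, and the exact branch pattern are assumptions inferred from the generated code (a string column narrowed with toInt, and branches like WHEN c = 3000 THEN -3000):

{noformat}
// Hypothetical reproduction sketch (assumed names; see note above).
import org.apache.spark.sql.SparkSession

object LargeCaseRepro {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local").getOrCreate()
    import spark.implicits._

    // Toy single-column string table standing in for the real input.
    Seq("1", "1500", "3000").toDF("c").createOrReplaceTempView("t")

    // CASE WHEN c = 1 THEN -1 WHEN c = 2 THEN -2 ... WHEN c = 3000 THEN -3000 END
    val branches = (1 to 3000).map(i => s"WHEN c = $i THEN ${-i}").mkString(" ")

    // collect() forces evaluation, so whole-stage codegen emits the huge
    // method and hands it to Janino, which is where the reporter hit the OOM.
    spark.sql(s"SELECT CASE $branches END AS v FROM t").collect()
    spark.stop()
  }
}
{noformat}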
> --
> This message was sent by Atlassian Jira
> (v8.3.4#803005)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org