[jira] [Comment Edited] (SPARK-36862) ERROR CodeGenerator: failed to compile: org.codehaus.commons.compiler.CompileException: File 'generated.java'
[ https://issues.apache.org/jira/browse/SPARK-36862?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17599013#comment-17599013 ] Lukas Waldmann edited comment on SPARK-36862 at 9/1/22 2:55 PM: I managed to reproduce the issue in my environment. Problem is on line 192 - variable name in function header having array index Here is the generated code {code:java} /* 001 */ public Object generate(Object[] references) { /* 002 */ return new GeneratedIteratorForCodegenStage636(references); /* 003 */ } /* 004 */ /* 005 */ // codegenStageId=636 /* 006 */ final class GeneratedIteratorForCodegenStage636 extends org.apache.spark.sql.execution.BufferedRowIterator { /* 007 */ private Object[] references; /* 008 */ private scala.collection.Iterator[] inputs; /* 009 */ private scala.collection.Iterator smj_leftInput_0; /* 010 */ private scala.collection.Iterator smj_rightInput_0; /* 011 */ private InternalRow smj_leftRow_0; /* 012 */ private InternalRow smj_rightRow_0; /* 013 */ private boolean smj_globalIsNull_0; /* 014 */ private boolean smj_globalIsNull_1; /* 015 */ private double smj_value_27; /* 016 */ private org.apache.spark.sql.execution.ExternalAppendOnlyUnsafeRowArray smj_matches_0; /* 017 */ private double smj_value_28; /* 018 */ private boolean smj_isNull_25; /* 019 */ private boolean smj_isNull_26; /* 020 */ private boolean smj_isNull_27; /* 021 */ private boolean smj_isNull_28; /* 022 */ private boolean smj_isNull_29; /* 023 */ private boolean smj_isNull_30; /* 024 */ private boolean project_subExprIsNull_0; /* 025 */ private org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter[] smj_mutableStateArray_2 = new org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter[2]; /* 026 */ private java.util.regex.Pattern[] project_mutableStateArray_0 = new java.util.regex.Pattern[1]; /* 027 */ private Decimal[] smj_mutableStateArray_1 = new Decimal[1]; /* 028 */ private String[] project_mutableStateArray_1 = new String[1]; /* 029 */ 
private UTF8String[] smj_mutableStateArray_0 = new UTF8String[7]; /* 030 */ /* 031 */ public GeneratedIteratorForCodegenStage636(Object[] references) { /* 032 */ this.references = references; /* 033 */ } /* 034 */ /* 035 */ public void init(int index, scala.collection.Iterator[] inputs) { /* 036 */ partitionIndex = index; /* 037 */ this.inputs = inputs; /* 038 */ smj_leftInput_0 = inputs[0]; /* 039 */ smj_rightInput_0 = inputs[1]; /* 040 */ /* 041 */ smj_matches_0 = new org.apache.spark.sql.execution.ExternalAppendOnlyUnsafeRowArray(2147483632, 2147483647); /* 042 */ smj_mutableStateArray_2[0] = new org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter(6, 192); /* 043 */ smj_mutableStateArray_2[1] = new org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter(6, 192); /* 044 */ /* 045 */ } /* 046 */ /* 047 */ private boolean smj_findNextOuterJoinRows_0( /* 048 */ scala.collection.Iterator leftIter, /* 049 */ scala.collection.Iterator rightIter) { /* 050 */ smj_leftRow_0 = null; /* 051 */ int comp = 0; /* 052 */ while (smj_leftRow_0 == null) { /* 053 */ if (!leftIter.hasNext()) return false; /* 054 */ smj_leftRow_0 = (InternalRow) leftIter.next(); /* 055 */ UTF8String smj_value_22 = smj_If_0(smj_leftRow_0); /* 056 */ boolean smj_isNull_2 = smj_globalIsNull_1; /* 057 */ double smj_value_2 = -1.0; /* 058 */ if (!smj_globalIsNull_1) { /* 059 */ final String smj_doubleStr_0 = smj_value_22.toString(); /* 060 */ try { /* 061 */ smj_value_2 = Double.valueOf(smj_doubleStr_0); /* 062 */ } catch (java.lang.NumberFormatException e) { /* 063 */ final Double d = (Double) Cast.processFloatingPointSpecialLiterals(smj_doubleStr_0, false); /* 064 */ if (d == null) { /* 065 */ smj_isNull_2 = true; /* 066 */ } else { /* 067 */ smj_value_2 = d.doubleValue(); /* 068 */ } /* 069 */ } /* 070 */ } /* 071 */ boolean smj_isNull_1 = smj_isNull_2; /* 072 */ double smj_value_1 = -1.0; /* 073 */ /* 074 */ if (!smj_isNull_2) { /* 075 */ if (Double.isNaN(smj_value_2)) { /* 076 
*/ smj_value_1 = Double.NaN; /* 077 */ } else if (smj_value_2 == -0.0d) { /* 078 */ smj_value_1 = 0.0d; /* 079 */ } else { /* 080 */ smj_value_1 = smj_value_2; /* 081 */ } /* 082 */ /* 083 */ } /* 084 */ if (smj_isNull_1) { /* 085 */ if (!smj_matches_0.isEmpty()) { /* 086 */ smj_matches_0.clear(); /* 087 */ } /* 088 */ return true; /* 089 */ } /* 090 */ if (!smj_matches_0.isEmpty()) { /* 091 */ comp = 0; /* 092 */ if (comp == 0) { /* 093 */ comp = org.apache.spark.sql.catalyst.util.SQLOrderingUtil.compareDoubles(smj_value_1, smj_value_28); /* 094 */ } /* 095 */ /* 096 */ if (comp == 0) { /* 097 */ return true; /* 098 */ } /* 099 */ smj_matches_0.clear(); /* 100 */ } /* 101 */ /* 102 */ do { /* 103 */ if (smj_rightRow_0 == null) { /* 104 */ if (!rightIter.hasNext()) { /* 105 */ if (!smj_matches_0.isEmpty()) { /* 106 */ smj_value_28 = smj_value_1; /* 107 */ } /* 108 */ return true; /* 109 */ } /* 110
[jira] [Commented] (SPARK-36862) ERROR CodeGenerator: failed to compile: org.codehaus.commons.compiler.CompileException: File 'generated.java'
[ https://issues.apache.org/jira/browse/SPARK-36862?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17599013#comment-17599013 ] Lukas Waldmann commented on SPARK-36862: I managed to reproduce the issue in my environment. The problem is on line 192 - a variable name in the function header contains an array index. Here is the generated code {code:java} /* 001 */ public Object generate(Object[] references) { /* 002 */ return new GeneratedIteratorForCodegenStage636(references); /* 003 */ } /* 004 */ /* 005 */ // codegenStageId=636 /* 006 */ final class GeneratedIteratorForCodegenStage636 extends org.apache.spark.sql.execution.BufferedRowIterator { /* 007 */ private Object[] references; /* 008 */ private scala.collection.Iterator[] inputs; /* 009 */ private scala.collection.Iterator smj_leftInput_0; /* 010 */ private scala.collection.Iterator smj_rightInput_0; /* 011 */ private InternalRow smj_leftRow_0; /* 012 */ private InternalRow smj_rightRow_0; /* 013 */ private boolean smj_globalIsNull_0; /* 014 */ private boolean smj_globalIsNull_1; /* 015 */ private double smj_value_27; /* 016 */ private org.apache.spark.sql.execution.ExternalAppendOnlyUnsafeRowArray smj_matches_0; /* 017 */ private double smj_value_28; /* 018 */ private boolean smj_isNull_25; /* 019 */ private boolean smj_isNull_26; /* 020 */ private boolean smj_isNull_27; /* 021 */ private boolean smj_isNull_28; /* 022 */ private boolean smj_isNull_29; /* 023 */ private boolean smj_isNull_30; /* 024 */ private boolean project_subExprIsNull_0; /* 025 */ private org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter[] smj_mutableStateArray_2 = new org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter[2]; /* 026 */ private java.util.regex.Pattern[] project_mutableStateArray_0 = new java.util.regex.Pattern[1]; /* 027 */ private Decimal[] smj_mutableStateArray_1 = new Decimal[1]; /* 028 */ private String[] project_mutableStateArray_1 = new String[1]; /* 029 */ private UTF8String[] 
smj_mutableStateArray_0 = new UTF8String[7]; /* 030 */ /* 031 */ public GeneratedIteratorForCodegenStage636(Object[] references) { /* 032 */ this.references = references; /* 033 */ } /* 034 */ /* 035 */ public void init(int index, scala.collection.Iterator[] inputs) { /* 036 */ partitionIndex = index; /* 037 */ this.inputs = inputs; /* 038 */ smj_leftInput_0 = inputs[0]; /* 039 */ smj_rightInput_0 = inputs[1]; /* 040 */ /* 041 */ smj_matches_0 = new org.apache.spark.sql.execution.ExternalAppendOnlyUnsafeRowArray(2147483632, 2147483647); /* 042 */ smj_mutableStateArray_2[0] = new org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter(6, 192); /* 043 */ smj_mutableStateArray_2[1] = new org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter(6, 192); /* 044 */ /* 045 */ } /* 046 */ /* 047 */ private boolean smj_findNextOuterJoinRows_0( /* 048 */ scala.collection.Iterator leftIter, /* 049 */ scala.collection.Iterator rightIter) { /* 050 */ smj_leftRow_0 = null; /* 051 */ int comp = 0; /* 052 */ while (smj_leftRow_0 == null) { /* 053 */ if (!leftIter.hasNext()) return false; /* 054 */ smj_leftRow_0 = (InternalRow) leftIter.next(); /* 055 */ UTF8String smj_value_22 = smj_If_0(smj_leftRow_0); /* 056 */ boolean smj_isNull_2 = smj_globalIsNull_1; /* 057 */ double smj_value_2 = -1.0; /* 058 */ if (!smj_globalIsNull_1) { /* 059 */ final String smj_doubleStr_0 = smj_value_22.toString(); /* 060 */ try { /* 061 */ smj_value_2 = Double.valueOf(smj_doubleStr_0); /* 062 */ } catch (java.lang.NumberFormatException e) { /* 063 */ final Double d = (Double) Cast.processFloatingPointSpecialLiterals(smj_doubleStr_0, false); /* 064 */ if (d == null) { /* 065 */ smj_isNull_2 = true; /* 066 */ } else { /* 067 */ smj_value_2 = d.doubleValue(); /* 068 */ } /* 069 */ } /* 070 */ } /* 071 */ boolean smj_isNull_1 = smj_isNull_2; /* 072 */ double smj_value_1 = -1.0; /* 073 */ /* 074 */ if (!smj_isNull_2) { /* 075 */ if (Double.isNaN(smj_value_2)) { /* 076 */ smj_value_1 = 
Double.NaN; /* 077 */ } else if (smj_value_2 == -0.0d) { /* 078 */ smj_value_1 = 0.0d; /* 079 */ } else { /* 080 */ smj_value_1 = smj_value_2; /* 081 */ } /* 082 */ /* 083 */ } /* 084 */ if (smj_isNull_1) { /* 085 */ if (!smj_matches_0.isEmpty()) { /* 086 */ smj_matches_0.clear(); /* 087 */ } /* 088 */ return true; /* 089 */ } /* 090 */ if (!smj_matches_0.isEmpty()) { /* 091 */ comp = 0; /* 092 */ if (comp == 0) { /* 093 */ comp = org.apache.spark.sql.catalyst.util.SQLOrderingUtil.compareDoubles(smj_value_1, smj_value_28); /* 094 */ } /* 095 */ /* 096 */ if (comp == 0) { /* 097 */ return true; /* 098 */ } /* 099 */ smj_matches_0.clear(); /* 100 */ } /* 101 */ /* 102 */ do { /* 103 */ if (smj_rightRow_0 == null) { /* 104 */ if (!rightIter.hasNext()) { /* 105 */ if (!smj_matches_0.isEmpty()) { /* 106 */ smj_value_28 = smj_value_1; /* 107 */ } /* 108 */ return true; /* 109 */ } /* 110 */ smj_rightRow_0 = (InternalRow)
[jira] [Commented] (SPARK-27228) Spark long delay on close, possible problem with killing executors
[ https://issues.apache.org/jira/browse/SPARK-27228?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16805418#comment-16805418 ] Lukas Waldmann commented on SPARK-27228: I understand. Unfortunately, things being as they are, I am afraid I will be stuck with 2.3 for quite some time. The upgrade cycle of our cluster is measured in years rather than months :( > Spark long delay on close, possible problem with killing executors > -- > > Key: SPARK-27228 > URL: https://issues.apache.org/jira/browse/SPARK-27228 > Project: Spark > Issue Type: Bug > Components: Block Manager >Affects Versions: 2.3.0 >Reporter: Lukas Waldmann >Priority: Major > Attachments: log.html > > > When using dynamic allocations after all jobs finishes spark delays for > several minutes before finally finishes. Log suggest that executors are not > cleared up properly. > See the attachment for log > -- This message was sent by Atlassian JIRA (v7.6.3#76005) - To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org
[jira] [Commented] (SPARK-27228) Spark long delay on close, possible problem with killing executors
[ https://issues.apache.org/jira/browse/SPARK-27228?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16800621#comment-16800621 ] Lukas Waldmann commented on SPARK-27228: That's a good question :) What the code does: it runs up to several hundred SQL queries with different parameters and unions the results before writing the result to a Hive table. The inputs are Hive tables with up to several hundred million rows. The code looks something like this: {code:java} void process(String dbName, String environment) { //For all items in call the sql snippet and union the results List mds = ...; Map res = new LinkedHashMap<>(); mds.stream() .forEach(md -> { try (InputStream is = getClass().getResourceAsStream("/" + md.query_id)) { String snippet = IOUtils.toString(is); Dataset ds = spark.sql(snippet); String key = md.product; res.put(key, res.get(key) == null ? ds : ds.union(res.get(key))); } catch (IOException ex) { Logger.getLogger(SparkMainApp.class.getName()).log(Level.SEVERE, null, ex); } }); String name = dbName + "." + table; res.values().stream() .forEach( result -> { result.repartition( result.col(PRODUCT.toString()), result.col(PROTOCOL.toString())).write() .mode(SaveMode.Overwrite).insertInto(name); } ); } {code} > Spark long delay on close, possible problem with killing executors > -- > > Key: SPARK-27228 > URL: https://issues.apache.org/jira/browse/SPARK-27228 > Project: Spark > Issue Type: Bug > Components: Block Manager >Affects Versions: 2.3.0 >Reporter: Lukas Waldmann >Priority: Major > Attachments: log.html > > > When using dynamic allocations after all jobs finishes spark delays for > several minutes before finally finishes. Log suggest that executors are not > cleared up properly. > See the attachment for log > -- This message was sent by Atlassian JIRA (v7.6.3#76005) - To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org
[jira] [Commented] (SPARK-27228) Spark long delay on close, possible problem with killing executors
[ https://issues.apache.org/jira/browse/SPARK-27228?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16798841#comment-16798841 ] Lukas Waldmann commented on SPARK-27228: Executor management seems to behave strangely. After calling spark.stop(), see this: {quote}19/03/21 09:51:39 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 332. 19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 332 (epoch 446) 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Trying to remove executor 332 from BlockManagerMaster. 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Removing block manager BlockManagerId(332, data-10.bdp.gin.merck.com, 38713, None) 19/03/21 09:51:39 INFO BlockManagerMaster: Removed 332 successfully in removeExecutor {quote} and a few minutes later: {quote}19/03/21 09:54:26 WARN HeartbeatReceiver: Removing executor 332 with no recent heartbeats: 173942 ms exceeds timeout 12 ms 19/03/21 09:54:26 ERROR YarnClusterScheduler: Lost an executor 332 (already removed): Executor heartbeat timed out after 173942 ms 19/03/21 09:54:26 INFO YarnClusterSchedulerBackend: Requesting to kill executor(s) 332 19/03/21 09:54:26 WARN YarnClusterSchedulerBackend: Executor to kill 332 does not exist! 19/03/21 09:54:26 INFO YarnClusterSchedulerBackend: Actual list of executor(s) to be killed is{quote} > Spark long delay on close, possible problem with killing executors > -- > > Key: SPARK-27228 > URL: https://issues.apache.org/jira/browse/SPARK-27228 > Project: Spark > Issue Type: Bug > Components: Block Manager >Affects Versions: 2.3.0 >Reporter: Lukas Waldmann >Priority: Major > Attachments: log.html > > > When using dynamic allocations after all jobs finishes spark delays for > several minutes before finally finishes. Log suggest that executors are not > cleared up properly. 
> See the attachment for log > -- This message was sent by Atlassian JIRA (v7.6.3#76005) - To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org
[jira] [Reopened] (SPARK-27228) Spark long delay on close, possible problem with killing executors
[ https://issues.apache.org/jira/browse/SPARK-27228?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Lukas Waldmann reopened SPARK-27228: Added initial issue investigation > Spark long delay on close, possible problem with killing executors > -- > > Key: SPARK-27228 > URL: https://issues.apache.org/jira/browse/SPARK-27228 > Project: Spark > Issue Type: Bug > Components: Block Manager >Affects Versions: 2.3.0 >Reporter: Lukas Waldmann >Priority: Major > Attachments: log.html > > > When using dynamic allocations after all jobs finishes spark delays for > several minutes before finally finishes. Log suggest that executors are not > cleared up properly. > See the attachment for log > -- This message was sent by Atlassian JIRA (v7.6.3#76005) - To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org
[jira] [Updated] (SPARK-27228) Spark long delay on close, possible problem with killing executors
[ https://issues.apache.org/jira/browse/SPARK-27228?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Lukas Waldmann updated SPARK-27228: --- Description: When using dynamic allocations after all jobs finishes spark delays for several minutes before finally finishes. Log suggest that executors are not cleared up properly. See the attachment for log was:When using dynamic allocations after all jobs finishes spark delays for several minutes before finally finishes. Log suggest that executors are not cleared up properly. > Spark long delay on close, possible problem with killing executors > -- > > Key: SPARK-27228 > URL: https://issues.apache.org/jira/browse/SPARK-27228 > Project: Spark > Issue Type: Bug > Components: Block Manager >Affects Versions: 2.3.0 >Reporter: Lukas Waldmann >Priority: Major > Attachments: log.html > > > When using dynamic allocations after all jobs finishes spark delays for > several minutes before finally finishes. Log suggest that executors are not > cleared up properly. > See the attachment for log > -- This message was sent by Atlassian JIRA (v7.6.3#76005) - To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org
[jira] [Updated] (SPARK-27228) Spark long delay on close, possible problem with killing executors
[ https://issues.apache.org/jira/browse/SPARK-27228?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Lukas Waldmann updated SPARK-27228: --- Description: When using dynamic allocations after all jobs finishes spark delays for several minutes before finally finishes. Log suggest that executors are not cleared up properly. (was: When using dynamic allocations after all jobs finishes spark delays for several minutes before finally finishes. Log suggest that executors are not cleared up properly. {quote}{{19/03/21 09:51:38 INFO SparkSession: PROCESSING FINISHED 19/03/21 09:51:38 INFO ExecutorAllocationManager: Request to remove executorIds: 355 19/03/21 09:51:38 INFO YarnClusterSchedulerBackend: Requesting to kill executor(s) 355 19/03/21 09:51:38 INFO YarnClusterSchedulerBackend: Actual list of executor(s) to be killed is 355 19/03/21 09:51:38 INFO ApplicationMaster$AMEndpoint: Driver requested to kill executor(s) 355. 19/03/21 09:51:38 INFO ExecutorAllocationManager: Removing executor 355 because it has been idle for 60 seconds (new desired total will be 65) 19/03/21 09:51:38 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 228. 19/03/21 09:51:38 INFO DAGScheduler: Executor lost: 228 (epoch 446) 19/03/21 09:51:38 INFO BlockManagerMasterEndpoint: Trying to remove executor 228 from BlockManagerMaster. 19/03/21 09:51:38 INFO BlockManagerMasterEndpoint: Removing block manager BlockManagerId(228, data-15.bdp.gin.merck.com, 45882, None) 19/03/21 09:51:38 INFO BlockManagerMaster: Removed 228 successfully in removeExecutor 19/03/21 09:51:38 INFO SparkUI: Stopped Spark web UI at [http://data-04.bdp.gin.merck.com:44304|http://data-04.bdp.gin.merck.com:44304/] 19/03/21 09:51:38 INFO YarnClusterScheduler: Executor 228 on data-15.bdp.gin.merck.com killed by driver. 19/03/21 09:51:38 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 346. 
19/03/21 09:51:38 INFO DAGScheduler: Executor lost: 346 (epoch 446) 19/03/21 09:51:38 INFO BlockManagerMasterEndpoint: Trying to remove executor 346 from BlockManagerMaster. 19/03/21 09:51:38 INFO BlockManagerMasterEndpoint: Removing block manager BlockManagerId(346, datanode-02.bdp.gin.merck.com, 41186, None) 19/03/21 09:51:38 INFO BlockManagerMaster: Removed 346 successfully in removeExecutor 19/03/21 09:51:38 INFO YarnClusterScheduler: Executor 346 on datanode-02.bdp.gin.merck.com killed by driver. 19/03/21 09:51:39 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 332. 19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 332 (epoch 446) 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Trying to remove executor 332 from BlockManagerMaster. 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Removing block manager BlockManagerId(332, data-10.bdp.gin.merck.com, 38713, None) 19/03/21 09:51:39 INFO BlockManagerMaster: Removed 332 successfully in removeExecutor 19/03/21 09:51:39 INFO YarnClusterScheduler: Executor 332 on data-10.bdp.gin.merck.com killed by driver. 19/03/21 09:51:39 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 240. 19/03/21 09:51:39 INFO YarnClusterScheduler: Executor 240 on data-22.bdp.gin.merck.com killed by driver. 19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 240 (epoch 446) 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Trying to remove executor 240 from BlockManagerMaster. 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Removing block manager BlockManagerId(240, data-22.bdp.gin.merck.com, 43344, None) 19/03/21 09:51:39 INFO BlockManagerMaster: Removed 240 successfully in removeExecutor 19/03/21 09:51:39 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 327. 19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 327 (epoch 446) 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Trying to remove executor 327 from BlockManagerMaster. 
19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Removing block manager BlockManagerId(327, data-20.bdp.gin.merck.com, 34235, None) 19/03/21 09:51:39 INFO YarnClusterScheduler: Executor 327 on data-20.bdp.gin.merck.com killed by driver. 19/03/21 09:51:39 INFO BlockManagerMaster: Removed 327 successfully in removeExecutor 19/03/21 09:51:39 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 355. 19/03/21 09:51:39 INFO YarnClusterScheduler: Executor 355 on data-20.bdp.gin.merck.com killed by driver. 19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 355 (epoch 446) 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Trying to remove executor 355 from BlockManagerMaster. 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Removing block manager BlockManagerId(355, data-20.bdp.gin.merck.com, 43141, None) 19/03/21 09:51:39 INFO BlockManagerMaster: Removed 355 successfully in removeExecutor 19/03/21 09:51:39 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 168. 19/03/21 09:51:39 INFO DAGScheduler:
[jira] [Commented] (SPARK-27228) Spark long delay on close, possible problem with killing executors
[ https://issues.apache.org/jira/browse/SPARK-27228?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16798209#comment-16798209 ] Lukas Waldmann commented on SPARK-27228: Startup parameters: spark-submit --conf spark.shuffle.service.enabled=true --conf spark.dynamicAllocation.enabled=true --conf spark.driver.maxResultSize=4g --executor-memory 4g --driver-memory 8g --master yarn --deploy-mode cluster > Spark long delay on close, possible problem with killing executors > -- > > Key: SPARK-27228 > URL: https://issues.apache.org/jira/browse/SPARK-27228 > Project: Spark > Issue Type: Bug > Components: Block Manager >Affects Versions: 2.3.0 >Reporter: Lukas Waldmann >Priority: Major > Attachments: log.html > > > When using dynamic allocations after all jobs finishes spark delays for > several minutes before finally finishes. Log suggest that executors are not > cleared up properly. > {quote}{{19/03/21 09:51:38 INFO SparkSession: PROCESSING FINISHED 19/03/21 > 09:51:38 INFO ExecutorAllocationManager: Request to remove executorIds: 355 > 19/03/21 09:51:38 INFO YarnClusterSchedulerBackend: Requesting to kill > executor(s) 355 19/03/21 09:51:38 INFO YarnClusterSchedulerBackend: Actual > list of executor(s) to be killed is 355 19/03/21 09:51:38 INFO > ApplicationMaster$AMEndpoint: Driver requested to kill executor(s) 355. > 19/03/21 09:51:38 INFO ExecutorAllocationManager: Removing executor 355 > because it has been idle for 60 seconds (new desired total will be 65) > 19/03/21 09:51:38 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling > executor 228. 19/03/21 09:51:38 INFO DAGScheduler: Executor lost: 228 (epoch > 446) 19/03/21 09:51:38 INFO BlockManagerMasterEndpoint: Trying to remove > executor 228 from BlockManagerMaster. 
19/03/21 09:51:38 INFO > BlockManagerMasterEndpoint: Removing block manager BlockManagerId(228, > data-15.bdp.gin.merck.com, 45882, None) 19/03/21 09:51:38 INFO > BlockManagerMaster: Removed 228 successfully in removeExecutor 19/03/21 > 09:51:38 INFO SparkUI: Stopped Spark web UI at > [http://data-04.bdp.gin.merck.com:44304|http://data-04.bdp.gin.merck.com:44304/] > 19/03/21 09:51:38 INFO YarnClusterScheduler: Executor 228 on > data-15.bdp.gin.merck.com killed by driver. 19/03/21 09:51:38 INFO > YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 346. 19/03/21 > 09:51:38 INFO DAGScheduler: Executor lost: 346 (epoch 446) 19/03/21 09:51:38 > INFO BlockManagerMasterEndpoint: Trying to remove executor 346 from > BlockManagerMaster. 19/03/21 09:51:38 INFO BlockManagerMasterEndpoint: > Removing block manager BlockManagerId(346, datanode-02.bdp.gin.merck.com, > 41186, None) 19/03/21 09:51:38 INFO BlockManagerMaster: Removed 346 > successfully in removeExecutor 19/03/21 09:51:38 INFO YarnClusterScheduler: > Executor 346 on datanode-02.bdp.gin.merck.com killed by driver. 19/03/21 > 09:51:39 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor > 332. 19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 332 (epoch 446) > 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Trying to remove executor > 332 from BlockManagerMaster. 19/03/21 09:51:39 INFO > BlockManagerMasterEndpoint: Removing block manager BlockManagerId(332, > data-10.bdp.gin.merck.com, 38713, None) 19/03/21 09:51:39 INFO > BlockManagerMaster: Removed 332 successfully in removeExecutor 19/03/21 > 09:51:39 INFO YarnClusterScheduler: Executor 332 on data-10.bdp.gin.merck.com > killed by driver. 19/03/21 09:51:39 INFO > YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 240. 19/03/21 > 09:51:39 INFO YarnClusterScheduler: Executor 240 on data-22.bdp.gin.merck.com > killed by driver. 
19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 240 > (epoch 446) 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Trying to > remove executor 240 from BlockManagerMaster. 19/03/21 09:51:39 INFO > BlockManagerMasterEndpoint: Removing block manager BlockManagerId(240, > data-22.bdp.gin.merck.com, 43344, None) 19/03/21 09:51:39 INFO > BlockManagerMaster: Removed 240 successfully in removeExecutor 19/03/21 > 09:51:39 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor > 327. 19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 327 (epoch 446) > 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Trying to remove executor > 327 from BlockManagerMaster. 19/03/21 09:51:39 INFO > BlockManagerMasterEndpoint: Removing block manager BlockManagerId(327, > data-20.bdp.gin.merck.com, 34235, None) 19/03/21 09:51:39 INFO > YarnClusterScheduler: Executor 327 on data-20.bdp.gin.merck.com killed by > driver. 19/03/21 09:51:39 INFO BlockManagerMaster: Removed 327 successfully > in removeExecutor 19/03/21 09:51:39 INFO >
[jira] [Updated] (SPARK-27228) Spark long delay on close, possible problem with killing executors
[ https://issues.apache.org/jira/browse/SPARK-27228?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Lukas Waldmann updated SPARK-27228: --- Attachment: log.html > Spark long delay on close, possible problem with killing executors > -- > > Key: SPARK-27228 > URL: https://issues.apache.org/jira/browse/SPARK-27228 > Project: Spark > Issue Type: Bug > Components: Block Manager >Affects Versions: 2.3.0 >Reporter: Lukas Waldmann >Priority: Major > Attachments: log.html > > > When using dynamic allocations after all jobs finishes spark delays for > several minutes before finally finishes. Log suggest that executors are not > cleared up properly. > {quote}{{19/03/21 09:51:38 INFO SparkSession: PROCESSING FINISHED 19/03/21 > 09:51:38 INFO ExecutorAllocationManager: Request to remove executorIds: 355 > 19/03/21 09:51:38 INFO YarnClusterSchedulerBackend: Requesting to kill > executor(s) 355 19/03/21 09:51:38 INFO YarnClusterSchedulerBackend: Actual > list of executor(s) to be killed is 355 19/03/21 09:51:38 INFO > ApplicationMaster$AMEndpoint: Driver requested to kill executor(s) 355. > 19/03/21 09:51:38 INFO ExecutorAllocationManager: Removing executor 355 > because it has been idle for 60 seconds (new desired total will be 65) > 19/03/21 09:51:38 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling > executor 228. 19/03/21 09:51:38 INFO DAGScheduler: Executor lost: 228 (epoch > 446) 19/03/21 09:51:38 INFO BlockManagerMasterEndpoint: Trying to remove > executor 228 from BlockManagerMaster. 
19/03/21 09:51:38 INFO > BlockManagerMasterEndpoint: Removing block manager BlockManagerId(228, > data-15.bdp.gin.merck.com, 45882, None) 19/03/21 09:51:38 INFO > BlockManagerMaster: Removed 228 successfully in removeExecutor 19/03/21 > 09:51:38 INFO SparkUI: Stopped Spark web UI at > [http://data-04.bdp.gin.merck.com:44304|http://data-04.bdp.gin.merck.com:44304/] > 19/03/21 09:51:38 INFO YarnClusterScheduler: Executor 228 on > data-15.bdp.gin.merck.com killed by driver. 19/03/21 09:51:38 INFO > YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 346. 19/03/21 > 09:51:38 INFO DAGScheduler: Executor lost: 346 (epoch 446) 19/03/21 09:51:38 > INFO BlockManagerMasterEndpoint: Trying to remove executor 346 from > BlockManagerMaster. 19/03/21 09:51:38 INFO BlockManagerMasterEndpoint: > Removing block manager BlockManagerId(346, datanode-02.bdp.gin.merck.com, > 41186, None) 19/03/21 09:51:38 INFO BlockManagerMaster: Removed 346 > successfully in removeExecutor 19/03/21 09:51:38 INFO YarnClusterScheduler: > Executor 346 on datanode-02.bdp.gin.merck.com killed by driver. 19/03/21 > 09:51:39 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor > 332. 19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 332 (epoch 446) > 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Trying to remove executor > 332 from BlockManagerMaster. 19/03/21 09:51:39 INFO > BlockManagerMasterEndpoint: Removing block manager BlockManagerId(332, > data-10.bdp.gin.merck.com, 38713, None) 19/03/21 09:51:39 INFO > BlockManagerMaster: Removed 332 successfully in removeExecutor 19/03/21 > 09:51:39 INFO YarnClusterScheduler: Executor 332 on data-10.bdp.gin.merck.com > killed by driver. 19/03/21 09:51:39 INFO > YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 240. 19/03/21 > 09:51:39 INFO YarnClusterScheduler: Executor 240 on data-22.bdp.gin.merck.com > killed by driver. 
19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 240 > (epoch 446) 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Trying to > remove executor 240 from BlockManagerMaster. 19/03/21 09:51:39 INFO > BlockManagerMasterEndpoint: Removing block manager BlockManagerId(240, > data-22.bdp.gin.merck.com, 43344, None) 19/03/21 09:51:39 INFO > BlockManagerMaster: Removed 240 successfully in removeExecutor 19/03/21 > 09:51:39 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor > 327. 19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 327 (epoch 446) > 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Trying to remove executor > 327 from BlockManagerMaster. 19/03/21 09:51:39 INFO > BlockManagerMasterEndpoint: Removing block manager BlockManagerId(327, > data-20.bdp.gin.merck.com, 34235, None) 19/03/21 09:51:39 INFO > YarnClusterScheduler: Executor 327 on data-20.bdp.gin.merck.com killed by > driver. 19/03/21 09:51:39 INFO BlockManagerMaster: Removed 327 successfully > in removeExecutor 19/03/21 09:51:39 INFO > YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 355. 19/03/21 > 09:51:39 INFO YarnClusterScheduler: Executor 355 on data-20.bdp.gin.merck.com > killed by driver. 19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 355 > (epoch 446) 19/03/21 09:51:39 INFO
[jira] [Commented] (SPARK-27228) Spark long delay on close, possible problem with killing executors
[ https://issues.apache.org/jira/browse/SPARK-27228?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16798207#comment-16798207 ] Lukas Waldmann commented on SPARK-27228: log file added > Spark long delay on close, possible problem with killing executors > -- > > Key: SPARK-27228 > URL: https://issues.apache.org/jira/browse/SPARK-27228 > Project: Spark > Issue Type: Bug > Components: Block Manager >Affects Versions: 2.3.0 >Reporter: Lukas Waldmann >Priority: Major > Attachments: log.html > > > When using dynamic allocation, after all jobs finish Spark delays for > several minutes before it finally finishes. The log suggests that executors are not > cleaned up properly. > {quote}{{19/03/21 09:51:38 INFO SparkSession: PROCESSING FINISHED 19/03/21 > 09:51:38 INFO ExecutorAllocationManager: Request to remove executorIds: 355 > 19/03/21 09:51:38 INFO YarnClusterSchedulerBackend: Requesting to kill > executor(s) 355 19/03/21 09:51:38 INFO YarnClusterSchedulerBackend: Actual > list of executor(s) to be killed is 355 19/03/21 09:51:38 INFO > ApplicationMaster$AMEndpoint: Driver requested to kill executor(s) 355. > 19/03/21 09:51:38 INFO ExecutorAllocationManager: Removing executor 355 > because it has been idle for 60 seconds (new desired total will be 65) > 19/03/21 09:51:38 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling > executor 228. 19/03/21 09:51:38 INFO DAGScheduler: Executor lost: 228 (epoch > 446) 19/03/21 09:51:38 INFO BlockManagerMasterEndpoint: Trying to remove > executor 228 from BlockManagerMaster. 
19/03/21 09:51:38 INFO > BlockManagerMasterEndpoint: Removing block manager BlockManagerId(228, > data-15.bdp.gin.merck.com, 45882, None) 19/03/21 09:51:38 INFO > BlockManagerMaster: Removed 228 successfully in removeExecutor 19/03/21 > 09:51:38 INFO SparkUI: Stopped Spark web UI at > [http://data-04.bdp.gin.merck.com:44304|http://data-04.bdp.gin.merck.com:44304/] > 19/03/21 09:51:38 INFO YarnClusterScheduler: Executor 228 on > data-15.bdp.gin.merck.com killed by driver. 19/03/21 09:51:38 INFO > YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 346. 19/03/21 > 09:51:38 INFO DAGScheduler: Executor lost: 346 (epoch 446) 19/03/21 09:51:38 > INFO BlockManagerMasterEndpoint: Trying to remove executor 346 from > BlockManagerMaster. 19/03/21 09:51:38 INFO BlockManagerMasterEndpoint: > Removing block manager BlockManagerId(346, datanode-02.bdp.gin.merck.com, > 41186, None) 19/03/21 09:51:38 INFO BlockManagerMaster: Removed 346 > successfully in removeExecutor 19/03/21 09:51:38 INFO YarnClusterScheduler: > Executor 346 on datanode-02.bdp.gin.merck.com killed by driver. 19/03/21 > 09:51:39 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor > 332. 19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 332 (epoch 446) > 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Trying to remove executor > 332 from BlockManagerMaster. 19/03/21 09:51:39 INFO > BlockManagerMasterEndpoint: Removing block manager BlockManagerId(332, > data-10.bdp.gin.merck.com, 38713, None) 19/03/21 09:51:39 INFO > BlockManagerMaster: Removed 332 successfully in removeExecutor 19/03/21 > 09:51:39 INFO YarnClusterScheduler: Executor 332 on data-10.bdp.gin.merck.com > killed by driver. 19/03/21 09:51:39 INFO > YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 240. 19/03/21 > 09:51:39 INFO YarnClusterScheduler: Executor 240 on data-22.bdp.gin.merck.com > killed by driver. 
19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 240 > (epoch 446) 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Trying to > remove executor 240 from BlockManagerMaster. 19/03/21 09:51:39 INFO > BlockManagerMasterEndpoint: Removing block manager BlockManagerId(240, > data-22.bdp.gin.merck.com, 43344, None) 19/03/21 09:51:39 INFO > BlockManagerMaster: Removed 240 successfully in removeExecutor 19/03/21 > 09:51:39 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor > 327. 19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 327 (epoch 446) > 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Trying to remove executor > 327 from BlockManagerMaster. 19/03/21 09:51:39 INFO > BlockManagerMasterEndpoint: Removing block manager BlockManagerId(327, > data-20.bdp.gin.merck.com, 34235, None) 19/03/21 09:51:39 INFO > YarnClusterScheduler: Executor 327 on data-20.bdp.gin.merck.com killed by > driver. 19/03/21 09:51:39 INFO BlockManagerMaster: Removed 327 successfully > in removeExecutor 19/03/21 09:51:39 INFO > YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 355. 19/03/21 > 09:51:39 INFO YarnClusterScheduler: Executor 355 on data-20.bdp.gin.merck.com > killed by driver. 19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 355 > (epoch 446)
[jira] [Updated] (SPARK-27228) Spark long delay on close, possible problem with killing executors
[ https://issues.apache.org/jira/browse/SPARK-27228?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Lukas Waldmann updated SPARK-27228: --- Description: When using dynamic allocation, after all jobs finish Spark delays for several minutes before it finally finishes. The log suggests that executors are not cleaned up properly. {quote}{{19/03/21 09:51:38 INFO SparkSession: PROCESSING FINISHED 19/03/21 09:51:38 INFO ExecutorAllocationManager: Request to remove executorIds: 355 19/03/21 09:51:38 INFO YarnClusterSchedulerBackend: Requesting to kill executor(s) 355 19/03/21 09:51:38 INFO YarnClusterSchedulerBackend: Actual list of executor(s) to be killed is 355 19/03/21 09:51:38 INFO ApplicationMaster$AMEndpoint: Driver requested to kill executor(s) 355. 19/03/21 09:51:38 INFO ExecutorAllocationManager: Removing executor 355 because it has been idle for 60 seconds (new desired total will be 65) 19/03/21 09:51:38 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 228. 19/03/21 09:51:38 INFO DAGScheduler: Executor lost: 228 (epoch 446) 19/03/21 09:51:38 INFO BlockManagerMasterEndpoint: Trying to remove executor 228 from BlockManagerMaster. 19/03/21 09:51:38 INFO BlockManagerMasterEndpoint: Removing block manager BlockManagerId(228, data-15.bdp.gin.merck.com, 45882, None) 19/03/21 09:51:38 INFO BlockManagerMaster: Removed 228 successfully in removeExecutor 19/03/21 09:51:38 INFO SparkUI: Stopped Spark web UI at [http://data-04.bdp.gin.merck.com:44304|http://data-04.bdp.gin.merck.com:44304/] 19/03/21 09:51:38 INFO YarnClusterScheduler: Executor 228 on data-15.bdp.gin.merck.com killed by driver. 19/03/21 09:51:38 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 346. 19/03/21 09:51:38 INFO DAGScheduler: Executor lost: 346 (epoch 446) 19/03/21 09:51:38 INFO BlockManagerMasterEndpoint: Trying to remove executor 346 from BlockManagerMaster. 
19/03/21 09:51:38 INFO BlockManagerMasterEndpoint: Removing block manager BlockManagerId(346, datanode-02.bdp.gin.merck.com, 41186, None) 19/03/21 09:51:38 INFO BlockManagerMaster: Removed 346 successfully in removeExecutor 19/03/21 09:51:38 INFO YarnClusterScheduler: Executor 346 on datanode-02.bdp.gin.merck.com killed by driver. 19/03/21 09:51:39 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 332. 19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 332 (epoch 446) 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Trying to remove executor 332 from BlockManagerMaster. 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Removing block manager BlockManagerId(332, data-10.bdp.gin.merck.com, 38713, None) 19/03/21 09:51:39 INFO BlockManagerMaster: Removed 332 successfully in removeExecutor 19/03/21 09:51:39 INFO YarnClusterScheduler: Executor 332 on data-10.bdp.gin.merck.com killed by driver. 19/03/21 09:51:39 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 240. 19/03/21 09:51:39 INFO YarnClusterScheduler: Executor 240 on data-22.bdp.gin.merck.com killed by driver. 19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 240 (epoch 446) 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Trying to remove executor 240 from BlockManagerMaster. 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Removing block manager BlockManagerId(240, data-22.bdp.gin.merck.com, 43344, None) 19/03/21 09:51:39 INFO BlockManagerMaster: Removed 240 successfully in removeExecutor 19/03/21 09:51:39 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 327. 19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 327 (epoch 446) 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Trying to remove executor 327 from BlockManagerMaster. 
19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Removing block manager BlockManagerId(327, data-20.bdp.gin.merck.com, 34235, None) 19/03/21 09:51:39 INFO YarnClusterScheduler: Executor 327 on data-20.bdp.gin.merck.com killed by driver. 19/03/21 09:51:39 INFO BlockManagerMaster: Removed 327 successfully in removeExecutor 19/03/21 09:51:39 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 355. 19/03/21 09:51:39 INFO YarnClusterScheduler: Executor 355 on data-20.bdp.gin.merck.com killed by driver. 19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 355 (epoch 446) 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Trying to remove executor 355 from BlockManagerMaster. 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Removing block manager BlockManagerId(355, data-20.bdp.gin.merck.com, 43141, None) 19/03/21 09:51:39 INFO BlockManagerMaster: Removed 355 successfully in removeExecutor 19/03/21 09:51:39 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 168. 19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 168 (epoch 446) 19/03/21 09:51:39 INFO YarnClusterScheduler: Executor 168 on data-07.bdp.gin.merck.com killed by driver. 19/03/21 09:51:39 INFO
[jira] [Updated] (SPARK-27228) Spark long delay on close, possible problem with killing executors
[ https://issues.apache.org/jira/browse/SPARK-27228?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Lukas Waldmann updated SPARK-27228: --- Description: When using dynamic allocation, after all jobs finish Spark delays for several minutes before it finally finishes. The log suggests that executors are not cleaned up properly. {quote}{{19/03/21 09:51:38 INFO SparkSession: PROCESSING FINISHED 19/03/21 09:51:38 INFO ExecutorAllocationManager: Request to remove executorIds: 355 19/03/21 09:51:38 INFO YarnClusterSchedulerBackend: Requesting to kill executor(s) 355 19/03/21 09:51:38 INFO YarnClusterSchedulerBackend: Actual list of executor(s) to be killed is 355 19/03/21 09:51:38 INFO ApplicationMaster$AMEndpoint: Driver requested to kill executor(s) 355. 19/03/21 09:51:38 INFO ExecutorAllocationManager: Removing executor 355 because it has been idle for 60 seconds (new desired total will be 65) 19/03/21 09:51:38 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 228. 19/03/21 09:51:38 INFO DAGScheduler: Executor lost: 228 (epoch 446) 19/03/21 09:51:38 INFO BlockManagerMasterEndpoint: Trying to remove executor 228 from BlockManagerMaster. 19/03/21 09:51:38 INFO BlockManagerMasterEndpoint: Removing block manager BlockManagerId(228, data-15.bdp.gin.merck.com, 45882, None) 19/03/21 09:51:38 INFO BlockManagerMaster: Removed 228 successfully in removeExecutor 19/03/21 09:51:38 INFO SparkUI: Stopped Spark web UI at [http://data-04.bdp.gin.merck.com:44304|http://data-04.bdp.gin.merck.com:44304/] 19/03/21 09:51:38 INFO YarnClusterScheduler: Executor 228 on data-15.bdp.gin.merck.com killed by driver. 19/03/21 09:51:38 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 346. 19/03/21 09:51:38 INFO DAGScheduler: Executor lost: 346 (epoch 446) 19/03/21 09:51:38 INFO BlockManagerMasterEndpoint: Trying to remove executor 346 from BlockManagerMaster. 
19/03/21 09:51:38 INFO BlockManagerMasterEndpoint: Removing block manager BlockManagerId(346, datanode-02.bdp.gin.merck.com, 41186, None) 19/03/21 09:51:38 INFO BlockManagerMaster: Removed 346 successfully in removeExecutor 19/03/21 09:51:38 INFO YarnClusterScheduler: Executor 346 on datanode-02.bdp.gin.merck.com killed by driver. 19/03/21 09:51:39 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 332. 19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 332 (epoch 446) 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Trying to remove executor 332 from BlockManagerMaster. 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Removing block manager BlockManagerId(332, data-10.bdp.gin.merck.com, 38713, None) 19/03/21 09:51:39 INFO BlockManagerMaster: Removed 332 successfully in removeExecutor 19/03/21 09:51:39 INFO YarnClusterScheduler: Executor 332 on data-10.bdp.gin.merck.com killed by driver. 19/03/21 09:51:39 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 240. 19/03/21 09:51:39 INFO YarnClusterScheduler: Executor 240 on data-22.bdp.gin.merck.com killed by driver. 19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 240 (epoch 446) 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Trying to remove executor 240 from BlockManagerMaster. 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Removing block manager BlockManagerId(240, data-22.bdp.gin.merck.com, 43344, None) 19/03/21 09:51:39 INFO BlockManagerMaster: Removed 240 successfully in removeExecutor 19/03/21 09:51:39 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 327. 19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 327 (epoch 446) 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Trying to remove executor 327 from BlockManagerMaster. 
19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Removing block manager BlockManagerId(327, data-20.bdp.gin.merck.com, 34235, None) 19/03/21 09:51:39 INFO YarnClusterScheduler: Executor 327 on data-20.bdp.gin.merck.com killed by driver. 19/03/21 09:51:39 INFO BlockManagerMaster: Removed 327 successfully in removeExecutor 19/03/21 09:51:39 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 355. 19/03/21 09:51:39 INFO YarnClusterScheduler: Executor 355 on data-20.bdp.gin.merck.com killed by driver. 19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 355 (epoch 446) 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Trying to remove executor 355 from BlockManagerMaster. 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Removing block manager BlockManagerId(355, data-20.bdp.gin.merck.com, 43141, None) 19/03/21 09:51:39 INFO BlockManagerMaster: Removed 355 successfully in removeExecutor 19/03/21 09:51:39 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 168. 19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 168 (epoch 446) 19/03/21 09:51:39 INFO YarnClusterScheduler: Executor 168 on data-07.bdp.gin.merck.com killed by driver. 19/03/21 09:51:39 INFO
[jira] [Created] (SPARK-27228) Spark long delay on close, possible problem with killing executors
Lukas Waldmann created SPARK-27228: -- Summary: Spark long delay on close, possible problem with killing executors Key: SPARK-27228 URL: https://issues.apache.org/jira/browse/SPARK-27228 Project: Spark Issue Type: Bug Components: Block Manager Affects Versions: 2.3.0 Reporter: Lukas Waldmann When using dynamic allocation, after all jobs finish Spark delays for several minutes before it finally finishes. The log suggests that executors are not cleaned up properly. {quote}19/03/21 09:51:38 INFO SparkSession: PROCESSING FINISHED 19/03/21 09:51:38 INFO ExecutorAllocationManager: Request to remove executorIds: 355 19/03/21 09:51:38 INFO YarnClusterSchedulerBackend: Requesting to kill executor(s) 355 19/03/21 09:51:38 INFO YarnClusterSchedulerBackend: Actual list of executor(s) to be killed is 355 19/03/21 09:51:38 INFO ApplicationMaster$AMEndpoint: Driver requested to kill executor(s) 355. 19/03/21 09:51:38 INFO ExecutorAllocationManager: Removing executor 355 because it has been idle for 60 seconds (new desired total will be 65) 19/03/21 09:51:38 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 228. 19/03/21 09:51:38 INFO DAGScheduler: Executor lost: 228 (epoch 446) 19/03/21 09:51:38 INFO BlockManagerMasterEndpoint: Trying to remove executor 228 from BlockManagerMaster. 19/03/21 09:51:38 INFO BlockManagerMasterEndpoint: Removing block manager BlockManagerId(228, data-15.bdp.gin.merck.com, 45882, None) 19/03/21 09:51:38 INFO BlockManagerMaster: Removed 228 successfully in removeExecutor 19/03/21 09:51:38 INFO SparkUI: Stopped Spark web UI at http://data-04.bdp.gin.merck.com:44304 19/03/21 09:51:38 INFO YarnClusterScheduler: Executor 228 on data-15.bdp.gin.merck.com killed by driver. 19/03/21 09:51:38 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 346. 19/03/21 09:51:38 INFO DAGScheduler: Executor lost: 346 (epoch 446) 19/03/21 09:51:38 INFO BlockManagerMasterEndpoint: Trying to remove executor 346 from BlockManagerMaster. 
19/03/21 09:51:38 INFO BlockManagerMasterEndpoint: Removing block manager BlockManagerId(346, datanode-02.bdp.gin.merck.com, 41186, None) 19/03/21 09:51:38 INFO BlockManagerMaster: Removed 346 successfully in removeExecutor 19/03/21 09:51:38 INFO YarnClusterScheduler: Executor 346 on datanode-02.bdp.gin.merck.com killed by driver. 19/03/21 09:51:39 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 332. 19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 332 (epoch 446) 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Trying to remove executor 332 from BlockManagerMaster. 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Removing block manager BlockManagerId(332, data-10.bdp.gin.merck.com, 38713, None) 19/03/21 09:51:39 INFO BlockManagerMaster: Removed 332 successfully in removeExecutor 19/03/21 09:51:39 INFO YarnClusterScheduler: Executor 332 on data-10.bdp.gin.merck.com killed by driver. 19/03/21 09:51:39 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 240. 19/03/21 09:51:39 INFO YarnClusterScheduler: Executor 240 on data-22.bdp.gin.merck.com killed by driver. 19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 240 (epoch 446) 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Trying to remove executor 240 from BlockManagerMaster. 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Removing block manager BlockManagerId(240, data-22.bdp.gin.merck.com, 43344, None) 19/03/21 09:51:39 INFO BlockManagerMaster: Removed 240 successfully in removeExecutor 19/03/21 09:51:39 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 327. 19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 327 (epoch 446) 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Trying to remove executor 327 from BlockManagerMaster. 
19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Removing block manager BlockManagerId(327, data-20.bdp.gin.merck.com, 34235, None) 19/03/21 09:51:39 INFO YarnClusterScheduler: Executor 327 on data-20.bdp.gin.merck.com killed by driver. 19/03/21 09:51:39 INFO BlockManagerMaster: Removed 327 successfully in removeExecutor 19/03/21 09:51:39 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 355. 19/03/21 09:51:39 INFO YarnClusterScheduler: Executor 355 on data-20.bdp.gin.merck.com killed by driver. 19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 355 (epoch 446) 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Trying to remove executor 355 from BlockManagerMaster. 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Removing block manager BlockManagerId(355, data-20.bdp.gin.merck.com, 43141, None) 19/03/21 09:51:39 INFO BlockManagerMaster: Removed 355 successfully in removeExecutor 19/03/21 09:51:39 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 168. 19/03/21 09:51:39 INFO DAGScheduler: Executor lost: