[jira] [Comment Edited] (SPARK-36862) ERROR CodeGenerator: failed to compile: org.codehaus.commons.compiler.CompileException: File 'generated.java'

2022-09-01 Thread Lukas Waldmann (Jira)


[ 
https://issues.apache.org/jira/browse/SPARK-36862?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17599013#comment-17599013
 ] 

Lukas Waldmann edited comment on SPARK-36862 at 9/1/22 2:55 PM:


I managed to reproduce the issue in my environment. The problem is on line 192: 
a variable name in the function header contains an array index.
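
For illustration only (the failing line 192 lies beyond the truncation below), here is a
minimal hypothetical sketch of that failure mode; the class name and types are mine, not
taken from the generated code. Janino, like javac, requires a formal parameter to be a
plain identifier, so an array-indexed mutable-state slot cannot appear in a method header:
{code:java}
// Hypothetical reduction of the reported failure mode (not taken from the ticket).
final class Repro {
    // The real generated code uses UTF8String; String keeps this sketch self-contained.
    private String[] smj_mutableStateArray_0 = new String[7];

    // Rejected by Janino/javac: a formal parameter must be an identifier, not an array access.
    // private void consume(String smj_mutableStateArray_0[3]) { ... }

    // Legal form: pass a plain identifier and index the array inside the method.
    private void consume(String smj_value_0) {
        smj_mutableStateArray_0[3] = smj_value_0;
    }
}
{code}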

Here is the generated code
{code:java}
/* 001 */ public Object generate(Object[] references) {
/* 002 */ return new GeneratedIteratorForCodegenStage636(references);
/* 003 */ }
/* 004 */
/* 005 */ // codegenStageId=636
/* 006 */ final class GeneratedIteratorForCodegenStage636 extends 
org.apache.spark.sql.execution.BufferedRowIterator {
/* 007 */ private Object[] references;
/* 008 */ private scala.collection.Iterator[] inputs;
/* 009 */ private scala.collection.Iterator smj_leftInput_0;
/* 010 */ private scala.collection.Iterator smj_rightInput_0;
/* 011 */ private InternalRow smj_leftRow_0;
/* 012 */ private InternalRow smj_rightRow_0;
/* 013 */ private boolean smj_globalIsNull_0;
/* 014 */ private boolean smj_globalIsNull_1;
/* 015 */ private double smj_value_27;
/* 016 */ private 
org.apache.spark.sql.execution.ExternalAppendOnlyUnsafeRowArray smj_matches_0;
/* 017 */ private double smj_value_28;
/* 018 */ private boolean smj_isNull_25;
/* 019 */ private boolean smj_isNull_26;
/* 020 */ private boolean smj_isNull_27;
/* 021 */ private boolean smj_isNull_28;
/* 022 */ private boolean smj_isNull_29;
/* 023 */ private boolean smj_isNull_30;
/* 024 */ private boolean project_subExprIsNull_0;
/* 025 */ private 
org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter[] 
smj_mutableStateArray_2 = new 
org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter[2];
/* 026 */ private java.util.regex.Pattern[] project_mutableStateArray_0 = new 
java.util.regex.Pattern[1];
/* 027 */ private Decimal[] smj_mutableStateArray_1 = new Decimal[1];
/* 028 */ private String[] project_mutableStateArray_1 = new String[1];
/* 029 */ private UTF8String[] smj_mutableStateArray_0 = new UTF8String[7];
/* 030 */
/* 031 */ public GeneratedIteratorForCodegenStage636(Object[] references) {
/* 032 */ this.references = references;
/* 033 */ }
/* 034 */
/* 035 */ public void init(int index, scala.collection.Iterator[] inputs) {
/* 036 */ partitionIndex = index;
/* 037 */ this.inputs = inputs;
/* 038 */ smj_leftInput_0 = inputs[0];
/* 039 */ smj_rightInput_0 = inputs[1];
/* 040 */
/* 041 */ smj_matches_0 = new 
org.apache.spark.sql.execution.ExternalAppendOnlyUnsafeRowArray(2147483632, 
2147483647);
/* 042 */ smj_mutableStateArray_2[0] = new 
org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter(6, 192);
/* 043 */ smj_mutableStateArray_2[1] = new 
org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter(6, 192);
/* 044 */
/* 045 */ }
/* 046 */
/* 047 */ private boolean smj_findNextOuterJoinRows_0(
/* 048 */ scala.collection.Iterator leftIter,
/* 049 */ scala.collection.Iterator rightIter) {
/* 050 */ smj_leftRow_0 = null;
/* 051 */ int comp = 0;
/* 052 */ while (smj_leftRow_0 == null) {
/* 053 */ if (!leftIter.hasNext()) return false;
/* 054 */ smj_leftRow_0 = (InternalRow) leftIter.next();
/* 055 */ UTF8String smj_value_22 = smj_If_0(smj_leftRow_0);
/* 056 */ boolean smj_isNull_2 = smj_globalIsNull_1;
/* 057 */ double smj_value_2 = -1.0;
/* 058 */ if (!smj_globalIsNull_1) {
/* 059 */ final String smj_doubleStr_0 = smj_value_22.toString();
/* 060 */ try {
/* 061 */ smj_value_2 = Double.valueOf(smj_doubleStr_0);
/* 062 */ } catch (java.lang.NumberFormatException e) {
/* 063 */ final Double d = (Double) 
Cast.processFloatingPointSpecialLiterals(smj_doubleStr_0, false);
/* 064 */ if (d == null) {
/* 065 */ smj_isNull_2 = true;
/* 066 */ } else {
/* 067 */ smj_value_2 = d.doubleValue();
/* 068 */ }
/* 069 */ }
/* 070 */ }
/* 071 */ boolean smj_isNull_1 = smj_isNull_2;
/* 072 */ double smj_value_1 = -1.0;
/* 073 */
/* 074 */ if (!smj_isNull_2) {
/* 075 */ if (Double.isNaN(smj_value_2)) {
/* 076 */ smj_value_1 = Double.NaN;
/* 077 */ } else if (smj_value_2 == -0.0d) {
/* 078 */ smj_value_1 = 0.0d;
/* 079 */ } else {
/* 080 */ smj_value_1 = smj_value_2;
/* 081 */ }
/* 082 */
/* 083 */ }
/* 084 */ if (smj_isNull_1) {
/* 085 */ if (!smj_matches_0.isEmpty()) {
/* 086 */ smj_matches_0.clear();
/* 087 */ }
/* 088 */ return true;
/* 089 */ }
/* 090 */ if (!smj_matches_0.isEmpty()) {
/* 091 */ comp = 0;
/* 092 */ if (comp == 0) {
/* 093 */ comp = 
org.apache.spark.sql.catalyst.util.SQLOrderingUtil.compareDoubles(smj_value_1, 
smj_value_28);
/* 094 */ }
/* 095 */
/* 096 */ if (comp == 0) {
/* 097 */ return true;
/* 098 */ }
/* 099 */ smj_matches_0.clear();
/* 100 */ }
/* 101 */
/* 102 */ do {
/* 103 */ if (smj_rightRow_0 == null) {
/* 104 */ if (!rightIter.hasNext()) {
/* 105 */ if (!smj_matches_0.isEmpty()) {
/* 106 */ smj_value_28 = smj_value_1;
/* 107 */ }
/* 108 */ return true;
/* 109 */ }
/* 110 

[jira] [Commented] (SPARK-27228) Spark long delay on close, possible problem with killing executors

2019-03-29 Thread Lukas Waldmann (JIRA)


[ 
https://issues.apache.org/jira/browse/SPARK-27228?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16805418#comment-16805418
 ] 

Lukas Waldmann commented on SPARK-27228:


I understand.

Unfortunately, as things stand, I am afraid I will be stuck with 2.3 for quite 
some time. The upgrade cycle of our cluster is measured in years rather than months :(

 

> Spark long delay on close, possible problem with killing executors
> --
>
> Key: SPARK-27228
> URL: https://issues.apache.org/jira/browse/SPARK-27228
> Project: Spark
>  Issue Type: Bug
>  Components: Block Manager
>Affects Versions: 2.3.0
>Reporter: Lukas Waldmann
>Priority: Major
> Attachments: log.html
>
>
> When using dynamic allocation, after all jobs finish Spark delays for several 
> minutes before it finally finishes. The log suggests that executors are not 
> cleaned up properly.
> See the attached log.
>  






[jira] [Commented] (SPARK-27228) Spark long delay on close, possible problem with killing executors

2019-03-25 Thread Lukas Waldmann (JIRA)


[ 
https://issues.apache.org/jira/browse/SPARK-27228?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16800621#comment-16800621
 ] 

Lukas Waldmann commented on SPARK-27228:


That's a good question :)

As for what the code does: it runs up to several hundred SQL queries with different 
parameters and unions the results before writing them to a Hive table.

The inputs are Hive tables with up to several hundred million lines.

The code looks something like this:
{code:java}
void process(String dbName, String environment) {
    // For each item, run the SQL snippet and union the results
    List mds = ...;                                        // raw type and initializer elided in the original
    Map<String, Dataset<Row>> res = new LinkedHashMap<>(); // generics assumed; they were stripped in the mail
    mds.stream()
        .forEach(md -> {
            try (InputStream is = getClass().getResourceAsStream("/" + md.query_id)) {
                String snippet = IOUtils.toString(is);
                Dataset<Row> ds = spark.sql(snippet);
                String key = md.product;
                res.put(key, res.get(key) == null ? ds : ds.union(res.get(key)));
            } catch (IOException ex) {
                Logger.getLogger(SparkMainApp.class.getName()).log(Level.SEVERE, null, ex);
            }
        });

    String name = dbName + "." + table;
    res.values().stream()
        .forEach(result -> {
            result.repartition(result.col(PRODUCT.toString()), result.col(PROTOCOL.toString()))
                .write()
                .mode(SaveMode.Overwrite)
                .insertInto(name);
        });
}
{code}

> Spark long delay on close, possible problem with killing executors
> --
>
> Key: SPARK-27228
> URL: https://issues.apache.org/jira/browse/SPARK-27228
> Project: Spark
>  Issue Type: Bug
>  Components: Block Manager
>Affects Versions: 2.3.0
>Reporter: Lukas Waldmann
>Priority: Major
> Attachments: log.html
>
>
> When using dynamic allocation, after all jobs finish Spark delays for several 
> minutes before it finally finishes. The log suggests that executors are not 
> cleaned up properly.
> See the attached log.
>  






[jira] [Commented] (SPARK-27228) Spark long delay on close, possible problem with killing executors

2019-03-22 Thread Lukas Waldmann (JIRA)


[ 
https://issues.apache.org/jira/browse/SPARK-27228?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16798841#comment-16798841
 ] 

Lukas Waldmann commented on SPARK-27228:


Executor management seems to behave strangely after calling spark.stop().

See this:

 
{quote}19/03/21 09:51:39 INFO YarnSchedulerBackend$YarnDriverEndpoint: 
Disabling executor 332.
19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 332 (epoch 446)
19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Trying to remove executor 
332 from BlockManagerMaster.
19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Removing block manager 
BlockManagerId(332, data-10.bdp.gin.merck.com, 38713, None)
19/03/21 09:51:39 INFO BlockManagerMaster: Removed 332 successfully in 
removeExecutor
{quote}
and a few minutes later:
{quote}19/03/21 09:54:26 WARN HeartbeatReceiver: Removing executor 332 with no 
recent heartbeats: 173942 ms exceeds timeout 12 ms
19/03/21 09:54:26 ERROR YarnClusterScheduler: Lost an executor 332 (already 
removed): Executor heartbeat timed out after 173942 ms

19/03/21 09:54:26 INFO YarnClusterSchedulerBackend: Requesting to kill 
executor(s) 332
19/03/21 09:54:26 WARN YarnClusterSchedulerBackend: Executor to kill 332 does 
not exist!
19/03/21 09:54:26 INFO YarnClusterSchedulerBackend: Actual list of executor(s) 
to be killed is{quote}
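
For context, a minimal sketch (my own, not from the reporter; the values shown are the
stock defaults, not necessarily this cluster's settings) of the configuration knobs behind
this behavior: the 60-second idle removal reported in the issue description and the
heartbeat-timeout warning above.
{code:java}
import org.apache.spark.SparkConf;

public class DynAllocTimeouts {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf()
            .set("spark.dynamicAllocation.enabled", "true")
            .set("spark.shuffle.service.enabled", "true")
            // ExecutorAllocationManager asks to remove executors idle this long
            // ("idle for 60 seconds" in the logs quoted in the issue description).
            .set("spark.dynamicAllocation.executorIdleTimeout", "60s")
            // HeartbeatReceiver drops executors whose last heartbeat is older than this;
            // it still tracks executor 332 after its removal from BlockManagerMaster,
            // which appears to be why the warning shows up minutes after the removal.
            .set("spark.network.timeout", "120s");
        System.out.println(conf.toDebugString());
    }
}
{code}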
 

 

> Spark long delay on close, possible problem with killing executors
> --
>
> Key: SPARK-27228
> URL: https://issues.apache.org/jira/browse/SPARK-27228
> Project: Spark
>  Issue Type: Bug
>  Components: Block Manager
>Affects Versions: 2.3.0
>Reporter: Lukas Waldmann
>Priority: Major
> Attachments: log.html
>
>
> When using dynamic allocation, after all jobs finish Spark delays for several 
> minutes before it finally finishes. The log suggests that executors are not 
> cleaned up properly.
> See the attached log.
>  






[jira] [Reopened] (SPARK-27228) Spark long delay on close, possible problem with killing executors

2019-03-22 Thread Lukas Waldmann (JIRA)


 [ 
https://issues.apache.org/jira/browse/SPARK-27228?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Lukas Waldmann reopened SPARK-27228:


Added initial issue investigation

> Spark long delay on close, possible problem with killing executors
> --
>
> Key: SPARK-27228
> URL: https://issues.apache.org/jira/browse/SPARK-27228
> Project: Spark
>  Issue Type: Bug
>  Components: Block Manager
>Affects Versions: 2.3.0
>Reporter: Lukas Waldmann
>Priority: Major
> Attachments: log.html
>
>
> When using dynamic allocation, after all jobs finish Spark delays for several 
> minutes before it finally finishes. The log suggests that executors are not 
> cleaned up properly.
> See the attached log.
>  






[jira] [Updated] (SPARK-27228) Spark long delay on close, possible problem with killing executors

2019-03-22 Thread Lukas Waldmann (JIRA)


 [ 
https://issues.apache.org/jira/browse/SPARK-27228?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Lukas Waldmann updated SPARK-27228:
---
Description: 
When using dynamic allocation, after all jobs finish Spark delays for several 
minutes before it finally finishes. The log suggests that executors are not 
cleaned up properly.

See the attached log.

 

  was:When using dynamic allocation, after all jobs finish Spark delays for 
several minutes before it finally finishes. The log suggests that executors are 
not cleaned up properly.


> Spark long delay on close, possible problem with killing executors
> --
>
> Key: SPARK-27228
> URL: https://issues.apache.org/jira/browse/SPARK-27228
> Project: Spark
>  Issue Type: Bug
>  Components: Block Manager
>Affects Versions: 2.3.0
>Reporter: Lukas Waldmann
>Priority: Major
> Attachments: log.html
>
>
> When using dynamic allocation, after all jobs finish Spark delays for several 
> minutes before it finally finishes. The log suggests that executors are not 
> cleaned up properly.
> See the attached log.
>  






[jira] [Updated] (SPARK-27228) Spark long delay on close, possible problem with killing executors

2019-03-22 Thread Lukas Waldmann (JIRA)


 [ 
https://issues.apache.org/jira/browse/SPARK-27228?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Lukas Waldmann updated SPARK-27228:
---
Description: When using dynamic allocation, after all jobs finish Spark delays 
for several minutes before it finally finishes. The log suggests that executors 
are not cleaned up properly.  (was: When using dynamic allocation, after all 
jobs finish Spark delays for several minutes before it finally finishes. The 
log suggests that executors are not cleaned up properly.
{quote}{{19/03/21 09:51:38 INFO SparkSession: PROCESSING FINISHED 19/03/21 
09:51:38 INFO ExecutorAllocationManager: Request to remove executorIds: 355 
19/03/21 09:51:38 INFO YarnClusterSchedulerBackend: Requesting to kill 
executor(s) 355 19/03/21 09:51:38 INFO YarnClusterSchedulerBackend: Actual list 
of executor(s) to be killed is 355 19/03/21 09:51:38 INFO 
ApplicationMaster$AMEndpoint: Driver requested to kill executor(s) 355. 
19/03/21 09:51:38 INFO ExecutorAllocationManager: Removing executor 355 because 
it has been idle for 60 seconds (new desired total will be 65) 19/03/21 
09:51:38 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 228. 
19/03/21 09:51:38 INFO DAGScheduler: Executor lost: 228 (epoch 446) 19/03/21 
09:51:38 INFO BlockManagerMasterEndpoint: Trying to remove executor 228 from 
BlockManagerMaster. 19/03/21 09:51:38 INFO BlockManagerMasterEndpoint: Removing 
block manager BlockManagerId(228, data-15.bdp.gin.merck.com, 45882, None) 
19/03/21 09:51:38 INFO BlockManagerMaster: Removed 228 successfully in 
removeExecutor 19/03/21 09:51:38 INFO SparkUI: Stopped Spark web UI at 
[http://data-04.bdp.gin.merck.com:44304|http://data-04.bdp.gin.merck.com:44304/]
 19/03/21 09:51:38 INFO YarnClusterScheduler: Executor 228 on 
data-15.bdp.gin.merck.com killed by driver. 19/03/21 09:51:38 INFO 
YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 346. 19/03/21 
09:51:38 INFO DAGScheduler: Executor lost: 346 (epoch 446) 19/03/21 09:51:38 
INFO BlockManagerMasterEndpoint: Trying to remove executor 346 from 
BlockManagerMaster. 19/03/21 09:51:38 INFO BlockManagerMasterEndpoint: Removing 
block manager BlockManagerId(346, datanode-02.bdp.gin.merck.com, 41186, None) 
19/03/21 09:51:38 INFO BlockManagerMaster: Removed 346 successfully in 
removeExecutor 19/03/21 09:51:38 INFO YarnClusterScheduler: Executor 346 on 
datanode-02.bdp.gin.merck.com killed by driver. 19/03/21 09:51:39 INFO 
YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 332. 19/03/21 
09:51:39 INFO DAGScheduler: Executor lost: 332 (epoch 446) 19/03/21 09:51:39 
INFO BlockManagerMasterEndpoint: Trying to remove executor 332 from 
BlockManagerMaster. 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Removing 
block manager BlockManagerId(332, data-10.bdp.gin.merck.com, 38713, None) 
19/03/21 09:51:39 INFO BlockManagerMaster: Removed 332 successfully in 
removeExecutor 19/03/21 09:51:39 INFO YarnClusterScheduler: Executor 332 on 
data-10.bdp.gin.merck.com killed by driver. 19/03/21 09:51:39 INFO 
YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 240. 19/03/21 
09:51:39 INFO YarnClusterScheduler: Executor 240 on data-22.bdp.gin.merck.com 
killed by driver. 19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 240 
(epoch 446) 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Trying to remove 
executor 240 from BlockManagerMaster. 19/03/21 09:51:39 INFO 
BlockManagerMasterEndpoint: Removing block manager BlockManagerId(240, 
data-22.bdp.gin.merck.com, 43344, None) 19/03/21 09:51:39 INFO 
BlockManagerMaster: Removed 240 successfully in removeExecutor 19/03/21 
09:51:39 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 327. 
19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 327 (epoch 446) 19/03/21 
09:51:39 INFO BlockManagerMasterEndpoint: Trying to remove executor 327 from 
BlockManagerMaster. 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Removing 
block manager BlockManagerId(327, data-20.bdp.gin.merck.com, 34235, None) 
19/03/21 09:51:39 INFO YarnClusterScheduler: Executor 327 on 
data-20.bdp.gin.merck.com killed by driver. 19/03/21 09:51:39 INFO 
BlockManagerMaster: Removed 327 successfully in removeExecutor 19/03/21 
09:51:39 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 355. 
19/03/21 09:51:39 INFO YarnClusterScheduler: Executor 355 on 
data-20.bdp.gin.merck.com killed by driver. 19/03/21 09:51:39 INFO 
DAGScheduler: Executor lost: 355 (epoch 446) 19/03/21 09:51:39 INFO 
BlockManagerMasterEndpoint: Trying to remove executor 355 from 
BlockManagerMaster. 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Removing 
block manager BlockManagerId(355, data-20.bdp.gin.merck.com, 43141, None) 
19/03/21 09:51:39 INFO BlockManagerMaster: Removed 355 successfully in 
removeExecutor 19/03/21 09:51:39 INFO YarnSchedulerBackend$YarnDriverEndpoint: 
Disabling executor 168. 19/03/21 09:51:39 INFO DAGScheduler: 

[jira] [Commented] (SPARK-27228) Spark long delay on close, possible problem with killing executors

2019-03-21 Thread Lukas Waldmann (JIRA)


[ 
https://issues.apache.org/jira/browse/SPARK-27228?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16798209#comment-16798209
 ] 

Lukas Waldmann commented on SPARK-27228:


Startup parameters:

spark-submit --conf spark.shuffle.service.enabled=true --conf 
spark.dynamicAllocation.enabled=true --conf spark.driver.maxResultSize=4g 
--executor-memory 4g --driver-memory 8g --master yarn --deploy-mode cluster

> Spark long delay on close, possible problem with killing executors
> --
>
> Key: SPARK-27228
> URL: https://issues.apache.org/jira/browse/SPARK-27228
> Project: Spark
>  Issue Type: Bug
>  Components: Block Manager
>Affects Versions: 2.3.0
>Reporter: Lukas Waldmann
>Priority: Major
> Attachments: log.html
>
>
> When using dynamic allocation, after all jobs finish Spark delays for several 
> minutes before it finally finishes. The log suggests that executors are not 
> cleaned up properly.
> {quote}{{19/03/21 09:51:38 INFO SparkSession: PROCESSING FINISHED 19/03/21 
> 09:51:38 INFO ExecutorAllocationManager: Request to remove executorIds: 355 
> 19/03/21 09:51:38 INFO YarnClusterSchedulerBackend: Requesting to kill 
> executor(s) 355 19/03/21 09:51:38 INFO YarnClusterSchedulerBackend: Actual 
> list of executor(s) to be killed is 355 19/03/21 09:51:38 INFO 
> ApplicationMaster$AMEndpoint: Driver requested to kill executor(s) 355. 
> 19/03/21 09:51:38 INFO ExecutorAllocationManager: Removing executor 355 
> because it has been idle for 60 seconds (new desired total will be 65) 
> 19/03/21 09:51:38 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling 
> executor 228. 19/03/21 09:51:38 INFO DAGScheduler: Executor lost: 228 (epoch 
> 446) 19/03/21 09:51:38 INFO BlockManagerMasterEndpoint: Trying to remove 
> executor 228 from BlockManagerMaster. 19/03/21 09:51:38 INFO 
> BlockManagerMasterEndpoint: Removing block manager BlockManagerId(228, 
> data-15.bdp.gin.merck.com, 45882, None) 19/03/21 09:51:38 INFO 
> BlockManagerMaster: Removed 228 successfully in removeExecutor 19/03/21 
> 09:51:38 INFO SparkUI: Stopped Spark web UI at 
> [http://data-04.bdp.gin.merck.com:44304|http://data-04.bdp.gin.merck.com:44304/]
>  19/03/21 09:51:38 INFO YarnClusterScheduler: Executor 228 on 
> data-15.bdp.gin.merck.com killed by driver. 19/03/21 09:51:38 INFO 
> YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 346. 19/03/21 
> 09:51:38 INFO DAGScheduler: Executor lost: 346 (epoch 446) 19/03/21 09:51:38 
> INFO BlockManagerMasterEndpoint: Trying to remove executor 346 from 
> BlockManagerMaster. 19/03/21 09:51:38 INFO BlockManagerMasterEndpoint: 
> Removing block manager BlockManagerId(346, datanode-02.bdp.gin.merck.com, 
> 41186, None) 19/03/21 09:51:38 INFO BlockManagerMaster: Removed 346 
> successfully in removeExecutor 19/03/21 09:51:38 INFO YarnClusterScheduler: 
> Executor 346 on datanode-02.bdp.gin.merck.com killed by driver. 19/03/21 
> 09:51:39 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 
> 332. 19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 332 (epoch 446) 
> 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Trying to remove executor 
> 332 from BlockManagerMaster. 19/03/21 09:51:39 INFO 
> BlockManagerMasterEndpoint: Removing block manager BlockManagerId(332, 
> data-10.bdp.gin.merck.com, 38713, None) 19/03/21 09:51:39 INFO 
> BlockManagerMaster: Removed 332 successfully in removeExecutor 19/03/21 
> 09:51:39 INFO YarnClusterScheduler: Executor 332 on data-10.bdp.gin.merck.com 
> killed by driver. 19/03/21 09:51:39 INFO 
> YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 240. 19/03/21 
> 09:51:39 INFO YarnClusterScheduler: Executor 240 on data-22.bdp.gin.merck.com 
> killed by driver. 19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 240 
> (epoch 446) 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Trying to 
> remove executor 240 from BlockManagerMaster. 19/03/21 09:51:39 INFO 
> BlockManagerMasterEndpoint: Removing block manager BlockManagerId(240, 
> data-22.bdp.gin.merck.com, 43344, None) 19/03/21 09:51:39 INFO 
> BlockManagerMaster: Removed 240 successfully in removeExecutor 19/03/21 
> 09:51:39 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 
> 327. 19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 327 (epoch 446) 
> 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Trying to remove executor 
> 327 from BlockManagerMaster. 19/03/21 09:51:39 INFO 
> BlockManagerMasterEndpoint: Removing block manager BlockManagerId(327, 
> data-20.bdp.gin.merck.com, 34235, None) 19/03/21 09:51:39 INFO 
> YarnClusterScheduler: Executor 327 on data-20.bdp.gin.merck.com killed by 
> driver. 19/03/21 09:51:39 INFO BlockManagerMaster: Removed 327 successfully 
> in removeExecutor 19/03/21 09:51:39 INFO 
> 

[jira] [Updated] (SPARK-27228) Spark long delay on close, possible problem with killing executors

2019-03-21 Thread Lukas Waldmann (JIRA)


 [ 
https://issues.apache.org/jira/browse/SPARK-27228?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Lukas Waldmann updated SPARK-27228:
---
Attachment: log.html

> Spark long delay on close, possible problem with killing executors
> --
>
> Key: SPARK-27228
> URL: https://issues.apache.org/jira/browse/SPARK-27228
> Project: Spark
>  Issue Type: Bug
>  Components: Block Manager
>Affects Versions: 2.3.0
>Reporter: Lukas Waldmann
>Priority: Major
> Attachments: log.html
>
>
> When using dynamic allocation, after all jobs finish Spark delays for several 
> minutes before it finally finishes. The log suggests that executors are not 
> cleaned up properly.
> {quote}{{19/03/21 09:51:38 INFO SparkSession: PROCESSING FINISHED 19/03/21 
> 09:51:38 INFO ExecutorAllocationManager: Request to remove executorIds: 355 
> 19/03/21 09:51:38 INFO YarnClusterSchedulerBackend: Requesting to kill 
> executor(s) 355 19/03/21 09:51:38 INFO YarnClusterSchedulerBackend: Actual 
> list of executor(s) to be killed is 355 19/03/21 09:51:38 INFO 
> ApplicationMaster$AMEndpoint: Driver requested to kill executor(s) 355. 
> 19/03/21 09:51:38 INFO ExecutorAllocationManager: Removing executor 355 
> because it has been idle for 60 seconds (new desired total will be 65) 
> 19/03/21 09:51:38 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling 
> executor 228. 19/03/21 09:51:38 INFO DAGScheduler: Executor lost: 228 (epoch 
> 446) 19/03/21 09:51:38 INFO BlockManagerMasterEndpoint: Trying to remove 
> executor 228 from BlockManagerMaster. 19/03/21 09:51:38 INFO 
> BlockManagerMasterEndpoint: Removing block manager BlockManagerId(228, 
> data-15.bdp.gin.merck.com, 45882, None) 19/03/21 09:51:38 INFO 
> BlockManagerMaster: Removed 228 successfully in removeExecutor 19/03/21 
> 09:51:38 INFO SparkUI: Stopped Spark web UI at 
> [http://data-04.bdp.gin.merck.com:44304|http://data-04.bdp.gin.merck.com:44304/]
>  19/03/21 09:51:38 INFO YarnClusterScheduler: Executor 228 on 
> data-15.bdp.gin.merck.com killed by driver. 19/03/21 09:51:38 INFO 
> YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 346. 19/03/21 
> 09:51:38 INFO DAGScheduler: Executor lost: 346 (epoch 446) 19/03/21 09:51:38 
> INFO BlockManagerMasterEndpoint: Trying to remove executor 346 from 
> BlockManagerMaster. 19/03/21 09:51:38 INFO BlockManagerMasterEndpoint: 
> Removing block manager BlockManagerId(346, datanode-02.bdp.gin.merck.com, 
> 41186, None) 19/03/21 09:51:38 INFO BlockManagerMaster: Removed 346 
> successfully in removeExecutor 19/03/21 09:51:38 INFO YarnClusterScheduler: 
> Executor 346 on datanode-02.bdp.gin.merck.com killed by driver. 19/03/21 
> 09:51:39 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 
> 332. 19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 332 (epoch 446) 
> 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Trying to remove executor 
> 332 from BlockManagerMaster. 19/03/21 09:51:39 INFO 
> BlockManagerMasterEndpoint: Removing block manager BlockManagerId(332, 
> data-10.bdp.gin.merck.com, 38713, None) 19/03/21 09:51:39 INFO 
> BlockManagerMaster: Removed 332 successfully in removeExecutor 19/03/21 
> 09:51:39 INFO YarnClusterScheduler: Executor 332 on data-10.bdp.gin.merck.com 
> killed by driver. 19/03/21 09:51:39 INFO 
> YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 240. 19/03/21 
> 09:51:39 INFO YarnClusterScheduler: Executor 240 on data-22.bdp.gin.merck.com 
> killed by driver. 19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 240 
> (epoch 446) 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Trying to 
> remove executor 240 from BlockManagerMaster. 19/03/21 09:51:39 INFO 
> BlockManagerMasterEndpoint: Removing block manager BlockManagerId(240, 
> data-22.bdp.gin.merck.com, 43344, None) 19/03/21 09:51:39 INFO 
> BlockManagerMaster: Removed 240 successfully in removeExecutor 19/03/21 
> 09:51:39 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 
> 327. 19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 327 (epoch 446) 
> 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Trying to remove executor 
> 327 from BlockManagerMaster. 19/03/21 09:51:39 INFO 
> BlockManagerMasterEndpoint: Removing block manager BlockManagerId(327, 
> data-20.bdp.gin.merck.com, 34235, None) 19/03/21 09:51:39 INFO 
> YarnClusterScheduler: Executor 327 on data-20.bdp.gin.merck.com killed by 
> driver. 19/03/21 09:51:39 INFO BlockManagerMaster: Removed 327 successfully 
> in removeExecutor 19/03/21 09:51:39 INFO 
> YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 355. 19/03/21 
> 09:51:39 INFO YarnClusterScheduler: Executor 355 on data-20.bdp.gin.merck.com 
> killed by driver. 19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 355 
> (epoch 446) 19/03/21 09:51:39 INFO 

[jira] [Commented] (SPARK-27228) Spark long delay on close, possible problem with killing executors

2019-03-21 Thread Lukas Waldmann (JIRA)


[ 
https://issues.apache.org/jira/browse/SPARK-27228?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16798207#comment-16798207
 ] 

Lukas Waldmann commented on SPARK-27228:


log file added

> Spark long delay on close, possible problem with killing executors
> --
>
> Key: SPARK-27228
> URL: https://issues.apache.org/jira/browse/SPARK-27228
> Project: Spark
>  Issue Type: Bug
>  Components: Block Manager
>Affects Versions: 2.3.0
>Reporter: Lukas Waldmann
>Priority: Major
> Attachments: log.html
>
>
> When using dynamic allocation, after all jobs finish Spark delays for several 
> minutes before it finally finishes. The log suggests that executors are not 
> cleaned up properly.
> {quote}{{19/03/21 09:51:38 INFO SparkSession: PROCESSING FINISHED 19/03/21 
> 09:51:38 INFO ExecutorAllocationManager: Request to remove executorIds: 355 
> 19/03/21 09:51:38 INFO YarnClusterSchedulerBackend: Requesting to kill 
> executor(s) 355 19/03/21 09:51:38 INFO YarnClusterSchedulerBackend: Actual 
> list of executor(s) to be killed is 355 19/03/21 09:51:38 INFO 
> ApplicationMaster$AMEndpoint: Driver requested to kill executor(s) 355. 
> 19/03/21 09:51:38 INFO ExecutorAllocationManager: Removing executor 355 
> because it has been idle for 60 seconds (new desired total will be 65) 
> 19/03/21 09:51:38 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling 
> executor 228. 19/03/21 09:51:38 INFO DAGScheduler: Executor lost: 228 (epoch 
> 446) 19/03/21 09:51:38 INFO BlockManagerMasterEndpoint: Trying to remove 
> executor 228 from BlockManagerMaster. 19/03/21 09:51:38 INFO 
> BlockManagerMasterEndpoint: Removing block manager BlockManagerId(228, 
> data-15.bdp.gin.merck.com, 45882, None) 19/03/21 09:51:38 INFO 
> BlockManagerMaster: Removed 228 successfully in removeExecutor 19/03/21 
> 09:51:38 INFO SparkUI: Stopped Spark web UI at 
> [http://data-04.bdp.gin.merck.com:44304|http://data-04.bdp.gin.merck.com:44304/]
>  19/03/21 09:51:38 INFO YarnClusterScheduler: Executor 228 on 
> data-15.bdp.gin.merck.com killed by driver. 19/03/21 09:51:38 INFO 
> YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 346. 19/03/21 
> 09:51:38 INFO DAGScheduler: Executor lost: 346 (epoch 446) 19/03/21 09:51:38 
> INFO BlockManagerMasterEndpoint: Trying to remove executor 346 from 
> BlockManagerMaster. 19/03/21 09:51:38 INFO BlockManagerMasterEndpoint: 
> Removing block manager BlockManagerId(346, datanode-02.bdp.gin.merck.com, 
> 41186, None) 19/03/21 09:51:38 INFO BlockManagerMaster: Removed 346 
> successfully in removeExecutor 19/03/21 09:51:38 INFO YarnClusterScheduler: 
> Executor 346 on datanode-02.bdp.gin.merck.com killed by driver. 19/03/21 
> 09:51:39 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 
> 332. 19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 332 (epoch 446) 
> 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Trying to remove executor 
> 332 from BlockManagerMaster. 19/03/21 09:51:39 INFO 
> BlockManagerMasterEndpoint: Removing block manager BlockManagerId(332, 
> data-10.bdp.gin.merck.com, 38713, None) 19/03/21 09:51:39 INFO 
> BlockManagerMaster: Removed 332 successfully in removeExecutor 19/03/21 
> 09:51:39 INFO YarnClusterScheduler: Executor 332 on data-10.bdp.gin.merck.com 
> killed by driver. 19/03/21 09:51:39 INFO 
> YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 240. 19/03/21 
> 09:51:39 INFO YarnClusterScheduler: Executor 240 on data-22.bdp.gin.merck.com 
> killed by driver. 19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 240 
> (epoch 446) 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Trying to 
> remove executor 240 from BlockManagerMaster. 19/03/21 09:51:39 INFO 
> BlockManagerMasterEndpoint: Removing block manager BlockManagerId(240, 
> data-22.bdp.gin.merck.com, 43344, None) 19/03/21 09:51:39 INFO 
> BlockManagerMaster: Removed 240 successfully in removeExecutor 19/03/21 
> 09:51:39 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 
> 327. 19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 327 (epoch 446) 
> 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Trying to remove executor 
> 327 from BlockManagerMaster. 19/03/21 09:51:39 INFO 
> BlockManagerMasterEndpoint: Removing block manager BlockManagerId(327, 
> data-20.bdp.gin.merck.com, 34235, None) 19/03/21 09:51:39 INFO 
> YarnClusterScheduler: Executor 327 on data-20.bdp.gin.merck.com killed by 
> driver. 19/03/21 09:51:39 INFO BlockManagerMaster: Removed 327 successfully 
> in removeExecutor 19/03/21 09:51:39 INFO 
> YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 355. 19/03/21 
> 09:51:39 INFO YarnClusterScheduler: Executor 355 on data-20.bdp.gin.merck.com 
> killed by driver. 19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 355 
> (epoch 446) 

[jira] [Updated] (SPARK-27228) Spark long delay on close, possible problem with killing executors

2019-03-21 Thread Lukas Waldmann (JIRA)


 [ 
https://issues.apache.org/jira/browse/SPARK-27228?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Lukas Waldmann updated SPARK-27228:
---
Description: 
When using dynamic allocation, after all jobs finish Spark delays for several 
minutes before it finally finishes. The log suggests that executors are not 
cleaned up properly.
{quote}{{19/03/21 09:51:38 INFO SparkSession: PROCESSING FINISHED 19/03/21 
09:51:38 INFO ExecutorAllocationManager: Request to remove executorIds: 355 
19/03/21 09:51:38 INFO YarnClusterSchedulerBackend: Requesting to kill 
executor(s) 355 19/03/21 09:51:38 INFO YarnClusterSchedulerBackend: Actual list 
of executor(s) to be killed is 355 19/03/21 09:51:38 INFO 
ApplicationMaster$AMEndpoint: Driver requested to kill executor(s) 355. 
19/03/21 09:51:38 INFO ExecutorAllocationManager: Removing executor 355 because 
it has been idle for 60 seconds (new desired total will be 65) 19/03/21 
09:51:38 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 228. 
19/03/21 09:51:38 INFO DAGScheduler: Executor lost: 228 (epoch 446) 19/03/21 
09:51:38 INFO BlockManagerMasterEndpoint: Trying to remove executor 228 from 
BlockManagerMaster. 19/03/21 09:51:38 INFO BlockManagerMasterEndpoint: Removing 
block manager BlockManagerId(228, data-15.bdp.gin.merck.com, 45882, None) 
19/03/21 09:51:38 INFO BlockManagerMaster: Removed 228 successfully in 
removeExecutor 19/03/21 09:51:38 INFO SparkUI: Stopped Spark web UI at 
[http://data-04.bdp.gin.merck.com:44304|http://data-04.bdp.gin.merck.com:44304/]
 19/03/21 09:51:38 INFO YarnClusterScheduler: Executor 228 on 
data-15.bdp.gin.merck.com killed by driver. 19/03/21 09:51:38 INFO 
YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 346. 19/03/21 
09:51:38 INFO DAGScheduler: Executor lost: 346 (epoch 446) 19/03/21 09:51:38 
INFO BlockManagerMasterEndpoint: Trying to remove executor 346 from 
BlockManagerMaster. 19/03/21 09:51:38 INFO BlockManagerMasterEndpoint: Removing 
block manager BlockManagerId(346, datanode-02.bdp.gin.merck.com, 41186, None) 
19/03/21 09:51:38 INFO BlockManagerMaster: Removed 346 successfully in 
removeExecutor 19/03/21 09:51:38 INFO YarnClusterScheduler: Executor 346 on 
datanode-02.bdp.gin.merck.com killed by driver. 19/03/21 09:51:39 INFO 
YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 332. 19/03/21 
09:51:39 INFO DAGScheduler: Executor lost: 332 (epoch 446) 19/03/21 09:51:39 
INFO BlockManagerMasterEndpoint: Trying to remove executor 332 from 
BlockManagerMaster. 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Removing 
block manager BlockManagerId(332, data-10.bdp.gin.merck.com, 38713, None) 
19/03/21 09:51:39 INFO BlockManagerMaster: Removed 332 successfully in 
removeExecutor 19/03/21 09:51:39 INFO YarnClusterScheduler: Executor 332 on 
data-10.bdp.gin.merck.com killed by driver. 19/03/21 09:51:39 INFO 
YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 240. 19/03/21 
09:51:39 INFO YarnClusterScheduler: Executor 240 on data-22.bdp.gin.merck.com 
killed by driver. 19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 240 
(epoch 446) 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Trying to remove 
executor 240 from BlockManagerMaster. 19/03/21 09:51:39 INFO 
BlockManagerMasterEndpoint: Removing block manager BlockManagerId(240, 
data-22.bdp.gin.merck.com, 43344, None) 19/03/21 09:51:39 INFO 
BlockManagerMaster: Removed 240 successfully in removeExecutor 19/03/21 
09:51:39 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 327. 
19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 327 (epoch 446) 19/03/21 
09:51:39 INFO BlockManagerMasterEndpoint: Trying to remove executor 327 from 
BlockManagerMaster. 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Removing 
block manager BlockManagerId(327, data-20.bdp.gin.merck.com, 34235, None) 
19/03/21 09:51:39 INFO YarnClusterScheduler: Executor 327 on 
data-20.bdp.gin.merck.com killed by driver. 19/03/21 09:51:39 INFO 
BlockManagerMaster: Removed 327 successfully in removeExecutor 19/03/21 
09:51:39 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 355. 
19/03/21 09:51:39 INFO YarnClusterScheduler: Executor 355 on 
data-20.bdp.gin.merck.com killed by driver. 19/03/21 09:51:39 INFO 
DAGScheduler: Executor lost: 355 (epoch 446) 19/03/21 09:51:39 INFO 
BlockManagerMasterEndpoint: Trying to remove executor 355 from 
BlockManagerMaster. 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Removing 
block manager BlockManagerId(355, data-20.bdp.gin.merck.com, 43141, None) 
19/03/21 09:51:39 INFO BlockManagerMaster: Removed 355 successfully in 
removeExecutor 19/03/21 09:51:39 INFO YarnSchedulerBackend$YarnDriverEndpoint: 
Disabling executor 168. 19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 168 
(epoch 446) 19/03/21 09:51:39 INFO YarnClusterScheduler: Executor 168 on 
data-07.bdp.gin.merck.com killed by driver. 19/03/21 09:51:39 INFO 

[jira] [Updated] (SPARK-27228) Spark long delay on close, possible problem with killing executors

2019-03-21 Thread Lukas Waldmann (JIRA)


 [ 
https://issues.apache.org/jira/browse/SPARK-27228?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Lukas Waldmann updated SPARK-27228:
---
Description: 
When using dynamic allocation, after all jobs finish Spark delays for several 
minutes before it finally finishes. The log suggests that executors are not 
cleaned up properly.
{quote}{{19/03/21 09:51:38 INFO SparkSession: PROCESSING FINISHED 19/03/21 
09:51:38 INFO ExecutorAllocationManager: Request to remove executorIds: 355 
19/03/21 09:51:38 INFO YarnClusterSchedulerBackend: Requesting to kill 
executor(s) 355 19/03/21 09:51:38 INFO YarnClusterSchedulerBackend: Actual list 
of executor(s) to be killed is 355 19/03/21 09:51:38 INFO 
ApplicationMaster$AMEndpoint: Driver requested to kill executor(s) 355. 
19/03/21 09:51:38 INFO ExecutorAllocationManager: Removing executor 355 because 
it has been idle for 60 seconds (new desired total will be 65) 19/03/21 
09:51:38 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 228. 
19/03/21 09:51:38 INFO DAGScheduler: Executor lost: 228 (epoch 446) 19/03/21 
09:51:38 INFO BlockManagerMasterEndpoint: Trying to remove executor 228 from 
BlockManagerMaster. 19/03/21 09:51:38 INFO BlockManagerMasterEndpoint: Removing 
block manager BlockManagerId(228, data-15.bdp.gin.merck.com, 45882, None) 
19/03/21 09:51:38 INFO BlockManagerMaster: Removed 228 successfully in 
removeExecutor 19/03/21 09:51:38 INFO SparkUI: Stopped Spark web UI at 
[http://data-04.bdp.gin.merck.com:44304|http://data-04.bdp.gin.merck.com:44304/]
 19/03/21 09:51:38 INFO YarnClusterScheduler: Executor 228 on 
data-15.bdp.gin.merck.com killed by driver. 19/03/21 09:51:38 INFO 
YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 346. 19/03/21 
09:51:38 INFO DAGScheduler: Executor lost: 346 (epoch 446) 19/03/21 09:51:38 
INFO BlockManagerMasterEndpoint: Trying to remove executor 346 from 
BlockManagerMaster. 19/03/21 09:51:38 INFO BlockManagerMasterEndpoint: Removing 
block manager BlockManagerId(346, datanode-02.bdp.gin.merck.com, 41186, None) 
19/03/21 09:51:38 INFO BlockManagerMaster: Removed 346 successfully in 
removeExecutor 19/03/21 09:51:38 INFO YarnClusterScheduler: Executor 346 on 
datanode-02.bdp.gin.merck.com killed by driver. 19/03/21 09:51:39 INFO 
YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 332. 19/03/21 
09:51:39 INFO DAGScheduler: Executor lost: 332 (epoch 446) 19/03/21 09:51:39 
INFO BlockManagerMasterEndpoint: Trying to remove executor 332 from 
BlockManagerMaster. 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Removing 
block manager BlockManagerId(332, data-10.bdp.gin.merck.com, 38713, None) 
19/03/21 09:51:39 INFO BlockManagerMaster: Removed 332 successfully in 
removeExecutor 19/03/21 09:51:39 INFO YarnClusterScheduler: Executor 332 on 
data-10.bdp.gin.merck.com killed by driver. 19/03/21 09:51:39 INFO 
YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 240. 19/03/21 
09:51:39 INFO YarnClusterScheduler: Executor 240 on data-22.bdp.gin.merck.com 
killed by driver. 19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 240 
(epoch 446) 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Trying to remove 
executor 240 from BlockManagerMaster. 19/03/21 09:51:39 INFO 
BlockManagerMasterEndpoint: Removing block manager BlockManagerId(240, 
data-22.bdp.gin.merck.com, 43344, None) 19/03/21 09:51:39 INFO 
BlockManagerMaster: Removed 240 successfully in removeExecutor 19/03/21 
09:51:39 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 327. 
19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 327 (epoch 446) 19/03/21 
09:51:39 INFO BlockManagerMasterEndpoint: Trying to remove executor 327 from 
BlockManagerMaster. 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Removing 
block manager BlockManagerId(327, data-20.bdp.gin.merck.com, 34235, None) 
19/03/21 09:51:39 INFO YarnClusterScheduler: Executor 327 on 
data-20.bdp.gin.merck.com killed by driver. 19/03/21 09:51:39 INFO 
BlockManagerMaster: Removed 327 successfully in removeExecutor 19/03/21 
09:51:39 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 355. 
19/03/21 09:51:39 INFO YarnClusterScheduler: Executor 355 on 
data-20.bdp.gin.merck.com killed by driver. 19/03/21 09:51:39 INFO 
DAGScheduler: Executor lost: 355 (epoch 446) 19/03/21 09:51:39 INFO 
BlockManagerMasterEndpoint: Trying to remove executor 355 from 
BlockManagerMaster. 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Removing 
block manager BlockManagerId(355, data-20.bdp.gin.merck.com, 43141, None) 
19/03/21 09:51:39 INFO BlockManagerMaster: Removed 355 successfully in 
removeExecutor 19/03/21 09:51:39 INFO YarnSchedulerBackend$YarnDriverEndpoint: 
Disabling executor 168. 19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 168 
(epoch 446) 19/03/21 09:51:39 INFO YarnClusterScheduler: Executor 168 on 
data-07.bdp.gin.merck.com killed by driver. 19/03/21 09:51:39 INFO 

[jira] [Created] (SPARK-27228) Spark long delay on close, possible problem with killing executors

2019-03-21 Thread Lukas Waldmann (JIRA)
Lukas Waldmann created SPARK-27228:
--

 Summary: Spark long delay on close, possible problem with killing 
executors
 Key: SPARK-27228
 URL: https://issues.apache.org/jira/browse/SPARK-27228
 Project: Spark
  Issue Type: Bug
  Components: Block Manager
Affects Versions: 2.3.0
Reporter: Lukas Waldmann


When using dynamic allocation, after all jobs finish Spark delays for several 
minutes before it finally finishes. The log suggests that executors are not 
cleaned up properly.
{quote}19/03/21 09:51:38 INFO SparkSession: PROCESSING FINISHED 19/03/21 
09:51:38 INFO ExecutorAllocationManager: Request to remove executorIds: 355 
19/03/21 09:51:38 INFO YarnClusterSchedulerBackend: Requesting to kill 
executor(s) 355 19/03/21 09:51:38 INFO YarnClusterSchedulerBackend: Actual list 
of executor(s) to be killed is 355 19/03/21 09:51:38 INFO 
ApplicationMaster$AMEndpoint: Driver requested to kill executor(s) 355. 
19/03/21 09:51:38 INFO ExecutorAllocationManager: Removing executor 355 because 
it has been idle for 60 seconds (new desired total will be 65) 19/03/21 
09:51:38 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 228. 
19/03/21 09:51:38 INFO DAGScheduler: Executor lost: 228 (epoch 446) 19/03/21 
09:51:38 INFO BlockManagerMasterEndpoint: Trying to remove executor 228 from 
BlockManagerMaster. 19/03/21 09:51:38 INFO BlockManagerMasterEndpoint: Removing 
block manager BlockManagerId(228, data-15.bdp.gin.merck.com, 45882, None) 
19/03/21 09:51:38 INFO BlockManagerMaster: Removed 228 successfully in 
removeExecutor 19/03/21 09:51:38 INFO SparkUI: Stopped Spark web UI at 
http://data-04.bdp.gin.merck.com:44304 19/03/21 09:51:38 INFO 
YarnClusterScheduler: Executor 228 on data-15.bdp.gin.merck.com killed by 
driver. 19/03/21 09:51:38 INFO YarnSchedulerBackend$YarnDriverEndpoint: 
Disabling executor 346. 19/03/21 09:51:38 INFO DAGScheduler: Executor lost: 346 
(epoch 446) 19/03/21 09:51:38 INFO BlockManagerMasterEndpoint: Trying to remove 
executor 346 from BlockManagerMaster. 19/03/21 09:51:38 INFO 
BlockManagerMasterEndpoint: Removing block manager BlockManagerId(346, 
datanode-02.bdp.gin.merck.com, 41186, None) 19/03/21 09:51:38 INFO 
BlockManagerMaster: Removed 346 successfully in removeExecutor 19/03/21 
09:51:38 INFO YarnClusterScheduler: Executor 346 on 
datanode-02.bdp.gin.merck.com killed by driver. 19/03/21 09:51:39 INFO 
YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 332. 19/03/21 
09:51:39 INFO DAGScheduler: Executor lost: 332 (epoch 446) 19/03/21 09:51:39 
INFO BlockManagerMasterEndpoint: Trying to remove executor 332 from 
BlockManagerMaster. 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Removing 
block manager BlockManagerId(332, data-10.bdp.gin.merck.com, 38713, None) 
19/03/21 09:51:39 INFO BlockManagerMaster: Removed 332 successfully in 
removeExecutor 19/03/21 09:51:39 INFO YarnClusterScheduler: Executor 332 on 
data-10.bdp.gin.merck.com killed by driver. 19/03/21 09:51:39 INFO 
YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 240. 19/03/21 
09:51:39 INFO YarnClusterScheduler: Executor 240 on data-22.bdp.gin.merck.com 
killed by driver. 19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 240 
(epoch 446) 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Trying to remove 
executor 240 from BlockManagerMaster. 19/03/21 09:51:39 INFO 
BlockManagerMasterEndpoint: Removing block manager BlockManagerId(240, 
data-22.bdp.gin.merck.com, 43344, None) 19/03/21 09:51:39 INFO 
BlockManagerMaster: Removed 240 successfully in removeExecutor 19/03/21 
09:51:39 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 327. 
19/03/21 09:51:39 INFO DAGScheduler: Executor lost: 327 (epoch 446) 19/03/21 
09:51:39 INFO BlockManagerMasterEndpoint: Trying to remove executor 327 from 
BlockManagerMaster. 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Removing 
block manager BlockManagerId(327, data-20.bdp.gin.merck.com, 34235, None) 
19/03/21 09:51:39 INFO YarnClusterScheduler: Executor 327 on 
data-20.bdp.gin.merck.com killed by driver. 19/03/21 09:51:39 INFO 
BlockManagerMaster: Removed 327 successfully in removeExecutor 19/03/21 
09:51:39 INFO YarnSchedulerBackend$YarnDriverEndpoint: Disabling executor 355. 
19/03/21 09:51:39 INFO YarnClusterScheduler: Executor 355 on 
data-20.bdp.gin.merck.com killed by driver. 19/03/21 09:51:39 INFO 
DAGScheduler: Executor lost: 355 (epoch 446) 19/03/21 09:51:39 INFO 
BlockManagerMasterEndpoint: Trying to remove executor 355 from 
BlockManagerMaster. 19/03/21 09:51:39 INFO BlockManagerMasterEndpoint: Removing 
block manager BlockManagerId(355, data-20.bdp.gin.merck.com, 43141, None) 
19/03/21 09:51:39 INFO BlockManagerMaster: Removed 355 successfully in 
removeExecutor 19/03/21 09:51:39 INFO YarnSchedulerBackend$YarnDriverEndpoint: 
Disabling executor 168. 19/03/21 09:51:39 INFO DAGScheduler: Executor lost: