[ https://issues.apache.org/jira/browse/SPARK-24523?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16857979#comment-16857979 ]
Martin Studer commented on SPARK-24523: --------------------------------------- We're observing the same issue on Hortonworks HDP 2.6.5 with Spark 2.3.0 and Hadoop 2.7.3. Specifically note the DFSClient exceptions. {noformat} 19/06/06 18:00:36 INFO SparkContext: Invoking stop() from shutdown hook 19/06/06 18:00:36 INFO SparkUI: Stopped Spark web UI at http://xxxxxxxx:4040 19/06/06 18:00:46 WARN ShutdownHookManager: ShutdownHook '$anon$2' timeout, java.util.concurrent.TimeoutException java.util.concurrent.TimeoutException at java.util.concurrent.FutureTask.get(FutureTask.java:205) at org.apache.hadoop.util.ShutdownHookManager$1.run(ShutdownHookManager.java:67) 19/06/06 18:00:46 ERROR Utils: Uncaught exception in thread pool-1-thread-1 java.lang.InterruptedException at java.lang.Object.wait(Native Method) at java.lang.Thread.join(Thread.java:1252) at java.lang.Thread.join(Thread.java:1326) at org.apache.spark.scheduler.AsyncEventQueue.stop(AsyncEventQueue.scala:133) at org.apache.spark.scheduler.LiveListenerBus$$anonfun$stop$1.apply(LiveListenerBus.scala:219) at org.apache.spark.scheduler.LiveListenerBus$$anonfun$stop$1.apply(LiveListenerBus.scala:219) at scala.collection.Iterator$class.foreach(Iterator.scala:893) at scala.collection.AbstractIterator.foreach(Iterator.scala:1336) at scala.collection.IterableLike$class.foreach(IterableLike.scala:72) at scala.collection.AbstractIterable.foreach(Iterable.scala:54) at org.apache.spark.scheduler.LiveListenerBus.stop(LiveListenerBus.scala:219) at org.apache.spark.SparkContext$$anonfun$stop$6.apply$mcV$sp(SparkContext.scala:1922) at org.apache.spark.util.Utils$.tryLogNonFatalError(Utils.scala:1357) at org.apache.spark.SparkContext.stop(SparkContext.scala:1921) at org.apache.spark.SparkContext$$anonfun$2.apply$mcV$sp(SparkContext.scala:573) at org.apache.spark.util.SparkShutdownHook.run(ShutdownHookManager.scala:216) at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(ShutdownHookManager.scala:188) at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1$$anonfun$apply$mcV$sp$1.apply(ShutdownHookManager.scala:188) at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1$$anonfun$apply$mcV$sp$1.apply(ShutdownHookManager.scala:188) at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:1988) at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1.apply$mcV$sp(ShutdownHookManager.scala:188) at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1.apply(ShutdownHookManager.scala:188) at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1.apply(ShutdownHookManager.scala:188) at scala.util.Try$.apply(Try.scala:192) at org.apache.spark.util.SparkShutdownHookManager.runAll(ShutdownHookManager.scala:188) at org.apache.spark.util.SparkShutdownHookManager$$anon$2.run(ShutdownHookManager.scala:178) at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) 19/06/06 18:00:46 WARN DFSClient: Unable to persist blocks in hflush for /spark2-history/application_1559195453904_0022.inprogress org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.hdfs.server.namenode.LeaseExpiredException): No lease on /spark2-history/application_1559195453904_0022.inprogress (inode 7393123): File is not open for writing. Holder DFSClient_NONMAPREDUCE_128063675_20 does not have any open files. at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkLease(FSNamesystem.java:3712) at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.fsync(FSNamesystem.java:4324) at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.fsync(NameNodeRpcServer.java:1354) at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.fsync(ClientNamenodeProtocolServerSideTranslatorPB.java:926) at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java) at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:640) at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:982) at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2351) at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2347) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1869) at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2347) at org.apache.hadoop.ipc.Client.getRpcResponse(Client.java:1554) at org.apache.hadoop.ipc.Client.call(Client.java:1498) at org.apache.hadoop.ipc.Client.call(Client.java:1398) at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:233) at com.sun.proxy.$Proxy12.fsync(Unknown Source) at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.fsync(ClientNamenodeProtocolTranslatorPB.java:890) at sun.reflect.GeneratedMethodAccessor7.invoke(Unknown Source) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:290) at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:202) at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:184) at com.sun.proxy.$Proxy13.fsync(Unknown Source) at org.apache.hadoop.hdfs.DFSOutputStream.flushOrSync(DFSOutputStream.java:2252) at org.apache.hadoop.hdfs.DFSOutputStream.hsync(DFSOutputStream.java:2151) at org.apache.spark.scheduler.EventLoggingListener$$anonfun$logEvent$3.apply(EventLoggingListener.scala:148) at org.apache.spark.scheduler.EventLoggingListener$$anonfun$logEvent$3.apply(EventLoggingListener.scala:147) at scala.Option.foreach(Option.scala:257) at org.apache.spark.scheduler.EventLoggingListener.logEvent(EventLoggingListener.scala:147) at org.apache.spark.scheduler.EventLoggingListener.onJobEnd(EventLoggingListener.scala:177) at org.apache.spark.scheduler.SparkListenerBus$class.doPostEvent(SparkListenerBus.scala:39) at org.apache.spark.scheduler.AsyncEventQueue.doPostEvent(AsyncEventQueue.scala:37) at org.apache.spark.scheduler.AsyncEventQueue.doPostEvent(AsyncEventQueue.scala:37) at org.apache.spark.util.ListenerBus$class.postToAll(ListenerBus.scala:82) at org.apache.spark.scheduler.AsyncEventQueue.org$apache$spark$scheduler$AsyncEventQueue$$super$postToAll(AsyncEventQueue.scala:89) at org.apache.spark.scheduler.AsyncEventQueue$$anonfun$org$apache$spark$scheduler$AsyncEventQueue$$dispatch$1.apply(AsyncEventQueue.scala:89) at scala.util.DynamicVariable.withValue(DynamicVariable.scala:58) at org.apache.spark.scheduler.AsyncEventQueue.org$apache$spark$scheduler$AsyncEventQueue$$dispatch(AsyncEventQueue.scala:83) at org.apache.spark.scheduler.AsyncEventQueue$$anon$1$$anonfun$run$1.apply$mcV$sp(AsyncEventQueue.scala:79) at org.apache.spark.util.Utils$.tryOrStopSparkContext(Utils.scala:1319) at org.apache.spark.scheduler.AsyncEventQueue$$anon$1.run(AsyncEventQueue.scala:78) 19/06/06 18:00:46 WARN DFSClient: Error while syncing java.nio.channels.ClosedChannelException at org.apache.hadoop.hdfs.DFSOutputStream.checkClosed(DFSOutputStream.java:1709) at org.apache.hadoop.hdfs.DFSOutputStream.flushOrSync(DFSOutputStream.java:2259) at org.apache.hadoop.hdfs.DFSOutputStream.hsync(DFSOutputStream.java:2151) at org.apache.spark.scheduler.EventLoggingListener$$anonfun$logEvent$3.apply(EventLoggingListener.scala:148) at org.apache.spark.scheduler.EventLoggingListener$$anonfun$logEvent$3.apply(EventLoggingListener.scala:147) at scala.Option.foreach(Option.scala:257) at org.apache.spark.scheduler.EventLoggingListener.logEvent(EventLoggingListener.scala:147) at org.apache.spark.scheduler.EventLoggingListener.onJobEnd(EventLoggingListener.scala:177) at org.apache.spark.scheduler.SparkListenerBus$class.doPostEvent(SparkListenerBus.scala:39) at org.apache.spark.scheduler.AsyncEventQueue.doPostEvent(AsyncEventQueue.scala:37) at org.apache.spark.scheduler.AsyncEventQueue.doPostEvent(AsyncEventQueue.scala:37) at org.apache.spark.util.ListenerBus$class.postToAll(ListenerBus.scala:82) at org.apache.spark.scheduler.AsyncEventQueue.org$apache$spark$scheduler$AsyncEventQueue$$super$postToAll(AsyncEventQueue.scala:89) at org.apache.spark.scheduler.AsyncEventQueue$$anonfun$org$apache$spark$scheduler$AsyncEventQueue$$dispatch$1.apply(AsyncEventQueue.scala:89) at scala.util.DynamicVariable.withValue(DynamicVariable.scala:58) at org.apache.spark.scheduler.AsyncEventQueue.org$apache$spark$scheduler$AsyncEventQueue$$dispatch(AsyncEventQueue.scala:83) at org.apache.spark.scheduler.AsyncEventQueue$$anon$1$$anonfun$run$1.apply$mcV$sp(AsyncEventQueue.scala:79) at org.apache.spark.util.Utils$.tryOrStopSparkContext(Utils.scala:1319) at org.apache.spark.scheduler.AsyncEventQueue$$anon$1.run(AsyncEventQueue.scala:78) 19/06/06 18:00:46 ERROR AsyncEventQueue: Listener EventLoggingListener threw an exception java.nio.channels.ClosedChannelException at org.apache.hadoop.hdfs.DFSOutputStream.checkClosed(DFSOutputStream.java:1709) at org.apache.hadoop.hdfs.DFSOutputStream.flushOrSync(DFSOutputStream.java:2259) at org.apache.hadoop.hdfs.DFSOutputStream.hsync(DFSOutputStream.java:2151) at org.apache.spark.scheduler.EventLoggingListener$$anonfun$logEvent$3.apply(EventLoggingListener.scala:148) at org.apache.spark.scheduler.EventLoggingListener$$anonfun$logEvent$3.apply(EventLoggingListener.scala:147) at scala.Option.foreach(Option.scala:257) at org.apache.spark.scheduler.EventLoggingListener.logEvent(EventLoggingListener.scala:147) at org.apache.spark.scheduler.EventLoggingListener.onJobEnd(EventLoggingListener.scala:177) at org.apache.spark.scheduler.SparkListenerBus$class.doPostEvent(SparkListenerBus.scala:39) at org.apache.spark.scheduler.AsyncEventQueue.doPostEvent(AsyncEventQueue.scala:37) at org.apache.spark.scheduler.AsyncEventQueue.doPostEvent(AsyncEventQueue.scala:37) at org.apache.spark.util.ListenerBus$class.postToAll(ListenerBus.scala:82) at org.apache.spark.scheduler.AsyncEventQueue.org$apache$spark$scheduler$AsyncEventQueue$$super$postToAll(AsyncEventQueue.scala:89) at org.apache.spark.scheduler.AsyncEventQueue$$anonfun$org$apache$spark$scheduler$AsyncEventQueue$$dispatch$1.apply(AsyncEventQueue.scala:89) at scala.util.DynamicVariable.withValue(DynamicVariable.scala:58) at org.apache.spark.scheduler.AsyncEventQueue.org$apache$spark$scheduler$AsyncEventQueue$$dispatch(AsyncEventQueue.scala:83) at org.apache.spark.scheduler.AsyncEventQueue$$anon$1$$anonfun$run$1.apply$mcV$sp(AsyncEventQueue.scala:79) at org.apache.spark.util.Utils$.tryOrStopSparkContext(Utils.scala:1319) at org.apache.spark.scheduler.AsyncEventQueue$$anon$1.run(AsyncEventQueue.scala:78) {noformat} [~ste...@apache.org] Do I understand correctly that HADOOP-15679 is addressing these DFSClient exceptions or is the above pointing at another issue? > InterruptedException when closing SparkContext > ---------------------------------------------- > > Key: SPARK-24523 > URL: https://issues.apache.org/jira/browse/SPARK-24523 > Project: Spark > Issue Type: Bug > Components: Scheduler > Affects Versions: 2.3.0, 2.3.1 > Environment: EMR 5.14.0, S3/HDFS inputs and outputs; EMR 5.17 > > > > Reporter: Umayr Hassan > Priority: Major > Attachments: spark-stop-jstack.log.1, spark-stop-jstack.log.2, > spark-stop-jstack.log.3, thread-dump.log > > > I'm running a Scala application in EMR with the following properties: > {{--master yarn --deploy-mode cluster --driver-memory 13g --executor-memory > 30g --executor-cores 5 --conf spark.default.parallelism=400 --conf > spark.dynamicAllocation.enabled=true --conf > spark.dynamicAllocation.maxExecutors=20 --conf > spark.eventLog.dir=hdfs:///var/log/spark/apps --conf > spark.eventLog.enabled=true --conf > spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version=2 --conf > spark.scheduler.listenerbus.eventqueue.capacity=20000 --conf > spark.shuffle.service.enabled=true --conf spark.sql.shuffle.partitions=400 > --conf spark.yarn.maxAppAttempts=1}} > The application runs fine till SparkContext is (automatically) closed, at > which point the SparkContext object throws. > {{18/06/10 10:44:43 ERROR Utils: Uncaught exception in thread pool-4-thread-1 > java.lang.InterruptedException at java.lang.Object.wait(Native Method) at > java.lang.Thread.join(Thread.java:1252) at > java.lang.Thread.join(Thread.java:1326) at > org.apache.spark.scheduler.AsyncEventQueue.stop(AsyncEventQueue.scala:133) at > org.apache.spark.scheduler.LiveListenerBus$$anonfun$stop$1.apply(LiveListenerBus.scala:219) > at > org.apache.spark.scheduler.LiveListenerBus$$anonfun$stop$1.apply(LiveListenerBus.scala:219) > at scala.collection.Iterator$class.foreach(Iterator.scala:893) at > scala.collection.AbstractIterator.foreach(Iterator.scala:1336) at > scala.collection.IterableLike$class.foreach(IterableLike.scala:72) at > scala.collection.AbstractIterable.foreach(Iterable.scala:54) at > org.apache.spark.scheduler.LiveListenerBus.stop(LiveListenerBus.scala:219) at > org.apache.spark.SparkContext$$anonfun$stop$6.apply$mcV$sp(SparkContext.scala:1915) > at org.apache.spark.util.Utils$.tryLogNonFatalError(Utils.scala:1357) at > org.apache.spark.SparkContext.stop(SparkContext.scala:1914) at > org.apache.spark.SparkContext$$anonfun$2.apply$mcV$sp(SparkContext.scala:572) > at org.apache.spark.util.SparkShutdownHook.run(ShutdownHookManager.scala:216) > at > org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(ShutdownHookManager.scala:188) > at > org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1$$anonfun$apply$mcV$sp$1.apply(ShutdownHookManager.scala:188) > at > org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1$$anonfun$apply$mcV$sp$1.apply(ShutdownHookManager.scala:188) > at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:1988) at > org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1.apply$mcV$sp(ShutdownHookManager.scala:188) > at > org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1.apply(ShutdownHookManager.scala:188) > at > org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1.apply(ShutdownHookManager.scala:188) > at scala.util.Try$.apply(Try.scala:192) at > org.apache.spark.util.SparkShutdownHookManager.runAll(ShutdownHookManager.scala:188) > at > org.apache.spark.util.SparkShutdownHookManager$$anon$2.run(ShutdownHookManager.scala:178) > at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) > at java.util.concurrent.FutureTask.run(FutureTask.java:266) at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748)}} > > I've not seen this behavior in Spark 2.0.2 and Spark 2.2.0 (for the same > application), so I'm not sure which change is causing Spark 2.3 to throw. Any > ideas? > best, > Umayr -- This message was sent by Atlassian JIRA (v7.6.3#76005) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org