[ https://issues.apache.org/jira/browse/SPARK-27802?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
liupengcheng updated SPARK-27802: --------------------------------- Description: Recently, we hit this issue when testing Spark 2.3. It reports the following error message when clicking on the stage UI link. We added more logging to print the executorId (here, 10) for debugging, and finally found that it is caused by an inconsistency between the list of `ExecutorStageSummaryWrapper` and the list of `ExecutorSummaryWrapper` in the KVStore. When the number of dead executors exceeds the threshold, a dead executor may be removed from the list of `ExecutorSummaryWrapper`; however, it may still be kept in the list of `ExecutorStageSummaryWrapper` in the store. {code:java} HTTP ERROR 500 Problem accessing /stages/stage/. Reason: Server Error Caused by: java.util.NoSuchElementException: 10 at org.apache.spark.util.kvstore.InMemoryStore.read(InMemoryStore.java:83) at org.apache.spark.status.ElementTrackingStore.read(ElementTrackingStore.scala:95) at org.apache.spark.status.AppStatusStore.executorSummary(AppStatusStore.scala:70) at org.apache.spark.ui.jobs.ExecutorTable$$anonfun$createExecutorTable$2.apply(ExecutorTable.scala:99) at org.apache.spark.ui.jobs.ExecutorTable$$anonfun$createExecutorTable$2.apply(ExecutorTable.scala:92) at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) at scala.collection.TraversableLike$class.map(TraversableLike.scala:234) at scala.collection.AbstractTraversable.map(Traversable.scala:104) at org.apache.spark.ui.jobs.ExecutorTable.createExecutorTable(ExecutorTable.scala:92) at org.apache.spark.ui.jobs.ExecutorTable.toNodeSeq(ExecutorTable.scala:75) at org.apache.spark.ui.jobs.StagePage.render(StagePage.scala:478) at org.apache.spark.ui.WebUI$$anonfun$2.apply(WebUI.scala:82) at 
org.apache.spark.ui.WebUI$$anonfun$2.apply(WebUI.scala:82) at org.apache.spark.ui.JettyUtils$$anon$3.doGet(JettyUtils.scala:90) at javax.servlet.http.HttpServlet.service(HttpServlet.java:687) at javax.servlet.http.HttpServlet.service(HttpServlet.java:790) at org.spark_project.jetty.servlet.ServletHolder.handle(ServletHolder.java:848) at org.spark_project.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1772) at org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter.doFilter(AmIpFilter.java:166) at org.spark_project.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1759) at org.spark_project.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:582) at org.spark_project.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1180) at org.spark_project.jetty.servlet.ServletHandler.doScope(ServletHandler.java:512) at org.spark_project.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1112) at org.spark_project.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) at org.spark_project.jetty.server.handler.gzip.GzipHandler.handle(GzipHandler.java:493) at org.spark_project.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:213) at org.spark_project.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) at org.spark_project.jetty.server.Server.handle(Server.java:539) at org.spark_project.jetty.server.HttpChannel.handle(HttpChannel.java:333) at org.spark_project.jetty.server.HttpConnection.onFillable(HttpConnection.java:251) at org.spark_project.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:283) at org.spark_project.jetty.io.FillInterest.fillable(FillInterest.java:108) at org.spark_project.jetty.io.SelectChannelEndPoint$2.run(SelectChannelEndPoint.java:93) at org.spark_project.jetty.util.thread.strategy.ExecuteProduceConsume.executeProduceConsume(ExecuteProduceConsume.java:303) at 
org.spark_project.jetty.util.thread.strategy.ExecuteProduceConsume.produceConsume(ExecuteProduceConsume.java:148) at org.spark_project.jetty.util.thread.strategy.ExecuteProduceConsume.run(ExecuteProduceConsume.java:136) at org.spark_project.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:671) at org.spark_project.jetty.util.thread.QueuedThreadPool$2.run(QueuedThreadPool.java:589) at java.lang.Thread.run(Thread.java:748) {code} was: Recently, we hit this issue when testing spark2.3. It report the following error messages when click on the stage UI link. We add more logs to print the executorId(here is 10) to debug, and finally find out that it's caused by the inconsistency between the list of `ExecutorStageSummaryWrapper` and the `ExecutorSummaryWrapper` in the KVStore. The number of deadExecutors may exceeded threshold and being removed from list of `ExecutorSummaryWrapper`, however, it may still be kept in the list of `ExecutorStageSummaryWrapper` in the store. {code:java} HTTP ERROR 500 Problem accessing /stages/stage/. 
Reason: Server Error Caused by: java.util.NoSuchElementException: 10 at org.apache.spark.util.kvstore.InMemoryStore.read(InMemoryStore.java:83) at org.apache.spark.status.ElementTrackingStore.read(ElementTrackingStore.scala:95) at org.apache.spark.status.AppStatusStore.executorSummary(AppStatusStore.scala:70) at org.apache.spark.ui.jobs.ExecutorTable$$anonfun$createExecutorTable$2.apply(ExecutorTable.scala:99) at org.apache.spark.ui.jobs.ExecutorTable$$anonfun$createExecutorTable$2.apply(ExecutorTable.scala:92) at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) at scala.collection.TraversableLike$class.map(TraversableLike.scala:234) at scala.collection.AbstractTraversable.map(Traversable.scala:104) at org.apache.spark.ui.jobs.ExecutorTable.createExecutorTable(ExecutorTable.scala:92) at org.apache.spark.ui.jobs.ExecutorTable.toNodeSeq(ExecutorTable.scala:75) at org.apache.spark.ui.jobs.StagePage.render(StagePage.scala:478) at org.apache.spark.ui.WebUI$$anonfun$2.apply(WebUI.scala:82) at org.apache.spark.ui.WebUI$$anonfun$2.apply(WebUI.scala:82) at org.apache.spark.ui.JettyUtils$$anon$3.doGet(JettyUtils.scala:90) at javax.servlet.http.HttpServlet.service(HttpServlet.java:687) at javax.servlet.http.HttpServlet.service(HttpServlet.java:790) at org.spark_project.jetty.servlet.ServletHolder.handle(ServletHolder.java:848) at org.spark_project.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1772) at org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter.doFilter(AmIpFilter.java:166) at org.spark_project.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1759) at org.spark_project.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:582) at 
org.spark_project.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1180) at org.spark_project.jetty.servlet.ServletHandler.doScope(ServletHandler.java:512) at org.spark_project.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1112) at org.spark_project.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) at org.spark_project.jetty.server.handler.gzip.GzipHandler.handle(GzipHandler.java:493) at org.spark_project.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:213) at org.spark_project.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) at org.spark_project.jetty.server.Server.handle(Server.java:539) at org.spark_project.jetty.server.HttpChannel.handle(HttpChannel.java:333) at org.spark_project.jetty.server.HttpConnection.onFillable(HttpConnection.java:251) at org.spark_project.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:283) at org.spark_project.jetty.io.FillInterest.fillable(FillInterest.java:108) at org.spark_project.jetty.io.SelectChannelEndPoint$2.run(SelectChannelEndPoint.java:93) at org.spark_project.jetty.util.thread.strategy.ExecuteProduceConsume.executeProduceConsume(ExecuteProduceConsume.java:303) at org.spark_project.jetty.util.thread.strategy.ExecuteProduceConsume.produceConsume(ExecuteProduceConsume.java:148) at org.spark_project.jetty.util.thread.strategy.ExecuteProduceConsume.run(ExecuteProduceConsume.java:136) at org.spark_project.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:671) at org.spark_project.jetty.util.thread.QueuedThreadPool$2.run(QueuedThreadPool.java:589) at java.lang.Thread.run(Thread.java:748) {code} > SparkUI throws NoSuchElementException when inconsistency appears between > `ExecutorStageSummaryWrapper`s and `ExecutorSummaryWrapper`s > ------------------------------------------------------------------------------------------------------------------------------------- > > Key: SPARK-27802 > 
URL: https://issues.apache.org/jira/browse/SPARK-27802 > Project: Spark > Issue Type: Bug > Components: Web UI > Affects Versions: 2.3.2 > Reporter: liupengcheng > Priority: Major > > Recently, we hit this issue when testing Spark 2.3. It reports the following > error message when clicking on the stage UI link. > We added more logging to print the executorId (here, 10) for debugging, and finally > found that it is caused by an inconsistency between the list of > `ExecutorStageSummaryWrapper` and the list of `ExecutorSummaryWrapper` in the > KVStore. When the number of dead executors exceeds the threshold, a dead executor may be removed > from the list of `ExecutorSummaryWrapper`; however, it may still be kept in the > list of `ExecutorStageSummaryWrapper` in the store. > {code:java} > HTTP ERROR 500 > Problem accessing /stages/stage/. Reason: > Server Error > Caused by: > java.util.NoSuchElementException: 10 > at > org.apache.spark.util.kvstore.InMemoryStore.read(InMemoryStore.java:83) > at > org.apache.spark.status.ElementTrackingStore.read(ElementTrackingStore.scala:95) > at > org.apache.spark.status.AppStatusStore.executorSummary(AppStatusStore.scala:70) > at > org.apache.spark.ui.jobs.ExecutorTable$$anonfun$createExecutorTable$2.apply(ExecutorTable.scala:99) > at > org.apache.spark.ui.jobs.ExecutorTable$$anonfun$createExecutorTable$2.apply(ExecutorTable.scala:92) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) > at > scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) > at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) > at scala.collection.TraversableLike$class.map(TraversableLike.scala:234) > at scala.collection.AbstractTraversable.map(Traversable.scala:104) > at > org.apache.spark.ui.jobs.ExecutorTable.createExecutorTable(ExecutorTable.scala:92) > at > org.apache.spark.ui.jobs.ExecutorTable.toNodeSeq(ExecutorTable.scala:75) 
> at org.apache.spark.ui.jobs.StagePage.render(StagePage.scala:478) > at org.apache.spark.ui.WebUI$$anonfun$2.apply(WebUI.scala:82) > at org.apache.spark.ui.WebUI$$anonfun$2.apply(WebUI.scala:82) > at org.apache.spark.ui.JettyUtils$$anon$3.doGet(JettyUtils.scala:90) > at javax.servlet.http.HttpServlet.service(HttpServlet.java:687) > at javax.servlet.http.HttpServlet.service(HttpServlet.java:790) > at > org.spark_project.jetty.servlet.ServletHolder.handle(ServletHolder.java:848) > at > org.spark_project.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1772) > at > org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter.doFilter(AmIpFilter.java:166) > at > org.spark_project.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1759) > at > org.spark_project.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:582) > at > org.spark_project.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1180) > at > org.spark_project.jetty.servlet.ServletHandler.doScope(ServletHandler.java:512) > at > org.spark_project.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1112) > at > org.spark_project.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) > at > org.spark_project.jetty.server.handler.gzip.GzipHandler.handle(GzipHandler.java:493) > at > org.spark_project.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:213) > at > org.spark_project.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at org.spark_project.jetty.server.Server.handle(Server.java:539) > at > org.spark_project.jetty.server.HttpChannel.handle(HttpChannel.java:333) > at > org.spark_project.jetty.server.HttpConnection.onFillable(HttpConnection.java:251) > at > org.spark_project.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:283) > at > org.spark_project.jetty.io.FillInterest.fillable(FillInterest.java:108) > at > 
org.spark_project.jetty.io.SelectChannelEndPoint$2.run(SelectChannelEndPoint.java:93) > at > org.spark_project.jetty.util.thread.strategy.ExecuteProduceConsume.executeProduceConsume(ExecuteProduceConsume.java:303) > at > org.spark_project.jetty.util.thread.strategy.ExecuteProduceConsume.produceConsume(ExecuteProduceConsume.java:148) > at > org.spark_project.jetty.util.thread.strategy.ExecuteProduceConsume.run(ExecuteProduceConsume.java:136) > at > org.spark_project.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:671) > at > org.spark_project.jetty.util.thread.QueuedThreadPool$2.run(QueuedThreadPool.java:589) > at java.lang.Thread.run(Thread.java:748) > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org