Repository: spark Updated Branches: refs/heads/master 556b5d215 -> 96798d14f
[SPARK-22172][CORE] Worker hangs when the external shuffle service port is already in use ## What changes were proposed in this pull request? Handle exceptions thrown while starting the external shuffle service: if shuffleService.startIfEnabled() throws an Exception, the Worker logs the error ("Failed to start external shuffle service") and exits with status 1 instead of hanging. ## How was this patch tested? I verified it manually: it logs the exception and continues to serve without the external shuffle service when a BindException occurs. (Note: per the diff below, the committed code logs the error and calls System.exit(1) rather than continuing without the external shuffle service.) Author: Devaraj K <deva...@apache.org> Closes #19396 from devaraj-kavali/SPARK-22172. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/96798d14 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/96798d14 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/96798d14 Branch: refs/heads/master Commit: 96798d14f07208796fa0a90af0ab369879bacd6c Parents: 556b5d2 Author: Devaraj K <deva...@apache.org> Authored: Wed Nov 1 18:07:39 2017 +0800 Committer: jerryshao <ss...@hortonworks.com> Committed: Wed Nov 1 18:07:39 2017 +0800 ---------------------------------------------------------------------- .../scala/org/apache/spark/deploy/worker/Worker.scala | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/96798d14/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala index ed5fa4b..3962d42 100755 --- a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala @@ -199,7 +199,7 @@ private[deploy] class Worker( logInfo(s"Running Spark version ${org.apache.spark.SPARK_VERSION}") logInfo("Spark home: " + sparkHome) 
createWorkDir() - shuffleService.startIfEnabled() + startExternalShuffleService() webUi = new WorkerWebUI(this, workDir, webUiPort) webUi.bind() @@ -367,6 +367,16 @@ private[deploy] class Worker( } } + private def startExternalShuffleService() { + try { + shuffleService.startIfEnabled() + } catch { + case e: Exception => + logError("Failed to start external shuffle service", e) + System.exit(1) + } + } + private def sendRegisterMessageToMaster(masterEndpoint: RpcEndpointRef): Unit = { masterEndpoint.send(RegisterWorker( workerId, --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org