Github user zsxwing commented on a diff in the pull request:

    https://github.com/apache/spark/pull/11746#discussion_r59653957
  
    --- Diff: 
core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala ---
    @@ -68,56 +70,72 @@ private[deploy] class DriverRunner(
     
       private var clock: Clock = new SystemClock()
       private var sleeper = new Sleeper {
    -    def sleep(seconds: Int): Unit = (0 until seconds).takeWhile(f => 
{Thread.sleep(1000); !killed})
    +    def sleep(seconds: Int): Unit = Thread.sleep(seconds * 1000)
       }
     
       /** Starts a thread to run and manage the driver. */
       private[worker] def start() = {
    -    new Thread("DriverRunner for " + driverId) {
    +    workerThread = new Thread("DriverRunner for " + driverId) {
           override def run() {
    +        var shutdownHook: AnyRef = null
             try {
    -          val driverDir = createWorkingDirectory()
    -          val localJarFilename = downloadUserJar(driverDir)
    -
    -          def substituteVariables(argument: String): String = argument 
match {
    -            case "{{WORKER_URL}}" => workerUrl
    -            case "{{USER_JAR}}" => localJarFilename
    -            case other => other
    +          shutdownHook = ShutdownHookManager.addShutdownHook { () =>
    +            logInfo(s"Worker shutting down, killing driver $driverId")
    +            kill()
               }
     
    -          // TODO: If we add ability to submit multiple jars they should 
also be added here
    -          val builder = 
CommandUtils.buildProcessBuilder(driverDesc.command, securityManager,
    -            driverDesc.mem, sparkHome.getAbsolutePath, substituteVariables)
    -          launchDriver(builder, driverDir, driverDesc.supervise)
    +          // prepare driver jars, launch driver and set final state from 
process exit code
    +          val exitCode = prepareAndLaunchDriver()
    +          finalState = if (exitCode == 0) Some(DriverState.FINISHED) else 
Some(DriverState.FAILED)
             }
             catch {
    -          case e: Exception => finalException = Some(e)
    +          case interrupted: InterruptedException =>
    +            logInfo("Runner thread for driver " + driverId + " 
interrupted")
    +            killProcessAndFinalize(DriverState.KILLED, interrupted)
    +          case e: Exception =>
    +            killProcessAndFinalize(DriverState.ERROR, e)
    +        }
    +        finally {
    +          if (shutdownHook != null) 
ShutdownHookManager.removeShutdownHook(shutdownHook)
             }
     
    -        val state =
    -          if (killed) {
    -            DriverState.KILLED
    -          } else if (finalException.isDefined) {
    -            DriverState.ERROR
    -          } else {
    -            finalExitCode match {
    -              case Some(0) => DriverState.FINISHED
    -              case _ => DriverState.FAILED
    -            }
    -          }
    +        // notify worker of final driver state, possible exception
    +        worker.send(DriverStateChanged(driverId, finalState.get, 
finalException))
    +      }
     
    +      // kill the process if started, set shared finalizing variables
    +      def killProcessAndFinalize(state: DriverState.DriverState, e: 
Exception): Unit = {
    +        killProcess()
             finalState = Some(state)
    +        finalException = Some(e)
    +      }
    +    }
    +
    +    workerThread.start()
    +  }
     
    -        worker.send(DriverStateChanged(driverId, state, finalException))
    +  /** Kill driver process and wait for it to exit. */
    +  private def killProcess(): Unit = {
    +    if (process != null) {
    +      logInfo("Killing driver process!")
    +      val exitCode = Utils.terminateProcess(process, 
DRIVER_TERMINATE_TIMEOUT_MS)
    +      if (exitCode.isEmpty) {
    +        logWarning("Failed to terminate driver process: " + process +
    +            ". This process will likely be orphaned.")
           }
    -    }.start()
    +    }
       }
     
    -  /** Terminate this driver (or prevent it from ever starting if not yet 
started) */
    -  private[worker] def kill() {
    -    synchronized {
    -      process.foreach(p => p.destroy())
    -      killed = true
    +  /** Stop this driver, including the process it launched */
    +  private[worker] def kill(): Unit = {
    +    if (workerThread != null) {
    +      // make sure process does not start if being interrupted
    +      this.synchronized {
    +        // the workerThread will kill the child process when interrupted
    +        workerThread.interrupt()
    +        workerThread.join()
    --- End diff --
    
    There is a deadlock. If you call `join` while holding the lock here, 
workerThread won't be able to acquire that lock. So if workerThread happens to 
reach its `synchronized` line, it will block forever and `join` will never return.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes to enable it, or if the feature is enabled but not working,
please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to