Github user BryanCutler commented on a diff in the pull request:

    https://github.com/apache/spark/pull/11746#discussion_r59657657
  
    --- Diff: 
core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala ---
    @@ -68,56 +70,72 @@ private[deploy] class DriverRunner(
     
       private var clock: Clock = new SystemClock()
       private var sleeper = new Sleeper {
    -    def sleep(seconds: Int): Unit = (0 until seconds).takeWhile(f => 
{Thread.sleep(1000); !killed})
    +    def sleep(seconds: Int): Unit = Thread.sleep(seconds * 1000)
       }
     
       /** Starts a thread to run and manage the driver. */
       private[worker] def start() = {
    -    new Thread("DriverRunner for " + driverId) {
    +    workerThread = new Thread("DriverRunner for " + driverId) {
           override def run() {
    +        var shutdownHook: AnyRef = null
             try {
    -          val driverDir = createWorkingDirectory()
    -          val localJarFilename = downloadUserJar(driverDir)
    -
    -          def substituteVariables(argument: String): String = argument 
match {
    -            case "{{WORKER_URL}}" => workerUrl
    -            case "{{USER_JAR}}" => localJarFilename
    -            case other => other
    +          shutdownHook = ShutdownHookManager.addShutdownHook { () =>
    +            logInfo(s"Worker shutting down, killing driver $driverId")
    +            kill()
               }
     
    -          // TODO: If we add ability to submit multiple jars they should 
also be added here
    -          val builder = 
CommandUtils.buildProcessBuilder(driverDesc.command, securityManager,
    -            driverDesc.mem, sparkHome.getAbsolutePath, substituteVariables)
    -          launchDriver(builder, driverDir, driverDesc.supervise)
    +          // prepare driver jars, launch driver and set final state from 
process exit code
    +          val exitCode = prepareAndLaunchDriver()
    +          finalState = if (exitCode == 0) Some(DriverState.FINISHED) else 
Some(DriverState.FAILED)
             }
             catch {
    -          case e: Exception => finalException = Some(e)
    +          case interrupted: InterruptedException =>
    +            logInfo("Runner thread for driver " + driverId + " 
interrupted")
    +            killProcessAndFinalize(DriverState.KILLED, interrupted)
    +          case e: Exception =>
    +            killProcessAndFinalize(DriverState.ERROR, e)
    +        }
    +        finally {
    +          if (shutdownHook != null) 
ShutdownHookManager.removeShutdownHook(shutdownHook)
             }
     
    -        val state =
    -          if (killed) {
    -            DriverState.KILLED
    -          } else if (finalException.isDefined) {
    -            DriverState.ERROR
    -          } else {
    -            finalExitCode match {
    -              case Some(0) => DriverState.FINISHED
    -              case _ => DriverState.FAILED
    -            }
    -          }
    +        // notify worker of final driver state, possible exception
    +        worker.send(DriverStateChanged(driverId, finalState.get, 
finalException))
    +      }
     
    +      // kill the process if started, set shared finalizing variables
    +      def killProcessAndFinalize(state: DriverState.DriverState, e: 
Exception): Unit = {
    +        killProcess()
             finalState = Some(state)
    +        finalException = Some(e)
    +      }
    +    }
    +
    +    workerThread.start()
    +  }
     
    -        worker.send(DriverStateChanged(driverId, state, finalException))
    +  /** Kill driver process and wait for it to exit. */
    +  private def killProcess(): Unit = {
    +    if (process != null) {
    +      logInfo("Killing driver process!")
    +      val exitCode = Utils.terminateProcess(process, 
DRIVER_TERMINATE_TIMEOUT_MS)
    +      if (exitCode.isEmpty) {
    +        logWarning("Failed to terminate driver process: " + process +
    +            ". This process will likely be orphaned.")
           }
    -    }.start()
    +    }
       }
     
    -  /** Terminate this driver (or prevent it from ever starting if not yet 
started) */
    -  private[worker] def kill() {
    -    synchronized {
    -      process.foreach(p => p.destroy())
    -      killed = true
    +  /** Stop this driver, including the process it launched */
    +  private[worker] def kill(): Unit = {
    +    if (workerThread != null) {
    +      // make sure process does not start if being interrupted
    +      this.synchronized {
    +        // the workerThread will kill the child process when interrupted
    +        workerThread.interrupt()
    +        workerThread.join()
    --- End diff --
    
    > This doesn't fix the race condition. Imagine the following execution order:
    
    That order can't happen because the ShutdownHook isn't added until the 
thread has started running, so `workerThread` will already have been assigned by that point.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes to enable it, or if the feature is enabled but not working,
please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to