GitHub user yhuai commented on a diff in the pull request:

    https://github.com/apache/spark/pull/16189#discussion_r93162832
  
    --- Diff: core/src/test/scala/org/apache/spark/JobCancellationSuite.scala ---
    @@ -209,6 +209,83 @@ class JobCancellationSuite extends SparkFunSuite with Matchers with BeforeAndAft
         assert(jobB.get() === 100)
       }
     
    +  test("task reaper kills JVM if killed tasks keep running for too long") {
    +    val conf = new SparkConf()
    +      .set("spark.task.reaper.enabled", "true")
    +      .set("spark.task.reaper.killTimeout", "5s")
    +    sc = new SparkContext("local-cluster[2,1,1024]", "test", conf)
    +
    +    // Add a listener to release the semaphore once any tasks are launched.
    +    val sem = new Semaphore(0)
    +    sc.addSparkListener(new SparkListener {
    +      override def onTaskStart(taskStart: SparkListenerTaskStart): Unit = {
    +        sem.release()
    +      }
    +    })
    +
    +    // jobA is the one to be cancelled.
    +    val jobA = Future {
    +      sc.setJobGroup("jobA", "this is a job to be cancelled", interruptOnCancel = true)
    +      sc.parallelize(1 to 10000, 2).map { i =>
    +        while (true) { }
    +      }.count()
    +    }
    +
    +    // Block until both tasks of job A have started and cancel job A.
    +    sem.acquire(2)
    +    // Small delay to ensure tasks actually start executing the task body
    +    Thread.sleep(1000)
    +
    +    sc.clearJobGroup()
    +    val jobB = sc.parallelize(1 to 100, 2).countAsync()
    +    sc.cancelJobGroup("jobA")
    +    val e = intercept[SparkException] { ThreadUtils.awaitResult(jobA, 15.seconds) }.getCause
    +    assert(e.getMessage contains "cancel")
    +
    +    // Once A is cancelled, job B should finish fairly quickly.
    +    assert(ThreadUtils.awaitResult(jobB, 60.seconds) === 100)
    +  }
    +
    +  test("task reaper will not kill JVM if spark.task.killTimeout == -1") {
    +    val conf = new SparkConf()
    +      .set("spark.task.reaper.enabled", "true")
    +      .set("spark.task.reaper.killTimeout", "-1")
    +      .set("spark.task.reaper.PollingInterval", "1s")
    +      .set("spark.deploy.maxExecutorRetries", "1")
    --- End diff --
    
    We set it to 1 so that the test can verify the JVM is not killed, right? (If the JVM were killed, the application would be removed, because spark.deploy.maxExecutorRetries is 1.)
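    
    If I am reading it the same way, here is a minimal, hypothetical sketch of that interaction (not from this PR; the app name and the trailing sanity-check job are made up for illustration). With killTimeout = -1 the reaper only logs warnings and never kills the JVM, and maxExecutorRetries = 1 acts as a tripwire: if an executor JVM were killed anyway, the standalone master would drop the application and a later job would fail.
    
        import org.apache.spark.{SparkConf, SparkContext}
    
        val conf = new SparkConf()
          .setMaster("local-cluster[2,1,1024]")
          .setAppName("reaper-config-sketch")              // hypothetical app name
          .set("spark.task.reaper.enabled", "true")
          .set("spark.task.reaper.killTimeout", "-1")      // never escalate to killing the JVM
          .set("spark.task.reaper.pollingInterval", "1s")  // re-check the killed task every second
          .set("spark.deploy.maxExecutorRetries", "1")     // remove the app quickly if a JVM dies
    
        val sc = new SparkContext(conf)
        try {
          // If the executor JVMs survive, an ordinary job still succeeds.
          assert(sc.parallelize(1 to 100, 2).count() == 100L)
        } finally {
          sc.stop()
        }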


