Github user JoshRosen commented on a diff in the pull request:

    https://github.com/apache/spark/pull/3518#discussion_r21062580
  
    --- Diff: core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala ---
    @@ -244,6 +244,133 @@ class DAGSchedulerSuite extends TestKit(ActorSystem("DAGSchedulerSuite")) with F
       private def cancel(jobId: Int) {
         runEvent(JobCancelled(jobId))
       }
    +  
    +  test("Serialization trace for unserializable task") {
    +    val unserializableRdd = new MyRDD(sc, 1, Nil) {
    +      class UnserializableClass
    +      val unserializable = new UnserializableClass
    +    }
    +    
    +    val trace = scheduler.getSerializationTrace(unserializableRdd)
    +
    +    val splitS = trace.split(":")
    +    val depth = splitS(1).trim()
    +    val status = splitS(2).trim()
    +    val rddName = splitS(3).trim()
    +    
    +    assert(rddName.equals("DAGSchedulerSuiteRDD 0"))
    +    assert(status.equals(SerializationHelper.Failed))
    +  }
    +
    +  test("Serialization trace for unserializable task with serializable 
dependencies") {
    +    // The trace should show which nested dependency is unserializable
    +
    +    val baseRdd = new MyRDD(sc, 1, Nil)
    +    val midRdd = new MyRDD(sc, 1, List(new OneToOneDependency(baseRdd)))
    +    val finalRdd = new MyRDD(sc, 1, List(new OneToOneDependency(midRdd))){
    +      class UnserializableClass
    +      val unserializable = new UnserializableClass
    +    }
    +    
    +    val result = Array(SerializationHelper.Failed,
    +      SerializationHelper.Serialized,
    +      SerializationHelper.Serialized)
    +    
    +    val trace = scheduler.getSerializationTrace(finalRdd)
    +    val splitRdds = trace.split("\n")
    +
    +    var x = 0
    +    for(x <- 1 until splitRdds.length){
    +      val splitS = splitRdds(x).split(":")
    +      val status = splitS(1).trim()
    +      
    +      assert(status.equals(result(x-1)))
    +
    +    }
    +
    +  }
    +
    +  test("Serialization trace for serializable task and nested 
unserializable dependency") {
    +    // The trace should show which nested dependency is unserializable
    +    
    +    val baseRdd = new MyRDD(sc, 1, Nil){
    +      class UnserializableClass
    +      val unserializable = new UnserializableClass
    +    }
    +      
    +    val midRdd = new MyRDD(sc, 1, List(new OneToOneDependency(baseRdd)))
    +    val finalRdd = new MyRDD(sc, 1, List(new OneToOneDependency(midRdd)))
    +    val result = Array(SerializationHelper.Serialized,
    +      SerializationHelper.FailedDeps,
    +      SerializationHelper.Failed)
    +
    +    val trace = scheduler.getSerializationTrace(finalRdd)
    +    val splitRdds = trace.split("\n")
    +    
    +    var x = 0
    +    
    +    for(x <- 1 until splitRdds.length){
    +      val splitS = splitRdds(x).split(":")
    +      val status = splitS(1).trim()
    +      
    +      assert(status.equals(result(x-1)))
    +      
    +    }
    +    
    +  }
    +  
    +  test("Serialization trace for serializable task with sandwiched 
unserializable dependency") {
    +    // The trace should show which nested dependency is unserializable
    +
    +    val baseRdd = new MyRDD(sc, 1, Nil)
    +    val midRdd = new MyRDD(sc, 1, List(new OneToOneDependency(baseRdd))){
    +      class UnserializableClass
    +      val unserializable = new UnserializableClass
    +    }
    +    val finalRdd = new MyRDD(sc, 1, List(new OneToOneDependency(midRdd)))
    +    val result = Array(SerializationHelper.FailedDeps,
    +      SerializationHelper.Failed,
    +      SerializationHelper.Serialized)
    +
    +    val trace = scheduler.getSerializationTrace(finalRdd)
    +    val splitRdds = trace.split("\n")
    +
    +    var x = 0
    +    for(x <- 1 until splitRdds.length){
    --- End diff --
    
    You don't need this mutability; you can use `zipWithIndex` if you want to 
iterate through the elements of `splitRdds` and read their indices.
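    
    For example, something along these lines (a rough sketch, reusing the `splitRdds` and `result` values from the test above and assuming the same `rddName: status` trace format):
    
        splitRdds.drop(1).zipWithIndex.foreach { case (line, i) =>
          // Each trace line looks like "rddName: status"; compare the status
          // against the expected result for that dependency.
          val status = line.split(":")(1).trim()
          assert(status.equals(result(i)))
        }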

