Github user squito commented on a diff in the pull request:

    https://github.com/apache/spark/pull/7753#discussion_r45927657
  
    --- Diff: 
core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala 
---
    @@ -122,6 +122,105 @@ class EventLoggingListenerSuite extends SparkFunSuite 
with LocalSparkContext wit
             "a fine:mind$dollar{bills}.1", None, Some("lz4")))
       }
     
    +  test("test event logger logging executor metrics") {
    +    import org.apache.spark.scheduler.cluster._
    +    import org.apache.spark.ui.memory._
    +    val conf = EventLoggingListenerSuite.getLoggingConf(testDirPath)
    +    val eventLogger = new EventLoggingListener("test-memListener", None, 
testDirPath.toUri(), conf)
    +    val execId = "exec-1"
    +    val hostName = "host-1"
    +
    +    eventLogger.start()
    +    eventLogger.onExecutorAdded(SparkListenerExecutorAdded(
    +      0L, execId, new ExecutorInfo(hostName, 1, Map.empty)))
    +
    +    // stage 1 and stage 2 submitted
    +    
eventLogger.onStageSubmitted(MemoryListenerSuite.createStageStartEvent(1))
    +    
eventLogger.onStageSubmitted(MemoryListenerSuite.createStageStartEvent(2))
    +    val execMetrics1 = MemoryListenerSuite.createExecutorMetrics(hostName, 
1L, 20, 10)
    +    
eventLogger.onExecutorMetricsUpdate(MemoryListenerSuite.createExecutorMetricsUpdateEvent(
    +      execId, execMetrics1))
    +    val execMetrics2 = MemoryListenerSuite.createExecutorMetrics(hostName, 
2L, 30, 10)
    +    
eventLogger.onExecutorMetricsUpdate(MemoryListenerSuite.createExecutorMetricsUpdateEvent(
    +      execId, execMetrics2))
    +    // stage1 completed
    +    
eventLogger.onStageCompleted(MemoryListenerSuite.createStageEndEvent(1))
    +    // stage3 submitted
    +    
eventLogger.onStageSubmitted(MemoryListenerSuite.createStageStartEvent(3))
    +    val execMetrics3 = MemoryListenerSuite.createExecutorMetrics(hostName, 
3L, 30, 30)
    +    
eventLogger.onExecutorMetricsUpdate(MemoryListenerSuite.createExecutorMetricsUpdateEvent(
    +      execId, execMetrics3))
    +    val execMetrics4 = MemoryListenerSuite.createExecutorMetrics(hostName, 
4L, 20, 25)
    +    
eventLogger.onExecutorMetricsUpdate(MemoryListenerSuite.createExecutorMetricsUpdateEvent(
    +      execId, execMetrics4))
    +    // stage 2 completed
    +    
eventLogger.onStageCompleted(MemoryListenerSuite.createStageEndEvent(2))
    +    val execMetrics5 = MemoryListenerSuite.createExecutorMetrics(hostName, 
5L, 15, 15)
    +    
eventLogger.onExecutorMetricsUpdate(MemoryListenerSuite.createExecutorMetricsUpdateEvent(
    +      execId, execMetrics5))
    +    val execMetrics6 = MemoryListenerSuite.createExecutorMetrics(hostName, 
6L, 25, 10)
    +    
eventLogger.onExecutorMetricsUpdate(MemoryListenerSuite.createExecutorMetricsUpdateEvent(
    +      execId, execMetrics6))
    +    // stage 3 completed
    +    
eventLogger.onStageCompleted(MemoryListenerSuite.createStageEndEvent(3))
    +
    +    eventLogger.onExecutorRemoved(SparkListenerExecutorRemoved(7L, execId, 
""))
    +
    +    // In total there are 15 logged events, including:
    +    // 2 events of executor Added/Removed
    +    // 6 events of stage Submitted/Completed
    +    // 7 events of executorMetrics update (3 combined metrics and 4 
original metrics)
    +    assert(eventLogger.loggedEvents.size === 15)
    +    eventLogger.stop()
    +
    +    val logData = EventLoggingListener.openEventLog(new 
Path(eventLogger.logPath), fileSystem)
    +    val lines = readLines(logData)
    +    Utils.tryWithSafeFinally {
    +      // in total there are 16 lines: the SparkListenerLogStart event plus
the 15 logged events above
    +      assert(lines.size === 16)
    +
    +      // 4 executor metrics that are the latest metrics updated before
stage submission and completion
    +      val jsonMetrics = JsonProtocol.sparkEventFromJson(parse(lines(5)))
    --- End diff --
    
    I'm not a fan of pulling out very specific lines of the log here -- it 
makes it harder for the reader to follow, and also makes the tests more 
brittle.  Could you instead have a util method like 
`getLastExecutorMetricBeforeStageEnd(events: Seq[SparkListenerEvent], stageId: 
Int): SparkListenerExecutorMetricsUpdate`?  Then your checks would be more 
clear, they'd look like 
    ```scala
    val parsedLines = lines.map { line =>
JsonProtocol.sparkEventFromJson(parse(line)) }
    ...
    checkExecutorMetrics(
      metrics = getLastExecutorMetricBeforeStageEnd(parsedLines, 3),
      expMetrics = ...
    )
    ```
    (not quite the right args, but hopefully that conveys the idea).  You'd 
also need to make sure the stage end events had a completion time in there to 
be able to grab the right event.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to