[GitHub] spark pull request #19156: [SPARK-19634][SQL][ML][FOLLOW-UP] Improve interfa...

yanboliang Tue, 12 Dec 2017 21:01:27 -0800

Github user yanboliang commented on a diff in the pull request:

    https://github.com/apache/spark/pull/19156#discussion_r156564056
  
    --- Diff: mllib/src/test/scala/org/apache/spark/ml/stat/SummarizerSuite.scala ---
    @@ -205,67 +207,21 @@ class SummarizerSuite extends SparkFunSuite with MLlibTestSparkContext {
         }
       }
     
    -  test("debugging test") {
    -    val df = denseData(Nil)
    -    val c = df.col("features")
    -    val c1 = metrics("mean").summary(c)
    -    val res = df.select(c1)
    -    intercept[SparkException] {
    -      compare(res, Seq.empty)
    -    }
    -  }
    -
    -  test("basic error handling") {
    -    val df = denseData(Nil)
    -    val c = df.col("features")
    -    val res = df.select(metrics("mean").summary(c), mean(c))
    -    intercept[SparkException] {
    -      compare(res, Seq.empty)
    -    }
    -  }
    +  testExample("single element", Seq((Vectors.dense(0.0, 1.0, 2.0), 2.0)))
     
    -  test("no element, working metrics") {
    -    val df = denseData(Nil)
    -    val c = df.col("features")
    -    val res = df.select(metrics("count").summary(c), count(c))
    -    compare(res, Seq(Row(0L), 0L))
    -  }
    +  testExample("multiple elements (dense)",
    +    Seq(
    +      (Vectors.dense(-1.0, 0.0, 6.0), 0.5),
    +      (Vectors.dense(3.0, -3.0, 0.0), 2.8),
    +      (Vectors.dense(1.0, -3.0, 0.0), 0.0)
    +    )
    +  )
     
    -  val singleElem = Seq(0.0, 1.0, 2.0)
    -  testExample("single element", Seq(singleElem), ExpectedMetrics(
    -    mean = singleElem,
    -    variance = Seq(0.0, 0.0, 0.0),
    -    count = 1,
    -    numNonZeros = Seq(0, 1, 1),
    -    max = singleElem,
    -    min = singleElem,
    -    normL1 = singleElem,
    -    normL2 = singleElem
    -  ))
    -
    -  testExample("two elements", Seq(Seq(0.0, 1.0, 2.0), Seq(0.0, -1.0, -2.0)), ExpectedMetrics(
    -    mean = Seq(0.0, 0.0, 0.0),
    -    // TODO: I have a doubt about these values, they are not normalized.
    -    variance = Seq(0.0, 2.0, 8.0),
    -    count = 2,
    -    numNonZeros = Seq(0, 2, 2),
    -    max = Seq(0.0, 1.0, 2.0),
    -    min = Seq(0.0, -1.0, -2.0),
    -    normL1 = Seq(0.0, 2.0, 4.0),
    -    normL2 = Seq(0.0, math.sqrt(2.0), math.sqrt(2.0) * 2.0)
    -  ))
    -
    -  testExample("dense vector input",
    -    Seq(Seq(-1.0, 0.0, 6.0), Seq(3.0, -3.0, 0.0)),
    --- End diff --
    
    Why did you remove the tests against ground-truth values?



---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #19156: [SPARK-19634][SQL][ML][FOLLOW-UP] Improve interfa...

Reply via email to