[GitHub] [spark] viirya commented on a change in pull request #26247: [SPARK-29566][ML] Imputer should support single-column input/output

GitBox Thu, 24 Oct 2019 21:10:49 -0700

viirya commented on a change in pull request #26247: [SPARK-29566][ML] Imputer 
should support single-column input/output
URL: https://github.com/apache/spark/pull/26247#discussion_r338879058


 ##########
 File path: mllib/src/test/scala/org/apache/spark/ml/feature/ImputerSuite.scala
 ##########
 @@ -228,34 +453,42 @@ object ImputerSuite {
    * Imputation strategy. Available options are ["mean", "median"].
    * @param df DataFrame with columns "id", "value", "expected_mean", 
"expected_median"
    */
-  def iterateStrategyTest(imputer: Imputer, df: DataFrame): Unit = {
+  def iterateStrategyTest(isMultiCol: Boolean, imputer: Imputer, df: 
DataFrame): Unit = {
     Seq("mean", "median").foreach { strategy =>
       imputer.setStrategy(strategy)
       val model = imputer.fit(df)
       val resultDF = model.transform(df)
-      imputer.getInputCols.zip(imputer.getOutputCols).foreach { case 
(inputCol, outputCol) =>
-
-        // check dataType is consistent between input and output
-        val inputType = resultDF.schema(inputCol).dataType
-        val outputType = resultDF.schema(outputCol).dataType
-        assert(inputType == outputType, "Output type is not the same as input 
type.")
-
-        // check value
-        resultDF.select(s"expected_${strategy}_$inputCol", 
outputCol).collect().foreach {
-          case Row(exp: Float, out: Float) =>
-            assert((exp.isNaN && out.isNaN) || (exp == out),
-              s"Imputed values differ. Expected: $exp, actual: $out")
-          case Row(exp: Double, out: Double) =>
-            assert((exp.isNaN && out.isNaN) || (exp ~== out absTol 1e-5),
-              s"Imputed values differ. Expected: $exp, actual: $out")
-          case Row(exp: Integer, out: Integer) =>
-            assert(exp == out,
-              s"Imputed values differ. Expected: $exp, actual: $out")
-          case Row(exp: Long, out: Long) =>
-            assert(exp == out,
-              s"Imputed values differ. Expected: $exp, actual: $out")
+      if (isMultiCol) {
+        imputer.getInputCols.zip(imputer.getOutputCols).foreach { case 
(inputCol, outputCol) =>
+          verifyTransformResult(strategy, inputCol, outputCol, resultDF)
         }
+      } else {
+          verifyTransformResult(strategy, imputer.getInputCol, 
imputer.getOutputCol, resultDF)
       }
     }
   }
+
+  def verifyTransformResult(strategy: String, inputCol: String, outputCol: 
String,
+                            resultDF: DataFrame): Unit = {
 
 Review comment:
   seems not usual indent.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] [spark] viirya commented on a change in pull request #26247: [SPARK-29566][ML] Imputer should support single-column input/output

Reply via email to