Github user yogeshg commented on a diff in the pull request: https://github.com/apache/spark/pull/20829#discussion_r176266213 --- Diff: mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala --- @@ -147,4 +149,72 @@ class VectorAssemblerSuite .filter(vectorUDF($"features") > 1) .count() == 1) } + + test("assemble should keep nulls") { + import org.apache.spark.ml.feature.VectorAssembler.assemble + assert(assemble(Seq(1, 1), true)(1.0, null) === Vectors.dense(1.0, Double.NaN)) + assert(assemble(Seq(1, 2), true)(1.0, null) === Vectors.dense(1.0, Double.NaN, Double.NaN)) + assert(assemble(Seq(1), true)(null) === Vectors.dense(Double.NaN)) + assert(assemble(Seq(2), true)(null) === Vectors.dense(Double.NaN, Double.NaN)) + } + + test("assemble should throw errors") { + import org.apache.spark.ml.feature.VectorAssembler.assemble + intercept[SparkException](assemble(Seq(1, 1), false)(1.0, null) === + Vectors.dense(1.0, Double.NaN)) + intercept[SparkException](assemble(Seq(1, 2), false)(1.0, null) === + Vectors.dense(1.0, Double.NaN, Double.NaN)) + intercept[SparkException](assemble(Seq(1), false)(null) === Vectors.dense(Double.NaN)) + intercept[SparkException](assemble(Seq(2), false)(null) === + Vectors.dense(Double.NaN, Double.NaN)) + } + + test("get lengths function") { + val df = Seq[(Long, Long, java.lang.Double, Vector, String, Vector, Long)]( --- End diff -- to allow nulls in the column :)
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For additional commands, e-mail: reviews-help@spark.apache.org