Github user WeichenXu123 commented on a diff in the pull request:

    https://github.com/apache/spark/pull/20964#discussion_r178783980
  
    --- Diff: 
mllib/src/test/scala/org/apache/spark/ml/feature/MinHashLSHSuite.scala ---
    @@ -167,4 +166,20 @@ class MinHashLSHSuite extends SparkFunSuite with 
MLlibTestSparkContext with Defa
         assert(precision == 1.0)
         assert(recall >= 0.7)
       }
    +
    +  test("MinHashLSHModel.transform should work with Structured Streaming") {
    +    val localSpark = spark
    +    import localSpark.implicits._
    +
    +    val model = new MinHashLSHModel("mh", randCoefficients = Array((1, 0)))
    +    model.set(model.inputCol, "keys")
    +    testTransformer[Tuple1[Vector]](dataset.toDF(), model, "keys", 
model.getOutputCol) {
    +      case Row(_: Vector, output: Seq[_]) =>
    +        assert(output.length === model.randCoefficients.length)
    +        // no AND-amplification yet: SPARK-18450, so each hash output is 
of length 1
    +        output.foreach {
    +          case hashOutput: Vector => assert(hashOutput.size === 1)
    +        }
    +    }
    --- End diff --
    
    Why not have an "expected" column here to compare with?


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to