GitHub user facaiy commented on a diff in the pull request: https://github.com/apache/spark/pull/18998#discussion_r179903481 --- Diff: mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala --- @@ -93,11 +97,21 @@ class HashingTF @Since("1.4.0") (@Since("1.4.0") override val uid: String) @Since("2.0.0") override def transform(dataset: Dataset[_]): DataFrame = { val outputSchema = transformSchema(dataset.schema) - val hashingTF = new feature.HashingTF($(numFeatures)).setBinary($(binary)) - // TODO: Make the hashingTF.transform natively in ml framework to avoid extra conversion. - val t = udf { terms: Seq[_] => hashingTF.transform(terms).asML } + val hashUDF = udf { (terms: Seq[_]) => + val ids = terms.map { term => --- End diff -- @sethah Hi, thank you all for your reviews and comments. However, since it has been quite a long time with no activity, would it be a good idea to close the PR?
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org