Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/17819#discussion_r143930542 --- Diff: mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala --- @@ -96,9 +99,71 @@ final class Bucketizer @Since("1.4.0") (@Since("1.4.0") override val uid: String def setHandleInvalid(value: String): this.type = set(handleInvalid, value) setDefault(handleInvalid, Bucketizer.ERROR_INVALID) + /** + * Parameter for specifying multiple splits parameters. Each element in this array can be used to + * map continuous features into buckets. + * + * @group param + */ + @Since("2.3.0") + val splitsArray: DoubleArrayArrayParam = new DoubleArrayArrayParam(this, "splitsArray", + "The array of split points for mapping continuous features into buckets for multiple " + + "columns. For each input column, with n+1 splits, there are n buckets. A bucket defined by " + + "splits x,y holds values in the range [x,y) except the last bucket, which also includes y. " + + "The splits should be of length >= 3 and strictly increasing. Values at -inf, inf must be " + + "explicitly provided to cover all Double values; otherwise, values outside the splits " + + "specified will be treated as errors.", + Bucketizer.checkSplitsArray) + + /** + * Param for output column names. + * @group param + */ + @Since("2.3.0") + final val outputCols: StringArrayParam = new StringArrayParam(this, "outputCols", --- End diff -- I will create `HasOutputCols`.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org