Github user MLnick commented on a diff in the pull request: https://github.com/apache/spark/pull/19715#discussion_r153774170 --- Diff: mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala --- @@ -50,10 +50,26 @@ private[feature] trait QuantileDiscretizerBase extends Params /** @group getParam */ def getNumBuckets: Int = getOrDefault(numBuckets) + /** + * Array of number of buckets (quantiles, or categories) into which data points are grouped. + * + * See also [[handleInvalid]], which can optionally create an additional bucket for NaN values. + * + * @group param + */ + val numBucketsArray = new IntArrayParam(this, "numBucketsArray", "Array of number of buckets " + + "(quantiles, or categories) into which data points are grouped. This is for multiple " + + "columns input. If numBucketsArray is not set but numBuckets is set, it means user wants " + + "to use the same numBuckets across all columns.") --- End diff -- This needs a validator function here to ensure that all bucket values are >= 2.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org