Repository: spark
Updated Branches:
  refs/heads/master 14e2700de -> 35316cb0b

[SPARK-13292] [ML] [PYTHON] QuantileDiscretizer should take random seed in PySpark

## What changes were proposed in this pull request?
QuantileDiscretizer in Python should also specify a random seed.

## How was this patch tested?
unit tests

Author: Yu ISHIKAWA <yuu.ishik...@gmail.com>

Closes #11362 from yu-iskw/SPARK-13292 and squashes the following commits:

02ffa76 [Yu ISHIKAWA] [SPARK-13292][ML][PYTHON] QuantileDiscretizer should take random seed in PySpark

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/35316cb0
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/35316cb0
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/35316cb0

Branch: refs/heads/master
Commit: 35316cb0b744bef9bcb390411ddc321167f953be
Parents: 14e2700
Author: Yu ISHIKAWA <yuu.ishik...@gmail.com>
Authored: Thu Feb 25 13:29:10 2016 -0800
Committer: Xiangrui Meng <m...@databricks.com>
Committed: Thu Feb 25 13:29:10 2016 -0800

----------------------------------------------------------------------
 python/pyspark/ml/feature.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/35316cb0/python/pyspark/ml/feature.py
----------------------------------------------------------------------
diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py
index 464c944..67bccfa 100644
--- a/python/pyspark/ml/feature.py
+++ b/python/pyspark/ml/feature.py
@@ -939,7 +939,7 @@ class PolynomialExpansion(JavaTransformer, HasInputCol, HasOutputCol):
 
 
 @inherit_doc
-class QuantileDiscretizer(JavaEstimator, HasInputCol, HasOutputCol):
+class QuantileDiscretizer(JavaEstimator, HasInputCol, HasOutputCol, HasSeed):
     """
     .. note:: Experimental
 
@@ -951,7 +951,9 @@ class QuantileDiscretizer(JavaEstimator, HasInputCol, HasOutputCol):
 
     >>> df = sqlContext.createDataFrame([(0.1,), (0.4,), (1.2,), (1.5,)], ["values"])
     >>> qds = QuantileDiscretizer(numBuckets=2,
-    ...     inputCol="values", outputCol="buckets")
+    ...     inputCol="values", outputCol="buckets", seed=123)
+    >>> qds.getSeed()
+    123
     >>> bucketizer = qds.fit(df)
     >>> splits = bucketizer.getSplits()
     >>> splits[0]
@@ -971,9 +973,9 @@ class QuantileDiscretizer(JavaEstimator, HasInputCol, HasOutputCol):
                        "categories) into which data points are grouped. Must be >= 2. Default 2.")
 
     @keyword_only
-    def __init__(self, numBuckets=2, inputCol=None, outputCol=None):
+    def __init__(self, numBuckets=2, inputCol=None, outputCol=None, seed=None):
         """
-        __init__(self, numBuckets=2, inputCol=None, outputCol=None)
+        __init__(self, numBuckets=2, inputCol=None, outputCol=None, seed=None)
         """
         super(QuantileDiscretizer, self).__init__()
         self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.QuantileDiscretizer",
@@ -987,9 +989,9 @@ class QuantileDiscretizer(JavaEstimator, HasInputCol, HasOutputCol):
 
     @keyword_only
     @since("2.0.0")
-    def setParams(self, numBuckets=2, inputCol=None, outputCol=None):
+    def setParams(self, numBuckets=2, inputCol=None, outputCol=None, seed=None):
         """
-        setParams(self, numBuckets=2, inputCol=None, outputCol=None)
+        setParams(self, numBuckets=2, inputCol=None, outputCol=None, seed=None)
         Set the params for the QuantileDiscretizer
         """
         kwargs = self.setParams._input_kwargs

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org