Repository: spark
Updated Branches:
  refs/heads/master 2235cd444 -> 969d5665b
[SPARK-12296][PYSPARK][MLLIB] Feature parity for pyspark mllib standard scaler model

Some methods are missing, such as ways to access the std, mean, etc. This PR is for feature parity for pyspark.mllib.feature.StandardScaler & StandardScalerModel.

Author: Holden Karau <hol...@us.ibm.com>

Closes #10298 from holdenk/SPARK-12296-feature-parity-pyspark-mllib-StandardScalerModel.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/969d5665
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/969d5665
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/969d5665

Branch: refs/heads/master
Commit: 969d5665bb1806703f948e8e7ab6133fca38c086
Parents: 2235cd4
Author: Holden Karau <hol...@us.ibm.com>
Authored: Tue Dec 22 09:14:12 2015 +0200
Committer: Nick Pentreath <nick.pentre...@gmail.com>
Committed: Tue Dec 22 09:14:12 2015 +0200

----------------------------------------------------------------------
 python/pyspark/mllib/feature.py | 40 ++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/969d5665/python/pyspark/mllib/feature.py
----------------------------------------------------------------------
diff --git a/python/pyspark/mllib/feature.py b/python/pyspark/mllib/feature.py
index acd7ec5..6129353 100644
--- a/python/pyspark/mllib/feature.py
+++ b/python/pyspark/mllib/feature.py
@@ -172,6 +172,38 @@ class StandardScalerModel(JavaVectorTransformer):
         self.call("setWithStd", withStd)
         return self

+    @property
+    @since('2.0.0')
+    def withStd(self):
+        """
+        Returns if the model scales the data to unit standard deviation.
+        """
+        return self.call("withStd")
+
+    @property
+    @since('2.0.0')
+    def withMean(self):
+        """
+        Returns if the model centers the data before scaling.
+        """
+        return self.call("withMean")
+
+    @property
+    @since('2.0.0')
+    def std(self):
+        """
+        Return the column standard deviation values.
+        """
+        return self.call("std")
+
+    @property
+    @since('2.0.0')
+    def mean(self):
+        """
+        Return the column mean values.
+        """
+        return self.call("mean")
+

 class StandardScaler(object):
     """
@@ -196,6 +228,14 @@ class StandardScaler(object):
     >>> for r in result.collect(): r
     DenseVector([-0.7071, 0.7071, -0.7071])
     DenseVector([0.7071, -0.7071, 0.7071])
+    >>> int(model.std[0])
+    4
+    >>> int(model.mean[0]*10)
+    9
+    >>> model.withStd
+    True
+    >>> model.withMean
+    True

     .. versionadded:: 1.2.0
     """

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org