Repository: spark
Updated Branches:
  refs/heads/branch-2.0 6709ce1ae -> 54b4763d2


[SPARK-15837][ML][PYSPARK] Word2vec python add maxsentence parameter

## What changes were proposed in this pull request?

Word2vec python add maxsentence parameter.

## How was this patch tested?

Existing test.

Author: WeichenXu <weichenxu...@outlook.com>

Closes #13578 from WeichenXu123/word2vec_python_add_maxsentence.

(cherry picked from commit cdd7f5a57a21d4a8f93456d149f65859c96190cf)
Signed-off-by: Sean Owen <so...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/54b4763d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/54b4763d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/54b4763d

Branch: refs/heads/branch-2.0
Commit: 54b4763d295d6aeab6105d0430470343dd4ca3a3
Parents: 6709ce1
Author: WeichenXu <weichenxu...@outlook.com>
Authored: Fri Jun 10 12:26:53 2016 +0100
Committer: Sean Owen <so...@cloudera.com>
Committed: Fri Jun 10 12:27:04 2016 +0100

----------------------------------------------------------------------
 python/pyspark/ml/feature.py | 29 ++++++++++++++++++++++++-----
 1 file changed, 24 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/54b4763d/python/pyspark/ml/feature.py
----------------------------------------------------------------------
diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py
index ebe1300..bfb2fb7 100755
--- a/python/pyspark/ml/feature.py
+++ b/python/pyspark/ml/feature.py
@@ -2244,28 +2244,33 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, 
HasSeed, HasInputCol, Has
     windowSize = Param(Params._dummy(), "windowSize",
                        "the window size (context words from [-window, 
window]). Default value is 5",
                        typeConverter=TypeConverters.toInt)
+    maxSentenceLength = Param(Params._dummy(), "maxSentenceLength",
+                              "Maximum length (in words) of each sentence in 
the input data. " +
+                              "Any sentence longer than this threshold will " +
+                              "be divided into chunks up to the size.",
+                              typeConverter=TypeConverters.toInt)
 
     @keyword_only
     def __init__(self, vectorSize=100, minCount=5, numPartitions=1, 
stepSize=0.025, maxIter=1,
-                 seed=None, inputCol=None, outputCol=None, windowSize=5):
+                 seed=None, inputCol=None, outputCol=None, windowSize=5, 
maxSentenceLength=1000):
         """
         __init__(self, vectorSize=100, minCount=5, numPartitions=1, 
stepSize=0.025, maxIter=1, \
-                 seed=None, inputCol=None, outputCol=None, windowSize=5)
+                 seed=None, inputCol=None, outputCol=None, windowSize=5, 
maxSentenceLength=1000)
         """
         super(Word2Vec, self).__init__()
         self._java_obj = 
self._new_java_obj("org.apache.spark.ml.feature.Word2Vec", self.uid)
         self._setDefault(vectorSize=100, minCount=5, numPartitions=1, 
stepSize=0.025, maxIter=1,
-                         seed=None, windowSize=5)
+                         seed=None, windowSize=5, maxSentenceLength=1000)
         kwargs = self.__init__._input_kwargs
         self.setParams(**kwargs)
 
     @keyword_only
     @since("1.4.0")
     def setParams(self, vectorSize=100, minCount=5, numPartitions=1, 
stepSize=0.025, maxIter=1,
-                  seed=None, inputCol=None, outputCol=None, windowSize=5):
+                  seed=None, inputCol=None, outputCol=None, windowSize=5, 
maxSentenceLength=1000):
         """
         setParams(self, minCount=5, numPartitions=1, stepSize=0.025, 
maxIter=1, seed=None, \
-                 inputCol=None, outputCol=None, windowSize=5)
+                 inputCol=None, outputCol=None, windowSize=5, 
maxSentenceLength=1000)
         Sets params for this Word2Vec.
         """
         kwargs = self.setParams._input_kwargs
@@ -2327,6 +2332,20 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, 
HasSeed, HasInputCol, Has
         """
         return self.getOrDefault(self.windowSize)
 
+    @since("2.0.0")
+    def setMaxSentenceLength(self, value):
+        """
+        Sets the value of :py:attr:`maxSentenceLength`.
+        """
+        return self._set(maxSentenceLength=value)
+
+    @since("2.0.0")
+    def getMaxSentenceLength(self):
+        """
+        Gets the value of maxSentenceLength or its default value.
+        """
+        return self.getOrDefault(self.maxSentenceLength)
+
     def _create_model(self, java_model):
         return Word2VecModel(java_model)
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to