Github user jkbradley commented on a diff in the pull request:

    https://github.com/apache/spark/pull/20112#discussion_r159096655
  
    --- Diff: python/pyspark/ml/feature.py ---
    @@ -3466,6 +3466,72 @@ def selectedFeatures(self):
             return self._call_java("selectedFeatures")
     
     
    +@inherit_doc
    +class VectorSizeHint(JavaTransformer, HasInputCol, HasHandleInvalid, 
JavaMLReadable,
    +                     JavaMLWritable):
    +    """
    +    A feature transformer that adds size information to the metadata of a 
vector column.
    +    VectorAssembler needs size information for its input columns and 
cannot be used on streaming
    +    dataframes without this metadata.
    +
    +    >>> from pyspark.ml.linalg import Vectors
    +    >>> from pyspark.ml import Pipeline, PipelineModel
    +    >>> data = [(Vectors.dense([1., 2., 3.]), 4.)]
    +    >>> df = spark.createDataFrame(data, ["vector", "float"])
    +    >>>
    +    >>> sizeHint = VectorSizeHint(inputCol="vector", size=3, 
handleInvalid="skip")
    +    >>> vecAssembler = VectorAssembler(inputCols=["vector", "float"], 
outputCol="assembled")
    +    >>> pipeline = Pipeline(stages=[sizeHint, vecAssembler])
    +    >>>
    +    >>> pipelineModel = pipeline.fit(df)
    +    >>> pipelineModel.transform(df).head().assembled
    +    DenseVector([1.0, 2.0, 3.0, 4.0])
    +    >>> vectorSizeHintPath = temp_path + "/vector-size-hint-pipeline"
    +    >>> pipelineModel.save(vectorSizeHintPath)
    +    >>> loadedPipeline = PipelineModel.load(vectorSizeHintPath)
    +    >>> loaded = loadedPipeline.transform(df).head().assembled
    +    >>> expected = pipelineModel.transform(df).head().assembled
    +    >>> loaded == expected
    +    True
    +
    +    .. versionadded:: 2.3.0
    +    .. note:: Experimental
    +    """
    +
    +    size = Param(Params._dummy(), "size", "Size of vectors in column.",
    +                 typeConverter=TypeConverters.toInt)
    +
    +    @since("2.3.0")
    +    def getSize(self):
    +        """ Gets size param, the size of vectors in `inputCol`."""
    +        self.getOrDefault(self.size)
    +
    +    @since("2.3.0")
    +    def setSize(self, value):
    +        """ Sets size param, the size of vectors in `inputCol`."""
    +        self._set(size=value)
    +
    +    @keyword_only
    +    def __init__(self, inputCol=None, size=None, handleInvalid="error"):
    --- End diff --
    
    Let's stick with the order that all other Python classes follow: dummy 
Params, __init__, then Param setters & getters.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to