Repository: spark Updated Branches: refs/heads/master ce49bfc25 -> 37494a18e
[SPARK-10258][DOC][ML] Add @Since annotations to ml.feature

This PR adds missing `Since` annotations to the `ml.feature` package. Closes #8505.

## How was this patch tested?

Existing tests.

Author: Nick Pentreath <ni...@za.ibm.com>

Closes #13641 from MLnick/add-since-annotations.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/37494a18
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/37494a18
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/37494a18

Branch: refs/heads/master
Commit: 37494a18e8d6e22113338523d6498e00ac9725ea
Parents: ce49bfc
Author: Nick Pentreath <ni...@za.ibm.com>
Authored: Tue Jun 21 00:39:47 2016 -0700
Committer: Xiangrui Meng <m...@databricks.com>
Committed: Tue Jun 21 00:39:47 2016 -0700

----------------------------------------------------------------------
 .../org/apache/spark/ml/feature/Binarizer.scala |  11 ++++++-
 .../apache/spark/ml/feature/Bucketizer.scala    |  12 ++++++-
 .../apache/spark/ml/feature/ChiSqSelector.scala |  19 +++++++++--
 .../spark/ml/feature/CountVectorizer.scala      |  24 ++++++++++++--
 .../scala/org/apache/spark/ml/feature/DCT.scala |   7 ++++-
 .../spark/ml/feature/ElementwiseProduct.scala   |   7 ++++-
 .../org/apache/spark/ml/feature/HashingTF.scala |  14 ++++++++-
 .../scala/org/apache/spark/ml/feature/IDF.scala |  20 +++++++++---
 .../apache/spark/ml/feature/Interaction.scala   |   4 +--
 .../apache/spark/ml/feature/MaxAbsScaler.scala  |  26 ++++++++++-----
 .../apache/spark/ml/feature/MinMaxScaler.scala  |  23 +++++++++++---
 .../org/apache/spark/ml/feature/NGram.scala     |   7 ++++-
 .../apache/spark/ml/feature/Normalizer.scala    |   7 ++++-
 .../apache/spark/ml/feature/OneHotEncoder.scala |  10 +++++-
 .../scala/org/apache/spark/ml/feature/PCA.scala |  23 +++++++++++---
 .../spark/ml/feature/PolynomialExpansion.scala  |   8 ++++-
 .../spark/ml/feature/QuantileDiscretizer.scala  |  10 +++++-
 .../org/apache/spark/ml/feature/RFormula.scala  |  18 +++++++++--
 .../spark/ml/feature/SQLTransformer.scala       |   2 +-
 .../spark/ml/feature/StandardScaler.scala       |  24 +++++++++++---
 .../spark/ml/feature/StopWordsRemover.scala     |  14 ++++++++-
 .../apache/spark/ml/feature/StringIndexer.scala |  33 +++++++++++++++++---
 .../org/apache/spark/ml/feature/Tokenizer.scala |  22 +++++++++++--
 .../spark/ml/feature/VectorAssembler.scala      |   8 ++++-
 .../apache/spark/ml/feature/VectorIndexer.scala |  24 +++++++++++---
 .../apache/spark/ml/feature/VectorSlicer.scala  |  14 ++++++++-
 .../org/apache/spark/ml/feature/Word2Vec.scala  |  29 +++++++++++++++--
 python/pyspark/ml/feature.py                    |  10 +++---
 28 files changed, 362 insertions(+), 68 deletions(-)
----------------------------------------------------------------------
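For readers skimming the diff, the annotation pattern repeated across every file below is sketched here. The class and names are hypothetical stand-ins, not code from the patch; note also that `org.apache.spark.annotation.Since` is reserved for Spark's own code base, so the sketch assumes a class living under the `org.apache.spark` namespace.

    package org.apache.spark.ml.example

    import org.apache.spark.annotation.Since

    // The class, its primary constructor, the `uid` argument, and every public
    // member each record the release in which they first appeared.
    @Since("1.4.0")
    class ExampleTransformer @Since("1.4.0") (@Since("1.4.0") val uid: String) {

      @Since("1.4.0")
      def this() = this("exampleTransformer")

      /** @group setParam */
      @Since("1.4.0")
      def setThreshold(value: Double): this.type = this  // body elided; returns this so the sketch compiles
    }

Class-level and member-level versions can legitimately differ: `HashingTF` below is `@Since("1.2.0")` at the class but `@Since("1.4.0")` on the String-`uid` constructor, because the type predates its `uid`-based constructor.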
http://git-wip-us.apache.org/repos/asf/spark/blob/37494a18/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala
index 318c8b8..fa9634f 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala
@@ -35,9 +35,11 @@ import org.apache.spark.sql.types._
  * Binarize a column of continuous features given a threshold.
  */
 @Experimental
-final class Binarizer(override val uid: String)
+@Since("1.4.0")
+final class Binarizer @Since("1.4.0") (@Since("1.4.0") override val uid: String)
   extends Transformer with HasInputCol with HasOutputCol with DefaultParamsWritable {

+  @Since("1.4.0")
   def this() = this(Identifiable.randomUID("binarizer"))

 /**
@@ -47,21 +49,26 @@ final class Binarizer(override val uid: String)
  * Default: 0.0
  * @group param
  */
+  @Since("1.4.0")
 val threshold: DoubleParam =
     new DoubleParam(this, "threshold", "threshold used to binarize continuous features")

 /** @group getParam */
+  @Since("1.4.0")
 def getThreshold: Double = $(threshold)

 /** @group setParam */
+  @Since("1.4.0")
 def setThreshold(value: Double): this.type = set(threshold, value)

 setDefault(threshold -> 0.0)

 /** @group setParam */
+  @Since("1.4.0")
 def setInputCol(value: String): this.type = set(inputCol, value)

 /** @group setParam */
+  @Since("1.4.0")
 def setOutputCol(value: String): this.type = set(outputCol, value)

 @Since("2.0.0")
@@ -96,6 +103,7 @@ final class Binarizer(override val uid: String)
     }
   }

+  @Since("1.4.0")
   override def transformSchema(schema: StructType): StructType = {
     val inputType = schema($(inputCol)).dataType
     val outputColName = $(outputCol)
@@ -115,6 +123,7 @@ final class Binarizer(override val uid: String)
     StructType(schema.fields :+ outCol)
   }

+  @Since("1.4.1")
   override def copy(extra: ParamMap): Binarizer = defaultCopy(extra)
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/37494a18/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala
index ff988cc..caffc39 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala
@@ -35,9 +35,11 @@ import org.apache.spark.sql.types.{DoubleType, StructField, StructType}
  * `Bucketizer` maps a column of continuous features to a column of feature buckets.
  */
 @Experimental
-final class Bucketizer(override val uid: String)
+@Since("1.4.0")
+final class Bucketizer @Since("1.4.0") (@Since("1.4.0") override val uid: String)
   extends Model[Bucketizer] with HasInputCol with HasOutputCol with DefaultParamsWritable {

+  @Since("1.4.0")
   def this() = this(Identifiable.randomUID("bucketizer"))

 /**
@@ -48,6 +50,7 @@ final class Bucketizer(override val uid: String)
  * otherwise, values outside the splits specified will be treated as errors.
  * @group param
 */
+  @Since("1.4.0")
 val splits: DoubleArrayParam = new DoubleArrayParam(this, "splits",
   "Split points for mapping continuous features into buckets. With n+1 splits, there are n " +
   "buckets.
A bucket defined by splits x,y holds values in the range [x,y) except the last " + @@ -57,15 +60,19 @@ final class Bucketizer(override val uid: String) Bucketizer.checkSplits) /** @group getParam */ + @Since("1.4.0") def getSplits: Array[Double] = $(splits) /** @group setParam */ + @Since("1.4.0") def setSplits(value: Array[Double]): this.type = set(splits, value) /** @group setParam */ + @Since("1.4.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.4.0") def setOutputCol(value: String): this.type = set(outputCol, value) @Since("2.0.0") @@ -86,16 +93,19 @@ final class Bucketizer(override val uid: String) attr.toStructField() } + @Since("1.4.0") override def transformSchema(schema: StructType): StructType = { SchemaUtils.checkColumnType(schema, $(inputCol), DoubleType) SchemaUtils.appendColumn(schema, prepOutputField(schema)) } + @Since("1.4.1") override def copy(extra: ParamMap): Bucketizer = { defaultCopy[Bucketizer](extra).setParent(parent) } } +@Since("1.6.0") object Bucketizer extends DefaultParamsReadable[Bucketizer] { /** We require splits to be of length >= 3 and to be in strictly increasing order. */ http://git-wip-us.apache.org/repos/asf/spark/blob/37494a18/mllib/src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala index e73a8f5..1c32926 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala @@ -62,21 +62,27 @@ private[feature] trait ChiSqSelectorParams extends Params * categorical label. */ @Experimental -final class ChiSqSelector(override val uid: String) +@Since("1.6.0") +final class ChiSqSelector @Since("1.6.0") (@Since("1.6.0") override val uid: String) extends Estimator[ChiSqSelectorModel] with ChiSqSelectorParams with DefaultParamsWritable { + @Since("1.6.0") def this() = this(Identifiable.randomUID("chiSqSelector")) /** @group setParam */ + @Since("1.6.0") def setNumTopFeatures(value: Int): this.type = set(numTopFeatures, value) /** @group setParam */ + @Since("1.6.0") def setFeaturesCol(value: String): this.type = set(featuresCol, value) /** @group setParam */ + @Since("1.6.0") def setOutputCol(value: String): this.type = set(outputCol, value) /** @group setParam */ + @Since("1.6.0") def setLabelCol(value: String): this.type = set(labelCol, value) @Since("2.0.0") @@ -91,12 +97,14 @@ final class ChiSqSelector(override val uid: String) copyValues(new ChiSqSelectorModel(uid, chiSqSelector).setParent(this)) } + @Since("1.6.0") override def transformSchema(schema: StructType): StructType = { SchemaUtils.checkColumnType(schema, $(featuresCol), new VectorUDT) SchemaUtils.checkNumericType(schema, $(labelCol)) SchemaUtils.appendColumn(schema, $(outputCol), new VectorUDT) } + @Since("1.6.0") override def copy(extra: ParamMap): ChiSqSelector = defaultCopy(extra) } @@ -112,23 +120,28 @@ object ChiSqSelector extends DefaultParamsReadable[ChiSqSelector] { * Model fitted by [[ChiSqSelector]]. 
*/ @Experimental +@Since("1.6.0") final class ChiSqSelectorModel private[ml] ( - override val uid: String, + @Since("1.6.0") override val uid: String, private val chiSqSelector: feature.ChiSqSelectorModel) extends Model[ChiSqSelectorModel] with ChiSqSelectorParams with MLWritable { import ChiSqSelectorModel._ /** list of indices to select (filter). Must be ordered asc */ + @Since("1.6.0") val selectedFeatures: Array[Int] = chiSqSelector.selectedFeatures /** @group setParam */ + @Since("1.6.0") def setFeaturesCol(value: String): this.type = set(featuresCol, value) /** @group setParam */ + @Since("1.6.0") def setOutputCol(value: String): this.type = set(outputCol, value) /** @group setParam */ + @Since("1.6.0") def setLabelCol(value: String): this.type = set(labelCol, value) @Since("2.0.0") @@ -143,6 +156,7 @@ final class ChiSqSelectorModel private[ml] ( dataset.withColumn($(outputCol), selector(col($(featuresCol))), newField.metadata) } + @Since("1.6.0") override def transformSchema(schema: StructType): StructType = { SchemaUtils.checkColumnType(schema, $(featuresCol), new VectorUDT) val newField = prepOutputField(schema) @@ -165,6 +179,7 @@ final class ChiSqSelectorModel private[ml] ( newAttributeGroup.toStructField() } + @Since("1.6.0") override def copy(extra: ParamMap): ChiSqSelectorModel = { val copied = new ChiSqSelectorModel(uid, chiSqSelector) copyValues(copied, extra).setParent(parent) http://git-wip-us.apache.org/repos/asf/spark/blob/37494a18/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala index 272567d..3250fe5 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala @@ -120,27 +120,35 @@ private[feature] trait CountVectorizerParams extends Params with HasInputCol wit * Extracts a vocabulary from document collections and generates a [[CountVectorizerModel]]. 
*/ @Experimental -class CountVectorizer(override val uid: String) +@Since("1.5.0") +class CountVectorizer @Since("1.5.0") (@Since("1.5.0") override val uid: String) extends Estimator[CountVectorizerModel] with CountVectorizerParams with DefaultParamsWritable { + @Since("1.5.0") def this() = this(Identifiable.randomUID("cntVec")) /** @group setParam */ + @Since("1.5.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.5.0") def setOutputCol(value: String): this.type = set(outputCol, value) /** @group setParam */ + @Since("1.5.0") def setVocabSize(value: Int): this.type = set(vocabSize, value) /** @group setParam */ + @Since("1.5.0") def setMinDF(value: Double): this.type = set(minDF, value) /** @group setParam */ + @Since("1.5.0") def setMinTF(value: Double): this.type = set(minTF, value) /** @group setParam */ + @Since("2.0.0") def setBinary(value: Boolean): this.type = set(binary, value) @Since("2.0.0") @@ -176,10 +184,12 @@ class CountVectorizer(override val uid: String) copyValues(new CountVectorizerModel(uid, vocab).setParent(this)) } + @Since("1.5.0") override def transformSchema(schema: StructType): StructType = { validateAndTransformSchema(schema) } + @Since("1.5.0") override def copy(extra: ParamMap): CountVectorizer = defaultCopy(extra) } @@ -196,26 +206,34 @@ object CountVectorizer extends DefaultParamsReadable[CountVectorizer] { * @param vocabulary An Array over terms. Only the terms in the vocabulary will be counted. */ @Experimental -class CountVectorizerModel(override val uid: String, val vocabulary: Array[String]) +@Since("1.5.0") +class CountVectorizerModel( + @Since("1.5.0") override val uid: String, + @Since("1.5.0") val vocabulary: Array[String]) extends Model[CountVectorizerModel] with CountVectorizerParams with MLWritable { import CountVectorizerModel._ + @Since("1.5.0") def this(vocabulary: Array[String]) = { this(Identifiable.randomUID("cntVecModel"), vocabulary) set(vocabSize, vocabulary.length) } /** @group setParam */ + @Since("1.5.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.5.0") def setOutputCol(value: String): this.type = set(outputCol, value) /** @group setParam */ + @Since("1.5.0") def setMinTF(value: Double): this.type = set(minTF, value) /** @group setParam */ + @Since("2.0.0") def setBinary(value: Boolean): this.type = set(binary, value) /** Dictionary created from [[vocabulary]] and its indices, broadcast once for [[transform()]] */ @@ -252,10 +270,12 @@ class CountVectorizerModel(override val uid: String, val vocabulary: Array[Strin dataset.withColumn($(outputCol), vectorizer(col($(inputCol)))) } + @Since("1.5.0") override def transformSchema(schema: StructType): StructType = { validateAndTransformSchema(schema) } + @Since("1.5.0") override def copy(extra: ParamMap): CountVectorizerModel = { val copied = new CountVectorizerModel(uid, vocabulary).setParent(parent) copyValues(copied, extra) http://git-wip-us.apache.org/repos/asf/spark/blob/37494a18/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala index 301358e..9605145 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala @@ -36,9 +36,11 @@ import org.apache.spark.sql.types.DataType * More information on 
[[https://en.wikipedia.org/wiki/Discrete_cosine_transform#DCT-II Wikipedia]]. */ @Experimental -class DCT(override val uid: String) +@Since("1.5.0") +class DCT @Since("1.5.0") (@Since("1.5.0") override val uid: String) extends UnaryTransformer[Vector, Vector, DCT] with DefaultParamsWritable { + @Since("1.5.0") def this() = this(Identifiable.randomUID("dct")) /** @@ -46,13 +48,16 @@ class DCT(override val uid: String) * Default: false * @group param */ + @Since("1.5.0") def inverse: BooleanParam = new BooleanParam( this, "inverse", "Set transformer to perform inverse DCT") /** @group setParam */ + @Since("1.5.0") def setInverse(value: Boolean): this.type = set(inverse, value) /** @group getParam */ + @Since("1.5.0") def getInverse: Boolean = $(inverse) setDefault(inverse -> false) http://git-wip-us.apache.org/repos/asf/spark/blob/37494a18/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala index 9d2e60f..92fefb1 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala @@ -33,21 +33,26 @@ import org.apache.spark.sql.types.DataType * multiplier. */ @Experimental -class ElementwiseProduct(override val uid: String) +@Since("2.0.0") +class ElementwiseProduct @Since("2.0.0") (@Since("2.0.0") override val uid: String) extends UnaryTransformer[Vector, Vector, ElementwiseProduct] with DefaultParamsWritable { + @Since("2.0.0") def this() = this(Identifiable.randomUID("elemProd")) /** * the vector to multiply with input vectors * @group param */ + @Since("2.0.0") val scalingVec: Param[Vector] = new Param(this, "scalingVec", "vector for hadamard product") /** @group setParam */ + @Since("2.0.0") def setScalingVec(value: Vector): this.type = set(scalingVec, value) /** @group getParam */ + @Since("2.0.0") def getScalingVec: Vector = getOrDefault(scalingVec) override protected def createTransformFunc: Vector => Vector = { http://git-wip-us.apache.org/repos/asf/spark/blob/37494a18/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala index 94e1825..6ca7336 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala @@ -38,15 +38,19 @@ import org.apache.spark.sql.types.{ArrayType, StructType} * otherwise the features will not be mapped evenly to the columns. 
*/ @Experimental -class HashingTF(override val uid: String) +@Since("1.2.0") +class HashingTF @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends Transformer with HasInputCol with HasOutputCol with DefaultParamsWritable { + @Since("1.2.0") def this() = this(Identifiable.randomUID("hashingTF")) /** @group setParam */ + @Since("1.4.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.4.0") def setOutputCol(value: String): this.type = set(outputCol, value) /** @@ -54,6 +58,7 @@ class HashingTF(override val uid: String) * (default = 2^18^) * @group param */ + @Since("1.2.0") val numFeatures = new IntParam(this, "numFeatures", "number of features (> 0)", ParamValidators.gt(0)) @@ -64,6 +69,7 @@ class HashingTF(override val uid: String) * (default = false) * @group param */ + @Since("2.0.0") val binary = new BooleanParam(this, "binary", "If true, all non zero counts are set to 1. " + "This is useful for discrete probabilistic models that model binary events rather " + "than integer counts") @@ -71,15 +77,19 @@ class HashingTF(override val uid: String) setDefault(numFeatures -> (1 << 18), binary -> false) /** @group getParam */ + @Since("1.2.0") def getNumFeatures: Int = $(numFeatures) /** @group setParam */ + @Since("1.2.0") def setNumFeatures(value: Int): this.type = set(numFeatures, value) /** @group getParam */ + @Since("2.0.0") def getBinary: Boolean = $(binary) /** @group setParam */ + @Since("2.0.0") def setBinary(value: Boolean): this.type = set(binary, value) @Since("2.0.0") @@ -92,6 +102,7 @@ class HashingTF(override val uid: String) dataset.select(col("*"), t(col($(inputCol))).as($(outputCol), metadata)) } + @Since("1.4.0") override def transformSchema(schema: StructType): StructType = { val inputType = schema($(inputCol)).dataType require(inputType.isInstanceOf[ArrayType], @@ -100,6 +111,7 @@ class HashingTF(override val uid: String) SchemaUtils.appendColumn(schema, attrGroup.toStructField()) } + @Since("1.4.1") override def copy(extra: ParamMap): HashingTF = defaultCopy(extra) } http://git-wip-us.apache.org/repos/asf/spark/blob/37494a18/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala index 08beda6..cf03a28 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala @@ -64,18 +64,23 @@ private[feature] trait IDFBase extends Params with HasInputCol with HasOutputCol * Compute the Inverse Document Frequency (IDF) given a collection of documents. 
*/ @Experimental -final class IDF(override val uid: String) extends Estimator[IDFModel] with IDFBase - with DefaultParamsWritable { +@Since("1.4.0") +final class IDF @Since("1.4.0") (@Since("1.4.0") override val uid: String) + extends Estimator[IDFModel] with IDFBase with DefaultParamsWritable { + @Since("1.4.0") def this() = this(Identifiable.randomUID("idf")) /** @group setParam */ + @Since("1.4.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.4.0") def setOutputCol(value: String): this.type = set(outputCol, value) /** @group setParam */ + @Since("1.4.0") def setMinDocFreq(value: Int): this.type = set(minDocFreq, value) @Since("2.0.0") @@ -88,10 +93,12 @@ final class IDF(override val uid: String) extends Estimator[IDFModel] with IDFBa copyValues(new IDFModel(uid, idf).setParent(this)) } + @Since("1.4.0") override def transformSchema(schema: StructType): StructType = { validateAndTransformSchema(schema) } + @Since("1.4.1") override def copy(extra: ParamMap): IDF = defaultCopy(extra) } @@ -107,17 +114,20 @@ object IDF extends DefaultParamsReadable[IDF] { * Model fitted by [[IDF]]. */ @Experimental +@Since("1.4.0") class IDFModel private[ml] ( - override val uid: String, + @Since("1.4.0") override val uid: String, idfModel: feature.IDFModel) extends Model[IDFModel] with IDFBase with MLWritable { import IDFModel._ /** @group setParam */ + @Since("1.4.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.4.0") def setOutputCol(value: String): this.type = set(outputCol, value) @Since("2.0.0") @@ -128,17 +138,19 @@ class IDFModel private[ml] ( dataset.withColumn($(outputCol), idf(col($(inputCol)))) } + @Since("1.4.0") override def transformSchema(schema: StructType): StructType = { validateAndTransformSchema(schema) } + @Since("1.4.1") override def copy(extra: ParamMap): IDFModel = { val copied = new IDFModel(uid, idfModel) copyValues(copied, extra).setParent(parent) } /** Returns the IDF vector. */ - @Since("1.6.0") + @Since("2.0.0") def idf: Vector = idfModel.idf.asML @Since("1.6.0") http://git-wip-us.apache.org/repos/asf/spark/blob/37494a18/mllib/src/main/scala/org/apache/spark/ml/feature/Interaction.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Interaction.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Interaction.scala index fa65ff9..dca28b5 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Interaction.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Interaction.scala @@ -42,9 +42,9 @@ import org.apache.spark.sql.types._ * `Vector(6, 8)` if all input features were numeric. If the first feature was instead nominal * with four categories, the output would then be `Vector(0, 0, 0, 0, 3, 4, 0, 0)`. 
 */
-@Since("1.6.0")
 @Experimental
-class Interaction @Since("1.6.0") (override val uid: String) extends Transformer
+@Since("1.6.0")
+class Interaction @Since("1.6.0") (@Since("1.6.0") override val uid: String) extends Transformer
   with HasInputCols with HasOutputCol with DefaultParamsWritable {

 @Since("1.6.0")

http://git-wip-us.apache.org/repos/asf/spark/blob/37494a18/mllib/src/main/scala/org/apache/spark/ml/feature/MaxAbsScaler.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/MaxAbsScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/MaxAbsScaler.scala
index 7298a18..31a5815 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/MaxAbsScaler.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/MaxAbsScaler.scala
@@ -54,16 +54,19 @@ private[feature] trait MaxAbsScalerParams extends Params with HasInputCol with H
  * any sparsity.
  */
 @Experimental
-class MaxAbsScaler @Since("2.0.0") (override val uid: String)
+@Since("2.0.0")
+class MaxAbsScaler @Since("2.0.0") (@Since("2.0.0") override val uid: String)
   extends Estimator[MaxAbsScalerModel] with MaxAbsScalerParams with DefaultParamsWritable {

 @Since("2.0.0")
 def this() = this(Identifiable.randomUID("maxAbsScal"))

 /** @group setParam */
+  @Since("2.0.0")
 def setInputCol(value: String): this.type = set(inputCol, value)

 /** @group setParam */
+  @Since("2.0.0")
 def setOutputCol(value: String): this.type = set(outputCol, value)

 @Since("2.0.0")
@@ -81,17 +84,19 @@ class MaxAbsScaler @Since("2.0.0") (override val uid: String)
     copyValues(new MaxAbsScalerModel(uid, Vectors.dense(maxAbs)).setParent(this))
   }

+  @Since("2.0.0")
   override def transformSchema(schema: StructType): StructType = {
     validateAndTransformSchema(schema)
   }

+  @Since("2.0.0")
   override def copy(extra: ParamMap): MaxAbsScaler = defaultCopy(extra)
 }

-@Since("1.6.0")
+@Since("2.0.0")
 object MaxAbsScaler extends DefaultParamsReadable[MaxAbsScaler] {

-  @Since("1.6.0")
+  @Since("2.0.0")
   override def load(path: String): MaxAbsScaler = super.load(path)
 }

@@ -101,17 +106,20 @@ object MaxAbsScaler extends DefaultParamsReadable[MaxAbsScaler] {
 *
 */
 @Experimental
+@Since("2.0.0")
 class MaxAbsScalerModel private[ml] (
-    override val uid: String,
-    val maxAbs: Vector)
+    @Since("2.0.0") override val uid: String,
+    @Since("2.0.0") val maxAbs: Vector)
   extends Model[MaxAbsScalerModel] with MaxAbsScalerParams with MLWritable {

 import MaxAbsScalerModel._

 /** @group setParam */
+  @Since("2.0.0")
 def setInputCol(value: String): this.type = set(inputCol, value)

 /** @group setParam */
+  @Since("2.0.0")
 def setOutputCol(value: String): this.type = set(outputCol, value)

 @Since("2.0.0")
@@ -126,10 +134,12 @@ class MaxAbsScalerModel private[ml] (
     dataset.withColumn($(outputCol), reScale(col($(inputCol))))
   }

+  @Since("2.0.0")
   override def transformSchema(schema: StructType): StructType = {
     validateAndTransformSchema(schema)
   }

+  @Since("2.0.0")
   override def copy(extra: ParamMap): MaxAbsScalerModel = {
     val copied = new MaxAbsScalerModel(uid, maxAbs)
     copyValues(copied, extra).setParent(parent)
@@ -139,7 +149,7 @@ class MaxAbsScalerModel private[ml] (
   override def write: MLWriter = new MaxAbsScalerModelWriter(this)
 }

-@Since("1.6.0")
+@Since("2.0.0")
 object MaxAbsScalerModel extends MLReadable[MaxAbsScalerModel] {

   private[MaxAbsScalerModel]
@@ -171,9 +181,9 @@ object MaxAbsScalerModel extends MLReadable[MaxAbsScalerModel] {
     }
   }

-  @Since("1.6.0")
+  @Since("2.0.0")
   override def read: MLReader[MaxAbsScalerModel] = new MaxAbsScalerModelReader

-  @Since("1.6.0")
+  @Since("2.0.0")
   override def load(path: String): MaxAbsScalerModel = super.load(path)
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/37494a18/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala
index a27bed5..dd5a1f9 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala
@@ -85,23 +85,29 @@ private[feature] trait MinMaxScalerParams extends Params with HasInputCol with H
  * transformer will be DenseVector even for sparse input.
 */
 @Experimental
-class MinMaxScaler(override val uid: String)
+@Since("1.5.0")
+class MinMaxScaler @Since("1.5.0") (@Since("1.5.0") override val uid: String)
   extends Estimator[MinMaxScalerModel] with MinMaxScalerParams with DefaultParamsWritable {

+  @Since("1.5.0")
   def this() = this(Identifiable.randomUID("minMaxScal"))

 setDefault(min -> 0.0, max -> 1.0)

 /** @group setParam */
+  @Since("1.5.0")
 def setInputCol(value: String): this.type = set(inputCol, value)

 /** @group setParam */
+  @Since("1.5.0")
 def setOutputCol(value: String): this.type = set(outputCol, value)

 /** @group setParam */
+  @Since("1.5.0")
 def setMin(value: Double): this.type = set(min, value)

 /** @group setParam */
+  @Since("1.5.0")
 def setMax(value: Double): this.type = set(max, value)

 @Since("2.0.0")
@@ -114,10 +120,12 @@ class MinMaxScaler(override val uid: String)
     copyValues(new MinMaxScalerModel(uid, summary.min, summary.max).setParent(this))
   }

+  @Since("1.5.0")
   override def transformSchema(schema: StructType): StructType = {
     validateAndTransformSchema(schema)
   }

+  @Since("1.5.0")
   override def copy(extra: ParamMap): MinMaxScaler = defaultCopy(extra)
 }

@@ -138,24 +146,29 @@ object MinMaxScaler extends DefaultParamsReadable[MinMaxScaler] {
 * TODO: The transformer does not yet set the metadata in the output column (SPARK-8529).
*/ @Experimental +@Since("1.5.0") class MinMaxScalerModel private[ml] ( - override val uid: String, - val originalMin: Vector, - val originalMax: Vector) + @Since("1.5.0") override val uid: String, + @Since("2.0.0") val originalMin: Vector, + @Since("2.0.0") val originalMax: Vector) extends Model[MinMaxScalerModel] with MinMaxScalerParams with MLWritable { import MinMaxScalerModel._ /** @group setParam */ + @Since("1.5.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.5.0") def setOutputCol(value: String): this.type = set(outputCol, value) /** @group setParam */ + @Since("1.5.0") def setMin(value: Double): this.type = set(min, value) /** @group setParam */ + @Since("1.5.0") def setMax(value: Double): this.type = set(max, value) @Since("2.0.0") @@ -181,10 +194,12 @@ class MinMaxScalerModel private[ml] ( dataset.withColumn($(outputCol), reScale(col($(inputCol)))) } + @Since("1.5.0") override def transformSchema(schema: StructType): StructType = { validateAndTransformSchema(schema) } + @Since("1.5.0") override def copy(extra: ParamMap): MinMaxScalerModel = { val copied = new MinMaxScalerModel(uid, originalMin, originalMax) copyValues(copied, extra).setParent(parent) http://git-wip-us.apache.org/repos/asf/spark/blob/37494a18/mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala index f8bc7e3..9c1f1ad 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala @@ -35,9 +35,11 @@ import org.apache.spark.sql.types.{ArrayType, DataType, StringType} * returned. */ @Experimental -class NGram(override val uid: String) +@Since("1.5.0") +class NGram @Since("1.5.0") (@Since("1.5.0") override val uid: String) extends UnaryTransformer[Seq[String], Seq[String], NGram] with DefaultParamsWritable { + @Since("1.5.0") def this() = this(Identifiable.randomUID("ngram")) /** @@ -45,13 +47,16 @@ class NGram(override val uid: String) * Default: 2, bigram features * @group param */ + @Since("1.5.0") val n: IntParam = new IntParam(this, "n", "number elements per n-gram (>=1)", ParamValidators.gtEq(1)) /** @group setParam */ + @Since("1.5.0") def setN(value: Int): this.type = set(n, value) /** @group getParam */ + @Since("1.5.0") def getN: Int = $(n) setDefault(n -> 2) http://git-wip-us.apache.org/repos/asf/spark/blob/37494a18/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala index 942ac7e..9a4e682 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala @@ -31,9 +31,11 @@ import org.apache.spark.sql.types.DataType * Normalize a vector to have unit norm using the given p-norm. 
*/ @Experimental -class Normalizer(override val uid: String) +@Since("2.0.0") +class Normalizer @Since("2.0.0") (@Since("2.0.0") override val uid: String) extends UnaryTransformer[Vector, Vector, Normalizer] with DefaultParamsWritable { + @Since("2.0.0") def this() = this(Identifiable.randomUID("normalizer")) /** @@ -41,14 +43,17 @@ class Normalizer(override val uid: String) * (default: p = 2) * @group param */ + @Since("2.0.0") val p = new DoubleParam(this, "p", "the p norm value", ParamValidators.gtEq(1)) setDefault(p -> 2.0) /** @group getParam */ + @Since("2.0.0") def getP: Double = $(p) /** @group setParam */ + @Since("2.0.0") def setP(value: Double): this.type = set(p, value) override protected def createTransformFunc: Vector => Vector = { http://git-wip-us.apache.org/repos/asf/spark/blob/37494a18/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala index 3d1e6dd..4fafc1e 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala @@ -43,28 +43,35 @@ import org.apache.spark.sql.types.{DoubleType, NumericType, StructType} * @see [[StringIndexer]] for converting categorical values into category indices */ @Experimental -class OneHotEncoder(override val uid: String) extends Transformer +@Since("1.4.0") +class OneHotEncoder @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends Transformer with HasInputCol with HasOutputCol with DefaultParamsWritable { + @Since("1.4.0") def this() = this(Identifiable.randomUID("oneHot")) /** * Whether to drop the last category in the encoded vector (default: true) * @group param */ + @Since("1.4.0") final val dropLast: BooleanParam = new BooleanParam(this, "dropLast", "whether to drop the last category") setDefault(dropLast -> true) /** @group setParam */ + @Since("1.4.0") def setDropLast(value: Boolean): this.type = set(dropLast, value) /** @group setParam */ + @Since("1.4.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.4.0") def setOutputCol(value: String): this.type = set(outputCol, value) + @Since("1.4.0") override def transformSchema(schema: StructType): StructType = { val inputColName = $(inputCol) val outputColName = $(outputCol) @@ -168,6 +175,7 @@ class OneHotEncoder(override val uid: String) extends Transformer dataset.select(col("*"), encode(col(inputColName).cast(DoubleType)).as(outputColName, metadata)) } + @Since("1.4.1") override def copy(extra: ParamMap): OneHotEncoder = defaultCopy(extra) } http://git-wip-us.apache.org/repos/asf/spark/blob/37494a18/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala index 2f667af..b89c859 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala @@ -65,18 +65,24 @@ private[feature] trait PCAParams extends Params with HasInputCol with HasOutputC * principal components. 
*/ @Experimental -class PCA (override val uid: String) extends Estimator[PCAModel] with PCAParams - with DefaultParamsWritable { +@Since("1.5.0") +class PCA @Since("1.5.0") ( + @Since("1.5.0") override val uid: String) + extends Estimator[PCAModel] with PCAParams with DefaultParamsWritable { + @Since("1.5.0") def this() = this(Identifiable.randomUID("pca")) /** @group setParam */ + @Since("1.5.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.5.0") def setOutputCol(value: String): this.type = set(outputCol, value) /** @group setParam */ + @Since("1.5.0") def setK(value: Int): this.type = set(k, value) /** @@ -93,10 +99,12 @@ class PCA (override val uid: String) extends Estimator[PCAModel] with PCAParams copyValues(new PCAModel(uid, pcaModel.pc, pcaModel.explainedVariance).setParent(this)) } + @Since("1.5.0") override def transformSchema(schema: StructType): StructType = { validateAndTransformSchema(schema) } + @Since("1.5.0") override def copy(extra: ParamMap): PCA = defaultCopy(extra) } @@ -116,18 +124,21 @@ object PCA extends DefaultParamsReadable[PCA] { * each principal component. */ @Experimental +@Since("1.5.0") class PCAModel private[ml] ( - override val uid: String, - val pc: DenseMatrix, - val explainedVariance: DenseVector) + @Since("1.5.0") override val uid: String, + @Since("2.0.0") val pc: DenseMatrix, + @Since("2.0.0") val explainedVariance: DenseVector) extends Model[PCAModel] with PCAParams with MLWritable { import PCAModel._ /** @group setParam */ + @Since("1.5.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.5.0") def setOutputCol(value: String): this.type = set(outputCol, value) /** @@ -149,10 +160,12 @@ class PCAModel private[ml] ( dataset.withColumn($(outputCol), pcaOp(col($(inputCol)))) } + @Since("1.5.0") override def transformSchema(schema: StructType): StructType = { validateAndTransformSchema(schema) } + @Since("1.5.0") override def copy(extra: ParamMap): PCAModel = { val copied = new PCAModel(uid, pc, explainedVariance) copyValues(copied, extra).setParent(parent) http://git-wip-us.apache.org/repos/asf/spark/blob/37494a18/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala index a018677..026014c 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala @@ -35,9 +35,11 @@ import org.apache.spark.sql.types.DataType * `(x, y)`, if we want to expand it with degree 2, then we get `(x, x * x, y, x * y, y * y)`. 
*/ @Experimental -class PolynomialExpansion(override val uid: String) +@Since("2.0.0") +class PolynomialExpansion @Since("2.0.0") (@Since("2.0.0") override val uid: String) extends UnaryTransformer[Vector, Vector, PolynomialExpansion] with DefaultParamsWritable { + @Since("2.0.0") def this() = this(Identifiable.randomUID("poly")) /** @@ -45,15 +47,18 @@ class PolynomialExpansion(override val uid: String) * Default: 2 * @group param */ + @Since("2.0.0") val degree = new IntParam(this, "degree", "the polynomial degree to expand (>= 1)", ParamValidators.gtEq(1)) setDefault(degree -> 2) /** @group getParam */ + @Since("2.0.0") def getDegree: Int = $(degree) /** @group setParam */ + @Since("2.0.0") def setDegree(value: Int): this.type = set(degree, value) override protected def createTransformFunc: Vector => Vector = { v => @@ -62,6 +67,7 @@ class PolynomialExpansion(override val uid: String) override protected def outputDataType: DataType = new VectorUDT() + @Since("1.4.1") override def copy(extra: ParamMap): PolynomialExpansion = defaultCopy(extra) } http://git-wip-us.apache.org/repos/asf/spark/blob/37494a18/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala index 1fefaa1..96b8e7d 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala @@ -74,23 +74,30 @@ private[feature] trait QuantileDiscretizerBase extends Params * covering all real values. */ @Experimental -final class QuantileDiscretizer(override val uid: String) +@Since("1.6.0") +final class QuantileDiscretizer @Since("1.6.0") (@Since("1.6.0") override val uid: String) extends Estimator[Bucketizer] with QuantileDiscretizerBase with DefaultParamsWritable { + @Since("1.6.0") def this() = this(Identifiable.randomUID("quantileDiscretizer")) /** @group setParam */ + @Since("2.0.0") def setRelativeError(value: Double): this.type = set(relativeError, value) /** @group setParam */ + @Since("1.6.0") def setNumBuckets(value: Int): this.type = set(numBuckets, value) /** @group setParam */ + @Since("1.6.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.6.0") def setOutputCol(value: String): this.type = set(outputCol, value) + @Since("1.6.0") override def transformSchema(schema: StructType): StructType = { SchemaUtils.checkColumnType(schema, $(inputCol), DoubleType) val inputFields = schema.fields @@ -112,6 +119,7 @@ final class QuantileDiscretizer(override val uid: String) copyValues(bucketizer.setParent(this)) } + @Since("1.6.0") override def copy(extra: ParamMap): QuantileDiscretizer = defaultCopy(extra) } http://git-wip-us.apache.org/repos/asf/spark/blob/37494a18/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala index a7ca0fe..546dc7e 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala @@ -70,15 +70,18 @@ private[feature] trait RFormulaBase extends HasFeaturesCol with HasLabelCol { * will be created from the 
specified response variable in the formula. */ @Experimental -class RFormula(override val uid: String) +@Since("1.5.0") +class RFormula @Since("1.5.0") (@Since("1.5.0") override val uid: String) extends Estimator[RFormulaModel] with RFormulaBase with DefaultParamsWritable { + @Since("1.5.0") def this() = this(Identifiable.randomUID("rFormula")) /** * R formula parameter. The formula is provided in string form. * @group param */ + @Since("1.5.0") val formula: Param[String] = new Param(this, "formula", "R model formula") /** @@ -86,15 +89,19 @@ class RFormula(override val uid: String) * @group setParam * @param value an R formula in string form (e.g. "y ~ x + z") */ + @Since("1.5.0") def setFormula(value: String): this.type = set(formula, value) /** @group getParam */ + @Since("1.5.0") def getFormula: String = $(formula) /** @group setParam */ + @Since("1.5.0") def setFeaturesCol(value: String): this.type = set(featuresCol, value) /** @group setParam */ + @Since("1.5.0") def setLabelCol(value: String): this.type = set(labelCol, value) /** Whether the formula specifies fitting an intercept. */ @@ -170,6 +177,7 @@ class RFormula(override val uid: String) copyValues(new RFormulaModel(uid, resolvedFormula, pipelineModel).setParent(this)) } + @Since("1.5.0") // optimistic schema; does not contain any ML attributes override def transformSchema(schema: StructType): StructType = { if (hasLabelCol(schema)) { @@ -180,8 +188,10 @@ class RFormula(override val uid: String) } } + @Since("1.5.0") override def copy(extra: ParamMap): RFormula = defaultCopy(extra) + @Since("2.0.0") override def toString: String = s"RFormula(${get(formula).getOrElse("")}) (uid=$uid)" } @@ -201,8 +211,9 @@ object RFormula extends DefaultParamsReadable[RFormula] { * @param pipelineModel the fitted feature model, including factor to index mappings. 
*/ @Experimental +@Since("1.5.0") class RFormulaModel private[feature]( - override val uid: String, + @Since("1.5.0") override val uid: String, private[ml] val resolvedFormula: ResolvedRFormula, private[ml] val pipelineModel: PipelineModel) extends Model[RFormulaModel] with RFormulaBase with MLWritable { @@ -213,6 +224,7 @@ class RFormulaModel private[feature]( transformLabel(pipelineModel.transform(dataset)) } + @Since("1.5.0") override def transformSchema(schema: StructType): StructType = { checkCanTransform(schema) val withFeatures = pipelineModel.transformSchema(schema) @@ -231,9 +243,11 @@ class RFormulaModel private[feature]( } } + @Since("1.5.0") override def copy(extra: ParamMap): RFormulaModel = copyValues( new RFormulaModel(uid, resolvedFormula, pipelineModel)) + @Since("2.0.0") override def toString: String = s"RFormulaModel($resolvedFormula) (uid=$uid)" private def transformLabel(dataset: Dataset[_]): DataFrame = { http://git-wip-us.apache.org/repos/asf/spark/blob/37494a18/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala index bd8f949..b871574 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala @@ -39,7 +39,7 @@ import org.apache.spark.sql.types.StructType */ @Experimental @Since("1.6.0") -class SQLTransformer @Since("1.6.0") (override val uid: String) extends Transformer +class SQLTransformer @Since("1.6.0") (@Since("1.6.0") override val uid: String) extends Transformer with DefaultParamsWritable { @Since("1.6.0") http://git-wip-us.apache.org/repos/asf/spark/blob/37494a18/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala index 7cec369..5e1bacf 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala @@ -85,21 +85,28 @@ private[feature] trait StandardScalerParams extends Params with HasInputCol with * which is computed as the square root of the unbiased sample variance. 
*/ @Experimental -class StandardScaler(override val uid: String) extends Estimator[StandardScalerModel] - with StandardScalerParams with DefaultParamsWritable { +@Since("1.2.0") +class StandardScaler @Since("1.4.0") ( + @Since("1.4.0") override val uid: String) + extends Estimator[StandardScalerModel] with StandardScalerParams with DefaultParamsWritable { + @Since("1.2.0") def this() = this(Identifiable.randomUID("stdScal")) /** @group setParam */ + @Since("1.2.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.2.0") def setOutputCol(value: String): this.type = set(outputCol, value) /** @group setParam */ + @Since("1.4.0") def setWithMean(value: Boolean): this.type = set(withMean, value) /** @group setParam */ + @Since("1.4.0") def setWithStd(value: Boolean): this.type = set(withStd, value) @Since("2.0.0") @@ -113,10 +120,12 @@ class StandardScaler(override val uid: String) extends Estimator[StandardScalerM copyValues(new StandardScalerModel(uid, scalerModel.std, scalerModel.mean).setParent(this)) } + @Since("1.4.0") override def transformSchema(schema: StructType): StructType = { validateAndTransformSchema(schema) } + @Since("1.4.1") override def copy(extra: ParamMap): StandardScaler = defaultCopy(extra) } @@ -135,18 +144,21 @@ object StandardScaler extends DefaultParamsReadable[StandardScaler] { * @param mean Mean of the StandardScalerModel */ @Experimental +@Since("1.2.0") class StandardScalerModel private[ml] ( - override val uid: String, - val std: Vector, - val mean: Vector) + @Since("1.4.0") override val uid: String, + @Since("2.0.0") val std: Vector, + @Since("2.0.0") val mean: Vector) extends Model[StandardScalerModel] with StandardScalerParams with MLWritable { import StandardScalerModel._ /** @group setParam */ + @Since("1.2.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.2.0") def setOutputCol(value: String): this.type = set(outputCol, value) @Since("2.0.0") @@ -161,10 +173,12 @@ class StandardScalerModel private[ml] ( dataset.withColumn($(outputCol), scale(col($(inputCol)))) } + @Since("1.4.0") override def transformSchema(schema: StructType): StructType = { validateAndTransformSchema(schema) } + @Since("1.4.1") override def copy(extra: ParamMap): StandardScalerModel = { val copied = new StandardScalerModel(uid, std, mean) copyValues(copied, extra).setParent(parent) http://git-wip-us.apache.org/repos/asf/spark/blob/37494a18/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala index 11864cb..1a6f42f 100755 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala @@ -33,15 +33,19 @@ import org.apache.spark.sql.types.{ArrayType, StringType, StructType} * @see [[http://en.wikipedia.org/wiki/Stop_words]] */ @Experimental -class StopWordsRemover(override val uid: String) +@Since("1.5.0") +class StopWordsRemover @Since("1.5.0") (@Since("1.5.0") override val uid: String) extends Transformer with HasInputCol with HasOutputCol with DefaultParamsWritable { + @Since("1.5.0") def this() = this(Identifiable.randomUID("stopWords")) /** @group setParam */ + @Since("1.5.0") def setInputCol(value: String): this.type = set(inputCol, value) /** 
@group setParam */ + @Since("1.5.0") def setOutputCol(value: String): this.type = set(outputCol, value) /** @@ -50,13 +54,16 @@ class StopWordsRemover(override val uid: String) * @see [[StopWordsRemover.loadDefaultStopWords()]] * @group param */ + @Since("1.5.0") val stopWords: StringArrayParam = new StringArrayParam(this, "stopWords", "the words to be filtered out") /** @group setParam */ + @Since("1.5.0") def setStopWords(value: Array[String]): this.type = set(stopWords, value) /** @group getParam */ + @Since("1.5.0") def getStopWords: Array[String] = $(stopWords) /** @@ -64,13 +71,16 @@ class StopWordsRemover(override val uid: String) * Default: false * @group param */ + @Since("1.5.0") val caseSensitive: BooleanParam = new BooleanParam(this, "caseSensitive", "whether to do a case-sensitive comparison over the stop words") /** @group setParam */ + @Since("1.5.0") def setCaseSensitive(value: Boolean): this.type = set(caseSensitive, value) /** @group getParam */ + @Since("1.5.0") def getCaseSensitive: Boolean = $(caseSensitive) setDefault(stopWords -> StopWordsRemover.loadDefaultStopWords("english"), caseSensitive -> false) @@ -95,6 +105,7 @@ class StopWordsRemover(override val uid: String) dataset.select(col("*"), t(col($(inputCol))).as($(outputCol), metadata)) } + @Since("1.5.0") override def transformSchema(schema: StructType): StructType = { val inputType = schema($(inputCol)).dataType require(inputType.sameType(ArrayType(StringType)), @@ -102,6 +113,7 @@ class StopWordsRemover(override val uid: String) SchemaUtils.appendColumn(schema, $(outputCol), inputType, schema($(inputCol)).nullable) } + @Since("1.5.0") override def copy(extra: ParamMap): StopWordsRemover = defaultCopy(extra) } http://git-wip-us.apache.org/repos/asf/spark/blob/37494a18/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala index cc0571f..0f7337c 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala @@ -64,22 +64,27 @@ private[feature] trait StringIndexerBase extends Params with HasInputCol with Ha * @see [[IndexToString]] for the inverse transformation */ @Experimental -class StringIndexer(override val uid: String) extends Estimator[StringIndexerModel] +@Since("1.4.0") +class StringIndexer @Since("1.4.0") ( + @Since("1.4.0") override val uid: String) extends Estimator[StringIndexerModel] with StringIndexerBase with DefaultParamsWritable { + @Since("1.4.0") def this() = this(Identifiable.randomUID("strIdx")) /** @group setParam */ + @Since("1.6.0") def setHandleInvalid(value: String): this.type = set(handleInvalid, value) setDefault(handleInvalid, "error") /** @group setParam */ + @Since("1.4.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.4.0") def setOutputCol(value: String): this.type = set(outputCol, value) - @Since("2.0.0") override def fit(dataset: Dataset[_]): StringIndexerModel = { val counts = dataset.select(col($(inputCol)).cast(StringType)) @@ -90,10 +95,12 @@ class StringIndexer(override val uid: String) extends Estimator[StringIndexerMod copyValues(new StringIndexerModel(uid, labels).setParent(this)) } + @Since("1.4.0") override def transformSchema(schema: StructType): StructType = { 
validateAndTransformSchema(schema) } + @Since("1.4.1") override def copy(extra: ParamMap): StringIndexer = defaultCopy(extra) } @@ -115,13 +122,15 @@ object StringIndexer extends DefaultParamsReadable[StringIndexer] { * @param labels Ordered list of labels, corresponding to indices to be assigned. */ @Experimental +@Since("1.4.0") class StringIndexerModel ( - override val uid: String, - val labels: Array[String]) + @Since("1.4.0") override val uid: String, + @Since("1.5.0") val labels: Array[String]) extends Model[StringIndexerModel] with StringIndexerBase with MLWritable { import StringIndexerModel._ + @Since("1.5.0") def this(labels: Array[String]) = this(Identifiable.randomUID("strIdx"), labels) private val labelToIndex: OpenHashMap[String, Double] = { @@ -136,13 +145,16 @@ class StringIndexerModel ( } /** @group setParam */ + @Since("1.6.0") def setHandleInvalid(value: String): this.type = set(handleInvalid, value) setDefault(handleInvalid, "error") /** @group setParam */ + @Since("1.4.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.4.0") def setOutputCol(value: String): this.type = set(outputCol, value) @Since("2.0.0") @@ -177,6 +189,7 @@ class StringIndexerModel ( indexer(dataset($(inputCol)).cast(StringType)).as($(outputCol), metadata)) } + @Since("1.4.0") override def transformSchema(schema: StructType): StructType = { if (schema.fieldNames.contains($(inputCol))) { validateAndTransformSchema(schema) @@ -186,6 +199,7 @@ class StringIndexerModel ( } } + @Since("1.4.1") override def copy(extra: ParamMap): StringIndexerModel = { val copied = new StringIndexerModel(uid, labels) copyValues(copied, extra).setParent(parent) @@ -245,19 +259,24 @@ object StringIndexerModel extends MLReadable[StringIndexerModel] { * @see [[StringIndexer]] for converting strings into indices */ @Experimental -class IndexToString private[ml] (override val uid: String) +@Since("1.5.0") +class IndexToString private[ml] (@Since("1.5.0") override val uid: String) extends Transformer with HasInputCol with HasOutputCol with DefaultParamsWritable { + @Since("1.5.0") def this() = this(Identifiable.randomUID("idxToStr")) /** @group setParam */ + @Since("1.5.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.5.0") def setOutputCol(value: String): this.type = set(outputCol, value) /** @group setParam */ + @Since("1.5.0") def setLabels(value: Array[String]): this.type = set(labels, value) /** @@ -266,13 +285,16 @@ class IndexToString private[ml] (override val uid: String) * Default: Not specified, in which case [[inputCol]] metadata is used for labels. * @group param */ + @Since("1.5.0") final val labels: StringArrayParam = new StringArrayParam(this, "labels", "Optional array of labels specifying index-string mapping." 
+ " If not provided or if empty, then metadata from inputCol is used instead.") /** @group getParam */ + @Since("1.5.0") final def getLabels: Array[String] = $(labels) + @Since("1.5.0") override def transformSchema(schema: StructType): StructType = { val inputColName = $(inputCol) val inputDataType = schema(inputColName).dataType @@ -310,6 +332,7 @@ class IndexToString private[ml] (override val uid: String) indexer(dataset($(inputCol)).cast(DoubleType)).as(outputColName)) } + @Since("1.5.0") override def copy(extra: ParamMap): IndexToString = { defaultCopy(extra) } http://git-wip-us.apache.org/repos/asf/spark/blob/37494a18/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala index 8456a0e..010c948 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala @@ -30,9 +30,11 @@ import org.apache.spark.sql.types.{ArrayType, DataType, StringType} * @see [[RegexTokenizer]] */ @Experimental -class Tokenizer(override val uid: String) +@Since("1.2.0") +class Tokenizer @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends UnaryTransformer[String, Seq[String], Tokenizer] with DefaultParamsWritable { + @Since("1.2.0") def this() = this(Identifiable.randomUID("tok")) override protected def createTransformFunc: String => Seq[String] = { @@ -45,6 +47,7 @@ class Tokenizer(override val uid: String) override protected def outputDataType: DataType = new ArrayType(StringType, true) + @Since("1.4.1") override def copy(extra: ParamMap): Tokenizer = defaultCopy(extra) } @@ -63,9 +66,11 @@ object Tokenizer extends DefaultParamsReadable[Tokenizer] { * It returns an array of strings that can be empty. 
http://git-wip-us.apache.org/repos/asf/spark/blob/37494a18/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala
index 8456a0e..010c948 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala
@@ -30,9 +30,11 @@ import org.apache.spark.sql.types.{ArrayType, DataType, StringType}
 * @see [[RegexTokenizer]]
 */
@Experimental
-class Tokenizer(override val uid: String)
+@Since("1.2.0")
+class Tokenizer @Since("1.4.0") (@Since("1.4.0") override val uid: String)
  extends UnaryTransformer[String, Seq[String], Tokenizer] with DefaultParamsWritable {
+  @Since("1.2.0")
  def this() = this(Identifiable.randomUID("tok"))
  override protected def createTransformFunc: String => Seq[String] = {
@@ -45,6 +47,7 @@ class Tokenizer(override val uid: String)
  override protected def outputDataType: DataType = new ArrayType(StringType, true)
+  @Since("1.4.1")
  override def copy(extra: ParamMap): Tokenizer = defaultCopy(extra)
}
@@ -63,9 +66,11 @@ object Tokenizer extends DefaultParamsReadable[Tokenizer] {
 * It returns an array of strings that can be empty.
 */
@Experimental
-class RegexTokenizer(override val uid: String)
+@Since("1.4.0")
+class RegexTokenizer @Since("1.4.0") (@Since("1.4.0") override val uid: String)
  extends UnaryTransformer[String, Seq[String], RegexTokenizer] with DefaultParamsWritable {
+  @Since("1.4.0")
  def this() = this(Identifiable.randomUID("regexTok"))
  /**
@@ -73,13 +78,16 @@ class RegexTokenizer(override val uid: String)
   * Default: 1, to avoid returning empty strings
   * @group param
   */
+  @Since("1.4.0")
  val minTokenLength: IntParam = new IntParam(this, "minTokenLength", "minimum token length (>= 0)", ParamValidators.gtEq(0))
  /** @group setParam */
+  @Since("1.4.0")
  def setMinTokenLength(value: Int): this.type = set(minTokenLength, value)
  /** @group getParam */
+  @Since("1.4.0")
  def getMinTokenLength: Int = $(minTokenLength)
  /**
@@ -87,12 +95,15 @@ class RegexTokenizer(override val uid: String)
   * Default: true
   * @group param
   */
+  @Since("1.4.0")
  val gaps: BooleanParam = new BooleanParam(this, "gaps", "Set regex to match gaps or tokens")
  /** @group setParam */
+  @Since("1.4.0")
  def setGaps(value: Boolean): this.type = set(gaps, value)
  /** @group getParam */
+  @Since("1.4.0")
  def getGaps: Boolean = $(gaps)
  /**
@@ -100,12 +111,15 @@ class RegexTokenizer(override val uid: String)
   * Default: `"\\s+"`
   * @group param
   */
+  @Since("1.4.0")
  val pattern: Param[String] = new Param(this, "pattern", "regex pattern used for tokenizing")
  /** @group setParam */
+  @Since("1.4.0")
  def setPattern(value: String): this.type = set(pattern, value)
  /** @group getParam */
+  @Since("1.4.0")
  def getPattern: String = $(pattern)
  /**
@@ -113,13 +127,16 @@ class RegexTokenizer(override val uid: String)
   * Default: true
   * @group param
   */
+  @Since("1.6.0")
  final val toLowercase: BooleanParam = new BooleanParam(this, "toLowercase", "whether to convert all characters to lowercase before tokenizing.")
  /** @group setParam */
+  @Since("1.6.0")
  def setToLowercase(value: Boolean): this.type = set(toLowercase, value)
  /** @group getParam */
+  @Since("1.6.0")
  def getToLowercase: Boolean = $(toLowercase)
  setDefault(minTokenLength -> 1, gaps -> true, pattern -> "\\s+", toLowercase -> true)
@@ -138,6 +155,7 @@ class RegexTokenizer(override val uid: String)
  override protected def outputDataType: DataType = new ArrayType(StringType, true)
+  @Since("1.4.1")
  override def copy(extra: ParamMap): RegexTokenizer = defaultCopy(extra)
}
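The RegexTokenizer params annotated above (minTokenLength, gaps, pattern, toLowercase) combine as in this minimal sketch; the `df` DataFrame and its "sentence" column are assumed for illustration:

import org.apache.spark.ml.feature.RegexTokenizer

// With gaps = true (the default), pattern matches separators; tokens
// shorter than minTokenLength are dropped from the output array.
val tokenizer = new RegexTokenizer()
  .setInputCol("sentence")
  .setOutputCol("words")
  .setPattern("\\W")
  .setMinTokenLength(2)
val tokenized = tokenizer.transform(df)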
http://git-wip-us.apache.org/repos/asf/spark/blob/37494a18/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala
index 1bc2420..4939dab 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala
@@ -36,15 +36,19 @@ import org.apache.spark.sql.types._
 * A feature transformer that merges multiple columns into a vector column.
 */
@Experimental
-class VectorAssembler(override val uid: String)
+@Since("1.4.0")
+class VectorAssembler @Since("1.4.0") (@Since("1.4.0") override val uid: String)
  extends Transformer with HasInputCols with HasOutputCol with DefaultParamsWritable {
+  @Since("1.4.0")
  def this() = this(Identifiable.randomUID("vecAssembler"))
  /** @group setParam */
+  @Since("1.4.0")
  def setInputCols(value: Array[String]): this.type = set(inputCols, value)
  /** @group setParam */
+  @Since("1.4.0")
  def setOutputCol(value: String): this.type = set(outputCol, value)
  @Since("2.0.0")
@@ -106,6 +110,7 @@ class VectorAssembler(override val uid: String)
    dataset.select(col("*"), assembleFunc(struct(args: _*)).as($(outputCol), metadata))
  }
+  @Since("1.4.0")
  override def transformSchema(schema: StructType): StructType = {
    val inputColNames = $(inputCols)
    val outputColName = $(outputCol)
@@ -122,6 +127,7 @@ class VectorAssembler(override val uid: String)
    StructType(schema.fields :+ new StructField(outputColName, new VectorUDT, true))
  }
+  @Since("1.4.1")
  override def copy(extra: ParamMap): VectorAssembler = defaultCopy(extra)
}
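The two annotated setters above are typically VectorAssembler's whole API surface in use; a brief sketch (the column names and `df` are illustrative):

import org.apache.spark.ml.feature.VectorAssembler

// Concatenate numeric and vector-valued columns into a single feature
// vector, carrying over attribute metadata where it is available.
val assembler = new VectorAssembler()
  .setInputCols(Array("hour", "mobile", "userFeatures"))
  .setOutputCol("features")
val assembled = assembler.transform(df)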
http://git-wip-us.apache.org/repos/asf/spark/blob/37494a18/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala
index d814528..52db996 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala
@@ -94,18 +94,24 @@ private[ml] trait VectorIndexerParams extends Params with HasInputCol with HasOu
 * - Add option for allowing unknown categories.
 */
@Experimental
-class VectorIndexer(override val uid: String) extends Estimator[VectorIndexerModel]
-  with VectorIndexerParams with DefaultParamsWritable {
+@Since("1.4.0")
+class VectorIndexer @Since("1.4.0") (
+    @Since("1.4.0") override val uid: String)
+  extends Estimator[VectorIndexerModel] with VectorIndexerParams with DefaultParamsWritable {
+  @Since("1.4.0")
  def this() = this(Identifiable.randomUID("vecIdx"))
  /** @group setParam */
+  @Since("1.4.0")
  def setMaxCategories(value: Int): this.type = set(maxCategories, value)
  /** @group setParam */
+  @Since("1.4.0")
  def setInputCol(value: String): this.type = set(inputCol, value)
  /** @group setParam */
+  @Since("1.4.0")
  def setOutputCol(value: String): this.type = set(outputCol, value)
  @Since("2.0.0")
@@ -126,6 +132,7 @@ class VectorIndexer(override val uid: String) extends Estimator[VectorIndexerMod
    copyValues(model)
  }
+  @Since("1.4.0")
  override def transformSchema(schema: StructType): StructType = {
    // We do not transfer feature metadata since we do not know what types of features we will
    // produce in transform().
@@ -136,6 +143,7 @@ class VectorIndexer(override val uid: String) extends Estimator[VectorIndexerMod
    SchemaUtils.appendColumn(schema, $(outputCol), dataType)
  }
+  @Since("1.4.1")
  override def copy(extra: ParamMap): VectorIndexer = defaultCopy(extra)
}
@@ -256,15 +264,17 @@ object VectorIndexer extends DefaultParamsReadable[VectorIndexer] {
 * If a feature is not in this map, it is treated as continuous.
 */
@Experimental
+@Since("1.4.0")
class VectorIndexerModel private[ml] (
-    override val uid: String,
-    val numFeatures: Int,
-    val categoryMaps: Map[Int, Map[Double, Int]])
+    @Since("1.4.0") override val uid: String,
+    @Since("1.4.0") val numFeatures: Int,
+    @Since("1.4.0") val categoryMaps: Map[Int, Map[Double, Int]])
  extends Model[VectorIndexerModel] with VectorIndexerParams with MLWritable {
  import VectorIndexerModel._
  /** Java-friendly version of [[categoryMaps]] */
+  @Since("1.4.0")
  def javaCategoryMaps: JMap[JInt, JMap[JDouble, JInt]] = {
    categoryMaps.mapValues(_.asJava).asJava.asInstanceOf[JMap[JInt, JMap[JDouble, JInt]]]
  }
@@ -342,9 +352,11 @@ class VectorIndexerModel private[ml] (
  }
  /** @group setParam */
+  @Since("1.4.0")
  def setInputCol(value: String): this.type = set(inputCol, value)
  /** @group setParam */
+  @Since("1.4.0")
  def setOutputCol(value: String): this.type = set(outputCol, value)
  @Since("2.0.0")
@@ -356,6 +368,7 @@ class VectorIndexerModel private[ml] (
    dataset.withColumn($(outputCol), newCol, newField.metadata)
  }
+  @Since("1.4.0")
  override def transformSchema(schema: StructType): StructType = {
    val dataType = new VectorUDT
    require(isDefined(inputCol),
@@ -415,6 +428,7 @@ class VectorIndexerModel private[ml] (
    newAttributeGroup.toStructField()
  }
+  @Since("1.4.1")
  override def copy(extra: ParamMap): VectorIndexerModel = {
    val copied = new VectorIndexerModel(uid, numFeatures, categoryMaps)
    copyValues(copied, extra).setParent(parent)
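As a reminder of how the annotated Estimator/Model pair above is driven, a minimal sketch; a `df` DataFrame with a vector-typed "features" column is assumed:

import org.apache.spark.ml.feature.VectorIndexer

// Features with at most maxCategories distinct values are treated as
// categorical and re-indexed; all other features are left continuous.
val indexer = new VectorIndexer()
  .setInputCol("features")
  .setOutputCol("indexedFeatures")
  .setMaxCategories(10)
val indexerModel = indexer.fit(df)
val indexed = indexerModel.transform(df)
// categoryMaps (annotated above) reports which features were indexed.
println(indexerModel.categoryMaps.keys.mkString(", "))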
+ " There can be no overlap with indices.", VectorSlicer.validNames) @@ -78,15 +84,19 @@ final class VectorSlicer(override val uid: String) setDefault(names -> Array.empty[String]) /** @group getParam */ + @Since("1.5.0") def getNames: Array[String] = $(names) /** @group setParam */ + @Since("1.5.0") def setNames(value: Array[String]): this.type = set(names, value) /** @group setParam */ + @Since("1.5.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.5.0") def setOutputCol(value: String): this.type = set(outputCol, value) @Since("2.0.0") @@ -134,6 +144,7 @@ final class VectorSlicer(override val uid: String) indFeatures ++ nameFeatures } + @Since("1.5.0") override def transformSchema(schema: StructType): StructType = { require($(indices).length > 0 || $(names).length > 0, s"VectorSlicer requires that at least one feature be selected.") @@ -148,6 +159,7 @@ final class VectorSlicer(override val uid: String) StructType(outputFields) } + @Since("1.5.0") override def copy(extra: ParamMap): VectorSlicer = defaultCopy(extra) } http://git-wip-us.apache.org/repos/asf/spark/blob/37494a18/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala index 33515b2..05c4f2f 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala @@ -120,39 +120,52 @@ private[feature] trait Word2VecBase extends Params * natural language processing or machine learning process. */ @Experimental -final class Word2Vec(override val uid: String) extends Estimator[Word2VecModel] with Word2VecBase - with DefaultParamsWritable { +@Since("1.4.0") +final class Word2Vec @Since("1.4.0") ( + @Since("1.4.0") override val uid: String) + extends Estimator[Word2VecModel] with Word2VecBase with DefaultParamsWritable { + @Since("1.4.0") def this() = this(Identifiable.randomUID("w2v")) /** @group setParam */ + @Since("1.4.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.4.0") def setOutputCol(value: String): this.type = set(outputCol, value) /** @group setParam */ + @Since("1.4.0") def setVectorSize(value: Int): this.type = set(vectorSize, value) /** @group expertSetParam */ + @Since("1.6.0") def setWindowSize(value: Int): this.type = set(windowSize, value) /** @group setParam */ + @Since("1.4.0") def setStepSize(value: Double): this.type = set(stepSize, value) /** @group setParam */ + @Since("1.4.0") def setNumPartitions(value: Int): this.type = set(numPartitions, value) /** @group setParam */ + @Since("1.4.0") def setMaxIter(value: Int): this.type = set(maxIter, value) /** @group setParam */ + @Since("1.4.0") def setSeed(value: Long): this.type = set(seed, value) /** @group setParam */ + @Since("1.4.0") def setMinCount(value: Int): this.type = set(minCount, value) /** @group setParam */ + @Since("2.0.0") def setMaxSentenceLength(value: Int): this.type = set(maxSentenceLength, value) @Since("2.0.0") @@ -172,10 +185,12 @@ final class Word2Vec(override val uid: String) extends Estimator[Word2VecModel] copyValues(new Word2VecModel(uid, wordVectors).setParent(this)) } + @Since("1.4.0") override def transformSchema(schema: StructType): StructType = { validateAndTransformSchema(schema) } + @Since("1.4.1") override def copy(extra: 
http://git-wip-us.apache.org/repos/asf/spark/blob/37494a18/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
index 33515b2..05c4f2f 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
@@ -120,39 +120,52 @@ private[feature] trait Word2VecBase extends Params
 * natural language processing or machine learning process.
 */
@Experimental
-final class Word2Vec(override val uid: String) extends Estimator[Word2VecModel] with Word2VecBase
-  with DefaultParamsWritable {
+@Since("1.4.0")
+final class Word2Vec @Since("1.4.0") (
+    @Since("1.4.0") override val uid: String)
+  extends Estimator[Word2VecModel] with Word2VecBase with DefaultParamsWritable {
+  @Since("1.4.0")
  def this() = this(Identifiable.randomUID("w2v"))
  /** @group setParam */
+  @Since("1.4.0")
  def setInputCol(value: String): this.type = set(inputCol, value)
  /** @group setParam */
+  @Since("1.4.0")
  def setOutputCol(value: String): this.type = set(outputCol, value)
  /** @group setParam */
+  @Since("1.4.0")
  def setVectorSize(value: Int): this.type = set(vectorSize, value)
  /** @group expertSetParam */
+  @Since("1.6.0")
  def setWindowSize(value: Int): this.type = set(windowSize, value)
  /** @group setParam */
+  @Since("1.4.0")
  def setStepSize(value: Double): this.type = set(stepSize, value)
  /** @group setParam */
+  @Since("1.4.0")
  def setNumPartitions(value: Int): this.type = set(numPartitions, value)
  /** @group setParam */
+  @Since("1.4.0")
  def setMaxIter(value: Int): this.type = set(maxIter, value)
  /** @group setParam */
+  @Since("1.4.0")
  def setSeed(value: Long): this.type = set(seed, value)
  /** @group setParam */
+  @Since("1.4.0")
  def setMinCount(value: Int): this.type = set(minCount, value)
  /** @group setParam */
+  @Since("2.0.0")
  def setMaxSentenceLength(value: Int): this.type = set(maxSentenceLength, value)
  @Since("2.0.0")
@@ -172,10 +185,12 @@ final class Word2Vec(override val uid: String) extends Estimator[Word2VecModel]
    copyValues(new Word2VecModel(uid, wordVectors).setParent(this))
  }
+  @Since("1.4.0")
  override def transformSchema(schema: StructType): StructType = {
    validateAndTransformSchema(schema)
  }
+  @Since("1.4.1")
  override def copy(extra: ParamMap): Word2Vec = defaultCopy(extra)
}
@@ -191,8 +206,9 @@ object Word2Vec extends DefaultParamsReadable[Word2Vec] {
 * Model fitted by [[Word2Vec]].
 */
@Experimental
+@Since("1.4.0")
class Word2VecModel private[ml] (
-    override val uid: String,
+    @Since("1.4.0") override val uid: String,
    @transient private val wordVectors: feature.Word2VecModel)
  extends Model[Word2VecModel] with Word2VecBase with MLWritable {
@@ -202,6 +218,7 @@ class Word2VecModel private[ml] (
   * Returns a dataframe with two fields, "word" and "vector", with "word" being a String and
   * and the vector the DenseVector that it is mapped to.
   */
+  @Since("1.5.0")
  @transient lazy val getVectors: DataFrame = {
    val spark = SparkSession.builder().getOrCreate()
    val wordVec = wordVectors.getVectors.mapValues(vec => Vectors.dense(vec.map(_.toDouble)))
@@ -213,6 +230,7 @@ class Word2VecModel private[ml] (
   * Returns a dataframe with the words and the cosine similarities between the
   * synonyms and the given word.
   */
+  @Since("1.5.0")
  def findSynonyms(word: String, num: Int): DataFrame = {
    findSynonyms(wordVectors.transform(word), num)
  }
@@ -222,15 +240,18 @@ class Word2VecModel private[ml] (
   * of the word. Returns a dataframe with the words and the cosine similarities between the
   * synonyms and the given word vector.
   */
+  @Since("1.5.0")
  def findSynonyms(word: Vector, num: Int): DataFrame = {
    val spark = SparkSession.builder().getOrCreate()
    spark.createDataFrame(wordVectors.findSynonyms(word, num)).toDF("word", "similarity")
  }
  /** @group setParam */
+  @Since("1.4.0")
  def setInputCol(value: String): this.type = set(inputCol, value)
  /** @group setParam */
+  @Since("1.4.0")
  def setOutputCol(value: String): this.type = set(outputCol, value)
  /**
@@ -262,10 +283,12 @@ class Word2VecModel private[ml] (
    dataset.withColumn($(outputCol), word2Vec(col($(inputCol))))
  }
+  @Since("1.4.0")
  override def transformSchema(schema: StructType): StructType = {
    validateAndTransformSchema(schema)
  }
+  @Since("1.4.1")
  override def copy(extra: ParamMap): Word2VecModel = {
    val copied = new Word2VecModel(uid, wordVectors)
    copyValues(copied, extra).setParent(parent)
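Finally, the Word2Vec estimator together with the model methods annotated above (getVectors, findSynonyms) in a minimal sketch; a `df` DataFrame with a tokenized "text" column of Seq[String] is assumed for illustration:

import org.apache.spark.ml.feature.Word2Vec

// Learn one vector per word, then average word vectors per document.
val word2Vec = new Word2Vec()
  .setInputCol("text")
  .setOutputCol("result")
  .setVectorSize(3)
  .setMinCount(0)
val w2vModel = word2Vec.fit(df)
val result = w2vModel.transform(df)
// findSynonyms is among the methods covered by the new @Since("1.5.0").
w2vModel.findSynonyms("spark", 2).show()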
http://git-wip-us.apache.org/repos/asf/spark/blob/37494a18/python/pyspark/ml/feature.py
----------------------------------------------------------------------
diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py
index a28764a..1e9ec0f 100755
--- a/python/pyspark/ml/feature.py
+++ b/python/pyspark/ml/feature.py
@@ -149,7 +149,7 @@ class Bucketizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, Jav
    >>> loadedBucketizer.getSplits() == bucketizer.getSplits()
    True
-    .. versionadded:: 1.3.0
+    .. versionadded:: 1.4.0
    """
    splits = \
@@ -486,14 +486,14 @@ class ElementwiseProduct(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReada
        kwargs = self.setParams._input_kwargs
        return self._set(**kwargs)
-    @since("1.5.0")
+    @since("2.0.0")
    def setScalingVec(self, value):
        """
        Sets the value of :py:attr:`scalingVec`.
        """
        return self._set(scalingVec=value)
-    @since("1.5.0")
+    @since("2.0.0")
    def getScalingVec(self):
        """
        Gets the value of scalingVec or its default value.
        """
@@ -1584,7 +1584,7 @@ class StandardScalerModel(JavaModel, JavaMLReadable, JavaMLWritable):
    """
    @property
-    @since("1.5.0")
+    @since("2.0.0")
    def std(self):
        """
        Standard deviation of the StandardScalerModel.
@@ -1592,7 +1592,7 @@ class StandardScalerModel(JavaModel, JavaMLReadable, JavaMLWritable):
        """
        return self._call_java("std")
    @property
-    @since("1.5.0")
+    @since("2.0.0")
    def mean(self):
        """
        Mean of the StandardScalerModel.
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org