Repository: spark
Updated Branches:
  refs/heads/branch-1.5 4e72839b7 -> e7db8761b
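This commit replaces the plain `@since x.y.z` scaladoc tags across MLlib with the `@Since("x.y.z")` annotation from org.apache.spark.annotation, on classes, objects, and individual members alike. Reduced to a minimal before/after sketch (latestModel is one of the members touched in the first hunk below):

    // Before: the version lived only in the scaladoc text.
    /**
     * Return the latest model.
     *
     * @since 1.1.0
     */
    def latestModel(): M = model.get

    // After: the version is a real annotation, visible to tooling.
    import org.apache.spark.annotation.Since

    /** Return the latest model. */
    @Since("1.1.0")
    def latestModel(): M = model.get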
http://git-wip-us.apache.org/repos/asf/spark/blob/e7db8761/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala index a2ab95c..cd3ed8a 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala @@ -20,7 +20,7 @@ package org.apache.spark.mllib.regression import scala.reflect.ClassTag import org.apache.spark.Logging -import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.api.java.JavaSparkContext.fakeClassTag import org.apache.spark.mllib.linalg.{Vector, Vectors} import org.apache.spark.streaming.api.java.{JavaDStream, JavaPairDStream} @@ -54,8 +54,8 @@ import org.apache.spark.streaming.dstream.DStream * the model using each of the different sources, in sequence. * * - * @since 1.1.0 */ +@Since("1.1.0") @DeveloperApi abstract class StreamingLinearAlgorithm[ M <: GeneralizedLinearModel, @@ -70,8 +70,8 @@ abstract class StreamingLinearAlgorithm[ /** * Return the latest model. * - * @since 1.1.0 */ + @Since("1.1.0") def latestModel(): M = { model.get } @@ -84,8 +84,8 @@ abstract class StreamingLinearAlgorithm[ * * @param data DStream containing labeled data * - * @since 1.3.0 */ + @Since("1.3.0") def trainOn(data: DStream[LabeledPoint]): Unit = { if (model.isEmpty) { throw new IllegalArgumentException("Model must be initialized before starting training.") @@ -106,8 +106,8 @@ abstract class StreamingLinearAlgorithm[ /** * Java-friendly version of `trainOn`. * - * @since 1.3.0 */ + @Since("1.3.0") def trainOn(data: JavaDStream[LabeledPoint]): Unit = trainOn(data.dstream) /** @@ -116,8 +116,8 @@ abstract class StreamingLinearAlgorithm[ * @param data DStream containing feature vectors * @return DStream containing predictions * - * @since 1.1.0 */ + @Since("1.1.0") def predictOn(data: DStream[Vector]): DStream[Double] = { if (model.isEmpty) { throw new IllegalArgumentException("Model must be initialized before starting prediction.") @@ -128,8 +128,8 @@ abstract class StreamingLinearAlgorithm[ /** * Java-friendly version of `predictOn`. * - * @since 1.1.0 */ + @Since("1.1.0") def predictOn(data: JavaDStream[Vector]): JavaDStream[java.lang.Double] = { JavaDStream.fromDStream(predictOn(data.dstream).asInstanceOf[DStream[java.lang.Double]]) } @@ -140,8 +140,8 @@ abstract class StreamingLinearAlgorithm[ * @tparam K key type * @return DStream containing the input keys and the predictions as values * - * @since 1.1.0 */ + @Since("1.1.0") def predictOnValues[K: ClassTag](data: DStream[(K, Vector)]): DStream[(K, Double)] = { if (model.isEmpty) { throw new IllegalArgumentException("Model must be initialized before starting prediction") @@ -153,8 +153,8 @@ abstract class StreamingLinearAlgorithm[ /** * Java-friendly version of `predictOnValues`. 
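For context, the train/predict contract above is usually exercised through a concrete subclass such as StreamingLinearRegressionWithSGD; a minimal sketch, where trainingStream, testStream (DStream[LabeledPoint]) and numFeatures are assumed to be defined by the surrounding job:

    import org.apache.spark.mllib.linalg.Vectors
    import org.apache.spark.mllib.regression.StreamingLinearRegressionWithSGD

    // The model must be initialized before trainOn/predictOn, or the
    // IllegalArgumentException checks above will fire.
    val model = new StreamingLinearRegressionWithSGD()
      .setInitialWeights(Vectors.zeros(numFeatures))

    model.trainOn(trainingStream)
    model.predictOnValues(testStream.map(lp => (lp.label, lp.features))).print()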
* - * @since 1.3.0 */ + @Since("1.3.0") def predictOnValues[K](data: JavaPairDStream[K, Vector]): JavaPairDStream[K, java.lang.Double] = { implicit val tag = fakeClassTag[K] JavaPairDStream.fromPairDStream( http://git-wip-us.apache.org/repos/asf/spark/blob/e7db8761/mllib/src/main/scala/org/apache/spark/mllib/stat/KernelDensity.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/KernelDensity.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/KernelDensity.scala index 93a6753..4a856f7 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/KernelDensity.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/KernelDensity.scala @@ -19,7 +19,7 @@ package org.apache.spark.mllib.stat import com.github.fommil.netlib.BLAS.{getInstance => blas} -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.api.java.JavaRDD import org.apache.spark.rdd.RDD @@ -37,8 +37,8 @@ import org.apache.spark.rdd.RDD * .setBandwidth(3.0) * val densities = kd.estimate(Array(-1.0, 2.0, 5.0)) * }}} - * @since 1.4.0 */ +@Since("1.4.0") @Experimental class KernelDensity extends Serializable { @@ -52,8 +52,8 @@ class KernelDensity extends Serializable { /** * Sets the bandwidth (standard deviation) of the Gaussian kernel (default: `1.0`). - * @since 1.4.0 */ + @Since("1.4.0") def setBandwidth(bandwidth: Double): this.type = { require(bandwidth > 0, s"Bandwidth must be positive, but got $bandwidth.") this.bandwidth = bandwidth @@ -62,8 +62,8 @@ class KernelDensity extends Serializable { /** * Sets the sample to use for density estimation. - * @since 1.4.0 */ + @Since("1.4.0") def setSample(sample: RDD[Double]): this.type = { this.sample = sample this @@ -71,8 +71,8 @@ class KernelDensity extends Serializable { /** * Sets the sample to use for density estimation (for Java users). - * @since 1.4.0 */ + @Since("1.4.0") def setSample(sample: JavaRDD[java.lang.Double]): this.type = { this.sample = sample.rdd.asInstanceOf[RDD[Double]] this @@ -80,8 +80,8 @@ class KernelDensity extends Serializable { /** * Estimates probability density function at the given array of points. - * @since 1.4.0 */ + @Since("1.4.0") def estimate(points: Array[Double]): Array[Double] = { val sample = this.sample val bandwidth = this.bandwidth http://git-wip-us.apache.org/repos/asf/spark/blob/e7db8761/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala index 64e4be0..51b713e 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala @@ -17,7 +17,7 @@ package org.apache.spark.mllib.stat -import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.mllib.linalg.{Vectors, Vector} /** @@ -33,8 +33,8 @@ import org.apache.spark.mllib.linalg.{Vectors, Vector} * Reference: [[http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance variance-wiki]] * Zero elements (including explicit zero values) are skipped when calling add(), * to have time complexity O(nnz) instead of O(n) for each column. 
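The summarizer accumulates per-partition state through add() and combines partial results through merge() (both annotated below), which makes it a natural fit for treeAggregate; a minimal sketch, where vecRDD is an assumed RDD[Vector] and not part of this patch:

    import org.apache.spark.mllib.linalg.Vector
    import org.apache.spark.mllib.stat.MultivariateOnlineSummarizer
    import org.apache.spark.rdd.RDD

    def summarize(vecRDD: RDD[Vector]): MultivariateOnlineSummarizer =
      vecRDD.treeAggregate(new MultivariateOnlineSummarizer)(
        (summary, v) => summary.add(v),  // fold each vector into a partition-local summary
        (a, b) => a.merge(b))            // combine the per-partition summaries

    // summarize(vecRDD).mean, .variance, .count, ... as defined below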
- * @since 1.1.0 */ +@Since("1.1.0") @DeveloperApi class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with Serializable { @@ -53,8 +53,8 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S * * @param sample The sample in dense/sparse vector format to be added into this summarizer. * @return This MultivariateOnlineSummarizer object. - * @since 1.1.0 */ + @Since("1.1.0") def add(sample: Vector): this.type = { if (n == 0) { require(sample.size > 0, s"Vector should have dimension larger than zero.") @@ -109,8 +109,8 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S * * @param other The other MultivariateOnlineSummarizer to be merged. * @return This MultivariateOnlineSummarizer object. - * @since 1.1.0 */ + @Since("1.1.0") def merge(other: MultivariateOnlineSummarizer): this.type = { if (this.totalCnt != 0 && other.totalCnt != 0) { require(n == other.n, s"Dimensions mismatch when merging with another summarizer. " + @@ -155,8 +155,8 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S /** * Sample mean of each dimension. * - * @since 1.1.0 */ + @Since("1.1.0") override def mean: Vector = { require(totalCnt > 0, s"Nothing has been added to this summarizer.") @@ -172,8 +172,8 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S /** * Sample variance of each dimension. * - * @since 1.1.0 */ + @Since("1.1.0") override def variance: Vector = { require(totalCnt > 0, s"Nothing has been added to this summarizer.") @@ -199,15 +199,15 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S /** * Sample size. * - * @since 1.1.0 */ + @Since("1.1.0") override def count: Long = totalCnt /** * Number of nonzero elements in each dimension. * - * @since 1.1.0 */ + @Since("1.1.0") override def numNonzeros: Vector = { require(totalCnt > 0, s"Nothing has been added to this summarizer.") @@ -217,8 +217,8 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S /** * Maximum value of each dimension. * - * @since 1.1.0 */ + @Since("1.1.0") override def max: Vector = { require(totalCnt > 0, s"Nothing has been added to this summarizer.") @@ -233,8 +233,8 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S /** * Minimum value of each dimension. * - * @since 1.1.0 */ + @Since("1.1.0") override def min: Vector = { require(totalCnt > 0, s"Nothing has been added to this summarizer.") @@ -249,8 +249,8 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S /** * L2 (Euclidian) norm of each dimension. * - * @since 1.2.0 */ + @Since("1.2.0") override def normL2: Vector = { require(totalCnt > 0, s"Nothing has been added to this summarizer.") @@ -268,8 +268,8 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S /** * L1 norm of each dimension. 
* - * @since 1.2.0 */ + @Since("1.2.0") override def normL1: Vector = { require(totalCnt > 0, s"Nothing has been added to this summarizer.") http://git-wip-us.apache.org/repos/asf/spark/blob/e7db8761/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateStatisticalSummary.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateStatisticalSummary.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateStatisticalSummary.scala index 3bb49f1..39a16fb 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateStatisticalSummary.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateStatisticalSummary.scala @@ -17,59 +17,60 @@ package org.apache.spark.mllib.stat +import org.apache.spark.annotation.Since import org.apache.spark.mllib.linalg.Vector /** * Trait for multivariate statistical summary of a data matrix. - * @since 1.0.0 */ +@Since("1.0.0") trait MultivariateStatisticalSummary { /** * Sample mean vector. - * @since 1.0.0 */ + @Since("1.0.0") def mean: Vector /** * Sample variance vector. Should return a zero vector if the sample size is 1. - * @since 1.0.0 */ + @Since("1.0.0") def variance: Vector /** * Sample size. - * @since 1.0.0 */ + @Since("1.0.0") def count: Long /** * Number of nonzero elements (including explicitly presented zero values) in each column. - * @since 1.0.0 */ + @Since("1.0.0") def numNonzeros: Vector /** * Maximum value of each column. - * @since 1.0.0 */ + @Since("1.0.0") def max: Vector /** * Minimum value of each column. - * @since 1.0.0 */ + @Since("1.0.0") def min: Vector /** * Euclidean magnitude of each column - * @since 1.2.0 */ + @Since("1.2.0") def normL2: Vector /** * L1 norm of each column - * @since 1.2.0 */ + @Since("1.2.0") def normL1: Vector } http://git-wip-us.apache.org/repos/asf/spark/blob/e7db8761/mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala index ef8d786..84d64a5 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala @@ -19,7 +19,7 @@ package org.apache.spark.mllib.stat import scala.annotation.varargs -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.api.java.{JavaRDD, JavaDoubleRDD} import org.apache.spark.mllib.linalg.distributed.RowMatrix import org.apache.spark.mllib.linalg.{Matrix, Vector} @@ -32,8 +32,8 @@ import org.apache.spark.rdd.RDD /** * :: Experimental :: * API for statistical functions in MLlib. - * @since 1.1.0 */ +@Since("1.1.0") @Experimental object Statistics { @@ -42,8 +42,8 @@ object Statistics { * * @param X an RDD[Vector] for which column-wise summary statistics are to be computed. * @return [[MultivariateStatisticalSummary]] object containing column-wise summary statistics. - * @since 1.1.0 */ + @Since("1.1.0") def colStats(X: RDD[Vector]): MultivariateStatisticalSummary = { new RowMatrix(X).computeColumnSummaryStatistics() } @@ -54,8 +54,8 @@ object Statistics { * * @param X an RDD[Vector] for which the correlation matrix is to be computed. * @return Pearson correlation matrix comparing columns in X. 
- * @since 1.1.0 */ + @Since("1.1.0") def corr(X: RDD[Vector]): Matrix = Correlations.corrMatrix(X) /** @@ -71,8 +71,8 @@ object Statistics { * @param method String specifying the method to use for computing correlation. * Supported: `pearson` (default), `spearman` * @return Correlation matrix comparing columns in X. - * @since 1.1.0 */ + @Since("1.1.0") def corr(X: RDD[Vector], method: String): Matrix = Correlations.corrMatrix(X, method) /** @@ -85,14 +85,14 @@ object Statistics { * @param x RDD[Double] of the same cardinality as y. * @param y RDD[Double] of the same cardinality as x. * @return A Double containing the Pearson correlation between the two input RDD[Double]s - * @since 1.1.0 */ + @Since("1.1.0") def corr(x: RDD[Double], y: RDD[Double]): Double = Correlations.corr(x, y) /** * Java-friendly version of [[corr()]] - * @since 1.4.1 */ + @Since("1.4.1") def corr(x: JavaRDD[java.lang.Double], y: JavaRDD[java.lang.Double]): Double = corr(x.rdd.asInstanceOf[RDD[Double]], y.rdd.asInstanceOf[RDD[Double]]) @@ -109,14 +109,14 @@ object Statistics { * Supported: `pearson` (default), `spearman` * @return A Double containing the correlation between the two input RDD[Double]s using the * specified method. - * @since 1.1.0 */ + @Since("1.1.0") def corr(x: RDD[Double], y: RDD[Double], method: String): Double = Correlations.corr(x, y, method) /** * Java-friendly version of [[corr()]] - * @since 1.4.1 */ + @Since("1.4.1") def corr(x: JavaRDD[java.lang.Double], y: JavaRDD[java.lang.Double], method: String): Double = corr(x.rdd.asInstanceOf[RDD[Double]], y.rdd.asInstanceOf[RDD[Double]], method) @@ -133,8 +133,8 @@ object Statistics { * `expected` is rescaled if the `expected` sum differs from the `observed` sum. * @return ChiSquaredTest object containing the test statistic, degrees of freedom, p-value, * the method used, and the null hypothesis. - * @since 1.1.0 */ + @Since("1.1.0") def chiSqTest(observed: Vector, expected: Vector): ChiSqTestResult = { ChiSqTest.chiSquared(observed, expected) } @@ -148,8 +148,8 @@ object Statistics { * @param observed Vector containing the observed categorical counts/relative frequencies. * @return ChiSquaredTest object containing the test statistic, degrees of freedom, p-value, * the method used, and the null hypothesis. - * @since 1.1.0 */ + @Since("1.1.0") def chiSqTest(observed: Vector): ChiSqTestResult = ChiSqTest.chiSquared(observed) /** @@ -159,8 +159,8 @@ object Statistics { * @param observed The contingency matrix (containing either counts or relative frequencies). * @return ChiSquaredTest object containing the test statistic, degrees of freedom, p-value, * the method used, and the null hypothesis. - * @since 1.1.0 */ + @Since("1.1.0") def chiSqTest(observed: Matrix): ChiSqTestResult = ChiSqTest.chiSquaredMatrix(observed) /** @@ -172,13 +172,14 @@ object Statistics { * Real-valued features will be treated as categorical for each distinct value. * @return an array containing the ChiSquaredTestResult for every feature against the label. * The order of the elements in the returned array reflects the order of input features. 
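Taken together, the Statistics entry points annotated in this file compose as in the following short sketch (vecRDD: RDD[Vector] and labeled: RDD[LabeledPoint] are assumed inputs):

    import org.apache.spark.mllib.linalg.Vectors
    import org.apache.spark.mllib.stat.Statistics

    val summary = Statistics.colStats(vecRDD)
    println(s"mean: ${summary.mean}, variance: ${summary.variance}")

    val pearson  = Statistics.corr(vecRDD)               // Pearson is the default method
    val spearman = Statistics.corr(vecRDD, "spearman")

    val goodness   = Statistics.chiSqTest(Vectors.dense(10, 20, 30))  // against a uniform null
    val perFeature = Statistics.chiSqTest(labeled)       // one result per feature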
- * @since 1.1.0 */ + @Since("1.1.0") def chiSqTest(data: RDD[LabeledPoint]): Array[ChiSqTestResult] = { ChiSqTest.chiSquaredFeatures(data) } /** Java-friendly version of [[chiSqTest()]] */ + @Since("1.5.0") def chiSqTest(data: JavaRDD[LabeledPoint]): Array[ChiSqTestResult] = chiSqTest(data.rdd) /** @@ -194,6 +195,7 @@ object Statistics { * @return [[org.apache.spark.mllib.stat.test.KolmogorovSmirnovTestResult]] object containing test * statistic, p-value, and null hypothesis. */ + @Since("1.5.0") def kolmogorovSmirnovTest(data: RDD[Double], cdf: Double => Double) : KolmogorovSmirnovTestResult = { KolmogorovSmirnovTest.testOneSample(data, cdf) @@ -210,6 +212,7 @@ object Statistics { * @return [[org.apache.spark.mllib.stat.test.KolmogorovSmirnovTestResult]] object containing test * statistic, p-value, and null hypothesis. */ + @Since("1.5.0") @varargs def kolmogorovSmirnovTest(data: RDD[Double], distName: String, params: Double*) : KolmogorovSmirnovTestResult = { @@ -217,6 +220,7 @@ object Statistics { } /** Java-friendly version of [[kolmogorovSmirnovTest()]] */ + @Since("1.5.0") @varargs def kolmogorovSmirnovTest( data: JavaDoubleRDD, http://git-wip-us.apache.org/repos/asf/spark/blob/e7db8761/mllib/src/main/scala/org/apache/spark/mllib/stat/distribution/MultivariateGaussian.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/distribution/MultivariateGaussian.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/distribution/MultivariateGaussian.scala index 9aa7763..bd4d813 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/distribution/MultivariateGaussian.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/distribution/MultivariateGaussian.scala @@ -19,7 +19,7 @@ package org.apache.spark.mllib.stat.distribution import breeze.linalg.{DenseVector => DBV, DenseMatrix => DBM, diag, max, eigSym, Vector => BV} -import org.apache.spark.annotation.DeveloperApi; +import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.mllib.linalg.{Vectors, Vector, Matrices, Matrix} import org.apache.spark.mllib.util.MLUtils @@ -32,8 +32,8 @@ import org.apache.spark.mllib.util.MLUtils * * @param mu The mean vector of the distribution * @param sigma The covariance matrix of the distribution - * @since 1.3.0 */ +@Since("1.3.0") @DeveloperApi class MultivariateGaussian ( val mu: Vector, @@ -62,15 +62,15 @@ class MultivariateGaussian ( private val (rootSigmaInv: DBM[Double], u: Double) = calculateCovarianceConstants /** Returns density of this multivariate Gaussian at given point, x - * @since 1.3.0 */ + @Since("1.3.0") def pdf(x: Vector): Double = { pdf(x.toBreeze) } /** Returns the log-density of this multivariate Gaussian at given point, x - * @since 1.3.0 */ + @Since("1.3.0") def logpdf(x: Vector): Double = { logpdf(x.toBreeze) } http://git-wip-us.apache.org/repos/asf/spark/blob/e7db8761/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala index e5200b8..9728410 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala @@ -22,7 +22,7 @@ import scala.collection.mutable import scala.collection.mutable.ArrayBuilder import org.apache.spark.Logging -import 
org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.api.java.JavaRDD import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.tree.RandomForest.NodeIndexInfo @@ -43,8 +43,8 @@ import org.apache.spark.util.random.XORShiftRandom * @param strategy The configuration parameters for the tree algorithm which specify the type * of algorithm (classification, regression, etc.), feature type (continuous, * categorical), depth of the tree, quantile calculation strategy, etc. - * @since 1.0.0 */ +@Since("1.0.0") @Experimental class DecisionTree (private val strategy: Strategy) extends Serializable with Logging { @@ -54,8 +54,8 @@ class DecisionTree (private val strategy: Strategy) extends Serializable with Lo * Method to train a decision tree model over an RDD * @param input Training data: RDD of [[org.apache.spark.mllib.regression.LabeledPoint]] * @return DecisionTreeModel that can be used for prediction - * @since 1.2.0 */ + @Since("1.2.0") def run(input: RDD[LabeledPoint]): DecisionTreeModel = { // Note: random seed will not be used since numTrees = 1. val rf = new RandomForest(strategy, numTrees = 1, featureSubsetStrategy = "all", seed = 0) @@ -64,9 +64,7 @@ class DecisionTree (private val strategy: Strategy) extends Serializable with Lo } } -/** - * @since 1.0.0 - */ +@Since("1.0.0") object DecisionTree extends Serializable with Logging { /** @@ -84,8 +82,8 @@ object DecisionTree extends Serializable with Logging { * of algorithm (classification, regression, etc.), feature type (continuous, * categorical), depth of the tree, quantile calculation strategy, etc. * @return DecisionTreeModel that can be used for prediction - * @since 1.0.0 - */ + */ + @Since("1.0.0") def train(input: RDD[LabeledPoint], strategy: Strategy): DecisionTreeModel = { new DecisionTree(strategy).run(input) } @@ -106,8 +104,8 @@ object DecisionTree extends Serializable with Logging { * @param maxDepth Maximum depth of the tree. * E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes. * @return DecisionTreeModel that can be used for prediction - * @since 1.0.0 */ + @Since("1.0.0") def train( input: RDD[LabeledPoint], algo: Algo, @@ -134,8 +132,8 @@ object DecisionTree extends Serializable with Logging { * E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes. * @param numClasses number of classes for classification. Default value of 2. * @return DecisionTreeModel that can be used for prediction - * @since 1.2.0 */ + @Since("1.2.0") def train( input: RDD[LabeledPoint], algo: Algo, @@ -168,8 +166,8 @@ object DecisionTree extends Serializable with Logging { * E.g., an entry (n -> k) indicates that feature n is categorical * with k categories indexed from 0: {0, 1, ..., k-1}. 
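For the trainClassifier helper annotated just below, a minimal classification sketch (trainingData is an assumed RDD[LabeledPoint]):

    import org.apache.spark.mllib.tree.DecisionTree

    val model = DecisionTree.trainClassifier(
      trainingData,
      2,                     // numClasses
      Map(0 -> 2, 3 -> 4),   // categoricalFeaturesInfo: feature 0 has 2 categories, feature 3 has 4
      "gini",                // impurity
      5,                     // maxDepth
      32)                    // maxBins (suggested value per the docs above)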
* @return DecisionTreeModel that can be used for prediction - * @since 1.0.0 */ + @Since("1.0.0") def train( input: RDD[LabeledPoint], algo: Algo, @@ -201,8 +199,8 @@ object DecisionTree extends Serializable with Logging { * @param maxBins maximum number of bins used for splitting features * (suggested value: 32) * @return DecisionTreeModel that can be used for prediction - * @since 1.1.0 */ + @Since("1.1.0") def trainClassifier( input: RDD[LabeledPoint], numClasses: Int, @@ -217,8 +215,8 @@ object DecisionTree extends Serializable with Logging { /** * Java-friendly API for [[org.apache.spark.mllib.tree.DecisionTree$#trainClassifier]] - * @since 1.1.0 */ + @Since("1.1.0") def trainClassifier( input: JavaRDD[LabeledPoint], numClasses: Int, @@ -247,8 +245,8 @@ object DecisionTree extends Serializable with Logging { * @param maxBins maximum number of bins used for splitting features * (suggested value: 32) * @return DecisionTreeModel that can be used for prediction - * @since 1.1.0 */ + @Since("1.1.0") def trainRegressor( input: RDD[LabeledPoint], categoricalFeaturesInfo: Map[Int, Int], @@ -261,8 +259,8 @@ object DecisionTree extends Serializable with Logging { /** * Java-friendly API for [[org.apache.spark.mllib.tree.DecisionTree$#trainRegressor]] - * @since 1.1.0 */ + @Since("1.1.0") def trainRegressor( input: JavaRDD[LabeledPoint], categoricalFeaturesInfo: java.util.Map[java.lang.Integer, java.lang.Integer], http://git-wip-us.apache.org/repos/asf/spark/blob/e7db8761/mllib/src/main/scala/org/apache/spark/mllib/tree/GradientBoostedTrees.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/GradientBoostedTrees.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/GradientBoostedTrees.scala index 1436170..e750408 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/GradientBoostedTrees.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/GradientBoostedTrees.scala @@ -18,7 +18,7 @@ package org.apache.spark.mllib.tree import org.apache.spark.Logging -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.api.java.JavaRDD import org.apache.spark.mllib.impl.PeriodicRDDCheckpointer import org.apache.spark.mllib.regression.LabeledPoint @@ -48,8 +48,8 @@ import org.apache.spark.storage.StorageLevel * for other loss functions. * * @param boostingStrategy Parameters for the gradient boosting algorithm. - * @since 1.2.0 */ +@Since("1.2.0") @Experimental class GradientBoostedTrees(private val boostingStrategy: BoostingStrategy) extends Serializable with Logging { @@ -58,8 +58,8 @@ class GradientBoostedTrees(private val boostingStrategy: BoostingStrategy) * Method to train a gradient boosting model * @param input Training dataset: RDD of [[org.apache.spark.mllib.regression.LabeledPoint]]. * @return a gradient boosted trees model that can be used for prediction - * @since 1.2.0 */ + @Since("1.2.0") def run(input: RDD[LabeledPoint]): GradientBoostedTreesModel = { val algo = boostingStrategy.treeStrategy.algo algo match { @@ -76,8 +76,8 @@ class GradientBoostedTrees(private val boostingStrategy: BoostingStrategy) /** * Java-friendly API for [[org.apache.spark.mllib.tree.GradientBoostedTrees!#run]]. 
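A typical boosting run against this API, using the defaultParams factory and train method annotated below (trainingData assumed):

    import org.apache.spark.mllib.tree.GradientBoostedTrees
    import org.apache.spark.mllib.tree.configuration.BoostingStrategy

    val boostingStrategy = BoostingStrategy.defaultParams("Classification")
    boostingStrategy.numIterations = 10           // @BeanProperty var
    boostingStrategy.treeStrategy.numClasses = 2
    val model = GradientBoostedTrees.train(trainingData, boostingStrategy)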
- * @since 1.2.0 */ + @Since("1.2.0") def run(input: JavaRDD[LabeledPoint]): GradientBoostedTreesModel = { run(input.rdd) } @@ -91,8 +91,8 @@ class GradientBoostedTrees(private val boostingStrategy: BoostingStrategy) * E.g., these two datasets could be created from an original dataset * by using [[org.apache.spark.rdd.RDD.randomSplit()]] * @return a gradient boosted trees model that can be used for prediction - * @since 1.4.0 */ + @Since("1.4.0") def runWithValidation( input: RDD[LabeledPoint], validationInput: RDD[LabeledPoint]): GradientBoostedTreesModel = { @@ -115,8 +115,8 @@ class GradientBoostedTrees(private val boostingStrategy: BoostingStrategy) /** * Java-friendly API for [[org.apache.spark.mllib.tree.GradientBoostedTrees!#runWithValidation]]. - * @since 1.4.0 */ + @Since("1.4.0") def runWithValidation( input: JavaRDD[LabeledPoint], validationInput: JavaRDD[LabeledPoint]): GradientBoostedTreesModel = { @@ -124,9 +124,7 @@ class GradientBoostedTrees(private val boostingStrategy: BoostingStrategy) } } -/** - * @since 1.2.0 - */ +@Since("1.2.0") object GradientBoostedTrees extends Logging { /** @@ -137,8 +135,8 @@ object GradientBoostedTrees extends Logging { * For regression, labels are real numbers. * @param boostingStrategy Configuration options for the boosting algorithm. * @return a gradient boosted trees model that can be used for prediction - * @since 1.2.0 */ + @Since("1.2.0") def train( input: RDD[LabeledPoint], boostingStrategy: BoostingStrategy): GradientBoostedTreesModel = { @@ -147,8 +145,8 @@ object GradientBoostedTrees extends Logging { /** * Java-friendly API for [[org.apache.spark.mllib.tree.GradientBoostedTrees$#train]] - * @since 1.2.0 */ + @Since("1.2.0") def train( input: JavaRDD[LabeledPoint], boostingStrategy: BoostingStrategy): GradientBoostedTreesModel = { http://git-wip-us.apache.org/repos/asf/spark/blob/e7db8761/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala index 9f32306..63a902f 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala @@ -23,7 +23,7 @@ import scala.collection.mutable import scala.collection.JavaConverters._ import org.apache.spark.Logging -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.api.java.JavaRDD import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.tree.configuration.Strategy @@ -260,9 +260,7 @@ private class RandomForest ( } -/** - * @since 1.2.0 - */ +@Since("1.2.0") object RandomForest extends Serializable with Logging { /** @@ -279,8 +277,8 @@ object RandomForest extends Serializable with Logging { * if numTrees > 1 (forest) set to "sqrt". * @param seed Random seed for bootstrapping and choosing feature subsets. * @return a random forest model that can be used for prediction - * @since 1.2.0 */ + @Since("1.2.0") def trainClassifier( input: RDD[LabeledPoint], strategy: Strategy, @@ -317,8 +315,8 @@ object RandomForest extends Serializable with Logging { * (suggested value: 100) * @param seed Random seed for bootstrapping and choosing feature subsets. 
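For the forest trainer documented here, a minimal classification sketch (trainingData assumed):

    import org.apache.spark.mllib.tree.RandomForest

    val model = RandomForest.trainClassifier(
      trainingData,
      2,                       // numClasses
      Map.empty[Int, Int],     // categoricalFeaturesInfo: all features continuous
      100,                     // numTrees (suggested value per the docs above)
      "auto",                  // featureSubsetStrategy; see supportedFeatureSubsetStrategies
      "gini",                  // impurity
      4,                       // maxDepth
      32,                      // maxBins
      12345)                   // seed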
* @return a random forest model that can be used for prediction - * @since 1.2.0 */ + @Since("1.2.0") def trainClassifier( input: RDD[LabeledPoint], numClasses: Int, @@ -337,8 +335,8 @@ object RandomForest extends Serializable with Logging { /** * Java-friendly API for [[org.apache.spark.mllib.tree.RandomForest$#trainClassifier]] - * @since 1.2.0 */ + @Since("1.2.0") def trainClassifier( input: JavaRDD[LabeledPoint], numClasses: Int, @@ -368,8 +366,8 @@ object RandomForest extends Serializable with Logging { * if numTrees > 1 (forest) set to "onethird". * @param seed Random seed for bootstrapping and choosing feature subsets. * @return a random forest model that can be used for prediction - * @since 1.2.0 */ + @Since("1.2.0") def trainRegressor( input: RDD[LabeledPoint], strategy: Strategy, @@ -405,8 +403,8 @@ object RandomForest extends Serializable with Logging { * (suggested value: 100) * @param seed Random seed for bootstrapping and choosing feature subsets. * @return a random forest model that can be used for prediction - * @since 1.2.0 */ + @Since("1.2.0") def trainRegressor( input: RDD[LabeledPoint], categoricalFeaturesInfo: Map[Int, Int], @@ -424,8 +422,8 @@ object RandomForest extends Serializable with Logging { /** * Java-friendly API for [[org.apache.spark.mllib.tree.RandomForest$#trainRegressor]] - * @since 1.2.0 */ + @Since("1.2.0") def trainRegressor( input: JavaRDD[LabeledPoint], categoricalFeaturesInfo: java.util.Map[java.lang.Integer, java.lang.Integer], @@ -442,8 +440,8 @@ object RandomForest extends Serializable with Logging { /** * List of supported feature subset sampling strategies. - * @since 1.2.0 */ + @Since("1.2.0") val supportedFeatureSubsetStrategies: Array[String] = Array("auto", "all", "sqrt", "log2", "onethird") http://git-wip-us.apache.org/repos/asf/spark/blob/e7db8761/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala index d9a49aa..8301ad1 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala @@ -17,13 +17,13 @@ package org.apache.spark.mllib.tree.configuration -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.{Experimental, Since} /** * :: Experimental :: * Enum to select the algorithm for the decision tree - * @since 1.0.0 */ +@Since("1.0.0") @Experimental object Algo extends Enumeration { type Algo = Value http://git-wip-us.apache.org/repos/asf/spark/blob/e7db8761/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/BoostingStrategy.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/BoostingStrategy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/BoostingStrategy.scala index 88e5f57..7c56998 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/BoostingStrategy.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/BoostingStrategy.scala @@ -19,7 +19,7 @@ package org.apache.spark.mllib.tree.configuration import scala.beans.BeanProperty -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.{Experimental, Since} import 
org.apache.spark.mllib.tree.configuration.Algo._ import org.apache.spark.mllib.tree.loss.{LogLoss, SquaredError, Loss} @@ -38,8 +38,8 @@ import org.apache.spark.mllib.tree.loss.{LogLoss, SquaredError, Loss} * validation input between two iterations is less than the validationTol * then stop. Ignored when * [[org.apache.spark.mllib.tree.GradientBoostedTrees.run()]] is used. - * @since 1.2.0 */ +@Since("1.2.0") @Experimental case class BoostingStrategy( // Required boosting parameters @@ -71,9 +71,7 @@ case class BoostingStrategy( } } -/** - * @since 1.2.0 - */ +@Since("1.2.0") @Experimental object BoostingStrategy { @@ -81,8 +79,8 @@ object BoostingStrategy { * Returns default configuration for the boosting algorithm * @param algo Learning goal. Supported: "Classification" or "Regression" * @return Configuration for boosting algorithm - * @since 1.2.0 */ + @Since("1.2.0") def defaultParams(algo: String): BoostingStrategy = { defaultParams(Algo.fromString(algo)) } @@ -93,8 +91,8 @@ object BoostingStrategy { * [[org.apache.spark.mllib.tree.configuration.Algo.Classification]], * [[org.apache.spark.mllib.tree.configuration.Algo.Regression]] * @return Configuration for boosting algorithm - * @since 1.3.0 */ + @Since("1.3.0") def defaultParams(algo: Algo): BoostingStrategy = { val treeStrategy = Strategy.defaultStrategy(algo) treeStrategy.maxDepth = 3 http://git-wip-us.apache.org/repos/asf/spark/blob/e7db8761/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/FeatureType.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/FeatureType.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/FeatureType.scala index 0684caf..bb7c7ee 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/FeatureType.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/FeatureType.scala @@ -17,13 +17,13 @@ package org.apache.spark.mllib.tree.configuration -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.{Experimental, Since} /** * :: Experimental :: * Enum to describe whether a feature is "continuous" or "categorical" - * @since 1.0.0 */ +@Since("1.0.0") @Experimental object FeatureType extends Enumeration { type FeatureType = Value http://git-wip-us.apache.org/repos/asf/spark/blob/e7db8761/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/QuantileStrategy.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/QuantileStrategy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/QuantileStrategy.scala index 2daa63c..904e42d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/QuantileStrategy.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/QuantileStrategy.scala @@ -17,13 +17,13 @@ package org.apache.spark.mllib.tree.configuration -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.{Experimental, Since} /** * :: Experimental :: * Enum for selecting the quantile calculation strategy - * @since 1.0.0 */ +@Since("1.0.0") @Experimental object QuantileStrategy extends Enumeration { type QuantileStrategy = Value http://git-wip-us.apache.org/repos/asf/spark/blob/e7db8761/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala 
---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala index 7ae25a8..a58f01b 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala @@ -20,7 +20,7 @@ package org.apache.spark.mllib.tree.configuration import scala.beans.BeanProperty import scala.collection.JavaConverters._ -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.mllib.tree.impurity.{Variance, Entropy, Gini, Impurity} import org.apache.spark.mllib.tree.configuration.Algo._ import org.apache.spark.mllib.tree.configuration.QuantileStrategy._ @@ -66,8 +66,8 @@ import org.apache.spark.mllib.tree.configuration.QuantileStrategy._ * E.g. 10 means that the cache will get checkpointed every 10 updates. If * the checkpoint directory is not set in * [[org.apache.spark.SparkContext]], this setting is ignored. - * @since 1.0.0 */ +@Since("1.0.0") @Experimental class Strategy ( @BeanProperty var algo: Algo, @@ -85,23 +85,23 @@ class Strategy ( @BeanProperty var checkpointInterval: Int = 10) extends Serializable { /** - * @since 1.2.0 */ + @Since("1.2.0") def isMulticlassClassification: Boolean = { algo == Classification && numClasses > 2 } /** - * @since 1.2.0 */ + @Since("1.2.0") def isMulticlassWithCategoricalFeatures: Boolean = { isMulticlassClassification && (categoricalFeaturesInfo.size > 0) } /** * Java-friendly constructor for [[org.apache.spark.mllib.tree.configuration.Strategy]] - * @since 1.1.0 */ + @Since("1.1.0") def this( algo: Algo, impurity: Impurity, @@ -115,8 +115,8 @@ class Strategy ( /** * Sets Algorithm using a String. - * @since 1.2.0 */ + @Since("1.2.0") def setAlgo(algo: String): Unit = algo match { case "Classification" => setAlgo(Classification) case "Regression" => setAlgo(Regression) @@ -124,8 +124,8 @@ class Strategy ( /** * Sets categoricalFeaturesInfo using a Java Map. - * @since 1.2.0 */ + @Since("1.2.0") def setCategoricalFeaturesInfo( categoricalFeaturesInfo: java.util.Map[java.lang.Integer, java.lang.Integer]): Unit = { this.categoricalFeaturesInfo = @@ -174,8 +174,8 @@ class Strategy ( /** * Returns a shallow copy of this instance. 
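Because the Strategy fields are @BeanProperty vars, a configured instance can be built up imperatively and handed to DecisionTree directly; a sketch using the defaultStrategy factory annotated below (trainingData assumed):

    import org.apache.spark.mllib.tree.DecisionTree
    import org.apache.spark.mllib.tree.configuration.Strategy

    val strategy = Strategy.defaultStrategy("Classification")
    strategy.maxDepth = 8                         // @BeanProperty var
    strategy.setCategoricalFeaturesInfo(
      java.util.Collections.singletonMap(Int.box(0), Int.box(3)))  // Java-friendly setter
    val model = new DecisionTree(strategy).run(trainingData)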
- * @since 1.2.0 */ + @Since("1.2.0") def copy: Strategy = { new Strategy(algo, impurity, maxDepth, numClasses, maxBins, quantileCalculationStrategy, categoricalFeaturesInfo, minInstancesPerNode, minInfoGain, @@ -183,17 +183,15 @@ class Strategy ( } } -/** - * @since 1.2.0 - */ +@Since("1.2.0") @Experimental object Strategy { /** * Construct a default set of parameters for [[org.apache.spark.mllib.tree.DecisionTree]] * @param algo "Classification" or "Regression" - * @since 1.2.0 */ + @Since("1.2.0") def defaultStrategy(algo: String): Strategy = { defaultStrategy(Algo.fromString(algo)) } @@ -201,8 +199,8 @@ object Strategy { /** * Construct a default set of parameters for [[org.apache.spark.mllib.tree.DecisionTree]] * @param algo Algo.Classification or Algo.Regression - * @since 1.3.0 */ + @Since("1.3.0") def defaultStrategy(algo: Algo): Strategy = algo match { case Algo.Classification => new Strategy(algo = Classification, impurity = Gini, maxDepth = 10, http://git-wip-us.apache.org/repos/asf/spark/blob/e7db8761/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala index 0b6c726..73df6b0 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala @@ -17,14 +17,14 @@ package org.apache.spark.mllib.tree.impurity -import org.apache.spark.annotation.{DeveloperApi, Experimental} +import org.apache.spark.annotation.{DeveloperApi, Experimental, Since} /** * :: Experimental :: * Class for calculating [[http://en.wikipedia.org/wiki/Binary_entropy_function entropy]] during * binary classification. - * @since 1.0.0 */ +@Since("1.0.0") @Experimental object Entropy extends Impurity { @@ -36,8 +36,8 @@ object Entropy extends Impurity { * @param counts Array[Double] with counts for each label * @param totalCount sum of counts for all labels * @return information value, or 0 if totalCount = 0 - * @since 1.1.0 */ + @Since("1.1.0") @DeveloperApi override def calculate(counts: Array[Double], totalCount: Double): Double = { if (totalCount == 0) { @@ -64,8 +64,8 @@ object Entropy extends Impurity { * @param sum sum of labels * @param sumSquares summation of squares of the labels * @return information value, or 0 if count = 0 - * @since 1.0.0 */ + @Since("1.0.0") @DeveloperApi override def calculate(count: Double, sum: Double, sumSquares: Double): Double = throw new UnsupportedOperationException("Entropy.calculate") @@ -73,8 +73,8 @@ object Entropy extends Impurity { /** * Get this impurity instance. * This is useful for passing impurity parameters to a Strategy in Java. 
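Concretely, instance exists only to give non-Scala callers a handle on the Scala singleton; a sketch of both spellings, reusing the strategy value from the sketch above:

    import org.apache.spark.mllib.tree.impurity.Entropy

    strategy.setImpurity(Entropy.instance)   // the handle intended for Java callers
    strategy.impurity = Entropy              // the idiomatic Scala equivalent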
- * @since 1.1.0 */ + @Since("1.1.0") def instance: this.type = this } http://git-wip-us.apache.org/repos/asf/spark/blob/e7db8761/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala index 3b0be42..f21845b 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala @@ -17,15 +17,15 @@ package org.apache.spark.mllib.tree.impurity -import org.apache.spark.annotation.{DeveloperApi, Experimental} +import org.apache.spark.annotation.{DeveloperApi, Experimental, Since} /** * :: Experimental :: * Class for calculating the * [[http://en.wikipedia.org/wiki/Decision_tree_learning#Gini_impurity Gini impurity]] * during binary classification. - * @since 1.0.0 */ +@Since("1.0.0") @Experimental object Gini extends Impurity { @@ -35,8 +35,8 @@ object Gini extends Impurity { * @param counts Array[Double] with counts for each label * @param totalCount sum of counts for all labels * @return information value, or 0 if totalCount = 0 - * @since 1.1.0 */ + @Since("1.1.0") @DeveloperApi override def calculate(counts: Array[Double], totalCount: Double): Double = { if (totalCount == 0) { @@ -60,8 +60,8 @@ object Gini extends Impurity { * @param sum sum of labels * @param sumSquares summation of squares of the labels * @return information value, or 0 if count = 0 - * @since 1.0.0 */ + @Since("1.0.0") @DeveloperApi override def calculate(count: Double, sum: Double, sumSquares: Double): Double = throw new UnsupportedOperationException("Gini.calculate") @@ -69,8 +69,8 @@ object Gini extends Impurity { /** * Get this impurity instance. * This is useful for passing impurity parameters to a Strategy in Java. - * @since 1.1.0 */ + @Since("1.1.0") def instance: this.type = this } http://git-wip-us.apache.org/repos/asf/spark/blob/e7db8761/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala index dd29740..4637dcc 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala @@ -17,7 +17,7 @@ package org.apache.spark.mllib.tree.impurity -import org.apache.spark.annotation.{DeveloperApi, Experimental} +import org.apache.spark.annotation.{DeveloperApi, Experimental, Since} /** * :: Experimental :: @@ -25,8 +25,8 @@ import org.apache.spark.annotation.{DeveloperApi, Experimental} * This trait is used for * (a) setting the impurity parameter in [[org.apache.spark.mllib.tree.configuration.Strategy]] * (b) calculating impurity values from sufficient statistics. 
- * @since 1.0.0 */ +@Since("1.0.0") @Experimental trait Impurity extends Serializable { @@ -36,8 +36,8 @@ trait Impurity extends Serializable { * @param counts Array[Double] with counts for each label * @param totalCount sum of counts for all labels * @return information value, or 0 if totalCount = 0 - * @since 1.1.0 */ + @Since("1.1.0") @DeveloperApi def calculate(counts: Array[Double], totalCount: Double): Double @@ -48,8 +48,8 @@ trait Impurity extends Serializable { * @param sum sum of labels * @param sumSquares summation of squares of the labels * @return information value, or 0 if count = 0 - * @since 1.0.0 */ + @Since("1.0.0") @DeveloperApi def calculate(count: Double, sum: Double, sumSquares: Double): Double } http://git-wip-us.apache.org/repos/asf/spark/blob/e7db8761/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala index adbe058..a741972 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala @@ -17,13 +17,13 @@ package org.apache.spark.mllib.tree.impurity -import org.apache.spark.annotation.{DeveloperApi, Experimental} +import org.apache.spark.annotation.{DeveloperApi, Experimental, Since} /** * :: Experimental :: * Class for calculating variance during regression - * @since 1.0.0 */ +@Since("1.0.0") @Experimental object Variance extends Impurity { @@ -33,8 +33,8 @@ object Variance extends Impurity { * @param counts Array[Double] with counts for each label * @param totalCount sum of counts for all labels * @return information value, or 0 if totalCount = 0 - * @since 1.1.0 */ + @Since("1.1.0") @DeveloperApi override def calculate(counts: Array[Double], totalCount: Double): Double = throw new UnsupportedOperationException("Variance.calculate") @@ -46,8 +46,8 @@ object Variance extends Impurity { * @param sum sum of labels * @param sumSquares summation of squares of the labels * @return information value, or 0 if count = 0 - * @since 1.0.0 */ + @Since("1.0.0") @DeveloperApi override def calculate(count: Double, sum: Double, sumSquares: Double): Double = { if (count == 0) { @@ -60,8 +60,8 @@ object Variance extends Impurity { /** * Get this impurity instance. * This is useful for passing impurity parameters to a Strategy in Java. 
- * @since 1.0.0 */ + @Since("1.0.0") def instance: this.type = this } http://git-wip-us.apache.org/repos/asf/spark/blob/e7db8761/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/AbsoluteError.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/AbsoluteError.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/AbsoluteError.scala index c6e3d0d..bab7b8c 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/AbsoluteError.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/AbsoluteError.scala @@ -17,7 +17,7 @@ package org.apache.spark.mllib.tree.loss -import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.tree.model.TreeEnsembleModel @@ -29,8 +29,8 @@ import org.apache.spark.mllib.tree.model.TreeEnsembleModel * The absolute (L1) error is defined as: * |y - F(x)| * where y is the label and F(x) is the model prediction for features x. - * @since 1.2.0 */ +@Since("1.2.0") @DeveloperApi object AbsoluteError extends Loss { @@ -41,8 +41,8 @@ object AbsoluteError extends Loss { * @param prediction Predicted label. * @param label True label. * @return Loss gradient - * @since 1.2.0 */ + @Since("1.2.0") override def gradient(prediction: Double, label: Double): Double = { if (label - prediction < 0) 1.0 else -1.0 } http://git-wip-us.apache.org/repos/asf/spark/blob/e7db8761/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/LogLoss.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/LogLoss.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/LogLoss.scala index eee5844..b2b4594 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/LogLoss.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/LogLoss.scala @@ -17,7 +17,7 @@ package org.apache.spark.mllib.tree.loss -import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.tree.model.TreeEnsembleModel import org.apache.spark.mllib.util.MLUtils @@ -31,8 +31,8 @@ import org.apache.spark.mllib.util.MLUtils * The log loss is defined as: * 2 log(1 + exp(-2 y F(x))) * where y is a label in {-1, 1} and F(x) is the model prediction for features x. - * @since 1.2.0 */ +@Since("1.2.0") @DeveloperApi object LogLoss extends Loss { @@ -43,8 +43,8 @@ object LogLoss extends Loss { * @param prediction Predicted label. * @param label True label. 
* @return Loss gradient - * @since 1.2.0 */ + @Since("1.2.0") override def gradient(prediction: Double, label: Double): Double = { - 4.0 * label / (1.0 + math.exp(2.0 * label * prediction)) } http://git-wip-us.apache.org/repos/asf/spark/blob/e7db8761/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Loss.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Loss.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Loss.scala index 7c9fb92..687cde3 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Loss.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Loss.scala @@ -17,7 +17,7 @@ package org.apache.spark.mllib.tree.loss -import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.tree.model.TreeEnsembleModel import org.apache.spark.rdd.RDD @@ -26,8 +26,8 @@ import org.apache.spark.rdd.RDD /** * :: DeveloperApi :: * Trait for adding "pluggable" loss functions for the gradient boosting algorithm. - * @since 1.2.0 */ +@Since("1.2.0") @DeveloperApi trait Loss extends Serializable { @@ -36,8 +36,8 @@ trait Loss extends Serializable { * @param prediction Predicted feature * @param label true label. * @return Loss gradient. - * @since 1.2.0 */ + @Since("1.2.0") def gradient(prediction: Double, label: Double): Double /** @@ -47,8 +47,8 @@ trait Loss extends Serializable { * @param model Model of the weak learner. * @param data Training dataset: RDD of [[org.apache.spark.mllib.regression.LabeledPoint]]. * @return Measure of model error on data - * @since 1.2.0 */ + @Since("1.2.0") def computeError(model: TreeEnsembleModel, data: RDD[LabeledPoint]): Double = { data.map(point => computeError(model.predict(point.features), point.label)).mean() } http://git-wip-us.apache.org/repos/asf/spark/blob/e7db8761/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Losses.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Losses.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Losses.scala index 47dc94c..2b112fb 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Losses.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Losses.scala @@ -17,14 +17,12 @@ package org.apache.spark.mllib.tree.loss -/** - * @since 1.2.0 - */ +import org.apache.spark.annotation.Since + +@Since("1.2.0") object Losses { - /** - * @since 1.2.0 - */ + @Since("1.2.0") def fromString(name: String): Loss = name match { case "leastSquaresError" => SquaredError case "leastAbsoluteError" => AbsoluteError http://git-wip-us.apache.org/repos/asf/spark/blob/e7db8761/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/SquaredError.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/SquaredError.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/SquaredError.scala index ff8903d..3f7d3d3 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/SquaredError.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/SquaredError.scala @@ -17,7 +17,7 @@ package org.apache.spark.mllib.tree.loss -import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.annotation.{DeveloperApi, Since} import 
org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.tree.model.TreeEnsembleModel @@ -29,8 +29,8 @@ import org.apache.spark.mllib.tree.model.TreeEnsembleModel * The squared (L2) error is defined as: * (y - F(x))**2 * where y is the label and F(x) is the model prediction for features x. - * @since 1.2.0 */ +@Since("1.2.0") @DeveloperApi object SquaredError extends Loss { @@ -41,8 +41,8 @@ object SquaredError extends Loss { * @param prediction Predicted label. * @param label True label. * @return Loss gradient - * @since 1.2.0 */ + @Since("1.2.0") override def gradient(prediction: Double, label: Double): Double = { - 2.0 * (label - prediction) } http://git-wip-us.apache.org/repos/asf/spark/blob/e7db8761/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala index 0f386a2..3eefd13 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala @@ -24,7 +24,7 @@ import org.json4s.JsonDSL._ import org.json4s.jackson.JsonMethods._ import org.apache.spark.{Logging, SparkContext} -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.api.java.JavaRDD import org.apache.spark.mllib.linalg.Vector import org.apache.spark.mllib.tree.configuration.{Algo, FeatureType} @@ -40,8 +40,8 @@ import org.apache.spark.util.Utils * This model stores the decision tree structure and parameters. * @param topNode root node * @param algo algorithm type -- classification or regression - * @since 1.0.0 */ +@Since("1.0.0") @Experimental class DecisionTreeModel(val topNode: Node, val algo: Algo) extends Serializable with Saveable { @@ -50,8 +50,8 @@ class DecisionTreeModel(val topNode: Node, val algo: Algo) extends Serializable * * @param features array representing a single data point * @return Double prediction from the trained model - * @since 1.0.0 */ + @Since("1.0.0") def predict(features: Vector): Double = { topNode.predict(features) } @@ -61,8 +61,8 @@ class DecisionTreeModel(val topNode: Node, val algo: Algo) extends Serializable * * @param features RDD representing data points to be predicted * @return RDD of predictions for each of the given data points - * @since 1.0.0 */ + @Since("1.0.0") def predict(features: RDD[Vector]): RDD[Double] = { features.map(x => predict(x)) } @@ -72,16 +72,16 @@ class DecisionTreeModel(val topNode: Node, val algo: Algo) extends Serializable * * @param features JavaRDD representing data points to be predicted * @return JavaRDD of predictions for each of the given data points - * @since 1.2.0 */ + @Since("1.2.0") def predict(features: JavaRDD[Vector]): JavaRDD[Double] = { predict(features.rdd) } /** * Get number of nodes in tree, including leaf nodes. - * @since 1.1.0 */ + @Since("1.1.0") def numNodes: Int = { 1 + topNode.numDescendants } @@ -89,8 +89,8 @@ class DecisionTreeModel(val topNode: Node, val algo: Algo) extends Serializable /** * Get depth of tree. * E.g.: Depth 0 means 1 leaf node. Depth 1 means 1 internal node and 2 leaf nodes. 
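Rounding out the model API, persistence pairs the save method with the companion loader annotated below; a sketch where model is a trained DecisionTreeModel, sc is an active SparkContext, features is an assumed Vector, and the path is illustrative:

    import org.apache.spark.mllib.tree.model.DecisionTreeModel

    model.save(sc, "target/tmp/dtModel")     // fails if the directory already exists
    val sameModel = DecisionTreeModel.load(sc, "target/tmp/dtModel")
    val prediction = sameModel.predict(features)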
http://git-wip-us.apache.org/repos/asf/spark/blob/e7db8761/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala
index 0f386a2..3eefd13 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala
@@ -24,7 +24,7 @@ import org.json4s.JsonDSL._
 import org.json4s.jackson.JsonMethods._
 
 import org.apache.spark.{Logging, SparkContext}
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.api.java.JavaRDD
 import org.apache.spark.mllib.linalg.Vector
 import org.apache.spark.mllib.tree.configuration.{Algo, FeatureType}
@@ -40,8 +40,8 @@ import org.apache.spark.util.Utils
  * This model stores the decision tree structure and parameters.
  * @param topNode root node
  * @param algo algorithm type -- classification or regression
- * @since 1.0.0
  */
+@Since("1.0.0")
 @Experimental
 class DecisionTreeModel(val topNode: Node, val algo: Algo) extends Serializable with Saveable {
 
@@ -50,8 +50,8 @@ class DecisionTreeModel(val topNode: Node, val algo: Algo) extends Serializable
    *
    * @param features array representing a single data point
    * @return Double prediction from the trained model
-   * @since 1.0.0
   */
+  @Since("1.0.0")
   def predict(features: Vector): Double = {
     topNode.predict(features)
   }
@@ -61,8 +61,8 @@ class DecisionTreeModel(val topNode: Node, val algo: Algo) extends Serializable
    *
    * @param features RDD representing data points to be predicted
    * @return RDD of predictions for each of the given data points
-   * @since 1.0.0
   */
+  @Since("1.0.0")
   def predict(features: RDD[Vector]): RDD[Double] = {
     features.map(x => predict(x))
   }
@@ -72,16 +72,16 @@ class DecisionTreeModel(val topNode: Node, val algo: Algo) extends Serializable
    *
    * @param features JavaRDD representing data points to be predicted
    * @return JavaRDD of predictions for each of the given data points
-   * @since 1.2.0
   */
+  @Since("1.2.0")
   def predict(features: JavaRDD[Vector]): JavaRDD[Double] = {
     predict(features.rdd)
   }
 
   /**
    * Get number of nodes in tree, including leaf nodes.
-   * @since 1.1.0
   */
+  @Since("1.1.0")
   def numNodes: Int = {
     1 + topNode.numDescendants
   }
@@ -89,8 +89,8 @@ class DecisionTreeModel(val topNode: Node, val algo: Algo) extends Serializable
   /**
    * Get depth of tree.
    * E.g.: Depth 0 means 1 leaf node.  Depth 1 means 1 internal node and 2 leaf nodes.
-   * @since 1.1.0
   */
+  @Since("1.1.0")
   def depth: Int = {
     topNode.subtreeDepth
   }
@@ -119,8 +119,8 @@ class DecisionTreeModel(val topNode: Node, val algo: Algo) extends Serializable
    * @param sc  Spark context used to save model data.
    * @param path  Path specifying the directory in which to save this model.
    *              If the directory already exists, this method throws an exception.
-   * @since 1.3.0
   */
+  @Since("1.3.0")
   override def save(sc: SparkContext, path: String): Unit = {
     DecisionTreeModel.SaveLoadV1_0.save(sc, path, this)
   }
@@ -128,9 +128,7 @@ class DecisionTreeModel(val topNode: Node, val algo: Algo) extends Serializable
   override protected def formatVersion: String = DecisionTreeModel.formatVersion
 }
 
-/**
- * @since 1.3.0
- */
+@Since("1.3.0")
 object DecisionTreeModel extends Loader[DecisionTreeModel] with Logging {
 
   private[spark] def formatVersion: String = "1.0"
@@ -317,8 +315,8 @@ object DecisionTreeModel extends Loader[DecisionTreeModel] with Logging {
    * @param sc  Spark context used for loading model files.
    * @param path  Path specifying the directory to which the model was saved.
    * @return  Model instance
-   * @since 1.3.0
   */
+  @Since("1.3.0")
   override def load(sc: SparkContext, path: String): DecisionTreeModel = {
     implicit val formats = DefaultFormats
     val (loadedClassName, version, metadata) = Loader.loadMetadata(sc, path)
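A hedged sketch of the save/load/predict cycle these `DecisionTreeModel` annotations cover; `sc` and a trained `model` are assumed to exist, and the path is a placeholder:

{{{
import org.apache.spark.SparkContext
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.tree.model.DecisionTreeModel

def roundTrip(sc: SparkContext, model: DecisionTreeModel): Unit = {
  model.save(sc, "/tmp/myDecisionTreeModel")  // throws if the directory exists
  val reloaded = DecisionTreeModel.load(sc, "/tmp/myDecisionTreeModel")
  val p = reloaded.predict(Vectors.dense(0.5, 1.0))  // single-point prediction
  println(s"prediction = $p, nodes = ${reloaded.numNodes}, depth = ${reloaded.depth}")
}
}}}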
http://git-wip-us.apache.org/repos/asf/spark/blob/e7db8761/mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala
index 23f0363..091a046 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala
@@ -17,7 +17,7 @@
 package org.apache.spark.mllib.tree.model
 
-import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.annotation.{DeveloperApi, Since}
 import org.apache.spark.mllib.tree.impurity.ImpurityCalculator
 
 /**
@@ -29,8 +29,8 @@ import org.apache.spark.mllib.tree.impurity.ImpurityCalculator
  * @param rightImpurity right node impurity
  * @param leftPredict left node predict
  * @param rightPredict right node predict
- * @since 1.0.0
  */
+@Since("1.0.0")
 @DeveloperApi
 class InformationGainStats(
     val gain: Double,

http://git-wip-us.apache.org/repos/asf/spark/blob/e7db8761/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala
index aca3350..8c54c55 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala
@@ -17,7 +17,7 @@
 package org.apache.spark.mllib.tree.model
 
-import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.annotation.{DeveloperApi, Since}
 import org.apache.spark.Logging
 import org.apache.spark.mllib.tree.configuration.FeatureType._
 import org.apache.spark.mllib.linalg.Vector
@@ -38,8 +38,8 @@ import org.apache.spark.mllib.linalg.Vector
 * @param leftNode left child
 * @param rightNode right child
 * @param stats information gain stats
- * @since 1.0.0
 */
+@Since("1.0.0")
 @DeveloperApi
 class Node (
     val id: Int,
@@ -59,8 +59,8 @@ class Node (
  /**
   * build the left node and right nodes if not leaf
   * @param nodes array of nodes
-   * @since 1.0.0
  */
+  @Since("1.0.0")
  @deprecated("build should no longer be used since trees are constructed on-the-fly in training",
    "1.2.0")
  def build(nodes: Array[Node]): Unit = {
@@ -81,8 +81,8 @@ class Node (
  /**
   * predict value if node is not leaf
   * @param features feature value
   * @return predicted value
-   * @since 1.1.0
  */
+  @Since("1.1.0")
  def predict(features: Vector) : Double = {
    if (isLeaf) {
      predict.predict

http://git-wip-us.apache.org/repos/asf/spark/blob/e7db8761/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Predict.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Predict.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Predict.scala
index be819b5..9657840 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Predict.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Predict.scala
@@ -17,14 +17,14 @@
 package org.apache.spark.mllib.tree.model
 
-import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.annotation.{DeveloperApi, Since}
 
 /**
  * Predicted value for a node
  * @param predict predicted value
  * @param prob probability of the label (classification only)
- * @since 1.2.0
  */
+@Since("1.2.0")
 @DeveloperApi
 class Predict(
     val predict: Double,
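To illustrate the `Node` structure above (`isLeaf` as used in `predict`, plus the `leftNode`/`rightNode` children documented in the class params), a small traversal sketch; the Option-valued children are an assumption about the surrounding class, and the function itself is illustrative only, not part of the patch:

{{{
import org.apache.spark.mllib.tree.model.Node

// Count leaves by walking leftNode/rightNode, assuming both are Option[Node].
def countLeaves(node: Node): Int =
  if (node.isLeaf) 1
  else node.leftNode.map(countLeaves).getOrElse(0) +
       node.rightNode.map(countLeaves).getOrElse(0)

// e.g. countLeaves(model.topNode) for a trained DecisionTreeModel
}}}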
http://git-wip-us.apache.org/repos/asf/spark/blob/e7db8761/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala
index 18d4053..45db83a 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala
@@ -17,7 +17,7 @@
 package org.apache.spark.mllib.tree.model
 
-import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.annotation.{DeveloperApi, Since}
 import org.apache.spark.mllib.tree.configuration.FeatureType.FeatureType
 import org.apache.spark.mllib.tree.configuration.FeatureType
 import org.apache.spark.mllib.tree.configuration.FeatureType.FeatureType
@@ -30,8 +30,8 @@ import org.apache.spark.mllib.tree.configuration.FeatureType.FeatureType
  *                  Split left if feature <= threshold, else right.
  * @param featureType type of feature -- categorical or continuous
  * @param categories Split left if categorical feature value is in this set, else right.
- * @since 1.0.0
  */
+@Since("1.0.0")
 @DeveloperApi
 case class Split(
     feature: Int,
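The `Split` doc above pins down the routing rule; here is a hypothetical helper (not part of the `Split` API) that mirrors it, assuming the usual `threshold` and `categories` fields of the case class:

{{{
import org.apache.spark.mllib.linalg.Vector
import org.apache.spark.mllib.tree.configuration.FeatureType
import org.apache.spark.mllib.tree.model.Split

// Continuous: left iff value <= threshold; categorical: left iff value in categories.
def goesLeft(split: Split, features: Vector): Boolean =
  split.featureType match {
    case FeatureType.Continuous  => features(split.feature) <= split.threshold
    case FeatureType.Categorical => split.categories.contains(features(split.feature))
  }
}}}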
http://git-wip-us.apache.org/repos/asf/spark/blob/e7db8761/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala
index 0c629b1..1957144 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala
@@ -25,7 +25,7 @@ import org.json4s.JsonDSL._
 import org.json4s.jackson.JsonMethods._
 
 import org.apache.spark.{Logging, SparkContext}
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.api.java.JavaRDD
 import org.apache.spark.mllib.linalg.Vector
 import org.apache.spark.mllib.regression.LabeledPoint
@@ -45,8 +45,8 @@ import org.apache.spark.util.Utils
  *
  * @param algo algorithm for the ensemble model, either Classification or Regression
  * @param trees tree ensembles
- * @since 1.2.0
  */
+@Since("1.2.0")
 @Experimental
 class RandomForestModel(override val algo: Algo, override val trees: Array[DecisionTreeModel])
   extends TreeEnsembleModel(algo, trees, Array.fill(trees.length)(1.0),
@@ -60,8 +60,8 @@ class RandomForestModel(override val algo: Algo, override val trees: Array[Decis
    * @param sc  Spark context used to save model data.
    * @param path  Path specifying the directory in which to save this model.
    *              If the directory already exists, this method throws an exception.
-   * @since 1.3.0
   */
+  @Since("1.3.0")
   override def save(sc: SparkContext, path: String): Unit = {
     TreeEnsembleModel.SaveLoadV1_0.save(sc, path, this,
       RandomForestModel.SaveLoadV1_0.thisClassName)
@@ -70,9 +70,7 @@ class RandomForestModel(override val algo: Algo, override val trees: Array[Decis
   override protected def formatVersion: String = RandomForestModel.formatVersion
 }
 
-/**
- * @since 1.3.0
- */
+@Since("1.3.0")
 object RandomForestModel extends Loader[RandomForestModel] {
 
   private[mllib] def formatVersion: String = TreeEnsembleModel.SaveLoadV1_0.thisFormatVersion
@@ -82,8 +80,8 @@ object RandomForestModel extends Loader[RandomForestModel] {
    * @param sc  Spark context used for loading model files.
    * @param path  Path specifying the directory to which the model was saved.
    * @return  Model instance
-   * @since 1.3.0
   */
+  @Since("1.3.0")
   override def load(sc: SparkContext, path: String): RandomForestModel = {
     val (loadedClassName, version, jsonMetadata) = Loader.loadMetadata(sc, path)
     val classNameV1_0 = SaveLoadV1_0.thisClassName
@@ -114,8 +112,8 @@ object RandomForestModel extends Loader[RandomForestModel] {
  * @param algo algorithm for the ensemble model, either Classification or Regression
  * @param trees tree ensembles
  * @param treeWeights tree ensemble weights
- * @since 1.2.0
  */
+@Since("1.2.0")
 @Experimental
 class GradientBoostedTreesModel(
     override val algo: Algo,
@@ -130,8 +128,8 @@ class GradientBoostedTreesModel(
    * @param sc  Spark context used to save model data.
    * @param path  Path specifying the directory in which to save this model.
    *              If the directory already exists, this method throws an exception.
-   * @since 1.3.0
   */
+  @Since("1.3.0")
   override def save(sc: SparkContext, path: String): Unit = {
     TreeEnsembleModel.SaveLoadV1_0.save(sc, path, this,
       GradientBoostedTreesModel.SaveLoadV1_0.thisClassName)
@@ -143,8 +141,8 @@ class GradientBoostedTreesModel(
    * @param loss evaluation metric.
    * @return an array with index i having the losses or errors for the ensemble
    *         containing the first i+1 trees
-   * @since 1.4.0
   */
+  @Since("1.4.0")
   def evaluateEachIteration(
       data: RDD[LabeledPoint],
       loss: Loss): Array[Double] = {
@@ -186,8 +184,8 @@
 }
 
 /**
- * @since 1.3.0
 */
+@Since("1.3.0")
 object GradientBoostedTreesModel extends Loader[GradientBoostedTreesModel] {
 
   /**
@@ -199,8 +197,8 @@ object GradientBoostedTreesModel extends Loader[GradientBoostedTreesModel] {
    * @param loss: evaluation metric.
    * @return a RDD with each element being a zip of the prediction and error
    *         corresponding to every sample.
-   * @since 1.4.0
   */
+  @Since("1.4.0")
   def computeInitialPredictionAndError(
       data: RDD[LabeledPoint],
       initTreeWeight: Double,
@@ -223,8 +221,8 @@ object GradientBoostedTreesModel extends Loader[GradientBoostedTreesModel] {
    * @param loss: evaluation metric.
    * @return a RDD with each element being a zip of the prediction and error
    *         corresponding to each sample.
-   * @since 1.4.0
   */
+  @Since("1.4.0")
   def updatePredictionError(
     data: RDD[LabeledPoint],
     predictionAndError: RDD[(Double, Double)],
@@ -248,8 +246,8 @@ object GradientBoostedTreesModel extends Loader[GradientBoostedTreesModel] {
    * @param sc  Spark context used for loading model files.
    * @param path  Path specifying the directory to which the model was saved.
    * @return  Model instance
-   * @since 1.3.0
   */
+  @Since("1.3.0")
   override def load(sc: SparkContext, path: String): GradientBoostedTreesModel = {
     val (loadedClassName, version, jsonMetadata) = Loader.loadMetadata(sc, path)
     val classNameV1_0 = SaveLoadV1_0.thisClassName
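A sketch of using `evaluateEachIteration` from the hunks above to pick an ensemble size on held-out data; `gbtModel` and `validation` are assumed to exist already:

{{{
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.tree.loss.SquaredError
import org.apache.spark.mllib.tree.model.GradientBoostedTreesModel
import org.apache.spark.rdd.RDD

def bestNumTrees(gbtModel: GradientBoostedTreesModel,
                 validation: RDD[LabeledPoint]): Int = {
  // errors(i) is the error of the ensemble made of the first i+1 trees
  val errors = gbtModel.evaluateEachIteration(validation, SquaredError)
  errors.indexOf(errors.min) + 1
}
}}}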
http://git-wip-us.apache.org/repos/asf/spark/blob/e7db8761/mllib/src/main/scala/org/apache/spark/mllib/tree/package.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/package.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/package.scala
index f520b3a..bcaacc1 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/package.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/package.scala
@@ -24,7 +24,6 @@ package org.apache.spark.mllib
  *  - information loss calculation with entropy and Gini for classification and
  *    variance for regression,
  *  - both continuous and categorical features.
- * @since 1.0.0
  */
 package object tree {
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/e7db8761/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
index 11ed231..4940974 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
@@ -21,7 +21,7 @@ import scala.reflect.ClassTag
 
 import breeze.linalg.{DenseVector => BDV, SparseVector => BSV}
 
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.SparkContext
 import org.apache.spark.rdd.RDD
 import org.apache.spark.rdd.PartitionwiseSampledRDD
@@ -64,8 +64,8 @@ object MLUtils {
    *                    feature dimensions.
    * @param minPartitions min number of partitions
    * @return labeled data stored as an RDD[LabeledPoint]
-   * @since 1.0.0
   */
+  @Since("1.0.0")
   def loadLibSVMFile(
       sc: SparkContext,
       path: String,
@@ -115,9 +115,7 @@ object MLUtils {
 
   // Convenient methods for `loadLibSVMFile`.
 
-  /**
-   * @since 1.0.0
-   */
+  @Since("1.0.0")
   @deprecated("use method without multiclass argument, which no longer has effect", "1.1.0")
   def loadLibSVMFile(
       sc: SparkContext,
@@ -130,17 +128,15 @@ object MLUtils {
   /**
    * Loads labeled data in the LIBSVM format into an RDD[LabeledPoint], with the default number of
    * partitions.
-   * @since 1.0.0
   */
+  @Since("1.0.0")
   def loadLibSVMFile(
       sc: SparkContext,
       path: String,
       numFeatures: Int): RDD[LabeledPoint] =
     loadLibSVMFile(sc, path, numFeatures, sc.defaultMinPartitions)
 
-  /**
-   * @since 1.0.0
-   */
+  @Since("1.0.0")
   @deprecated("use method without multiclass argument, which no longer has effect", "1.1.0")
   def loadLibSVMFile(
       sc: SparkContext,
@@ -149,9 +145,7 @@ object MLUtils {
       numFeatures: Int): RDD[LabeledPoint] =
     loadLibSVMFile(sc, path, numFeatures)
 
-  /**
-   * @since 1.0.0
-   */
+  @Since("1.0.0")
   @deprecated("use method without multiclass argument, which no longer has effect", "1.1.0")
   def loadLibSVMFile(
       sc: SparkContext,
@@ -162,8 +156,8 @@ object MLUtils {
   /**
    * Loads binary labeled data in the LIBSVM format into an RDD[LabeledPoint], with number of
    * features determined automatically and the default number of partitions.
-   * @since 1.0.0
   */
+  @Since("1.0.0")
   def loadLibSVMFile(sc: SparkContext, path: String): RDD[LabeledPoint] =
     loadLibSVMFile(sc, path, -1)
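Between the `loadLibSVMFile` hunks above and the `loadVectors` hunks below, a minimal sketch of the loader as annotated; `sc` is an existing SparkContext and the path is a placeholder:

{{{
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.util.MLUtils
import org.apache.spark.rdd.RDD

// Infer the feature dimension from the data...
val data: RDD[LabeledPoint] =
  MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")
// ...or fix it explicitly instead:
val data100 = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt", 100)
}}}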
@@ -193,15 +187,15 @@ object MLUtils {
    * @param path file or directory path in any Hadoop-supported file system URI
    * @param minPartitions min number of partitions
    * @return vectors stored as an RDD[Vector]
-   * @since 1.1.0
   */
+  @Since("1.1.0")
   def loadVectors(sc: SparkContext, path: String, minPartitions: Int): RDD[Vector] =
     sc.textFile(path, minPartitions).map(Vectors.parse)
 
   /**
    * Loads vectors saved using `RDD[Vector].saveAsTextFile` with the default number of partitions.
-   * @since 1.1.0
   */
+  @Since("1.1.0")
   def loadVectors(sc: SparkContext, path: String): RDD[Vector] =
     sc.textFile(path, sc.defaultMinPartitions).map(Vectors.parse)
 
@@ -211,16 +205,16 @@ object MLUtils {
    * @param path file or directory path in any Hadoop-supported file system URI
    * @param minPartitions min number of partitions
    * @return labeled points stored as an RDD[LabeledPoint]
-   * @since 1.1.0
   */
+  @Since("1.1.0")
   def loadLabeledPoints(sc: SparkContext, path: String, minPartitions: Int): RDD[LabeledPoint] =
     sc.textFile(path, minPartitions).map(LabeledPoint.parse)
 
   /**
    * Loads labeled points saved using `RDD[LabeledPoint].saveAsTextFile` with the default number of
    * partitions.
-   * @since 1.1.0
   */
+  @Since("1.1.0")
   def loadLabeledPoints(sc: SparkContext, dir: String): RDD[LabeledPoint] =
     loadLabeledPoints(sc, dir, sc.defaultMinPartitions)
 
@@ -236,8 +230,8 @@ object MLUtils {
    *
    * @deprecated Should use [[org.apache.spark.rdd.RDD#saveAsTextFile]] for saving and
    *             [[org.apache.spark.mllib.util.MLUtils#loadLabeledPoints]] for loading.
-   * @since 1.0.0
   */
+  @Since("1.0.0")
   @deprecated("Should use MLUtils.loadLabeledPoints instead.", "1.0.1")
   def loadLabeledData(sc: SparkContext, dir: String): RDD[LabeledPoint] = {
     sc.textFile(dir).map { line =>
@@ -258,8 +252,8 @@ object MLUtils {
    *
    * @deprecated Should use [[org.apache.spark.rdd.RDD#saveAsTextFile]] for saving and
    *             [[org.apache.spark.mllib.util.MLUtils#loadLabeledPoints]] for loading.
-   * @since 1.0.0
   */
+  @Since("1.0.0")
   @deprecated("Should use RDD[LabeledPoint].saveAsTextFile instead.", "1.0.1")
   def saveLabeledData(data: RDD[LabeledPoint], dir: String) {
     val dataStr = data.map(x => x.label + "," + x.features.toArray.mkString(" "))
@@ -271,8 +265,8 @@ object MLUtils {
    * Return a k element array of pairs of RDDs with the first element of each pair
    * containing the training data, a complement of the validation data and the second
    * element, the validation data, containing a unique 1/kth of the data. Where k=numFolds.
-   * @since 1.0.0
   */
+  @Since("1.0.0")
   @Experimental
   def kFold[T: ClassTag](rdd: RDD[T], numFolds: Int, seed: Int): Array[(RDD[T], RDD[T])] = {
     val numFoldsF = numFolds.toFloat
@@ -287,8 +281,8 @@ object MLUtils {
 
   /**
    * Returns a new vector with `1.0` (bias) appended to the input vector.
-   * @since 1.0.0
   */
+  @Since("1.0.0")
   def appendBias(vector: Vector): Vector = {
     vector match {
       case dv: DenseVector =>
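Finally, a small sketch of `kFold` as annotated above; the fold count and seed are arbitrary, and `data` is assumed to exist:

{{{
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.util.MLUtils
import org.apache.spark.rdd.RDD

// Each pair holds (training, validation); validation is a unique 1/k of the data.
def crossValidate(data: RDD[LabeledPoint]): Unit = {
  val folds = MLUtils.kFold(data, numFolds = 3, seed = 42)
  folds.zipWithIndex.foreach { case ((training, validation), i) =>
    println(s"fold $i: train=${training.count()}, validation=${validation.count()}")
  }
}
}}}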