http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala index b9e01dd..d8f33cd 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala @@ -35,7 +35,7 @@ private[feature] trait QuantileDiscretizerBase extends Params /** * Number of buckets (quantiles, or categories) into which data points are grouped. Must - * be >= 2. + * be greater than or equal to 2. * * See also [[handleInvalid]], which can optionally create an additional bucket for NaN values. * @@ -52,7 +52,7 @@ private[feature] trait QuantileDiscretizerBase extends Params /** * Relative error (see documentation for - * [[org.apache.spark.sql.DataFrameStatFunctions.approxQuantile approxQuantile]] for description) + * `org.apache.spark.sql.DataFrameStatFunctions.approxQuantile` for description) * Must be in the range [0, 1]. * default: 0.001 * @group param @@ -99,7 +99,7 @@ private[feature] trait QuantileDiscretizerBase extends Params * but NaNs will be counted in a special bucket[4]. * * Algorithm: The bin ranges are chosen using an approximate algorithm (see the documentation for - * [[org.apache.spark.sql.DataFrameStatFunctions.approxQuantile approxQuantile]] + * `org.apache.spark.sql.DataFrameStatFunctions.approxQuantile` * for a detailed description). The precision of the approximation can be controlled with the * `relativeError` parameter. The lower and upper bin bounds will be `-Infinity` and `+Infinity`, * covering all real values.
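As a usage sketch for the QuantileDiscretizer parameters documented above (numBuckets must be at least 2, relativeError must lie in [0, 1]), the transformer is typically wired up as below; the session, DataFrame, column names and values are invented purely for illustration and are not part of this commit:

import org.apache.spark.ml.feature.QuantileDiscretizer
import org.apache.spark.sql.SparkSession

// Local session purely for illustration.
val spark = SparkSession.builder().appName("QuantileDiscretizerSketch").master("local[*]").getOrCreate()
import spark.implicits._

// Invented data: discretize the "hour" column into 3 buckets.
val df = Seq((0, 18.0), (1, 19.0), (2, 8.0), (3, 5.0), (4, 2.2)).toDF("id", "hour")

val discretizer = new QuantileDiscretizer()
  .setInputCol("hour")
  .setOutputCol("bucket")
  .setNumBuckets(3)         // must be greater than or equal to 2
  .setRelativeError(0.001)  // approxQuantile precision, must be in [0, 1]

discretizer.fit(df).transform(df).show()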
http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala index b25fff9..65db06c 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala @@ -32,9 +32,11 @@ import org.apache.spark.sql.types.StructType * the output, it can be any select clause that Spark SQL supports. Users can also * use Spark SQL built-in function and UDFs to operate on these selected columns. * For example, [[SQLTransformer]] supports statements like: - * - SELECT a, a + b AS a_b FROM __THIS__ - * - SELECT a, SQRT(b) AS b_sqrt FROM __THIS__ where a > 5 - * - SELECT a, b, SUM(c) AS c_sum FROM __THIS__ GROUP BY a, b + * {{{ + * SELECT a, a + b AS a_b FROM __THIS__ + * SELECT a, SQRT(b) AS b_sqrt FROM __THIS__ where a > 5 + * SELECT a, b, SUM(c) AS c_sum FROM __THIS__ GROUP BY a, b + * }}} */ @Since("1.6.0") class SQLTransformer @Since("1.6.0") (@Since("1.6.0") override val uid: String) extends Transformer http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala index a558162..3fcd84c 100755 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala @@ -52,7 +52,7 @@ class StopWordsRemover @Since("1.5.0") (@Since("1.5.0") override val uid: String /** * The words to be filtered out. * Default: English stop words - * @see [[StopWordsRemover.loadDefaultStopWords()]] + * @see `StopWordsRemover.loadDefaultStopWords()` * @group param */ @Since("1.5.0") http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala index 8b155f0..0a4d31d 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala @@ -60,7 +60,7 @@ private[feature] trait StringIndexerBase extends Params with HasInputCol with Ha * The indices are in [0, numLabels), ordered by label frequencies. * So the most frequent label gets index 0. * - * @see [[IndexToString]] for the inverse transformation + * @see `IndexToString` for the inverse transformation */ @Since("1.4.0") class StringIndexer @Since("1.4.0") ( @@ -116,7 +116,7 @@ object StringIndexer extends DefaultParamsReadable[StringIndexer] { * @param labels Ordered list of labels, corresponding to indices to be assigned. * * @note During transformation, if the input column does not exist, - * [[StringIndexerModel.transform]] would return the input dataset unmodified. + * `StringIndexerModel.transform` would return the input dataset unmodified. * This is a temporary fix for the case when target labels do not exist during prediction. 
*/ @Since("1.4.0") @@ -247,12 +247,12 @@ object StringIndexerModel extends MLReadable[StringIndexerModel] { } /** - * A [[Transformer]] that maps a column of indices back to a new column of corresponding + * A `Transformer` that maps a column of indices back to a new column of corresponding * string values. * The index-string mapping is either from the ML attributes of the input column, * or from user-supplied labels (which take precedence over ML attributes). * - * @see [[StringIndexer]] for converting strings into indices + * @see `StringIndexer` for converting strings into indices */ @Since("1.5.0") class IndexToString private[ml] (@Since("1.5.0") override val uid: String) http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala index 45d8fa9..cfaf6c0 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala @@ -70,7 +70,7 @@ class RegexTokenizer @Since("1.4.0") (@Since("1.4.0") override val uid: String) def this() = this(Identifiable.randomUID("regexTok")) /** - * Minimum token length, >= 0. + * Minimum token length, greater than or equal to 0. * Default: 1, to avoid returning empty strings * @group param */ http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala index d1a5c2e..d371da7 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala @@ -41,8 +41,8 @@ private[ml] trait VectorIndexerParams extends Params with HasInputCol with HasOu /** * Threshold for the number of values a categorical feature can take. - * If a feature is found to have > maxCategories values, then it is declared continuous. - * Must be >= 2. + * If a feature is found to have {@literal >} maxCategories values, then it is declared + * continuous. Must be greater than or equal to 2. * * (default = 20) * @group param @@ -59,7 +59,7 @@ private[ml] trait VectorIndexerParams extends Params with HasInputCol with HasOu } /** - * Class for indexing categorical feature columns in a dataset of [[Vector]]. + * Class for indexing categorical feature columns in a dataset of `Vector`. * * This has 2 usage modes: * - Automatically identify categorical features (default behavior) @@ -76,7 +76,8 @@ private[ml] trait VectorIndexerParams extends Params with HasInputCol with HasOu * - Warning: This can cause problems if features are continuous since this will collect ALL * unique values to the driver. * - E.g.: Feature 0 has unique values {-1.0, 0.0}, and feature 1 values {1.0, 3.0, 5.0}. - * If maxCategories >= 3, then both features will be declared categorical. + * If maxCategories is greater than or equal to 3, then both features will be declared + * categorical. * * This returns a model which can transform categorical features to use 0-based indices. 
* http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala index 966ccb8..e3e462d 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala @@ -32,8 +32,8 @@ import org.apache.spark.sql.types.StructType * This class takes a feature vector and outputs a new feature vector with a subarray of the * original features. * - * The subset of features can be specified with either indices ([[setIndices()]]) - * or names ([[setNames()]]). At least one feature must be selected. Duplicate features + * The subset of features can be specified with either indices (`setIndices()`) + * or names (`setNames()`). At least one feature must be selected. Duplicate features * are not allowed, so there can be no overlap between selected indices and names. * * The output vector will order features with the selected indices first (in the order given), http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/mllib/src/main/scala/org/apache/spark/ml/feature/package-info.java ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/package-info.java b/mllib/src/main/scala/org/apache/spark/ml/feature/package-info.java index dcff424..ce7f335 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/package-info.java +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/package-info.java @@ -61,12 +61,12 @@ * createStructField("id", IntegerType, false), * createStructField("text", StringType, false), * createStructField("rating", DoubleType, false))); - * JavaRDD<Row> rowRDD = jsc.parallelize( + * JavaRDD<Row> rowRDD = jsc.parallelize( * Arrays.asList( * RowFactory.create(0, "Hi I heard about Spark", 3.0), * RowFactory.create(1, "I wish Java could use case classes", 4.0), * RowFactory.create(2, "Logistic regression models are neat", 4.0))); - * Dataset<Row> dataset = jsql.createDataFrame(rowRDD, schema); + * Dataset<Row> dataset = jsql.createDataFrame(rowRDD, schema); * // define feature transformers * RegexTokenizer tok = new RegexTokenizer() * .setInputCol("text") http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/mllib/src/main/scala/org/apache/spark/ml/param/params.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala index 5bd8ebe..9adb0fa 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala @@ -87,7 +87,7 @@ class Param[T](val parent: String, val name: String, val doc: String, val isVali def ->(value: T): ParamPair[T] = ParamPair(this, value) // scalastyle:on - /** Encodes a param value into JSON, which can be decoded by [[jsonDecode()]]. */ + /** Encodes a param value into JSON, which can be decoded by `jsonDecode()`. */ def jsonEncode(value: T): String = { value match { case x: String => @@ -140,7 +140,7 @@ private[ml] object Param { /** * :: DeveloperApi :: - * Factory methods for common validation functions for [[Param.isValid]]. + * Factory methods for common validation functions for `Param.isValid`. 
* The numerical methods only support Int, Long, Float, and Double. */ @DeveloperApi @@ -165,32 +165,39 @@ object ParamValidators { s" of unexpected input type: ${value.getClass}") } - /** Check if value > lowerBound */ + /** + * Check if value is greater than lowerBound + */ def gt[T](lowerBound: Double): T => Boolean = { (value: T) => getDouble(value) > lowerBound } - /** Check if value >= lowerBound */ + /** + * Check if value is greater than or equal to lowerBound + */ def gtEq[T](lowerBound: Double): T => Boolean = { (value: T) => getDouble(value) >= lowerBound } - /** Check if value < upperBound */ + /** + * Check if value is less than upperBound + */ def lt[T](upperBound: Double): T => Boolean = { (value: T) => getDouble(value) < upperBound } - /** Check if value <= upperBound */ + /** + * Check if value is less than or equal to upperBound + */ def ltEq[T](upperBound: Double): T => Boolean = { (value: T) => getDouble(value) <= upperBound } /** * Check for value in range lowerBound to upperBound. - * @param lowerInclusive If true, check for value >= lowerBound. - * If false, check for value > lowerBound. - * @param upperInclusive If true, check for value <= upperBound. - * If false, check for value < upperBound. + * + * @param lowerInclusive if true, range includes value = lowerBound + * @param upperInclusive if true, range includes value = upperBound */ def inRange[T]( lowerBound: Double, @@ -203,7 +210,7 @@ object ParamValidators { lowerValid && upperValid } - /** Version of [[inRange()]] which uses inclusive be default: [lowerBound, upperBound] */ + /** Version of `inRange()` which uses inclusive be default: [lowerBound, upperBound] */ def inRange[T](lowerBound: Double, upperBound: Double): T => Boolean = { inRange[T](lowerBound, upperBound, lowerInclusive = true, upperInclusive = true) } @@ -228,7 +235,7 @@ object ParamValidators { /** * :: DeveloperApi :: - * Specialized version of [[Param[Double]]] for Java. + * Specialized version of `Param[Double]` for Java. */ @DeveloperApi class DoubleParam(parent: String, name: String, doc: String, isValid: Double => Boolean) @@ -288,7 +295,7 @@ private[param] object DoubleParam { /** * :: DeveloperApi :: - * Specialized version of [[Param[Int]]] for Java. + * Specialized version of `Param[Int]` for Java. */ @DeveloperApi class IntParam(parent: String, name: String, doc: String, isValid: Int => Boolean) @@ -317,7 +324,7 @@ class IntParam(parent: String, name: String, doc: String, isValid: Int => Boolea /** * :: DeveloperApi :: - * Specialized version of [[Param[Float]]] for Java. + * Specialized version of `Param[Float]` for Java. */ @DeveloperApi class FloatParam(parent: String, name: String, doc: String, isValid: Float => Boolean) @@ -378,7 +385,7 @@ private object FloatParam { /** * :: DeveloperApi :: - * Specialized version of [[Param[Long]]] for Java. + * Specialized version of `Param[Long]` for Java. */ @DeveloperApi class LongParam(parent: String, name: String, doc: String, isValid: Long => Boolean) @@ -407,7 +414,7 @@ class LongParam(parent: String, name: String, doc: String, isValid: Long => Bool /** * :: DeveloperApi :: - * Specialized version of [[Param[Boolean]]] for Java. + * Specialized version of `Param[Boolean]` for Java. */ @DeveloperApi class BooleanParam(parent: String, name: String, doc: String) // No need for isValid @@ -430,7 +437,7 @@ class BooleanParam(parent: String, name: String, doc: String) // No need for isV /** * :: DeveloperApi :: - * Specialized version of [[Param[Array[String]]]] for Java. 
+ * Specialized version of `Param[Array[String]]` for Java. */ @DeveloperApi class StringArrayParam(parent: Params, name: String, doc: String, isValid: Array[String] => Boolean) @@ -455,7 +462,7 @@ class StringArrayParam(parent: Params, name: String, doc: String, isValid: Array /** * :: DeveloperApi :: - * Specialized version of [[Param[Array[Double]]]] for Java. + * Specialized version of `Param[Array[Double]]` for Java. */ @DeveloperApi class DoubleArrayParam(parent: Params, name: String, doc: String, isValid: Array[Double] => Boolean) @@ -485,7 +492,7 @@ class DoubleArrayParam(parent: Params, name: String, doc: String, isValid: Array /** * :: DeveloperApi :: - * Specialized version of [[Param[Array[Int]]]] for Java. + * Specialized version of `Param[Array[Int]]` for Java. */ @DeveloperApi class IntArrayParam(parent: Params, name: String, doc: String, isValid: Array[Int] => Boolean) http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala index 6d2c59a..b466e2e 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala @@ -355,8 +355,8 @@ object ALSModel extends MLReadable[ALSModel] { * * Essentially instead of finding the low-rank approximations to the rating matrix `R`, * this finds the approximations for a preference matrix `P` where the elements of `P` are 1 if - * r > 0 and 0 if r <= 0. The ratings then act as 'confidence' values related to strength of - * indicated user + * r is greater than 0 and 0 if r is less than or equal to 0. The ratings then act as 'confidence' + * values related to strength of indicated user * preferences rather than explicit ratings given to items. */ @Since("1.3.0") @@ -878,7 +878,7 @@ object ALS extends DefaultParamsReadable[ALS] with Logging { } /** - * Builder for [[RatingBlock]]. [[mutable.ArrayBuilder]] is used to avoid boxing/unboxing. + * Builder for [[RatingBlock]]. `mutable.ArrayBuilder` is used to avoid boxing/unboxing. */ private[recommendation] class RatingBlockBuilder[@specialized(Int, Long) ID: ClassTag] extends Serializable { http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala index d6ad1ea..af68e7b 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala @@ -185,7 +185,7 @@ class AFTSurvivalRegression @Since("1.6.0") (@Since("1.6.0") override val uid: S setDefault(tol -> 1E-6) /** - * Suggested depth for treeAggregate (>= 2). + * Suggested depth for treeAggregate (greater than or equal to 2). * If the dimensions of features or the number of partitions are large, * this param could be adjusted to a larger size. * Default is 2. 
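To connect the params.scala changes above with constraints such as the aggregationDepth requirement just documented, here is a minimal sketch of declaring a typed parameter guarded by one of the ParamValidators factories; the parent and name strings are hypothetical and the standalone usage is only meant to show the API shape:

import org.apache.spark.ml.param.{IntParam, ParamValidators}

// Hypothetical standalone param: an integer that must be greater than or equal to 2,
// validated by ParamValidators.gtEq as described in the hunks above.
val aggregationDepthParam = new IntParam(
  "exampleParent", "aggregationDepth",
  "suggested depth for treeAggregate (must be greater than or equal to 2)",
  ParamValidators.gtEq(2))

println(aggregationDepthParam.isValid(2))  // true
println(aggregationDepthParam.isValid(1))  // false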
http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala index 894b6a2..0b0c461 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala @@ -132,7 +132,8 @@ object DecisionTreeRegressor extends DefaultParamsReadable[DecisionTreeRegressor } /** - * [[http://en.wikipedia.org/wiki/Decision_tree_learning Decision tree]] model for regression. + * <a href="http://en.wikipedia.org/wiki/Decision_tree_learning"> + * Decision tree (Wikipedia)</a> model for regression. * It supports both continuous and categorical features. * @param rootNode Root of the decision tree */ http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala index 6d8159a..6e62c8d 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala @@ -229,7 +229,7 @@ class GBTRegressionModel private[ml]( * (Hastie, Tibshirani, Friedman. "The Elements of Statistical Learning, 2nd Edition." 2001.) * and follows the implementation from scikit-learn. * - * @see [[DecisionTreeRegressionModel.featureImportances]] + * @see `DecisionTreeRegressionModel.featureImportances` */ @Since("2.0.0") lazy val featureImportances: Vector = TreeEnsembleModel.featureImportances(trees, numFeatures) http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala index 1201ecd..e718cda 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala @@ -131,10 +131,10 @@ private[regression] trait GeneralizedLinearRegressionBase extends PredictorParam * It supports "gaussian", "binomial", "poisson" and "gamma" as family. * Valid link functions for each family is listed below. The first link function of each family * is the default one. - * - "gaussian" -> "identity", "log", "inverse" - * - "binomial" -> "logit", "probit", "cloglog" - * - "poisson" -> "log", "identity", "sqrt" - * - "gamma" -> "inverse", "identity", "log" + * - "gaussian" : "identity", "log", "inverse" + * - "binomial" : "logit", "probit", "cloglog" + * - "poisson" : "log", "identity", "sqrt" + * - "gamma" : "inverse", "identity", "log" */ @Experimental @Since("2.0.0") @@ -1066,7 +1066,7 @@ class GeneralizedLinearRegressionTrainingSummary private[regression] ( import GeneralizedLinearRegression._ /** - * Whether the underlying [[WeightedLeastSquares]] using the "normal" solver. 
+ * Whether the underlying `WeightedLeastSquares` using the "normal" solver. */ private[ml] val isNormalSolver: Boolean = { diagInvAtWA.length != 1 || diagInvAtWA(0) != 0 @@ -1074,10 +1074,10 @@ class GeneralizedLinearRegressionTrainingSummary private[regression] ( /** * Standard error of estimated coefficients and intercept. - * This value is only available when the underlying [[WeightedLeastSquares]] + * This value is only available when the underlying `WeightedLeastSquares` * using the "normal" solver. * - * If [[GeneralizedLinearRegression.fitIntercept]] is set to true, + * If `GeneralizedLinearRegression.fitIntercept` is set to true, * then the last element returned corresponds to the intercept. */ @Since("2.0.0") @@ -1092,10 +1092,10 @@ class GeneralizedLinearRegressionTrainingSummary private[regression] ( /** * T-statistic of estimated coefficients and intercept. - * This value is only available when the underlying [[WeightedLeastSquares]] + * This value is only available when the underlying `WeightedLeastSquares` * using the "normal" solver. * - * If [[GeneralizedLinearRegression.fitIntercept]] is set to true, + * If `GeneralizedLinearRegression.fitIntercept` is set to true, * then the last element returned corresponds to the intercept. */ @Since("2.0.0") @@ -1115,10 +1115,10 @@ class GeneralizedLinearRegressionTrainingSummary private[regression] ( /** * Two-sided p-value of estimated coefficients and intercept. - * This value is only available when the underlying [[WeightedLeastSquares]] + * This value is only available when the underlying `WeightedLeastSquares` * using the "normal" solver. * - * If [[GeneralizedLinearRegression.fitIntercept]] is set to true, + * If `GeneralizedLinearRegression.fitIntercept` is set to true, * then the last element returned corresponds to the intercept. */ @Since("2.0.0") http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala index 4d274f3..c378a99 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala @@ -56,7 +56,7 @@ private[regression] trait IsotonicRegressionBase extends Params with HasFeatures final def getIsotonic: Boolean = $(isotonic) /** - * Param for the index of the feature if [[featuresCol]] is a vector column (default: `0`), no + * Param for the index of the feature if `featuresCol` is a vector column (default: `0`), no * effect otherwise. * @group param */ @@ -194,7 +194,7 @@ object IsotonicRegression extends DefaultParamsReadable[IsotonicRegression] { * Model fitted by IsotonicRegression. * Predicts using a piecewise linear function. * - * For detailed rules see [[org.apache.spark.mllib.regression.IsotonicRegressionModel.predict()]]. + * For detailed rules see `org.apache.spark.mllib.regression.IsotonicRegressionModel.predict()`. * * @param oldModel A [[org.apache.spark.mllib.regression.IsotonicRegressionModel]] * model trained by [[org.apache.spark.mllib.regression.IsotonicRegression]]. 
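For reference, the family/link pairings and the "normal"-solver-only summary statistics documented above are normally exercised as in the following sketch; the tiny dataset is made up and only intended to show the API shape, not a meaningful model:

import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.ml.regression.GeneralizedLinearRegression
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().appName("GlrSketch").master("local[*]").getOrCreate()
import spark.implicits._

// Invented count-like data for the "poisson" family with its "log" link.
val training = Seq(
  (1.0, Vectors.dense(0.0, 1.2)),
  (2.0, Vectors.dense(1.0, 0.5)),
  (1.0, Vectors.dense(0.5, 0.1)),
  (5.0, Vectors.dense(1.5, 2.0))).toDF("label", "features")

val glr = new GeneralizedLinearRegression()
  .setFamily("poisson")
  .setLink("log")   // "log" is listed first for "poisson" above, i.e. the default link
  .setMaxIter(25)

val model = glr.fit(training)
// Standard errors, t-statistics and p-values are exposed on the training summary when the
// underlying WeightedLeastSquares "normal" path is used, as the docs above note.
println(model.summary.coefficientStandardErrors.mkString(", "))
println(model.summary.pValues.mkString(", "))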
http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala index 19ddf36..534ef87 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala @@ -60,11 +60,11 @@ private[regression] trait LinearRegressionParams extends PredictorParams * The learning objective is to minimize the squared error, with regularization. * The specific squared error loss function used is: * - * <p><blockquote> + * <blockquote> * $$ * L = 1/2n ||A coefficients - y||^2^ * $$ - * </blockquote></p> + * </blockquote> * * This supports multiple types of regularization: * - none (a.k.a. ordinary least squares) @@ -118,8 +118,9 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String /** * Set the ElasticNet mixing parameter. - * For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty. - * For 0 < alpha < 1, the penalty is a combination of L1 and L2. + * For alpha = 0, the penalty is an L2 penalty. + * For alpha = 1, it is an L1 penalty. + * For alpha in (0,1), the penalty is a combination of L1 and L2. * Default is 0.0 which is an L2 penalty. * * @group setParam @@ -165,7 +166,7 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String * - "l-bfgs" denotes Limited-memory BFGS which is a limited-memory quasi-Newton * optimization method. * - "normal" denotes using Normal Equation as an analytical solution to the linear regression - * problem. This solver is limited to [[LinearRegression.MAX_FEATURES_FOR_NORMAL_SOLVER]]. + * problem. This solver is limited to `LinearRegression.MAX_FEATURES_FOR_NORMAL_SOLVER`. * - "auto" (default) means that the solver algorithm is selected automatically. * The Normal Equations solver will be used when possible, but this will automatically fall * back to iterative optimization methods when needed. @@ -181,7 +182,7 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String setDefault(solver -> "auto") /** - * Suggested depth for treeAggregate (>= 2). + * Suggested depth for treeAggregate (greater than or equal to 2). * If the dimensions of features or the number of partitions are large, * this param could be adjusted to a larger size. * Default is 2. @@ -338,12 +339,12 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String /* Note that in Linear Regression, the objective history (loss + regularization) returned from optimizer is computed in the scaled space given by the following formula. - <p><blockquote> + <blockquote> $$ L &= 1/2n||\sum_i w_i(x_i - \bar{x_i}) / \hat{x_i} - (y - \bar{y}) / \hat{y}||^2 + regTerms \\ $$ - </blockquote></p> + </blockquote> */ val arrayBuilder = mutable.ArrayBuilder.make[Double] var state: optimizer.State = null @@ -414,7 +415,7 @@ object LinearRegression extends DefaultParamsReadable[LinearRegression] { override def load(path: String): LinearRegression = super.load(path) /** - * When using [[LinearRegression.solver]] == "normal", the solver must limit the number of + * When using `LinearRegression.solver` == "normal", the solver must limit the number of * features to at most this number. 
The entire covariance matrix X^T^X will be collected * to the driver. This limit helps prevent memory overflow errors. */ @@ -584,7 +585,7 @@ class LinearRegressionTrainingSummary private[regression] ( * * This value is only available when using the "l-bfgs" solver. * - * @see [[LinearRegression.solver]] + * @see `LinearRegression.solver` */ @Since("1.5.0") val totalIterations = objectiveHistory.length @@ -624,7 +625,7 @@ class LinearRegressionSummary private[regression] ( * Reference: <a href="http://en.wikipedia.org/wiki/Explained_variation"> * Wikipedia explain variation</a> * - * @note This ignores instance weights (setting all to 1.0) from [[LinearRegression.weightCol]]. + * @note This ignores instance weights (setting all to 1.0) from `LinearRegression.weightCol`. * This will change in later Spark versions. */ @Since("1.5.0") @@ -634,7 +635,7 @@ class LinearRegressionSummary private[regression] ( * Returns the mean absolute error, which is a risk function corresponding to the * expected value of the absolute error loss or l1-norm loss. * - * @note This ignores instance weights (setting all to 1.0) from [[LinearRegression.weightCol]]. + * @note This ignores instance weights (setting all to 1.0) from `LinearRegression.weightCol`. * This will change in later Spark versions. */ @Since("1.5.0") @@ -644,7 +645,7 @@ class LinearRegressionSummary private[regression] ( * Returns the mean squared error, which is a risk function corresponding to the * expected value of the squared error loss or quadratic loss. * - * @note This ignores instance weights (setting all to 1.0) from [[LinearRegression.weightCol]]. + * @note This ignores instance weights (setting all to 1.0) from `LinearRegression.weightCol`. * This will change in later Spark versions. */ @Since("1.5.0") @@ -654,7 +655,7 @@ class LinearRegressionSummary private[regression] ( * Returns the root mean squared error, which is defined as the square root of * the mean squared error. * - * @note This ignores instance weights (setting all to 1.0) from [[LinearRegression.weightCol]]. + * @note This ignores instance weights (setting all to 1.0) from `LinearRegression.weightCol`. * This will change in later Spark versions. */ @Since("1.5.0") @@ -665,7 +666,7 @@ class LinearRegressionSummary private[regression] ( * Reference: <a href="http://en.wikipedia.org/wiki/Coefficient_of_determination"> * Wikipedia coefficient of determination</a> * - * @note This ignores instance weights (setting all to 1.0) from [[LinearRegression.weightCol]]. + * @note This ignores instance weights (setting all to 1.0) from `LinearRegression.weightCol`. * This will change in later Spark versions. */ @Since("1.5.0") @@ -711,10 +712,10 @@ class LinearRegressionSummary private[regression] ( * Standard error of estimated coefficients and intercept. * This value is only available when using the "normal" solver. * - * If [[LinearRegression.fitIntercept]] is set to true, + * If `LinearRegression.fitIntercept` is set to true, * then the last element returned corresponds to the intercept. * - * @see [[LinearRegression.solver]] + * @see `LinearRegression.solver` */ lazy val coefficientStandardErrors: Array[Double] = { if (diagInvAtWA.length == 1 && diagInvAtWA(0) == 0) { @@ -739,10 +740,10 @@ class LinearRegressionSummary private[regression] ( * T-statistic of estimated coefficients and intercept. * This value is only available when using the "normal" solver. 
* - * If [[LinearRegression.fitIntercept]] is set to true, + * If `LinearRegression.fitIntercept` is set to true, * then the last element returned corresponds to the intercept. * - * @see [[LinearRegression.solver]] + * @see `LinearRegression.solver` */ lazy val tValues: Array[Double] = { if (diagInvAtWA.length == 1 && diagInvAtWA(0) == 0) { @@ -762,10 +763,10 @@ class LinearRegressionSummary private[regression] ( * Two-sided p-value of estimated coefficients and intercept. * This value is only available when using the "normal" solver. * - * If [[LinearRegression.fitIntercept]] is set to true, + * If `LinearRegression.fitIntercept` is set to true, * then the last element returned corresponds to the intercept. * - * @see [[LinearRegression.solver]] + * @see `LinearRegression.solver` */ lazy val pValues: Array[Double] = { if (diagInvAtWA.length == 1 && diagInvAtWA(0) == 0) { http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala index 90d89c5..62dd729 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala @@ -200,7 +200,7 @@ class RandomForestRegressionModel private[ml] ( * (Hastie, Tibshirani, Friedman. "The Elements of Statistical Learning, 2nd Edition." 2001.) * and follows the implementation from scikit-learn. * - * @see [[DecisionTreeRegressionModel.featureImportances]] + * @see `DecisionTreeRegressionModel.featureImportances` */ @Since("1.5.0") lazy val featureImportances: Vector = TreeEnsembleModel.featureImportances(trees, numFeatures) http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/mllib/src/main/scala/org/apache/spark/ml/tree/impl/DecisionTreeMetadata.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/impl/DecisionTreeMetadata.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/impl/DecisionTreeMetadata.scala index 442f52b..bc3c86a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tree/impl/DecisionTreeMetadata.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tree/impl/DecisionTreeMetadata.scala @@ -35,7 +35,7 @@ import org.apache.spark.rdd.RDD * @param numClasses For classification: labels can take values {0, ..., numClasses - 1}. * For regression: fixed at 0 (no meaning). * @param maxBins Maximum number of bins, for all features. - * @param featureArity Map: categorical feature index --> arity. + * @param featureArity Map: categorical feature index to arity. * I.e., the feature takes values in {0, ..., arity - 1}. * @param numBins Number of bins for each feature. 
*/ http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/mllib/src/main/scala/org/apache/spark/ml/util/MetadataUtils.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/util/MetadataUtils.scala b/mllib/src/main/scala/org/apache/spark/ml/util/MetadataUtils.scala index f34a831..3e19f27 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/util/MetadataUtils.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/util/MetadataUtils.scala @@ -48,7 +48,7 @@ private[spark] object MetadataUtils { * If a feature does not have metadata, it is assumed to be continuous. * If a feature is Nominal, then it must have the number of values * specified. - * @return Map: feature index --> number of categories. + * @return Map: feature index to number of categories. * The map's set of keys will be the set of categorical feature indices. */ def getCategoricalFeatures(featuresSchema: StructField): Map[Int, Int] = { http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala b/mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala index bbb9886..95f4804 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala @@ -76,7 +76,7 @@ private[util] sealed trait BaseReadWrite { */ protected final def sqlContext: SQLContext = sparkSession.sqlContext - /** Returns the underlying [[SparkContext]]. */ + /** Returns the underlying `SparkContext`. */ protected final def sc: SparkContext = sparkSession.sparkContext } @@ -169,7 +169,7 @@ trait MLWritable { * This only handles simple [[org.apache.spark.ml.param.Param]] types; e.g., it will not handle * [[org.apache.spark.sql.Dataset]]. * - * @see [[DefaultParamsReadable]], the counterpart to this trait + * @see `DefaultParamsReadable`, the counterpart to this trait */ @DeveloperApi trait DefaultParamsWritable extends MLWritable { self: Params => @@ -238,7 +238,7 @@ trait MLReadable[T] { * [[org.apache.spark.sql.Dataset]]. * * @tparam T ML instance type - * @see [[DefaultParamsWritable]], the counterpart to this trait + * @see `DefaultParamsWritable`, the counterpart to this trait */ @DeveloperApi trait DefaultParamsReadable[T] extends MLReadable[T] { @@ -345,7 +345,7 @@ private[ml] object DefaultParamsReader { /** * All info from metadata file. * - * @param params paramMap, as a [[JValue]] + * @param params paramMap, as a `JValue` * @param metadata All metadata, including the other fields * @param metadataJson Full metadata file String (for debugging) */ http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala index aec1526..5fb04ed 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala @@ -124,7 +124,7 @@ object SVMModel extends Loader[SVMModel] { /** * Train a Support Vector Machine (SVM) using Stochastic Gradient Descent. By default L2 - * regularization is used, which can be changed via [[SVMWithSGD.optimizer]]. 
+ * regularization is used, which can be changed via `SVMWithSGD.optimizer`. * * @note Labels used in SVM should be {0, 1}. */ http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala index 31f5141..336f2fc 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala @@ -43,9 +43,9 @@ import org.apache.spark.storage.StorageLevel * @param k the desired number of leaf clusters (default: 4). The actual number could be smaller if * there are no divisible leaf clusters. * @param maxIterations the max number of k-means iterations to split clusters (default: 20) - * @param minDivisibleClusterSize the minimum number of points (if >= 1.0) or the minimum - * proportion of points (if < 1.0) of a divisible cluster - * (default: 1) + * @param minDivisibleClusterSize the minimum number of points (if greater than or equal 1.0) or + * the minimum proportion of points (if less than 1.0) of a divisible + * cluster (default: 1) * @param seed a random seed (default: hash value of the class name) * * @see <a href="http://glaros.dtc.umn.edu/gkhome/fetch/papers/docclusterKDDTMW00.pdf"> @@ -101,8 +101,8 @@ class BisectingKMeans private ( def getMaxIterations: Int = this.maxIterations /** - * Sets the minimum number of points (if >= `1.0`) or the minimum proportion of points - * (if < `1.0`) of a divisible cluster (default: 1). + * Sets the minimum number of points (if greater than or equal to `1.0`) or the minimum proportion + * of points (if less than `1.0`) of a divisible cluster (default: 1). */ @Since("1.6.0") def setMinDivisibleClusterSize(minDivisibleClusterSize: Double): this.type = { @@ -113,8 +113,8 @@ class BisectingKMeans private ( } /** - * Gets the minimum number of points (if >= `1.0`) or the minimum proportion of points - * (if < `1.0`) of a divisible cluster. + * Gets the minimum number of points (if greater than or equal to `1.0`) or the minimum proportion + * of points (if less than `1.0`) of a divisible cluster. */ @Since("1.6.0") def getMinDivisibleClusterSize: Double = minDivisibleClusterSize http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala index 6873d42..10bd846 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala @@ -274,7 +274,7 @@ class GaussianMixture private ( private[clustering] object GaussianMixture { /** * Heuristic to distribute the computation of the `MultivariateGaussian`s, approximately when - * d > 25 except for when k is very small. + * d is greater than 25 except for when k is very small. 
* @param k Number of topics * @param d Number of features */ http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala index 16742bd..4cb9200 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala @@ -120,11 +120,11 @@ class LDA private ( * - EM * - Currently only supports symmetric distributions, so all values in the vector should be * the same. - * - Values should be > 1.0 + * - Values should be greater than 1.0 * - default = uniformly (50 / k) + 1, where 50/k is common in LDA libraries and +1 follows * from Asuncion et al. (2009), who recommend a +1 adjustment for EM. * - Online - * - Values should be >= 0 + * - Values should be greater than or equal to 0 * - default = uniformly (1.0 / k), following the implementation from * <a href="https://github.com/Blei-Lab/onlineldavb">here</a>. */ @@ -195,11 +195,11 @@ class LDA private ( * * Optimizer-specific parameter settings: * - EM - * - Value should be > 1.0 + * - Value should be greater than 1.0 * - default = 0.1 + 1, where 0.1 gives a small amount of smoothing and +1 follows * Asuncion et al. (2009), who recommend a +1 adjustment for EM. * - Online - * - Value should be >= 0 + * - Value should be greater than or equal to 0 * - default = (1.0 / k), following the implementation from * <a href="https://github.com/Blei-Lab/onlineldavb">here</a>. */ @@ -216,7 +216,7 @@ class LDA private ( def getBeta: Double = getTopicConcentration /** - * Alias for [[setTopicConcentration()]] + * Alias for `setTopicConcentration()` */ @Since("1.3.0") def setBeta(beta: Double): this.type = setTopicConcentration(beta) @@ -261,11 +261,11 @@ class LDA private ( def getCheckpointInterval: Int = checkpointInterval /** - * Parameter for set checkpoint interval (>= 1) or disable checkpoint (-1). E.g. 10 means that - * the cache will get checkpointed every 10 iterations. Checkpointing helps with recovery - * (when nodes fail). It also helps with eliminating temporary shuffle files on disk, which can be - * important when LDA is run for many iterations. If the checkpoint directory is not set in - * [[org.apache.spark.SparkContext]], this setting is ignored. (default = 10) + * Parameter for set checkpoint interval (greater than or equal to 1) or disable checkpoint (-1). + * E.g. 10 means that the cache will get checkpointed every 10 iterations. Checkpointing helps + * with recovery (when nodes fail). It also helps with eliminating temporary shuffle files on + * disk, which can be important when LDA is run for many iterations. If the checkpoint directory + * is not set in [[org.apache.spark.SparkContext]], this setting is ignored. (default = 10) * * @see [[org.apache.spark.SparkContext#setCheckpointDir]] */ @@ -321,7 +321,7 @@ class LDA private ( * @param documents RDD of documents, which are term (word) count vectors paired with IDs. * The term count vectors are "bags of words" with a fixed-size vocabulary * (where the vocabulary size is the length of the vector). - * Document IDs must be unique and >= 0. + * Document IDs must be unique and greater than or equal to 0. 
* @return Inferred LDA model */ @Since("1.3.0") @@ -340,7 +340,7 @@ class LDA private ( } /** - * Java-friendly version of [[run()]] + * Java-friendly version of `run()` */ @Since("1.3.0") def run(documents: JavaPairRDD[java.lang.Long, Vector]): LDAModel = { http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala index 017fbc6..25ffd85 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala @@ -171,7 +171,7 @@ abstract class LDAModel private[clustering] extends Saveable { * The term count vectors are "bags of words" with a fixed-size vocabulary * (where the vocabulary size is the length of the vector). * This must use the same vocabulary (ordering of term counts) as in training. - * Document IDs must be unique and >= 0. + * Document IDs must be unique and greater than or equal to 0. * @return Estimated topic distribution for each document. * The returned RDD may be zipped with the given RDD, where each returned vector * is a multinomial distribution over topics. @@ -392,7 +392,7 @@ class LocalLDAModel private[spark] ( * literature). Returns a vector of zeros for an empty document. * * Note this means to allow quick query for single document. For batch documents, please refer - * to [[topicDistributions()]] to avoid overhead. + * to `topicDistributions()` to avoid overhead. * * @param document document to predict topic mixture distributions for * @return topic mixture distribution for the document http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala index 9687fc8..96b49bc 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala @@ -350,9 +350,9 @@ final class OnlineLDAOptimizer extends LDAOptimizer { * Mini-batch fraction in (0, 1], which sets the fraction of document sampled and used in * each iteration. * - * @note This should be adjusted in synch with [[LDA.setMaxIterations()]] + * @note This should be adjusted in synch with `LDA.setMaxIterations()` * so the entire corpus is used. Specifically, set both so that - * maxIterations * miniBatchFraction >= 1. + * maxIterations * miniBatchFraction is at least 1. * * Default: 0.05, i.e., 5% of total documents. 
*/ http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala index 92cd7f2..9b7cd04 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala @@ -78,7 +78,8 @@ class BinaryClassificationMetrics @Since("1.3.0") ( * Returns the receiver operating characteristic (ROC) curve, * which is an RDD of (false positive rate, true positive rate) * with (0.0, 0.0) prepended and (1.0, 1.0) appended to it. - * @see http://en.wikipedia.org/wiki/Receiver_operating_characteristic + * @see <a href="http://en.wikipedia.org/wiki/Receiver_operating_characteristic"> + * Receiver operating characteristic (Wikipedia)</a> */ @Since("1.0.0") def roc(): RDD[(Double, Double)] = { @@ -98,7 +99,8 @@ class BinaryClassificationMetrics @Since("1.3.0") ( /** * Returns the precision-recall curve, which is an RDD of (recall, precision), * NOT (precision, recall), with (0.0, 1.0) prepended to it. - * @see http://en.wikipedia.org/wiki/Precision_and_recall + * @see <a href="http://en.wikipedia.org/wiki/Precision_and_recall"> + * Precision and recall (Wikipedia)</a> */ @Since("1.0.0") def pr(): RDD[(Double, Double)] = { @@ -118,7 +120,7 @@ class BinaryClassificationMetrics @Since("1.3.0") ( * Returns the (threshold, F-Measure) curve. * @param beta the beta factor in F-Measure computation. * @return an RDD of (threshold, F-Measure) pairs. - * @see http://en.wikipedia.org/wiki/F1_score + * @see <a href="http://en.wikipedia.org/wiki/F1_score">F1 score (Wikipedia)</a> */ @Since("1.0.0") def fMeasureByThreshold(beta: Double): RDD[(Double, Double)] = createCurve(FMeasure(beta)) http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala index e29b51c..b98aa05 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala @@ -30,7 +30,7 @@ import org.apache.spark.rdd.RDD /** * Evaluator for ranking algorithms. * - * Java users should use [[RankingMetrics$.of]] to create a [[RankingMetrics]] instance. + * Java users should use `RankingMetrics$.of` to create a [[RankingMetrics]] instance. * * @param predictionAndLabels an RDD of (predicted ranking, ground truth set) pairs. */ @@ -41,9 +41,9 @@ class RankingMetrics[T: ClassTag](predictionAndLabels: RDD[(Array[T], Array[T])] /** * Compute the average precision of all the queries, truncated at ranking position k. * - * If for a query, the ranking algorithm returns n (n < k) results, the precision value will be - * computed as #(relevant items retrieved) / k. This formula also applies when the size of the - * ground truth set is less than k. + * If for a query, the ranking algorithm returns n (n is less than k) results, the precision + * value will be computed as #(relevant items retrieved) / k. 
This formula also applies when + * the size of the ground truth set is less than k. * * If a query has an empty ground truth set, zero will be used as precision together with * a log warning. http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/mllib/src/main/scala/org/apache/spark/mllib/evaluation/binary/BinaryClassificationMetricComputers.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/binary/BinaryClassificationMetricComputers.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/binary/BinaryClassificationMetricComputers.scala index be3319d..5a4c6ae 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/binary/BinaryClassificationMetricComputers.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/binary/BinaryClassificationMetricComputers.scala @@ -62,7 +62,7 @@ private[evaluation] object Recall extends BinaryClassificationMetricComputer { * F-Measure. Defined as 0 if both precision and recall are 0. EG in the case that all examples * are false positives. * @param beta the beta constant in F-Measure - * @see http://en.wikipedia.org/wiki/F1_score + * @see <a href="http://en.wikipedia.org/wiki/F1_score">F1 score (Wikipedia)</a> */ private[evaluation] case class FMeasure(beta: Double) extends BinaryClassificationMetricComputer { private val beta2 = beta * beta http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala b/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala index 3c26d26..dca0314 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala @@ -28,7 +28,7 @@ import org.apache.spark.mllib.fpm.FPGrowth.FreqItemset import org.apache.spark.rdd.RDD /** - * Generates association rules from a [[RDD[FreqItemset[Item]]]. This method only generates + * Generates association rules from a `RDD[FreqItemset[Item]]`. This method only generates * association rules which have a single item as the consequent. * */ @@ -56,7 +56,7 @@ class AssociationRules private[fpm] ( /** * Computes the association rules with confidence above [[minConfidence]]. * @param freqItemsets frequent itemset model obtained from [[FPGrowth]] - * @return a [[Set[Rule[Item]]] containing the association rules. + * @return a `Set[Rule[Item]]` containing the association rules. * */ @Since("1.5.0") http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala b/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala index b533860..e3cf0d4 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala @@ -44,7 +44,7 @@ import org.apache.spark.storage.StorageLevel /** * Model trained by [[FPGrowth]], which holds frequent itemsets. 
- * @param freqItemsets frequent itemset, which is an RDD of [[FreqItemset]] + * @param freqItemsets frequent itemset, which is an RDD of `FreqItemset` * @tparam Item item type */ @Since("1.3.0") @@ -69,7 +69,7 @@ class FPGrowthModel[Item: ClassTag] @Since("1.3.0") ( * - human-readable (JSON) model metadata to path/metadata/ * - Parquet formatted data to path/data/ * - * The model may be loaded using [[FPGrowthModel.load]]. + * The model may be loaded using `FPGrowthModel.load`. * * @param sc Spark context used to save model data. * @param path Path specifying the directory in which to save this model. @@ -309,7 +309,7 @@ object FPGrowth { /** * Frequent itemset. - * @param items items in this itemset. Java users should call [[FreqItemset#javaItems]] instead. + * @param items items in this itemset. Java users should call `FreqItemset.javaItems` instead. * @param freq frequency * @tparam Item item type * http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala b/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala index a564167..327cb97 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala @@ -211,7 +211,7 @@ class PrefixSpan private ( } /** - * A Java-friendly version of [[run()]] that reads sequences from a [[JavaRDD]] and returns + * A Java-friendly version of `run()` that reads sequences from a `JavaRDD` and returns * frequent sequences in a [[PrefixSpanModel]]. * @param data ordered sequences of itemsets stored as Java Iterable of Iterables * @tparam Item item type @@ -366,13 +366,13 @@ object PrefixSpan extends Logging { * Items are represented by positive integers, and items in each itemset must be distinct and * ordered. * we use 0 as the delimiter between itemsets. - * For example, a sequence `<(12)(31)1>` is represented by `[0, 1, 2, 0, 1, 3, 0, 1, 0]`. - * The postfix of this sequence w.r.t. to prefix `<1>` is `<(_2)(13)1>`. + * For example, a sequence `(12)(31)1` is represented by `[0, 1, 2, 0, 1, 3, 0, 1, 0]`. + * The postfix of this sequence w.r.t. to prefix `1` is `(_2)(13)1`. * We may reuse the original items array `[0, 1, 2, 0, 1, 3, 0, 1, 0]` to represent the postfix, * and mark the start index of the postfix, which is `2` in this example. * So the active items in this postfix are `[2, 0, 1, 3, 0, 1, 0]`. * We also remember the start indices of partial projections, the ones that split an itemset. - * For example, another possible partial projection w.r.t. `<1>` is `<(_3)1>`. + * For example, another possible partial projection w.r.t. `1` is `(_3)1`. * We remember the start indices of partial projections, which is `[2, 5]` in this example. * This data structure makes it easier to do projections. * @@ -583,7 +583,7 @@ class PrefixSpanModel[Item] @Since("1.5.0") ( * - human-readable (JSON) model metadata to path/metadata/ * - Parquet formatted data to path/data/ * - * The model may be loaded using [[PrefixSpanModel.load]]. + * The model may be loaded using `PrefixSpanModel.load`. * * @param sc Spark context used to save model data. * @param path Path specifying the directory in which to save this model. 
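Since the Postfix representation and PrefixSpanModel persistence are discussed above, here is a brief sketch of running PrefixSpan over sequences of itemsets; the local context, sequences and thresholds are invented for illustration:

import org.apache.spark.mllib.fpm.PrefixSpan
import org.apache.spark.{SparkConf, SparkContext}

// Local context purely for illustration.
val sc = new SparkContext(new SparkConf().setAppName("PrefixSpanSketch").setMaster("local[*]"))

// Each sequence is an ordered list of itemsets.
val sequences = sc.parallelize(Seq(
  Array(Array(1, 2), Array(3)),
  Array(Array(1), Array(3, 2), Array(1, 2)),
  Array(Array(1, 2), Array(5)),
  Array(Array(6))), 2).cache()

val model = new PrefixSpan()
  .setMinSupport(0.5)
  .setMaxPatternLength(5)
  .run(sequences)

model.freqSequences.collect().foreach { fs =>
  println(fs.sequence.map(_.mkString("(", ",", ")")).mkString("<", "", ">") + ": " + fs.freq)
}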
http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala index bb94745..7695aab 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala @@ -32,7 +32,7 @@ private[mllib] object EigenValueDecomposition { * * @param mul a function that multiplies the symmetric matrix with a DenseVector. * @param n dimension of the square matrix (maximum Int.MaxValue). - * @param k number of leading eigenvalues required, 0 < k < n. + * @param k number of leading eigenvalues required, where k must be positive and less than n. * @param tol tolerance of the eigs computation. * @param maxIterations the maximum number of Arnoldi update iterations. * @return a dense vector of eigenvalues in descending order and a dense matrix of eigenvectors http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala index c94d789..63ea9d3 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala @@ -77,7 +77,7 @@ sealed trait Vector extends Serializable { /** * Returns a hash code value for the vector. The hash code is based on its size and its first 128 - * nonzero entries, using a hash algorithm similar to [[java.util.Arrays.hashCode]]. + * nonzero entries, using a hash algorithm similar to `java.util.Arrays.hashCode`. */ override def hashCode(): Int = { // This is a reference implementation. It calls return in foreachActive, which is slow. @@ -351,7 +351,7 @@ object Vectors { } /** - * Parses a string resulted from [[Vector.toString]] into a [[Vector]]. + * Parses a string resulted from `Vector.toString` into a [[Vector]]. */ @Since("1.1.0") def parse(s: String): Vector = { http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala index 67da88e..8979707 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala @@ -88,11 +88,11 @@ class GradientDescent private[spark] (private var gradient: Gradient, private va * convergenceTol is a condition which decides iteration termination. * The end of iteration is decided based on below logic. * - * - If the norm of the new solution vector is >1, the diff of solution vectors + * - If the norm of the new solution vector is greater than 1, the diff of solution vectors * is compared to relative tolerance which means normalizing by the norm of * the new solution vector. 
- * - If the norm of the new solution vector is <=1, the diff of solution vectors - * is compared to absolute tolerance which is not normalizing. + * - If the norm of the new solution vector is less than or equal to 1, the diff of solution + * vectors is compared to absolute tolerance which is not normalizing. * * Must be between 0.0 and 1.0 inclusively. */ http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala index 6232ff3..900eec1 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala @@ -49,8 +49,7 @@ class LBFGS(private var gradient: Gradient, private var updater: Updater) * Set the number of corrections used in the LBFGS update. Default 10. * Values of numCorrections less than 3 are not recommended; large values * of numCorrections will result in excessive computing time. - * 3 < numCorrections < 10 is recommended. - * Restriction: numCorrections > 0 + * numCorrections must be positive, and values from 4 to 9 are generally recommended. */ def setNumCorrections(corrections: Int): this.type = { require(corrections > 0, http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/mllib/src/main/scala/org/apache/spark/mllib/optimization/NNLS.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/NNLS.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/NNLS.scala index b7c9fcf..86632ae 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/NNLS.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/NNLS.scala @@ -53,8 +53,13 @@ private[spark] object NNLS { * projected gradient method. That is, find x minimising ||Ax - b||_2 given A^T A and A^T b. * * We solve the problem - * min_x 1/2 x^T ata x^T - x^T atb - * subject to x >= 0 + * + * <blockquote> + * $$ + * min_x 1/2 x^T ata x - x^T atb + * $$ + * </blockquote> + * where x is nonnegative. * * The method used is similar to one described by Polyak (B. T. Polyak, The conjugate gradient * method in extremal problems, Zh. Vychisl. Mat. Mat. Fiz. 9(4)(1969), pp. 94-112) for bound- http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala index aa7dd1a..142f0ec 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala @@ -95,9 +95,9 @@ class SimpleUpdater extends Updater { * The corresponding proximal operator for the L1 norm is the soft-thresholding * function. That is, each weight component is shrunk towards 0 by shrinkageVal. * - * If w > shrinkageVal, set weight component to w-shrinkageVal. - * If w < -shrinkageVal, set weight component to w+shrinkageVal. - * If -shrinkageVal < w < shrinkageVal, set weight component to 0. + * If w is greater than shrinkageVal, set weight component to w-shrinkageVal.
+ * If w is less than -shrinkageVal, set weight component to w+shrinkageVal. + * If w is in (-shrinkageVal, shrinkageVal), set weight component to 0. * * Equivalently, set weight component to signum(w) * max(0.0, abs(w) - shrinkageVal) */ http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala b/mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala index 6d60136..85d4d7f 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala @@ -249,8 +249,8 @@ object RandomRDDs { * shape and scale. * * @param sc SparkContext used to create the RDD. - * @param shape shape parameter (> 0) for the gamma distribution - * @param scale scale parameter (> 0) for the gamma distribution + * @param shape shape parameter (greater than 0) for the gamma distribution + * @param scale scale parameter (greater than 0) for the gamma distribution * @param size Size of the RDD. * @param numPartitions Number of partitions in the RDD (default: `sc.defaultParallelism`). * @param seed Random seed (default: a random long integer). @@ -766,8 +766,8 @@ object RandomRDDs { * gamma distribution with the input shape and scale. * * @param sc SparkContext used to create the RDD. - * @param shape shape parameter (> 0) for the gamma distribution. - * @param scale scale parameter (> 0) for the gamma distribution. + * @param shape shape parameter (greater than 0) for the gamma distribution. + * @param scale scale parameter (greater than 0) for the gamma distribution. * @param numRows Number of Vectors in the RDD. * @param numCols Number of elements in each Vector.
* @param numPartitions Number of partitions in the RDD (default: `sc.defaultParallelism`) http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/mllib/src/main/scala/org/apache/spark/mllib/rdd/SlidingRDD.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/rdd/SlidingRDD.scala b/mllib/src/main/scala/org/apache/spark/mllib/rdd/SlidingRDD.scala index adb5e51..365b2a0 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/rdd/SlidingRDD.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/rdd/SlidingRDD.scala @@ -42,8 +42,8 @@ class SlidingRDDPartition[T](val idx: Int, val prev: Partition, val tail: Seq[T] * @param windowSize the window size, must be greater than 1 * @param step step size for windows * - * @see [[org.apache.spark.mllib.rdd.RDDFunctions.sliding(Int, Int)*]] - * @see [[scala.collection.IterableLike.sliding(Int, Int)*]] + * @see `org.apache.spark.mllib.rdd.RDDFunctions.sliding(Int, Int)*` + * @see `scala.collection.IterableLike.sliding(Int, Int)*` */ private[mllib] class SlidingRDD[T: ClassTag](@transient val parent: RDD[T], val windowSize: Int, val step: Int) http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/mllib/src/main/scala/org/apache/spark/mllib/stat/test/KolmogorovSmirnovTest.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/KolmogorovSmirnovTest.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/KolmogorovSmirnovTest.scala index a8b5955..d17f704 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/KolmogorovSmirnovTest.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/KolmogorovSmirnovTest.scala @@ -31,7 +31,8 @@ import org.apache.spark.rdd.RDD * distribution of the sample data and the theoretical distribution we can provide a test for the * the null hypothesis that the sample data comes from that theoretical distribution. * For more information on KS Test: - * @see [[https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test]] + * @see <a href="https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test"> + * Kolmogorov-Smirnov test (Wikipedia)</a> * * Implementation note: We seek to implement the KS test with a minimal number of distributed * passes. We sort the RDD, and then perform the following operations on a per-partition basis: http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/mllib/src/main/scala/org/apache/spark/mllib/stat/test/StreamingTest.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/StreamingTest.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/StreamingTest.scala index 97c032d..d680237 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/StreamingTest.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/StreamingTest.scala @@ -47,7 +47,7 @@ case class BinarySample @Since("1.6.0") ( * of the observation. * * To address novelty affects, the `peacePeriod` specifies a set number of initial - * [[org.apache.spark.rdd.RDD]] batches of the [[DStream]] to be dropped from significance testing. + * [[org.apache.spark.rdd.RDD]] batches of the `DStream` to be dropped from significance testing. * * The `windowSize` sets the number of batches each significance test is to be performed over. The * window is sliding with a stride length of 1 batch. 
Setting windowSize to 0 will perform @@ -97,7 +97,7 @@ class StreamingTest @Since("1.6.0") () extends Logging with Serializable { } /** - * Register a [[DStream]] of values for significance testing. + * Register a `DStream` of values for significance testing. * * @param data stream of BinarySample(key,value) pairs where the key denotes group membership * (true = experiment, false = control) and the value is the numerical metric to @@ -114,7 +114,7 @@ class StreamingTest @Since("1.6.0") () extends Logging with Serializable { } /** - * Register a [[JavaDStream]] of values for significance testing. + * Register a `JavaDStream` of values for significance testing. * * @param data stream of BinarySample(isExperiment,value) pairs where the isExperiment denotes * group (true = experiment, false = control) and the value is the numerical metric http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/mllib/src/main/scala/org/apache/spark/mllib/stat/test/StreamingTestMethod.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/StreamingTestMethod.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/StreamingTestMethod.scala index ff27f28..14ac14d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/StreamingTestMethod.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/StreamingTestMethod.scala @@ -73,7 +73,7 @@ private[stat] sealed trait StreamingTestMethod extends Serializable { * This test does not assume equal variance between the two samples and does not assume equal * sample size. * - * @see http://en.wikipedia.org/wiki/Welch%27s_t_test + * @see <a href="http://en.wikipedia.org/wiki/Welch%27s_t_test">Welch's t-test (Wikipedia)</a> */ private[stat] object WelchTTest extends StreamingTestMethod with Logging { @@ -115,7 +115,7 @@ private[stat] object WelchTTest extends StreamingTestMethod with Logging { * mean. This test assumes equal variance between the two samples and does not assume equal sample * size. For unequal variances, Welch's t-test should be used instead. * - * @see http://en.wikipedia.org/wiki/Student%27s_t-test + * @see <a href="http://en.wikipedia.org/wiki/Student%27s_t-test">Student's t-test (Wikipedia)</a> */ private[stat] object StudentTTest extends StreamingTestMethod with Logging { http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala index d846c43..499c807 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala @@ -162,7 +162,7 @@ object DecisionTree extends Serializable with Logging { * @param numClasses Number of classes for classification. Default value of 2. * @param maxBins Maximum number of bins used for splitting features. * @param quantileCalculationStrategy Algorithm for calculating quantiles. - * @param categoricalFeaturesInfo Map storing arity of categorical features. An entry (n -> k) + * @param categoricalFeaturesInfo Map storing arity of categorical features. An entry (n to k) * indicates that feature n is categorical with k categories * indexed from 0: {0, 1, ..., k-1}. * @return DecisionTreeModel that can be used for prediction. 
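Several hunks in this file rephrase the categoricalFeaturesInfo parameter, so a brief sketch of how that map is passed to the RDD-based API may help; it assumes an existing SparkContext `sc`, and the data values are purely illustrative:

{{{
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.tree.DecisionTree

// Illustrative data; `sc` is assumed to be an existing SparkContext.
// Feature 0 is continuous; feature 1 is categorical with 3 categories {0, 1, 2}.
val training = sc.parallelize(Seq(
  LabeledPoint(0.0, Vectors.dense(1.2, 0.0)),
  LabeledPoint(1.0, Vectors.dense(0.4, 2.0)),
  LabeledPoint(0.0, Vectors.dense(2.5, 1.0)),
  LabeledPoint(1.0, Vectors.dense(0.1, 2.0))))

// categoricalFeaturesInfo maps feature index to arity: here feature 1 has 3 categories.
val categoricalFeaturesInfo = Map(1 -> 3)

val model = DecisionTree.trainClassifier(
  training,
  2,                       // numClasses
  categoricalFeaturesInfo,
  "gini",                  // impurity
  5,                       // maxDepth
  32)                      // maxBins

println(model.toDebugString)
}}}

Features left out of the map are treated as continuous.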
@@ -192,7 +192,7 @@ object DecisionTree extends Serializable with Logging { * @param input Training dataset: RDD of [[org.apache.spark.mllib.regression.LabeledPoint]]. * Labels should take values {0, 1, ..., numClasses-1}. * @param numClasses Number of classes for classification. - * @param categoricalFeaturesInfo Map storing arity of categorical features. An entry (n -> k) + * @param categoricalFeaturesInfo Map storing arity of categorical features. An entry (n to k) * indicates that feature n is categorical with k categories * indexed from 0: {0, 1, ..., k-1}. * @param impurity Criterion used for information gain calculation. @@ -238,7 +238,7 @@ object DecisionTree extends Serializable with Logging { * * @param input Training dataset: RDD of [[org.apache.spark.mllib.regression.LabeledPoint]]. * Labels are real numbers. - * @param categoricalFeaturesInfo Map storing arity of categorical features. An entry (n -> k) + * @param categoricalFeaturesInfo Map storing arity of categorical features. An entry (n to k) * indicates that feature n is categorical with k categories * indexed from 0: {0, 1, ..., k-1}. * @param impurity Criterion used for information gain calculation. http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/mllib/src/main/scala/org/apache/spark/mllib/tree/GradientBoostedTrees.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/GradientBoostedTrees.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/GradientBoostedTrees.scala index cdeef16..3e85678 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/GradientBoostedTrees.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/GradientBoostedTrees.scala @@ -74,7 +74,7 @@ class GradientBoostedTrees private[spark] ( } /** - * Java-friendly API for [[org.apache.spark.mllib.tree.GradientBoostedTrees!#run]]. + * Java-friendly API for `org.apache.spark.mllib.tree.GradientBoostedTrees.run`. */ @Since("1.2.0") def run(input: JavaRDD[LabeledPoint]): GradientBoostedTreesModel = { @@ -89,7 +89,7 @@ class GradientBoostedTrees private[spark] ( * This dataset should be different from the training dataset, * but it should follow the same distribution. * E.g., these two datasets could be created from an original dataset - * by using [[org.apache.spark.rdd.RDD.randomSplit()]] + * by using `org.apache.spark.rdd.RDD.randomSplit()` * @return GradientBoostedTreesModel that can be used for prediction. */ @Since("1.4.0") @@ -106,7 +106,7 @@ class GradientBoostedTrees private[spark] ( } /** - * Java-friendly API for [[org.apache.spark.mllib.tree.GradientBoostedTrees!#runWithValidation]]. + * Java-friendly API for `org.apache.spark.mllib.tree.GradientBoostedTrees.runWithValidation`. 
*/ @Since("1.4.0") def runWithValidation( http://git-wip-us.apache.org/repos/asf/spark/blob/f830bb91/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala index 428af21..1f6cb08 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala @@ -53,14 +53,15 @@ import org.apache.spark.util.Utils * the type of random forest (classification or regression), feature type * (continuous, categorical), depth of the tree, quantile calculation strategy, * etc. - * @param numTrees If 1, then no bootstrapping is used. If > 1, then bootstrapping is done. + * @param numTrees If 1, then no bootstrapping is used. If greater than 1, then bootstrapping is + * done. * @param featureSubsetStrategy Number of features to consider for splits at each node. * Supported values: "auto", "all", "sqrt", "log2", "onethird". * Supported numerical values: "(0.0-1.0]", "[1-n]". * If "auto" is set, this parameter is set based on numTrees: * if numTrees == 1, set to "all"; - * if numTrees > 1 (forest) set to "sqrt" for classification and - * to "onethird" for regression. + * if numTrees is greater than 1 (forest) set to "sqrt" for + * classification and to "onethird" for regression. * If a real value "n" in the range (0, 1.0] is set, * use n * number of features. * If an integer value "n" in the range (1, num features) is set, @@ -111,7 +112,7 @@ object RandomForest extends Serializable with Logging { * Supported values: "auto", "all", "sqrt", "log2", "onethird". * If "auto" is set, this parameter is set based on numTrees: * if numTrees == 1, set to "all"; - * if numTrees > 1 (forest) set to "sqrt". + * if numTrees is greater than 1 (forest) set to "sqrt". * @param seed Random seed for bootstrapping and choosing feature subsets. * @return RandomForestModel that can be used for prediction. */ @@ -134,7 +135,7 @@ object RandomForest extends Serializable with Logging { * @param input Training dataset: RDD of [[org.apache.spark.mllib.regression.LabeledPoint]]. * Labels should take values {0, 1, ..., numClasses-1}. * @param numClasses Number of classes for classification. - * @param categoricalFeaturesInfo Map storing arity of categorical features. An entry (n -> k) + * @param categoricalFeaturesInfo Map storing arity of categorical features. An entry (n to k) * indicates that feature n is categorical with k categories * indexed from 0: {0, 1, ..., k-1}. * @param numTrees Number of trees in the random forest. @@ -142,7 +143,7 @@ object RandomForest extends Serializable with Logging { * Supported values: "auto", "all", "sqrt", "log2", "onethird". * If "auto" is set, this parameter is set based on numTrees: * if numTrees == 1, set to "all"; - * if numTrees > 1 (forest) set to "sqrt". + * if numTrees is greater than 1 (forest) set to "sqrt". * @param impurity Criterion used for information gain calculation. * Supported values: "gini" (recommended) or "entropy". * @param maxDepth Maximum depth of the tree (e.g. depth 0 means 1 leaf node, depth 1 means @@ -200,7 +201,7 @@ object RandomForest extends Serializable with Logging { * Supported values: "auto", "all", "sqrt", "log2", "onethird". 
* If "auto" is set, this parameter is set based on numTrees: * if numTrees == 1, set to "all"; - * if numTrees > 1 (forest) set to "onethird". + * if numTrees is greater than 1 (forest) set to "onethird". * @param seed Random seed for bootstrapping and choosing feature subsets. * @return RandomForestModel that can be used for prediction. */ @@ -222,7 +223,7 @@ object RandomForest extends Serializable with Logging { * * @param input Training dataset: RDD of [[org.apache.spark.mllib.regression.LabeledPoint]]. * Labels are real numbers. - * @param categoricalFeaturesInfo Map storing arity of categorical features. An entry (n -> k) + * @param categoricalFeaturesInfo Map storing arity of categorical features. An entry (n to k) * indicates that feature n is categorical with k categories * indexed from 0: {0, 1, ..., k-1}. * @param numTrees Number of trees in the random forest. @@ -230,7 +231,7 @@ object RandomForest extends Serializable with Logging { * Supported values: "auto", "all", "sqrt", "log2", "onethird". * If "auto" is set, this parameter is set based on numTrees: * if numTrees == 1, set to "all"; - * if numTrees > 1 (forest) set to "onethird". + * if numTrees is greater than 1 (forest) set to "onethird". * @param impurity Criterion used for information gain calculation. * The only supported value for regression is "variance". * @param maxDepth Maximum depth of the tree. (e.g., depth 0 means 1 leaf node, depth 1 means --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org