Repository: spark
Updated Branches:
  refs/heads/branch-1.5 055387c08 -> 6f05b7aeb


[SPARK-10239] [SPARK-10244] [MLLIB] update since versions in mllib.pmml and mllib.util

Same as #8421 but for `mllib.pmml` and `mllib.util`.

cc dbtsai

Author: Xiangrui Meng <m...@databricks.com>

Closes #8430 from mengxr/SPARK-10239 and squashes the following commits:

a189acf [Xiangrui Meng] update since versions in mllib.pmml and mllib.util

(cherry picked from commit 00ae4be97f7b205432db2967ba6d506286ef2ca6)
Signed-off-by: DB Tsai <d...@netflix.com>

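For readers unfamiliar with the annotation being added throughout this diff: `@Since` records the Spark release in which an API first appeared, and the version strings surface in the generated API docs. A minimal stand-in sketch of the idea (the real definition lives in `org.apache.spark.annotation.Since`; treat this declaration as an assumption, not Spark's exact source):

    import scala.annotation.StaticAnnotation

    // Hypothetical stand-in for org.apache.spark.annotation.Since:
    // a static annotation carrying the first-release version string.
    class Since(version: String) extends StaticAnnotation

    // Applied as in the hunks below: once on the class/object/trait,
    // and once on each public member.
    @Since("1.4.0")
    trait Exportable {
      @Since("1.4.0")
      def export(): String
    }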

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6f05b7ae
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6f05b7ae
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6f05b7ae

Branch: refs/heads/branch-1.5
Commit: 6f05b7aebd66a00e2556a29b35084e81ac526406
Parents: 055387c
Author: Xiangrui Meng <m...@databricks.com>
Authored: Tue Aug 25 14:11:38 2015 -0700
Committer: DB Tsai <d...@netflix.com>
Committed: Tue Aug 25 14:11:50 2015 -0700

----------------------------------------------------------------------
 .../org/apache/spark/mllib/pmml/PMMLExportable.scala      |  7 ++++++-
 .../org/apache/spark/mllib/util/DataValidators.scala      |  7 +++++--
 .../org/apache/spark/mllib/util/KMeansDataGenerator.scala |  5 ++++-
 .../org/apache/spark/mllib/util/LinearDataGenerator.scala | 10 ++++++++--
 .../mllib/util/LogisticRegressionDataGenerator.scala      |  5 ++++-
 .../org/apache/spark/mllib/util/MFDataGenerator.scala     |  4 +++-
 .../main/scala/org/apache/spark/mllib/util/MLUtils.scala  |  2 ++
 .../org/apache/spark/mllib/util/SVMDataGenerator.scala    |  6 ++++--
 .../scala/org/apache/spark/mllib/util/modelSaveLoad.scala |  6 +++++-
 9 files changed, 41 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/6f05b7ae/mllib/src/main/scala/org/apache/spark/mllib/pmml/PMMLExportable.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/pmml/PMMLExportable.scala b/mllib/src/main/scala/org/apache/spark/mllib/pmml/PMMLExportable.scala
index 5e882d4..274ac7c 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/pmml/PMMLExportable.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/pmml/PMMLExportable.scala
@@ -23,7 +23,7 @@ import javax.xml.transform.stream.StreamResult
 import org.jpmml.model.JAXBUtil
 
 import org.apache.spark.SparkContext
-import org.apache.spark.annotation.{DeveloperApi, Experimental}
+import org.apache.spark.annotation.{DeveloperApi, Experimental, Since}
 import org.apache.spark.mllib.pmml.export.PMMLModelExportFactory
 
 /**
@@ -33,6 +33,7 @@ import org.apache.spark.mllib.pmml.export.PMMLModelExportFactory
  * developed by the Data Mining Group (www.dmg.org).
  */
 @DeveloperApi
+@Since("1.4.0")
 trait PMMLExportable {
 
   /**
@@ -48,6 +49,7 @@ trait PMMLExportable {
    * Export the model to a local file in PMML format
    */
   @Experimental
+  @Since("1.4.0")
   def toPMML(localPath: String): Unit = {
     toPMML(new StreamResult(new File(localPath)))
   }
@@ -57,6 +59,7 @@ trait PMMLExportable {
   * Export the model to a directory on a distributed file system in PMML format
    */
   @Experimental
+  @Since("1.4.0")
   def toPMML(sc: SparkContext, path: String): Unit = {
     val pmml = toPMML()
     sc.parallelize(Array(pmml), 1).saveAsTextFile(path)
@@ -67,6 +70,7 @@ trait PMMLExportable {
    * Export the model to the OutputStream in PMML format
    */
   @Experimental
+  @Since("1.4.0")
   def toPMML(outputStream: OutputStream): Unit = {
     toPMML(new StreamResult(outputStream))
   }
@@ -76,6 +80,7 @@ trait PMMLExportable {
    * Export the model to a String in PMML format
    */
   @Experimental
+  @Since("1.4.0")
   def toPMML(): String = {
     val writer = new StringWriter
     toPMML(new StreamResult(writer))

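The `toPMML` overloads annotated in this hunk are mixed into PMML-exportable models (e.g. `KMeansModel`). A usage sketch, assuming a running `SparkContext` named `sc` on Spark 1.4+; paths are placeholders:

    import org.apache.spark.mllib.clustering.KMeans
    import org.apache.spark.mllib.linalg.Vectors

    val data = sc.parallelize(Seq(Vectors.dense(0.0, 0.0), Vectors.dense(9.0, 9.0)))
    val model = KMeans.train(data, k = 2, maxIterations = 10)

    model.toPMML("/tmp/kmeans.xml")         // local file
    model.toPMML(sc, "/tmp/kmeans-dir")     // directory on a distributed FS
    val pmml: String = model.toPMML()       // in-memory PMML document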
http://git-wip-us.apache.org/repos/asf/spark/blob/6f05b7ae/mllib/src/main/scala/org/apache/spark/mllib/util/DataValidators.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/DataValidators.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/DataValidators.scala
index be335a1..dffe6e7 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/util/DataValidators.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/DataValidators.scala
@@ -17,16 +17,17 @@
 
 package org.apache.spark.mllib.util
 
-import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.Logging
-import org.apache.spark.rdd.RDD
+import org.apache.spark.annotation.{DeveloperApi, Since}
 import org.apache.spark.mllib.regression.LabeledPoint
+import org.apache.spark.rdd.RDD
 
 /**
  * :: DeveloperApi ::
  * A collection of methods used to validate data before applying ML algorithms.
  */
 @DeveloperApi
+@Since("0.8.0")
 object DataValidators extends Logging {
 
   /**
@@ -34,6 +35,7 @@ object DataValidators extends Logging {
    *
    * @return True if labels are all zero or one, false otherwise.
    */
+  @Since("1.0.0")
   val binaryLabelValidator: RDD[LabeledPoint] => Boolean = { data =>
     val numInvalid = data.filter(x => x.label != 1.0 && x.label != 0.0).count()
     if (numInvalid != 0) {
@@ -48,6 +50,7 @@ object DataValidators extends Logging {
    *
   * @return True if labels are all in the range of {0, 1, ..., k-1}, false otherwise.
    */
+  @Since("1.3.0")
   def multiLabelValidator(k: Int): RDD[LabeledPoint] => Boolean = { data =>
     val numInvalid = data.filter(x =>
       x.label - x.label.toInt != 0.0 || x.label < 0 || x.label > k - 1).count()

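The validators above are plain `RDD[LabeledPoint] => Boolean` functions, so they can be invoked directly. A small sketch, assuming a `SparkContext` named `sc`:

    import org.apache.spark.mllib.linalg.Vectors
    import org.apache.spark.mllib.regression.LabeledPoint
    import org.apache.spark.mllib.util.DataValidators

    val data = sc.parallelize(Seq(
      LabeledPoint(0.0, Vectors.dense(1.0)),
      LabeledPoint(1.0, Vectors.dense(2.0))))

    DataValidators.binaryLabelValidator(data)    // true: labels are all 0 or 1
    DataValidators.multiLabelValidator(3)(data)  // true: labels are in {0, 1, 2}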
http://git-wip-us.apache.org/repos/asf/spark/blob/6f05b7ae/mllib/src/main/scala/org/apache/spark/mllib/util/KMeansDataGenerator.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/KMeansDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/KMeansDataGenerator.scala
index e6bcff4..00fd160 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/util/KMeansDataGenerator.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/KMeansDataGenerator.scala
@@ -19,8 +19,8 @@ package org.apache.spark.mllib.util
 
 import scala.util.Random
 
-import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.SparkContext
+import org.apache.spark.annotation.{DeveloperApi, Since}
 import org.apache.spark.rdd.RDD
 
 /**
@@ -30,6 +30,7 @@ import org.apache.spark.rdd.RDD
  * cluster with scale 1 around each center.
  */
 @DeveloperApi
+@Since("0.8.0")
 object KMeansDataGenerator {
 
   /**
@@ -42,6 +43,7 @@ object KMeansDataGenerator {
    * @param r Scaling factor for the distribution of the initial centers
    * @param numPartitions Number of partitions of the generated RDD; default 2
    */
+  @Since("0.8.0")
   def generateKMeansRDD(
       sc: SparkContext,
       numPoints: Int,
@@ -62,6 +64,7 @@ object KMeansDataGenerator {
     }
   }
 
+  @Since("0.8.0")
   def main(args: Array[String]) {
     if (args.length < 6) {
       // scalastyle:off println

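`generateKMeansRDD` can also be called programmatically rather than through `main`. A sketch using the parameters documented above (assumes a `SparkContext` named `sc`):

    import org.apache.spark.mllib.util.KMeansDataGenerator

    // 1000 points in 3 dimensions, drawn around 5 centers scaled by r = 10.0
    val points = KMeansDataGenerator.generateKMeansRDD(
      sc, numPoints = 1000, k = 5, d = 3, r = 10.0, numPartitions = 2)
    points.take(2).foreach(p => println(p.mkString(",")))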
http://git-wip-us.apache.org/repos/asf/spark/blob/6f05b7ae/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala
index 87eeb5d..8b0a02e 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala
@@ -22,11 +22,11 @@ import scala.util.Random
 
 import com.github.fommil.netlib.BLAS.{getInstance => blas}
 
-import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.SparkContext
-import org.apache.spark.rdd.RDD
+import org.apache.spark.annotation.{DeveloperApi, Since}
 import org.apache.spark.mllib.linalg.Vectors
 import org.apache.spark.mllib.regression.LabeledPoint
+import org.apache.spark.rdd.RDD
 
 /**
  * :: DeveloperApi ::
@@ -35,6 +35,7 @@ import org.apache.spark.mllib.regression.LabeledPoint
  * response variable `Y`.
  */
 @DeveloperApi
+@Since("0.8.0")
 object LinearDataGenerator {
 
   /**
@@ -46,6 +47,7 @@ object LinearDataGenerator {
    * @param seed Random seed
    * @return Java List of input.
    */
+  @Since("0.8.0")
   def generateLinearInputAsList(
       intercept: Double,
       weights: Array[Double],
@@ -68,6 +70,7 @@ object LinearDataGenerator {
    * @param eps Epsilon scaling factor.
    * @return Seq of input.
    */
+  @Since("0.8.0")
   def generateLinearInput(
       intercept: Double,
       weights: Array[Double],
@@ -92,6 +95,7 @@ object LinearDataGenerator {
    * @param eps Epsilon scaling factor.
    * @return Seq of input.
    */
+  @Since("0.8.0")
   def generateLinearInput(
       intercept: Double,
       weights: Array[Double],
@@ -132,6 +136,7 @@ object LinearDataGenerator {
    *
    * @return RDD of LabeledPoint containing sample data.
    */
+  @Since("0.8.0")
   def generateLinearRDD(
       sc: SparkContext,
       nexamples: Int,
@@ -151,6 +156,7 @@ object LinearDataGenerator {
     data
   }
 
+  @Since("0.8.0")
   def main(args: Array[String]) {
     if (args.length < 2) {
       // scalastyle:off println

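A programmatic sketch of the generators annotated above (assumes a `SparkContext` named `sc`; the positional argument order is recalled from the source signatures and should be treated as an assumption):

    import org.apache.spark.mllib.util.LinearDataGenerator

    // RDD variant: 500 examples, 10 features, noise scale eps = 0.1
    val train = LinearDataGenerator.generateLinearRDD(sc, 500, 10, 0.1)

    // Java-friendly variant returning java.util.List[LabeledPoint];
    // assumed positional args: intercept, weights, nPoints, seed, eps
    val javaList = LinearDataGenerator.generateLinearInputAsList(
      0.0, Array(1.0, -0.5), 20, 42, 0.1)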
http://git-wip-us.apache.org/repos/asf/spark/blob/6f05b7ae/mllib/src/main/scala/org/apache/spark/mllib/util/LogisticRegressionDataGenerator.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/LogisticRegressionDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/LogisticRegressionDataGenerator.scala
index c09cbe6..33477ee 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/util/LogisticRegressionDataGenerator.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/LogisticRegressionDataGenerator.scala
@@ -19,7 +19,7 @@ package org.apache.spark.mllib.util
 
 import scala.util.Random
 
-import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.annotation.{Since, DeveloperApi}
 import org.apache.spark.SparkContext
 import org.apache.spark.rdd.RDD
 import org.apache.spark.mllib.regression.LabeledPoint
@@ -31,6 +31,7 @@ import org.apache.spark.mllib.linalg.Vectors
   * with probability `probOne` and scales features for positive examples by `eps`.
  */
 @DeveloperApi
+@Since("0.8.0")
 object LogisticRegressionDataGenerator {
 
   /**
@@ -43,6 +44,7 @@ object LogisticRegressionDataGenerator {
   * @param nparts Number of partitions of the generated RDD. Default value is 2.
   * @param probOne Probability that a label is 1 (and not 0). Default value is 0.5.
    */
+  @Since("0.8.0")
   def generateLogisticRDD(
     sc: SparkContext,
     nexamples: Int,
@@ -62,6 +64,7 @@ object LogisticRegressionDataGenerator {
     data
   }
 
+  @Since("0.8.0")
   def main(args: Array[String]) {
     if (args.length != 5) {
       // scalastyle:off println

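A sketch of `generateLogisticRDD` with the parameters documented above (assumes a `SparkContext` named `sc`):

    import org.apache.spark.mllib.util.LogisticRegressionDataGenerator

    // 1000 examples, 5 features; positive examples' features scaled by eps = 3.0
    val lrData = LogisticRegressionDataGenerator.generateLogisticRDD(
      sc, 1000, 5, 3.0, nparts = 2, probOne = 0.5)
    lrData.map(_.label).countByValue()  // ~50/50 split of 0.0 and 1.0 labels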
http://git-wip-us.apache.org/repos/asf/spark/blob/6f05b7ae/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala
index 16f4305..906bd30 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala
@@ -23,7 +23,7 @@ import scala.language.postfixOps
 import scala.util.Random
 
 import org.apache.spark.SparkContext
-import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.annotation.{Since, DeveloperApi}
 import org.apache.spark.mllib.linalg.{BLAS, DenseMatrix}
 import org.apache.spark.rdd.RDD
 
@@ -52,7 +52,9 @@ import org.apache.spark.rdd.RDD
  *   testSampFact   (Double) Percentage of training data to use as test data.
  */
 @DeveloperApi
+@Since("0.8.0")
 object MFDataGenerator {
+  @Since("0.8.0")
   def main(args: Array[String]) {
     if (args.length < 2) {
       // scalastyle:off println

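`MFDataGenerator` exposes only `main`, so it is driven by the command-line arguments described in the scaladoc above the object. A hypothetical direct invocation; the argument order here is assumed from that scaladoc and has not been verified against the parser:

    // Assumed order: sparkMaster, outputDir, m, n, rank,
    //                trainSampFact, noise, sigma, test, testSampFact
    MFDataGenerator.main(Array(
      "local[2]", "/tmp/mf-data", "100", "100", "10",
      "1.0", "false", "0.1", "false", "0.1"))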
http://git-wip-us.apache.org/repos/asf/spark/blob/6f05b7ae/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
index 4940974..81c2f0c 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
@@ -36,6 +36,7 @@ import org.apache.spark.streaming.dstream.DStream
 /**
  * Helper methods to load, save and pre-process data used in ML Lib.
  */
+@Since("0.8.0")
 object MLUtils {
 
   private[mllib] lazy val EPSILON = {
@@ -168,6 +169,7 @@ object MLUtils {
    *
    * @see [[org.apache.spark.mllib.util.MLUtils#loadLibSVMFile]]
    */
+  @Since("1.0.0")
   def saveAsLibSVMFile(data: RDD[LabeledPoint], dir: String) {
     // TODO: allow to specify label precision and feature precision.
     val dataStr = data.map { case LabeledPoint(label, features) =>

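`saveAsLibSVMFile` pairs with `MLUtils.loadLibSVMFile` for a round trip through the LIBSVM text format. A sketch, assuming a `SparkContext` named `sc` and placeholder paths:

    import org.apache.spark.mllib.util.MLUtils

    val examples = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")
    MLUtils.saveAsLibSVMFile(examples, "/tmp/libsvm-out")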
http://git-wip-us.apache.org/repos/asf/spark/blob/6f05b7ae/mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala
index ad20b76..cde5979 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala
@@ -21,11 +21,11 @@ import scala.util.Random
 
 import com.github.fommil.netlib.BLAS.{getInstance => blas}
 
-import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.SparkContext
-import org.apache.spark.rdd.RDD
+import org.apache.spark.annotation.{DeveloperApi, Since}
 import org.apache.spark.mllib.linalg.Vectors
 import org.apache.spark.mllib.regression.LabeledPoint
+import org.apache.spark.rdd.RDD
 
 /**
  * :: DeveloperApi ::
@@ -33,8 +33,10 @@ import org.apache.spark.mllib.regression.LabeledPoint
  * for the features and adds Gaussian noise with weight 0.1 to generate labels.
  */
 @DeveloperApi
+@Since("0.8.0")
 object SVMDataGenerator {
 
+  @Since("0.8.0")
   def main(args: Array[String]) {
     if (args.length < 2) {
       // scalastyle:off println

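The labeling rule in the scaladoc above (uniform features, a Gaussian noise term with weight 0.1, threshold at zero) can be illustrated standalone. This is a hedged re-creation of the idea, not the object's exact source:

    import scala.util.Random

    val rnd = new Random(42)
    val nfeatures = 3
    val trueWeights = Array.fill(nfeatures)(rnd.nextGaussian())
    val x = Array.fill(nfeatures)(rnd.nextDouble())  // uniform features
    val margin = trueWeights.zip(x).map { case (w, xi) => w * xi }.sum +
      0.1 * rnd.nextGaussian()                       // noise with weight 0.1
    val label = if (margin < 0) 0.0 else 1.0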
http://git-wip-us.apache.org/repos/asf/spark/blob/6f05b7ae/mllib/src/main/scala/org/apache/spark/mllib/util/modelSaveLoad.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/modelSaveLoad.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/modelSaveLoad.scala
index 30d642c..4d71d53 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/util/modelSaveLoad.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/modelSaveLoad.scala
@@ -24,7 +24,7 @@ import org.json4s._
 import org.json4s.jackson.JsonMethods._
 
 import org.apache.spark.SparkContext
-import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.annotation.{DeveloperApi, Since}
 import org.apache.spark.sql.catalyst.ScalaReflection
 import org.apache.spark.sql.types.{DataType, StructField, StructType}
 
@@ -35,6 +35,7 @@ import org.apache.spark.sql.types.{DataType, StructField, StructType}
  * This should be inherited by the class which implements model instances.
  */
 @DeveloperApi
+@Since("1.3.0")
 trait Saveable {
 
   /**
@@ -50,6 +51,7 @@ trait Saveable {
    * @param path  Path specifying the directory in which to save this model.
   *              If the directory already exists, this method throws an exception.
    */
+  @Since("1.3.0")
   def save(sc: SparkContext, path: String): Unit
 
   /** Current version of model save/load format. */
@@ -64,6 +66,7 @@ trait Saveable {
  * This should be inherited by an object paired with the model class.
  */
 @DeveloperApi
+@Since("1.3.0")
 trait Loader[M <: Saveable] {
 
   /**
@@ -75,6 +78,7 @@ trait Loader[M <: Saveable] {
    * @param path  Path specifying the directory to which the model was saved.
    * @return  Model instance
    */
+  @Since("1.3.0")
   def load(sc: SparkContext, path: String): M
 
 }

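`Saveable` and `Loader` give MLlib models a uniform persistence contract: `save` writes the model under a directory, and the model's companion object implements `Loader` to restore it. A usage sketch, assuming a `SparkContext` named `sc` and an `RDD[LabeledPoint]` named `training`:

    import org.apache.spark.mllib.classification.{
      LogisticRegressionModel, LogisticRegressionWithLBFGS}

    val model = new LogisticRegressionWithLBFGS().run(training)

    model.save(sc, "/tmp/lr-model")  // Saveable: fails if the directory exists
    val restored = LogisticRegressionModel.load(sc, "/tmp/lr-model")  // Loader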
