spark git commit: [SPARK-5757][MLLIB] replace SQL JSON usage in model import/export by json4s

2015-02-12 Thread meng
Repository: spark
Updated Branches:
  refs/heads/master 466b1f671 -> 99bd50066


[SPARK-5757][MLLIB] replace SQL JSON usage in model import/export by json4s

This PR detaches MLlib model import/export code from SQL's JSON support, and
hence unblocks #4544. cc yhuai

Author: Xiangrui Meng 

Closes #4555 from mengxr/SPARK-5757 and squashes the following commits:

b0415e8 [Xiangrui Meng] replace SQL JSON usage by json4s


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/99bd5006
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/99bd5006
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/99bd5006

Branch: refs/heads/master
Commit: 99bd5006650bb15ec5465ffee1ebaca81354a3df
Parents: 466b1f6
Author: Xiangrui Meng 
Authored: Thu Feb 12 10:48:13 2015 -0800
Committer: Xiangrui Meng 
Committed: Thu Feb 12 10:48:13 2015 -0800

--
 .../classification/ClassificationModel.scala| 16 ++
 .../classification/LogisticRegression.scala |  3 +-
 .../spark/mllib/classification/NaiveBayes.scala | 18 +++
 .../apache/spark/mllib/classification/SVM.scala |  6 +--
 .../impl/GLMClassificationModel.scala   | 17 ---
 .../MatrixFactorizationModel.scala  | 14 --
 .../apache/spark/mllib/regression/Lasso.scala   |  2 +-
 .../mllib/regression/LinearRegression.scala |  2 +-
 .../mllib/regression/RegressionModel.scala  | 16 ++
 .../mllib/regression/RidgeRegression.scala  |  2 +-
 .../regression/impl/GLMRegressionModel.scala| 11 +++--
 .../apache/spark/mllib/tree/DecisionTree.scala  |  8 +--
 .../mllib/tree/model/DecisionTreeModel.scala| 28 +--
 .../mllib/tree/model/treeEnsembleModels.scala   | 51 
 .../apache/spark/mllib/util/modelSaveLoad.scala | 25 +++---
 15 files changed, 92 insertions(+), 127 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/99bd5006/mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala
--
diff --git 
a/mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala
 
b/mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala
index 348c1e8..35a0db7 100644
--- 
a/mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala
+++ 
b/mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala
@@ -17,12 +17,12 @@
 
 package org.apache.spark.mllib.classification
 
+import org.json4s.{DefaultFormats, JValue}
+
 import org.apache.spark.annotation.Experimental
 import org.apache.spark.api.java.JavaRDD
 import org.apache.spark.mllib.linalg.Vector
-import org.apache.spark.mllib.util.Loader
 import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.{DataFrame, Row}
 
 /**
  * :: Experimental ::
@@ -60,16 +60,10 @@ private[mllib] object ClassificationModel {
 
   /**
* Helper method for loading GLM classification model metadata.
-   *
-   * @param modelClass  String name for model class (used for error messages)
* @return (numFeatures, numClasses)
*/
-  def getNumFeaturesClasses(metadata: DataFrame, modelClass: String, path: 
String): (Int, Int) = {
-metadata.select("numFeatures", "numClasses").take(1)(0) match {
-  case Row(nFeatures: Int, nClasses: Int) => (nFeatures, nClasses)
-  case _ => throw new Exception(s"$modelClass unable to load" +
-s" numFeatures, numClasses from metadata: 
${Loader.metadataPath(path)}")
-}
+  def getNumFeaturesClasses(metadata: JValue): (Int, Int) = {
+implicit val formats = DefaultFormats
+((metadata \ "numFeatures").extract[Int], (metadata \ 
"numClasses").extract[Int])
   }
-
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/99bd5006/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
--
diff --git 
a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
 
b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
index 9a391bf..420d6e2 100644
--- 
a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
+++ 
b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
@@ -173,8 +173,7 @@ object LogisticRegressionModel extends 
Loader[LogisticRegressionModel] {
 val classNameV1_0 = 
"org.apache.spark.mllib.classification.LogisticRegressionModel"
 (loadedClassName, version) match {
   case (className, "1.0") if className == classNameV1_0 =>
-val (numFeatures, numClasses) =
-  ClassificationModel.getNumFeaturesClasses(metadata, classNameV1_0, 
path)
+val (numFeatures, numClasses) = 
Classifica

spark git commit: [SPARK-5757][MLLIB] replace SQL JSON usage in model import/export by json4s

2015-02-12 Thread meng
Repository: spark
Updated Branches:
  refs/heads/branch-1.3 e23c8f5c8 -> e26c14990


[SPARK-5757][MLLIB] replace SQL JSON usage in model import/export by json4s

This PR detaches MLlib model import/export code from SQL's JSON support, and
hence unblocks #4544. cc yhuai

Author: Xiangrui Meng 

Closes #4555 from mengxr/SPARK-5757 and squashes the following commits:

b0415e8 [Xiangrui Meng] replace SQL JSON usage by json4s

(cherry picked from commit 99bd5006650bb15ec5465ffee1ebaca81354a3df)
Signed-off-by: Xiangrui Meng 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e26c1499
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e26c1499
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e26c1499

Branch: refs/heads/branch-1.3
Commit: e26c14990c477249241b429c1bb877c3d9339744
Parents: e23c8f5
Author: Xiangrui Meng 
Authored: Thu Feb 12 10:48:13 2015 -0800
Committer: Xiangrui Meng 
Committed: Thu Feb 12 10:48:22 2015 -0800

--
 .../classification/ClassificationModel.scala| 16 ++
 .../classification/LogisticRegression.scala |  3 +-
 .../spark/mllib/classification/NaiveBayes.scala | 18 +++
 .../apache/spark/mllib/classification/SVM.scala |  6 +--
 .../impl/GLMClassificationModel.scala   | 17 ---
 .../MatrixFactorizationModel.scala  | 14 --
 .../apache/spark/mllib/regression/Lasso.scala   |  2 +-
 .../mllib/regression/LinearRegression.scala |  2 +-
 .../mllib/regression/RegressionModel.scala  | 16 ++
 .../mllib/regression/RidgeRegression.scala  |  2 +-
 .../regression/impl/GLMRegressionModel.scala| 11 +++--
 .../apache/spark/mllib/tree/DecisionTree.scala  |  8 +--
 .../mllib/tree/model/DecisionTreeModel.scala| 28 +--
 .../mllib/tree/model/treeEnsembleModels.scala   | 51 
 .../apache/spark/mllib/util/modelSaveLoad.scala | 25 +++---
 15 files changed, 92 insertions(+), 127 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/e26c1499/mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala
--
diff --git 
a/mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala
 
b/mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala
index 348c1e8..35a0db7 100644
--- 
a/mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala
+++ 
b/mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala
@@ -17,12 +17,12 @@
 
 package org.apache.spark.mllib.classification
 
+import org.json4s.{DefaultFormats, JValue}
+
 import org.apache.spark.annotation.Experimental
 import org.apache.spark.api.java.JavaRDD
 import org.apache.spark.mllib.linalg.Vector
-import org.apache.spark.mllib.util.Loader
 import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.{DataFrame, Row}
 
 /**
  * :: Experimental ::
@@ -60,16 +60,10 @@ private[mllib] object ClassificationModel {
 
   /**
* Helper method for loading GLM classification model metadata.
-   *
-   * @param modelClass  String name for model class (used for error messages)
* @return (numFeatures, numClasses)
*/
-  def getNumFeaturesClasses(metadata: DataFrame, modelClass: String, path: 
String): (Int, Int) = {
-metadata.select("numFeatures", "numClasses").take(1)(0) match {
-  case Row(nFeatures: Int, nClasses: Int) => (nFeatures, nClasses)
-  case _ => throw new Exception(s"$modelClass unable to load" +
-s" numFeatures, numClasses from metadata: 
${Loader.metadataPath(path)}")
-}
+  def getNumFeaturesClasses(metadata: JValue): (Int, Int) = {
+implicit val formats = DefaultFormats
+((metadata \ "numFeatures").extract[Int], (metadata \ 
"numClasses").extract[Int])
   }
-
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/e26c1499/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
--
diff --git 
a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
 
b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
index 9a391bf..420d6e2 100644
--- 
a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
+++ 
b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
@@ -173,8 +173,7 @@ object LogisticRegressionModel extends 
Loader[LogisticRegressionModel] {
 val classNameV1_0 = 
"org.apache.spark.mllib.classification.LogisticRegressionModel"
 (loadedClassName, version) match {
   case (className, "1.0") if className == classNameV1_0 =>
-val (numFeatures, numClasses) =
-  ClassificationMode