spark git commit: [SPARK-5757][MLLIB] replace SQL JSON usage in model import/export by json4s
Repository: spark Updated Branches: refs/heads/master 466b1f671 -> 99bd50066 [SPARK-5757][MLLIB] replace SQL JSON usage in model import/export by json4s This PR detaches MLlib model import/export code from SQL's JSON support, and hence unblocks #4544 . yhuai Author: Xiangrui Meng Closes #4555 from mengxr/SPARK-5757 and squashes the following commits: b0415e8 [Xiangrui Meng] replace SQL JSON usage by json4s Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/99bd5006 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/99bd5006 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/99bd5006 Branch: refs/heads/master Commit: 99bd5006650bb15ec5465ffee1ebaca81354a3df Parents: 466b1f6 Author: Xiangrui Meng Authored: Thu Feb 12 10:48:13 2015 -0800 Committer: Xiangrui Meng Committed: Thu Feb 12 10:48:13 2015 -0800 -- .../classification/ClassificationModel.scala| 16 ++ .../classification/LogisticRegression.scala | 3 +- .../spark/mllib/classification/NaiveBayes.scala | 18 +++ .../apache/spark/mllib/classification/SVM.scala | 6 +-- .../impl/GLMClassificationModel.scala | 17 --- .../MatrixFactorizationModel.scala | 14 -- .../apache/spark/mllib/regression/Lasso.scala | 2 +- .../mllib/regression/LinearRegression.scala | 2 +- .../mllib/regression/RegressionModel.scala | 16 ++ .../mllib/regression/RidgeRegression.scala | 2 +- .../regression/impl/GLMRegressionModel.scala| 11 +++-- .../apache/spark/mllib/tree/DecisionTree.scala | 8 +-- .../mllib/tree/model/DecisionTreeModel.scala| 28 +-- .../mllib/tree/model/treeEnsembleModels.scala | 51 .../apache/spark/mllib/util/modelSaveLoad.scala | 25 +++--- 15 files changed, 92 insertions(+), 127 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/99bd5006/mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala index 348c1e8..35a0db7 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala @@ -17,12 +17,12 @@ package org.apache.spark.mllib.classification +import org.json4s.{DefaultFormats, JValue} + import org.apache.spark.annotation.Experimental import org.apache.spark.api.java.JavaRDD import org.apache.spark.mllib.linalg.Vector -import org.apache.spark.mllib.util.Loader import org.apache.spark.rdd.RDD -import org.apache.spark.sql.{DataFrame, Row} /** * :: Experimental :: @@ -60,16 +60,10 @@ private[mllib] object ClassificationModel { /** * Helper method for loading GLM classification model metadata. - * - * @param modelClass String name for model class (used for error messages) * @return (numFeatures, numClasses) */ - def getNumFeaturesClasses(metadata: DataFrame, modelClass: String, path: String): (Int, Int) = { -metadata.select("numFeatures", "numClasses").take(1)(0) match { - case Row(nFeatures: Int, nClasses: Int) => (nFeatures, nClasses) - case _ => throw new Exception(s"$modelClass unable to load" + -s" numFeatures, numClasses from metadata: ${Loader.metadataPath(path)}") -} + def getNumFeaturesClasses(metadata: JValue): (Int, Int) = { +implicit val formats = DefaultFormats +((metadata \ "numFeatures").extract[Int], (metadata \ "numClasses").extract[Int]) } - } http://git-wip-us.apache.org/repos/asf/spark/blob/99bd5006/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala index 9a391bf..420d6e2 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala @@ -173,8 +173,7 @@ object LogisticRegressionModel extends Loader[LogisticRegressionModel] { val classNameV1_0 = "org.apache.spark.mllib.classification.LogisticRegressionModel" (loadedClassName, version) match { case (className, "1.0") if className == classNameV1_0 => -val (numFeatures, numClasses) = - ClassificationModel.getNumFeaturesClasses(metadata, classNameV1_0, path) +val (numFeatures, numClasses) = Classifica
spark git commit: [SPARK-5757][MLLIB] replace SQL JSON usage in model import/export by json4s
Repository: spark Updated Branches: refs/heads/branch-1.3 e23c8f5c8 -> e26c14990 [SPARK-5757][MLLIB] replace SQL JSON usage in model import/export by json4s This PR detaches MLlib model import/export code from SQL's JSON support, and hence unblocks #4544 . yhuai Author: Xiangrui Meng Closes #4555 from mengxr/SPARK-5757 and squashes the following commits: b0415e8 [Xiangrui Meng] replace SQL JSON usage by json4s (cherry picked from commit 99bd5006650bb15ec5465ffee1ebaca81354a3df) Signed-off-by: Xiangrui Meng Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e26c1499 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e26c1499 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e26c1499 Branch: refs/heads/branch-1.3 Commit: e26c14990c477249241b429c1bb877c3d9339744 Parents: e23c8f5 Author: Xiangrui Meng Authored: Thu Feb 12 10:48:13 2015 -0800 Committer: Xiangrui Meng Committed: Thu Feb 12 10:48:22 2015 -0800 -- .../classification/ClassificationModel.scala| 16 ++ .../classification/LogisticRegression.scala | 3 +- .../spark/mllib/classification/NaiveBayes.scala | 18 +++ .../apache/spark/mllib/classification/SVM.scala | 6 +-- .../impl/GLMClassificationModel.scala | 17 --- .../MatrixFactorizationModel.scala | 14 -- .../apache/spark/mllib/regression/Lasso.scala | 2 +- .../mllib/regression/LinearRegression.scala | 2 +- .../mllib/regression/RegressionModel.scala | 16 ++ .../mllib/regression/RidgeRegression.scala | 2 +- .../regression/impl/GLMRegressionModel.scala| 11 +++-- .../apache/spark/mllib/tree/DecisionTree.scala | 8 +-- .../mllib/tree/model/DecisionTreeModel.scala| 28 +-- .../mllib/tree/model/treeEnsembleModels.scala | 51 .../apache/spark/mllib/util/modelSaveLoad.scala | 25 +++--- 15 files changed, 92 insertions(+), 127 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/e26c1499/mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala index 348c1e8..35a0db7 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala @@ -17,12 +17,12 @@ package org.apache.spark.mllib.classification +import org.json4s.{DefaultFormats, JValue} + import org.apache.spark.annotation.Experimental import org.apache.spark.api.java.JavaRDD import org.apache.spark.mllib.linalg.Vector -import org.apache.spark.mllib.util.Loader import org.apache.spark.rdd.RDD -import org.apache.spark.sql.{DataFrame, Row} /** * :: Experimental :: @@ -60,16 +60,10 @@ private[mllib] object ClassificationModel { /** * Helper method for loading GLM classification model metadata. - * - * @param modelClass String name for model class (used for error messages) * @return (numFeatures, numClasses) */ - def getNumFeaturesClasses(metadata: DataFrame, modelClass: String, path: String): (Int, Int) = { -metadata.select("numFeatures", "numClasses").take(1)(0) match { - case Row(nFeatures: Int, nClasses: Int) => (nFeatures, nClasses) - case _ => throw new Exception(s"$modelClass unable to load" + -s" numFeatures, numClasses from metadata: ${Loader.metadataPath(path)}") -} + def getNumFeaturesClasses(metadata: JValue): (Int, Int) = { +implicit val formats = DefaultFormats +((metadata \ "numFeatures").extract[Int], (metadata \ "numClasses").extract[Int]) } - } http://git-wip-us.apache.org/repos/asf/spark/blob/e26c1499/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala index 9a391bf..420d6e2 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala @@ -173,8 +173,7 @@ object LogisticRegressionModel extends Loader[LogisticRegressionModel] { val classNameV1_0 = "org.apache.spark.mllib.classification.LogisticRegressionModel" (loadedClassName, version) match { case (className, "1.0") if className == classNameV1_0 => -val (numFeatures, numClasses) = - ClassificationMode