Repository: spark
Updated Branches:
  refs/heads/master 7a9dcbc91 -> e6f8d3686
[SPARK-10468] [ MLLIB ] Verify schema before Dataframe select API call

Loader.checkSchema was called to verify the schema after dataframe.select(...).
Schema verification should be done before dataframe.select(...)

Author: Vinod K C <vinod...@huawei.com>

Closes #8636 from vinodkc/fix_GaussianMixtureModel_load_verification.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e6f8d368
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e6f8d368
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e6f8d368

Branch: refs/heads/master
Commit: e6f8d3686016a305a747c5bcc85f46fd4c0cbe83
Parents: 7a9dcbc
Author: Vinod K C <vinod...@huawei.com>
Authored: Tue Sep 8 14:44:05 2015 -0700
Committer: Xiangrui Meng <m...@databricks.com>
Committed: Tue Sep 8 14:44:05 2015 -0700

----------------------------------------------------------------------
 .../org/apache/spark/mllib/clustering/GaussianMixtureModel.scala | 3 +--
 .../src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala | 4 +---
 2 files changed, 2 insertions(+), 5 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/e6f8d368/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala
index 7f6163e..a590219 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala
@@ -168,10 +168,9 @@ object GaussianMixtureModel extends Loader[GaussianMixtureModel] {
       val dataPath = Loader.dataPath(path)
       val sqlContext = new SQLContext(sc)
       val dataFrame = sqlContext.read.parquet(dataPath)
-      val dataArray = dataFrame.select("weight", "mu", "sigma").collect()
-
       // Check schema explicitly since erasure makes it hard to use match-case for checking.
       Loader.checkSchema[Data](dataFrame.schema)
+      val dataArray = dataFrame.select("weight", "mu", "sigma").collect()
 
       val (weights, gaussians) = dataArray.map {
         case Row(weight: Double, mu: Vector, sigma: Matrix) =>

http://git-wip-us.apache.org/repos/asf/spark/blob/e6f8d368/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
index 36b124c..58857c3 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
@@ -590,12 +590,10 @@ object Word2VecModel extends Loader[Word2VecModel] {
       val dataPath = Loader.dataPath(path)
       val sqlContext = new SQLContext(sc)
       val dataFrame = sqlContext.read.parquet(dataPath)
-
-      val dataArray = dataFrame.select("word", "vector").collect()
-
       // Check schema explicitly since erasure makes it hard to use match-case for checking.
       Loader.checkSchema[Data](dataFrame.schema)
 
+      val dataArray = dataFrame.select("word", "vector").collect()
       val word2VecMap = dataArray.map(i => (i.getString(0), i.getSeq[Float](1).toArray)).toMap
       new Word2VecModel(word2VecMap)
     }

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org