spark git commit: [SPARK-11902][ML] Unhandled case in VectorAssembler#transform
Repository: spark Updated Branches: refs/heads/master d9cf9c21f -> 4be360d4e [SPARK-11902][ML] Unhandled case in VectorAssembler#transform There is an unhandled case in the transform method of VectorAssembler if one of the input columns doesn't have one of the supported types: DoubleType, NumericType, BooleanType or VectorUDT. So, if you try to transform a column of StringType, you get a cryptic "scala.MatchError: StringType". This PR aims to fix this, throwing a SparkException when dealing with an unknown column type. Author: BenFradet Closes #9885 from BenFradet/SPARK-11902. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4be360d4 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4be360d4 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4be360d4 Branch: refs/heads/master Commit: 4be360d4ee6cdb4d06306feca38ddef5212608cf Parents: d9cf9c2 Author: BenFradet Authored: Sun Nov 22 22:05:01 2015 -0800 Committer: Xiangrui Meng Committed: Sun Nov 22 22:05:01 2015 -0800 -- .../org/apache/spark/ml/feature/VectorAssembler.scala| 2 ++ .../apache/spark/ml/feature/VectorAssemblerSuite.scala | 11 +++ 2 files changed, 13 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/4be360d4/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala index 0feec05..801096f 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala @@ -84,6 +84,8 @@ class VectorAssembler(override val uid: String) val numAttrs = group.numAttributes.getOrElse(first.getAs[Vector](index).size) Array.fill(numAttrs)(NumericAttribute.defaultAttr) } +case otherType => + throw new SparkException(s"VectorAssembler does not support the $otherType type") } } 
val metadata = new AttributeGroup($(outputCol), attrs).toMetadata() http://git-wip-us.apache.org/repos/asf/spark/blob/4be360d4/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala -- diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala index fb21ab6..9c1c00f 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala @@ -69,6 +69,17 @@ class VectorAssemblerSuite } } + test("transform should throw an exception in case of unsupported type") { +val df = sqlContext.createDataFrame(Seq(("a", "b", "c"))).toDF("a", "b", "c") +val assembler = new VectorAssembler() + .setInputCols(Array("a", "b", "c")) + .setOutputCol("features") +val thrown = intercept[SparkException] { + assembler.transform(df) +} +assert(thrown.getMessage contains "VectorAssembler does not support the StringType type") + } + test("ML attributes") { val browser = NominalAttribute.defaultAttr.withValues("chrome", "firefox", "safari") val hour = NumericAttribute.defaultAttr.withMin(0.0).withMax(24.0) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-11902][ML] Unhandled case in VectorAssembler#transform
Repository: spark Updated Branches: refs/heads/branch-1.6 d482dced3 -> bad93d9f3 [SPARK-11902][ML] Unhandled case in VectorAssembler#transform There is an unhandled case in the transform method of VectorAssembler if one of the input columns doesn't have one of the supported types: DoubleType, NumericType, BooleanType or VectorUDT. So, if you try to transform a column of StringType, you get a cryptic "scala.MatchError: StringType". This PR aims to fix this, throwing a SparkException when dealing with an unknown column type. Author: BenFradet Closes #9885 from BenFradet/SPARK-11902. (cherry picked from commit 4be360d4ee6cdb4d06306feca38ddef5212608cf) Signed-off-by: Xiangrui Meng Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/bad93d9f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/bad93d9f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/bad93d9f Branch: refs/heads/branch-1.6 Commit: bad93d9f3a24a7ee024541569c6f3de88aad2fda Parents: d482dce Author: BenFradet Authored: Sun Nov 22 22:05:01 2015 -0800 Committer: Xiangrui Meng Committed: Sun Nov 22 22:05:13 2015 -0800 -- .../org/apache/spark/ml/feature/VectorAssembler.scala| 2 ++ .../apache/spark/ml/feature/VectorAssemblerSuite.scala | 11 +++ 2 files changed, 13 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/bad93d9f/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala index 0feec05..801096f 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala @@ -84,6 +84,8 @@ class VectorAssembler(override val uid: String) val numAttrs = group.numAttributes.getOrElse(first.getAs[Vector](index).size) Array.fill(numAttrs)(NumericAttribute.defaultAttr) } 
+case otherType => + throw new SparkException(s"VectorAssembler does not support the $otherType type") } } val metadata = new AttributeGroup($(outputCol), attrs).toMetadata() http://git-wip-us.apache.org/repos/asf/spark/blob/bad93d9f/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala -- diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala index fb21ab6..9c1c00f 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala @@ -69,6 +69,17 @@ class VectorAssemblerSuite } } + test("transform should throw an exception in case of unsupported type") { +val df = sqlContext.createDataFrame(Seq(("a", "b", "c"))).toDF("a", "b", "c") +val assembler = new VectorAssembler() + .setInputCols(Array("a", "b", "c")) + .setOutputCol("features") +val thrown = intercept[SparkException] { + assembler.transform(df) +} +assert(thrown.getMessage contains "VectorAssembler does not support the StringType type") + } + test("ML attributes") { val browser = NominalAttribute.defaultAttr.withValues("chrome", "firefox", "safari") val hour = NumericAttribute.defaultAttr.withMin(0.0).withMax(24.0) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-11912][ML] ml.feature.PCA minor refactor
Repository: spark Updated Branches: refs/heads/branch-1.6 7f9d3358a -> d482dced3 [SPARK-11912][ML] ml.feature.PCA minor refactor Like [SPARK-11852](https://issues.apache.org/jira/browse/SPARK-11852), ```k``` is a param, so we should save it only under ```metadata/``` rather than under both ```data/``` and ```metadata/```. Refactor the constructor of ```ml.feature.PCAModel``` to take only ```pc``` and construct ```mllib.feature.PCAModel``` inside ```transform```. Author: Yanbo Liang Closes #9897 from yanboliang/spark-11912. (cherry picked from commit d9cf9c21fc6b1aa22e68d66760afd42c4e1c18b8) Signed-off-by: Xiangrui Meng Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d482dced Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d482dced Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d482dced Branch: refs/heads/branch-1.6 Commit: d482dced313d1d837508d3f449261419c8543c1d Parents: 7f9d335 Author: Yanbo Liang Authored: Sun Nov 22 21:56:07 2015 -0800 Committer: Xiangrui Meng Committed: Sun Nov 22 21:56:17 2015 -0800 -- .../scala/org/apache/spark/ml/feature/PCA.scala | 23 +++ .../org/apache/spark/ml/feature/PCASuite.scala | 31 2 files changed, 24 insertions(+), 30 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/d482dced/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala index 32d7afe..aa88cb0 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala @@ -73,7 +73,7 @@ class PCA (override val uid: String) extends Estimator[PCAModel] with PCAParams val input = dataset.select($(inputCol)).map { case Row(v: Vector) => v} val pca = new feature.PCA(k = $(k)) val pcaModel = pca.fit(input) -copyValues(new PCAModel(uid, pcaModel).setParent(this)) +copyValues(new 
PCAModel(uid, pcaModel.pc).setParent(this)) } override def transformSchema(schema: StructType): StructType = { @@ -99,18 +99,17 @@ object PCA extends DefaultParamsReadable[PCA] { /** * :: Experimental :: * Model fitted by [[PCA]]. + * + * @param pc A principal components Matrix. Each column is one principal component. */ @Experimental class PCAModel private[ml] ( override val uid: String, -pcaModel: feature.PCAModel) +val pc: DenseMatrix) extends Model[PCAModel] with PCAParams with MLWritable { import PCAModel._ - /** a principal components Matrix. Each column is one principal component. */ - val pc: DenseMatrix = pcaModel.pc - /** @group setParam */ def setInputCol(value: String): this.type = set(inputCol, value) @@ -124,6 +123,7 @@ class PCAModel private[ml] ( */ override def transform(dataset: DataFrame): DataFrame = { transformSchema(dataset.schema, logging = true) +val pcaModel = new feature.PCAModel($(k), pc) val pcaOp = udf { pcaModel.transform _ } dataset.withColumn($(outputCol), pcaOp(col($(inputCol } @@ -139,7 +139,7 @@ class PCAModel private[ml] ( } override def copy(extra: ParamMap): PCAModel = { -val copied = new PCAModel(uid, pcaModel) +val copied = new PCAModel(uid, pc) copyValues(copied, extra).setParent(parent) } @@ -152,11 +152,11 @@ object PCAModel extends MLReadable[PCAModel] { private[PCAModel] class PCAModelWriter(instance: PCAModel) extends MLWriter { -private case class Data(k: Int, pc: DenseMatrix) +private case class Data(pc: DenseMatrix) override protected def saveImpl(path: String): Unit = { DefaultParamsWriter.saveMetadata(instance, path, sc) - val data = Data(instance.getK, instance.pc) + val data = Data(instance.pc) val dataPath = new Path(path, "data").toString sqlContext.createDataFrame(Seq(data)).repartition(1).write.parquet(dataPath) } @@ -169,11 +169,10 @@ object PCAModel extends MLReadable[PCAModel] { override def load(path: String): PCAModel = { val metadata = DefaultParamsReader.loadMetadata(path, sc, className) val dataPath = 
new Path(path, "data").toString - val Row(k: Int, pc: DenseMatrix) = sqlContext.read.parquet(dataPath) -.select("k", "pc") + val Row(pc: DenseMatrix) = sqlContext.read.parquet(dataPath) +.select("pc") .head() - val oldModel = new feature.PCAModel(k, pc) - val model = new PCAModel(metadata.uid, oldModel) + val model = new PCAModel(metadata.uid, pc) DefaultParamsReader.getAndSetParams(model, metadata) model } http://git-wip-us.apache.org/repos/asf/spark/blob
spark git commit: [SPARK-11912][ML] ml.feature.PCA minor refactor
Repository: spark Updated Branches: refs/heads/master fc4b792d2 -> d9cf9c21f [SPARK-11912][ML] ml.feature.PCA minor refactor Like [SPARK-11852](https://issues.apache.org/jira/browse/SPARK-11852), ```k``` is a param, so we should save it only under ```metadata/``` rather than under both ```data/``` and ```metadata/```. Refactor the constructor of ```ml.feature.PCAModel``` to take only ```pc``` and construct ```mllib.feature.PCAModel``` inside ```transform```. Author: Yanbo Liang Closes #9897 from yanboliang/spark-11912. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d9cf9c21 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d9cf9c21 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d9cf9c21 Branch: refs/heads/master Commit: d9cf9c21fc6b1aa22e68d66760afd42c4e1c18b8 Parents: fc4b792 Author: Yanbo Liang Authored: Sun Nov 22 21:56:07 2015 -0800 Committer: Xiangrui Meng Committed: Sun Nov 22 21:56:07 2015 -0800 -- .../scala/org/apache/spark/ml/feature/PCA.scala | 23 +++ .../org/apache/spark/ml/feature/PCASuite.scala | 31 2 files changed, 24 insertions(+), 30 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/d9cf9c21/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala index 32d7afe..aa88cb0 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala @@ -73,7 +73,7 @@ class PCA (override val uid: String) extends Estimator[PCAModel] with PCAParams val input = dataset.select($(inputCol)).map { case Row(v: Vector) => v} val pca = new feature.PCA(k = $(k)) val pcaModel = pca.fit(input) -copyValues(new PCAModel(uid, pcaModel).setParent(this)) +copyValues(new PCAModel(uid, pcaModel.pc).setParent(this)) } override def transformSchema(schema: StructType): StructType = { @@ 
-99,18 +99,17 @@ object PCA extends DefaultParamsReadable[PCA] { /** * :: Experimental :: * Model fitted by [[PCA]]. + * + * @param pc A principal components Matrix. Each column is one principal component. */ @Experimental class PCAModel private[ml] ( override val uid: String, -pcaModel: feature.PCAModel) +val pc: DenseMatrix) extends Model[PCAModel] with PCAParams with MLWritable { import PCAModel._ - /** a principal components Matrix. Each column is one principal component. */ - val pc: DenseMatrix = pcaModel.pc - /** @group setParam */ def setInputCol(value: String): this.type = set(inputCol, value) @@ -124,6 +123,7 @@ class PCAModel private[ml] ( */ override def transform(dataset: DataFrame): DataFrame = { transformSchema(dataset.schema, logging = true) +val pcaModel = new feature.PCAModel($(k), pc) val pcaOp = udf { pcaModel.transform _ } dataset.withColumn($(outputCol), pcaOp(col($(inputCol } @@ -139,7 +139,7 @@ class PCAModel private[ml] ( } override def copy(extra: ParamMap): PCAModel = { -val copied = new PCAModel(uid, pcaModel) +val copied = new PCAModel(uid, pc) copyValues(copied, extra).setParent(parent) } @@ -152,11 +152,11 @@ object PCAModel extends MLReadable[PCAModel] { private[PCAModel] class PCAModelWriter(instance: PCAModel) extends MLWriter { -private case class Data(k: Int, pc: DenseMatrix) +private case class Data(pc: DenseMatrix) override protected def saveImpl(path: String): Unit = { DefaultParamsWriter.saveMetadata(instance, path, sc) - val data = Data(instance.getK, instance.pc) + val data = Data(instance.pc) val dataPath = new Path(path, "data").toString sqlContext.createDataFrame(Seq(data)).repartition(1).write.parquet(dataPath) } @@ -169,11 +169,10 @@ object PCAModel extends MLReadable[PCAModel] { override def load(path: String): PCAModel = { val metadata = DefaultParamsReader.loadMetadata(path, sc, className) val dataPath = new Path(path, "data").toString - val Row(k: Int, pc: DenseMatrix) = sqlContext.read.parquet(dataPath) 
-.select("k", "pc") + val Row(pc: DenseMatrix) = sqlContext.read.parquet(dataPath) +.select("pc") .head() - val oldModel = new feature.PCAModel(k, pc) - val model = new PCAModel(metadata.uid, oldModel) + val model = new PCAModel(metadata.uid, pc) DefaultParamsReader.getAndSetParams(model, metadata) model } http://git-wip-us.apache.org/repos/asf/spark/blob/d9cf9c21/mllib/src/test/scala/org/apache/spark/ml/feature/PCASuite.scala --
spark git commit: [SPARK-11835] Adds a sidebar menu to MLlib's documentation
Repository: spark Updated Branches: refs/heads/branch-1.6 835b5488f -> 7f9d3358a [SPARK-11835] Adds a sidebar menu to MLlib's documentation This PR adds a sidebar menu when browsing the user guide of MLlib. It uses a YAML file to describe the structure of the documentation. It should be trivial to adapt this to the other projects. ![screen shot 2015-11-18 at 4 46 12 pm](https://cloud.githubusercontent.com/assets/7594753/11259591/a55173f4-8e17-11e5-9340-0aed79d66262.png) Author: Timothy Hunter Closes #9826 from thunterdb/spark-11835. (cherry picked from commit fc4b792d287095d70379a51f117c225d8d857078) Signed-off-by: Xiangrui Meng Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7f9d3358 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7f9d3358 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7f9d3358 Branch: refs/heads/branch-1.6 Commit: 7f9d3358afd7e266c79e9989e4d874cd1183f474 Parents: 835b548 Author: Timothy Hunter Authored: Sun Nov 22 21:51:42 2015 -0800 Committer: Xiangrui Meng Committed: Sun Nov 22 21:51:51 2015 -0800 -- docs/_data/menu-ml.yaml | 10 docs/_data/menu-mllib.yaml | 75 docs/_includes/nav-left-wrapper-ml.html | 8 +++ docs/_includes/nav-left.html| 17 +++ docs/_layouts/global.html | 24 ++--- docs/css/main.css | 37 ++ 6 files changed, 163 insertions(+), 8 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/7f9d3358/docs/_data/menu-ml.yaml -- diff --git a/docs/_data/menu-ml.yaml b/docs/_data/menu-ml.yaml new file mode 100644 index 000..dff3d33 --- /dev/null +++ b/docs/_data/menu-ml.yaml @@ -0,0 +1,10 @@ +- text: Feature extraction, transformation, and selection + url: ml-features.html +- text: Decision trees for classification and regression + url: ml-decision-tree.html +- text: Ensembles + url: ml-ensembles.html +- text: Linear methods with elastic-net regularization + url: ml-linear-methods.html +- text: Multilayer perceptron classifier + url: ml-ann.html 
http://git-wip-us.apache.org/repos/asf/spark/blob/7f9d3358/docs/_data/menu-mllib.yaml -- diff --git a/docs/_data/menu-mllib.yaml b/docs/_data/menu-mllib.yaml new file mode 100644 index 000..12d22ab --- /dev/null +++ b/docs/_data/menu-mllib.yaml @@ -0,0 +1,75 @@ +- text: Data types + url: mllib-data-types.html +- text: Basic statistics + url: mllib-statistics.html + subitems: +- text: Summary statistics + url: mllib-statistics.html#summary-statistics +- text: Correlations + url: mllib-statistics.html#correlations +- text: Stratified sampling + url: mllib-statistics.html#stratified-sampling +- text: Hypothesis testing + url: mllib-statistics.html#hypothesis-testing +- text: Random data generation + url: mllib-statistics.html#random-data-generation +- text: Classification and regression + url: mllib-classification-regression.html + subitems: +- text: Linear models (SVMs, logistic regression, linear regression) + url: mllib-linear-methods.html +- text: Naive Bayes + url: mllib-naive-bayes.html +- text: decision trees + url: mllib-decision-tree.html +- text: ensembles of trees (Random Forests and Gradient-Boosted Trees) + url: mllib-ensembles.html +- text: isotonic regression + url: mllib-isotonic-regression.html +- text: Collaborative filtering + url: mllib-collaborative-filtering.html + subitems: +- text: alternating least squares (ALS) + url: mllib-collaborative-filtering.html#collaborative-filtering +- text: Clustering + url: mllib-clustering.html + subitems: +- text: k-means + url: mllib-clustering.html#k-means +- text: Gaussian mixture + url: mllib-clustering.html#gaussian-mixture +- text: power iteration clustering (PIC) + url: mllib-clustering.html#power-iteration-clustering-pic +- text: latent Dirichlet allocation (LDA) + url: mllib-clustering.html#latent-dirichlet-allocation-lda +- text: streaming k-means + url: mllib-clustering.html#streaming-k-means +- text: Dimensionality reduction + url: mllib-dimensionality-reduction.html + subitems: +- text: singular 
value decomposition (SVD) + url: mllib-dimensionality-reduction.html#singular-value-decomposition-svd +- text: principal component analysis (PCA) + url: mllib-dimensionality-reduction.html#principal-component-analysis-pca +- text: Feature extraction and transformation + url: mllib-feature-extraction.html +- text: Frequent pattern mining + url: mllib-frequent-pattern-mining.html +
spark git commit: [SPARK-11835] Adds a sidebar menu to MLlib's documentation
Repository: spark Updated Branches: refs/heads/master a6fda0bfc -> fc4b792d2 [SPARK-11835] Adds a sidebar menu to MLlib's documentation This PR adds a sidebar menu when browsing the user guide of MLlib. It uses a YAML file to describe the structure of the documentation. It should be trivial to adapt this to the other projects. ![screen shot 2015-11-18 at 4 46 12 pm](https://cloud.githubusercontent.com/assets/7594753/11259591/a55173f4-8e17-11e5-9340-0aed79d66262.png) Author: Timothy Hunter Closes #9826 from thunterdb/spark-11835. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fc4b792d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fc4b792d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fc4b792d Branch: refs/heads/master Commit: fc4b792d287095d70379a51f117c225d8d857078 Parents: a6fda0b Author: Timothy Hunter Authored: Sun Nov 22 21:51:42 2015 -0800 Committer: Xiangrui Meng Committed: Sun Nov 22 21:51:42 2015 -0800 -- docs/_data/menu-ml.yaml | 10 docs/_data/menu-mllib.yaml | 75 docs/_includes/nav-left-wrapper-ml.html | 8 +++ docs/_includes/nav-left.html| 17 +++ docs/_layouts/global.html | 24 ++--- docs/css/main.css | 37 ++ 6 files changed, 163 insertions(+), 8 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/fc4b792d/docs/_data/menu-ml.yaml -- diff --git a/docs/_data/menu-ml.yaml b/docs/_data/menu-ml.yaml new file mode 100644 index 000..dff3d33 --- /dev/null +++ b/docs/_data/menu-ml.yaml @@ -0,0 +1,10 @@ +- text: Feature extraction, transformation, and selection + url: ml-features.html +- text: Decision trees for classification and regression + url: ml-decision-tree.html +- text: Ensembles + url: ml-ensembles.html +- text: Linear methods with elastic-net regularization + url: ml-linear-methods.html +- text: Multilayer perceptron classifier + url: ml-ann.html http://git-wip-us.apache.org/repos/asf/spark/blob/fc4b792d/docs/_data/menu-mllib.yaml -- diff --git 
a/docs/_data/menu-mllib.yaml b/docs/_data/menu-mllib.yaml new file mode 100644 index 000..12d22ab --- /dev/null +++ b/docs/_data/menu-mllib.yaml @@ -0,0 +1,75 @@ +- text: Data types + url: mllib-data-types.html +- text: Basic statistics + url: mllib-statistics.html + subitems: +- text: Summary statistics + url: mllib-statistics.html#summary-statistics +- text: Correlations + url: mllib-statistics.html#correlations +- text: Stratified sampling + url: mllib-statistics.html#stratified-sampling +- text: Hypothesis testing + url: mllib-statistics.html#hypothesis-testing +- text: Random data generation + url: mllib-statistics.html#random-data-generation +- text: Classification and regression + url: mllib-classification-regression.html + subitems: +- text: Linear models (SVMs, logistic regression, linear regression) + url: mllib-linear-methods.html +- text: Naive Bayes + url: mllib-naive-bayes.html +- text: decision trees + url: mllib-decision-tree.html +- text: ensembles of trees (Random Forests and Gradient-Boosted Trees) + url: mllib-ensembles.html +- text: isotonic regression + url: mllib-isotonic-regression.html +- text: Collaborative filtering + url: mllib-collaborative-filtering.html + subitems: +- text: alternating least squares (ALS) + url: mllib-collaborative-filtering.html#collaborative-filtering +- text: Clustering + url: mllib-clustering.html + subitems: +- text: k-means + url: mllib-clustering.html#k-means +- text: Gaussian mixture + url: mllib-clustering.html#gaussian-mixture +- text: power iteration clustering (PIC) + url: mllib-clustering.html#power-iteration-clustering-pic +- text: latent Dirichlet allocation (LDA) + url: mllib-clustering.html#latent-dirichlet-allocation-lda +- text: streaming k-means + url: mllib-clustering.html#streaming-k-means +- text: Dimensionality reduction + url: mllib-dimensionality-reduction.html + subitems: +- text: singular value decomposition (SVD) + url: mllib-dimensionality-reduction.html#singular-value-decomposition-svd 
+- text: principal component analysis (PCA) + url: mllib-dimensionality-reduction.html#principal-component-analysis-pca +- text: Feature extraction and transformation + url: mllib-feature-extraction.html +- text: Frequent pattern mining + url: mllib-frequent-pattern-mining.html + subitems: +- text: FP-growth + url: mllib-frequent-pattern-mining.html#fp-growth +- text: ass
spark git commit: [SPARK-6791][ML] Add read/write for CrossValidator and Evaluators
Repository: spark Updated Branches: refs/heads/branch-1.6 a36d9bc75 -> 835b5488f [SPARK-6791][ML] Add read/write for CrossValidator and Evaluators I believe this works for general estimators within CrossValidator, including compound estimators. (See the complex unit test.) Added read/write for all 3 Evaluators as well. CC: mengxr yanboliang Author: Joseph K. Bradley Closes #9848 from jkbradley/cv-io. (cherry picked from commit a6fda0bfc16a13b28b1cecc96f1ff91363089144) Signed-off-by: Xiangrui Meng Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/835b5488 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/835b5488 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/835b5488 Branch: refs/heads/branch-1.6 Commit: 835b5488ff644e2f51442943adffd3cd682703ac Parents: a36d9bc Author: Joseph K. Bradley Authored: Sun Nov 22 21:48:48 2015 -0800 Committer: Xiangrui Meng Committed: Sun Nov 22 21:48:57 2015 -0800 -- .../scala/org/apache/spark/ml/Pipeline.scala| 38 +-- .../BinaryClassificationEvaluator.scala | 11 +- .../MulticlassClassificationEvaluator.scala | 12 +- .../ml/evaluation/RegressionEvaluator.scala | 11 +- .../apache/spark/ml/recommendation/ALS.scala| 14 +- .../apache/spark/ml/tuning/CrossValidator.scala | 229 ++- .../org/apache/spark/ml/util/ReadWrite.scala| 48 ++-- .../org/apache/spark/ml/PipelineSuite.scala | 4 +- .../BinaryClassificationEvaluatorSuite.scala| 13 +- ...MulticlassClassificationEvaluatorSuite.scala | 13 +- .../evaluation/RegressionEvaluatorSuite.scala | 12 +- .../spark/ml/tuning/CrossValidatorSuite.scala | 202 +++- 12 files changed, 522 insertions(+), 85 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/835b5488/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala b/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala index 6f15b37..4b2b3f8 100644 --- 
a/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala @@ -34,7 +34,6 @@ import org.apache.spark.ml.util.MLWriter import org.apache.spark.ml.util._ import org.apache.spark.sql.DataFrame import org.apache.spark.sql.types.StructType -import org.apache.spark.util.Utils /** * :: DeveloperApi :: @@ -232,20 +231,9 @@ object Pipeline extends MLReadable[Pipeline] { stages: Array[PipelineStage], sc: SparkContext, path: String): Unit = { - // Copied and edited from DefaultParamsWriter.saveMetadata - // TODO: modify DefaultParamsWriter.saveMetadata to avoid duplication - val uid = instance.uid - val cls = instance.getClass.getName val stageUids = stages.map(_.uid) val jsonParams = List("stageUids" -> parse(compact(render(stageUids.toSeq - val metadata = ("class" -> cls) ~ -("timestamp" -> System.currentTimeMillis()) ~ -("sparkVersion" -> sc.version) ~ -("uid" -> uid) ~ -("paramMap" -> jsonParams) - val metadataPath = new Path(path, "metadata").toString - val metadataJson = compact(render(metadata)) - sc.parallelize(Seq(metadataJson), 1).saveAsTextFile(metadataPath) + DefaultParamsWriter.saveMetadata(instance, path, sc, paramMap = Some(jsonParams)) // Save stages val stagesDir = new Path(path, "stages").toString @@ -266,30 +254,10 @@ object Pipeline extends MLReadable[Pipeline] { implicit val format = DefaultFormats val stagesDir = new Path(path, "stages").toString - val stageUids: Array[String] = metadata.params match { -case JObject(pairs) => - if (pairs.length != 1) { -// Should not happen unless file is corrupted or we have a bug. -throw new RuntimeException( - s"Pipeline read expected 1 Param (stageUids), but found ${pairs.length}.") - } - pairs.head match { -case ("stageUids", jsonValue) => - jsonValue.extract[Seq[String]].toArray -case (paramName, jsonValue) => - // Should not happen unless file is corrupted or we have a bug. 
- throw new RuntimeException(s"Pipeline read encountered unexpected Param $paramName" + -s" in metadata: ${metadata.metadataStr}") - } -case _ => - throw new IllegalArgumentException( -s"Cannot recognize JSON metadata: ${metadata.metadataStr}.") - } + val stageUids: Array[String] = (metadata.params \ "stageUids").extract[Seq[String]].toArray val stages: Array[PipelineStage] = stageUids.zipWithIndex.map { case (st
spark git commit: [SPARK-6791][ML] Add read/write for CrossValidator and Evaluators
Repository: spark Updated Branches: refs/heads/master fe89c1817 -> a6fda0bfc [SPARK-6791][ML] Add read/write for CrossValidator and Evaluators I believe this works for general estimators within CrossValidator, including compound estimators. (See the complex unit test.) Added read/write for all 3 Evaluators as well. CC: mengxr yanboliang Author: Joseph K. Bradley Closes #9848 from jkbradley/cv-io. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a6fda0bf Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a6fda0bf Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a6fda0bf Branch: refs/heads/master Commit: a6fda0bfc16a13b28b1cecc96f1ff91363089144 Parents: fe89c18 Author: Joseph K. Bradley Authored: Sun Nov 22 21:48:48 2015 -0800 Committer: Xiangrui Meng Committed: Sun Nov 22 21:48:48 2015 -0800 -- .../scala/org/apache/spark/ml/Pipeline.scala| 38 +-- .../BinaryClassificationEvaluator.scala | 11 +- .../MulticlassClassificationEvaluator.scala | 12 +- .../ml/evaluation/RegressionEvaluator.scala | 11 +- .../apache/spark/ml/recommendation/ALS.scala| 14 +- .../apache/spark/ml/tuning/CrossValidator.scala | 229 ++- .../org/apache/spark/ml/util/ReadWrite.scala| 48 ++-- .../org/apache/spark/ml/PipelineSuite.scala | 4 +- .../BinaryClassificationEvaluatorSuite.scala| 13 +- ...MulticlassClassificationEvaluatorSuite.scala | 13 +- .../evaluation/RegressionEvaluatorSuite.scala | 12 +- .../spark/ml/tuning/CrossValidatorSuite.scala | 202 +++- 12 files changed, 522 insertions(+), 85 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a6fda0bf/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala b/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala index 6f15b37..4b2b3f8 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala @@ -34,7 +34,6 @@ 
import org.apache.spark.ml.util.MLWriter import org.apache.spark.ml.util._ import org.apache.spark.sql.DataFrame import org.apache.spark.sql.types.StructType -import org.apache.spark.util.Utils /** * :: DeveloperApi :: @@ -232,20 +231,9 @@ object Pipeline extends MLReadable[Pipeline] { stages: Array[PipelineStage], sc: SparkContext, path: String): Unit = { - // Copied and edited from DefaultParamsWriter.saveMetadata - // TODO: modify DefaultParamsWriter.saveMetadata to avoid duplication - val uid = instance.uid - val cls = instance.getClass.getName val stageUids = stages.map(_.uid) val jsonParams = List("stageUids" -> parse(compact(render(stageUids.toSeq - val metadata = ("class" -> cls) ~ -("timestamp" -> System.currentTimeMillis()) ~ -("sparkVersion" -> sc.version) ~ -("uid" -> uid) ~ -("paramMap" -> jsonParams) - val metadataPath = new Path(path, "metadata").toString - val metadataJson = compact(render(metadata)) - sc.parallelize(Seq(metadataJson), 1).saveAsTextFile(metadataPath) + DefaultParamsWriter.saveMetadata(instance, path, sc, paramMap = Some(jsonParams)) // Save stages val stagesDir = new Path(path, "stages").toString @@ -266,30 +254,10 @@ object Pipeline extends MLReadable[Pipeline] { implicit val format = DefaultFormats val stagesDir = new Path(path, "stages").toString - val stageUids: Array[String] = metadata.params match { -case JObject(pairs) => - if (pairs.length != 1) { -// Should not happen unless file is corrupted or we have a bug. -throw new RuntimeException( - s"Pipeline read expected 1 Param (stageUids), but found ${pairs.length}.") - } - pairs.head match { -case ("stageUids", jsonValue) => - jsonValue.extract[Seq[String]].toArray -case (paramName, jsonValue) => - // Should not happen unless file is corrupted or we have a bug. 
- throw new RuntimeException(s"Pipeline read encountered unexpected Param $paramName" + -s" in metadata: ${metadata.metadataStr}") - } -case _ => - throw new IllegalArgumentException( -s"Cannot recognize JSON metadata: ${metadata.metadataStr}.") - } + val stageUids: Array[String] = (metadata.params \ "stageUids").extract[Seq[String]].toArray val stages: Array[PipelineStage] = stageUids.zipWithIndex.map { case (stageUid, idx) => val stagePath = SharedReadWrite.getStagePath(stageUid, idx, stageUids.length, stag
spark git commit: [SPARK-11895][ML] rename and refactor DatasetExample under mllib/examples
Repository: spark Updated Branches: refs/heads/branch-1.6 fc4b88f3b -> a36d9bc75 [SPARK-11895][ML] rename and refactor DatasetExample under mllib/examples We used the name `Dataset` to refer to `SchemaRDD` in 1.2 in ML pipelines and created this example file. Since `Dataset` has a new meaning in Spark 1.6, we should rename it to avoid confusion. This PR also removes support for dense format to simplify the example code. cc: yinxusen Author: Xiangrui Meng Closes #9873 from mengxr/SPARK-11895. (cherry picked from commit fe89c1817d668e46adf70d0896c42c22a547c76a) Signed-off-by: Xiangrui Meng Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a36d9bc7 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a36d9bc7 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a36d9bc7 Branch: refs/heads/branch-1.6 Commit: a36d9bc7528ab8e6fe5e002f9b9b0a51a5b93568 Parents: fc4b88f Author: Xiangrui Meng Authored: Sun Nov 22 21:45:46 2015 -0800 Committer: Xiangrui Meng Committed: Sun Nov 22 21:45:53 2015 -0800 -- .../spark/examples/ml/DataFrameExample.scala| 104 .../spark/examples/mllib/DatasetExample.scala | 123 --- 2 files changed, 104 insertions(+), 123 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a36d9bc7/examples/src/main/scala/org/apache/spark/examples/ml/DataFrameExample.scala -- diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/DataFrameExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/DataFrameExample.scala new file mode 100644 index 000..424f001 --- /dev/null +++ b/examples/src/main/scala/org/apache/spark/examples/ml/DataFrameExample.scala @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// scalastyle:off println +package org.apache.spark.examples.ml + +import java.io.File + +import com.google.common.io.Files +import scopt.OptionParser + +import org.apache.spark.{SparkConf, SparkContext} +import org.apache.spark.examples.mllib.AbstractParams +import org.apache.spark.mllib.linalg.Vector +import org.apache.spark.mllib.stat.MultivariateOnlineSummarizer +import org.apache.spark.sql.{DataFrame, Row, SQLContext} + +/** + * An example of how to use [[org.apache.spark.sql.DataFrame]] for ML. Run with + * {{{ + * ./bin/run-example ml.DataFrameExample [options] + * }}} + * If you use it as a template to create your own app, please use `spark-submit` to submit your app. 
+ */ +object DataFrameExample { + + case class Params(input: String = "data/mllib/sample_libsvm_data.txt") +extends AbstractParams[Params] + + def main(args: Array[String]) { +val defaultParams = Params() + +val parser = new OptionParser[Params]("DatasetExample") { + head("Dataset: an example app using DataFrame as a Dataset for ML.") + opt[String]("input") +.text(s"input path to dataset") +.action((x, c) => c.copy(input = x)) + checkConfig { params => +success + } +} + +parser.parse(args, defaultParams).map { params => + run(params) +}.getOrElse { + sys.exit(1) +} + } + + def run(params: Params) { + +val conf = new SparkConf().setAppName(s"DataFrameExample with $params") +val sc = new SparkContext(conf) +val sqlContext = new SQLContext(sc) + +// Load input data +println(s"Loading LIBSVM file with UDT from ${params.input}.") +val df: DataFrame = sqlContext.read.format("libsvm").load(params.input).cache() +println("Schema from LIBSVM:") +df.printSchema() +println(s"Loaded training data as a DataFrame with ${df.count()} records.") + +// Show statistical summary of labels. +val labelSummary = df.describe("label") +labelSummary.show() + +// Convert features column to an RDD of vectors. +val features = df.select("features").map { case Row(v: Vector) => v } +val featureSummary = features.aggregate(new MultivariateOnlineSummarizer())( + (summary,
spark git commit: [SPARK-11895][ML] rename and refactor DatasetExample under mllib/examples
Repository: spark Updated Branches: refs/heads/master 426004a9c -> fe89c1817 [SPARK-11895][ML] rename and refactor DatasetExample under mllib/examples We used the name `Dataset` to refer to `SchemaRDD` in 1.2 in ML pipelines and created this example file. Since `Dataset` has a new meaning in Spark 1.6, we should rename it to avoid confusion. This PR also removes support for dense format to simplify the example code. cc: yinxusen Author: Xiangrui Meng Closes #9873 from mengxr/SPARK-11895. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fe89c181 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fe89c181 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fe89c181 Branch: refs/heads/master Commit: fe89c1817d668e46adf70d0896c42c22a547c76a Parents: 426004a Author: Xiangrui Meng Authored: Sun Nov 22 21:45:46 2015 -0800 Committer: Xiangrui Meng Committed: Sun Nov 22 21:45:46 2015 -0800 -- .../spark/examples/ml/DataFrameExample.scala| 104 .../spark/examples/mllib/DatasetExample.scala | 123 --- 2 files changed, 104 insertions(+), 123 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/fe89c181/examples/src/main/scala/org/apache/spark/examples/ml/DataFrameExample.scala -- diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/DataFrameExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/DataFrameExample.scala new file mode 100644 index 000..424f001 --- /dev/null +++ b/examples/src/main/scala/org/apache/spark/examples/ml/DataFrameExample.scala @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// scalastyle:off println +package org.apache.spark.examples.ml + +import java.io.File + +import com.google.common.io.Files +import scopt.OptionParser + +import org.apache.spark.{SparkConf, SparkContext} +import org.apache.spark.examples.mllib.AbstractParams +import org.apache.spark.mllib.linalg.Vector +import org.apache.spark.mllib.stat.MultivariateOnlineSummarizer +import org.apache.spark.sql.{DataFrame, Row, SQLContext} + +/** + * An example of how to use [[org.apache.spark.sql.DataFrame]] for ML. Run with + * {{{ + * ./bin/run-example ml.DataFrameExample [options] + * }}} + * If you use it as a template to create your own app, please use `spark-submit` to submit your app. 
+ */ +object DataFrameExample { + + case class Params(input: String = "data/mllib/sample_libsvm_data.txt") +extends AbstractParams[Params] + + def main(args: Array[String]) { +val defaultParams = Params() + +val parser = new OptionParser[Params]("DatasetExample") { + head("Dataset: an example app using DataFrame as a Dataset for ML.") + opt[String]("input") +.text(s"input path to dataset") +.action((x, c) => c.copy(input = x)) + checkConfig { params => +success + } +} + +parser.parse(args, defaultParams).map { params => + run(params) +}.getOrElse { + sys.exit(1) +} + } + + def run(params: Params) { + +val conf = new SparkConf().setAppName(s"DataFrameExample with $params") +val sc = new SparkContext(conf) +val sqlContext = new SQLContext(sc) + +// Load input data +println(s"Loading LIBSVM file with UDT from ${params.input}.") +val df: DataFrame = sqlContext.read.format("libsvm").load(params.input).cache() +println("Schema from LIBSVM:") +df.printSchema() +println(s"Loaded training data as a DataFrame with ${df.count()} records.") + +// Show statistical summary of labels. +val labelSummary = df.describe("label") +labelSummary.show() + +// Convert features column to an RDD of vectors. +val features = df.select("features").map { case Row(v: Vector) => v } +val featureSummary = features.aggregate(new MultivariateOnlineSummarizer())( + (summary, feat) => summary.add(feat), + (sum1, sum2) => sum1.merge(sum2)) +println(s"Selected features column
[1/2] spark git commit: Preparing Spark release v1.6.0-preview2
Repository: spark Updated Branches: refs/heads/branch-1.6 9d10ba76f -> fc4b88f3b Preparing Spark release v1.6.0-preview2 Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/30838142 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/30838142 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/30838142 Branch: refs/heads/branch-1.6 Commit: 308381420f51b6da1007ea09a02d740613a226e0 Parents: 9d10ba7 Author: Patrick Wendell Authored: Sun Nov 22 11:41:18 2015 -0800 Committer: Patrick Wendell Committed: Sun Nov 22 11:41:18 2015 -0800 -- assembly/pom.xml| 2 +- bagel/pom.xml | 2 +- core/pom.xml| 2 +- docker-integration-tests/pom.xml| 2 +- examples/pom.xml| 2 +- external/flume-assembly/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml | 2 +- external/kafka-assembly/pom.xml | 2 +- external/kafka/pom.xml | 2 +- external/mqtt-assembly/pom.xml | 2 +- external/mqtt/pom.xml | 2 +- external/twitter/pom.xml| 2 +- external/zeromq/pom.xml | 2 +- extras/java8-tests/pom.xml | 2 +- extras/kinesis-asl-assembly/pom.xml | 2 +- extras/kinesis-asl/pom.xml | 2 +- extras/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml | 2 +- launcher/pom.xml| 2 +- mllib/pom.xml | 2 +- network/common/pom.xml | 2 +- network/shuffle/pom.xml | 2 +- network/yarn/pom.xml| 2 +- pom.xml | 2 +- repl/pom.xml| 2 +- sql/catalyst/pom.xml| 2 +- sql/core/pom.xml| 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml| 2 +- streaming/pom.xml | 2 +- tags/pom.xml| 2 +- tools/pom.xml | 2 +- unsafe/pom.xml | 2 +- yarn/pom.xml| 2 +- 35 files changed, 35 insertions(+), 35 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/30838142/assembly/pom.xml -- diff --git a/assembly/pom.xml b/assembly/pom.xml index 4b60ee0..fbabaa5 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.6.0-SNAPSHOT +1.6.0 ../pom.xml 
http://git-wip-us.apache.org/repos/asf/spark/blob/30838142/bagel/pom.xml -- diff --git a/bagel/pom.xml b/bagel/pom.xml index 672e946..1b3e417 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.6.0-SNAPSHOT +1.6.0 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/30838142/core/pom.xml -- diff --git a/core/pom.xml b/core/pom.xml index 37e3f16..d32b93b 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.6.0-SNAPSHOT +1.6.0 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/30838142/docker-integration-tests/pom.xml -- diff --git a/docker-integration-tests/pom.xml b/docker-integration-tests/pom.xml index dee0c4a..ee9de91 100644 --- a/docker-integration-tests/pom.xml +++ b/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.10 -1.6.0-SNAPSHOT +1.6.0 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/30838142/examples/pom.xml -- diff --git a/examples/pom.xml b/examples/pom.xml index f5ab2a7..37b15bb 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.6.0-SNAPSHOT +1.6.0 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/30838142/external/flume-assembly/pom.xml -- diff --git a/external/flume-assembly/pom.xml b/external/flume-assembly/pom.xml index dceedcf..295455a 100644 --- a/external/flume-assembly/pom.xml +++ b/external/flume-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.6.0-SNAPSHOT +1.6.0 ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/30838142/external/flume-sink/pom.xml -- diff
[2/2] spark git commit: Preparing development version 1.6.0-SNAPSHOT
Preparing development version 1.6.0-SNAPSHOT Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fc4b88f3 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fc4b88f3 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fc4b88f3 Branch: refs/heads/branch-1.6 Commit: fc4b88f3bce31184aa43b386f44d699555e17443 Parents: 3083814 Author: Patrick Wendell Authored: Sun Nov 22 11:41:24 2015 -0800 Committer: Patrick Wendell Committed: Sun Nov 22 11:41:24 2015 -0800 -- assembly/pom.xml| 2 +- bagel/pom.xml | 2 +- core/pom.xml| 2 +- docker-integration-tests/pom.xml| 2 +- examples/pom.xml| 2 +- external/flume-assembly/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml | 2 +- external/kafka-assembly/pom.xml | 2 +- external/kafka/pom.xml | 2 +- external/mqtt-assembly/pom.xml | 2 +- external/mqtt/pom.xml | 2 +- external/twitter/pom.xml| 2 +- external/zeromq/pom.xml | 2 +- extras/java8-tests/pom.xml | 2 +- extras/kinesis-asl-assembly/pom.xml | 2 +- extras/kinesis-asl/pom.xml | 2 +- extras/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml | 2 +- launcher/pom.xml| 2 +- mllib/pom.xml | 2 +- network/common/pom.xml | 2 +- network/shuffle/pom.xml | 2 +- network/yarn/pom.xml| 2 +- pom.xml | 2 +- repl/pom.xml| 2 +- sql/catalyst/pom.xml| 2 +- sql/core/pom.xml| 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml| 2 +- streaming/pom.xml | 2 +- tags/pom.xml| 2 +- tools/pom.xml | 2 +- unsafe/pom.xml | 2 +- yarn/pom.xml| 2 +- 35 files changed, 35 insertions(+), 35 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/fc4b88f3/assembly/pom.xml -- diff --git a/assembly/pom.xml b/assembly/pom.xml index fbabaa5..4b60ee0 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.6.0 +1.6.0-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/fc4b88f3/bagel/pom.xml -- diff --git a/bagel/pom.xml b/bagel/pom.xml index 
1b3e417..672e946 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.6.0 +1.6.0-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/fc4b88f3/core/pom.xml -- diff --git a/core/pom.xml b/core/pom.xml index d32b93b..37e3f16 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.6.0 +1.6.0-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/fc4b88f3/docker-integration-tests/pom.xml -- diff --git a/docker-integration-tests/pom.xml b/docker-integration-tests/pom.xml index ee9de91..dee0c4a 100644 --- a/docker-integration-tests/pom.xml +++ b/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.10 -1.6.0 +1.6.0-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/fc4b88f3/examples/pom.xml -- diff --git a/examples/pom.xml b/examples/pom.xml index 37b15bb..f5ab2a7 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.6.0 +1.6.0-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/fc4b88f3/external/flume-assembly/pom.xml -- diff --git a/external/flume-assembly/pom.xml b/external/flume-assembly/pom.xml index 295455a..dceedcf 100644 --- a/external/flume-assembly/pom.xml +++ b/external/flume-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.6.0 +1.6.0-SNAPSHOT ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/fc4b88f3/external/flume-sink/pom.xml -- diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml index 31b907f
Git Push Summary
Repository: spark Updated Tags: refs/tags/v1.6.0-preview2 [created] 308381420 - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[2/2] spark git commit: Preparing development version 1.6.0-SNAPSHOT
Preparing development version 1.6.0-SNAPSHOT Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9d10ba76 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9d10ba76 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9d10ba76 Branch: refs/heads/branch-1.6 Commit: 9d10ba76fdff22f6172e775a45a07477300dd618 Parents: f836941 Author: Patrick Wendell Authored: Sun Nov 22 10:59:59 2015 -0800 Committer: Patrick Wendell Committed: Sun Nov 22 10:59:59 2015 -0800 -- assembly/pom.xml| 2 +- bagel/pom.xml | 2 +- core/pom.xml| 2 +- docker-integration-tests/pom.xml| 2 +- examples/pom.xml| 2 +- external/flume-assembly/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml | 2 +- external/kafka-assembly/pom.xml | 2 +- external/kafka/pom.xml | 2 +- external/mqtt-assembly/pom.xml | 2 +- external/mqtt/pom.xml | 2 +- external/twitter/pom.xml| 2 +- external/zeromq/pom.xml | 2 +- extras/java8-tests/pom.xml | 2 +- extras/kinesis-asl-assembly/pom.xml | 2 +- extras/kinesis-asl/pom.xml | 2 +- extras/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml | 2 +- launcher/pom.xml| 2 +- mllib/pom.xml | 2 +- network/common/pom.xml | 2 +- network/shuffle/pom.xml | 2 +- network/yarn/pom.xml| 2 +- pom.xml | 2 +- repl/pom.xml| 2 +- sql/catalyst/pom.xml| 2 +- sql/core/pom.xml| 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml| 2 +- streaming/pom.xml | 2 +- tags/pom.xml| 2 +- tools/pom.xml | 2 +- unsafe/pom.xml | 2 +- yarn/pom.xml| 2 +- 35 files changed, 35 insertions(+), 35 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/9d10ba76/assembly/pom.xml -- diff --git a/assembly/pom.xml b/assembly/pom.xml index fbabaa5..4b60ee0 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.6.0 +1.6.0-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/9d10ba76/bagel/pom.xml -- diff --git a/bagel/pom.xml b/bagel/pom.xml index 
1b3e417..672e946 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.6.0 +1.6.0-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/9d10ba76/core/pom.xml -- diff --git a/core/pom.xml b/core/pom.xml index d32b93b..37e3f16 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.6.0 +1.6.0-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/9d10ba76/docker-integration-tests/pom.xml -- diff --git a/docker-integration-tests/pom.xml b/docker-integration-tests/pom.xml index ee9de91..dee0c4a 100644 --- a/docker-integration-tests/pom.xml +++ b/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.10 -1.6.0 +1.6.0-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/9d10ba76/examples/pom.xml -- diff --git a/examples/pom.xml b/examples/pom.xml index 37b15bb..f5ab2a7 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.6.0 +1.6.0-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/9d10ba76/external/flume-assembly/pom.xml -- diff --git a/external/flume-assembly/pom.xml b/external/flume-assembly/pom.xml index 295455a..dceedcf 100644 --- a/external/flume-assembly/pom.xml +++ b/external/flume-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.6.0 +1.6.0-SNAPSHOT ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/9d10ba76/external/flume-sink/pom.xml -- diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml index 31b907f
[1/2] spark git commit: Preparing Spark release v1.6.0-preview1
Repository: spark Updated Branches: refs/heads/branch-1.6 b004a104f -> 9d10ba76f Preparing Spark release v1.6.0-preview1 Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f8369412 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f8369412 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f8369412 Branch: refs/heads/branch-1.6 Commit: f8369412d22de0fc75b2aab4d72ad298fc30cc6f Parents: b004a10 Author: Patrick Wendell Authored: Sun Nov 22 10:59:54 2015 -0800 Committer: Patrick Wendell Committed: Sun Nov 22 10:59:54 2015 -0800 -- assembly/pom.xml| 2 +- bagel/pom.xml | 2 +- core/pom.xml| 2 +- docker-integration-tests/pom.xml| 2 +- examples/pom.xml| 2 +- external/flume-assembly/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml | 2 +- external/kafka-assembly/pom.xml | 2 +- external/kafka/pom.xml | 2 +- external/mqtt-assembly/pom.xml | 2 +- external/mqtt/pom.xml | 2 +- external/twitter/pom.xml| 2 +- external/zeromq/pom.xml | 2 +- extras/java8-tests/pom.xml | 2 +- extras/kinesis-asl-assembly/pom.xml | 2 +- extras/kinesis-asl/pom.xml | 2 +- extras/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml | 2 +- launcher/pom.xml| 2 +- mllib/pom.xml | 2 +- network/common/pom.xml | 2 +- network/shuffle/pom.xml | 2 +- network/yarn/pom.xml| 2 +- pom.xml | 2 +- repl/pom.xml| 2 +- sql/catalyst/pom.xml| 2 +- sql/core/pom.xml| 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml| 2 +- streaming/pom.xml | 2 +- tags/pom.xml| 2 +- tools/pom.xml | 2 +- unsafe/pom.xml | 2 +- yarn/pom.xml| 2 +- 35 files changed, 35 insertions(+), 35 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/f8369412/assembly/pom.xml -- diff --git a/assembly/pom.xml b/assembly/pom.xml index 4b60ee0..fbabaa5 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.6.0-SNAPSHOT +1.6.0 ../pom.xml 
http://git-wip-us.apache.org/repos/asf/spark/blob/f8369412/bagel/pom.xml -- diff --git a/bagel/pom.xml b/bagel/pom.xml index 672e946..1b3e417 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.6.0-SNAPSHOT +1.6.0 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/f8369412/core/pom.xml -- diff --git a/core/pom.xml b/core/pom.xml index 37e3f16..d32b93b 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.6.0-SNAPSHOT +1.6.0 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/f8369412/docker-integration-tests/pom.xml -- diff --git a/docker-integration-tests/pom.xml b/docker-integration-tests/pom.xml index dee0c4a..ee9de91 100644 --- a/docker-integration-tests/pom.xml +++ b/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.10 -1.6.0-SNAPSHOT +1.6.0 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/f8369412/examples/pom.xml -- diff --git a/examples/pom.xml b/examples/pom.xml index f5ab2a7..37b15bb 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.6.0-SNAPSHOT +1.6.0 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/f8369412/external/flume-assembly/pom.xml -- diff --git a/external/flume-assembly/pom.xml b/external/flume-assembly/pom.xml index dceedcf..295455a 100644 --- a/external/flume-assembly/pom.xml +++ b/external/flume-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.6.0-SNAPSHOT +1.6.0 ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/f8369412/external/flume-sink/pom.xml -- diff
Git Push Summary
Repository: spark Updated Tags: refs/tags/v1.6.0-preview1 [created] f8369412d - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
Git Push Summary
Repository: spark Updated Tags: refs/tags/v1.6.0-preview1 [deleted] 7582425d1 - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-11908][SQL] Add NullType support to RowEncoder
Repository: spark Updated Branches: refs/heads/master ff442bbcf -> 426004a9c [SPARK-11908][SQL] Add NullType support to RowEncoder JIRA: https://issues.apache.org/jira/browse/SPARK-11908 We should add NullType support to RowEncoder. Author: Liang-Chi Hsieh Closes #9891 from viirya/rowencoder-nulltype. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/426004a9 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/426004a9 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/426004a9 Branch: refs/heads/master Commit: 426004a9c9a864f90494d08601e6974709091a56 Parents: ff442bb Author: Liang-Chi Hsieh Authored: Sun Nov 22 10:36:47 2015 -0800 Committer: Michael Armbrust Committed: Sun Nov 22 10:36:47 2015 -0800 -- .../org/apache/spark/sql/catalyst/encoders/RowEncoder.scala | 5 +++-- .../org/apache/spark/sql/catalyst/expressions/objects.scala | 3 +++ .../apache/spark/sql/catalyst/encoders/RowEncoderSuite.scala| 3 +++ 3 files changed, 9 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/426004a9/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala index 4cda482..fa553e7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala @@ -48,7 +48,7 @@ object RowEncoder { private def extractorsFor( inputObject: Expression, inputType: DataType): Expression = inputType match { -case BooleanType | ByteType | ShortType | IntegerType | LongType | +case NullType | BooleanType | ByteType | ShortType | IntegerType | LongType | FloatType | DoubleType | BinaryType => inputObject case udt: UserDefinedType[_] => @@ -143,6 +143,7 @@ object RowEncoder 
{ case _: MapType => ObjectType(classOf[scala.collection.Map[_, _]]) case _: StructType => ObjectType(classOf[Row]) case udt: UserDefinedType[_] => ObjectType(udt.userClass) +case _: NullType => ObjectType(classOf[java.lang.Object]) } private def constructorFor(schema: StructType): Expression = { @@ -158,7 +159,7 @@ object RowEncoder { } private def constructorFor(input: Expression): Expression = input.dataType match { -case BooleanType | ByteType | ShortType | IntegerType | LongType | +case NullType | BooleanType | ByteType | ShortType | IntegerType | LongType | FloatType | DoubleType | BinaryType => input case udt: UserDefinedType[_] => http://git-wip-us.apache.org/repos/asf/spark/blob/426004a9/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala index ef7399e..82317d3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala @@ -369,6 +369,9 @@ case class MapObjects( private lazy val completeFunction = function(loopAttribute) private def itemAccessorMethod(dataType: DataType): String => String = dataType match { +case NullType => + val nullTypeClassName = NullType.getClass.getName + ".MODULE$" + (i: String) => s".get($i, $nullTypeClassName)" case IntegerType => (i: String) => s".getInt($i)" case LongType => (i: String) => s".getLong($i)" case FloatType => (i: String) => s".getFloat($i)" http://git-wip-us.apache.org/repos/asf/spark/blob/426004a9/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/RowEncoderSuite.scala -- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/RowEncoderSuite.scala 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/RowEncoderSuite.scala index 46c6e0d..0ea51ec 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/RowEncoderSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/RowEncoderSuite.scala @@ -80,11 +80,13 @@ class RowEncoderSuite extends SparkFunSuite { private val structOfString = new StructType().add("str", StringType) private val structOfUDT = new StructType().add("udt", new ExamplePointUDT, false) private val arrayOf
spark git commit: [SPARK-11908][SQL] Add NullType support to RowEncoder
Repository: spark Updated Branches: refs/heads/branch-1.6 8c718a577 -> b004a104f [SPARK-11908][SQL] Add NullType support to RowEncoder JIRA: https://issues.apache.org/jira/browse/SPARK-11908 We should add NullType support to RowEncoder. Author: Liang-Chi Hsieh Closes #9891 from viirya/rowencoder-nulltype. (cherry picked from commit 426004a9c9a864f90494d08601e6974709091a56) Signed-off-by: Michael Armbrust Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b004a104 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b004a104 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b004a104 Branch: refs/heads/branch-1.6 Commit: b004a104f62849b393047aa8ea45542c871198e7 Parents: 8c718a5 Author: Liang-Chi Hsieh Authored: Sun Nov 22 10:36:47 2015 -0800 Committer: Michael Armbrust Committed: Sun Nov 22 10:38:20 2015 -0800 -- .../org/apache/spark/sql/catalyst/encoders/RowEncoder.scala | 5 +++-- .../org/apache/spark/sql/catalyst/expressions/objects.scala | 3 +++ .../apache/spark/sql/catalyst/encoders/RowEncoderSuite.scala| 3 +++ 3 files changed, 9 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/b004a104/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala index 4cda482..fa553e7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala @@ -48,7 +48,7 @@ object RowEncoder { private def extractorsFor( inputObject: Expression, inputType: DataType): Expression = inputType match { -case BooleanType | ByteType | ShortType | IntegerType | LongType | +case NullType | BooleanType | ByteType | ShortType | IntegerType | LongType | FloatType | 
DoubleType | BinaryType => inputObject case udt: UserDefinedType[_] => @@ -143,6 +143,7 @@ object RowEncoder { case _: MapType => ObjectType(classOf[scala.collection.Map[_, _]]) case _: StructType => ObjectType(classOf[Row]) case udt: UserDefinedType[_] => ObjectType(udt.userClass) +case _: NullType => ObjectType(classOf[java.lang.Object]) } private def constructorFor(schema: StructType): Expression = { @@ -158,7 +159,7 @@ object RowEncoder { } private def constructorFor(input: Expression): Expression = input.dataType match { -case BooleanType | ByteType | ShortType | IntegerType | LongType | +case NullType | BooleanType | ByteType | ShortType | IntegerType | LongType | FloatType | DoubleType | BinaryType => input case udt: UserDefinedType[_] => http://git-wip-us.apache.org/repos/asf/spark/blob/b004a104/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala index ef7399e..82317d3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala @@ -369,6 +369,9 @@ case class MapObjects( private lazy val completeFunction = function(loopAttribute) private def itemAccessorMethod(dataType: DataType): String => String = dataType match { +case NullType => + val nullTypeClassName = NullType.getClass.getName + ".MODULE$" + (i: String) => s".get($i, $nullTypeClassName)" case IntegerType => (i: String) => s".getInt($i)" case LongType => (i: String) => s".getLong($i)" case FloatType => (i: String) => s".getFloat($i)" http://git-wip-us.apache.org/repos/asf/spark/blob/b004a104/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/RowEncoderSuite.scala -- diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/RowEncoderSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/RowEncoderSuite.scala index 46c6e0d..0ea51ec 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/RowEncoderSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/RowEncoderSuite.scala @@ -80,11 +80,13 @@ class RowEncoderSuite extends SparkFunSuite { private val structOfString = new StructType().add("str", StringTyp