spark git commit: [SPARK-20401][DOC] In the spark official configuration document, the 'spark.driver.supervise' configuration parameter specification and default values are necessary.
Repository: spark
Updated Branches: refs/heads/branch-2.2 ff1f989f2 -> 6c2489c66

[SPARK-20401][DOC] In the spark official configuration document, the 'spark.driver.supervise' configuration parameter specification and default values are necessary.

## What changes were proposed in this pull request?

Submit a Spark job through the REST interface, e.g.:

curl -X POST http://10.43.183.120:6066/v1/submissions/create --header "Content-Type:application/json;charset=UTF-8" --data '{
  "action": "CreateSubmissionRequest",
  "appArgs": [ "myAppArgument" ],
  "appResource": "/home/mr/gxl/test.jar",
  "clientSparkVersion": "2.2.0",
  "environmentVariables": { "SPARK_ENV_LOADED": "1" },
  "mainClass": "cn.zte.HdfsTest",
  "sparkProperties": {
    "spark.jars": "/home/mr/gxl/test.jar",
    **"spark.driver.supervise": "true",**
    "spark.app.name": "HdfsTest",
    "spark.eventLog.enabled": "false",
    "spark.submit.deployMode": "cluster",
    "spark.master": "spark://10.43.183.120:6066"
  }
}'

**I want to make sure that the driver is automatically restarted if it fails with a non-zero exit code, but I could not find a description or default value for the 'spark.driver.supervise' configuration parameter in the official Spark documentation.**

## How was this patch tested?

Manual tests.

Please review http://spark.apache.org/contributing.html before opening a pull request.

Author: 郭小龙 10207633
Author: guoxiaolong
Author: guoxiaolongzte

Closes #17696 from guoxiaolongzte/SPARK-20401.

(cherry picked from commit ad290402aa1d609abf5a2883a6d87fa8bc2bd517)
Signed-off-by: Sean Owen

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6c2489c6
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6c2489c6
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6c2489c6

Branch: refs/heads/branch-2.2
Commit: 6c2489c66682fdc6a886346ed980d95e6e5eefde
Parents: ff1f989
Author: 郭小龙 10207633
Authored: Fri Apr 21 20:08:26 2017 +0100
Committer: Sean Owen
Committed: Fri Apr 21 20:08:34 2017 +0100

--
docs/configuration.md | 8
1 file changed, 8 insertions(+)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/6c2489c6/docs/configuration.md
--
diff --git a/docs/configuration.md b/docs/configuration.md
index 2687f54..6b65d2b 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -213,6 +213,14 @@ of the most common options to set are:
 and typically can have up to 50 characters.
+
+ spark.driver.supervise
+ false
+
+If true, restarts the driver automatically if it fails with a non-zero exit status.
+Only has effect in Spark standalone mode or Mesos cluster deploy mode.
+
+
 Apart from these, the following properties are also available, and may be useful in some situations:
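For reference (not part of the commit): the same REST submission can also be issued programmatically. Below is a minimal Scala sketch using only the JDK to POST the payload from the curl example above; the host, port, jar path, and main class are the placeholder values from that example, not real endpoints.

```
import java.net.{HttpURLConnection, URL}
import java.nio.charset.StandardCharsets

object SupervisedRestSubmit {
  def main(args: Array[String]): Unit = {
    // Same payload as the curl example; "spark.driver.supervise" -> "true"
    // asks the cluster manager to restart the driver on a non-zero exit.
    val payload =
      """{
        |  "action": "CreateSubmissionRequest",
        |  "appArgs": ["myAppArgument"],
        |  "appResource": "/home/mr/gxl/test.jar",
        |  "clientSparkVersion": "2.2.0",
        |  "environmentVariables": {"SPARK_ENV_LOADED": "1"},
        |  "mainClass": "cn.zte.HdfsTest",
        |  "sparkProperties": {
        |    "spark.jars": "/home/mr/gxl/test.jar",
        |    "spark.driver.supervise": "true",
        |    "spark.app.name": "HdfsTest",
        |    "spark.eventLog.enabled": "false",
        |    "spark.submit.deployMode": "cluster",
        |    "spark.master": "spark://10.43.183.120:6066"
        |  }
        |}""".stripMargin

    val conn = new URL("http://10.43.183.120:6066/v1/submissions/create")
      .openConnection().asInstanceOf[HttpURLConnection]
    conn.setRequestMethod("POST")
    conn.setRequestProperty("Content-Type", "application/json;charset=UTF-8")
    conn.setDoOutput(true)
    conn.getOutputStream.write(payload.getBytes(StandardCharsets.UTF_8))

    // The response JSON contains the submission (driver) id on success.
    println(scala.io.Source.fromInputStream(conn.getInputStream).mkString)
  }
}
```

The same supervision behavior is what the `--supervise` flag of `spark-submit` enables when submitting in cluster deploy mode.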
spark git commit: [SPARK-20401][DOC] In the spark official configuration document, the 'spark.driver.supervise' configuration parameter specification and default values are necessary.
Repository: spark
Updated Branches: refs/heads/master fd648bff6 -> ad290402a

[SPARK-20401][DOC] In the spark official configuration document, the 'spark.driver.supervise' configuration parameter specification and default values are necessary.

## What changes were proposed in this pull request?

Submit a Spark job through the REST interface, e.g.:

curl -X POST http://10.43.183.120:6066/v1/submissions/create --header "Content-Type:application/json;charset=UTF-8" --data '{
  "action": "CreateSubmissionRequest",
  "appArgs": [ "myAppArgument" ],
  "appResource": "/home/mr/gxl/test.jar",
  "clientSparkVersion": "2.2.0",
  "environmentVariables": { "SPARK_ENV_LOADED": "1" },
  "mainClass": "cn.zte.HdfsTest",
  "sparkProperties": {
    "spark.jars": "/home/mr/gxl/test.jar",
    **"spark.driver.supervise": "true",**
    "spark.app.name": "HdfsTest",
    "spark.eventLog.enabled": "false",
    "spark.submit.deployMode": "cluster",
    "spark.master": "spark://10.43.183.120:6066"
  }
}'

**I want to make sure that the driver is automatically restarted if it fails with a non-zero exit code, but I could not find a description or default value for the 'spark.driver.supervise' configuration parameter in the official Spark documentation.**

## How was this patch tested?

Manual tests.

Please review http://spark.apache.org/contributing.html before opening a pull request.

Author: 郭小龙 10207633
Author: guoxiaolong
Author: guoxiaolongzte

Closes #17696 from guoxiaolongzte/SPARK-20401.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ad290402
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ad290402
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ad290402

Branch: refs/heads/master
Commit: ad290402aa1d609abf5a2883a6d87fa8bc2bd517
Parents: fd648bf
Author: 郭小龙 10207633
Authored: Fri Apr 21 20:08:26 2017 +0100
Committer: Sean Owen
Committed: Fri Apr 21 20:08:26 2017 +0100

--
docs/configuration.md | 8
1 file changed, 8 insertions(+)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/ad290402/docs/configuration.md
--
diff --git a/docs/configuration.md b/docs/configuration.md
index 2687f54..6b65d2b 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -213,6 +213,14 @@ of the most common options to set are:
 and typically can have up to 50 characters.
+
+ spark.driver.supervise
+ false
+
+If true, restarts the driver automatically if it fails with a non-zero exit status.
+Only has effect in Spark standalone mode or Mesos cluster deploy mode.
+
+
 Apart from these, the following properties are also available, and may be useful in some situations:
spark git commit: [SPARK-20371][R] Add wrappers for collect_list and collect_set
Repository: spark Updated Branches: refs/heads/master eb00378f0 -> fd648bff6 [SPARK-20371][R] Add wrappers for collect_list and collect_set ## What changes were proposed in this pull request? Adds wrappers for `collect_list` and `collect_set`. ## How was this patch tested? Unit tests, `check-cran.sh` Author: zero323 Closes #17672 from zero323/SPARK-20371. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fd648bff Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fd648bff Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fd648bff Branch: refs/heads/master Commit: fd648bff63f91a30810910dfc5664eea0ff5e6f9 Parents: eb00378 Author: zero323 Authored: Fri Apr 21 12:06:21 2017 -0700 Committer: Felix Cheung Committed: Fri Apr 21 12:06:21 2017 -0700 -- R/pkg/NAMESPACE | 2 ++ R/pkg/R/functions.R | 40 ++ R/pkg/R/generics.R| 9 ++ R/pkg/inst/tests/testthat/test_sparkSQL.R | 22 ++ 4 files changed, 73 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/fd648bff/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index b6b559a..e804e30 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -203,6 +203,8 @@ exportMethods("%in%", "cbrt", "ceil", "ceiling", + "collect_list", + "collect_set", "column", "concat", "concat_ws", http://git-wip-us.apache.org/repos/asf/spark/blob/fd648bff/R/pkg/R/functions.R -- diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index f854df1..e7decb9 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -3705,3 +3705,43 @@ setMethod("create_map", jc <- callJStatic("org.apache.spark.sql.functions", "map", jcols) column(jc) }) + +#' collect_list +#' +#' Creates a list of objects with duplicates. +#' +#' @param x Column to compute on +#' +#' @rdname collect_list +#' @name collect_list +#' @family agg_funcs +#' @aliases collect_list,Column-method +#' @export +#' @examples \dontrun{collect_list(df$x)} +#' @note collect_list since 2.3.0 +setMethod("collect_list", + signature(x = "Column"), + function(x) { +jc <- callJStatic("org.apache.spark.sql.functions", "collect_list", x@jc) +column(jc) + }) + +#' collect_set +#' +#' Creates a list of objects with duplicate elements eliminated. +#' +#' @param x Column to compute on +#' +#' @rdname collect_set +#' @name collect_set +#' @family agg_funcs +#' @aliases collect_set,Column-method +#' @export +#' @examples \dontrun{collect_set(df$x)} +#' @note collect_set since 2.3.0 +setMethod("collect_set", + signature(x = "Column"), + function(x) { +jc <- callJStatic("org.apache.spark.sql.functions", "collect_set", x@jc) +column(jc) + }) http://git-wip-us.apache.org/repos/asf/spark/blob/fd648bff/R/pkg/R/generics.R -- diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index da46823..61d248e 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -918,6 +918,14 @@ setGeneric("cbrt", function(x) { standardGeneric("cbrt") }) #' @export setGeneric("ceil", function(x) { standardGeneric("ceil") }) +#' @rdname collect_list +#' @export +setGeneric("collect_list", function(x) { standardGeneric("collect_list") }) + +#' @rdname collect_set +#' @export +setGeneric("collect_set", function(x) { standardGeneric("collect_set") }) + #' @rdname column #' @export setGeneric("column", function(x) { standardGeneric("column") }) @@ -1358,6 +1366,7 @@ setGeneric("window", function(x, ...) 
{ standardGeneric("window") }) #' @export setGeneric("year", function(x) { standardGeneric("year") }) + ## Spark.ML Methods ## #' @rdname fitted http://git-wip-us.apache.org/repos/asf/spark/blob/fd648bff/R/pkg/inst/tests/testthat/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index 9e87a47..bf2093f 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -1731,6 +1731,28 @@ test_that("group by, agg functions", { expect_true(abs(sd(1:2) - 0.7071068) < 1e-6) expect_true(abs(var(1:5, 1:5) - 2.5) < 1e-6) + # Test collect_list and collect_set + gd3_collections_local <- collect( +agg(gd3, collect_set(df8$age),
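For context (not part of the commit): the new SparkR wrappers delegate to the `collect_list` and `collect_set` aggregate functions that already exist in `org.apache.spark.sql.functions`. A minimal Scala sketch of the equivalent usage, with a made-up example DataFrame:

```
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{collect_list, collect_set}

object CollectAggExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("collect-agg").getOrCreate()
    import spark.implicits._

    val df = Seq((30, "Andy"), (30, "Andy"), (19, "Justin")).toDF("age", "name")

    // collect_list keeps duplicates; collect_set eliminates them.
    df.groupBy($"age")
      .agg(collect_list($"name").as("names"), collect_set($"name").as("distinct_names"))
      .show()

    spark.stop()
  }
}
```

In SparkR the wrappers are used the same way inside `agg`, as the new tests in `test_sparkSQL.R` show (e.g. `agg(gd3, collect_set(df8$age))`).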
spark git commit: [SPARK-20423][ML] fix MLOR coeffs centering when reg == 0
Repository: spark
Updated Branches: refs/heads/branch-2.2 adaa3f7e0 -> ff1f989f2

[SPARK-20423][ML] fix MLOR coeffs centering when reg == 0

## What changes were proposed in this pull request?

When reg == 0, MLOR has multiple solutions and we need to center the coefficients to obtain an identical result. But the current implementation centers the `coefficientMatrix` using the global mean over all coefficients. In fact, the `coefficientMatrix` should be centered on each feature index separately: according to the MLOR probability distribution function, if `{ w0, w1, .. w(K-1) }` make up the `coefficientMatrix`, then `{ w0 + c, w1 + c, ... w(K - 1) + c }` is an equivalent solution for any vector `c` of `numFeatures` dimensions, because shifting every class's coefficients by the same `c` multiplies both the numerator and the denominator of each class probability by the same factor, leaving the probabilities unchanged. Reference: https://core.ac.uk/download/pdf/6287975.pdf

So we need to center the `coefficientMatrix` on each feature dimension separately. **We can also confirm this through the R library `glmnet`: when reg == 0, MLOR in `glmnet` always produces coefficients whose sum over classes is zero in every dimension.**

## How was this patch tested?

Tests added.

Author: WeichenXu

Closes #17706 from WeichenXu123/mlor_center.

(cherry picked from commit eb00378f0eed6afbf328ae6cd541cc202d14c1f0)
Signed-off-by: DB Tsai

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ff1f989f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ff1f989f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ff1f989f

Branch: refs/heads/branch-2.2
Commit: ff1f989f29c08bb5297f3aa35f30ff06e0cb8046
Parents: adaa3f7
Author: WeichenXu
Authored: Fri Apr 21 17:58:13 2017 +
Committer: DB Tsai
Committed: Fri Apr 21 17:58:33 2017 +

--
.../spark/ml/classification/LogisticRegression.scala | 11 ---
.../ml/classification/LogisticRegressionSuite.scala | 6 ++
2 files changed, 14 insertions(+), 3 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/ff1f989f/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
--
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
index 965ce3d..bc81546 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
@@ -609,9 +609,14 @@ class LogisticRegression @Since("1.2.0") ( Friedman, et al.
"Regularization Paths for Generalized Linear Models via Coordinate Descent," https://core.ac.uk/download/files/153/6287975.pdf */ - val denseValues = denseCoefficientMatrix.values - val coefficientMean = denseValues.sum / denseValues.length - denseCoefficientMatrix.update(_ - coefficientMean) + val centers = Array.fill(numFeatures)(0.0) + denseCoefficientMatrix.foreachActive { case (i, j, v) => +centers(j) += v + } + centers.transform(_ / numCoefficientSets) + denseCoefficientMatrix.foreachActive { case (i, j, v) => +denseCoefficientMatrix.update(i, j, v - centers(j)) + } } // center the intercepts when using multinomial algorithm http://git-wip-us.apache.org/repos/asf/spark/blob/ff1f989f/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala -- diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala index c858b9b..83f575e 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala @@ -1139,6 +1139,9 @@ class LogisticRegressionSuite 0.10095851, -0.85897154, 0.08392798, 0.07904499), isTransposed = true) val interceptsR = Vectors.dense(-2.10320093, 0.3394473, 1.76375361) +model1.coefficientMatrix.colIter.foreach(v => assert(v.toArray.sum ~== 0.0 absTol eps)) +model2.coefficientMatrix.colIter.foreach(v => assert(v.toArray.sum ~== 0.0 absTol eps)) + assert(model1.coefficientMatrix ~== coefficientsR relTol 0.05) assert(model1.coefficientMatrix.toArray.sum ~== 0.0 absTol eps) assert(model1.interceptVector ~== interceptsR relTol 0.05) @@ -1204,6 +1207,9 @@ class LogisticRegressionSuite -0.3180040, 0.9679074, -0.2252219, -0.4319914, 0.2452411, -0.6046524, 0.1050710, 0.1180180), isTrans
spark git commit: [SPARK-20423][ML] fix MLOR coeffs centering when reg == 0
Repository: spark
Updated Branches: refs/heads/master a750a5959 -> eb00378f0

[SPARK-20423][ML] fix MLOR coeffs centering when reg == 0

## What changes were proposed in this pull request?

When reg == 0, MLOR has multiple solutions and we need to center the coefficients to obtain an identical result. But the current implementation centers the `coefficientMatrix` using the global mean over all coefficients. In fact, the `coefficientMatrix` should be centered on each feature index separately: according to the MLOR probability distribution function, if `{ w0, w1, .. w(K-1) }` make up the `coefficientMatrix`, then `{ w0 + c, w1 + c, ... w(K - 1) + c }` is an equivalent solution for any vector `c` of `numFeatures` dimensions, because shifting every class's coefficients by the same `c` multiplies both the numerator and the denominator of each class probability by the same factor, leaving the probabilities unchanged. Reference: https://core.ac.uk/download/pdf/6287975.pdf

So we need to center the `coefficientMatrix` on each feature dimension separately. **We can also confirm this through the R library `glmnet`: when reg == 0, MLOR in `glmnet` always produces coefficients whose sum over classes is zero in every dimension.**

## How was this patch tested?

Tests added.

Author: WeichenXu

Closes #17706 from WeichenXu123/mlor_center.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/eb00378f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/eb00378f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/eb00378f

Branch: refs/heads/master
Commit: eb00378f0eed6afbf328ae6cd541cc202d14c1f0
Parents: a750a59
Author: WeichenXu
Authored: Fri Apr 21 17:58:13 2017 +
Committer: DB Tsai
Committed: Fri Apr 21 17:58:13 2017 +

--
.../spark/ml/classification/LogisticRegression.scala | 11 ---
.../ml/classification/LogisticRegressionSuite.scala | 6 ++
2 files changed, 14 insertions(+), 3 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/eb00378f/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
--
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
index 965ce3d..bc81546 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
@@ -609,9 +609,14 @@ class LogisticRegression @Since("1.2.0") ( Friedman, et al.
"Regularization Paths for Generalized Linear Models via Coordinate Descent," https://core.ac.uk/download/files/153/6287975.pdf */ - val denseValues = denseCoefficientMatrix.values - val coefficientMean = denseValues.sum / denseValues.length - denseCoefficientMatrix.update(_ - coefficientMean) + val centers = Array.fill(numFeatures)(0.0) + denseCoefficientMatrix.foreachActive { case (i, j, v) => +centers(j) += v + } + centers.transform(_ / numCoefficientSets) + denseCoefficientMatrix.foreachActive { case (i, j, v) => +denseCoefficientMatrix.update(i, j, v - centers(j)) + } } // center the intercepts when using multinomial algorithm http://git-wip-us.apache.org/repos/asf/spark/blob/eb00378f/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala -- diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala index c858b9b..83f575e 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala @@ -1139,6 +1139,9 @@ class LogisticRegressionSuite 0.10095851, -0.85897154, 0.08392798, 0.07904499), isTransposed = true) val interceptsR = Vectors.dense(-2.10320093, 0.3394473, 1.76375361) +model1.coefficientMatrix.colIter.foreach(v => assert(v.toArray.sum ~== 0.0 absTol eps)) +model2.coefficientMatrix.colIter.foreach(v => assert(v.toArray.sum ~== 0.0 absTol eps)) + assert(model1.coefficientMatrix ~== coefficientsR relTol 0.05) assert(model1.coefficientMatrix.toArray.sum ~== 0.0 absTol eps) assert(model1.interceptVector ~== interceptsR relTol 0.05) @@ -1204,6 +1207,9 @@ class LogisticRegressionSuite -0.3180040, 0.9679074, -0.2252219, -0.4319914, 0.2452411, -0.6046524, 0.1050710, 0.1180180), isTransposed = true) +model1.coefficientMatrix.colIter.foreach(v => assert(v.toArray.sum ~== 0.0 absTo
spark git commit: [SPARK-20341][SQL] Support BigInt's value that does not fit in long value range
Repository: spark Updated Branches: refs/heads/branch-2.2 aaeca8bdd -> adaa3f7e0 [SPARK-20341][SQL] Support BigInt's value that does not fit in long value range ## What changes were proposed in this pull request? This PR avoids an exception in the case where `scala.math.BigInt` has a value that does not fit into long value range (e.g. `Long.MAX_VALUE+1`). When we run the following code by using the current Spark, the following exception is thrown. This PR keeps the value using `BigDecimal` if we detect such an overflow case by catching `ArithmeticException`. Sample program: ``` case class BigIntWrapper(value:scala.math.BigInt)``` spark.createDataset(BigIntWrapper(scala.math.BigInt("1002"))::Nil).show ``` Exception: ``` Error while encoding: java.lang.ArithmeticException: BigInteger out of long range staticinvoke(class org.apache.spark.sql.types.Decimal$, DecimalType(38,0), apply, assertnotnull(assertnotnull(input[0, org.apache.spark.sql.BigIntWrapper, true])).value, true) AS value#0 java.lang.RuntimeException: Error while encoding: java.lang.ArithmeticException: BigInteger out of long range staticinvoke(class org.apache.spark.sql.types.Decimal$, DecimalType(38,0), apply, assertnotnull(assertnotnull(input[0, org.apache.spark.sql.BigIntWrapper, true])).value, true) AS value#0 at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.toRow(ExpressionEncoder.scala:290) at org.apache.spark.sql.SparkSession$$anonfun$2.apply(SparkSession.scala:454) at org.apache.spark.sql.SparkSession$$anonfun$2.apply(SparkSession.scala:454) at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) at scala.collection.immutable.List.foreach(List.scala:381) at scala.collection.TraversableLike$class.map(TraversableLike.scala:234) at scala.collection.immutable.List.map(List.scala:285) at org.apache.spark.sql.SparkSession.createDataset(SparkSession.scala:454) at org.apache.spark.sql.Agg$$anonfun$18.apply$mcV$sp(MySuite.scala:192) at org.apache.spark.sql.Agg$$anonfun$18.apply(MySuite.scala:192) at org.apache.spark.sql.Agg$$anonfun$18.apply(MySuite.scala:192) at org.scalatest.Transformer$$anonfun$apply$1.apply$mcV$sp(Transformer.scala:22) at org.scalatest.OutcomeOf$class.outcomeOf(OutcomeOf.scala:85) at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104) at org.scalatest.Transformer.apply(Transformer.scala:22) at org.scalatest.Transformer.apply(Transformer.scala:20) at org.scalatest.FunSuiteLike$$anon$1.apply(FunSuiteLike.scala:166) at org.apache.spark.SparkFunSuite.withFixture(SparkFunSuite.scala:68) at org.scalatest.FunSuiteLike$class.invokeWithFixture$1(FunSuiteLike.scala:163) at org.scalatest.FunSuiteLike$$anonfun$runTest$1.apply(FunSuiteLike.scala:175) at org.scalatest.FunSuiteLike$$anonfun$runTest$1.apply(FunSuiteLike.scala:175) at org.scalatest.SuperEngine.runTestImpl(Engine.scala:306) at org.scalatest.FunSuiteLike$class.runTest(FunSuiteLike.scala:175) ... Caused by: java.lang.ArithmeticException: BigInteger out of long range at java.math.BigInteger.longValueExact(BigInteger.java:4531) at org.apache.spark.sql.types.Decimal.set(Decimal.scala:140) at org.apache.spark.sql.types.Decimal$.apply(Decimal.scala:434) at org.apache.spark.sql.types.Decimal.apply(Decimal.scala) at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificUnsafeProjection.apply(Unknown Source) at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.toRow(ExpressionEncoder.scala:287) ... 
59 more ``` ## How was this patch tested? Add new test suite into `DecimalSuite` Author: Kazuaki Ishizaki Closes #17684 from kiszk/SPARK-20341. (cherry picked from commit a750a595976791cb8a77063f690ea8f82ea75a8f) Signed-off-by: Wenchen Fan Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/adaa3f7e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/adaa3f7e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/adaa3f7e Branch: refs/heads/branch-2.2 Commit: adaa3f7e027338522e8a71ea40b3237d5889a30d Parents: aaeca8b Author: Kazuaki Ishizaki Authored: Fri Apr 21 22:25:35 2017 +0800 Committer: Wenchen Fan Committed: Fri Apr 21 22:26:05 2017 +0800 -- .../org/apache/spark/sql/types/Decimal.scala| 20 ++-- .../apache/spark/sql/types/DecimalSuite.scala | 6 ++ 2 files changed, 20 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/adaa3f7e/sql/catalyst/src/main/scala/org/apache/spark/sql/typ
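Not part of the patch: a minimal, standalone Scala sketch of the fallback described above — try the exact long conversion first and, when `ArithmeticException` signals that the `BigInteger` is out of long range, keep the value as a `BigDecimal` instead. Method and variable names here are made up for illustration; they are not the actual Spark code.

```
import java.math.{BigDecimal => JavaBigDecimal}

object BigIntFallbackSketch {
  /** Convert a Scala BigInt without throwing when it does not fit in a Long. */
  def toDecimalValue(bi: scala.math.BigInt): JavaBigDecimal =
    try {
      // Fast path: the value fits into a Long.
      JavaBigDecimal.valueOf(bi.bigInteger.longValueExact())
    } catch {
      case _: ArithmeticException =>
        // Out of Long range (e.g. Long.MaxValue + 1): keep full precision instead of failing.
        new JavaBigDecimal(bi.bigInteger)
    }

  def main(args: Array[String]): Unit = {
    println(toDecimalValue(BigInt(42)))                 // 42
    println(toDecimalValue(BigInt(Long.MaxValue) + 1))  // 9223372036854775808
  }
}
```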
spark git commit: [SPARK-20341][SQL] Support BigInt's value that does not fit in long value range
Repository: spark Updated Branches: refs/heads/master c9e6035e1 -> a750a5959 [SPARK-20341][SQL] Support BigInt's value that does not fit in long value range ## What changes were proposed in this pull request? This PR avoids an exception in the case where `scala.math.BigInt` has a value that does not fit into long value range (e.g. `Long.MAX_VALUE+1`). When we run the following code by using the current Spark, the following exception is thrown. This PR keeps the value using `BigDecimal` if we detect such an overflow case by catching `ArithmeticException`. Sample program: ``` case class BigIntWrapper(value:scala.math.BigInt)``` spark.createDataset(BigIntWrapper(scala.math.BigInt("1002"))::Nil).show ``` Exception: ``` Error while encoding: java.lang.ArithmeticException: BigInteger out of long range staticinvoke(class org.apache.spark.sql.types.Decimal$, DecimalType(38,0), apply, assertnotnull(assertnotnull(input[0, org.apache.spark.sql.BigIntWrapper, true])).value, true) AS value#0 java.lang.RuntimeException: Error while encoding: java.lang.ArithmeticException: BigInteger out of long range staticinvoke(class org.apache.spark.sql.types.Decimal$, DecimalType(38,0), apply, assertnotnull(assertnotnull(input[0, org.apache.spark.sql.BigIntWrapper, true])).value, true) AS value#0 at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.toRow(ExpressionEncoder.scala:290) at org.apache.spark.sql.SparkSession$$anonfun$2.apply(SparkSession.scala:454) at org.apache.spark.sql.SparkSession$$anonfun$2.apply(SparkSession.scala:454) at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) at scala.collection.immutable.List.foreach(List.scala:381) at scala.collection.TraversableLike$class.map(TraversableLike.scala:234) at scala.collection.immutable.List.map(List.scala:285) at org.apache.spark.sql.SparkSession.createDataset(SparkSession.scala:454) at org.apache.spark.sql.Agg$$anonfun$18.apply$mcV$sp(MySuite.scala:192) at org.apache.spark.sql.Agg$$anonfun$18.apply(MySuite.scala:192) at org.apache.spark.sql.Agg$$anonfun$18.apply(MySuite.scala:192) at org.scalatest.Transformer$$anonfun$apply$1.apply$mcV$sp(Transformer.scala:22) at org.scalatest.OutcomeOf$class.outcomeOf(OutcomeOf.scala:85) at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104) at org.scalatest.Transformer.apply(Transformer.scala:22) at org.scalatest.Transformer.apply(Transformer.scala:20) at org.scalatest.FunSuiteLike$$anon$1.apply(FunSuiteLike.scala:166) at org.apache.spark.SparkFunSuite.withFixture(SparkFunSuite.scala:68) at org.scalatest.FunSuiteLike$class.invokeWithFixture$1(FunSuiteLike.scala:163) at org.scalatest.FunSuiteLike$$anonfun$runTest$1.apply(FunSuiteLike.scala:175) at org.scalatest.FunSuiteLike$$anonfun$runTest$1.apply(FunSuiteLike.scala:175) at org.scalatest.SuperEngine.runTestImpl(Engine.scala:306) at org.scalatest.FunSuiteLike$class.runTest(FunSuiteLike.scala:175) ... Caused by: java.lang.ArithmeticException: BigInteger out of long range at java.math.BigInteger.longValueExact(BigInteger.java:4531) at org.apache.spark.sql.types.Decimal.set(Decimal.scala:140) at org.apache.spark.sql.types.Decimal$.apply(Decimal.scala:434) at org.apache.spark.sql.types.Decimal.apply(Decimal.scala) at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificUnsafeProjection.apply(Unknown Source) at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.toRow(ExpressionEncoder.scala:287) ... 
59 more ``` ## How was this patch tested? Add new test suite into `DecimalSuite` Author: Kazuaki Ishizaki Closes #17684 from kiszk/SPARK-20341. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a750a595 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a750a595 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a750a595 Branch: refs/heads/master Commit: a750a595976791cb8a77063f690ea8f82ea75a8f Parents: c9e6035 Author: Kazuaki Ishizaki Authored: Fri Apr 21 22:25:35 2017 +0800 Committer: Wenchen Fan Committed: Fri Apr 21 22:25:35 2017 +0800 -- .../org/apache/spark/sql/types/Decimal.scala| 20 ++-- .../apache/spark/sql/types/DecimalSuite.scala | 6 ++ 2 files changed, 20 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a750a595/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala -- diff --git a/sql/
spark git commit: [SPARK-20412] Throw ParseException from visitNonOptionalPartitionSpec instead of returning null values.
Repository: spark Updated Branches: refs/heads/branch-2.2 eb4d097c3 -> aaeca8bdd [SPARK-20412] Throw ParseException from visitNonOptionalPartitionSpec instead of returning null values. ## What changes were proposed in this pull request? If a partitionSpec is supposed to not contain optional values, a ParseException should be thrown, and not nulls returned. The nulls can later cause NullPointerExceptions in places not expecting them. ## How was this patch tested? A query like "SHOW PARTITIONS tbl PARTITION(col1='val1', col2)" used to throw a NullPointerException. Now it throws a ParseException. Author: Juliusz Sompolski Closes #17707 from juliuszsompolski/SPARK-20412. (cherry picked from commit c9e6035e1fb825d280eaec3bdfc1e4d362897ffd) Signed-off-by: Wenchen Fan Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/aaeca8bd Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/aaeca8bd Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/aaeca8bd Branch: refs/heads/branch-2.2 Commit: aaeca8bdd4bbbad5a14e1030e1d7ecf4836e8a5d Parents: eb4d097 Author: Juliusz Sompolski Authored: Fri Apr 21 22:11:24 2017 +0800 Committer: Wenchen Fan Committed: Fri Apr 21 22:20:55 2017 +0800 -- .../spark/sql/catalyst/parser/AstBuilder.scala | 5 - .../sql/execution/command/DDLCommandSuite.scala | 16 2 files changed, 16 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/aaeca8bd/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index e1db1ef..2cf06d1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -215,7 +215,10 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { */ protected def visitNonOptionalPartitionSpec( ctx: PartitionSpecContext): Map[String, String] = withOrigin(ctx) { -visitPartitionSpec(ctx).mapValues(_.orNull).map(identity) +visitPartitionSpec(ctx).map { + case (key, None) => throw new ParseException(s"Found an empty partition key '$key'.", ctx) + case (key, Some(value)) => key -> value +} } /** http://git-wip-us.apache.org/repos/asf/spark/blob/aaeca8bd/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala index 97c61dc..8a6bc62 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala @@ -530,13 +530,13 @@ class DDLCommandSuite extends PlanTest { """.stripMargin val sql4 = """ - |ALTER TABLE table_name PARTITION (test, dt='2008-08-08', + |ALTER TABLE table_name PARTITION (test=1, dt='2008-08-08', |country='us') SET SERDE 'org.apache.class' WITH SERDEPROPERTIES ('columns'='foo,bar', |'field.delim' = ',') """.stripMargin val sql5 = """ - |ALTER TABLE table_name PARTITION (test, dt='2008-08-08', + |ALTER TABLE table_name PARTITION (test=1, dt='2008-08-08', |country='us') SET SERDEPROPERTIES ('columns'='foo,bar', 'field.delim' = ',') """.stripMargin val parsed1 = parser.parsePlan(sql1) @@ -558,12 
+558,12 @@ class DDLCommandSuite extends PlanTest { tableIdent, Some("org.apache.class"), Some(Map("columns" -> "foo,bar", "field.delim" -> ",")), - Some(Map("test" -> null, "dt" -> "2008-08-08", "country" -> "us"))) + Some(Map("test" -> "1", "dt" -> "2008-08-08", "country" -> "us"))) val expected5 = AlterTableSerDePropertiesCommand( tableIdent, None, Some(Map("columns" -> "foo,bar", "field.delim" -> ",")), - Some(Map("test" -> null, "dt" -> "2008-08-08", "country" -> "us"))) + Some(Map("test" -> "1", "dt" -> "2008-08-08", "country" -> "us"))) comparePlans(parsed1, expected1) comparePlans(parsed2, expected2) comparePlans(parsed3, expected3) @@ -832,6 +832,14 @@ class DDLCommandSuite extends PlanTest { assert(e.contains("Found duplicate keys 'a'")) } + test("empty values in non-optional partition specs") { +val e = intercept[ParseException] { + par
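Not Spark code: a small standalone Scala sketch of the behavior change described above — when a parsed partition spec is required to carry values, fail fast with a descriptive error on a missing value instead of silently mapping it to null and hitting a NullPointerException later. The exception type and names below are illustrative only.

```
object PartitionSpecCheck {
  /** Mirrors the idea of visitNonOptionalPartitionSpec: every key must carry a value. */
  def requireValues(spec: Map[String, Option[String]]): Map[String, String] =
    spec.map {
      case (key, Some(value)) => key -> value
      case (key, None) =>
        // Previously this became `key -> null` and blew up later with a NullPointerException.
        throw new IllegalArgumentException(s"Found an empty partition key '$key'.")
    }

  def main(args: Array[String]): Unit = {
    println(requireValues(Map("dt" -> Some("2008-08-08"), "country" -> Some("us"))))
    // requireValues(Map("col1" -> Some("val1"), "col2" -> None))  // would throw
  }
}
```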
spark git commit: [SPARK-20412] Throw ParseException from visitNonOptionalPartitionSpec instead of returning null values.
Repository: spark Updated Branches: refs/heads/master 34767997e -> c9e6035e1 [SPARK-20412] Throw ParseException from visitNonOptionalPartitionSpec instead of returning null values. ## What changes were proposed in this pull request? If a partitionSpec is supposed to not contain optional values, a ParseException should be thrown, and not nulls returned. The nulls can later cause NullPointerExceptions in places not expecting them. ## How was this patch tested? A query like "SHOW PARTITIONS tbl PARTITION(col1='val1', col2)" used to throw a NullPointerException. Now it throws a ParseException. Author: Juliusz Sompolski Closes #17707 from juliuszsompolski/SPARK-20412. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c9e6035e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c9e6035e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c9e6035e Branch: refs/heads/master Commit: c9e6035e1fb825d280eaec3bdfc1e4d362897ffd Parents: 3476799 Author: Juliusz Sompolski Authored: Fri Apr 21 22:11:24 2017 +0800 Committer: Wenchen Fan Committed: Fri Apr 21 22:11:24 2017 +0800 -- .../spark/sql/catalyst/parser/AstBuilder.scala | 5 - .../sql/execution/command/DDLCommandSuite.scala | 16 2 files changed, 16 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c9e6035e/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index e1db1ef..2cf06d1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -215,7 +215,10 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { */ protected def visitNonOptionalPartitionSpec( ctx: PartitionSpecContext): Map[String, String] = withOrigin(ctx) { -visitPartitionSpec(ctx).mapValues(_.orNull).map(identity) +visitPartitionSpec(ctx).map { + case (key, None) => throw new ParseException(s"Found an empty partition key '$key'.", ctx) + case (key, Some(value)) => key -> value +} } /** http://git-wip-us.apache.org/repos/asf/spark/blob/c9e6035e/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala index 97c61dc..8a6bc62 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala @@ -530,13 +530,13 @@ class DDLCommandSuite extends PlanTest { """.stripMargin val sql4 = """ - |ALTER TABLE table_name PARTITION (test, dt='2008-08-08', + |ALTER TABLE table_name PARTITION (test=1, dt='2008-08-08', |country='us') SET SERDE 'org.apache.class' WITH SERDEPROPERTIES ('columns'='foo,bar', |'field.delim' = ',') """.stripMargin val sql5 = """ - |ALTER TABLE table_name PARTITION (test, dt='2008-08-08', + |ALTER TABLE table_name PARTITION (test=1, dt='2008-08-08', |country='us') SET SERDEPROPERTIES ('columns'='foo,bar', 'field.delim' = ',') """.stripMargin val parsed1 = parser.parsePlan(sql1) @@ -558,12 +558,12 @@ class DDLCommandSuite extends PlanTest { tableIdent, Some("org.apache.class"), 
Some(Map("columns" -> "foo,bar", "field.delim" -> ",")), - Some(Map("test" -> null, "dt" -> "2008-08-08", "country" -> "us"))) + Some(Map("test" -> "1", "dt" -> "2008-08-08", "country" -> "us"))) val expected5 = AlterTableSerDePropertiesCommand( tableIdent, None, Some(Map("columns" -> "foo,bar", "field.delim" -> ",")), - Some(Map("test" -> null, "dt" -> "2008-08-08", "country" -> "us"))) + Some(Map("test" -> "1", "dt" -> "2008-08-08", "country" -> "us"))) comparePlans(parsed1, expected1) comparePlans(parsed2, expected2) comparePlans(parsed3, expected3) @@ -832,6 +832,14 @@ class DDLCommandSuite extends PlanTest { assert(e.contains("Found duplicate keys 'a'")) } + test("empty values in non-optional partition specs") { +val e = intercept[ParseException] { + parser.parsePlan( +"SHOW PARTITIONS dbx.tab1 PARTITION (a='1', b)") +}.getMessage +assert(e.c
spark git commit: Small rewording about history server use case
Repository: spark
Updated Branches: refs/heads/branch-2.1 66e7a8f1d -> fb0351a3f

Small rewording about history server use case

PR #10991 removed the built-in history view from Spark Standalone, so the history server is no longer useful only for YARN or Mesos.

Author: Hervé

Closes #17709 from dud225/patch-1.

(cherry picked from commit 34767997e0c6cb28e1fac8cb650fa3511f260ca5)
Signed-off-by: Sean Owen

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fb0351a3
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fb0351a3
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fb0351a3

Branch: refs/heads/branch-2.1
Commit: fb0351a3f76b535c7132f107cc8ea94923d51fd7
Parents: 66e7a8f
Author: Hervé
Authored: Fri Apr 21 08:52:18 2017 +0100
Committer: Sean Owen
Committed: Fri Apr 21 08:52:54 2017 +0100

--
docs/monitoring.md | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/fb0351a3/docs/monitoring.md
--
diff --git a/docs/monitoring.md b/docs/monitoring.md
index 077af08..8583213 100644
--- a/docs/monitoring.md
+++ b/docs/monitoring.md
@@ -27,8 +27,8 @@ in the UI to persisted storage.
 ## Viewing After the Fact
-If Spark is run on Mesos or YARN, it is still possible to construct the UI of an
-application through Spark's history server, provided that the application's event logs exist.
+It is still possible to construct the UI of an application through Spark's history server,
+provided that the application's event logs exist.
 You can start the history server by executing:
    ./sbin/start-history-server.sh
spark git commit: Small rewording about history server use case
Repository: spark
Updated Branches: refs/heads/branch-2.2 cddb4b7db -> eb4d097c3

Small rewording about history server use case

PR #10991 removed the built-in history view from Spark Standalone, so the history server is no longer useful only for YARN or Mesos.

Author: Hervé

Closes #17709 from dud225/patch-1.

(cherry picked from commit 34767997e0c6cb28e1fac8cb650fa3511f260ca5)
Signed-off-by: Sean Owen

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/eb4d097c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/eb4d097c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/eb4d097c

Branch: refs/heads/branch-2.2
Commit: eb4d097c3c73d1aaf4cd9e17193a6b06ba273429
Parents: cddb4b7
Author: Hervé
Authored: Fri Apr 21 08:52:18 2017 +0100
Committer: Sean Owen
Committed: Fri Apr 21 08:52:28 2017 +0100

--
docs/monitoring.md | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/eb4d097c/docs/monitoring.md
--
diff --git a/docs/monitoring.md b/docs/monitoring.md
index da95438..3e577c5 100644
--- a/docs/monitoring.md
+++ b/docs/monitoring.md
@@ -27,8 +27,8 @@ in the UI to persisted storage.
 ## Viewing After the Fact
-If Spark is run on Mesos or YARN, it is still possible to construct the UI of an
-application through Spark's history server, provided that the application's event logs exist.
+It is still possible to construct the UI of an application through Spark's history server,
+provided that the application's event logs exist.
 You can start the history server by executing:
    ./sbin/start-history-server.sh
spark git commit: Small rewording about history server use case
Repository: spark
Updated Branches: refs/heads/master e2b3d2367 -> 34767997e

Small rewording about history server use case

PR #10991 removed the built-in history view from Spark Standalone, so the history server is no longer useful only for YARN or Mesos.

Author: Hervé

Closes #17709 from dud225/patch-1.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/34767997
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/34767997
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/34767997

Branch: refs/heads/master
Commit: 34767997e0c6cb28e1fac8cb650fa3511f260ca5
Parents: e2b3d23
Author: Hervé
Authored: Fri Apr 21 08:52:18 2017 +0100
Committer: Sean Owen
Committed: Fri Apr 21 08:52:18 2017 +0100

--
docs/monitoring.md | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/34767997/docs/monitoring.md
--
diff --git a/docs/monitoring.md b/docs/monitoring.md
index da95438..3e577c5 100644
--- a/docs/monitoring.md
+++ b/docs/monitoring.md
@@ -27,8 +27,8 @@ in the UI to persisted storage.
 ## Viewing After the Fact
-If Spark is run on Mesos or YARN, it is still possible to construct the UI of an
-application through Spark's history server, provided that the application's event logs exist.
+It is still possible to construct the UI of an application through Spark's history server,
+provided that the application's event logs exist.
 You can start the history server by executing:
    ./sbin/start-history-server.sh
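As background for the wording change (not part of the commit): the history server can only rebuild an application's UI if the application wrote event logs. A minimal Scala sketch of enabling event logging is below; the log directory is only an example and must match the directory the history server reads (`spark.history.fs.logDirectory`).

```
import org.apache.spark.sql.SparkSession

object EventLogEnabledApp {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("event-log-example")
      // Write event logs so the history server can reconstruct the UI later.
      .config("spark.eventLog.enabled", "true")
      .config("spark.eventLog.dir", "hdfs:///spark-events")  // example path only
      .getOrCreate()

    spark.range(10).count()  // do some work worth inspecting later
    spark.stop()
  }
}
```

The history server itself is started with `./sbin/start-history-server.sh`, as the documentation page describes.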
spark git commit: [SPARK-20420][SQL] Add events to the external catalog
Repository: spark
Updated Branches: refs/heads/master 48d760d02 -> e2b3d2367

[SPARK-20420][SQL] Add events to the external catalog

## What changes were proposed in this pull request?

It is often useful to be able to track changes to the `ExternalCatalog`. This PR makes the `ExternalCatalog` emit events when a catalog object is changed. Events are fired before and after the change. The following events are fired per object:

- Database
  - CreateDatabasePreEvent: event fired before the database is created.
  - CreateDatabaseEvent: event fired after the database has been created.
  - DropDatabasePreEvent: event fired before the database is dropped.
  - DropDatabaseEvent: event fired after the database has been dropped.
- Table
  - CreateTablePreEvent: event fired before the table is created.
  - CreateTableEvent: event fired after the table has been created.
  - RenameTablePreEvent: event fired before the table is renamed.
  - RenameTableEvent: event fired after the table has been renamed.
  - DropTablePreEvent: event fired before the table is dropped.
  - DropTableEvent: event fired after the table has been dropped.
- Function
  - CreateFunctionPreEvent: event fired before the function is created.
  - CreateFunctionEvent: event fired after the function has been created.
  - RenameFunctionPreEvent: event fired before the function is renamed.
  - RenameFunctionEvent: event fired after the function has been renamed.
  - DropFunctionPreEvent: event fired before the function is dropped.
  - DropFunctionEvent: event fired after the function has been dropped.

The events currently only contain the names of the objects modified; we can add more events, and more detail, at a later point. A user can monitor changes to the external catalog by adding a listener to the Spark listener bus and checking for `ExternalCatalogEvent`s using the `SparkListener.onOtherEvent` hook. A more direct approach is to add a listener directly to the `ExternalCatalog`.

## How was this patch tested?

Added the `ExternalCatalogEventSuite`.

Author: Herman van Hovell

Closes #17710 from hvanhovell/SPARK-20420.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e2b3d236 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e2b3d236 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e2b3d236 Branch: refs/heads/master Commit: e2b3d2367a563d4600d8d87b5317e71135c362f0 Parents: 48d760d Author: Herman van Hovell Authored: Fri Apr 21 00:05:03 2017 -0700 Committer: Reynold Xin Committed: Fri Apr 21 00:05:03 2017 -0700 -- .../sql/catalyst/catalog/ExternalCatalog.scala | 85 - .../sql/catalyst/catalog/InMemoryCatalog.scala | 22 ++- .../spark/sql/catalyst/catalog/events.scala | 158 .../catalog/ExternalCatalogEventSuite.scala | 188 +++ .../apache/spark/sql/internal/SharedState.scala | 7 + .../spark/sql/hive/HiveExternalCatalog.scala| 22 ++- 6 files changed, 457 insertions(+), 25 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/e2b3d236/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala index 08a01e8..974ef90 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.catalog import org.apache.spark.sql.catalyst.analysis.{FunctionAlreadyExistsException, NoSuchDatabaseException, NoSuchFunctionException, NoSuchTableException} import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.types.StructType +import org.apache.spark.util.ListenerBus /** * Interface for the system catalog (of functions, partitions, tables, and databases). @@ -30,7 +31,8 @@ import org.apache.spark.sql.types.StructType * * Implementations should throw [[NoSuchDatabaseException]] when databases don't exist. */ -abstract class ExternalCatalog { +abstract class ExternalCatalog + extends ListenerBus[ExternalCatalogEventListener, ExternalCatalogEvent] { import CatalogTypes.TablePartitionSpec protected def requireDbExists(db: String): Unit = { @@ -61,9 +63,22 @@ abstract class ExternalCatalog { // Databases // -- - def createDatabase(dbDefinition: CatalogDatabase, ignoreIfExists: Boolean): Unit + final def createDatabase(dbDefinition: CatalogDatabase, ignoreIfExis
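Not part of the commit: a minimal sketch of the listener-bus approach described above. It assumes the event classes introduced by this patch (e.g. `CreateTableEvent`, `DropTableEvent`) expose the database and object name, as the description states; the exact field names are an assumption here.

```
import org.apache.spark.scheduler.{SparkListener, SparkListenerEvent}
import org.apache.spark.sql.catalyst.catalog.{CreateTableEvent, DropTableEvent, ExternalCatalogEvent}

// Catalog events are posted to the Spark listener bus, so they arrive via onOtherEvent.
class CatalogChangeListener extends SparkListener {
  override def onOtherEvent(event: SparkListenerEvent): Unit = event match {
    // field names `database` and `name` assumed from the PR description
    case e: CreateTableEvent     => println(s"table created: ${e.database}.${e.name}")
    case e: DropTableEvent       => println(s"table dropped: ${e.database}.${e.name}")
    case e: ExternalCatalogEvent => println(s"other catalog change: $e")
    case _                       => // not a catalog event; ignore
  }
}

// Registration, assuming an active SparkSession named `spark`:
//   spark.sparkContext.addSparkListener(new CatalogChangeListener)
```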
spark git commit: [SPARK-20420][SQL] Add events to the external catalog
Repository: spark
Updated Branches: refs/heads/branch-2.2 6cd2f16b1 -> cddb4b7db

[SPARK-20420][SQL] Add events to the external catalog

## What changes were proposed in this pull request?

It is often useful to be able to track changes to the `ExternalCatalog`. This PR makes the `ExternalCatalog` emit events when a catalog object is changed. Events are fired before and after the change. The following events are fired per object:

- Database
  - CreateDatabasePreEvent: event fired before the database is created.
  - CreateDatabaseEvent: event fired after the database has been created.
  - DropDatabasePreEvent: event fired before the database is dropped.
  - DropDatabaseEvent: event fired after the database has been dropped.
- Table
  - CreateTablePreEvent: event fired before the table is created.
  - CreateTableEvent: event fired after the table has been created.
  - RenameTablePreEvent: event fired before the table is renamed.
  - RenameTableEvent: event fired after the table has been renamed.
  - DropTablePreEvent: event fired before the table is dropped.
  - DropTableEvent: event fired after the table has been dropped.
- Function
  - CreateFunctionPreEvent: event fired before the function is created.
  - CreateFunctionEvent: event fired after the function has been created.
  - RenameFunctionPreEvent: event fired before the function is renamed.
  - RenameFunctionEvent: event fired after the function has been renamed.
  - DropFunctionPreEvent: event fired before the function is dropped.
  - DropFunctionEvent: event fired after the function has been dropped.

The events currently only contain the names of the objects modified; we can add more events, and more detail, at a later point. A user can monitor changes to the external catalog by adding a listener to the Spark listener bus and checking for `ExternalCatalogEvent`s using the `SparkListener.onOtherEvent` hook. A more direct approach is to add a listener directly to the `ExternalCatalog`.

## How was this patch tested?

Added the `ExternalCatalogEventSuite`.

Author: Herman van Hovell

Closes #17710 from hvanhovell/SPARK-20420.
(cherry picked from commit e2b3d2367a563d4600d8d87b5317e71135c362f0) Signed-off-by: Reynold Xin Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/cddb4b7d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/cddb4b7d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/cddb4b7d Branch: refs/heads/branch-2.2 Commit: cddb4b7db81b01b4abf2ab683aba97e4eabb9769 Parents: 6cd2f16 Author: Herman van Hovell Authored: Fri Apr 21 00:05:03 2017 -0700 Committer: Reynold Xin Committed: Fri Apr 21 00:05:10 2017 -0700 -- .../sql/catalyst/catalog/ExternalCatalog.scala | 85 - .../sql/catalyst/catalog/InMemoryCatalog.scala | 22 ++- .../spark/sql/catalyst/catalog/events.scala | 158 .../catalog/ExternalCatalogEventSuite.scala | 188 +++ .../apache/spark/sql/internal/SharedState.scala | 7 + .../spark/sql/hive/HiveExternalCatalog.scala| 22 ++- 6 files changed, 457 insertions(+), 25 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/cddb4b7d/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala index 08a01e8..974ef90 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.catalog import org.apache.spark.sql.catalyst.analysis.{FunctionAlreadyExistsException, NoSuchDatabaseException, NoSuchFunctionException, NoSuchTableException} import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.types.StructType +import org.apache.spark.util.ListenerBus /** * Interface for the system catalog (of functions, partitions, tables, and databases). @@ -30,7 +31,8 @@ import org.apache.spark.sql.types.StructType * * Implementations should throw [[NoSuchDatabaseException]] when databases don't exist. */ -abstract class ExternalCatalog { +abstract class ExternalCatalog + extends ListenerBus[ExternalCatalogEventListener, ExternalCatalogEvent] { import CatalogTypes.TablePartitionSpec protected def requireDbExists(db: String): Unit = { @@ -61,9 +63,22 @@ abstract class ExternalCatalog { // Databases // -- - def createDatabase(dbDefinition: CatalogDatabas