svn commit: r26911 - in /dev/spark/2.3.2-SNAPSHOT-2018_05_14_22_01-a886dc2-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell
Date: Tue May 15 05:16:03 2018
New Revision: 26911

Log: Apache Spark 2.3.2-SNAPSHOT-2018_05_14_22_01-a886dc2 docs

[This commit notification would consist of 1443 parts, which exceeds the limit of 50, so it was shortened to this summary.]
svn commit: r26906 - in /dev/spark/2.4.0-SNAPSHOT-2018_05_14_20_01-9059f1e-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell
Date: Tue May 15 03:16:11 2018
New Revision: 26906

Log: Apache Spark 2.4.0-SNAPSHOT-2018_05_14_20_01-9059f1e docs

[This commit notification would consist of 1462 parts, which exceeds the limit of 50, so it was shortened to this summary.]
spark git commit: [SPARK-23780][R] Failed to use googleVis library with new SparkR
Repository: spark
Updated Branches: refs/heads/branch-2.3 eb7b37350 -> a886dc23a

[SPARK-23780][R] Failed to use googleVis library with new SparkR

## What changes were proposed in this pull request?

Change the generic so that it works with googleVis; also fix lintr issues.

## How was this patch tested?

Manual test, unit tests.

Author: Felix Cheung

Closes #21315 from felixcheung/googvis.

(cherry picked from commit 9059f1ee6ae13c8636c9b7fdbb708a349256fb8e)
Signed-off-by: Felix Cheung

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a886dc23
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a886dc23
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a886dc23

Branch: refs/heads/branch-2.3
Commit: a886dc23aeae8a79f7bc4d1aeb47d545e5550604
Parents: eb7b373
Author: Felix Cheung
Authored: Mon May 14 19:20:25 2018 -0700
Committer: Felix Cheung
Committed: Mon May 14 19:21:04 2018 -0700

 R/pkg/R/client.R   | 5 +++--
 R/pkg/R/generics.R | 2 +-
 R/pkg/R/sparkR.R   | 2 +-
 3 files changed, 5 insertions(+), 4 deletions(-)

http://git-wip-us.apache.org/repos/asf/spark/blob/a886dc23/R/pkg/R/client.R

diff --git a/R/pkg/R/client.R b/R/pkg/R/client.R
index 14a17c6..4c87f64 100644
--- a/R/pkg/R/client.R
+++ b/R/pkg/R/client.R
@@ -63,7 +63,7 @@ generateSparkSubmitArgs <- function(args, sparkHome, jars, sparkSubmitOpts, pack
 checkJavaVersion <- function() {
   javaBin <- "java"
   javaHome <- Sys.getenv("JAVA_HOME")
-  javaReqs <- utils::packageDescription(utils::packageName(), fields=c("SystemRequirements"))
+  javaReqs <- utils::packageDescription(utils::packageName(), fields = c("SystemRequirements"))
   sparkJavaVersion <- as.numeric(tail(strsplit(javaReqs, "[(=)]")[[1]], n = 1L))
   if (javaHome != "") {
     javaBin <- file.path(javaHome, "bin", javaBin)
@@ -90,7 +90,8 @@ checkJavaVersion <- function() {
   # Extract 8 from it to compare to sparkJavaVersion
   javaVersionNum <- as.integer(strsplit(javaVersionStr, "[.]")[[1L]][2])
   if (javaVersionNum != sparkJavaVersion) {
-    stop(paste("Java version", sparkJavaVersion, "is required for this package; found version:", javaVersionStr))
+    stop(paste("Java version", sparkJavaVersion, "is required for this package; found version:",
+               javaVersionStr))
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/a886dc23/R/pkg/R/generics.R

diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index e0dde33..cffc9ab 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -762,7 +762,7 @@ setGeneric("summarize", function(x, ...) { standardGeneric("summarize") })

 #' @export
 setGeneric("summary", function(object, ...) { standardGeneric("summary") })

-setGeneric("toJSON", function(x) { standardGeneric("toJSON") })
+setGeneric("toJSON", function(x, ...) { standardGeneric("toJSON") })

 setGeneric("toRDD", function(x) { standardGeneric("toRDD") })

http://git-wip-us.apache.org/repos/asf/spark/blob/a886dc23/R/pkg/R/sparkR.R

diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R
index 2cd8b0c..266fa46 100644
--- a/R/pkg/R/sparkR.R
+++ b/R/pkg/R/sparkR.R
@@ -197,7 +197,7 @@ sparkR.sparkContext <- function(

   # Don't use readString() so that we can provide a useful
   # error message if the R and Java versions are mismatched.
-  authSecretLen = readInt(f)
+  authSecretLen <- readInt(f)
   if (length(authSecretLen) == 0 || authSecretLen == 0) {
     stop("Unexpected EOF in JVM connection data. Mismatched versions?")
   }
spark git commit: [SPARK-23780][R] Failed to use googleVis library with new SparkR
Repository: spark
Updated Branches: refs/heads/master 061e0084c -> 9059f1ee6

[SPARK-23780][R] Failed to use googleVis library with new SparkR

## What changes were proposed in this pull request?

Change the generic so that it works with googleVis; also fix lintr issues.

## How was this patch tested?

Manual test, unit tests.

Author: Felix Cheung

Closes #21315 from felixcheung/googvis.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9059f1ee
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9059f1ee
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9059f1ee

Branch: refs/heads/master
Commit: 9059f1ee6ae13c8636c9b7fdbb708a349256fb8e
Parents: 061e008
Author: Felix Cheung
Authored: Mon May 14 19:20:25 2018 -0700
Committer: Felix Cheung
Committed: Mon May 14 19:20:25 2018 -0700

 R/pkg/R/client.R   | 5 +++--
 R/pkg/R/generics.R | 2 +-
 R/pkg/R/sparkR.R   | 2 +-
 3 files changed, 5 insertions(+), 4 deletions(-)

http://git-wip-us.apache.org/repos/asf/spark/blob/9059f1ee/R/pkg/R/client.R

diff --git a/R/pkg/R/client.R b/R/pkg/R/client.R
index 14a17c6..4c87f64 100644
--- a/R/pkg/R/client.R
+++ b/R/pkg/R/client.R
@@ -63,7 +63,7 @@ generateSparkSubmitArgs <- function(args, sparkHome, jars, sparkSubmitOpts, pack
 checkJavaVersion <- function() {
   javaBin <- "java"
   javaHome <- Sys.getenv("JAVA_HOME")
-  javaReqs <- utils::packageDescription(utils::packageName(), fields=c("SystemRequirements"))
+  javaReqs <- utils::packageDescription(utils::packageName(), fields = c("SystemRequirements"))
   sparkJavaVersion <- as.numeric(tail(strsplit(javaReqs, "[(=)]")[[1]], n = 1L))
   if (javaHome != "") {
     javaBin <- file.path(javaHome, "bin", javaBin)
@@ -90,7 +90,8 @@ checkJavaVersion <- function() {
   # Extract 8 from it to compare to sparkJavaVersion
   javaVersionNum <- as.integer(strsplit(javaVersionStr, "[.]")[[1L]][2])
   if (javaVersionNum != sparkJavaVersion) {
-    stop(paste("Java version", sparkJavaVersion, "is required for this package; found version:", javaVersionStr))
+    stop(paste("Java version", sparkJavaVersion, "is required for this package; found version:",
+               javaVersionStr))
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/9059f1ee/R/pkg/R/generics.R

diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 61da30b..3ea1811 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -624,7 +624,7 @@ setGeneric("summarize", function(x, ...) { standardGeneric("summarize") })

 #' @rdname summary
 setGeneric("summary", function(object, ...) { standardGeneric("summary") })

-setGeneric("toJSON", function(x) { standardGeneric("toJSON") })
+setGeneric("toJSON", function(x, ...) { standardGeneric("toJSON") })

 setGeneric("toRDD", function(x) { standardGeneric("toRDD") })

http://git-wip-us.apache.org/repos/asf/spark/blob/9059f1ee/R/pkg/R/sparkR.R

diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R
index d6a2d08..f7c1663 100644
--- a/R/pkg/R/sparkR.R
+++ b/R/pkg/R/sparkR.R
@@ -194,7 +194,7 @@ sparkR.sparkContext <- function(

   # Don't use readString() so that we can provide a useful
   # error message if the R and Java versions are mismatched.
-  authSecretLen = readInt(f)
+  authSecretLen <- readInt(f)
   if (length(authSecretLen) == 0 || authSecretLen == 0) {
     stop("Unexpected EOF in JVM connection data. Mismatched versions?")
   }
[spark] Git Push Summary
Repository: spark
Updated Tags: refs/tags/v2.3.1-rc1 [created] cc93bc959
svn commit: r26904 - in /dev/spark/2.4.0-SNAPSHOT-2018_05_14_16_01-061e008-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell
Date: Mon May 14 23:15:34 2018
New Revision: 26904

Log: Apache Spark 2.4.0-SNAPSHOT-2018_05_14_16_01-061e008 docs

[This commit notification would consist of 1462 parts, which exceeds the limit of 50, so it was shortened to this summary.]
spark git commit: [SPARK-23852][SQL] Add withSQLConf(...) to test case
Repository: spark
Updated Branches: refs/heads/branch-2.3 a8ee5706a -> 6dfb51557

[SPARK-23852][SQL] Add withSQLConf(...) to test case

## What changes were proposed in this pull request?

Add a `withSQLConf(...)` wrapper to force Parquet filter pushdown for a test that relies on it.

## How was this patch tested?

Test passes.

Author: Henry Robinson

Closes #21323 from henryr/spark-23582.

(cherry picked from commit 061e0084ce19c1384ba271a97a0aa1f87abe879d)
Signed-off-by: Marcelo Vanzin

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6dfb5155
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6dfb5155
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6dfb5155

Branch: refs/heads/branch-2.3
Commit: 6dfb515571b68a471509035287a46e431e48b73b
Parents: a8ee570
Author: Henry Robinson
Authored: Mon May 14 14:35:08 2018 -0700
Committer: Marcelo Vanzin
Committed: Mon May 14 14:35:17 2018 -0700

 .../datasources/parquet/ParquetFilterSuite.scala | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

http://git-wip-us.apache.org/repos/asf/spark/blob/6dfb5155/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
index 79891af..f8d04b5 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
@@ -604,13 +604,15 @@ class ParquetFilterSuite extends QueryTest with ParquetTest with SharedSQLContex
   }

   test("SPARK-23852: Broken Parquet push-down for partially-written stats") {
-    // parquet-1217.parquet contains a single column with values -1, 0, 1, 2 and null.
-    // The row-group statistics include null counts, but not min and max values, which
-    // triggers PARQUET-1217.
-    val df = readResourceParquetFile("test-data/parquet-1217.parquet")
+    withSQLConf(SQLConf.PARQUET_FILTER_PUSHDOWN_ENABLED.key -> "true") {
+      // parquet-1217.parquet contains a single column with values -1, 0, 1, 2 and null.
+      // The row-group statistics include null counts, but not min and max values, which
+      // triggers PARQUET-1217.
+      val df = readResourceParquetFile("test-data/parquet-1217.parquet")

-    // Will return 0 rows if PARQUET-1217 is not fixed.
-    assert(df.where("col > 0").count() === 2)
+      // Will return 0 rows if PARQUET-1217 is not fixed.
+      assert(df.where("col > 0").count() === 2)
+    }
   }
 }
spark git commit: [SPARK-23852][SQL] Add withSQLConf(...) to test case
Repository: spark
Updated Branches: refs/heads/master 8cd83acf4 -> 061e0084c

[SPARK-23852][SQL] Add withSQLConf(...) to test case

## What changes were proposed in this pull request?

Add a `withSQLConf(...)` wrapper to force Parquet filter pushdown for a test that relies on it.

## How was this patch tested?

Test passes.

Author: Henry Robinson

Closes #21323 from henryr/spark-23582.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/061e0084
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/061e0084
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/061e0084

Branch: refs/heads/master
Commit: 061e0084ce19c1384ba271a97a0aa1f87abe879d
Parents: 8cd83ac
Author: Henry Robinson
Authored: Mon May 14 14:35:08 2018 -0700
Committer: Marcelo Vanzin
Committed: Mon May 14 14:35:08 2018 -0700

 .../datasources/parquet/ParquetFilterSuite.scala | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

http://git-wip-us.apache.org/repos/asf/spark/blob/061e0084/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
index 4d0ecde..90da7eb 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
@@ -650,13 +650,15 @@ class ParquetFilterSuite extends QueryTest with ParquetTest with SharedSQLContex
   }

   test("SPARK-23852: Broken Parquet push-down for partially-written stats") {
-    // parquet-1217.parquet contains a single column with values -1, 0, 1, 2 and null.
-    // The row-group statistics include null counts, but not min and max values, which
-    // triggers PARQUET-1217.
-    val df = readResourceParquetFile("test-data/parquet-1217.parquet")
+    withSQLConf(SQLConf.PARQUET_FILTER_PUSHDOWN_ENABLED.key -> "true") {
+      // parquet-1217.parquet contains a single column with values -1, 0, 1, 2 and null.
+      // The row-group statistics include null counts, but not min and max values, which
+      // triggers PARQUET-1217.
+      val df = readResourceParquetFile("test-data/parquet-1217.parquet")

-    // Will return 0 rows if PARQUET-1217 is not fixed.
-    assert(df.where("col > 0").count() === 2)
+      // Will return 0 rows if PARQUET-1217 is not fixed.
+      assert(df.where("col > 0").count() === 2)
+    }
   }
 }
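The `withSQLConf(...)` helper applied in this patch follows a simple save/set/run/restore shape. A minimal standalone sketch of that pattern — a hypothetical helper taking an explicit SparkSession, not the actual `SQLTestUtils` code, which works against the session's internal conf:

```scala
import org.apache.spark.sql.SparkSession

// Sketch: apply the given SQL confs for the duration of `body`, then restore
// whatever was set before, whether that was a value or nothing at all.
def withSQLConf(spark: SparkSession)(pairs: (String, String)*)(body: => Unit): Unit = {
  val conf = spark.conf
  // Remember the previous value (or absence) of every key we touch.
  val previous = pairs.map { case (key, _) => key -> conf.getOption(key) }
  pairs.foreach { case (key, value) => conf.set(key, value) }
  try {
    body
  } finally {
    previous.foreach {
      case (key, Some(old)) => conf.set(key, old)
      case (key, None)      => conf.unset(key)
    }
  }
}
```

With such a helper in scope, the test body reads as in the diff above: `withSQLConf(spark)("spark.sql.parquet.filterPushdown" -> "true") { ... }`, and the conf cannot leak into other tests even if the body throws.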
svn commit: r26901 - in /dev/spark/2.3.1-SNAPSHOT-2018_05_14_14_01-2f60df0-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell
Date: Mon May 14 21:15:26 2018
New Revision: 26901

Log: Apache Spark 2.3.1-SNAPSHOT-2018_05_14_14_01-2f60df0 docs

[This commit notification would consist of 1443 parts, which exceeds the limit of 50, so it was shortened to this summary.]
spark git commit: [SPARK-23852][SQL] Upgrade to Parquet 1.8.3
Repository: spark
Updated Branches: refs/heads/branch-2.3 2f60df09d -> a8ee5706a

[SPARK-23852][SQL] Upgrade to Parquet 1.8.3

## What changes were proposed in this pull request?

Upgrade the Parquet dependency to 1.8.3 to avoid PARQUET-1217.

## How was this patch tested?

Ran the included new test case.

Author: Henry Robinson

Closes #21302 from henryr/branch-2.3.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a8ee5706
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a8ee5706
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a8ee5706

Branch: refs/heads/branch-2.3
Commit: a8ee5706ad96be3d6501471d05f7c3d61d3ca38e
Parents: 2f60df0
Author: Henry Robinson
Authored: Mon May 14 14:05:32 2018 -0700
Committer: Marcelo Vanzin
Committed: Mon May 14 14:05:32 2018 -0700

 dev/deps/spark-deps-hadoop-2.6                       | 10 +++++-----
 dev/deps/spark-deps-hadoop-2.7                       | 10 +++++-----
 pom.xml                                              |  2 +-
 .../test/resources/test-data/parquet-1217.parquet    | Bin 0 -> 321 bytes
 .../datasources/parquet/ParquetFilterSuite.scala     | 10 ++++++++++
 5 files changed, 21 insertions(+), 11 deletions(-)

http://git-wip-us.apache.org/repos/asf/spark/blob/a8ee5706/dev/deps/spark-deps-hadoop-2.6

diff --git a/dev/deps/spark-deps-hadoop-2.6 b/dev/deps/spark-deps-hadoop-2.6
index 577bf43..f4559a8 100644
--- a/dev/deps/spark-deps-hadoop-2.6
+++ b/dev/deps/spark-deps-hadoop-2.6
@@ -161,13 +161,13 @@ orc-mapreduce-1.4.3-nohive.jar
 oro-2.0.8.jar
 osgi-resource-locator-1.0.1.jar
 paranamer-2.8.jar
-parquet-column-1.8.2.jar
-parquet-common-1.8.2.jar
-parquet-encoding-1.8.2.jar
+parquet-column-1.8.3.jar
+parquet-common-1.8.3.jar
+parquet-encoding-1.8.3.jar
 parquet-format-2.3.1.jar
-parquet-hadoop-1.8.2.jar
+parquet-hadoop-1.8.3.jar
 parquet-hadoop-bundle-1.6.0.jar
-parquet-jackson-1.8.2.jar
+parquet-jackson-1.8.3.jar
 protobuf-java-2.5.0.jar
 py4j-0.10.7.jar
 pyrolite-4.13.jar

http://git-wip-us.apache.org/repos/asf/spark/blob/a8ee5706/dev/deps/spark-deps-hadoop-2.7

diff --git a/dev/deps/spark-deps-hadoop-2.7 b/dev/deps/spark-deps-hadoop-2.7
index 304982e..c2df998 100644
--- a/dev/deps/spark-deps-hadoop-2.7
+++ b/dev/deps/spark-deps-hadoop-2.7
@@ -162,13 +162,13 @@ orc-mapreduce-1.4.3-nohive.jar
 oro-2.0.8.jar
 osgi-resource-locator-1.0.1.jar
 paranamer-2.8.jar
-parquet-column-1.8.2.jar
-parquet-common-1.8.2.jar
-parquet-encoding-1.8.2.jar
+parquet-column-1.8.3.jar
+parquet-common-1.8.3.jar
+parquet-encoding-1.8.3.jar
 parquet-format-2.3.1.jar
-parquet-hadoop-1.8.2.jar
+parquet-hadoop-1.8.3.jar
 parquet-hadoop-bundle-1.6.0.jar
-parquet-jackson-1.8.2.jar
+parquet-jackson-1.8.3.jar
 protobuf-java-2.5.0.jar
 py4j-0.10.7.jar
 pyrolite-4.13.jar

http://git-wip-us.apache.org/repos/asf/spark/blob/a8ee5706/pom.xml

diff --git a/pom.xml b/pom.xml
index 9c2d931..533c6b4 100644
--- a/pom.xml
+++ b/pom.xml
@@ -129,7 +129,7 @@
     1.2.1
     10.12.1.1
-    1.8.2
+    1.8.3
     1.4.3
     nohive
     1.6.0

http://git-wip-us.apache.org/repos/asf/spark/blob/a8ee5706/sql/core/src/test/resources/test-data/parquet-1217.parquet

diff --git a/sql/core/src/test/resources/test-data/parquet-1217.parquet b/sql/core/src/test/resources/test-data/parquet-1217.parquet
new file mode 100644
index 000..eb2dc4f
Binary files /dev/null and b/sql/core/src/test/resources/test-data/parquet-1217.parquet differ

http://git-wip-us.apache.org/repos/asf/spark/blob/a8ee5706/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
index 3380195..79891af 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
@@ -602,6 +602,16 @@ class ParquetFilterSuite extends QueryTest with ParquetTest with SharedSQLContex
     }
   }
 }
+
+  test("SPARK-23852: Broken Parquet push-down for partially-written stats") {
+    // parquet-1217.parquet contains a single column with values -1, 0, 1, 2 and null.
+    // The row-group statistics include null
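For context on what the new test guards against, here is a rough standalone repro sketch of the PARQUET-1217 symptom. It assumes a local SparkSession and that the bundled test resource is reachable at the path below; the real test instead goes through `readResourceParquetFile`:

```scala
import org.apache.spark.sql.SparkSession

// Rough repro sketch, under the assumption that the resource path is valid locally.
val spark = SparkSession.builder().master("local[*]").appName("parquet-1217").getOrCreate()
spark.conf.set("spark.sql.parquet.filterPushdown", "true")

// parquet-1217.parquet has one column with values -1, 0, 1, 2 and null, and
// row-group stats carrying null counts but no min/max -- the PARQUET-1217 shape.
val df = spark.read.parquet("sql/core/src/test/resources/test-data/parquet-1217.parquet")

// With Parquet 1.8.2 the pushed-down predicate wrongly prunes the row group and
// the count comes back 0; with 1.8.3 it is the expected 2.
assert(df.where("col > 0").count() == 2)
```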
spark git commit: [SPARK-24027][SQL] Support MapType with StringType for keys as the root type by from_json
Repository: spark
Updated Branches: refs/heads/master 075d678c8 -> 8cd83acf4

[SPARK-24027][SQL] Support MapType with StringType for keys as the root type by from_json

## What changes were proposed in this pull request?

Currently, the from_json function supports StructType or ArrayType as the root type. This PR additionally allows MapType(StringType, DataType) as the root type. For example:

```scala
import org.apache.spark.sql.types._
val schema = MapType(StringType, IntegerType)
val in = Seq("""{"a": 1, "b": 2, "c": 3}""").toDS()
in.select(from_json($"value", schema, Map[String, String]())).collect()
```
```
res1: Array[org.apache.spark.sql.Row] = Array([Map(a -> 1, b -> 2, c -> 3)])
```

## How was this patch tested?

Checked by new tests for the map type with integer type and struct type as value types. Roundtrip tests like from_json(to_json) and to_json(from_json) for MapType are also added.

Author: Maxim Gekk
Author: Maxim Gekk

Closes #21108 from MaxGekk/from_json-map-type.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8cd83acf
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8cd83acf
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8cd83acf

Branch: refs/heads/master
Commit: 8cd83acf4075d369bfcf9e703760d4946ef15f00
Parents: 075d678
Author: Maxim Gekk
Authored: Mon May 14 14:05:42 2018 -0700
Committer: gatorsmile
Committed: Mon May 14 14:05:42 2018 -0700

 python/pyspark/sql/functions.py                    | 10 ++-
 .../catalyst/expressions/jsonExpressions.scala     | 10 ++-
 .../spark/sql/catalyst/json/JacksonParser.scala    | 18 +-
 .../scala/org/apache/spark/sql/functions.scala     | 29 -
 .../apache/spark/sql/JsonFunctionsSuite.scala      | 66
 5 files changed, 113 insertions(+), 20 deletions(-)

http://git-wip-us.apache.org/repos/asf/spark/blob/8cd83acf/python/pyspark/sql/functions.py

diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index b62748e..6866c1c 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -2095,12 +2095,13 @@ def json_tuple(col, *fields):
     return Column(jc)

+@ignore_unicode_prefix
 @since(2.1)
 def from_json(col, schema, options={}):
     """
-    Parses a column containing a JSON string into a :class:`StructType` or :class:`ArrayType`
-    of :class:`StructType`\\s with the specified schema. Returns `null`, in the case of an
-    unparseable string.
+    Parses a column containing a JSON string into a :class:`MapType` with :class:`StringType`
+    as keys type, :class:`StructType` or :class:`ArrayType` of :class:`StructType`\\s with
+    the specified schema. Returns `null`, in the case of an unparseable string.

     :param col: string column in json format
     :param schema: a StructType or ArrayType of StructType to use when parsing the json column.
@@ -2117,6 +2118,9 @@ def from_json(col, schema, options={}):
     [Row(json=Row(a=1))]
     >>> df.select(from_json(df.value, "a INT").alias("json")).collect()
     [Row(json=Row(a=1))]
+    >>> schema = MapType(StringType(), IntegerType())
+    >>> df.select(from_json(df.value, schema).alias("json")).collect()
+    [Row(json={u'a': 1})]
     >>> data = [(1, '''[{"a": 1}]''')]
     >>> schema = ArrayType(StructType([StructField("a", IntegerType())]))
     >>> df = spark.createDataFrame(data, ("key", "value"))

http://git-wip-us.apache.org/repos/asf/spark/blob/8cd83acf/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala
index 34161f0..04a4eb0 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala
@@ -548,7 +548,7 @@ case class JsonToStructs(
       forceNullableSchema = SQLConf.get.getConf(SQLConf.FROM_JSON_FORCE_NULLABLE_SCHEMA))

   override def checkInputDataTypes(): TypeCheckResult = nullableSchema match {
-    case _: StructType | ArrayType(_: StructType, _) =>
+    case _: StructType | ArrayType(_: StructType, _) | _: MapType =>
       super.checkInputDataTypes()
     case _ => TypeCheckResult.TypeCheckFailure(
       s"Input schema ${nullableSchema.simpleString} must be a struct or an array of structs.")
@@ -558,6 +558,7 @@
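To make the new root type concrete, here is a hedged end-to-end sketch (Spark 2.4+ master as of this commit; a local SparkSession is assumed) that parses a JSON object straight into a map and round-trips it through to_json, mirroring the PR's own example and its roundtrip tests:

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{from_json, to_json}
import org.apache.spark.sql.types.{IntegerType, MapType, StringType}

// Sketch of the new behavior: MapType(StringType, IntegerType) as the root
// schema for from_json, plus the to_json round trip the PR's tests exercise.
val spark = SparkSession.builder().master("local[*]").appName("from_json-map").getOrCreate()
import spark.implicits._

val schema = MapType(StringType, IntegerType)
val in = Seq("""{"a": 1, "b": 2, "c": 3}""").toDS()

val parsed = in.select(from_json($"value", schema).as("m"))
parsed.show(false)                       // [a -> 1, b -> 2, c -> 3]
parsed.select(to_json($"m")).show(false) // {"a":1,"b":2,"c":3}
```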
spark git commit: [SPARK-24155][ML] Instrumentation improvements for clustering
Repository: spark
Updated Branches: refs/heads/master c26f67325 -> 075d678c8

[SPARK-24155][ML] Instrumentation improvements for clustering

## What changes were proposed in this pull request?

Changed the instrumentation for all of the clustering methods.

## How was this patch tested?

N/A

Author: Lu WANG

Closes #21218 from ludatabricks/SPARK-23686-1.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/075d678c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/075d678c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/075d678c

Branch: refs/heads/master
Commit: 075d678c8844614910b50abca07282bde31ef7e0
Parents: c26f673
Author: Lu WANG
Authored: Mon May 14 13:35:54 2018 -0700
Committer: Xiangrui Meng
Committed: Mon May 14 13:35:54 2018 -0700

 .../org/apache/spark/ml/clustering/BisectingKMeans.scala | 7 +++++--
 .../org/apache/spark/ml/clustering/GaussianMixture.scala | 5 ++++-
 .../org/apache/spark/ml/clustering/KMeans.scala          | 4 +++-
 3 files changed, 12 insertions(+), 4 deletions(-)

http://git-wip-us.apache.org/repos/asf/spark/blob/075d678c/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala

diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala
index 438e53b..1ad4e09 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala
@@ -261,8 +261,9 @@ class BisectingKMeans @Since("2.0.0") (
     transformSchema(dataset.schema, logging = true)
     val rdd = DatasetUtils.columnToOldVector(dataset, getFeaturesCol)

-    val instr = Instrumentation.create(this, rdd)
-    instr.logParams(featuresCol, predictionCol, k, maxIter, seed, minDivisibleClusterSize)
+    val instr = Instrumentation.create(this, dataset)
+    instr.logParams(featuresCol, predictionCol, k, maxIter, seed,
+      minDivisibleClusterSize, distanceMeasure)

     val bkm = new MLlibBisectingKMeans()
       .setK($(k))
@@ -275,6 +276,8 @@ class BisectingKMeans @Since("2.0.0") (
     val summary = new BisectingKMeansSummary(
       model.transform(dataset), $(predictionCol), $(featuresCol), $(k))
     model.setSummary(Some(summary))
+    // TODO: need to extend logNamedValue to support Array
+    instr.logNamedValue("clusterSizes", summary.clusterSizes.mkString("[", ",", "]"))
     instr.logSuccess(model)
     model
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/075d678c/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala

diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala
index 88d618c..3091bb5 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala
@@ -352,7 +352,7 @@ class GaussianMixture @Since("2.0.0") (
       s"than ${GaussianMixture.MAX_NUM_FEATURES} features because the size of the covariance" +
       s" matrix is quadratic in the number of features.")

-    val instr = Instrumentation.create(this, instances)
+    val instr = Instrumentation.create(this, dataset)
     instr.logParams(featuresCol, predictionCol, probabilityCol, k, maxIter, seed, tol)
     instr.logNumFeatures(numFeatures)

@@ -425,6 +425,9 @@ class GaussianMixture @Since("2.0.0") (
     val summary = new GaussianMixtureSummary(model.transform(dataset),
       $(predictionCol), $(probabilityCol), $(featuresCol), $(k), logLikelihood)
     model.setSummary(Some(summary))
+    instr.logNamedValue("logLikelihood", logLikelihood)
+    // TODO: need to extend logNamedValue to support Array
+    instr.logNamedValue("clusterSizes", summary.clusterSizes.mkString("[", ",", "]"))
     instr.logSuccess(model)
     model
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/075d678c/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala

diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala
index 97f246f..e72d7f9 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala
@@ -342,7 +342,7 @@
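The `mkString` trick flagged by the TODO comments above is the interim workaround for `logNamedValue` not yet accepting arrays: it renders the array as a single JSON-style string. A tiny illustration (the sizes are made-up values):

```scala
// What the interim logNamedValue workaround produces for an array value:
val clusterSizes = Array(42L, 7L, 133L)
val rendered = clusterSizes.mkString("[", ",", "]")
assert(rendered == "[42,7,133]")  // logged as one string until Array support lands
```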
svn commit: r26899 - in /dev/spark/2.4.0-SNAPSHOT-2018_05_14_12_03-c26f673-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell
Date: Mon May 14 19:18:09 2018
New Revision: 26899

Log: Apache Spark 2.4.0-SNAPSHOT-2018_05_14_12_03-c26f673 docs

[This commit notification would consist of 1462 parts, which exceeds the limit of 50, so it was shortened to this summary.]
spark git commit: [SPARK-24246][SQL] Improve AnalysisException by setting the cause when it's available
Repository: spark
Updated Branches: refs/heads/master 1430fa80e -> c26f67325

[SPARK-24246][SQL] Improve AnalysisException by setting the cause when it's available

## What changes were proposed in this pull request?

If there is an exception, it's better to set it as the cause of AnalysisException, since the exception may contain useful debug information.

## How was this patch tested?

Jenkins

Author: Shixiong Zhu

Closes #21297 from zsxwing/SPARK-24246.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c26f6732
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c26f6732
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c26f6732

Branch: refs/heads/master
Commit: c26f673252c2cbbccf8c395ba6d4ab80c098d60e
Parents: 1430fa8
Author: Shixiong Zhu
Authored: Mon May 14 11:37:57 2018 -0700
Committer: gatorsmile
Committed: Mon May 14 11:37:57 2018 -0700

 .../org/apache/spark/sql/catalyst/analysis/Analyzer.scala      | 6 +++---
 .../spark/sql/catalyst/analysis/ResolveInlineTables.scala      | 2 +-
 .../scala/org/apache/spark/sql/catalyst/analysis/package.scala | 5 +++++
 .../org/apache/spark/sql/execution/datasources/rules.scala     | 2 +-
 4 files changed, 10 insertions(+), 5 deletions(-)

http://git-wip-us.apache.org/repos/asf/spark/blob/c26f6732/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index dfdcdbc..3eaa9ec 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -676,13 +676,13 @@ class Analyzer(
       try {
         catalog.lookupRelation(tableIdentWithDb)
       } catch {
-        case _: NoSuchTableException =>
-          u.failAnalysis(s"Table or view not found: ${tableIdentWithDb.unquotedString}")
+        case e: NoSuchTableException =>
+          u.failAnalysis(s"Table or view not found: ${tableIdentWithDb.unquotedString}", e)
         // If the database is defined and that database is not found, throw an AnalysisException.
         // Note that if the database is not defined, it is possible we are looking up a temp view.
         case e: NoSuchDatabaseException =>
           u.failAnalysis(s"Table or view not found: ${tableIdentWithDb.unquotedString}, the " +
-            s"database ${e.db} doesn't exist.")
+            s"database ${e.db} doesn't exist.", e)
       }
     }

http://git-wip-us.apache.org/repos/asf/spark/blob/c26f6732/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala
index 4eb6e64..31ba9d7 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala
@@ -105,7 +105,7 @@ case class ResolveInlineTables(conf: SQLConf) extends Rule[LogicalPlan] with Cas
           castedExpr.eval()
         } catch {
           case NonFatal(ex) =>
-            table.failAnalysis(s"failed to evaluate expression ${e.sql}: ${ex.getMessage}")
+            table.failAnalysis(s"failed to evaluate expression ${e.sql}: ${ex.getMessage}", ex)
         }
       })
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/c26f6732/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/package.scala

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/package.scala
index 7731336..354a3fa 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/package.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/package.scala
@@ -41,6 +41,11 @@ package object analysis {
     def failAnalysis(msg: String): Nothing = {
       throw new AnalysisException(msg, t.origin.line, t.origin.startPosition)
     }
+
+    /** Fails the analysis at the point where a specific tree node was parsed. */
+    def failAnalysis(msg: String, cause: Throwable): Nothing = {
+      throw new AnalysisException(msg, t.origin.line,
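The point of the new overload is that `getCause` on the resulting AnalysisException still carries the original exception and its stack trace. A self-contained sketch of the pattern, using a stand-in exception class rather than Spark's actual `org.apache.spark.sql.AnalysisException`:

```scala
// Stand-in for Spark's AnalysisException, just to show the cause-chaining
// pattern this change adopts: the cause is forwarded to java.lang.Exception.
class AnalysisException(message: String, cause: Option[Throwable] = None)
  extends Exception(message, cause.orNull)

def failAnalysis(msg: String, cause: Throwable): Nothing =
  throw new AnalysisException(msg, Some(cause))

val underlying = new RuntimeException("metastore lookup failed")
try {
  failAnalysis("Table or view not found: t", underlying)
} catch {
  // The original exception, stack trace included, is still reachable.
  case e: AnalysisException => assert(e.getCause eq underlying)
}
```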
spark git commit: [SPARK-24246][SQL] Improve AnalysisException by setting the cause when it's available
Repository: spark
Updated Branches: refs/heads/branch-2.3 88003f02c -> 2f60df09d

[SPARK-24246][SQL] Improve AnalysisException by setting the cause when it's available

## What changes were proposed in this pull request?

If there is an exception, it's better to set it as the cause of AnalysisException, since the exception may contain useful debug information.

## How was this patch tested?

Jenkins

Author: Shixiong Zhu

Closes #21297 from zsxwing/SPARK-24246.

(cherry picked from commit c26f673252c2cbbccf8c395ba6d4ab80c098d60e)
Signed-off-by: gatorsmile

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2f60df09
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2f60df09
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2f60df09

Branch: refs/heads/branch-2.3
Commit: 2f60df09dc1bb65da254e00abe8463122e8c77cf
Parents: 88003f0
Author: Shixiong Zhu
Authored: Mon May 14 11:37:57 2018 -0700
Committer: gatorsmile
Committed: Mon May 14 11:38:09 2018 -0700

 .../org/apache/spark/sql/catalyst/analysis/Analyzer.scala      | 6 +++---
 .../spark/sql/catalyst/analysis/ResolveInlineTables.scala      | 2 +-
 .../scala/org/apache/spark/sql/catalyst/analysis/package.scala | 5 +++++
 .../org/apache/spark/sql/execution/datasources/rules.scala     | 2 +-
 4 files changed, 10 insertions(+), 5 deletions(-)

http://git-wip-us.apache.org/repos/asf/spark/blob/2f60df09/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 9cc928c..8597d83 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -661,13 +661,13 @@ class Analyzer(
       try {
         catalog.lookupRelation(tableIdentWithDb)
       } catch {
-        case _: NoSuchTableException =>
-          u.failAnalysis(s"Table or view not found: ${tableIdentWithDb.unquotedString}")
+        case e: NoSuchTableException =>
+          u.failAnalysis(s"Table or view not found: ${tableIdentWithDb.unquotedString}", e)
         // If the database is defined and that database is not found, throw an AnalysisException.
         // Note that if the database is not defined, it is possible we are looking up a temp view.
         case e: NoSuchDatabaseException =>
           u.failAnalysis(s"Table or view not found: ${tableIdentWithDb.unquotedString}, the " +
-            s"database ${e.db} doesn't exist.")
+            s"database ${e.db} doesn't exist.", e)
       }
     }

http://git-wip-us.apache.org/repos/asf/spark/blob/2f60df09/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala
index f2df3e1..71ed754 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala
@@ -103,7 +103,7 @@ case class ResolveInlineTables(conf: SQLConf) extends Rule[LogicalPlan] with Cas
           castedExpr.eval()
         } catch {
           case NonFatal(ex) =>
-            table.failAnalysis(s"failed to evaluate expression ${e.sql}: ${ex.getMessage}")
+            table.failAnalysis(s"failed to evaluate expression ${e.sql}: ${ex.getMessage}", ex)
         }
       })
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/2f60df09/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/package.scala

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/package.scala
index 7731336..354a3fa 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/package.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/package.scala
@@ -41,6 +41,11 @@ package object analysis {
     def failAnalysis(msg: String): Nothing = {
       throw new AnalysisException(msg, t.origin.line, t.origin.startPosition)
     }
+
+    /** Fails the analysis at the point where a specific tree node was parsed. */
+
spark git commit: [SPARK-24263][R] SparkR java check breaks with openjdk
Repository: spark
Updated Branches: refs/heads/branch-2.3 867d948a6 -> 88003f02c

[SPARK-24263][R] SparkR java check breaks with openjdk

## What changes were proposed in this pull request?

Change the text to grep for.

## How was this patch tested?

Manual test.

Author: Felix Cheung

Closes #21314 from felixcheung/openjdkver.

(cherry picked from commit 1430fa80e37762e31cc5adc74cd609c215d84b6e)
Signed-off-by: Marcelo Vanzin

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/88003f02
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/88003f02
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/88003f02

Branch: refs/heads/branch-2.3
Commit: 88003f02c11bec9df42d225841b82dc748828940
Parents: 867d948
Author: Felix Cheung
Authored: Mon May 14 10:49:12 2018 -0700
Committer: Marcelo Vanzin
Committed: Mon May 14 10:49:24 2018 -0700

 R/pkg/R/client.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

http://git-wip-us.apache.org/repos/asf/spark/blob/88003f02/R/pkg/R/client.R

diff --git a/R/pkg/R/client.R b/R/pkg/R/client.R
index e9295e0..14a17c6 100644
--- a/R/pkg/R/client.R
+++ b/R/pkg/R/client.R
@@ -82,7 +82,7 @@ checkJavaVersion <- function() {
   })
   javaVersionFilter <- Filter(
     function(x) {
-      grepl("java version", x)
+      grepl(" version", x)
     }, javaVersionOut)

   javaVersionStr <- strsplit(javaVersionFilter[[1]], "[\"]")[[1L]][2]
spark git commit: [SPARK-24263][R] SparkR java check breaks with openjdk
Repository: spark
Updated Branches: refs/heads/master b6c50d782 -> 1430fa80e

[SPARK-24263][R] SparkR java check breaks with openjdk

## What changes were proposed in this pull request?

Change the text to grep for.

## How was this patch tested?

Manual test.

Author: Felix Cheung

Closes #21314 from felixcheung/openjdkver.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1430fa80
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1430fa80
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1430fa80

Branch: refs/heads/master
Commit: 1430fa80e37762e31cc5adc74cd609c215d84b6e
Parents: b6c50d7
Author: Felix Cheung
Authored: Mon May 14 10:49:12 2018 -0700
Committer: Marcelo Vanzin
Committed: Mon May 14 10:49:12 2018 -0700

 R/pkg/R/client.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

http://git-wip-us.apache.org/repos/asf/spark/blob/1430fa80/R/pkg/R/client.R

diff --git a/R/pkg/R/client.R b/R/pkg/R/client.R
index e9295e0..14a17c6 100644
--- a/R/pkg/R/client.R
+++ b/R/pkg/R/client.R
@@ -82,7 +82,7 @@ checkJavaVersion <- function() {
   })
   javaVersionFilter <- Filter(
     function(x) {
-      grepl("java version", x)
+      grepl(" version", x)
    }, javaVersionOut)

   javaVersionStr <- strsplit(javaVersionFilter[[1]], "[\"]")[[1L]][2]
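The underlying issue: on `java -version`, Oracle JDKs print a banner starting with `java version "..."`, while OpenJDK prints `openjdk version "..."`, so the old pattern never matched on OpenJDK. A small sketch of the relaxed matching and the same quote-splitting extraction client.R uses (the banner strings below are illustrative):

```scala
// Illustrative `java -version` banner lines; the quoted version string is
// always on the line containing " version".
val banners = Seq(
  "java version \"1.8.0_171\"",    // Oracle JDK
  "openjdk version \"1.8.0_171\""  // OpenJDK
)

// Old filter: only the Oracle banner survives, so OpenJDK broke the check.
assert(banners.count(_.contains("java version")) == 1)

// New filter: both banners match, and the version parses out identically.
val versions = banners.filter(_.contains(" version")).map(_.split('"')(1))
assert(versions == Seq("1.8.0_171", "1.8.0_171"))
```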