[spark] branch master updated: [SPARK-37545][SQL] V2 CreateTableAsSelect command should qualify location
This is an automated email from the ASF dual-hosted git repository.

huaxingao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git

The following commit(s) were added to refs/heads/master by this push:
     new feba5ac  [SPARK-37545][SQL] V2 CreateTableAsSelect command should qualify location
feba5ac is described below

commit feba5ac32f2598f6ca8a274850934106be0db64d
Author: Terry Kim
AuthorDate: Sat Dec 4 20:47:45 2021 -0800

[SPARK-37545][SQL] V2 CreateTableAsSelect command should qualify location

### What changes were proposed in this pull request?

Currently, the v2 CTAS command doesn't qualify the location:
```
spark.sql("CREATE TABLE testcat.t USING foo LOCATION '/tmp/foo' AS SELECT id FROM source")
spark.sql("DESCRIBE EXTENDED testcat.t").filter("col_name = 'Location'").show
+--------+---------+-------+
|col_name|data_type|comment|
+--------+---------+-------+
|Location|/tmp/foo |       |
+--------+---------+-------+
```
whereas the v1 command qualifies the location as `file:/tmp/foo`, which is the correct behavior since the default filesystem can change across sessions.

### Why are the changes needed?

This PR proposes to store the qualified location in order to prevent issues when the default filesystem changes across sessions.

### Does this PR introduce _any_ user-facing change?

Yes. Now the v2 CTAS command stores the qualified location:
```
+--------+-------------+-------+
|col_name|data_type    |comment|
+--------+-------------+-------+
|Location|file:/tmp/foo|       |
+--------+-------------+-------+
```

### How was this patch tested?

Added a new test.

Closes #34806 from imback82/v2_ctas_qualified_loc.

Authored-by: Terry Kim
Signed-off-by: Huaxin Gao
---
 .../execution/datasources/v2/DataSourceV2Strategy.scala    |  6 --
 .../DataSourceV2DataFrameSessionCatalogSuite.scala         |  4 ++--
 .../apache/spark/sql/connector/DataSourceV2SQLSuite.scala  | 15 +++
 3 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala
index f73b1a6..dbe4168 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala
@@ -172,13 +172,15 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat
     case CreateTableAsSelect(ResolvedDBObjectName(catalog, ident), parts, query, tableSpec,
         options, ifNotExists) =>
       val writeOptions = new CaseInsensitiveStringMap(options.asJava)
+      val tableSpecWithQualifiedLocation = tableSpec.copy(
+        location = tableSpec.location.map(makeQualifiedDBObjectPath(_)))
       catalog match {
         case staging: StagingTableCatalog =>
           AtomicCreateTableAsSelectExec(staging, ident.asIdentifier, parts, query, planLater(query),
-            tableSpec, writeOptions, ifNotExists) :: Nil
+            tableSpecWithQualifiedLocation, writeOptions, ifNotExists) :: Nil
         case _ =>
           CreateTableAsSelectExec(catalog.asTableCatalog, ident.asIdentifier, parts, query,
-            planLater(query), tableSpec, writeOptions, ifNotExists) :: Nil
+            planLater(query), tableSpecWithQualifiedLocation, writeOptions, ifNotExists) :: Nil
       }

     case RefreshTable(r: ResolvedTable) =>
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSessionCatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSessionCatalogSuite.scala
index 91ac7db..3edc4b9 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSessionCatalogSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSessionCatalogSuite.scala
@@ -83,10 +83,10 @@ class DataSourceV2DataFrameSessionCatalogSuite
   test("saveAsTable passes path and provider information properly") {
     val t1 = "prop_table"
     withTable(t1) {
-      spark.range(20).write.format(v2Format).option("path", "abc").saveAsTable(t1)
+      spark.range(20).write.format(v2Format).option("path", "/abc").saveAsTable(t1)
       val cat = spark.sessionState.catalogManager.currentCatalog.asInstanceOf[TableCatalog]
       val tableInfo = cat.loadTable(Identifier.of(Array("default"), t1))
-      assert(tableInfo.properties().get("location") === "abc")
+      assert(tableInfo.properties().get("location") === "file:/abc")
       assert(tableInfo.properties().get("provider") === v2Format)
     }
   }
diff --git
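A minimal sketch of what "qualifying" a location means here, assuming Hadoop's FileSystem API; the helper name below is illustrative and is not Spark's actual `makeQualifiedDBObjectPath`:

```scala
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path

// Resolve a possibly scheme-less location string against the default
// filesystem so that later sessions point at the same storage location.
def qualifyLocation(location: String, hadoopConf: Configuration): String = {
  val path = new Path(location)
  val fs = path.getFileSystem(hadoopConf) // e.g. the local filesystem by default
  fs.makeQualified(path).toString         // "/tmp/foo" -> "file:/tmp/foo"
}
```

With this commit, the table spec handed to the CTAS exec nodes carries the qualified form, so DESCRIBE EXTENDED reports `file:/tmp/foo` rather than the bare `/tmp/foo`.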
[spark] branch master updated (32922ee -> 0843e0d)
This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.

from 32922ee  [SPARK-37529][K8S][TESTS] Support K8s integration tests for Java 17
 add 0843e0d  [SPARK-37548][INFRA][R][TESTS] Add Java17 SparkR daily test coverage

No new revisions were added by this update.

Summary of changes:
 .github/workflows/build_and_test.yml | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-37529][K8S][TESTS] Support K8s integration tests for Java 17
This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git

The following commit(s) were added to refs/heads/master by this push:
     new 32922ee  [SPARK-37529][K8S][TESTS] Support K8s integration tests for Java 17
32922ee is described below

commit 32922eeb3250408523991f21c943de02cd1ebea3
Author: Kousuke Saruta
AuthorDate: Sat Dec 4 15:20:29 2021 -0800

[SPARK-37529][K8S][TESTS] Support K8s integration tests for Java 17

### What changes were proposed in this pull request?

This PR aims to support K8s integration tests for Java 17 using Maven and SBT. The new system property `spark.kubernetes.test.dockerFile` is introduced to specify a Dockerfile. By setting the property to `Dockerfile.java17`, the integration tests run with Java 17.

This PR also revises the change brought by SPARK-37354 (#34628) by changing `SparkBuild.scala` so that it can recognize the system property `spark.kubernetes.test.javaImageTag`, like the integration tests with Maven do. If both `spark.kubernetes.test.dockerFile` and `spark.kubernetes.test.javaImageTag` are set, `spark.kubernetes.test.dockerFile` is preferred.

### Why are the changes needed?

To ensure Spark works on K8s with Java 17.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Confirmed that the intended version of Java is used for each pattern.

* Neither `spark.kubernetes.test.javaImageTag` nor `spark.kubernetes.test.dockerFile` is set (SBT)
```
# Run the integration tests to create the container image
$ build/sbt -Pkubernetes -Pkubernetes-integration-tests "kubernetes-integration-tests/test"

# Create and log in to the container.
docker run -it /bin/bash
185b40ef8aaa56c:/opt/spark/work-dir$ java -version
openjdk version "1.8.0_312"
OpenJDK Runtime Environment (build 1.8.0_312-b07)
OpenJDK 64-Bit Server VM (build 25.312-b07, mixed mode)
```

* Neither `spark.kubernetes.test.javaImageTag` nor `spark.kubernetes.test.dockerFile` is set (Maven)
```
# Run the integration tests to create the container image
$ build/mvn -Pkubernetes -Pkubernetes-integration-tests -pl resource-managers/kubernetes/integration-tests integration-test

# Create and log in to the container.
docker run -it /bin/bash
18536ae5c5c21f4:/opt/spark/work-dir$ java -version
openjdk version "1.8.0_312"
OpenJDK Runtime Environment (build 1.8.0_312-b07)
OpenJDK 64-Bit Server VM (build 25.312-b07, mixed mode)
```

* `spark.kubernetes.test.javaImageTag` is set (SBT):
```
# Run the integration tests to create the container image
$ build/sbt -Dspark.kubernetes.test.javaImageTag=11-jre-slim -Pkubernetes -Pkubernetes-integration-tests "kubernetes-integration-tests/test"

# Create and log in to the container.
docker run -it /bin/bash
1855b896d0e5dd8:/opt/spark/work-dir$ java -version
openjdk version "11.0.13" 2021-10-19
OpenJDK Runtime Environment 18.9 (build 11.0.13+8)
OpenJDK 64-Bit Server VM 18.9 (build 11.0.13+8, mixed mode, sharing)
```

* `spark.kubernetes.test.javaImageTag` is set (Maven):
```
# Run the integration tests to create the container image
$ build/mvn -Dspark.kubernetes.test.javaImageTag=11-jre-slim -Pkubernetes -Pkubernetes-integration-tests -pl resource-managers/kubernetes/integration-tests integration-test

# Create and log in to the container.
docker run -it /bin/bash
1855d3c228e7521:/opt/spark/work-dir$ java -version
openjdk version "11.0.13" 2021-10-19
OpenJDK Runtime Environment 18.9 (build 11.0.13+8)
OpenJDK 64-Bit Server VM 18.9 (build 11.0.13+8, mixed mode, sharing)
```

* `spark.kubernetes.test.dockerFile` is set (SBT)
```
$ build/sbt -Dspark.kubernetes.test.dockerFile=resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile.java17 -Pkubernetes -Pkubernetes-integration-tests package "kubernetes-integration-tests/test"

# Create and log in to the container.
docker run -it /bin/bash
185550515c76582:/opt/spark/work-dir$ java -version
openjdk version "17.0.1" 2021-10-19
OpenJDK Runtime Environment (build 17.0.1+12-Debian-1deb11u2)
OpenJDK 64-Bit Server VM (build 17.0.1+12-Debian-1deb11u2, mixed mode, sharing)
```

* `spark.kubernetes.test.dockerFile` is set (Maven)
```
build/mvn -Dspark.kubernetes.test.dockerFile=resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile.java17 -Pkubernetes -Pkubernetes-integration-tests -pl resource-managers/kubernetes/integration-tests integration-test

# Create and log in to the container.
docker run -it /bin/bash
185be35bde0ebfa:/opt/spark/work-dir$ java
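The precedence rule described above ("`spark.kubernetes.test.dockerFile` is preferred") can be pictured with a short sketch. This is an illustrative reconstruction, not the actual `SparkBuild.scala` code, and it assumes the flags of `docker-image-tool.sh` (`-f` to pick a Dockerfile, `-b` to pass a build arg):

```scala
// Illustrative precedence logic: an explicit Dockerfile wins over a Java image tag.
val dockerFile   = sys.props.get("spark.kubernetes.test.dockerFile")
val javaImageTag = sys.props.get("spark.kubernetes.test.javaImageTag")

val extraImageArgs: Seq[String] = (dockerFile, javaImageTag) match {
  case (Some(file), _)   => Seq("-f", file)                   // Dockerfile is preferred
  case (None, Some(tag)) => Seq("-b", s"java_image_tag=$tag") // otherwise use the base image tag
  case (None, None)      => Seq.empty                         // defaults (Java 8 base image)
}
```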
[spark] branch master updated: [SPARK-37533][SQL] New SQL function: try_element_at
This is an automated email from the ASF dual-hosted git repository.

gengliang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git

The following commit(s) were added to refs/heads/master by this push:
     new f705e52  [SPARK-37533][SQL] New SQL function: try_element_at
f705e52 is described below

commit f705e522bc26085feebe1e5efc92313858df58d1
Author: Gengliang Wang
AuthorDate: Sat Dec 4 23:38:52 2021 +0800

[SPARK-37533][SQL] New SQL function: try_element_at

### What changes were proposed in this pull request?

Add a new SQL function, `try_element_at`, which is identical to `element_at` except that it returns null if an error occurs.

### Why are the changes needed?

After some data science on real-world SQL queries, we found that some users are using the following queries:
```
select ... where mapCol.key is not null
select ... where arrayCol[index] is not null
```

### Does this PR introduce _any_ user-facing change?

Yes, an alternative function `try_element_at`, which returns null on an out-of-bound array index or a non-existent map key under ANSI mode.

### How was this patch tested?

Unit tests

Closes #34796 from gengliangwang/try_element_at.

Authored-by: Gengliang Wang
Signed-off-by: Gengliang Wang
---
 core/src/main/resources/error/error-classes.json     |  9 ++-
 .../org/apache/spark/SparkThrowableSuite.scala        |  2 +-
 docs/sql-ref-ansi-compliance.md                       |  1 +
 .../sql/catalyst/analysis/FunctionRegistry.scala      |  3 +
 .../spark/sql/catalyst/expressions/TryEval.scala      | 38 +
 .../catalyst/expressions/stringExpressions.scala      |  6 +-
 .../spark/sql/errors/QueryExecutionErrors.scala       |  5 ++
 .../sql/catalyst/expressions/TryEvalSuite.scala       | 24
 .../sql-functions/sql-expression-schema.md            |  3 +-
 .../sql-tests/inputs/ansi/try_element_at.sql          |  1 +
 .../resources/sql-tests/inputs/try_element_at.sql     | 11
 .../resources/sql-tests/results/ansi/array.sql.out    |  8 +--
 .../resources/sql-tests/results/ansi/map.sql.out      |  4 +-
 .../sql-tests/results/ansi/try_element_at.sql.out     | 66 ++
 .../sql-tests/results/try_element_at.sql.out          | 66 ++
 15 files changed, 234 insertions(+), 13 deletions(-)

diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json
index 3e0a9c3..896ee7b 100644
--- a/core/src/main/resources/error/error-classes.json
+++ b/core/src/main/resources/error/error-classes.json
@@ -72,7 +72,7 @@
     "message" : [ "%s" ]
   },
   "INVALID_ARRAY_INDEX" : {
-    "message" : [ "Invalid index: %s, numElements: %s. If necessary set %s to false to bypass this error." ]
+    "message" : [ "Invalid index: %s, numElements: %s. To return NULL instead, use 'try_element_at'. If necessary set %s to false to bypass this error." ]
   },
   "INVALID_FIELD_NAME" : {
     "message" : [ "Field name %s is invalid: %s is not a struct." ],
@@ -82,6 +82,9 @@
     "message" : [ "The fraction of sec must be zero. Valid range is [0, 60]. If necessary set %s to false to bypass this error. " ],
     "sqlState" : "22023"
   },
+  "INVALID_INPUT_INDEX" : {
+    "message" : [ "Invalid index: %s, numElements: %s. If necessary set %s to false to bypass this error." ]
+  },
   "INVALID_INPUT_SYNTAX_FOR_NUMERIC_TYPE" : {
     "message" : [ "invalid input syntax for type numeric: %s. To return NULL instead, use 'try_cast'. If necessary set %s to false to bypass this error." ],
     "sqlState" : "42000"
@@ -90,7 +93,7 @@
     "message" : [ "Input schema %s can only contain StringType as a key type for a MapType." ]
   },
   "MAP_KEY_DOES_NOT_EXIST" : {
-    "message" : [ "Key %s does not exist. If necessary set %s to false to bypass this error." ]
+    "message" : [ "Key %s does not exist. To return NULL instead, use 'try_element_at'. If necessary set %s to false to bypass this error." ]
   },
   "MISSING_COLUMN" : {
     "message" : [ "Column '%s' does not exist. Did you mean one of the following? [%s]" ],
@@ -154,4 +157,4 @@
     "message" : [ "Writing job aborted" ],
     "sqlState" : "4"
   }
-}
\ No newline at end of file
+}
diff --git a/core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala b/core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala
index 7d445b2..47df19f 100644
--- a/core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala
+++ b/core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala
@@ -69,7 +69,7 @@ class SparkThrowableSuite extends SparkFunSuite {
     val rewrittenString = mapper.configure(SerializationFeature.ORDER_MAP_ENTRIES_BY_KEYS, true)
       .setSerializationInclusion(Include.NON_ABSENT)
       .writeValueAsString(errorClassToInfoMap)
-    assert(rewrittenString == errorClassFileContents)
+    assert(rewrittenString.trim ==