[spark] branch master updated: [SPARK-43529][SQL] Support general constant expressions as CREATE/REPLACE TABLE OPTIONS values
This is an automated email from the ASF dual-hosted git repository.

gengliang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 7250941ab5b [SPARK-43529][SQL] Support general constant expressions as CREATE/REPLACE TABLE OPTIONS values
7250941ab5b is described below

commit 7250941ab5b8ea1c1dc720f2b6407404ac7020bb
Author: Daniel Tenedorio
AuthorDate: Sun Jun 11 21:31:03 2023 -0700

    [SPARK-43529][SQL] Support general constant expressions as CREATE/REPLACE TABLE OPTIONS values

    ### What changes were proposed in this pull request?

    This PR updates the SQL compiler to support general constant expressions in the syntax for CREATE/REPLACE TABLE OPTIONS values, rather than restricting to a few types of literals only.

    * The analyzer now checks that the provided expressions are in fact `foldable`, and throws an error message otherwise.
    * The error message that users encounter in these cases improves from a general "syntax error at or near " to instead indicate that the syntax is valid, but only constant expressions are supported in these contexts.

    ### Why are the changes needed?

    This makes it easier to provide OPTIONS lists in SQL, supporting use cases like concatenating strings with `||`.

    ### Does this PR introduce _any_ user-facing change?

    Yes, the SQL syntax changes.

    ### How was this patch tested?

    This PR adds new unit test coverage.

    Closes #41191 from dtenedor/expression-properties.

    Authored-by: Daniel Tenedorio
    Signed-off-by: Gengliang Wang
---
 core/src/main/resources/error/error-classes.json   |  5 ++
 .../spark/sql/catalyst/parser/SqlBaseParser.g4     | 10 ++-
 .../spark/sql/catalyst/analysis/Analyzer.scala     |  1 +
 .../sql/catalyst/analysis/ResolveTableSpec.scala   | 90
 .../spark/sql/catalyst/parser/AstBuilder.scala     | 69 ++-
 .../sql/catalyst/plans/logical/v2Commands.scala    | 70 +--
 .../sql/catalyst/rules/RuleIdCollection.scala      |  1 +
 .../apache/spark/sql/catalyst/trees/TreeNode.scala |  6 +-
 .../sql/connector/catalog/CatalogV2Util.scala      |  5 +-
 .../spark/sql/errors/QueryCompilationErrors.scala  | 17
 .../CreateTablePartitioningValidationSuite.scala   | 18 +---
 .../spark/sql/catalyst/parser/DDLParserSuite.scala | 76 +
 .../org/apache/spark/sql/DataFrameWriter.scala     | 11 +--
 .../org/apache/spark/sql/DataFrameWriterV2.scala   |  8 +-
 .../catalyst/analysis/ResolveSessionCatalog.scala  | 13 +--
 .../spark/sql/execution/SparkSqlParser.scala       | 19 -
 .../datasources/v2/DataSourceV2Strategy.scala      | 13 +--
 .../apache/spark/sql/internal/CatalogImpl.scala    | 12 ++-
 .../spark/sql/streaming/DataStreamWriter.scala     |  5 +-
 .../sql/TableOptionsConstantFoldingSuite.scala     | 99 ++
 .../connector/V2CommandsCaseSensitivitySuite.scala | 14 ++-
 21 files changed, 433 insertions(+), 129 deletions(-)

diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json
index 7b39ab7266c..a12a8000870 100644
--- a/core/src/main/resources/error/error-classes.json
+++ b/core/src/main/resources/error/error-classes.json
@@ -1389,6 +1389,11 @@
         " with multiple part function name() is not allowed."
       ]
     },
+    "OPTION_IS_INVALID" : {
+      "message" : [
+        "option or property key is invalid; only are supported"
+      ]
+    },
     "REPETITIVE_WINDOW_DEFINITION" : {
       "message" : [
         "The definition of window is repetitive."
diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
index 89100f2aeec..c7b238bfd2c 100644
--- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
+++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
@@ -374,7 +374,7 @@ tableProvider
     ;

 createTableClauses
-    : ((OPTIONS options=propertyList) |
+    : ((OPTIONS options=expressionPropertyList) |
      (PARTITIONED BY partitioning=partitionFieldList) |
      skewSpec |
      bucketSpec |
@@ -405,6 +405,14 @@ propertyValue
     | stringLit
     ;

+expressionPropertyList
+    : LEFT_PAREN expressionProperty (COMMA expressionProperty)* RIGHT_PAREN
+    ;
+
+expressionProperty
+    : key=propertyKey (EQ? value=expression)?
+    ;
+
 constantList
     : LEFT_PAREN constant (COMMA constant)* RIGHT_PAREN
     ;
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index aa1b9d0e8fd..901ae243225 100644
---
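[Editor's note] A minimal sketch of what the new grammar permits, against a build that includes this commit; the table and option names are illustrative, not test code from the PR:

```scala
// With this commit, any foldable (constant) expression is accepted as an
// OPTIONS value, not just a bare literal. String concatenation with || now
// parses and is constant-folded at analysis time:
spark.sql("""
  CREATE TABLE t (col INT) USING parquet
  OPTIONS (compression = 'sna' || 'ppy')
""")

// A non-foldable expression such as rand() still parses, but the analyzer
// rejects it with the dedicated error class added above, rather than the
// previous generic "syntax error at or near" parse failure:
spark.sql("CREATE TABLE u (col INT) USING parquet OPTIONS (opt = rand())")
```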
[spark] branch master updated: [SPARK-43938][CONNECT][PYTHON] Add to_* functions to Scala and Python
This is an automated email from the ASF dual-hosted git repository.

ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 4da9348160f [SPARK-43938][CONNECT][PYTHON] Add to_* functions to Scala and Python
4da9348160f is described below

commit 4da9348160f522d6a5e7633a170d8d077100657f
Author: panbingkun
AuthorDate: Mon Jun 12 08:50:59 2023 +0800

    [SPARK-43938][CONNECT][PYTHON] Add to_* functions to Scala and Python

    ### What changes were proposed in this pull request?
    Add the following functions:
    - str_to_map
    - to_binary
    - to_char
    - to_number
    - to_timestamp_ltz
    - to_timestamp_ntz
    - to_unix_timestamp

    to:
    - Scala API
    - Python API
    - Spark Connect Scala Client
    - Spark Connect Python Client

    ### Why are the changes needed?
    For parity.

    ### Does this PR introduce _any_ user-facing change?
    Yes, new functions.

    ### How was this patch tested?
    - Add new UT.

    Closes #41505 from panbingkun/SPARK-43938.

    Lead-authored-by: panbingkun
    Co-authored-by: panbingkun <84731...@qq.com>
    Signed-off-by: Ruifeng Zheng
---
 .../scala/org/apache/spark/sql/functions.scala     | 160 +
 .../apache/spark/sql/PlanGenerationTestSuite.scala |  52
 .../explain-results/function_str_to_map.explain    |   2 +
 ...to_map_with_pair_and_keyValue_delimiter.explain |   2 +
 ...function_str_to_map_with_pair_delimiter.explain |   2 +
 .../explain-results/function_to_binary.explain     |   2 +
 .../function_to_binary_with_format.explain         |   2 +
 .../explain-results/function_to_char.explain       |   2 +
 .../explain-results/function_to_number.explain     |   2 +
 .../function_to_timestamp_ltz.explain              |   2 +
 .../function_to_timestamp_ltz_with_format.explain  |   2 +
 .../function_to_timestamp_ntz.explain              |   2 +
 .../function_to_timestamp_ntz_with_format.explain  |   2 +
 .../function_to_unix_timestamp.explain             |   2 +
 .../function_to_unix_timestamp_with_format.explain |   2 +
 .../query-tests/queries/function_str_to_map.json   |  25 ++
 .../queries/function_str_to_map.proto.bin          | Bin 0 -> 179 bytes
 ...tr_to_map_with_pair_and_keyValue_delimiter.json |  29 +++
 ..._map_with_pair_and_keyValue_delimiter.proto.bin | Bin 0 -> 186 bytes
 .../function_str_to_map_with_pair_delimiter.json   |  33 +++
 ...nction_str_to_map_with_pair_delimiter.proto.bin | Bin 0 -> 193 bytes
 .../query-tests/queries/function_to_binary.json    |  25 ++
 .../queries/function_to_binary.proto.bin           | Bin 0 -> 178 bytes
 .../queries/function_to_binary_with_format.json    |  29 +++
 .../function_to_binary_with_format.proto.bin       | Bin 0 -> 189 bytes
 .../query-tests/queries/function_to_char.json      |  29 +++
 .../query-tests/queries/function_to_char.proto.bin | Bin 0 -> 188 bytes
 .../query-tests/queries/function_to_number.json    |  29 +++
 .../queries/function_to_number.proto.bin           | Bin 0 -> 188 bytes
 .../queries/function_to_timestamp_ltz.json         |  25 ++
 .../queries/function_to_timestamp_ltz.proto.bin    | Bin 0 -> 185 bytes
 .../function_to_timestamp_ltz_with_format.json     |  29 +++
 ...function_to_timestamp_ltz_with_format.proto.bin | Bin 0 -> 192 bytes
 .../queries/function_to_timestamp_ntz.json         |  25 ++
 .../queries/function_to_timestamp_ntz.proto.bin    | Bin 0 -> 185 bytes
 .../function_to_timestamp_ntz_with_format.json     |  29 +++
 ...function_to_timestamp_ntz_with_format.proto.bin | Bin 0 -> 192 bytes
 .../queries/function_to_unix_timestamp.json        |  25 ++
 .../queries/function_to_unix_timestamp.proto.bin   | Bin 0 -> 186 bytes
 .../function_to_unix_timestamp_with_format.json    |  29 +++
 ...unction_to_unix_timestamp_with_format.proto.bin | Bin 0 -> 193 bytes
 .../source/reference/pyspark.sql/functions.rst     |   7 +
 python/pyspark/sql/connect/functions.py            |  77 ++
 python/pyspark/sql/functions.py                    | 264 +
 .../scala/org/apache/spark/sql/functions.scala     | 175 ++
 .../org/apache/spark/sql/DateFunctionsSuite.scala  |  35 +++
 .../apache/spark/sql/StringFunctionsSuite.scala    |  61 +
 47 files changed, 1218 insertions(+)

diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala
index fc9eb074ca9..9c26037df84 100644
--- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala
@@ -1396,6 +1396,34 @@ object functions {
   def map_from_arrays(keys: Column, values: Column): Column =
     Column.fn("map_from_arrays", keys, values)

+  /**
+   * Creates a map after splitting the text into
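[Editor's note] A short usage sketch of the newly added functions from the Scala API; it assumes a SparkSession named `spark` and a build that includes this commit, and the column names and format strings are illustrative:

```scala
import org.apache.spark.sql.functions._
import spark.implicits._

val df = Seq(("a:1,b:2", "2023-06-12 08:50:59")).toDF("kv", "ts")

df.select(
  str_to_map($"kv", lit(","), lit(":")),   // "a:1,b:2" -> {a -> 1, b -> 2}
  to_timestamp_ltz($"ts"),                 // parse to TIMESTAMP_LTZ
  to_timestamp_ntz($"ts"),                 // parse to TIMESTAMP_NTZ
  to_unix_timestamp($"ts"),                // seconds since the Unix epoch
  to_binary(lit("abc"), lit("utf-8")),     // encode a string as binary
  to_char(lit(78.12), lit("$99.99")),      // format a number as a string
  to_number(lit("$78.12"), lit("$99.99"))  // parse a formatted string to decimal
).show(truncate = false)
```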
[spark] branch master updated: [SPARK-43179][FOLLOW-UP] Use the secret ByteBuffer instead of the String
This is an automated email from the ASF dual-hosted git repository.

srowen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 45ad044042f [SPARK-43179][FOLLOW-UP] Use the secret ByteBuffer instead of the String
45ad044042f is described below

commit 45ad044042f7f376c4c0234807a62179b680edae
Author: Chandni Singh
AuthorDate: Sun Jun 11 07:59:35 2023 -0500

    [SPARK-43179][FOLLOW-UP] Use the secret ByteBuffer instead of the String

    ### What changes were proposed in this pull request?
    A bug was introduced by this change: https://github.com/apache/spark/pull/40843. To get the value that is persisted in the db, we used to use `mapper.writeValueAsString(ByteBuffer)`. We changed it to `mapper.writeValueAsString(String)`. However, when we load from the db, it still uses `ByteBuffer secret = mapper.readValue(e.getValue(), ByteBuffer.class);`, causing exceptions when the shuffle service is unable to recover the apps:
    ```
    ERROR org.apache.spark.network.server.TransportRequestHandler: Error while invoking RpcHandler#receive() on RPC id 5764589675121231159
    java.lang.RuntimeException: javax.security.sasl.SaslException: DIGEST-MD5: digest response format violation. Mismatched response.
        at org.sparkproject.guava.base.Throwables.propagate(Throwables.java:160)
        at org.apache.spark.network.sasl.SparkSaslServer.response(SparkSaslServer.java:121)
        at org.apache.spark.network.sasl.SaslRpcHandler.doAuthChallenge(Sas [...]
    ```

    ### Why are the changes needed?
    It fixes the bug that was introduced with SPARK-43179.

    ### Does this PR introduce _any_ user-facing change?
    No

    ### How was this patch tested?
    The existing UTs in `YarnShuffleServiceSuite` were using an empty password, which masked the issue. Changed them to use a non-empty password.

    Closes #41502 from otterc/SPARK-43179-followup.

    Authored-by: Chandni Singh
    Signed-off-by: Sean Owen
---
 .../spark/network/yarn/YarnShuffleService.java     |  4 +++-
 .../network/yarn/YarnShuffleServiceSuite.scala     | 25 +-
 2 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java b/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java
index 578c1a19c40..b34ebf6e29b 100644
--- a/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java
+++ b/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java
@@ -440,7 +440,9 @@ public class YarnShuffleService extends AuxiliaryService {
       if (db != null && AppsWithRecoveryDisabled.isRecoveryEnabledForApp(appId)) {
         AppId fullId = new AppId(appId);
         byte[] key = dbAppKey(fullId);
-        byte[] value = mapper.writeValueAsString(shuffleSecret).getBytes(StandardCharsets.UTF_8);
+        ByteBuffer dbVal = metaInfo != null ?
+          JavaUtils.stringToBytes(shuffleSecret) : appServiceData;
+        byte[] value = mapper.writeValueAsString(dbVal).getBytes(StandardCharsets.UTF_8);
         db.put(key, value);
       }
       secretManager.registerApp(appId, shuffleSecret);
diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/network/yarn/YarnShuffleServiceSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/network/yarn/YarnShuffleServiceSuite.scala
index 3e78262a765..552cc98311e 100644
--- a/resource-managers/yarn/src/test/scala/org/apache/spark/network/yarn/YarnShuffleServiceSuite.scala
+++ b/resource-managers/yarn/src/test/scala/org/apache/spark/network/yarn/YarnShuffleServiceSuite.scala
@@ -71,6 +71,8 @@ abstract class YarnShuffleServiceSuite extends SparkFunSuite with Matchers {
   private[yarn] val SORT_MANAGER_WITH_MERGE_SHUFFLE_META_WithNoAttemptID =
     "org.apache.spark.shuffle.sort.SortShuffleManager:{\"mergeDir\": \"merge_manager\"}"
   private val DUMMY_BLOCK_DATA = "dummyBlockData".getBytes(StandardCharsets.UTF_8)
+  private val DUMMY_PASSWORD = "dummyPassword"
+  private val EMPTY_PASSWORD = ""

   private var recoveryLocalDir: File = _
   protected var tempDir: File = _
@@ -191,7 +193,8 @@ abstract class YarnShuffleServiceSuite extends SparkFunSuite with Matchers {
     val app3Data = makeAppInfo("user", app3Id)
     s1.initializeApplication(app3Data)
     val app4Id = ApplicationId.newInstance(0, 4)
-    val app4Data = makeAppInfo("user", app4Id)
+    val app4Data = makeAppInfo("user", app4Id, metadataStorageDisabled = false,
+      authEnabled = true, DUMMY_PASSWORD)
     s1.initializeApplication(app4Data)

     val execStateFile = s1.registeredExecutorFile
@@ -1038,15 +1041,15 @@ abstract class YarnShuffleServiceSuite extends SparkFunSuite with Matchers {

   private def makeAppInfo(user: String, appId: ApplicationId,
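[Editor's note] The failure mode is easiest to see with Jackson directly. A minimal sketch (illustrative only; it mirrors the mapper usage in `YarnShuffleService` but is not the service code):

```scala
import java.nio.ByteBuffer
import java.nio.charset.StandardCharsets
import com.fasterxml.jackson.databind.ObjectMapper

val mapper = new ObjectMapper()
val secret = "dummyPassword"

// Broken path: persist the secret as a plain JSON string...
val storedAsString = mapper.writeValueAsString(secret)
// ...but recovery still reads it back as a ByteBuffer, which Jackson decodes
// as base64, so the recovered bytes do not match the secret (or decoding
// fails outright), and SASL auth breaks after a shuffle-service restart:
// mapper.readValue(storedAsString, classOf[ByteBuffer])

// Fixed path: persist the ByteBuffer form, so the base64 round trip is symmetric.
val buf = ByteBuffer.wrap(secret.getBytes(StandardCharsets.UTF_8))
val storedAsBuffer = mapper.writeValueAsString(buf)
val recovered = mapper.readValue(storedAsBuffer, classOf[ByteBuffer])
assert(new String(recovered.array(), StandardCharsets.UTF_8) == secret)

// An empty password serializes to an empty payload on both paths, which is
// why the suite's previous empty-password tests masked the bug.
```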
[spark] branch master updated: [SPARK-43772][BUILD][CONNECT] Move version configuration in `connect` module to parent
This is an automated email from the ASF dual-hosted git repository.

yangjie01 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 4f2e2d7e038 [SPARK-43772][BUILD][CONNECT] Move version configuration in `connect` module to parent
4f2e2d7e038 is described below

commit 4f2e2d7e038396f37d2afddff76608eef5cc6e8d
Author: panbingkun
AuthorDate: Sun Jun 11 15:54:38 2023 +0800

    [SPARK-43772][BUILD][CONNECT] Move version configuration in `connect` module to parent

    ### What changes were proposed in this pull request?
    The PR aims to move the version configuration in the `connect` module to the parent.

    ### Why are the changes needed?
    For better management and easier maintenance, e.g. to avoid upgrading a library version in one module while forgetting another.

    ### Does this PR introduce _any_ user-facing change?
    No.

    ### How was this patch tested?
    - Pass GA.
    - Manually test:
      sh build/sbt "connect/testOnly org.apache.spark.sql.connect.artifact.ArtifactManagerSuite"
      sh build/sbt "connect/testOnly org.apache.spark.sql.connect.planner.SparkConnectServiceSuite"
      sh build/sbt "connect/testOnly org.apache.spark.sql.connect.service.InterceptorRegistrySuite"

    Closes #41295 from panbingkun/SPARK-43772.

    Authored-by: panbingkun
    Signed-off-by: yangjie01
---
 connector/connect/client/jvm/pom.xml |  5 +
 connector/connect/common/pom.xml     |  4
 connector/connect/server/pom.xml     |  6 +-
 pom.xml                              |  7 +++
 project/SparkBuild.scala             | 18 --
 5 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/connector/connect/client/jvm/pom.xml b/connector/connect/client/jvm/pom.xml
index 4d0a4379329..cbb283ed993 100644
--- a/connector/connect/client/jvm/pom.xml
+++ b/connector/connect/client/jvm/pom.xml
@@ -32,9 +32,6 @@
   https://spark.apache.org/
   connect-client-jvm
-  31.0.1-jre
-  1.0.1
-  1.1.2
@@ -70,7 +67,7 @@
     com.google.guava
     guava
-    ${guava.version}
+    ${connect.guava.version}
     compile
diff --git a/connector/connect/common/pom.xml b/connector/connect/common/pom.xml
index 06076646df7..1890384b51d 100644
--- a/connector/connect/common/pom.xml
+++ b/connector/connect/common/pom.xml
@@ -32,10 +32,6 @@
   https://spark.apache.org/
   connect-common
-  31.0.1-jre
-  1.0.1
-  1.47.0
-  6.0.53
diff --git a/connector/connect/server/pom.xml b/connector/connect/server/pom.xml
index f9dc40ce85a..95b70c6b0f4 100644
--- a/connector/connect/server/pom.xml
+++ b/connector/connect/server/pom.xml
@@ -32,10 +32,6 @@
   https://spark.apache.org/
   connect
-  31.0.1-jre
-  1.0.1
-  1.47.0
-  6.0.53
@@ -165,7 +161,7 @@
     com.google.guava
     guava
-    ${guava.version}
+    ${connect.guava.version}
     compile
diff --git a/pom.xml b/pom.xml
index b668854fd4e..23872c6ea9a 100644
--- a/pom.xml
+++ b/pom.xml
@@ -277,6 +277,13 @@
     false
     true
+
+    31.0.1-jre
+    1.0.1
+    1.47.0
+    1.1.2
+    6.0.53
+
     128m
     -MM-dd HH:mm:ss z
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 023ce4ba81c..bd09c53b442 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -674,7 +674,8 @@ object SparkConnectCommon {
       // of these dependendencies that we need to shade later on.
     libraryDependencies ++= {
       val guavaVersion =
-        SbtPomKeys.effectivePom.value.getProperties.get("guava.version").asInstanceOf[String]
+        SbtPomKeys.effectivePom.value.getProperties.get(
+          "connect.guava.version").asInstanceOf[String]
       val guavaFailureaccessVersion =
         SbtPomKeys.effectivePom.value.getProperties.get(
           "guava.failureaccess.version").asInstanceOf[String]
@@ -688,7 +689,8 @@ object SparkConnectCommon {

     dependencyOverrides ++= {
       val guavaVersion =
-        SbtPomKeys.effectivePom.value.getProperties.get("guava.version").asInstanceOf[String]
+        SbtPomKeys.effectivePom.value.getProperties.get(
+          "connect.guava.version").asInstanceOf[String]
       val guavaFailureaccessVersion =
         SbtPomKeys.effectivePom.value.getProperties.get(
           "guava.failureaccess.version").asInstanceOf[String]
@@ -753,7 +755,8 @@ object SparkConnect {
       // of these dependendencies that we need to shade later on.
     libraryDependencies ++= {
       val guavaVersion =
-        SbtPomKeys.effectivePom.value.getProperties.get("guava.version").asInstanceOf[String]
+        SbtPomKeys.effectivePom.value.getProperties.get(
+          "connect.guava.version").asInstanceOf[String]
       val guavaFailureaccessVersion =
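[Editor's note] Centralizing the versions means every module resolves the same property from the parent pom.xml. A fail-fast lookup like the hypothetical helper below (not part of SparkBuild.scala) shows why that helps: a missing or misspelled key becomes an immediate build error rather than a silently stale dependency version.

```scala
import java.util.Properties

// Hypothetical helper: resolve a required version property from the effective
// POM, e.g. the "connect.guava.version" key introduced by this commit.
def requiredPomProperty(props: Properties, name: String): String =
  Option(props.getProperty(name)).getOrElse(
    sys.error(s"Version property '$name' is not defined in the parent pom.xml"))

// Usage, assuming `pomProps` came from SbtPomKeys.effectivePom.value.getProperties:
// val guavaVersion = requiredPomProperty(pomProps, "connect.guava.version")
```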