[spark] branch branch-3.4 updated: [SPARK-42345][SQL] Rename TimestampNTZ inference conf as spark.sql.sources.timestampNTZTypeInference.enabled
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.4 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.4 by this push: new 529ccc7ce37 [SPARK-42345][SQL] Rename TimestampNTZ inference conf as spark.sql.sources.timestampNTZTypeInference.enabled 529ccc7ce37 is described below

commit 529ccc7ce37f73c01d6c64b3639323f67e6ca323 Author: Gengliang Wang AuthorDate: Sun Feb 5 10:43:34 2023 +0300 [SPARK-42345][SQL] Rename TimestampNTZ inference conf as spark.sql.sources.timestampNTZTypeInference.enabled

### What changes were proposed in this pull request? Rename the TimestampNTZ data source inference configuration from `spark.sql.inferTimestampNTZInDataSources.enabled` to `spark.sql.sources.timestampNTZTypeInference.enabled`. For more context on this configuration: https://github.com/apache/spark/pull/39777 https://github.com/apache/spark/pull/39812 https://github.com/apache/spark/pull/39868

### Why are the changes needed? Since the configuration is for data sources, we can put it under the prefix `spark.sql.sources`. The new naming is consistent with another configuration, `spark.sql.sources.partitionColumnTypeInference.enabled`.

### Does this PR introduce _any_ user-facing change?

### How was this patch tested?

Closes #39885 from gengliangwang/renameConf. Authored-by: Gengliang Wang Signed-off-by: Max Gekk (cherry picked from commit c5c1927d6a137c0e92417a0efad5da62ab253137) Signed-off-by: Max Gekk

--- .../org/apache/spark/sql/internal/SQLConf.scala | 20 ++-- .../execution/datasources/PartitioningUtils.scala| 2 +- 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 363e763be4f..2f05c356160 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1416,6 +1416,16 @@ object SQLConf { .booleanConf .createWithDefault(true) + val INFER_TIMESTAMP_NTZ_IN_DATA_SOURCES = +buildConf("spark.sql.sources.timestampNTZTypeInference.enabled") + .doc("For the schema inference of JSON/CSV/JDBC data sources and partition directories, " +"this config determines whether to choose the TimestampNTZ type if a column can be " +"either TimestampNTZ or TimestampLTZ type. If set to true, the inference result of " +"the column will be TimestampNTZ type. Otherwise, the result will be TimestampLTZ type.") + .version("3.4.0") + .booleanConf + .createWithDefault(false) + val BUCKETING_ENABLED = buildConf("spark.sql.sources.bucketing.enabled") .doc("When false, we will treat bucketed table as normal table") .version("2.0.0") @@ -3518,16 +3528,6 @@ object SQLConf { .checkValues(TimestampTypes.values.map(_.toString)) .createWithDefault(TimestampTypes.TIMESTAMP_LTZ.toString) - val INFER_TIMESTAMP_NTZ_IN_DATA_SOURCES = -buildConf("spark.sql.inferTimestampNTZInDataSources.enabled") - .doc("For the schema inference of JSON/CSV/JDBC data sources and partition directories, " + -"this config determines whether to choose the TimestampNTZ type if a column can be " + -"either TimestampNTZ or TimestampLTZ type. If set to true, the inference result of " + -"the column will be TimestampNTZ type. Otherwise, the result will be TimestampLTZ type.") - .version("3.4.0") - .booleanConf - .createWithDefault(false) - val DATETIME_JAVA8API_ENABLED = buildConf("spark.sql.datetime.java8API.enabled") .doc("If the configuration property is set to true, java.time.Instant and " + "java.time.LocalDate classes of Java 8 API are used as external types for " +

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala index 38c3f71ab49..90c45fd11dd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala @@ -490,7 +490,7 @@ object PartitioningUtils extends SQLConfHelper { val unescapedRaw = unescapePathName(raw) // try and parse the date, if no exception occurs this is a candidate to be resolved as // TimestampType or TimestampNTZType. The inference timestamp typ is controlled by the conf - "spark.sql.inferTimestampNTZInDataSources.enabled". + "spark.sql.sources.timestampNTZTypeInference.enabled". val timestampType =
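To make the rename concrete, here is a minimal, hedged sketch of how the new flag steers schema inference, assuming a local `SparkSession` and a hypothetical CSV file `/tmp/events.csv` with a timestamp-shaped column (neither the path nor the data comes from the commit above):

```
import org.apache.spark.sql.SparkSession

object NtzInferenceSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("ntz-sketch").getOrCreate()

    // Default is false: ambiguous timestamp columns are inferred as TimestampType (LTZ).
    // With the renamed flag on, the same columns are inferred as TimestampNTZType.
    spark.conf.set("spark.sql.sources.timestampNTZTypeInference.enabled", "true")

    val df = spark.read
      .option("header", "true")
      .option("inferSchema", "true")
      .csv("/tmp/events.csv") // hypothetical input file
    df.printSchema()

    spark.stop()
  }
}
```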
[spark] branch master updated: [SPARK-42345][SQL] Rename TimestampNTZ inference conf as spark.sql.sources.timestampNTZTypeInference.enabled
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new c5c1927d6a1 [SPARK-42345][SQL] Rename TimestampNTZ inference conf as spark.sql.sources.timestampNTZTypeInference.enabled c5c1927d6a1 is described below

commit c5c1927d6a137c0e92417a0efad5da62ab253137 Author: Gengliang Wang AuthorDate: Sun Feb 5 10:43:34 2023 +0300 [SPARK-42345][SQL] Rename TimestampNTZ inference conf as spark.sql.sources.timestampNTZTypeInference.enabled

### What changes were proposed in this pull request? Rename the TimestampNTZ data source inference configuration from `spark.sql.inferTimestampNTZInDataSources.enabled` to `spark.sql.sources.timestampNTZTypeInference.enabled`. For more context on this configuration: https://github.com/apache/spark/pull/39777 https://github.com/apache/spark/pull/39812 https://github.com/apache/spark/pull/39868

### Why are the changes needed? Since the configuration is for data sources, we can put it under the prefix `spark.sql.sources`. The new naming is consistent with another configuration, `spark.sql.sources.partitionColumnTypeInference.enabled`.

### Does this PR introduce _any_ user-facing change?

### How was this patch tested?

Closes #39885 from gengliangwang/renameConf. Authored-by: Gengliang Wang Signed-off-by: Max Gekk

--- .../org/apache/spark/sql/internal/SQLConf.scala | 20 ++-- .../execution/datasources/PartitioningUtils.scala| 2 +- 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 363e763be4f..2f05c356160 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1416,6 +1416,16 @@ object SQLConf { .booleanConf .createWithDefault(true) + val INFER_TIMESTAMP_NTZ_IN_DATA_SOURCES = +buildConf("spark.sql.sources.timestampNTZTypeInference.enabled") + .doc("For the schema inference of JSON/CSV/JDBC data sources and partition directories, " +"this config determines whether to choose the TimestampNTZ type if a column can be " +"either TimestampNTZ or TimestampLTZ type. If set to true, the inference result of " +"the column will be TimestampNTZ type. Otherwise, the result will be TimestampLTZ type.") + .version("3.4.0") + .booleanConf + .createWithDefault(false) + val BUCKETING_ENABLED = buildConf("spark.sql.sources.bucketing.enabled") .doc("When false, we will treat bucketed table as normal table") .version("2.0.0") @@ -3518,16 +3528,6 @@ object SQLConf { .checkValues(TimestampTypes.values.map(_.toString)) .createWithDefault(TimestampTypes.TIMESTAMP_LTZ.toString) - val INFER_TIMESTAMP_NTZ_IN_DATA_SOURCES = -buildConf("spark.sql.inferTimestampNTZInDataSources.enabled") - .doc("For the schema inference of JSON/CSV/JDBC data sources and partition directories, " + -"this config determines whether to choose the TimestampNTZ type if a column can be " + -"either TimestampNTZ or TimestampLTZ type. If set to true, the inference result of " + -"the column will be TimestampNTZ type. Otherwise, the result will be TimestampLTZ type.") - .version("3.4.0") - .booleanConf - .createWithDefault(false) - val DATETIME_JAVA8API_ENABLED = buildConf("spark.sql.datetime.java8API.enabled") .doc("If the configuration property is set to true, java.time.Instant and " + "java.time.LocalDate classes of Java 8 API are used as external types for " + diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala index 38c3f71ab49..90c45fd11dd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala @@ -490,7 +490,7 @@ object PartitioningUtils extends SQLConfHelper { val unescapedRaw = unescapePathName(raw) // try and parse the date, if no exception occurs this is a candidate to be resolved as // TimestampType or TimestampNTZType. The inference timestamp typ is controlled by the conf - "spark.sql.inferTimestampNTZInDataSources.enabled". + "spark.sql.sources.timestampNTZTypeInference.enabled". val timestampType = conf.timestampTypeInSchemaInference timestampType match { case TimestampType => timestampFormatter.parse(unescapedRaw)
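Since this email is the master twin of the same commit, one complementary note: per the `PartitioningUtils` hunk, the flag also governs partition-directory inference. A hedged sketch assuming an existing `spark` session and a hypothetical partitioned layout:

```
// Toggling the renamed flag through SQL works too, since it is a runtime conf:
spark.sql("SET spark.sql.sources.timestampNTZTypeInference.enabled=true")

// Hypothetical layout: /tmp/events/ts=2023-02-05 10%3A00%3A00/part-0.parquet
// With the flag on, the directory column `ts` is resolved as TimestampNTZType;
// with it off (the default), it is resolved as TimestampType (LTZ).
val byPartition = spark.read.parquet("/tmp/events")
byPartition.schema("ts").dataType
```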
[spark] branch branch-3.4 updated: [SPARK-42343][CORE] Ignore `IOException` in `handleBlockRemovalFailure` if SparkContext is stopped
This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch branch-3.4 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.4 by this push: new 177947381c5 [SPARK-42343][CORE] Ignore `IOException` in `handleBlockRemovalFailure` if SparkContext is stopped 177947381c5 is described below

commit 177947381c51e753b2de02022e6001bbde953652 Author: Dongjoon Hyun AuthorDate: Sat Feb 4 21:16:01 2023 -0800 [SPARK-42343][CORE] Ignore `IOException` in `handleBlockRemovalFailure` if SparkContext is stopped

### What changes were proposed in this pull request? This PR aims to suppress verbose `IOException` warnings in `BlockManagerMasterEndpoint.handleBlockRemovalFailure` in case of a stopped `SparkContext`.

### Why are the changes needed? Although we ignore these kinds of exceptions during `SparkContext.stop`, this PR can prevent misleading error messages like the following.

```
23/02/04 01:26:05 INFO SparkUI: Stopped Spark web UI at http://driver-svc.default.svc:4040
23/02/04 01:26:05 INFO KubernetesClusterSchedulerBackend: Shutting down all executors
23/02/04 01:26:05 INFO KubernetesClusterSchedulerBackend$KubernetesDriverEndpoint: Asking each executor to shut down
23/02/04 01:26:05 WARN ExecutorPodsWatchSnapshotSource: Kubernetes client has been closed.
23/02/04 01:26:05 ERROR TransportResponseHandler: Still have 1 requests outstanding when connection from /172.31.215.139:47148 is closed
23/02/04 01:26:05 WARN BlockManagerMasterEndpoint: Error trying to remove shuffle 0 from block manager BlockManagerId(3, 172.31.215.139, 37477, None)
java.io.IOException: Connection from /172.31.215.139:47148 closed
  at org.apache.spark.network.client.TransportResponseHandler.channelInactive(TransportResponseHandler.java:147)
  at org.apache.spark.network.server.TransportChannelHandler.channelInactive(TransportChannelHandler.java:117)
  at io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:262)
  at io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:248)
  at io.netty.channel.AbstractChannelHandlerContext.fireChannelInactive(AbstractChannelHandlerContext.java:241)
  at io.netty.channel.ChannelInboundHandlerAdapter.channelInactive(ChannelInboundHandlerAdapter.java:81)
  at io.netty.handler.timeout.IdleStateHandler.channelInactive(IdleStateHandler.java:277)
  at io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:262)
  at io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:248)
  at io.netty.channel.AbstractChannelHandlerContext.fireChannelInactive(AbstractChannelHandlerContext.java:241)
  at io.netty.channel.ChannelInboundHandlerAdapter.channelInactive(ChannelInboundHandlerAdapter.java:81)
  at org.apache.spark.network.util.TransportFrameDecoder.channelInactive(TransportFrameDecoder.java:225)
  at io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:262)
  at io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:248)
  at io.netty.channel.AbstractChannelHandlerContext.fireChannelInactive(AbstractChannelHandlerContext.java:241)
  at io.netty.channel.DefaultChannelPipeline$HeadContext.channelInactive(DefaultChannelPipeline.java:1405)
  at io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:262)
  at io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:248)
  at io.netty.channel.DefaultChannelPipeline.fireChannelInactive(DefaultChannelPipeline.java:901)
  at io.netty.channel.AbstractChannel$AbstractUnsafe$8.run(AbstractChannel.java:831)
  at io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)
  at io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:469)
  at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:497)
  at io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:986)
  at io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)
  at io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)
  at java.base/java.lang.Thread.run(Thread.java:829)
```

### Does this PR introduce _any_ user-facing change? Yes, but this PR changes `log` only, and this happens when the SparkContext is stopped.
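The gist of the fix, as a hedged standalone sketch rather than the actual `BlockManagerMasterEndpoint` code: once shutdown has begun, an `IOException` from a block-removal RPC is expected and should not be logged as a warning. Here `stopped` and `removeShuffle` are hypothetical stand-ins:

```
import java.io.IOException
import java.util.concurrent.atomic.AtomicBoolean
import scala.util.{Failure, Success, Try}

object BlockRemovalSketch {
  private val stopped = new AtomicBoolean(false) // hypothetical shutdown flag

  // Pretend RPC that fails once connections start closing during shutdown.
  private def removeShuffle(id: Int): Try[Unit] =
    if (stopped.get) Failure(new IOException(s"Connection closed while removing shuffle $id"))
    else Success(())

  def handleBlockRemovalFailure(id: Int): Unit = removeShuffle(id) match {
    case Failure(_: IOException) if stopped.get =>
      // Expected while stopping: stay quiet instead of emitting a scary stack trace.
      println(s"INFO ignoring IOException for shuffle $id during shutdown")
    case Failure(e) =>
      println(s"WARN error trying to remove shuffle $id: $e") // still noteworthy otherwise
    case Success(_) => ()
  }

  def main(args: Array[String]): Unit = {
    stopped.set(true)
    handleBlockRemovalFailure(0) // prints the quiet INFO line, not a warning
  }
}
```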
[spark] branch master updated: [SPARK-42343][CORE] Ignore `IOException` in `handleBlockRemovalFailure` if SparkContext is stopped
This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 67285c3461f [SPARK-42343][CORE] Ignore `IOException` in `handleBlockRemovalFailure` if SparkContext is stopped 67285c3461f is described below

commit 67285c3461fe9e2ecb3a00e930115465ffc080ca Author: Dongjoon Hyun AuthorDate: Sat Feb 4 21:16:01 2023 -0800 [SPARK-42343][CORE] Ignore `IOException` in `handleBlockRemovalFailure` if SparkContext is stopped

### What changes were proposed in this pull request? This PR aims to suppress verbose `IOException` warnings in `BlockManagerMasterEndpoint.handleBlockRemovalFailure` in case of a stopped `SparkContext`.

### Why are the changes needed? Although we ignore these kinds of exceptions during `SparkContext.stop`, this PR can prevent misleading error messages like the following.

```
23/02/04 01:26:05 INFO SparkUI: Stopped Spark web UI at http://driver-svc.default.svc:4040
23/02/04 01:26:05 INFO KubernetesClusterSchedulerBackend: Shutting down all executors
23/02/04 01:26:05 INFO KubernetesClusterSchedulerBackend$KubernetesDriverEndpoint: Asking each executor to shut down
23/02/04 01:26:05 WARN ExecutorPodsWatchSnapshotSource: Kubernetes client has been closed.
23/02/04 01:26:05 ERROR TransportResponseHandler: Still have 1 requests outstanding when connection from /172.31.215.139:47148 is closed
23/02/04 01:26:05 WARN BlockManagerMasterEndpoint: Error trying to remove shuffle 0 from block manager BlockManagerId(3, 172.31.215.139, 37477, None)
java.io.IOException: Connection from /172.31.215.139:47148 closed
  at org.apache.spark.network.client.TransportResponseHandler.channelInactive(TransportResponseHandler.java:147)
  at org.apache.spark.network.server.TransportChannelHandler.channelInactive(TransportChannelHandler.java:117)
  at io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:262)
  at io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:248)
  at io.netty.channel.AbstractChannelHandlerContext.fireChannelInactive(AbstractChannelHandlerContext.java:241)
  at io.netty.channel.ChannelInboundHandlerAdapter.channelInactive(ChannelInboundHandlerAdapter.java:81)
  at io.netty.handler.timeout.IdleStateHandler.channelInactive(IdleStateHandler.java:277)
  at io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:262)
  at io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:248)
  at io.netty.channel.AbstractChannelHandlerContext.fireChannelInactive(AbstractChannelHandlerContext.java:241)
  at io.netty.channel.ChannelInboundHandlerAdapter.channelInactive(ChannelInboundHandlerAdapter.java:81)
  at org.apache.spark.network.util.TransportFrameDecoder.channelInactive(TransportFrameDecoder.java:225)
  at io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:262)
  at io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:248)
  at io.netty.channel.AbstractChannelHandlerContext.fireChannelInactive(AbstractChannelHandlerContext.java:241)
  at io.netty.channel.DefaultChannelPipeline$HeadContext.channelInactive(DefaultChannelPipeline.java:1405)
  at io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:262)
  at io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:248)
  at io.netty.channel.DefaultChannelPipeline.fireChannelInactive(DefaultChannelPipeline.java:901)
  at io.netty.channel.AbstractChannel$AbstractUnsafe$8.run(AbstractChannel.java:831)
  at io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)
  at io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:469)
  at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:497)
  at io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:986)
  at io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)
  at io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)
  at java.base/java.lang.Thread.run(Thread.java:829)
```

### Does this PR introduce _any_ user-facing change? Yes, but this PR changes `log` only, and this happens when the SparkContext is stopped.
[spark] branch branch-3.4 updated: [SPARK-42334][CONNECT][BUILD] Make sure connect client assembly and sql package is built before running client tests - SBT
This is an automated email from the ASF dual-hosted git repository. hvanhovell pushed a commit to branch branch-3.4 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.4 by this push: new 26f100311ec [SPARK-42334][CONNECT][BUILD] Make sure connect client assembly and sql package is built before running client tests - SBT 26f100311ec is described below

commit 26f100311ec769ab9eb5473f2190cdda684e3994 Author: yangjie01 AuthorDate: Sat Feb 4 21:42:38 2023 -0400 [SPARK-42334][CONNECT][BUILD] Make sure connect client assembly and sql package is built before running client tests - SBT

### What changes were proposed in this pull request? `build/sbt clean "connect-client-jvm/test"` will fail after SPARK-42172 was merged, so this PR makes sure sbt assembles the connect client assembly jar and packages the sql jar before we run the `CompatibilitySuite` of the client module.

### Why are the changes needed? Similar to SPARK-42284, it makes it easier to develop and test the JVM client for Spark Connect.

### Does this PR introduce _any_ user-facing change? No

### How was this patch tested? Manually tested.

```
build/sbt clean "connect-client-jvm/test"
```

**Before**

```
[info] - compatibility MiMa tests *** FAILED *** (34 milliseconds)
[info] java.lang.AssertionError: assertion failed: Failed to find the jar inside folder: /Users/yangjie01/SourceCode/git/spark-mine-sbt/connector/connect/client/jvm/target
[info] at scala.Predef$.assert(Predef.scala:223)
[info] at org.apache.spark.sql.connect.client.util.IntegrationTestUtils$.findJar(IntegrationTestUtils.scala:67)
[info] at org.apache.spark.sql.connect.client.CompatibilitySuite.clientJar$lzycompute(CompatibilitySuite.scala:57)
[info] at org.apache.spark.sql.connect.client.CompatibilitySuite.clientJar(CompatibilitySuite.scala:53)
[info] at org.apache.spark.sql.connect.client.CompatibilitySuite.$anonfun$new$1(CompatibilitySuite.scala:69)
[info] *** 2 TESTS FAILED ***
[error] Failed tests:
[error] org.apache.spark.sql.connect.client.CompatibilitySuite
[error] (connect-client-jvm / Test / test) sbt.TestsFailedException: Tests unsuccessful
[error] Total time: 196 s (03:16), completed 2023-2-3 17:20:40
```

**After**

```
[info] Run completed in 20 seconds, 652 milliseconds.
[info] Total number of tests run: 31
[info] Suites: completed 6, aborted 0
[info] Tests: succeeded 31, failed 0, canceled 0, ignored 0, pending 0
[info] All tests passed.
[success] Total time: 230 s (03:50), completed 2023-2-3 17:35:37
```

Closes #39874 from LuciferYang/make-test. Authored-by: yangjie01 Signed-off-by: Herman van Hovell (cherry picked from commit 104a546f6d9e1d2d7c63ced9cbbdb588110252e9) Signed-off-by: Herman van Hovell

--- project/SparkBuild.scala | 9 - 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 4eb17e88d4d..a4c8d62dd6e 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -829,6 +829,7 @@ object SparkConnect { object SparkConnectClient { import BuildCommons.protoVersion + val buildTestDeps = TaskKey[Unit]("buildTestDeps", "Build needed dependencies for test.") lazy val settings = Seq( // For some reason the resolution from the imported Maven build does not work for some @@ -851,8 +852,14 @@ object SparkConnectClient { ) }, +buildTestDeps := { + (LocalProject("sql") / Compile / Keys.`package`).value + (LocalProject("connect") / assembly).value + (LocalProject("connect-client-jvm") / assembly).value +}, + // Make sure the connect server assembly jar is available for testing. -test := ((Test / test) dependsOn (LocalProject("connect") / assembly)).value, +test := ((Test / test) dependsOn (buildTestDeps)).value, (assembly / test) := { },
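The sbt idiom in the diff above generalizes: declare one umbrella `TaskKey`, reference each prerequisite task's `.value` inside its body so they become dependencies, and hang `test` off the umbrella. A hedged sketch with hypothetical project names, not Spark's actual build:

```
import sbt._
import sbt.Keys._

object TestDepsSketch {
  // Umbrella task that forces prerequisite artifacts to exist before tests run.
  val buildTestDeps = TaskKey[Unit]("buildTestDeps", "Build needed dependencies for test.")

  lazy val settings: Seq[Setting[_]] = Seq(
    buildTestDeps := {
      // Each .value reference makes that task a dependency of buildTestDeps,
      // so sbt schedules them (in parallel where possible) before this body completes.
      (LocalProject("server") / Compile / Keys.`package`).value // hypothetical project
      (LocalProject("client") / Compile / Keys.`package`).value // hypothetical project
    },
    // Now `test` cannot start until every expected jar has been produced.
    test := ((Test / test) dependsOn buildTestDeps).value
  )
}
```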
[spark] branch master updated: [SPARK-42334][CONNECT][BUILD] Make sure connect client assembly and sql package is built before running client tests - SBT
This is an automated email from the ASF dual-hosted git repository. hvanhovell pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 104a546f6d9 [SPARK-42334][CONNECT][BUILD] Make sure connect client assembly and sql package is built before running client tests - SBT 104a546f6d9 is described below

commit 104a546f6d9e1d2d7c63ced9cbbdb588110252e9 Author: yangjie01 AuthorDate: Sat Feb 4 21:42:38 2023 -0400 [SPARK-42334][CONNECT][BUILD] Make sure connect client assembly and sql package is built before running client tests - SBT

### What changes were proposed in this pull request? `build/sbt clean "connect-client-jvm/test"` will fail after SPARK-42172 was merged, so this PR makes sure sbt assembles the connect client assembly jar and packages the sql jar before we run the `CompatibilitySuite` of the client module.

### Why are the changes needed? Similar to SPARK-42284, it makes it easier to develop and test the JVM client for Spark Connect.

### Does this PR introduce _any_ user-facing change? No

### How was this patch tested? Manually tested.

```
build/sbt clean "connect-client-jvm/test"
```

**Before**

```
[info] - compatibility MiMa tests *** FAILED *** (34 milliseconds)
[info] java.lang.AssertionError: assertion failed: Failed to find the jar inside folder: /Users/yangjie01/SourceCode/git/spark-mine-sbt/connector/connect/client/jvm/target
[info] at scala.Predef$.assert(Predef.scala:223)
[info] at org.apache.spark.sql.connect.client.util.IntegrationTestUtils$.findJar(IntegrationTestUtils.scala:67)
[info] at org.apache.spark.sql.connect.client.CompatibilitySuite.clientJar$lzycompute(CompatibilitySuite.scala:57)
[info] at org.apache.spark.sql.connect.client.CompatibilitySuite.clientJar(CompatibilitySuite.scala:53)
[info] at org.apache.spark.sql.connect.client.CompatibilitySuite.$anonfun$new$1(CompatibilitySuite.scala:69)
[info] *** 2 TESTS FAILED ***
[error] Failed tests:
[error] org.apache.spark.sql.connect.client.CompatibilitySuite
[error] (connect-client-jvm / Test / test) sbt.TestsFailedException: Tests unsuccessful
[error] Total time: 196 s (03:16), completed 2023-2-3 17:20:40
```

**After**

```
[info] Run completed in 20 seconds, 652 milliseconds.
[info] Total number of tests run: 31
[info] Suites: completed 6, aborted 0
[info] Tests: succeeded 31, failed 0, canceled 0, ignored 0, pending 0
[info] All tests passed.
[success] Total time: 230 s (03:50), completed 2023-2-3 17:35:37
```

Closes #39874 from LuciferYang/make-test. Authored-by: yangjie01 Signed-off-by: Herman van Hovell

--- project/SparkBuild.scala | 9 - 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 4eb17e88d4d..a4c8d62dd6e 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -829,6 +829,7 @@ object SparkConnect { object SparkConnectClient { import BuildCommons.protoVersion + val buildTestDeps = TaskKey[Unit]("buildTestDeps", "Build needed dependencies for test.") lazy val settings = Seq( // For some reason the resolution from the imported Maven build does not work for some @@ -851,8 +852,14 @@ object SparkConnectClient { ) }, +buildTestDeps := { + (LocalProject("sql") / Compile / Keys.`package`).value + (LocalProject("connect") / assembly).value + (LocalProject("connect-client-jvm") / assembly).value +}, + // Make sure the connect server assembly jar is available for testing. -test := ((Test / test) dependsOn (LocalProject("connect") / assembly)).value, +test := ((Test / test) dependsOn (buildTestDeps)).value, (assembly / test) := { },
[spark] branch branch-3.4 updated: [SPARK-42341][SQL][TESTS] Fix JoinSelectionHelperSuite and PlanStabilitySuite to use explicit broadcast threshold
This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch branch-3.4 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.4 by this push: new 5f344d1aa3b [SPARK-42341][SQL][TESTS] Fix JoinSelectionHelperSuite and PlanStabilitySuite to use explicit broadcast threshold 5f344d1aa3b is described below

commit 5f344d1aa3bba02b1568ad394f2deb2f552efc5d Author: Dongjoon Hyun AuthorDate: Sat Feb 4 10:11:07 2023 -0800 [SPARK-42341][SQL][TESTS] Fix JoinSelectionHelperSuite and PlanStabilitySuite to use explicit broadcast threshold

### What changes were proposed in this pull request? This PR fixes `JoinSelectionHelperSuite` and `PlanStabilitySuite` to use an explicit broadcast threshold, according to the tests' assumptions.

### Why are the changes needed? To make the tests independent and clear.

### Does this PR introduce _any_ user-facing change? No. This is a test-only PR.

### How was this patch tested? Pass the CIs.

Closes #39881 from dongjoon-hyun/SPARK-42341. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun (cherry picked from commit 80d673da6a6f8e18d835f7c524622dfeb4bf5898) Signed-off-by: Dongjoon Hyun

--- .../optimizer/JoinSelectionHelperSuite.scala | 26 +- .../org/apache/spark/sql/PlanStabilitySuite.scala | 3 ++- 2 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinSelectionHelperSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinSelectionHelperSuite.scala index 5024d0135b4..6acce44922f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinSelectionHelperSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinSelectionHelperSuite.scala @@ -103,15 +103,17 @@ class JoinSelectionHelperSuite extends PlanTest with JoinSelectionHelper { } test("getBroadcastBuildSide (hintOnly = false) return None when right has no broadcast hint") { -val broadcastSide = getBroadcastBuildSide( - left, - right, - Inner, - JoinHint(None, hintNotToBroadcast ), - hintOnly = false, - SQLConf.get -) -assert(broadcastSide === None) +withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "10MB") { + val broadcastSide = getBroadcastBuildSide( +left, +right, +Inner, +JoinHint(None, hintNotToBroadcast ), +hintOnly = false, +SQLConf.get + ) + assert(broadcastSide === None) +} } test("getShuffleHashJoinBuildSide (hintOnly = true) return BuildLeft with only a left hint") { @@ -179,8 +181,10 @@ class JoinSelectionHelperSuite extends PlanTest with JoinSelectionHelper { } test("canBroadcastBySize should return true if the plan size is less than 10MB") { -assert(canBroadcastBySize(left, SQLConf.get) === false) -assert(canBroadcastBySize(right, SQLConf.get) === true) +withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "10MB") { + assert(canBroadcastBySize(left, SQLConf.get) === false) + assert(canBroadcastBySize(right, SQLConf.get) === true) +} } }

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala index 643dcc20c65..b5b34922694 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala @@ -257,7 +257,8 @@ trait PlanStabilitySuite extends DisableAdaptiveExecutionSuite { val queryString = resourceToString(s"$tpcdsGroup/$query.sql", classLoader = Thread.currentThread().getContextClassLoader) // Disable char/varchar read-side handling for better performance. -withSQLConf(SQLConf.READ_SIDE_CHAR_PADDING.key -> "false") { +withSQLConf(SQLConf.READ_SIDE_CHAR_PADDING.key -> "false", +SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "10MB") { val qe = sql(queryString).queryExecution val plan = qe.executedPlan val explain = normalizeLocation(normalizeIds(qe.explainString(FormattedMode)))
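The pattern being applied is `withSQLConf`, which pins a conf for the duration of a block and restores the previous value afterwards, so the assertions stop depending on the global default. A condensed sketch of the idiom, assuming a suite with Spark's `SQLHelper`/`PlanTest` mixins as in the diff (the test name is illustrative):

```
test("broadcast-by-size uses the pinned threshold") {
  withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "10MB") {
    // Within this block the threshold is exactly 10MB; on exit (even via an
    // exception) the previous session value is restored.
    assert(canBroadcastBySize(left, SQLConf.get) === false) // left exceeds 10MB
    assert(canBroadcastBySize(right, SQLConf.get) === true) // right fits under 10MB
  }
}
```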
[spark] branch master updated: [SPARK-42341][SQL][TESTS] Fix JoinSelectionHelperSuite and PlanStabilitySuite to use explicit broadcast threshold
This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 80d673da6a6 [SPARK-42341][SQL][TESTS] Fix JoinSelectionHelperSuite and PlanStabilitySuite to use explicit broadcast threshold 80d673da6a6 is described below

commit 80d673da6a6f8e18d835f7c524622dfeb4bf5898 Author: Dongjoon Hyun AuthorDate: Sat Feb 4 10:11:07 2023 -0800 [SPARK-42341][SQL][TESTS] Fix JoinSelectionHelperSuite and PlanStabilitySuite to use explicit broadcast threshold

### What changes were proposed in this pull request? This PR fixes `JoinSelectionHelperSuite` and `PlanStabilitySuite` to use an explicit broadcast threshold, according to the tests' assumptions.

### Why are the changes needed? To make the tests independent and clear.

### Does this PR introduce _any_ user-facing change? No. This is a test-only PR.

### How was this patch tested? Pass the CIs.

Closes #39881 from dongjoon-hyun/SPARK-42341. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun

--- .../optimizer/JoinSelectionHelperSuite.scala | 26 +- .../org/apache/spark/sql/PlanStabilitySuite.scala | 3 ++- 2 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinSelectionHelperSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinSelectionHelperSuite.scala index 5024d0135b4..6acce44922f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinSelectionHelperSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinSelectionHelperSuite.scala @@ -103,15 +103,17 @@ class JoinSelectionHelperSuite extends PlanTest with JoinSelectionHelper { } test("getBroadcastBuildSide (hintOnly = false) return None when right has no broadcast hint") { -val broadcastSide = getBroadcastBuildSide( - left, - right, - Inner, - JoinHint(None, hintNotToBroadcast ), - hintOnly = false, - SQLConf.get -) -assert(broadcastSide === None) +withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "10MB") { + val broadcastSide = getBroadcastBuildSide( +left, +right, +Inner, +JoinHint(None, hintNotToBroadcast ), +hintOnly = false, +SQLConf.get + ) + assert(broadcastSide === None) +} } test("getShuffleHashJoinBuildSide (hintOnly = true) return BuildLeft with only a left hint") { @@ -179,8 +181,10 @@ class JoinSelectionHelperSuite extends PlanTest with JoinSelectionHelper { } test("canBroadcastBySize should return true if the plan size is less than 10MB") { -assert(canBroadcastBySize(left, SQLConf.get) === false) -assert(canBroadcastBySize(right, SQLConf.get) === true) +withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "10MB") { + assert(canBroadcastBySize(left, SQLConf.get) === false) + assert(canBroadcastBySize(right, SQLConf.get) === true) +} } }

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala index 643dcc20c65..b5b34922694 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala @@ -257,7 +257,8 @@ trait PlanStabilitySuite extends DisableAdaptiveExecutionSuite { val queryString = resourceToString(s"$tpcdsGroup/$query.sql", classLoader = Thread.currentThread().getContextClassLoader) // Disable char/varchar read-side handling for better performance. -withSQLConf(SQLConf.READ_SIDE_CHAR_PADDING.key -> "false") { +withSQLConf(SQLConf.READ_SIDE_CHAR_PADDING.key -> "false", +SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "10MB") { val qe = sql(queryString).queryExecution val plan = qe.executedPlan val explain = normalizeLocation(normalizeIds(qe.explainString(FormattedMode)))
[spark] branch master updated: [SPARK-41302][SQL] Assign name to _LEGACY_ERROR_TEMP_1185
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new c49415412e3 [SPARK-41302][SQL] Assign name to _LEGACY_ERROR_TEMP_1185 c49415412e3 is described below

commit c49415412e3027a171c2691df97fe8757f26a0aa Author: narek_karapetian AuthorDate: Sat Feb 4 15:18:53 2023 +0300 [SPARK-41302][SQL] Assign name to _LEGACY_ERROR_TEMP_1185

### What changes were proposed in this pull request? This PR proposes to assign a name to `_LEGACY_ERROR_TEMP_1185` -> `IDENTIFIER_TOO_MANY_NAME_PARTS`

### Why are the changes needed? We should assign a proper name to `_LEGACY_ERROR_TEMP_*`.

### Does this PR introduce _any_ user-facing change? No

### How was this patch tested? Additional test cases were added.

Closes #39723 from NarekDW/SPARK-41302. Lead-authored-by: narek_karapetian Co-authored-by: Narek Karapetian Signed-off-by: Max Gekk

--- core/src/main/resources/error/error-classes.json | 11 +++ .../sql/connector/catalog/CatalogV2Implicits.scala | 20 + .../spark/sql/errors/QueryCompilationErrors.scala | 9 ++ .../sql/errors/QueryCompilationErrorsSuite.scala | 35 ++ .../datasources/v2/V2SessionCatalogSuite.scala | 18 +++ .../org/apache/spark/sql/jdbc/JDBCV2Suite.scala| 19 6 files changed, 89 insertions(+), 23 deletions(-)

diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 67de6c6a29d..7ecd924ea8d 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -575,6 +575,12 @@ ], "sqlState" : "42805" }, + "IDENTIFIER_TOO_MANY_NAME_PARTS" : { +"message" : [ + " is not a valid identifier as it has more than 2 name parts." +], +"sqlState" : "42601" + }, "INCOMPARABLE_PIVOT_COLUMN" : { "message" : [ "Invalid pivot column . Pivot columns must be comparable." @@ -2851,11 +2857,6 @@ "Catalog does not support ." ] }, - "_LEGACY_ERROR_TEMP_1185" : { -"message" : [ - " is not a valid as it has more than 2 name parts." -] - }, "_LEGACY_ERROR_TEMP_1186" : { "message" : [ "Multi-part identifier cannot be empty."

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala index d9f15d84d89..0c9282f9675 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala @@ -130,22 +130,20 @@ private[sql] object CatalogV2Implicits { } } +def original: String = ident.namespace() :+ ident.name() mkString "." + def asMultipartIdentifier: Seq[String] = ident.namespace :+ ident.name def asTableIdentifier: TableIdentifier = ident.namespace match { case ns if ns.isEmpty => TableIdentifier(ident.name) case Array(dbName) => TableIdentifier(ident.name, Some(dbName)) - case _ => -throw QueryCompilationErrors.identifierHavingMoreThanTwoNamePartsError( - quoted, "TableIdentifier") + case _ => throw QueryCompilationErrors.identifierTooManyNamePartsError(original) } def asFunctionIdentifier: FunctionIdentifier = ident.namespace() match { case ns if ns.isEmpty => FunctionIdentifier(ident.name()) case Array(dbName) => FunctionIdentifier(ident.name(), Some(dbName)) - case _ => -throw QueryCompilationErrors.identifierHavingMoreThanTwoNamePartsError( - quoted, "FunctionIdentifier") + case _ => throw QueryCompilationErrors.identifierTooManyNamePartsError(original) } } @@ -159,20 +157,18 @@ private[sql] object CatalogV2Implicits { def asTableIdentifier: TableIdentifier = parts match { case Seq(tblName) => TableIdentifier(tblName) case Seq(dbName, tblName) => TableIdentifier(tblName, Some(dbName)) - case _ => -throw QueryCompilationErrors.identifierHavingMoreThanTwoNamePartsError( - quoted, "TableIdentifier") + case _ => throw QueryCompilationErrors.identifierTooManyNamePartsError(original) } def asFunctionIdentifier: FunctionIdentifier = parts match { case Seq(funcName) => FunctionIdentifier(funcName) case Seq(dbName, funcName) => FunctionIdentifier(funcName, Some(dbName)) - case _ => -throw QueryCompilationErrors.identifierHavingMoreThanTwoNamePartsError( - quoted, "FunctionIdentifier") + case _ => throw QueryCompilationErrors.identifierTooManyNamePartsError(original) } def quoted:
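Condensed, the rule the diff enforces looks like the sketch below: a multi-part identifier converts to a v1 `TableIdentifier` only with at most two name parts, otherwise the newly named error is raised. The types and the error helper here are simplified stand-ins, not the Spark originals:

```
final case class TableIdentifier(table: String, database: Option[String] = None)
final class AnalysisException(msg: String) extends Exception(msg)

def identifierTooManyNamePartsError(ident: String): AnalysisException =
  new AnalysisException(
    s"[IDENTIFIER_TOO_MANY_NAME_PARTS] $ident is not a valid identifier as it has more than 2 name parts.")

def asTableIdentifier(parts: Seq[String]): TableIdentifier = parts match {
  case Seq(tbl)     => TableIdentifier(tbl)
  case Seq(db, tbl) => TableIdentifier(tbl, Some(db))
  case _            => throw identifierTooManyNamePartsError(parts.mkString("."))
}

// asTableIdentifier(Seq("db", "t"))        // => TableIdentifier("t", Some("db"))
// asTableIdentifier(Seq("cat", "db", "t")) // => throws IDENTIFIER_TOO_MANY_NAME_PARTS
```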
[spark] branch branch-3.4 updated: [SPARK-41302][SQL] Assign name to _LEGACY_ERROR_TEMP_1185
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.4 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.4 by this push: new 8aaa655da37 [SPARK-41302][SQL] Assign name to _LEGACY_ERROR_TEMP_1185 8aaa655da37 is described below

commit 8aaa655da37616c004ab12de6983bcc368212d43 Author: narek_karapetian AuthorDate: Sat Feb 4 15:18:53 2023 +0300 [SPARK-41302][SQL] Assign name to _LEGACY_ERROR_TEMP_1185

### What changes were proposed in this pull request? This PR proposes to assign a name to `_LEGACY_ERROR_TEMP_1185` -> `IDENTIFIER_TOO_MANY_NAME_PARTS`

### Why are the changes needed? We should assign a proper name to `_LEGACY_ERROR_TEMP_*`.

### Does this PR introduce _any_ user-facing change? No

### How was this patch tested? Additional test cases were added.

Closes #39723 from NarekDW/SPARK-41302. Lead-authored-by: narek_karapetian Co-authored-by: Narek Karapetian Signed-off-by: Max Gekk (cherry picked from commit c49415412e3027a171c2691df97fe8757f26a0aa) Signed-off-by: Max Gekk

--- core/src/main/resources/error/error-classes.json | 11 +++ .../sql/connector/catalog/CatalogV2Implicits.scala | 20 + .../spark/sql/errors/QueryCompilationErrors.scala | 9 ++ .../sql/errors/QueryCompilationErrorsSuite.scala | 35 ++ .../datasources/v2/V2SessionCatalogSuite.scala | 18 +++ .../org/apache/spark/sql/jdbc/JDBCV2Suite.scala| 19 6 files changed, 89 insertions(+), 23 deletions(-)

diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 069f10423a5..42de98ccb87 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -575,6 +575,12 @@ ], "sqlState" : "42805" }, + "IDENTIFIER_TOO_MANY_NAME_PARTS" : { +"message" : [ + " is not a valid identifier as it has more than 2 name parts." +], +"sqlState" : "42601" + }, "INCOMPARABLE_PIVOT_COLUMN" : { "message" : [ "Invalid pivot column . Pivot columns must be comparable." @@ -2851,11 +2857,6 @@ "Catalog does not support ." ] }, - "_LEGACY_ERROR_TEMP_1185" : { -"message" : [ - " is not a valid as it has more than 2 name parts." -] - }, "_LEGACY_ERROR_TEMP_1186" : { "message" : [ "Multi-part identifier cannot be empty."

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala index d9f15d84d89..0c9282f9675 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala @@ -130,22 +130,20 @@ private[sql] object CatalogV2Implicits { } } +def original: String = ident.namespace() :+ ident.name() mkString "." + def asMultipartIdentifier: Seq[String] = ident.namespace :+ ident.name def asTableIdentifier: TableIdentifier = ident.namespace match { case ns if ns.isEmpty => TableIdentifier(ident.name) case Array(dbName) => TableIdentifier(ident.name, Some(dbName)) - case _ => -throw QueryCompilationErrors.identifierHavingMoreThanTwoNamePartsError( - quoted, "TableIdentifier") + case _ => throw QueryCompilationErrors.identifierTooManyNamePartsError(original) } def asFunctionIdentifier: FunctionIdentifier = ident.namespace() match { case ns if ns.isEmpty => FunctionIdentifier(ident.name()) case Array(dbName) => FunctionIdentifier(ident.name(), Some(dbName)) - case _ => -throw QueryCompilationErrors.identifierHavingMoreThanTwoNamePartsError( - quoted, "FunctionIdentifier") + case _ => throw QueryCompilationErrors.identifierTooManyNamePartsError(original) } } @@ -159,20 +157,18 @@ private[sql] object CatalogV2Implicits { def asTableIdentifier: TableIdentifier = parts match { case Seq(tblName) => TableIdentifier(tblName) case Seq(dbName, tblName) => TableIdentifier(tblName, Some(dbName)) - case _ => -throw QueryCompilationErrors.identifierHavingMoreThanTwoNamePartsError( - quoted, "TableIdentifier") + case _ => throw QueryCompilationErrors.identifierTooManyNamePartsError(original) } def asFunctionIdentifier: FunctionIdentifier = parts match { case Seq(funcName) => FunctionIdentifier(funcName) case Seq(dbName, funcName) => FunctionIdentifier(funcName, Some(dbName)) - case _ => -throw QueryCompilationErrors.identifierHavingMoreThanTwoNamePartsError( - quoted, "FunctionIdentifier") +
[spark] branch branch-3.4 updated: [SPARK-42238][SQL] Introduce new error class: `INCOMPATIBLE_JOIN_TYPES`
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.4 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.4 by this push: new 5b40e8feb56 [SPARK-42238][SQL] Introduce new error class: `INCOMPATIBLE_JOIN_TYPES` 5b40e8feb56 is described below

commit 5b40e8feb56a3a595bee03e0c5c096266f5c3c63 Author: itholic AuthorDate: Sat Feb 4 13:15:25 2023 +0300 [SPARK-42238][SQL] Introduce new error class: `INCOMPATIBLE_JOIN_TYPES`

### What changes were proposed in this pull request? This PR proposes to introduce a new error class, `INCOMPATIBLE_JOIN_TYPES`, to improve the error message for incompatible join type usage.

### Why are the changes needed? The existing error classes `LATERAL_NATURAL_JOIN` and `NATURAL_CROSS_JOIN` do not logically belong under `UNSUPPORTED_FEATURE`, and their error messages are not very clear to end users.

### Does this PR introduce _any_ user-facing change? No.

### How was this patch tested? Updated UTs.

Closes #39805 from itholic/NATURAL_CROSS_JOIN. Authored-by: itholic Signed-off-by: Max Gekk (cherry picked from commit d9c0e8754d1c24ee49f9ee13efa60a5e78b18172) Signed-off-by: Max Gekk

--- core/src/main/resources/error/error-classes.json | 16 ++-- .../apache/spark/sql/catalyst/parser/AstBuilder.scala | 8 ++-- .../apache/spark/sql/errors/QueryParsingErrors.scala | 18 -- .../spark/sql/catalyst/parser/PlanParserSuite.scala| 5 +++-- .../resources/sql-tests/results/join-lateral.sql.out | 8 ++-- .../spark/sql/errors/QueryParsingErrorsSuite.scala | 10 ++ 6 files changed, 35 insertions(+), 30 deletions(-)

diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 8bcfc527eaa..069f10423a5 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -586,6 +586,12 @@ "Detected an incompatible DataSourceRegister. Please remove the incompatible library from classpath or upgrade it. Error: " ] }, + "INCOMPATIBLE_JOIN_TYPES" : { +"message" : [ + "The join types and are incompatible." +], +"sqlState" : "42613" + }, "INCOMPATIBLE_VIEW_SCHEMA_CHANGE" : { "message" : [ "The SQL query of view has an incompatible schema change and column cannot be resolved. Expected columns named but got .", @@ -1559,11 +1565,6 @@ "JOIN USING with LATERAL correlation." ] }, - "LATERAL_NATURAL_JOIN" : { -"message" : [ - "NATURAL join with LATERAL correlation." -] - }, "LITERAL_TYPE" : { "message" : [ "Literal for '' of ." @@ -1579,11 +1580,6 @@ "The target JDBC server hosting table does not support ALTER TABLE with multiple actions. Split the ALTER TABLE up into individual actions to avoid this error." ] }, - "NATURAL_CROSS_JOIN" : { -"message" : [ - "NATURAL CROSS JOIN." -] - }, "ORC_TYPE_CAST" : { "message" : [ "Unable to convert of Orc to data type ."

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index d2a1cb1eb16..dfc6e21d4a0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1331,10 +1331,14 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit throw new IllegalStateException(s"Unimplemented joinCriteria: $c") case None if ctx.NATURAL != null => if (ctx.LATERAL != null) { -throw QueryParsingErrors.lateralJoinWithNaturalJoinUnsupportedError(ctx) +throw QueryParsingErrors.incompatibleJoinTypesError( + joinType1 = ctx.LATERAL.toString, joinType2 = ctx.NATURAL.toString, ctx = ctx +) } if (baseJoinType == Cross) { -throw QueryParsingErrors.naturalCrossJoinUnsupportedError(ctx) +throw QueryParsingErrors.incompatibleJoinTypesError( + joinType1 = ctx.NATURAL.toString, joinType2 = baseJoinType.toString, ctx = ctx +) } (NaturalJoin(baseJoinType), None) case None =>

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala index e54bbb9c9d1..accf5363d6c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala +++
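For illustration, the query shape the new error class rejects — `NATURAL` (match join keys by column name) combined with `CROSS` (no join keys at all) — sketched against two hypothetical views, assuming an active `spark` session:

```
spark.range(3).toDF("a").createOrReplaceTempView("t1")
spark.range(3).toDF("b").createOrReplaceTempView("t2")

// NATURAL and CROSS contradict each other, so per the AstBuilder hunk above the
// parser now raises [INCOMPATIBLE_JOIN_TYPES] instead of the generic
// UNSUPPORTED_FEATURE message.
spark.sql("SELECT * FROM t1 NATURAL CROSS JOIN t2")
```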
[spark] branch master updated: [SPARK-42238][SQL] Introduce new error class: `INCOMPATIBLE_JOIN_TYPES`
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new d9c0e8754d1 [SPARK-42238][SQL] Introduce new error class: `INCOMPATIBLE_JOIN_TYPES` d9c0e8754d1 is described below

commit d9c0e8754d1c24ee49f9ee13efa60a5e78b18172 Author: itholic AuthorDate: Sat Feb 4 13:15:25 2023 +0300 [SPARK-42238][SQL] Introduce new error class: `INCOMPATIBLE_JOIN_TYPES`

### What changes were proposed in this pull request? This PR proposes to introduce a new error class, `INCOMPATIBLE_JOIN_TYPES`, to improve the error message for incompatible join type usage.

### Why are the changes needed? The existing error classes `LATERAL_NATURAL_JOIN` and `NATURAL_CROSS_JOIN` do not logically belong under `UNSUPPORTED_FEATURE`, and their error messages are not very clear to end users.

### Does this PR introduce _any_ user-facing change? No.

### How was this patch tested? Updated UTs.

Closes #39805 from itholic/NATURAL_CROSS_JOIN. Authored-by: itholic Signed-off-by: Max Gekk

--- core/src/main/resources/error/error-classes.json | 16 ++-- .../apache/spark/sql/catalyst/parser/AstBuilder.scala | 8 ++-- .../apache/spark/sql/errors/QueryParsingErrors.scala | 18 -- .../spark/sql/catalyst/parser/PlanParserSuite.scala| 5 +++-- .../resources/sql-tests/results/join-lateral.sql.out | 8 ++-- .../spark/sql/errors/QueryParsingErrorsSuite.scala | 10 ++ 6 files changed, 35 insertions(+), 30 deletions(-)

diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index d6a95b22b50..67de6c6a29d 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -586,6 +586,12 @@ "Detected an incompatible DataSourceRegister. Please remove the incompatible library from classpath or upgrade it. Error: " ] }, + "INCOMPATIBLE_JOIN_TYPES" : { +"message" : [ + "The join types and are incompatible." +], +"sqlState" : "42613" + }, "INCOMPATIBLE_VIEW_SCHEMA_CHANGE" : { "message" : [ "The SQL query of view has an incompatible schema change and column cannot be resolved. Expected columns named but got .", @@ -1559,11 +1565,6 @@ "JOIN USING with LATERAL correlation." ] }, - "LATERAL_NATURAL_JOIN" : { -"message" : [ - "NATURAL join with LATERAL correlation." -] - }, "LITERAL_TYPE" : { "message" : [ "Literal for '' of ." @@ -1579,11 +1580,6 @@ "The target JDBC server hosting table does not support ALTER TABLE with multiple actions. Split the ALTER TABLE up into individual actions to avoid this error." ] }, - "NATURAL_CROSS_JOIN" : { -"message" : [ - "NATURAL CROSS JOIN." -] - }, "ORC_TYPE_CAST" : { "message" : [ "Unable to convert of Orc to data type ."

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index d2a1cb1eb16..dfc6e21d4a0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1331,10 +1331,14 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit throw new IllegalStateException(s"Unimplemented joinCriteria: $c") case None if ctx.NATURAL != null => if (ctx.LATERAL != null) { -throw QueryParsingErrors.lateralJoinWithNaturalJoinUnsupportedError(ctx) +throw QueryParsingErrors.incompatibleJoinTypesError( + joinType1 = ctx.LATERAL.toString, joinType2 = ctx.NATURAL.toString, ctx = ctx +) } if (baseJoinType == Cross) { -throw QueryParsingErrors.naturalCrossJoinUnsupportedError(ctx) +throw QueryParsingErrors.incompatibleJoinTypesError( + joinType1 = ctx.NATURAL.toString, joinType2 = baseJoinType.toString, ctx = ctx +) } (NaturalJoin(baseJoinType), None) case None =>

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala index e54bbb9c9d1..accf5363d6c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala @@ -17,6 +17,8 @@ package
[spark] branch master updated: [SPARK-42297][SQL] Assign name to _LEGACY_ERROR_TEMP_2412
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 69229a5dc8b [SPARK-42297][SQL] Assign name to _LEGACY_ERROR_TEMP_2412 69229a5dc8b is described below

commit 69229a5dc8b7614352ea57cc88e93b1154115760 Author: itholic AuthorDate: Sat Feb 4 11:59:52 2023 +0300 [SPARK-42297][SQL] Assign name to _LEGACY_ERROR_TEMP_2412

### What changes were proposed in this pull request? This PR proposes to assign a name, "UNSUPPORTED_EXPR_FOR_WINDOW", to _LEGACY_ERROR_TEMP_2412.

### Why are the changes needed? We should assign a proper name to _LEGACY_ERROR_TEMP_*.

### Does this PR introduce _any_ user-facing change? No

### How was this patch tested? `./build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite*"`

Closes #39869 from itholic/LEGACY_2412. Authored-by: itholic Signed-off-by: Max Gekk

--- core/src/main/resources/error/error-classes.json | 11 +- .../sql/catalyst/analysis/CheckAnalysis.scala | 4 ++-- .../sql/catalyst/analysis/AnalysisErrorSuite.scala | 5 +++-- .../sql/errors/QueryExecutionErrorsSuite.scala | 25 ++ 4 files changed, 36 insertions(+), 9 deletions(-)

diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 030c65e2056..d6a95b22b50 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -1483,6 +1483,12 @@ "Invalid expressions: []" ] }, + "UNSUPPORTED_EXPR_FOR_WINDOW" : { +"message" : [ + "Expression not supported within a window function." +], +"sqlState" : "42P20" + }, "UNSUPPORTED_FEATURE" : { "message" : [ "The feature is not supported:" @@ -5172,11 +5178,6 @@ "Cannot specify order by or frame for ''." ] }, - "_LEGACY_ERROR_TEMP_2412" : { -"message" : [ - "Expression '' not supported within a window function." -] - }, "_LEGACY_ERROR_TEMP_2413" : { "message" : [ "Input argument to must be a constant."

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 57852bd950d..cca54a8742d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -320,8 +320,8 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB case f: PythonUDF if PythonUDF.isWindowPandasUDF(f) => // OK case other => other.failAnalysis( - errorClass = "_LEGACY_ERROR_TEMP_2412", - messageParameters = Map("sqlExpr" -> other.toString)) + errorClass = "UNSUPPORTED_EXPR_FOR_WINDOW", + messageParameters = Map("sqlExpr" -> toSQLExpr(other))) } case s: SubqueryExpression =>

diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index 56bb8b0ccc2..71d3deb36c2 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -172,7 +172,7 @@ class AnalysisErrorSuite extends AnalysisTest { "inputType" -> "\"DATE\"", "requiredType" -> "\"INT\"")) - errorTest( + errorClassTest( "invalid window function", testRelation2.select( WindowExpression( @@ -181,7 +181,8 @@ class AnalysisErrorSuite extends AnalysisTest { UnresolvedAttribute("a") :: Nil, SortOrder(UnresolvedAttribute("b"), Ascending) :: Nil, UnspecifiedFrame)).as("window")), -"not supported within a window function" :: Nil) +errorClass = "UNSUPPORTED_EXPR_FOR_WINDOW", +messageParameters = Map("sqlExpr" -> "\"0\"")) errorTest( "distinct aggregate function in window",

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala index 5d4b8e0b0c4..89e0bf7fe41 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala @@ -771,6 +771,31 @@ class QueryExecutionErrorsSuite assert(e.getErrorClass === "STREAM_FAILED")
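A sketch of the analysis failure being renamed, mirroring the `"0"` literal in the updated `AnalysisErrorSuite` expectation via the DataFrame API; the DataFrame here is hypothetical and an active `spark` session is assumed:

```
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions._

val df = spark.range(5).toDF("a")

// Column.over accepts any expression syntactically, but analysis only permits
// genuine window functions (ranking, aggregate, offset functions). A bare
// literal now fails with [UNSUPPORTED_EXPR_FOR_WINDOW] and sqlExpr "0",
// rather than the old _LEGACY_ERROR_TEMP_2412 message.
df.select(lit(0).over(Window.orderBy("a"))).show()
```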
[spark] branch branch-3.4 updated: [SPARK-42297][SQL] Assign name to _LEGACY_ERROR_TEMP_2412
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.4 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.4 by this push: new 0f99b20164a [SPARK-42297][SQL] Assign name to _LEGACY_ERROR_TEMP_2412 0f99b20164a is described below

commit 0f99b20164a8b0301a842ae9df1cd1ebd9da0ba7 Author: itholic AuthorDate: Sat Feb 4 11:59:52 2023 +0300 [SPARK-42297][SQL] Assign name to _LEGACY_ERROR_TEMP_2412

### What changes were proposed in this pull request? This PR proposes to assign a name, "UNSUPPORTED_EXPR_FOR_WINDOW", to _LEGACY_ERROR_TEMP_2412.

### Why are the changes needed? We should assign a proper name to _LEGACY_ERROR_TEMP_*.

### Does this PR introduce _any_ user-facing change? No

### How was this patch tested? `./build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite*"`

Closes #39869 from itholic/LEGACY_2412. Authored-by: itholic Signed-off-by: Max Gekk (cherry picked from commit 69229a5dc8b7614352ea57cc88e93b1154115760) Signed-off-by: Max Gekk

--- core/src/main/resources/error/error-classes.json | 11 +- .../sql/catalyst/analysis/CheckAnalysis.scala | 4 ++-- .../sql/catalyst/analysis/AnalysisErrorSuite.scala | 5 +++-- .../sql/errors/QueryExecutionErrorsSuite.scala | 25 ++ 4 files changed, 36 insertions(+), 9 deletions(-)

diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 7cd70bda8bb..8bcfc527eaa 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -1483,6 +1483,12 @@ "Invalid expressions: []" ] }, + "UNSUPPORTED_EXPR_FOR_WINDOW" : { +"message" : [ + "Expression not supported within a window function." +], +"sqlState" : "42P20" + }, "UNSUPPORTED_FEATURE" : { "message" : [ "The feature is not supported:" @@ -5172,11 +5178,6 @@ "Cannot specify order by or frame for ''." ] }, - "_LEGACY_ERROR_TEMP_2412" : { -"message" : [ - "Expression '' not supported within a window function." -] - }, "_LEGACY_ERROR_TEMP_2413" : { "message" : [ "Input argument to must be a constant."

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 57852bd950d..cca54a8742d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -320,8 +320,8 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB case f: PythonUDF if PythonUDF.isWindowPandasUDF(f) => // OK case other => other.failAnalysis( - errorClass = "_LEGACY_ERROR_TEMP_2412", - messageParameters = Map("sqlExpr" -> other.toString)) + errorClass = "UNSUPPORTED_EXPR_FOR_WINDOW", + messageParameters = Map("sqlExpr" -> toSQLExpr(other))) } case s: SubqueryExpression =>

diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index 56bb8b0ccc2..71d3deb36c2 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -172,7 +172,7 @@ class AnalysisErrorSuite extends AnalysisTest { "inputType" -> "\"DATE\"", "requiredType" -> "\"INT\"")) - errorTest( + errorClassTest( "invalid window function", testRelation2.select( WindowExpression( @@ -181,7 +181,8 @@ class AnalysisErrorSuite extends AnalysisTest { UnresolvedAttribute("a") :: Nil, SortOrder(UnresolvedAttribute("b"), Ascending) :: Nil, UnspecifiedFrame)).as("window")), -"not supported within a window function" :: Nil) +errorClass = "UNSUPPORTED_EXPR_FOR_WINDOW", +messageParameters = Map("sqlExpr" -> "\"0\"")) errorTest( "distinct aggregate function in window",

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala index 5d4b8e0b0c4..89e0bf7fe41 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala @@ -771,6 +771,31 @@ class