[spark] branch master updated: [SPARK-39468][CORE] Improve `RpcAddress` to add `[]` to `IPv6` if needed
This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 7cfc40fff6a [SPARK-39468][CORE] Improve `RpcAddress` to add `[]` to `IPv6` if needed 7cfc40fff6a is described below

commit 7cfc40fff6a2bb4025b29d8dd9eb66734030a901 Author: Dongjoon Hyun AuthorDate: Tue Jun 14 20:00:05 2022 -0700

[SPARK-39468][CORE] Improve `RpcAddress` to add `[]` to `IPv6` if needed

### What changes were proposed in this pull request? This PR aims to extend the `IPv6` support in `RpcAddress` to also handle inputs that don't already have `[]`.

### Why are the changes needed? Note that Apache Spark already depends on `java.net.URI` `getHost` and `getPort`, and these assume `[]`-style IPv6. This PR additionally handles the case where the given host string doesn't have `[]`. - `RpcAddress.fromURIString` https://github.com/apache/spark/blob/683179c6813dbdccebd4063c3aac520020765692/core/src/main/scala/org/apache/spark/rpc/RpcAddress.scala#L40-L43 - `Utils.extractHostPortFromSparkUrl` https://github.com/apache/spark/blob/683179c6813dbdccebd4063c3aac520020765692/core/src/main/scala/org/apache/spark/util/Utils.scala#L2520-L2524 We need to handle the Java URI IPv6 style additionally.

```
jshell> var uri = new java.net.URI("https://[::1]:80")
uri ==> https://[::1]:80
jshell> uri.getHost()
$4 ==> "[::1]"
jshell> uri.getPort()
$5 ==> 80
```

### Does this PR introduce _any_ user-facing change? No. This is a `private[spark]` class.

### How was this patch tested? Pass the CIs with newly added test cases. This was also tested manually on an IPv6-only environment with the following command.

```
$ SERIAL_SBT_TESTS=1 SPARK_LOCAL_HOSTNAME='[2600:.(omitted)..:60cd]' build/sbt "core/test" -Djava.net.preferIPv6Addresses=true -Dtest.exclude.tags=org.apache.spark.tags.ExtendedLevelDBTest
...
[info] Run completed in 18 minutes, 43 seconds.
[info] Total number of tests run: 2950
[info] Suites: completed 284, aborted 0
[info] Tests: succeeded 2950, failed 0, canceled 4, ignored 8, pending 0
[info] All tests passed.
[info] Passed: Total 3214, Failed 0, Errors 0, Passed 3214, Ignored 8, Canceled 4
[success] Total time: 1189 s (19:49), completed Jun 14, 2022, 4:45:55 PM
```

Closes #36868 from dongjoon-hyun/SPARK-39468. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../main/scala/org/apache/spark/rpc/RpcAddress.scala | 4 +++- core/src/main/scala/org/apache/spark/util/Utils.scala | 2 +- .../scala/org/apache/spark/rpc/RpcAddressSuite.scala | 18 ++ 3 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/rpc/RpcAddress.scala b/core/src/main/scala/org/apache/spark/rpc/RpcAddress.scala index eb0b26947f5..2a2d2051799 100644 --- a/core/src/main/scala/org/apache/spark/rpc/RpcAddress.scala +++ b/core/src/main/scala/org/apache/spark/rpc/RpcAddress.scala @@ -23,7 +23,9 @@ import org.apache.spark.util.Utils /** * Address for an RPC environment, with hostname and port.
*/ -private[spark] case class RpcAddress(host: String, port: Int) { +private[spark] case class RpcAddress(_host: String, port: Int) { + + val host: String = Utils.addBracketsIfNeeded(_host) def hostPort: String = host + ":" + port diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 3e4a7e727a8..cf93897f97d 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -1089,7 +1089,7 @@ private[spark] object Utils extends Logging { addBracketsIfNeeded(customHostname.getOrElse(InetAddresses.toUriString(localIpAddress))) } - private def addBracketsIfNeeded(addr: String): String = { + private[spark] def addBracketsIfNeeded(addr: String): String = { if (addr.contains(":") && !addr.contains("[")) { "[" + addr + "]" } else { diff --git a/core/src/test/scala/org/apache/spark/rpc/RpcAddressSuite.scala b/core/src/test/scala/org/apache/spark/rpc/RpcAddressSuite.scala index b3223ec61bf..0f7c9d71330 100644 --- a/core/src/test/scala/org/apache/spark/rpc/RpcAddressSuite.scala +++ b/core/src/test/scala/org/apache/spark/rpc/RpcAddressSuite.scala @@ -52,4 +52,22 @@ class RpcAddressSuite extends SparkFunSuite { val address = RpcAddress("1.2.3.4", 1234) assert(address.toSparkURL == "spark://1.2.3.4:1234") } + + test("SPARK-39468: IPv6 hostPort") { +val address = RpcAddress("::1", 1234) +assert(address.host == "[::1]") +assert(address.port == 1234) +assert(address.hostPort == "[::1]:1234") + } + +
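For readers skimming the patch above, here is a minimal, self-contained Scala sketch of the behavior it adds: a helper that brackets bare IPv6 literals and a case class that normalizes its host on construction. The names mirror the diff, but this is only an illustration, not the actual Spark classes.

```scala
// Minimal sketch of the bracket normalization shown in the diff above.
// Illustration only; the real helper is Utils.addBracketsIfNeeded and the
// real class is org.apache.spark.rpc.RpcAddress.
object BracketUtil {
  def addBracketsIfNeeded(addr: String): String =
    if (addr.contains(":") && !addr.contains("[")) "[" + addr + "]" else addr
}

case class RpcAddress(_host: String, port: Int) {
  val host: String = BracketUtil.addBracketsIfNeeded(_host)
  def hostPort: String = host + ":" + port
}

object RpcAddressDemo extends App {
  println(RpcAddress("::1", 1234).hostPort)     // [::1]:1234  (brackets added)
  println(RpcAddress("[::1]", 1234).hostPort)   // [::1]:1234  (already bracketed, unchanged)
  println(RpcAddress("1.2.3.4", 1234).hostPort) // 1.2.3.4:1234 (IPv4 untouched)
}
```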
[GitHub] [spark-website] mateiz commented on pull request #392: Add Shop section in the community page
mateiz commented on PR #392: URL: https://github.com/apache/spark-website/pull/392#issuecomment-1155917954 Looks good! -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[GitHub] [spark-website] yaooqinn commented on pull request #392: Add Shop section in the community page
yaooqinn commented on PR #392: URL: https://github.com/apache/spark-website/pull/392#issuecomment-1155917434 late +1 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-39444][SQL] Add OptimizeSubqueries into nonExcludableRules list
This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 70a98eeb8e1 [SPARK-39444][SQL] Add OptimizeSubqueries into nonExcludableRules list 70a98eeb8e1 is described below commit 70a98eeb8e10bd6557b83ab040f6b7c689c1e9d7 Author: Yuming Wang AuthorDate: Wed Jun 15 09:46:24 2022 +0900 [SPARK-39444][SQL] Add OptimizeSubqueries into nonExcludableRules list ### What changes were proposed in this pull request? This PR adds `OptimizeSubqueries` rule into nonExcludableRules list. ### Why are the changes needed? It will throw exception if user `set spark.sql.optimizer.excludedRules=org.apache.spark.sql.catalyst.optimizer.Optimizer$OptimizeSubqueries` before running this query: ```sql WITH tmp AS ( SELECT id FROM range(2) INTERSECT SELECT id FROM range(4) ) SELECT id FROM range(5) WHERE id > (SELECT max(id) FROM x) ``` Exception: ``` logical intersect operator should have been replaced by semi-join in the optimizer java.lang.IllegalStateException: logical intersect operator should have been replaced by semi-join in the optimizer ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #36841 from wangyum/SPARK-39444. Authored-by: Yuming Wang Signed-off-by: Hyukjin Kwon --- .../spark/sql/catalyst/optimizer/Optimizer.scala | 3 ++- .../sql-tests/inputs/non-excludable-rule.sql | 9 + .../sql-tests/results/non-excludable-rule.sql.out | 23 +- 3 files changed, 33 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index a84959f0991..12e21faca9f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -270,7 +270,8 @@ abstract class Optimizer(catalogManager: CatalogManager) RewritePredicateSubquery.ruleName :: NormalizeFloatingNumbers.ruleName :: ReplaceUpdateFieldsExpression.ruleName :: - RewriteLateralSubquery.ruleName :: Nil + RewriteLateralSubquery.ruleName :: + OptimizeSubqueries.ruleName :: Nil /** * Apply finish-analysis rules for the entire plan including all subqueries. 
diff --git a/sql/core/src/test/resources/sql-tests/inputs/non-excludable-rule.sql b/sql/core/src/test/resources/sql-tests/inputs/non-excludable-rule.sql index b238d199cc1..e3799de5ff7 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/non-excludable-rule.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/non-excludable-rule.sql @@ -4,3 +4,12 @@ SELECT (SELECT min(id) FROM range(10)), (SELECT sum(id) FROM range(10)), (SELECT count(distinct id) FROM range(10)); + +-- SPARK-39444 +SET spark.sql.optimizer.excludedRules=org.apache.spark.sql.catalyst.optimizer.Optimizer$OptimizeSubqueries; +WITH tmp AS ( + SELECT id FROM range(2) + INTERSECT + SELECT id FROM range(4) +) +SELECT id FROM range(3) WHERE id > (SELECT max(id) FROM tmp); diff --git a/sql/core/src/test/resources/sql-tests/results/non-excludable-rule.sql.out b/sql/core/src/test/resources/sql-tests/results/non-excludable-rule.sql.out index c7fa2f04152..fa8b2bbec4e 100644 --- a/sql/core/src/test/resources/sql-tests/results/non-excludable-rule.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/non-excludable-rule.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 2 +-- Number of queries: 4 -- !query @@ -19,3 +19,24 @@ SELECT struct -- !query output 0 45 10 + + +-- !query +SET spark.sql.optimizer.excludedRules=org.apache.spark.sql.catalyst.optimizer.Optimizer$OptimizeSubqueries +-- !query schema +struct +-- !query output +spark.sql.optimizer.excludedRules org.apache.spark.sql.catalyst.optimizer.Optimizer$OptimizeSubqueries + + +-- !query +WITH tmp AS ( + SELECT id FROM range(2) + INTERSECT + SELECT id FROM range(4) +) +SELECT id FROM range(3) WHERE id > (SELECT max(id) FROM tmp) +-- !query schema +struct +-- !query output +2 - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
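The mechanism behind this fix is a deny-list override: rules named in `spark.sql.optimizer.excludedRules` are dropped from the optimizer batches unless they also appear in `nonExcludableRules`. The toy Scala sketch below models that filtering with plain strings to make the effect of the patch concrete; it is not Spark's actual `Optimizer` code.

```scala
// Toy model of excludedRules vs. nonExcludableRules (not Spark's Optimizer code).
object NonExcludableRulesDemo extends App {
  val allRules       = Seq("OptimizeSubqueries", "SomeOptionalRule", "AnotherRule")
  val nonExcludable  = Set("OptimizeSubqueries")                 // mandatory for correctness
  val excludedByUser = Set("OptimizeSubqueries", "AnotherRule")  // user configuration

  // A rule is only dropped if the user excluded it AND it is not mandatory.
  val effective = allRules.filterNot(r => excludedByUser.contains(r) && !nonExcludable.contains(r))

  println(effective) // List(OptimizeSubqueries, SomeOptionalRule)
}
```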
[spark-website] branch asf-site updated: Add Shop section in the community page
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch asf-site in repository https://gitbox.apache.org/repos/asf/spark-website.git The following commit(s) were added to refs/heads/asf-site by this push: new ad3e79d1d Add Shop section in the community page ad3e79d1d is described below commit ad3e79d1d309f6f231ea0fd2bf11eb77e13bf08f Author: Gengliang Wang AuthorDate: Tue Jun 14 18:31:52 2022 -0500 Add Shop section in the community page To promote the Apache Spark swag, this PR adds a Shop section on the community page: https://spark.apache.org/community.html The logo and the first sentence are from https://www.apache.org/foundation/buy_stuff.html Here is a screenshot for preview: https://user-images.githubusercontent.com/1097932/173695057-37857ab4-355a-4ec7-9b84-ed33691ab39a.png;> Author: Gengliang Wang Closes #392 from gengliangwang/shop. --- community.md | 4 images/redbubble.png | Bin 0 -> 5874 bytes site/community.html | 4 site/images/redbubble.png | Bin 0 -> 5874 bytes 4 files changed, 8 insertions(+) diff --git a/community.md b/community.md index 2d8f75fd7..6c04ee5f8 100644 --- a/community.md +++ b/community.md @@ -201,6 +201,10 @@ Spark Meetups are grass-roots events organized and hosted by individuals in the The project tracks bugs and new features on https://issues.apache.org/jira/browse/SPARK;>JIRA. If you'd like, you can also subscribe to https://mail-archives.apache.org/mod_mbox/spark-issues/;>iss...@spark.apache.org to receive emails about new issues, and https://mail-archives.apache.org/mod_mbox/spark-commits/;>commits@spark.apache.org to get emails about commits. +Shop + +The ASF has an https://www.redbubble.com/people/comdev/shop;>official store at RedBubble that Apache Community Development (ComDev) runs. In the store, https://www.redbubble.com/shop/ap/113203780;>various products featuring the Apache Spark logo are available. + Powered by Our site has a list of projects and organizations powered by Spark. diff --git a/images/redbubble.png b/images/redbubble.png new file mode 100644 index 0..61d7df5ef Binary files /dev/null and b/images/redbubble.png differ diff --git a/site/community.html b/site/community.html index 82c0af497..99a5662b5 100644 --- a/site/community.html +++ b/site/community.html @@ -329,6 +329,10 @@ vulnerabilities, and for information on known security issues. The project tracks bugs and new features on https://issues.apache.org/jira/browse/SPARK;>JIRA. If you'd like, you can also subscribe to https://mail-archives.apache.org/mod_mbox/spark-issues/;>iss...@spark.apache.org to receive emails about new issues, and https://mail-archives.apache.org/mod_mbox/spark-commits/;>commits@spark.apache.org to get emails about commits. +Shop + +The ASF has an https://www.redbubble.com/people/comdev/shop;>official store at RedBubble that Apache Community Development (ComDev) runs. In the store, https://www.redbubble.com/shop/ap/113203780;>various products featuring the Apache Spark logo are available. + Powered by Our site has a list of projects and organizations powered by Spark. diff --git a/site/images/redbubble.png b/site/images/redbubble.png new file mode 100644 index 0..61d7df5ef Binary files /dev/null and b/site/images/redbubble.png differ - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[GitHub] [spark-website] srowen closed pull request #392: Add Shop section in the community page
srowen closed pull request #392: Add Shop section in the community page URL: https://github.com/apache/spark-website/pull/392 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[GitHub] [spark-website] gengliangwang commented on pull request #392: Add Shop section in the community page
gengliangwang commented on PR #392: URL: https://github.com/apache/spark-website/pull/392#issuecomment-1155748443 cc @mateiz as well -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[GitHub] [spark-website] gengliangwang opened a new pull request, #392: Add Shop section in the community page
gengliangwang opened a new pull request, #392: URL: https://github.com/apache/spark-website/pull/392 To promote the Apache Spark swag, this PR adds a Shop section on the community page: https://spark.apache.org/community.html The logo and the first sentence are from https://www.apache.org/foundation/buy_stuff.html Here is a screenshot for preview: https://user-images.githubusercontent.com/1097932/173695057-37857ab4-355a-4ec7-9b84-ed33691ab39a.png -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-39397][SQL] Relax AliasAwareOutputExpression to support alias with expression
This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 683179c6813 [SPARK-39397][SQL] Relax AliasAwareOutputExpression to support alias with expression 683179c6813 is described below

commit 683179c6813dbdccebd4063c3aac520020765692 Author: ulysses-you AuthorDate: Wed Jun 15 00:06:06 2022 +0800

[SPARK-39397][SQL] Relax AliasAwareOutputExpression to support alias with expression

### What changes were proposed in this pull request? Change AliasAwareOutputExpression to track expressions rather than attributes when deciding whether it can normalize, so that aliased expressions can also preserve the output partitioning and ordering.

### Why are the changes needed? We pull out complex keys from grouping expressions, so a project can hold an alias of an expression. Unfortunately we may lose the output partitioning, since the current AliasAwareOutputExpression only supports preserving aliases of attributes. For example, the following query will introduce three exchanges instead of two.

```SQL
SELECT c1 + 1, count(*) FROM t1 JOIN t2 ON c1 + 1 = c2 GROUP BY c1 + 1
```

### Does this PR introduce _any_ user-facing change? No, it only improves performance.

### How was this patch tested? Added a new test.

Closes #36785 from ulysses-you/SPARK-39397. Authored-by: ulysses-you Signed-off-by: Wenchen Fan --- .../sql/execution/AliasAwareOutputExpression.scala | 12 ++-- .../org/apache/spark/sql/execution/PlannerSuite.scala | 17 + 2 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/AliasAwareOutputExpression.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/AliasAwareOutputExpression.scala index 23a9527a1b3..92e86637eec 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/AliasAwareOutputExpression.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/AliasAwareOutputExpression.scala @@ -16,7 +16,7 @@ */ package org.apache.spark.sql.execution -import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeMap, AttributeReference, Expression, NamedExpression, SortOrder} +import org.apache.spark.sql.catalyst.expressions.{Alias, Expression, NamedExpression, SortOrder} import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partitioning, PartitioningCollection, UnknownPartitioning} /** @@ -25,15 +25,15 @@ import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partition trait AliasAwareOutputExpression extends UnaryExecNode { protected def outputExpressions: Seq[NamedExpression] - private lazy val aliasMap = AttributeMap(outputExpressions.collect { -case a @ Alias(child: AttributeReference, _) => (child, a.toAttribute) - }) + private lazy val aliasMap = outputExpressions.collect { +case a @ Alias(child, _) => child.canonicalized -> a.toAttribute + }.toMap protected def hasAlias: Boolean = aliasMap.nonEmpty protected def normalizeExpression(exp: Expression): Expression = { -exp.transform { - case attr: AttributeReference => aliasMap.getOrElse(attr, attr) +exp.transformDown { + case e: Expression => aliasMap.getOrElse(e.canonicalized, e) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala index 3bc39c8b768..6f4869bf110 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala @@ -1276,6 +1276,23 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { checkSinglePartitioning(sql("SELECT /*+ REPARTITION(1) */ * FROM VALUES(1),(2),(3) AS t(c)")) checkSinglePartitioning(sql("SELECT /*+ REPARTITION(1, c) */ * FROM VALUES(1),(2),(3) AS t(c)")) } + + test("SPARK-39397: Relax AliasAwareOutputExpression to support alias with expression") { +withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + val df1 = Seq("a").toDF("c1") + val df2 = Seq("A").toDF("c2") + val df = df1.join(df2, upper($"c1") === $"c2").groupBy(upper($"c1")).agg(max($"c1")) + val numShuffles = collect(df.queryExecution.executedPlan) { +case e: ShuffleExchangeExec => e + } + val numSorts = collect(df.queryExecution.executedPlan) { +case e: SortExec => e + } + // before: numShuffles is 3, numSorts is 4 + assert(numShuffles.size == 2) + assert(numSorts.size == 2) +} + } } // Used for unit-testing EnsureRequirements
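The key change above is that the alias map is now keyed by the canonicalized child *expression* rather than by an `AttributeReference`, so a projection such as `upper(c1) AS k` allows `upper(c1)` in a required partitioning or ordering to be rewritten to the output attribute `k`. The following toy Scala sketch illustrates that normalization idea with a made-up expression type; it is not Catalyst code.

```scala
// Toy model (not Catalyst): aliases are keyed by a canonical form of the aliased
// expression, so an expression equal to an alias's child maps to the output attribute.
sealed trait Expr { def canonical: String }
case class Attr(name: String) extends Expr { def canonical: String = s"attr:$name" }
case class Upper(child: Expr) extends Expr { def canonical: String = s"upper(${child.canonical})" }

object AliasNormalizationDemo extends App {
  // Output expression `upper(c1) AS k`, recorded as canonicalized child -> output attribute.
  val aliasMap: Map[String, Expr] = Map(Upper(Attr("c1")).canonical -> Attr("k"))

  def normalize(e: Expr): Expr = aliasMap.getOrElse(e.canonical, e match {
    case Upper(c) => Upper(normalize(c))
    case other    => other
  })

  println(normalize(Upper(Attr("c1")))) // Attr(k): the aliased join/grouping key maps to the output
  println(normalize(Attr("c2")))        // Attr(c2): expressions without an alias are unchanged
}
```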
[spark] branch master updated: [SPARK-39466][CORE][TESTS] Ensure clean `core/temp-secrets/` directory after `SecurityManagerSuite`
This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new d966154af2b [SPARK-39466][CORE][TESTS] Ensure clean `core/temp-secrets/` directory after `SecurityManagerSuite` d966154af2b is described below commit d966154af2b33a9b406b58636c23ae3d70e10527 Author: yangjie01 AuthorDate: Tue Jun 14 09:03:59 2022 -0700 [SPARK-39466][CORE][TESTS] Ensure clean `core/temp-secrets/` directory after `SecurityManagerSuite` ### What changes were proposed in this pull request? This pr refactor `createTempSecretFile` in `SecurityManagerSuite` to ensure clean `core/temp-secrets/` directory after `SecurityManagerSuite`. ### Why are the changes needed? To ensure clean `core/temp-secrets/` directory after `SecurityManagerSuite`. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - Pass Github Actions - Manual test run following commands: ``` mvn clean install -DskipTests -pl core -am mvn clean test -Dtest=none -DwildcardSuites=org.apache.spark.SecurityManagerSuite -pl core git status ``` **Before** The `core/temp-secrets/` directory is not cleaned up **After** The `core/temp-secrets/` directory has been cleaned up Closes #36867 from LuciferYang/SPARK-39466. Authored-by: yangjie01 Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/SecurityManagerSuite.scala| 35 -- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/SecurityManagerSuite.scala b/core/src/test/scala/org/apache/spark/SecurityManagerSuite.scala index b31a6b4e2f9..44e338c6f00 100644 --- a/core/src/test/scala/org/apache/spark/SecurityManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/SecurityManagerSuite.scala @@ -404,15 +404,17 @@ class SecurityManagerSuite extends SparkFunSuite with ResetSystemProperties { } test("use executor-specific secret file configuration.") { -val secretFileFromDriver = createTempSecretFile("driver-secret") -val secretFileFromExecutor = createTempSecretFile("executor-secret") -val conf = new SparkConf() - .setMaster("k8s://127.0.0.1") - .set(AUTH_SECRET_FILE_DRIVER, Some(secretFileFromDriver.getAbsolutePath)) - .set(AUTH_SECRET_FILE_EXECUTOR, Some(secretFileFromExecutor.getAbsolutePath)) - .set(SecurityManager.SPARK_AUTH_CONF, "true") -val mgr = new SecurityManager(conf, authSecretFileConf = AUTH_SECRET_FILE_EXECUTOR) -assert(encodeFileAsBase64(secretFileFromExecutor) === mgr.getSecretKey()) +withSecretFile("driver-secret") { secretFileFromDriver => + withSecretFile("executor-secret") { secretFileFromExecutor => +val conf = new SparkConf() + .setMaster("k8s://127.0.0.1") + .set(AUTH_SECRET_FILE_DRIVER, Some(secretFileFromDriver.getAbsolutePath)) + .set(AUTH_SECRET_FILE_EXECUTOR, Some(secretFileFromExecutor.getAbsolutePath)) + .set(SecurityManager.SPARK_AUTH_CONF, "true") +val mgr = new SecurityManager(conf, authSecretFileConf = AUTH_SECRET_FILE_EXECUTOR) +assert(encodeFileAsBase64(secretFileFromExecutor) === mgr.getSecretKey()) + } +} } test("secret file must be defined in both driver and executor") { @@ -496,10 +498,11 @@ class SecurityManagerSuite extends SparkFunSuite with ResetSystemProperties { } case FILE => -val secretFile = createTempSecretFile() -conf.set(AUTH_SECRET_FILE, secretFile.getAbsolutePath) -mgr.initializeAuth() -assert(encodeFileAsBase64(secretFile) === mgr.getSecretKey()) +withSecretFile() { secretFile => + 
conf.set(AUTH_SECRET_FILE, secretFile.getAbsolutePath) + mgr.initializeAuth() + assert(encodeFileAsBase64(secretFile) === mgr.getSecretKey()) +} } } } @@ -511,11 +514,13 @@ class SecurityManagerSuite extends SparkFunSuite with ResetSystemProperties { Base64.getEncoder.encodeToString(Files.readAllBytes(secretFile.toPath)) } - private def createTempSecretFile(contents: String = "test-secret"): File = { + private def withSecretFile(contents: String = "test-secret")(f: File => Unit): Unit = { val secretDir = Utils.createTempDir("temp-secrets") val secretFile = new File(secretDir, "temp-secret.txt") Files.write(secretFile.toPath, contents.getBytes(UTF_8)) -secretFile +try f(secretFile) finally { + Utils.deleteRecursively(secretDir) +} } } - To unsubscribe, e-mail:
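The refactor above is the classic loan pattern: the helper both creates and deletes the secret directory, so cleanup happens even when an assertion inside the block throws. Below is a standalone sketch of the same shape using only the JDK; the Spark version shown in the diff uses `Utils.createTempDir` and `Utils.deleteRecursively` instead.

```scala
import java.io.File
import java.nio.charset.StandardCharsets.UTF_8
import java.nio.file.Files

// Standalone loan-pattern sketch: the helper owns creation and deletion of the
// temporary secret, so callers cannot leak it. Not the Spark test code itself.
object WithSecretFileDemo extends App {
  def withSecretFile[T](contents: String = "test-secret")(f: File => T): T = {
    val secretDir  = Files.createTempDirectory("temp-secrets").toFile
    val secretFile = new File(secretDir, "temp-secret.txt")
    Files.write(secretFile.toPath, contents.getBytes(UTF_8))
    try f(secretFile)
    finally {
      secretFile.delete()
      secretDir.delete()
    }
  }

  withSecretFile("driver-secret") { file =>
    println(s"secret lives at ${file.getAbsolutePath} only inside this block")
  }
}
```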
[spark] branch master updated: [SPARK-39464][CORE][TESTS] Use `Utils.localCanonicalHostName` instead of `localhost` in tests
This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 7346fb184e2 [SPARK-39464][CORE][TESTS] Use `Utils.localCanonicalHostName` instead of `localhost` in tests 7346fb184e2 is described below commit 7346fb184e2d0d27f9720b44cfdd5fb7fe5a10f5 Author: Dongjoon Hyun AuthorDate: Tue Jun 14 08:58:23 2022 -0700 [SPARK-39464][CORE][TESTS] Use `Utils.localCanonicalHostName` instead of `localhost` in tests ### What changes were proposed in this pull request? This PR aims to use `Utils.localCanonicalHostName` instead of a constant `localhost` in the following suites. - `MasterSuite` - `MasterWebUISuite` - `RocksDBBackendHistoryServerSuite` ### Why are the changes needed? These test cases fails when we run with `SPARK_LOCAL_IP` on `IPv6`-only environment. ### Does this PR introduce _any_ user-facing change? No. This is a test-only change. ### How was this patch tested? Pass the CIs first and manually test on `IPv6`-only environment. Closes #36866 from dongjoon-hyun/SPARK-39464. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../spark/deploy/history/HistoryServerSuite.scala | 35 +++--- .../apache/spark/deploy/master/MasterSuite.scala | 12 .../spark/deploy/master/ui/MasterWebUISuite.scala | 9 +++--- 3 files changed, 29 insertions(+), 27 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala index 02a32a80ddd..1aa846b3ac4 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala @@ -74,6 +74,7 @@ abstract class HistoryServerSuite extends SparkFunSuite with BeforeAndAfter with private var provider: FsHistoryProvider = null private var server: HistoryServer = null + private val localhost: String = Utils.localCanonicalHostName() private var port: Int = -1 protected def diskBackend: HybridStoreDiskBackend.Value @@ -428,12 +429,12 @@ abstract class HistoryServerSuite extends SparkFunSuite with BeforeAndAfter with // build a URL for an app or app/attempt plus a page underneath def buildURL(appId: String, suffix: String): URL = { - new URL(s"http://localhost:$port/history/$appId$suffix;) + new URL(s"http://$localhost:$port/history/$appId$suffix;) } // build a rest URL for the application and suffix. 
def applications(appId: String, suffix: String): URL = { - new URL(s"http://localhost:$port/api/v1/applications/$appId$suffix;) + new URL(s"http://$localhost:$port/api/v1/applications/$appId$suffix;) } // start initial job @@ -586,11 +587,11 @@ abstract class HistoryServerSuite extends SparkFunSuite with BeforeAndAfter with val port = server.boundPort val testUrls = Seq( - s"http://localhost:$port/api/v1/applications/$appId/1/jobs;, - s"http://localhost:$port/history/$appId/1/jobs/;, - s"http://localhost:$port/api/v1/applications/$appId/logs;, - s"http://localhost:$port/api/v1/applications/$appId/1/logs;, - s"http://localhost:$port/api/v1/applications/$appId/2/logs;) + s"http://$localhost:$port/api/v1/applications/$appId/1/jobs;, + s"http://$localhost:$port/history/$appId/1/jobs/;, + s"http://$localhost:$port/api/v1/applications/$appId/logs;, + s"http://$localhost:$port/api/v1/applications/$appId/1/logs;, + s"http://$localhost:$port/api/v1/applications/$appId/2/logs;) tests.foreach { case (user, expectedCode) => testUrls.foreach { url => @@ -609,9 +610,9 @@ abstract class HistoryServerSuite extends SparkFunSuite with BeforeAndAfter with val port = server.boundPort val testUrls = Seq( - s"http://localhost:$port/api/v1/applications/$appId/logs;, - s"http://localhost:$port/api/v1/applications/$appId/1/logs;, - s"http://localhost:$port/api/v1/applications/$appId/2/logs;) + s"http://$localhost:$port/api/v1/applications/$appId/logs;, + s"http://$localhost:$port/api/v1/applications/$appId/1/logs;, + s"http://$localhost:$port/api/v1/applications/$appId/2/logs;) testUrls.foreach { url => TestUtils.httpResponseCode(new URL(url)) @@ -636,9 +637,9 @@ abstract class HistoryServerSuite extends SparkFunSuite with BeforeAndAfter with def buildPageAttemptUrl(appId: String, attemptId: Option[Int]): URL = { attemptId match { case Some(id) => - new URL(s"http://localhost:$port/history/$appId/$id;) + new URL(s"http://$localhost:$port/history/$appId/$id;) case None => - new
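The reason these tests interpolate `Utils.localCanonicalHostName()` (which returns a bracketed literal on IPv6) is that `java.net.URI` only recognizes an IPv6 address as a host when it is written in `[...]` form. A small sketch of that JDK behavior is below; the unbracketed case reflects typical observed behavior rather than anything stated in the patch.

```scala
import java.net.URI

// Bracketed IPv6 literals parse as URI hosts; hostnames and IPv4 are unaffected.
object HostInUriDemo extends App {
  println(new URI("http://[::1]:8080/api").getHost)   // [::1]
  println(new URI("http://1.2.3.4:8080/api").getHost) // 1.2.3.4
  // Without brackets the authority is not recognized as host:port;
  // on stock JDKs getHost() typically comes back null here.
  println(new URI("http://::1:8080/api").getHost)
}
```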
[spark] branch master updated: [SPARK-39298][CORE][SQL][DSTREAM][GRAPHX][ML][MLLIB][SS][YARN] Replace constructing ranges of collection indices manually with `.indices`
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 5556cfc59aa [SPARK-39298][CORE][SQL][DSTREAM][GRAPHX][ML][MLLIB][SS][YARN] Replace constructing ranges of collection indices manually with `.indices` 5556cfc59aa is described below commit 5556cfc59aa97a3ad4ea0baacebe19859ec0bcb7 Author: yangjie01 AuthorDate: Tue Jun 14 09:36:30 2022 -0500 [SPARK-39298][CORE][SQL][DSTREAM][GRAPHX][ML][MLLIB][SS][YARN] Replace constructing ranges of collection indices manually with `.indices` ### What changes were proposed in this pull request? This pr is a trivial change: use `Seq.indices` instead of constructing ranges of collection indices manually. **Before** ```scala var x: Seq[Int] Range(0, x.size) 0 until x.size 0.to(x.size - 1) ``` **After** ```scala var x: Seq[Int] x.indices ``` ### Why are the changes needed? Use API instead of manual coding. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass Github Actions Closes #36679 from LuciferYang/seq-indices. Authored-by: yangjie01 Signed-off-by: Sean Owen --- .../spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala | 2 +- .../spark/streaming/kafka010/KafkaDataConsumerSuite.scala| 2 +- .../spark/streaming/kinesis/KinesisBackedBlockRDDSuite.scala | 2 +- core/src/main/scala/org/apache/spark/MapOutputTracker.scala | 2 +- core/src/main/scala/org/apache/spark/SparkContext.scala | 8 .../main/scala/org/apache/spark/deploy/master/Master.scala | 2 +- .../scala/org/apache/spark/metrics/ExecutorMetricType.scala | 2 +- core/src/main/scala/org/apache/spark/rdd/BlockRDD.scala | 2 +- core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala | 2 +- core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala | 2 +- core/src/main/scala/org/apache/spark/rdd/SubtractedRDD.scala | 2 +- .../scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala | 2 +- .../main/scala/org/apache/spark/storage/BlockManager.scala | 2 +- .../scala/org/apache/spark/storage/memory/MemoryStore.scala | 2 +- .../org/apache/spark/BarrierStageOnSubmittedSuite.scala | 2 +- .../org/apache/spark/rdd/ParallelCollectionSplitSuite.scala | 4 ++-- .../scala/org/apache/spark/scheduler/AQEShuffledRDD.scala| 2 +- .../spark/scheduler/CoarseGrainedSchedulerBackendSuite.scala | 2 +- .../scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala | 2 +- .../spark/scheduler/OutputCommitCoordinatorSuite.scala | 4 ++-- .../test/scala/org/apache/spark/util/FileAppenderSuite.scala | 4 ++-- .../test/scala/org/apache/spark/util/JsonProtocolSuite.scala | 2 +- .../util/collection/unsafe/sort/PrefixComparatorsSuite.scala | 2 +- .../scala/org/apache/spark/examples/MultiBroadcastTest.scala | 4 ++-- .../main/scala/org/apache/spark/examples/SparkKMeans.scala | 2 +- .../apache/spark/graphx/impl/ShippableVertexPartition.scala | 2 +- .../src/test/scala/org/apache/spark/graphx/EdgeSuite.scala | 2 +- mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala | 8 .../org/apache/spark/ml/feature/QuantileDiscretizer.scala| 2 +- .../scala/org/apache/spark/ml/feature/StringIndexer.scala| 4 ++-- .../scala/org/apache/spark/ml/tuning/CrossValidator.scala| 4 ++-- .../org/apache/spark/ml/tuning/TrainValidationSplit.scala| 4 ++-- .../org/apache/spark/ml/classification/NaiveBayesSuite.scala | 2 +- .../scala/org/apache/spark/ml/feature/BucketizerSuite.scala | 6 +++--- .../apache/spark/mllib/classification/NaiveBayesSuite.scala 
| 2 +- .../org/apache/spark/deploy/yarn/ApplicationMaster.scala | 2 +- .../spark/sql/catalyst/analysis/ResolveDefaultColumns.scala | 4 ++-- .../sql/catalyst/expressions/collectionOperations.scala | 2 +- .../org/apache/spark/sql/catalyst/util/ToNumberParser.scala | 2 +- .../spark/sql/catalyst/expressions/PredicateSuite.scala | 12 ++-- .../spark/sql/connector/catalog/InMemoryPartitionTable.scala | 2 +- .../main/scala/org/apache/spark/sql/execution/Columnar.scala | 2 +- .../org/apache/spark/sql/execution/ShuffledRowRDD.scala | 2 +- .../apache/spark/sql/streaming/StateStoreMetricsTest.scala | 2 +- .../org/apache/spark/sql/hive/execution/UDAQuerySuite.scala | 4 ++-- .../spark/streaming/scheduler/ReceiverSchedulingPolicy.scala | 2 +- .../org/apache/spark/streaming/BasicOperationsSuite.scala| 2 +- .../scala/org/apache/spark/streaming/MasterFailureTest.scala | 2 +- .../scala/org/apache/spark/streaming/TestSuiteBase.scala | 2 +- 49 files changed, 71 insertions(+), 71 deletions(-) diff --git
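For readers unfamiliar with the idiom, the rewrite is purely mechanical: the three manual spellings below build the same `Range` that `.indices` returns. This is a small self-contained sketch, not code taken from the patch.

```scala
// All three manual forms are element-wise equal to `.indices`, which also reads
// better and avoids off-by-one mistakes on the upper bound.
object IndicesDemo extends App {
  val x = Seq(10, 20, 30)
  assert(Range(0, x.size) == x.indices)
  assert((0 until x.size) == x.indices)
  assert(0.to(x.size - 1) == x.indices)
  x.indices.foreach(i => println(s"$i -> ${x(i)}"))
}
```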
[spark] branch master updated: [SPARK-39459][CORE] `local*HostName*` methods should support `IPv6`
This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new f8c544b9ef7 [SPARK-39459][CORE] `local*HostName*` methods should support `IPv6` f8c544b9ef7 is described below commit f8c544b9ef78de37b1d149405ff6ead3285eac3e Author: Dongjoon Hyun AuthorDate: Tue Jun 14 02:46:18 2022 -0700 [SPARK-39459][CORE] `local*HostName*` methods should support `IPv6` ### What changes were proposed in this pull request? This PR aims to - Support `IPv6`-only environment in `localHostName`, `localHostNameForUri` and `localCanonicalHostName` methods - have no side-effects in `IPv4` environment. ### Why are the changes needed? Currently, Apache Spark fails on pure-IPv6 environment (which doesn't have IPv4 address). **BEFORE** ``` $ SPARK_LOCAL_IP=::1 build/sbt "core/testOnly *.DistributedSuite" -Djava.net.preferIPv6Addresses=true ... Using SPARK_LOCAL_IP=::1 ... [info] *** 45 TESTS FAILED *** [error] Failed tests: [error] org.apache.spark.DistributedSuite [error] (core / Test / testOnly) sbt.TestsFailedException: Tests unsuccessful [error] Total time: 9 s, completed Jun 13, 2022, 8:38:03 PM ``` **AFTER** ``` $ SPARK_LOCAL_IP=::1 build/sbt "core/testOnly *.DistributedSuite" -Djava.net.preferIPv6Addresses=true ... Using SPARK_LOCAL_IP=::1 ... [info] Tests: succeeded 46, failed 0, canceled 0, ignored 0, pending 0 [info] All tests passed. ``` ### Does this PR introduce _any_ user-facing change? This will help IPv6-only environment users. ### How was this patch tested? Since we don't have IPv6 test CI, this should be tested in IPv6 environment manually with `DistributedSuite` and `Spark-Shell`. **DistributedSuite** ``` $ SPARK_LOCAL_IP=::1 build/sbt "core/testOnly *.DistributedSuite" -Djava.net.preferIPv6Addresses=true ``` **SPARK-SHELL** ``` $ SPARK_LOCAL_IP=2600:...:...:c26a bin/spark-shell Setting default log level to "WARN". To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel). 22/06/13 20:17:44 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable Spark context Web UI available at http://unknown1498774f7f18.attlocal.net:4040 Spark context available as 'sc' (master = local[*], app id = local-1655176664558). Spark session available as 'spark'. Welcome to __ / __/__ ___ _/ /__ _\ \/ _ \/ _ `/ __/ '_/ /___/ .__/\_,_/_/ /_/\_\ version 3.4.0-SNAPSHOT /_/ Using Scala version 2.12.16 (OpenJDK 64-Bit Server VM, Java 17.0.3) Type in expressions to have them evaluated. Type :help for more information. ``` Closes #36863 from dongjoon-hyun/SPARK-39459. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- core/src/main/scala/org/apache/spark/util/Utils.scala | 15 --- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index cf46a3d34c4..3e4a7e727a8 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -1071,21 +1071,30 @@ private[spark] object Utils extends Logging { * Get the local machine's FQDN. */ def localCanonicalHostName(): String = { -customHostname.getOrElse(localIpAddress.getCanonicalHostName) + addBracketsIfNeeded(customHostname.getOrElse(localIpAddress.getCanonicalHostName)) } /** * Get the local machine's hostname. 
+ * In case of IPv6, getHostAddress may return '0:0:0:0:0:0:0:1'. */ def localHostName(): String = { -customHostname.getOrElse(localIpAddress.getHostAddress) + addBracketsIfNeeded(customHostname.getOrElse(localIpAddress.getHostAddress)) } /** * Get the local machine's URI. */ def localHostNameForURI(): String = { -customHostname.getOrElse(InetAddresses.toUriString(localIpAddress)) + addBracketsIfNeeded(customHostname.getOrElse(InetAddresses.toUriString(localIpAddress))) + } + + private def addBracketsIfNeeded(addr: String): String = { +if (addr.contains(":") && !addr.contains("[")) { + "[" + addr + "]" +} else { + addr +} } /** - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
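The contrast the patch has to reconcile is between the raw rendering of an IPv6 address and its URI-safe form. The sketch below shows both, assuming Guava on the classpath (Spark already depends on it); the exact output strings are the typical ones, not something the commit asserts.

```scala
import java.net.InetAddress
import com.google.common.net.InetAddresses

// Raw vs. URI-safe renderings of the IPv6 loopback address.
object Ipv6RenderingDemo extends App {
  val loopback6 = InetAddress.getByName("::1")
  println(loopback6.getHostAddress)              // 0:0:0:0:0:0:0:1 (expanded, no brackets)
  println(InetAddresses.toUriString(loopback6))  // [::1] (compressed and bracketed)
}
```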
[spark] branch master updated (0b785b3c773 -> a9b2a2e92b3)
This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from 0b785b3c773 [SPARK-39448][SQL] Add `ReplaceCTERefWithRepartition` into `nonExcludableRules` list add a9b2a2e92b3 [SPARK-39381][SQL] Make vectorized orc columar writer batch size configurable No new revisions were added by this update. Summary of changes: .../scala/org/apache/spark/sql/internal/SQLConf.scala | 9 + .../sql/execution/datasources/orc/OrcFileFormat.scala | 4 +++- .../sql/execution/datasources/orc/OrcOutputWriter.scala | 5 +++-- .../spark/sql/execution/datasources/v2/orc/OrcWrite.scala | 4 +++- .../sql/execution/datasources/orc/OrcQuerySuite.scala | 15 +++ 5 files changed, 33 insertions(+), 4 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.3 updated: [SPARK-39448][SQL] Add `ReplaceCTERefWithRepartition` into `nonExcludableRules` list
This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.3 by this push: new bb0cce990c2 [SPARK-39448][SQL] Add `ReplaceCTERefWithRepartition` into `nonExcludableRules` list bb0cce990c2 is described below commit bb0cce990c214d4ca9cf3828940a2ca5350acf79 Author: Yuming Wang AuthorDate: Tue Jun 14 00:43:20 2022 -0700 [SPARK-39448][SQL] Add `ReplaceCTERefWithRepartition` into `nonExcludableRules` list ### What changes were proposed in this pull request? This PR adds `ReplaceCTERefWithRepartition` into nonExcludableRules list. ### Why are the changes needed? It will throw exception if user `set spark.sql.optimizer.excludedRules=org.apache.spark.sql.catalyst.optimizer.ReplaceCTERefWithRepartition` before running this query: ```sql SELECT (SELECT avg(id) FROM range(10)), (SELECT sum(id) FROM range(10)), (SELECT count(distinct id) FROM range(10)) ``` Exception: ``` Caused by: java.lang.AssertionError: assertion failed: No plan for WithCTE :- CTERelationDef 0, true : +- Project [named_struct(min(id), min(id)#223L, sum(id), sum(id)#226L, count(DISTINCT id), count(DISTINCT id)#229L) AS mergedValue#240] : +- Aggregate [min(id#221L) AS min(id)#223L, sum(id#221L) AS sum(id)#226L, count(distinct id#221L) AS count(DISTINCT id)#229L] :+- Range (0, 10, step=1, splits=None) +- Project [scalar-subquery#218 [].min(id) AS scalarsubquery()#230L, scalar-subquery#219 [].sum(id) AS scalarsubquery()#231L, scalar-subquery#220 [].count(DISTINCT id) AS scalarsubquery()#232L] : :- CTERelationRef 0, true, [mergedValue#240] : :- CTERelationRef 0, true, [mergedValue#240] : +- CTERelationRef 0, true, [mergedValue#240] +- OneRowRelation ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #36847 from wangyum/SPARK-39448. 
Authored-by: Yuming Wang Signed-off-by: Dongjoon Hyun (cherry picked from commit 0b785b3c77374fa7736f01bb55e87c796985ae14) Signed-off-by: Dongjoon Hyun --- .../apache/spark/sql/execution/SparkOptimizer.scala | 3 ++- .../sql-tests/inputs/non-excludable-rule.sql| 6 ++ .../sql-tests/results/non-excludable-rule.sql.out | 21 + 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala index 84e5975189b..b8861715726 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala @@ -88,7 +88,8 @@ class SparkOptimizer( GroupBasedRowLevelOperationScanPlanning.ruleName :+ V2ScanRelationPushDown.ruleName :+ V2ScanPartitioning.ruleName :+ -V2Writes.ruleName +V2Writes.ruleName :+ +ReplaceCTERefWithRepartition.ruleName /** * Optimization batches that are executed before the regular optimization batches (also before diff --git a/sql/core/src/test/resources/sql-tests/inputs/non-excludable-rule.sql b/sql/core/src/test/resources/sql-tests/inputs/non-excludable-rule.sql new file mode 100644 index 000..b238d199cc1 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/non-excludable-rule.sql @@ -0,0 +1,6 @@ +-- SPARK-39448 +SET spark.sql.optimizer.excludedRules=org.apache.spark.sql.catalyst.optimizer.ReplaceCTERefWithRepartition; +SELECT + (SELECT min(id) FROM range(10)), + (SELECT sum(id) FROM range(10)), + (SELECT count(distinct id) FROM range(10)); diff --git a/sql/core/src/test/resources/sql-tests/results/non-excludable-rule.sql.out b/sql/core/src/test/resources/sql-tests/results/non-excludable-rule.sql.out new file mode 100644 index 000..c7fa2f04152 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/non-excludable-rule.sql.out @@ -0,0 +1,21 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 2 + + +-- !query +SET spark.sql.optimizer.excludedRules=org.apache.spark.sql.catalyst.optimizer.ReplaceCTERefWithRepartition +-- !query schema +struct +-- !query output +spark.sql.optimizer.excludedRules org.apache.spark.sql.catalyst.optimizer.ReplaceCTERefWithRepartition + + +-- !query +SELECT + (SELECT min(id) FROM range(10)), + (SELECT sum(id) FROM range(10)), + (SELECT count(distinct id) FROM range(10)) +-- !query schema +struct +-- !query output +0 45 10 - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-39448][SQL] Add `ReplaceCTERefWithRepartition` into `nonExcludableRules` list
This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 0b785b3c773 [SPARK-39448][SQL] Add `ReplaceCTERefWithRepartition` into `nonExcludableRules` list 0b785b3c773 is described below commit 0b785b3c77374fa7736f01bb55e87c796985ae14 Author: Yuming Wang AuthorDate: Tue Jun 14 00:43:20 2022 -0700 [SPARK-39448][SQL] Add `ReplaceCTERefWithRepartition` into `nonExcludableRules` list ### What changes were proposed in this pull request? This PR adds `ReplaceCTERefWithRepartition` into nonExcludableRules list. ### Why are the changes needed? It will throw exception if user `set spark.sql.optimizer.excludedRules=org.apache.spark.sql.catalyst.optimizer.ReplaceCTERefWithRepartition` before running this query: ```sql SELECT (SELECT avg(id) FROM range(10)), (SELECT sum(id) FROM range(10)), (SELECT count(distinct id) FROM range(10)) ``` Exception: ``` Caused by: java.lang.AssertionError: assertion failed: No plan for WithCTE :- CTERelationDef 0, true : +- Project [named_struct(min(id), min(id)#223L, sum(id), sum(id)#226L, count(DISTINCT id), count(DISTINCT id)#229L) AS mergedValue#240] : +- Aggregate [min(id#221L) AS min(id)#223L, sum(id#221L) AS sum(id)#226L, count(distinct id#221L) AS count(DISTINCT id)#229L] :+- Range (0, 10, step=1, splits=None) +- Project [scalar-subquery#218 [].min(id) AS scalarsubquery()#230L, scalar-subquery#219 [].sum(id) AS scalarsubquery()#231L, scalar-subquery#220 [].count(DISTINCT id) AS scalarsubquery()#232L] : :- CTERelationRef 0, true, [mergedValue#240] : :- CTERelationRef 0, true, [mergedValue#240] : +- CTERelationRef 0, true, [mergedValue#240] +- OneRowRelation ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #36847 from wangyum/SPARK-39448. 
Authored-by: Yuming Wang Signed-off-by: Dongjoon Hyun --- .../apache/spark/sql/execution/SparkOptimizer.scala | 3 ++- .../sql-tests/inputs/non-excludable-rule.sql| 6 ++ .../sql-tests/results/non-excludable-rule.sql.out | 21 + 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala index 0e7455009c5..056c16affc2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala @@ -87,7 +87,8 @@ class SparkOptimizer( GroupBasedRowLevelOperationScanPlanning.ruleName :+ V2ScanRelationPushDown.ruleName :+ V2ScanPartitioning.ruleName :+ -V2Writes.ruleName +V2Writes.ruleName :+ +ReplaceCTERefWithRepartition.ruleName /** * Optimization batches that are executed before the regular optimization batches (also before diff --git a/sql/core/src/test/resources/sql-tests/inputs/non-excludable-rule.sql b/sql/core/src/test/resources/sql-tests/inputs/non-excludable-rule.sql new file mode 100644 index 000..b238d199cc1 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/non-excludable-rule.sql @@ -0,0 +1,6 @@ +-- SPARK-39448 +SET spark.sql.optimizer.excludedRules=org.apache.spark.sql.catalyst.optimizer.ReplaceCTERefWithRepartition; +SELECT + (SELECT min(id) FROM range(10)), + (SELECT sum(id) FROM range(10)), + (SELECT count(distinct id) FROM range(10)); diff --git a/sql/core/src/test/resources/sql-tests/results/non-excludable-rule.sql.out b/sql/core/src/test/resources/sql-tests/results/non-excludable-rule.sql.out new file mode 100644 index 000..c7fa2f04152 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/non-excludable-rule.sql.out @@ -0,0 +1,21 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 2 + + +-- !query +SET spark.sql.optimizer.excludedRules=org.apache.spark.sql.catalyst.optimizer.ReplaceCTERefWithRepartition +-- !query schema +struct +-- !query output +spark.sql.optimizer.excludedRules org.apache.spark.sql.catalyst.optimizer.ReplaceCTERefWithRepartition + + +-- !query +SELECT + (SELECT min(id) FROM range(10)), + (SELECT sum(id) FROM range(10)), + (SELECT count(distinct id) FROM range(10)) +-- !query schema +struct +-- !query output +0 45 10 - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
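To see the user-facing effect, here is an illustrative spark-shell session (assuming the `spark` session the shell provides). The conf key, rule name, and query are taken from the commit above; before this change the query failed with the `No plan for WithCTE` assertion, and afterwards the exclusion is effectively ignored and the query returns the values in the new golden file.

```scala
// Run in spark-shell; `spark` is the SparkSession the shell provides.
spark.conf.set("spark.sql.optimizer.excludedRules",
  "org.apache.spark.sql.catalyst.optimizer.ReplaceCTERefWithRepartition")

spark.sql(
  """SELECT
    |  (SELECT min(id) FROM range(10)),
    |  (SELECT sum(id) FROM range(10)),
    |  (SELECT count(distinct id) FROM range(10))""".stripMargin).show()
// Expected single row: 0, 45, 10 (matching non-excludable-rule.sql.out)
```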
[spark] branch master updated: [MINOR][SQL] Updated `ErrorInfo` and `ErrorSubInfo` comments and indentation
This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 99f6f77168f [MINOR][SQL] Updated `ErrorInfo` and `ErrorSubInfo` comments and indentation 99f6f77168f is described below commit 99f6f77168f442d2f6addc6f03dc51a57c607677 Author: panbingkun AuthorDate: Tue Jun 14 00:41:31 2022 -0700 [MINOR][SQL] Updated `ErrorInfo` and `ErrorSubInfo` comments and indentation ### What changes were proposed in this pull request? Correction comments & code style for ErrorSubInfo & ErrorInfo ### Why are the changes needed? Cleanup ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? N/A Closes #36865 from panbingkun/ERROR-INFO. Authored-by: panbingkun Signed-off-by: Dongjoon Hyun --- core/src/main/scala/org/apache/spark/ErrorInfo.scala | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/ErrorInfo.scala b/core/src/main/scala/org/apache/spark/ErrorInfo.scala index 6c6d51456ec..d98f5296fee 100644 --- a/core/src/main/scala/org/apache/spark/ErrorInfo.scala +++ b/core/src/main/scala/org/apache/spark/ErrorInfo.scala @@ -31,7 +31,6 @@ import org.apache.spark.util.Utils /** * Information associated with an error subclass. * - * @param subClass SubClass associated with this class. * @param message C-style message format compatible with printf. *The error message is constructed by concatenating the lines with newlines. */ @@ -45,13 +44,14 @@ private[spark] case class ErrorSubInfo(message: Seq[String]) { * Information associated with an error class. * * @param sqlState SQLSTATE associated with this class. - * @param subClass A sequence of subclasses + * @param subClass SubClass associated with this class. * @param message C-style message format compatible with printf. *The error message is constructed by concatenating the lines with newlines. */ -private[spark] case class ErrorInfo(message: Seq[String], -subClass: Option[Map[String, ErrorSubInfo]], -sqlState: Option[String]) { +private[spark] case class ErrorInfo( +message: Seq[String], +subClass: Option[Map[String, ErrorSubInfo]], +sqlState: Option[String]) { // For compatibility with multi-line error messages @JsonIgnore val messageFormat: String = message.mkString("\n") - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-39463][CORE][TESTS] Use `UUID` for test database location in `JavaJdbcRDDSuite`
This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new f1d45f12aee [SPARK-39463][CORE][TESTS] Use `UUID` for test database location in `JavaJdbcRDDSuite` f1d45f12aee is described below commit f1d45f12aee63670ef67568e5c6c07f64312ede0 Author: Dongjoon Hyun AuthorDate: Tue Jun 14 00:10:07 2022 -0700 [SPARK-39463][CORE][TESTS] Use `UUID` for test database location in `JavaJdbcRDDSuite` ### What changes were proposed in this pull request? This PR aims to use UUID instead of a fixed test database location in `JavaJdbcRDDSuite`. ### Why are the changes needed? Although there exists a clean-up logic in `JavaJdbcRDDSuite`, the location is not removed cleanly when the tests are interrupted. After this PR, we can avoid the conflicts due to the leftover. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs. Closes #36864 from dongjoon-hyun/SPARK-39463. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- core/src/test/java/org/apache/spark/JavaJdbcRDDSuite.java | 8 +--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/core/src/test/java/org/apache/spark/JavaJdbcRDDSuite.java b/core/src/test/java/org/apache/spark/JavaJdbcRDDSuite.java index 40a7c9486ae..9226b3c0bee 100644 --- a/core/src/test/java/org/apache/spark/JavaJdbcRDDSuite.java +++ b/core/src/test/java/org/apache/spark/JavaJdbcRDDSuite.java @@ -22,6 +22,7 @@ import java.sql.DriverManager; import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.Statement; +import java.util.UUID; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; @@ -32,6 +33,7 @@ import org.junit.Before; import org.junit.Test; public class JavaJdbcRDDSuite implements Serializable { + private String dbName = "db_" + UUID.randomUUID().toString().replace('-', '_'); private transient JavaSparkContext sc; @Before @@ -41,7 +43,7 @@ public class JavaJdbcRDDSuite implements Serializable { Class.forName("org.apache.derby.jdbc.EmbeddedDriver"); try (Connection connection = DriverManager.getConnection( -"jdbc:derby:target/JavaJdbcRDDSuiteDb;create=true")) { +"jdbc:derby:target/" + dbName + ";create=true")) { try (Statement create = connection.createStatement()) { create.execute( @@ -67,7 +69,7 @@ public class JavaJdbcRDDSuite implements Serializable { @After public void tearDown() throws SQLException { try { - DriverManager.getConnection("jdbc:derby:target/JavaJdbcRDDSuiteDb;shutdown=true"); + DriverManager.getConnection("jdbc:derby:target/" + dbName + ";shutdown=true"); } catch(SQLException e) { // Throw if not normal single database shutdown // https://db.apache.org/derby/docs/10.2/ref/rrefexcept71493.html @@ -84,7 +86,7 @@ public class JavaJdbcRDDSuite implements Serializable { public void testJavaJdbcRDD() throws Exception { JavaRDD rdd = JdbcRDD.create( sc, - () -> DriverManager.getConnection("jdbc:derby:target/JavaJdbcRDDSuiteDb"), + () -> DriverManager.getConnection("jdbc:derby:target/" + dbName), "SELECT DATA FROM FOO WHERE ? <= ID AND ID <= ?", 1, 100, 1, r -> r.getInt(1) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
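The fix boils down to deriving the Derby database location from a per-run UUID instead of a fixed name, so a leftover directory from an interrupted run can never collide with the next one. A compact Scala rendering of the same naming scheme follows; the suite itself is Java, as shown in the diff above.

```scala
import java.util.UUID

// Per-run unique database name, mirroring the pattern added to JavaJdbcRDDSuite.
object UniqueTestDbName extends App {
  val dbName = "db_" + UUID.randomUUID().toString.replace('-', '_')
  val url = s"jdbc:derby:target/$dbName;create=true"
  println(url) // e.g. jdbc:derby:target/db_1f0c..._...;create=true
}
```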