This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch branch-3.1 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.1 by this push: new 1401faf [MINOR][SQL][DOCS] Fix some wrong default values in SQL tuning guide's AQE section 1401faf is described below commit 1401faf641bf5d6bf3d129d79b8b1533ea3e3058 Author: Kent Yao <y...@apache.org> AuthorDate: Wed Mar 3 15:00:09 2021 +0900 [MINOR][SQL][DOCS] Fix some wrong default values in SQL tuning guide's AQE section ### What changes were proposed in this pull request? spark.sql.adaptive.coalescePartitions.initialPartitionNum 200 -> (none) spark.sql.adaptive.skewJoin.skewedPartitionFactor 10 -> 5 ### Why are the changes needed? The wrong default values in the docs mislead people. ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? passing doc Closes #31717 from yaooqinn/minordoc0. Authored-by: Kent Yao <y...@apache.org> Signed-off-by: HyukjinKwon <gurwls...@apache.org> (cherry picked from commit 499f6200371e9ef33f7f1efba9836e42a7e1b89b) Signed-off-by: HyukjinKwon <gurwls...@apache.org> --- docs/sql-performance-tuning.md | 6 +++--- .../src/main/scala/org/apache/spark/sql/internal/SQLConf.scala | 2 +- .../apache/spark/sql/execution/adaptive/OptimizeSkewedJoin.scala | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/sql-performance-tuning.md b/docs/sql-performance-tuning.md index e99af41..bdfe6cd 100644 --- a/docs/sql-performance-tuning.md +++ b/docs/sql-performance-tuning.md @@ -255,9 +255,9 @@ This feature coalesces the post shuffle partitions based on the map output stati </tr> <tr> <td><code>spark.sql.adaptive.coalescePartitions.initialPartitionNum</code></td> - <td>200</td> + <td>(none)</td> <td> - The initial number of shuffle partitions before coalescing. By default it equals to <code>spark.sql.shuffle.partitions</code>. This configuration only has an effect when <code>spark.sql.adaptive.enabled</code> and <code>spark.sql.adaptive.coalescePartitions.enabled</code> are both enabled. 
+ The initial number of shuffle partitions before coalescing. If not set, it equals to <code>spark.sql.shuffle.partitions</code>. This configuration only has an effect when <code>spark.sql.adaptive.enabled</code> and <code>spark.sql.adaptive.coalescePartitions.enabled</code> are both enabled. </td> <td>3.0.0</td> </tr> @@ -288,7 +288,7 @@ Data skew can severely downgrade the performance of join queries. This feature d </tr> <tr> <td><code>spark.sql.adaptive.skewJoin.skewedPartitionFactor</code></td> - <td>10</td> + <td>5</td> <td> A partition is considered as skewed if its size is larger than this factor multiplying the median partition size and also larger than <code>spark.sql.adaptive.skewJoin.skewedPartitionThresholdInBytes</code>. </td> diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index fcdf910..75bfbdb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -474,7 +474,7 @@ object SQLConf { val COALESCE_PARTITIONS_INITIAL_PARTITION_NUM = buildConf("spark.sql.adaptive.coalescePartitions.initialPartitionNum") - .doc("The initial number of shuffle partitions before coalescing. By default it equals to " + + .doc("The initial number of shuffle partitions before coalescing. If not set, it equals to " + s"${SHUFFLE_PARTITIONS.key}. 
This configuration only has an effect when " + s"'${ADAPTIVE_EXECUTION_ENABLED.key}' and '${COALESCE_PARTITIONS_ENABLED.key}' " + "are both true.") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeSkewedJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeSkewedJoin.scala index 085934d..82dc9a0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeSkewedJoin.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeSkewedJoin.scala @@ -63,8 +63,8 @@ object OptimizeSkewedJoin extends CustomShuffleReaderRule { /** * A partition is considered as a skewed partition if its size is larger than the median - * partition size * ADAPTIVE_EXECUTION_SKEWED_PARTITION_FACTOR and also larger than - * ADVISORY_PARTITION_SIZE_IN_BYTES. + * partition size * SKEW_JOIN_SKEWED_PARTITION_FACTOR and also larger than + * SKEW_JOIN_SKEWED_PARTITION_THRESHOLD. */ private def isSkewed(size: Long, medianSize: Long): Boolean = { size > medianSize * conf.getConf(SQLConf.SKEW_JOIN_SKEWED_PARTITION_FACTOR) && --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org