This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch branch-3.2 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.2 by this push: new cc2a5ab [SPARK-36384][CORE][DOC] Add doc for shuffle checksum cc2a5ab is described below commit cc2a5abf7d56192704cf5c8f1bee0b07620c89e4 Author: yi.wu <yi...@databricks.com> AuthorDate: Thu Aug 5 10:16:46 2021 +0900 [SPARK-36384][CORE][DOC] Add doc for shuffle checksum ### What changes were proposed in this pull request? Add doc for the shuffle checksum configs in `configuration.md`. ### Why are the changes needed? doc ### Does this PR introduce _any_ user-facing change? No, since Spark 3.2 hasn't been released. ### How was this patch tested? Pass existed tests. Closes #33637 from Ngone51/SPARK-36384. Authored-by: yi.wu <yi...@databricks.com> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> (cherry picked from commit 3b92c721b5c08c76c3aad056d3170553d0b52f85) Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- .../org/apache/spark/internal/config/package.scala | 13 ++++++++----- docs/configuration.md | 18 ++++++++++++++++++ 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index 60ba3aa..17c585d 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -1370,21 +1370,24 @@ package object config { private[spark] val SHUFFLE_CHECKSUM_ENABLED = ConfigBuilder("spark.shuffle.checksum.enabled") - .doc("Whether to calculate the checksum of shuffle output. If enabled, Spark will try " + - "its best to tell if shuffle data corruption is caused by network or disk or others.") + .doc("Whether to calculate the checksum of shuffle data. If enabled, Spark will calculate " + + "the checksum values for each partition data within the map output file and store the " + + "values in a checksum file on the disk. When there's shuffle data corruption detected, " + + "Spark will try to diagnose the cause (e.g., network issue, disk issue, etc.) of the " + + "corruption by using the checksum file.") .version("3.2.0") .booleanConf .createWithDefault(true) private[spark] val SHUFFLE_CHECKSUM_ALGORITHM = ConfigBuilder("spark.shuffle.checksum.algorithm") - .doc("The algorithm used to calculate the checksum. Currently, it only supports" + - " built-in algorithms of JDK.") + .doc("The algorithm is used to calculate the shuffle checksum. Currently, it only supports " + "built-in algorithms of JDK.") .version("3.2.0") .stringConf .transform(_.toUpperCase(Locale.ROOT)) .checkValue(Set("ADLER32", "CRC32").contains, "Shuffle checksum algorithm " + - "should be either Adler32 or CRC32.") + "should be either ADLER32 or CRC32.") .createWithDefault("ADLER32") private[spark] val SHUFFLE_COMPRESS = diff --git a/docs/configuration.md b/docs/configuration.md index f7db4c2..a4fdc4c 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1032,6 +1032,24 @@ Apart from these, the following properties are also available, and may be useful </td> <td>1.6.0</td> </tr> +<tr> + <td><code>spark.shuffle.checksum.enabled</code></td> + <td>true</td> + <td> + Whether to calculate the checksum of shuffle data. If enabled, Spark will calculate the checksum values for each partition + data within the map output file and store the values in a checksum file on the disk. When there's shuffle data corruption + detected, Spark will try to diagnose the cause (e.g., network issue, disk issue, etc.) of the corruption by using the checksum file. + </td> + <td>3.2.0</td> +</tr> +<tr> + <td><code>spark.shuffle.checksum.algorithm</code></td> + <td>ADLER32</td> + <td> + The algorithm is used to calculate the shuffle checksum. Currently, it only supports built-in algorithms of JDK, e.g., ADLER32, CRC32.
+ </td> + <td>3.2.0</td> +</tr> </table> ### Spark UI --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org