sunchao commented on code in PR #460: URL: https://github.com/apache/datafusion-comet/pull/460#discussion_r1611986030
########## common/src/main/scala/org/apache/comet/CometConf.scala: ########## @@ -131,14 +132,18 @@ object CometConf { .booleanConf .createWithDefault(false) - val COMET_COLUMNAR_SHUFFLE_ENABLED: ConfigEntry[Boolean] = - conf("spark.comet.columnar.shuffle.enabled") - .doc( - "Whether to enable Arrow-based columnar shuffle for Comet and Spark regular operators. " + - "If this is enabled, Comet prefers columnar shuffle than native shuffle. " + - "By default, this config is true.") - .booleanConf - .createWithDefault(true) + val COMET_SHUFFLE_MODE: ConfigEntry[String] = conf(s"$COMET_EXEC_CONFIG_PREFIX.shuffle.mode") + .doc( + "The mode of Comet shuffle. This config is only effective only if Comet shuffle " + + "is enabled. Available modes are 'native', 'jvm', and 'auto'. " + + "'native' is for native shuffle which has best performance in general." + + "'jvm' is for jvm-based columnar shuffle which has higher coverage than native shuffle." + + "'auto' is for Comet to choose the best shuffle mode based on the query plan." + + "By default, this config is 'jvm'.") + .stringConf + .transform(_.toLowerCase(Locale.ROOT)) + .checkValues(Set("native", "jvm", "auto")) Review Comment: 👍 ########## spark/src/test/scala/org/apache/comet/exec/CometAggregateSuite.scala: ########## @@ -1131,9 +1129,8 @@ class CometAggregateSuite extends CometTestBase with AdaptiveSparkPlanHelper { test("var_pop and var_samp") { withSQLConf(CometConf.COMET_EXEC_SHUFFLE_ENABLED.key -> "true") { - Seq(true, false).foreach { cometColumnShuffleEnabled => - withSQLConf( - CometConf.COMET_COLUMNAR_SHUFFLE_ENABLED.key -> cometColumnShuffleEnabled.toString) { + Seq("native", "jvm").foreach { cometColumnShuffleEnabled => Review Comment: nit: maybe update the variable name too ########## spark/src/test/scala/org/apache/comet/exec/CometExecSuite.scala: ########## @@ -134,14 +134,14 @@ class CometExecSuite extends CometTestBase { .toDF("c1", "c2") .createOrReplaceTempView("v") - Seq(true, false).foreach { columnarShuffle => + Seq("native", "jvm").foreach { columnarShuffle => Review Comment: nit: `shuffleMode`? ########## docs/source/user-guide/configs.md: ########## @@ -39,6 +38,7 @@ Comet provides the following configuration settings. | spark.comet.exec.memoryFraction | The fraction of memory from Comet memory overhead that the native memory manager can use for execution. The purpose of this config is to set aside memory for untracked data structures, as well as imprecise size estimation during memory acquisition. Default value is 0.7. | 0.7 | | spark.comet.exec.shuffle.codec | The codec of Comet native shuffle used to compress shuffle data. Only zstd is supported. | zstd | | spark.comet.exec.shuffle.enabled | Whether to enable Comet native shuffle. By default, this config is false. Note that this requires setting 'spark.shuffle.manager' to 'org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager'. 'spark.shuffle.manager' must be set before starting the Spark application and cannot be changed during the application. | false | +| spark.comet.exec.shuffle.mode | The mode of Comet shuffle. This config is only effective only if Comet shuffle is enabled. Available modes are 'native', 'jvm', and 'auto'. 'native' is for native shuffle which has best performance in general.'jvm' is for jvm-based columnar shuffle which has higher coverage than native shuffle.'auto' is for Comet to choose the best shuffle mode based on the query plan.By default, this config is 'jvm'. | jvm | Review Comment: also spaces before `jvm` and `auto` ########## docs/source/user-guide/configs.md: ########## @@ -39,6 +38,7 @@ Comet provides the following configuration settings. | spark.comet.exec.memoryFraction | The fraction of memory from Comet memory overhead that the native memory manager can use for execution. The purpose of this config is to set aside memory for untracked data structures, as well as imprecise size estimation during memory acquisition. Default value is 0.7. | 0.7 | | spark.comet.exec.shuffle.codec | The codec of Comet native shuffle used to compress shuffle data. Only zstd is supported. | zstd | | spark.comet.exec.shuffle.enabled | Whether to enable Comet native shuffle. By default, this config is false. Note that this requires setting 'spark.shuffle.manager' to 'org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager'. 'spark.shuffle.manager' must be set before starting the Spark application and cannot be changed during the application. | false | +| spark.comet.exec.shuffle.mode | The mode of Comet shuffle. This config is only effective only if Comet shuffle is enabled. Available modes are 'native', 'jvm', and 'auto'. 'native' is for native shuffle which has best performance in general.'jvm' is for jvm-based columnar shuffle which has higher coverage than native shuffle.'auto' is for Comet to choose the best shuffle mode based on the query plan.By default, this config is 'jvm'. | jvm | Review Comment: `is only effective only` -> `is only effective` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org