This is an automated email from the ASF dual-hosted git repository.
yangzy pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 65f4ad16df [VL] Change the loadQuantum config if velox cache is enabled (#8197)
65f4ad16df is described below
commit 65f4ad16df4bf51b63b344baff5bfbb4ca8c8efa
Author: Kaifei Yi <[email protected]>
AuthorDate: Thu Dec 12 17:02:40 2024 +0800
[VL] Change the loadQuantum config if velox cache is enabled (#8197)
---
.../org/apache/spark/sql/execution/VeloxParquetReadSuite.scala | 2 +-
cpp/velox/compute/VeloxBackend.cc | 3 +--
.../src/main/scala/org/apache/gluten/GlutenPlugin.scala | 10 ++++++++++
.../common/src/main/scala/org/apache/gluten/GlutenConfig.scala | 6 +++---
4 files changed, 15 insertions(+), 6 deletions(-)
diff --git a/backends-velox/src/test/scala/org/apache/spark/sql/execution/VeloxParquetReadSuite.scala b/backends-velox/src/test/scala/org/apache/spark/sql/execution/VeloxParquetReadSuite.scala
index 77372d47cb..383786d647 100644
--- a/backends-velox/src/test/scala/org/apache/spark/sql/execution/VeloxParquetReadSuite.scala
+++ b/backends-velox/src/test/scala/org/apache/spark/sql/execution/VeloxParquetReadSuite.scala
@@ -29,7 +29,7 @@ class VeloxParquetReadSuite extends VeloxWholeStageTransformerSuite {
override protected def sparkConf: SparkConf = {
super.sparkConf
- .set(GlutenConfig.LOAD_QUANTUM.key, "8m")
+ .set(GlutenConfig.LOAD_QUANTUM.key, "256m")
}
testWithSpecifiedSparkVersion("read example parquet files", Some("3.5"),
Some("3.5")) {
diff --git a/cpp/velox/compute/VeloxBackend.cc b/cpp/velox/compute/VeloxBackend.cc
index 10f7768d6b..c453b9981f 100644
--- a/cpp/velox/compute/VeloxBackend.cc
+++ b/cpp/velox/compute/VeloxBackend.cc
@@ -277,9 +277,8 @@ void VeloxBackend::initConnector() {
backendConf_->get<std::string>(kMaxCoalescedDistance, "512KB"); // 512KB
connectorConfMap[velox::connector::hive::HiveConfig::kPrefetchRowGroups] =
backendConf_->get<std::string>(kPrefetchRowGroups, "1");
- // Velox currently only support up to 8MB load quantum size on SSD.
connectorConfMap[velox::connector::hive::HiveConfig::kLoadQuantum] =
- backendConf_->get<std::string>(kLoadQuantum, "8388608"); // 8M
+ backendConf_->get<std::string>(kLoadQuantum, "268435456"); // 256M
connectorConfMap[velox::connector::hive::HiveConfig::kFooterEstimatedSize] =
backendConf_->get<std::string>(kDirectorySizeGuess, "32768"); // 32K
connectorConfMap[velox::connector::hive::HiveConfig::kFilePreloadThreshold] =
diff --git a/gluten-core/src/main/scala/org/apache/gluten/GlutenPlugin.scala b/gluten-core/src/main/scala/org/apache/gluten/GlutenPlugin.scala
index 8eee6629c6..4b28f23dd9 100644
--- a/gluten-core/src/main/scala/org/apache/gluten/GlutenPlugin.scala
+++ b/gluten-core/src/main/scala/org/apache/gluten/GlutenPlugin.scala
@@ -249,6 +249,16 @@ private[gluten] class GlutenDriverPlugin extends DriverPlugin with Logging {
s"${COLUMNAR_VELOX_CACHE_ENABLED.key} and " +
s"${COLUMNAR_VELOX_FILE_HANDLE_CACHE_ENABLED.key} should be enabled
together.")
}
+
+ if (
+ conf.getBoolean(COLUMNAR_VELOX_CACHE_ENABLED.key, false) &&
+ conf.getSizeAsBytes(LOAD_QUANTUM.key, LOAD_QUANTUM.defaultValueString) >
8 * 1024 * 1024
+ ) {
+ throw new IllegalArgumentException(
+ s"Velox currently only support up to 8MB load quantum size " +
+ s"on SSD cache enabled by ${COLUMNAR_VELOX_CACHE_ENABLED.key}, " +
+ s"User can set ${LOAD_QUANTUM.key} <= 8MB skip this error.")
+ }
}
}
diff --git a/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala b/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala
index c2cfff1b71..15704f1450 100644
--- a/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala
+++ b/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala
@@ -2096,13 +2096,13 @@ object GlutenConfig {
.intConf
.createWithDefault(1)
- // Velox currently only support up to 8MB load quantum size on SSD.
val LOAD_QUANTUM =
buildStaticConf("spark.gluten.sql.columnar.backend.velox.loadQuantum")
.internal()
- .doc("Set the load quantum for velox file scan")
+ .doc("Set the load quantum for velox file scan, recommend to use the
default value (256MB) " +
+ "for performance consideration. If Velox cache is enabled, it can be
8MB at most.")
.bytesConf(ByteUnit.BYTE)
- .createWithDefaultString("8MB")
+ .createWithDefaultString("256MB")
val MAX_COALESCED_DISTANCE_BYTES =
buildStaticConf("spark.gluten.sql.columnar.backend.velox.maxCoalescedDistance")
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]