(incubator-gluten) branch main updated: [VL] Move velox related configs to VeloxConfig.h (#5743)

philo Wed, 15 May 2024 20:06:29 -0700

This is an automated email from the ASF dual-hosted git repository.

philo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git



The following commit(s) were added to refs/heads/main by this push:
     new 9d2a13bff [VL] Move velox related configs to VeloxConfig.h (#5743)
9d2a13bff is described below

commit 9d2a13bffb4292f17714bfbba96638aeadb91062
Author: Yang Zhang <yangchuan...@alibaba-inc.com>
AuthorDate: Thu May 16 11:06:18 2024 +0800

    [VL] Move velox related configs to VeloxConfig.h (#5743)
---
 cpp/core/config/GlutenConfig.h                |   7 -
 cpp/velox/compute/VeloxBackend.cc             | 182 ++++++++------------------
 cpp/velox/compute/VeloxBackend.h              |  12 +-
 cpp/velox/compute/VeloxRuntime.cc             |   4 +-
 cpp/velox/compute/WholeStageResultIterator.cc |  47 +------
 cpp/velox/config/VeloxConfig.h                | 127 ++++++++++++++++++
 6 files changed, 192 insertions(+), 187 deletions(-)

diff --git a/cpp/core/config/GlutenConfig.h b/cpp/core/config/GlutenConfig.h
index 3c47fb547..16a18f6be 100644
--- a/cpp/core/config/GlutenConfig.h
+++ b/cpp/core/config/GlutenConfig.h
@@ -61,13 +61,6 @@ const std::string kShuffleCompressionCodecBackend = 
"spark.gluten.sql.columnar.s
 const std::string kQatBackendName = "qat";
 const std::string kIaaBackendName = "iaa";
 
-// Velox conf
-const std::string kGlogVerboseLevel = 
"spark.gluten.sql.columnar.backend.velox.glogVerboseLevel";
-const uint32_t kGlogVerboseLevelDefault = 0;
-const uint32_t kGlogVerboseLevelMaximum = 99;
-const std::string kGlogSeverityLevel = 
"spark.gluten.sql.columnar.backend.velox.glogSeverityLevel";
-const uint32_t kGlogSeverityLevelDefault = 1;
-
 std::unordered_map<std::string, std::string>
 parseConfMap(JNIEnv* env, const uint8_t* planData, const int32_t 
planDataLength);
 
diff --git a/cpp/velox/compute/VeloxBackend.cc 
b/cpp/velox/compute/VeloxBackend.cc
index 044c8aa0e..b2fb1c964 100644
--- a/cpp/velox/compute/VeloxBackend.cc
+++ b/cpp/velox/compute/VeloxBackend.cc
@@ -24,8 +24,6 @@
 #include "operators/plannodes/RowVectorStream.h"
 #include "utils/ConfigExtractor.h"
 
-#include "shuffle/VeloxShuffleReader.h"
-
 #ifdef GLUTEN_ENABLE_QAT
 #include "utils/qat/QatCodec.h"
 #endif
@@ -33,7 +31,7 @@
 #include "utils/qpl/qpl_codec.h"
 #endif
 #include "compute/VeloxRuntime.h"
-#include "config/GlutenConfig.h"
+#include "config/VeloxConfig.h"
 #include "jni/JniFileSystem.h"
 #include "operators/functions/SparkTokenizer.h"
 #include "udf/UdfLoader.h"
@@ -54,71 +52,6 @@ DEFINE_int32(gluten_velox_aysnc_timeout_on_task_stopping, 
30000, "Aysnc timout w
 
 using namespace facebook;
 
-namespace {
-
-const std::string kEnableUserExceptionStacktrace =
-    "spark.gluten.sql.columnar.backend.velox.enableUserExceptionStacktrace";
-const bool kEnableUserExceptionStacktraceDefault = true;
-
-const std::string kEnableSystemExceptionStacktrace =
-    "spark.gluten.sql.columnar.backend.velox.enableSystemExceptionStacktrace";
-const bool kEnableSystemExceptionStacktraceDefault = true;
-
-const std::string kMemoryUseHugePages = 
"spark.gluten.sql.columnar.backend.velox.memoryUseHugePages";
-const bool kMemoryUseHugePagesDefault = false;
-
-const std::string kHiveConnectorId = "test-hive";
-const std::string kVeloxCacheEnabled = 
"spark.gluten.sql.columnar.backend.velox.cacheEnabled";
-
-// memory cache
-const std::string kVeloxMemCacheSize = 
"spark.gluten.sql.columnar.backend.velox.memCacheSize";
-const uint64_t kVeloxMemCacheSizeDefault = 1073741824; // 1G
-
-// ssd cache
-const std::string kVeloxSsdCacheSize = 
"spark.gluten.sql.columnar.backend.velox.ssdCacheSize";
-const uint64_t kVeloxSsdCacheSizeDefault = 1073741824; // 1G
-const std::string kVeloxSsdCachePath = 
"spark.gluten.sql.columnar.backend.velox.ssdCachePath";
-const std::string kVeloxSsdCachePathDefault = "/tmp/";
-const std::string kVeloxSsdCacheShards = 
"spark.gluten.sql.columnar.backend.velox.ssdCacheShards";
-const uint32_t kVeloxSsdCacheShardsDefault = 1;
-const std::string kVeloxSsdCacheIOThreads = 
"spark.gluten.sql.columnar.backend.velox.ssdCacheIOThreads";
-const uint32_t kVeloxSsdCacheIOThreadsDefault = 1;
-const std::string kVeloxSsdODirectEnabled = 
"spark.gluten.sql.columnar.backend.velox.ssdODirect";
-
-// async
-const std::string kVeloxIOThreads = 
"spark.gluten.sql.columnar.backend.velox.IOThreads";
-const uint32_t kVeloxIOThreadsDefault = 0;
-const std::string kVeloxAsyncTimeoutOnTaskStopping =
-    "spark.gluten.sql.columnar.backend.velox.asyncTimeoutOnTaskStopping";
-const int32_t kVeloxAsyncTimeoutOnTaskStoppingDefault = 30000; // 30s
-
-// udf
-const std::string kVeloxUdfLibraryPaths = 
"spark.gluten.sql.columnar.backend.velox.udfLibraryPaths";
-
-// spill
-const std::string kMaxSpillFileSize = 
"spark.gluten.sql.columnar.backend.velox.maxSpillFileSize";
-const uint64_t kMaxSpillFileSizeDefault = 1L * 1024 * 1024 * 1024;
-
-// backtrace allocation
-const std::string kBacktraceAllocation = "spark.gluten.backtrace.allocation";
-
-// VeloxShuffleReader print flag.
-const std::string kVeloxShuffleReaderPrintFlag = 
"spark.gluten.velox.shuffleReaderPrintFlag";
-
-const std::string kVeloxFileHandleCacheEnabled = 
"spark.gluten.sql.columnar.backend.velox.fileHandleCacheEnabled";
-const bool kVeloxFileHandleCacheEnabledDefault = false;
-
-/* configs for file read in velox*/
-const std::string kDirectorySizeGuess = 
"spark.gluten.sql.columnar.backend.velox.directorySizeGuess";
-const std::string kFilePreloadThreshold = 
"spark.gluten.sql.columnar.backend.velox.filePreloadThreshold";
-const std::string kPrefetchRowGroups = 
"spark.gluten.sql.columnar.backend.velox.prefetchRowGroups";
-const std::string kLoadQuantum = 
"spark.gluten.sql.columnar.backend.velox.loadQuantum";
-const std::string kMaxCoalescedDistanceBytes = 
"spark.gluten.sql.columnar.backend.velox.maxCoalescedDistanceBytes";
-const std::string kMaxCoalescedBytes = 
"spark.gluten.sql.columnar.backend.velox.maxCoalescedBytes";
-const std::string kCachePrefetchMinPct = 
"spark.gluten.sql.columnar.backend.velox.cachePrefetchMinPct";
-
-} // namespace
-
 namespace gluten {
 
 namespace {
@@ -128,25 +61,22 @@ gluten::Runtime* veloxRuntimeFactory(const 
std::unordered_map<std::string, std::
 } // namespace
 
 void VeloxBackend::init(const std::unordered_map<std::string, std::string>& 
conf) {
-  backendConf_ = conf;
+  backendConf_ = 
std::make_shared<facebook::velox::core::MemConfigMutable>(conf);
 
   // Register Velox runtime factory
   gluten::Runtime::registerFactory(gluten::kVeloxRuntimeKind, 
veloxRuntimeFactory);
 
-  std::shared_ptr<const facebook::velox::Config> veloxcfg =
-      std::make_shared<facebook::velox::core::MemConfigMutable>(conf);
-
-  if (veloxcfg->get<bool>(kDebugModeEnabled, false)) {
-    LOG(INFO) << "VeloxBackend config:" << printConfig(veloxcfg->valuesCopy());
+  if (backendConf_->get<bool>(kDebugModeEnabled, false)) {
+    LOG(INFO) << "VeloxBackend config:" << 
printConfig(backendConf_->valuesCopy());
   }
 
   // Init glog and log level.
-  if (!veloxcfg->get<bool>(kDebugModeEnabled, false)) {
-    FLAGS_v = veloxcfg->get<uint32_t>(kGlogVerboseLevel, 
kGlogVerboseLevelDefault);
-    FLAGS_minloglevel = veloxcfg->get<uint32_t>(kGlogSeverityLevel, 
kGlogSeverityLevelDefault);
+  if (!backendConf_->get<bool>(kDebugModeEnabled, false)) {
+    FLAGS_v = backendConf_->get<uint32_t>(kGlogVerboseLevel, 
kGlogVerboseLevelDefault);
+    FLAGS_minloglevel = backendConf_->get<uint32_t>(kGlogSeverityLevel, 
kGlogSeverityLevelDefault);
   } else {
-    if (veloxcfg->isValueExists(kGlogVerboseLevel)) {
-      FLAGS_v = veloxcfg->get<uint32_t>(kGlogVerboseLevel, 
kGlogVerboseLevelDefault);
+    if (backendConf_->isValueExists(kGlogVerboseLevel)) {
+      FLAGS_v = backendConf_->get<uint32_t>(kGlogVerboseLevel, 
kGlogVerboseLevelDefault);
     } else {
       FLAGS_v = kGlogVerboseLevelMaximum;
     }
@@ -159,27 +89,27 @@ void VeloxBackend::init(const 
std::unordered_map<std::string, std::string>& conf
 
   // Set velox_exception_user_stacktrace_enabled.
   FLAGS_velox_exception_user_stacktrace_enabled =
-      veloxcfg->get<bool>(kEnableUserExceptionStacktrace, 
kEnableUserExceptionStacktraceDefault);
+      backendConf_->get<bool>(kEnableUserExceptionStacktrace, 
kEnableUserExceptionStacktraceDefault);
 
   // Set velox_exception_system_stacktrace_enabled.
   FLAGS_velox_exception_system_stacktrace_enabled =
-      veloxcfg->get<bool>(kEnableSystemExceptionStacktrace, 
kEnableSystemExceptionStacktraceDefault);
+      backendConf_->get<bool>(kEnableSystemExceptionStacktrace, 
kEnableSystemExceptionStacktraceDefault);
 
   // Set velox_memory_use_hugepages.
-  FLAGS_velox_memory_use_hugepages = veloxcfg->get<bool>(kMemoryUseHugePages, 
kMemoryUseHugePagesDefault);
+  FLAGS_velox_memory_use_hugepages = 
backendConf_->get<bool>(kMemoryUseHugePages, kMemoryUseHugePagesDefault);
 
   // Async timeout.
   FLAGS_gluten_velox_aysnc_timeout_on_task_stopping =
-      veloxcfg->get<int32_t>(kVeloxAsyncTimeoutOnTaskStopping, 
kVeloxAsyncTimeoutOnTaskStoppingDefault);
+      backendConf_->get<int32_t>(kVeloxAsyncTimeoutOnTaskStopping, 
kVeloxAsyncTimeoutOnTaskStoppingDefault);
 
   // Set backtrace_allocation
-  gluten::backtrace_allocation = veloxcfg->get<bool>(kBacktraceAllocation, 
false);
+  gluten::backtrace_allocation = backendConf_->get<bool>(kBacktraceAllocation, 
false);
 
   // Setup and register.
   velox::filesystems::registerLocalFileSystem();
-  initJolFilesystem(veloxcfg);
-  initCache(veloxcfg);
-  initConnector(veloxcfg);
+  initJolFilesystem();
+  initCache();
+  initConnector();
 
   // Register Velox functions
   registerAllFunctions();
@@ -189,7 +119,7 @@ void VeloxBackend::init(const 
std::unordered_map<std::string, std::string>& conf
   }
   
velox::exec::Operator::registerOperator(std::make_unique<RowVectorStreamOperatorTranslator>());
 
-  initUdf(veloxcfg);
+  initUdf();
   registerSparkTokenizer();
 
   // initialize the global memory manager for current process
@@ -201,8 +131,8 @@ facebook::velox::cache::AsyncDataCache* 
VeloxBackend::getAsyncDataCache() const
 }
 
 // JNI-or-local filesystem, for spilling-to-heap if we have extra JVM heap 
spaces
-void VeloxBackend::initJolFilesystem(const std::shared_ptr<const 
facebook::velox::Config>& conf) {
-  int64_t maxSpillFileSize = conf->get<int64_t>(kMaxSpillFileSize, 
kMaxSpillFileSizeDefault);
+void VeloxBackend::initJolFilesystem() {
+  int64_t maxSpillFileSize = backendConf_->get<int64_t>(kMaxSpillFileSize, 
kMaxSpillFileSizeDefault);
 
   // FIXME It's known that if spill compression is disabled, the actual spill 
file size may
   //   in crease beyond this limit a little (maximum 64 rows which is by 
default
@@ -210,18 +140,17 @@ void VeloxBackend::initJolFilesystem(const 
std::shared_ptr<const facebook::velox
   gluten::registerJolFileSystem(maxSpillFileSize);
 }
 
-void VeloxBackend::initCache(const std::shared_ptr<const 
facebook::velox::Config>& conf) {
-  bool veloxCacheEnabled = conf->get<bool>(kVeloxCacheEnabled, false);
-  if (veloxCacheEnabled) {
+void VeloxBackend::initCache() {
+  if (backendConf_->get<bool>(kVeloxCacheEnabled, false)) {
     FLAGS_ssd_odirect = true;
 
-    FLAGS_ssd_odirect = conf->get<bool>(kVeloxSsdODirectEnabled, false);
+    FLAGS_ssd_odirect = backendConf_->get<bool>(kVeloxSsdODirectEnabled, 
false);
 
-    uint64_t memCacheSize = conf->get<uint64_t>(kVeloxMemCacheSize, 
kVeloxMemCacheSizeDefault);
-    uint64_t ssdCacheSize = conf->get<uint64_t>(kVeloxSsdCacheSize, 
kVeloxSsdCacheSizeDefault);
-    int32_t ssdCacheShards = conf->get<int32_t>(kVeloxSsdCacheShards, 
kVeloxSsdCacheShardsDefault);
-    int32_t ssdCacheIOThreads = conf->get<int32_t>(kVeloxSsdCacheIOThreads, 
kVeloxSsdCacheIOThreadsDefault);
-    std::string ssdCachePathPrefix = 
conf->get<std::string>(kVeloxSsdCachePath, kVeloxSsdCachePathDefault);
+    uint64_t memCacheSize = backendConf_->get<uint64_t>(kVeloxMemCacheSize, 
kVeloxMemCacheSizeDefault);
+    uint64_t ssdCacheSize = backendConf_->get<uint64_t>(kVeloxSsdCacheSize, 
kVeloxSsdCacheSizeDefault);
+    int32_t ssdCacheShards = backendConf_->get<int32_t>(kVeloxSsdCacheShards, 
kVeloxSsdCacheShardsDefault);
+    int32_t ssdCacheIOThreads = 
backendConf_->get<int32_t>(kVeloxSsdCacheIOThreads, 
kVeloxSsdCacheIOThreadsDefault);
+    std::string ssdCachePathPrefix = 
backendConf_->get<std::string>(kVeloxSsdCachePath, kVeloxSsdCachePathDefault);
 
     cachePathPrefix_ = ssdCachePathPrefix;
     cacheFilePrefix_ = getCacheFilePrefix();
@@ -257,63 +186,64 @@ void VeloxBackend::initCache(const std::shared_ptr<const 
facebook::velox::Config
   }
 }
 
-void VeloxBackend::initConnector(const std::shared_ptr<const 
facebook::velox::Config>& conf) {
+void VeloxBackend::initConnector() {
   // The configs below are used at process level.
-  auto mutableConf = 
std::make_shared<facebook::velox::core::MemConfigMutable>(conf->valuesCopy());
+  auto connectorConf = 
std::make_shared<facebook::velox::core::MemConfigMutable>(backendConf_->valuesCopy());
 
-  auto hiveConf = getHiveConfig(conf);
+  auto hiveConf = getHiveConfig(backendConf_);
   for (auto& [k, v] : hiveConf->valuesCopy()) {
-    mutableConf->setValue(k, v);
+    connectorConf->setValue(k, v);
   }
 
 #ifdef ENABLE_ABFS
-  const auto& confValue = conf->valuesCopy();
+  const auto& confValue = backendConf_->valuesCopy();
   for (auto& [k, v] : confValue) {
     if (k.find("fs.azure.account.key") == 0) {
-      mutableConf->setValue(k, v);
+      connectorConf->setValue(k, v);
     } else if (k.find("spark.hadoop.fs.azure.account.key") == 0) {
       constexpr int32_t accountKeyPrefixLength = 13;
-      mutableConf->setValue(k.substr(accountKeyPrefixLength), v);
+      connectorConf->setValue(k.substr(accountKeyPrefixLength), v);
     }
   }
 #endif
 
-  mutableConf->setValue(
+  connectorConf->setValue(
       velox::connector::hive::HiveConfig::kEnableFileHandleCache,
-      conf->get<bool>(kVeloxFileHandleCacheEnabled, 
kVeloxFileHandleCacheEnabledDefault) ? "true" : "false");
+      backendConf_->get<bool>(kVeloxFileHandleCacheEnabled, 
kVeloxFileHandleCacheEnabledDefault) ? "true" : "false");
 
-  mutableConf->setValue(
+  connectorConf->setValue(
       velox::connector::hive::HiveConfig::kMaxCoalescedBytes,
-      conf->get<std::string>(kMaxCoalescedBytes, "67108864")); // 64M
-  mutableConf->setValue(
+      backendConf_->get<std::string>(kMaxCoalescedBytes, "67108864")); // 64M
+  connectorConf->setValue(
       velox::connector::hive::HiveConfig::kMaxCoalescedDistanceBytes,
-      conf->get<std::string>(kMaxCoalescedDistanceBytes, "1048576")); // 1M
-  mutableConf->setValue(
-      velox::connector::hive::HiveConfig::kPrefetchRowGroups, 
conf->get<std::string>(kPrefetchRowGroups, "1"));
-  mutableConf->setValue(
-      velox::connector::hive::HiveConfig::kLoadQuantum, 
conf->get<std::string>(kLoadQuantum, "268435456")); // 256M
-  mutableConf->setValue(
+      backendConf_->get<std::string>(kMaxCoalescedDistanceBytes, "1048576")); 
// 1M
+  connectorConf->setValue(
+      velox::connector::hive::HiveConfig::kPrefetchRowGroups, 
backendConf_->get<std::string>(kPrefetchRowGroups, "1"));
+  connectorConf->setValue(
+      velox::connector::hive::HiveConfig::kLoadQuantum,
+      backendConf_->get<std::string>(kLoadQuantum, "268435456")); // 256M
+  connectorConf->setValue(
       velox::connector::hive::HiveConfig::kFooterEstimatedSize,
-      conf->get<std::string>(kDirectorySizeGuess, "32768")); // 32K
-  mutableConf->setValue(
+      backendConf_->get<std::string>(kDirectorySizeGuess, "32768")); // 32K
+  connectorConf->setValue(
       velox::connector::hive::HiveConfig::kFilePreloadThreshold,
-      conf->get<std::string>(kFilePreloadThreshold, "1048576")); // 1M
+      backendConf_->get<std::string>(kFilePreloadThreshold, "1048576")); // 1M
 
   // set cache_prefetch_min_pct default as 0 to force all loads are prefetched 
in DirectBufferInput.
-  FLAGS_cache_prefetch_min_pct = conf->get<int>(kCachePrefetchMinPct, 0);
+  FLAGS_cache_prefetch_min_pct = backendConf_->get<int>(kCachePrefetchMinPct, 
0);
 
-  auto ioThreads = conf->get<int32_t>(kVeloxIOThreads, kVeloxIOThreadsDefault);
+  auto ioThreads = backendConf_->get<int32_t>(kVeloxIOThreads, 
kVeloxIOThreadsDefault);
   if (ioThreads > 0) {
     ioExecutor_ = std::make_unique<folly::IOThreadPoolExecutor>(ioThreads);
   }
   
velox::connector::registerConnector(std::make_shared<velox::connector::hive::HiveConnector>(
       kHiveConnectorId,
-      
std::make_shared<facebook::velox::core::MemConfig>(mutableConf->valuesCopy()),
+      
std::make_shared<facebook::velox::core::MemConfig>(connectorConf->valuesCopy()),
       ioExecutor_.get()));
 }
 
-void VeloxBackend::initUdf(const std::shared_ptr<const 
facebook::velox::Config>& conf) {
-  auto got = conf->get<std::string>(kVeloxUdfLibraryPaths, "");
+void VeloxBackend::initUdf() {
+  auto got = backendConf_->get<std::string>(kVeloxUdfLibraryPaths, "");
   if (!got.empty()) {
     auto udfLoader = gluten::UdfLoader::getInstance();
     udfLoader->loadUdfLibraries(got);
@@ -335,7 +265,7 @@ VeloxBackend* VeloxBackend::get() {
   return instance_.get();
 }
 
-const std::unordered_map<std::string, std::string>& 
VeloxBackend::getBackendConf() const {
+const std::shared_ptr<const facebook::velox::Config> 
VeloxBackend::getBackendConf() const {
   return backendConf_;
 }
 
diff --git a/cpp/velox/compute/VeloxBackend.h b/cpp/velox/compute/VeloxBackend.h
index a601d715c..891bdd2cc 100644
--- a/cpp/velox/compute/VeloxBackend.h
+++ b/cpp/velox/compute/VeloxBackend.h
@@ -53,7 +53,7 @@ class VeloxBackend {
 
   facebook::velox::cache::AsyncDataCache* getAsyncDataCache() const;
 
-  const std::unordered_map<std::string, std::string>& getBackendConf() const;
+  const std::shared_ptr<const facebook::velox::Config> getBackendConf() const;
 
   void tearDown() {
     // Destruct IOThreadPoolExecutor will join all threads.
@@ -68,11 +68,11 @@ class VeloxBackend {
   }
 
   void init(const std::unordered_map<std::string, std::string>& conf);
-  void initCache(const std::shared_ptr<const facebook::velox::Config>& conf);
-  void initConnector(const std::shared_ptr<const facebook::velox::Config>& 
conf);
-  void initUdf(const std::shared_ptr<const facebook::velox::Config>& conf);
+  void initCache();
+  void initConnector();
+  void initUdf();
 
-  void initJolFilesystem(const std::shared_ptr<const facebook::velox::Config>& 
conf);
+  void initJolFilesystem();
 
   std::string getCacheFilePrefix() {
     return "cache." + 
boost::lexical_cast<std::string>(boost::uuids::random_generator()()) + ".";
@@ -90,7 +90,7 @@ class VeloxBackend {
   std::string cachePathPrefix_;
   std::string cacheFilePrefix_;
 
-  std::unordered_map<std::string, std::string> backendConf_{};
+  std::shared_ptr<const facebook::velox::Config> backendConf_;
 };
 
 } // namespace gluten
diff --git a/cpp/velox/compute/VeloxRuntime.cc 
b/cpp/velox/compute/VeloxRuntime.cc
index 8314d0bd2..a3e8c159c 100644
--- a/cpp/velox/compute/VeloxRuntime.cc
+++ b/cpp/velox/compute/VeloxRuntime.cc
@@ -26,7 +26,7 @@
 #include "compute/ResultIterator.h"
 #include "compute/Runtime.h"
 #include "compute/VeloxPlanConverter.h"
-#include "config/GlutenConfig.h"
+#include "config/VeloxConfig.h"
 #include "operators/serializer/VeloxRowToColumnarConverter.h"
 #include "shuffle/VeloxShuffleReader.h"
 #include "shuffle/VeloxShuffleWriter.h"
@@ -256,7 +256,7 @@ std::unique_ptr<ColumnarBatchSerializer> 
VeloxRuntime::createColumnarBatchSerial
 }
 
 void VeloxRuntime::dumpConf(const std::string& path) {
-  auto backendConf = VeloxBackend::get()->getBackendConf();
+  auto backendConf = VeloxBackend::get()->getBackendConf()->valuesCopy();
   auto allConf = backendConf;
 
   for (const auto& pair : confMap_) {
diff --git a/cpp/velox/compute/WholeStageResultIterator.cc 
b/cpp/velox/compute/WholeStageResultIterator.cc
index 83749061c..006b37588 100644
--- a/cpp/velox/compute/WholeStageResultIterator.cc
+++ b/cpp/velox/compute/WholeStageResultIterator.cc
@@ -17,13 +17,11 @@
 #include "WholeStageResultIterator.h"
 #include "VeloxBackend.h"
 #include "VeloxRuntime.h"
-#include "config/GlutenConfig.h"
+#include "config/VeloxConfig.h"
 #include "velox/connectors/hive/HiveConfig.h"
 #include "velox/connectors/hive/HiveConnectorSplit.h"
 #include "velox/exec/PlanNodeStats.h"
 
-#include "utils/ConfigExtractor.h"
-
 #ifdef ENABLE_HDFS
 #include "utils/HdfsUtils.h"
 #endif
@@ -33,49 +31,6 @@ using namespace facebook;
 namespace gluten {
 
 namespace {
-// Velox configs
-const std::string kHiveConnectorId = "test-hive";
-
-// memory
-const std::string kSpillStrategy = 
"spark.gluten.sql.columnar.backend.velox.spillStrategy";
-const std::string kSpillStrategyDefaultValue = "auto";
-const std::string kSpillThreadNum = 
"spark.gluten.sql.columnar.backend.velox.spillThreadNum";
-const uint32_t kSpillThreadNumDefaultValue = 0;
-const std::string kAggregationSpillEnabled = 
"spark.gluten.sql.columnar.backend.velox.aggregationSpillEnabled";
-const std::string kJoinSpillEnabled = 
"spark.gluten.sql.columnar.backend.velox.joinSpillEnabled";
-const std::string kOrderBySpillEnabled = 
"spark.gluten.sql.columnar.backend.velox.orderBySpillEnabled";
-
-// spill config
-// refer to
-// 
https://github.com/facebookincubator/velox/blob/95f3e80e77d046c12fbc79dc529366be402e9c2b/velox/docs/configs.rst#spilling
-const std::string kMaxSpillLevel = 
"spark.gluten.sql.columnar.backend.velox.maxSpillLevel";
-const std::string kMaxSpillFileSize = 
"spark.gluten.sql.columnar.backend.velox.maxSpillFileSize";
-const std::string kSpillStartPartitionBit = 
"spark.gluten.sql.columnar.backend.velox.spillStartPartitionBit";
-const std::string kSpillPartitionBits = 
"spark.gluten.sql.columnar.backend.velox.spillPartitionBits";
-const std::string kMaxSpillRunRows = 
"spark.gluten.sql.columnar.backend.velox.MaxSpillRunRows";
-const std::string kMaxSpillBytes = 
"spark.gluten.sql.columnar.backend.velox.MaxSpillBytes";
-const std::string kSpillWriteBufferSize = 
"spark.gluten.sql.columnar.backend.velox.spillWriteBufferSize";
-
-const std::string kSpillableReservationGrowthPct =
-    "spark.gluten.sql.columnar.backend.velox.spillableReservationGrowthPct";
-const std::string kSpillCompressionKind = "spark.io.compression.codec";
-const std::string kMaxPartialAggregationMemoryRatio =
-    "spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio";
-const std::string kMaxExtendedPartialAggregationMemoryRatio =
-    
"spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio";
-const std::string kAbandonPartialAggregationMinPct =
-    "spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct";
-const std::string kAbandonPartialAggregationMinRows =
-    "spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows";
-
-// execution
-const std::string kBloomFilterExpectedNumItems = 
"spark.gluten.sql.columnar.backend.velox.bloomFilter.expectedNumItems";
-const std::string kBloomFilterNumBits = 
"spark.gluten.sql.columnar.backend.velox.bloomFilter.numBits";
-const std::string kBloomFilterMaxNumBits = 
"spark.gluten.sql.columnar.backend.velox.bloomFilter.maxNumBits";
-const std::string kVeloxSplitPreloadPerDriver = 
"spark.gluten.sql.columnar.backend.velox.SplitPreloadPerDriver";
-
-// write fies
-const std::string kMaxPartitions = 
"spark.gluten.sql.columnar.backend.velox.maxPartitionsPerWritersSession";
 
 // metrics
 const std::string kDynamicFiltersProduced = "dynamicFiltersProduced";
diff --git a/cpp/velox/config/VeloxConfig.h b/cpp/velox/config/VeloxConfig.h
new file mode 100644
index 000000000..a3112f83e
--- /dev/null
+++ b/cpp/velox/config/VeloxConfig.h
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "config/GlutenConfig.h"
+
+namespace gluten {
+// memory
+const std::string kSpillStrategy = 
"spark.gluten.sql.columnar.backend.velox.spillStrategy";
+const std::string kSpillStrategyDefaultValue = "auto";
+const std::string kSpillThreadNum = 
"spark.gluten.sql.columnar.backend.velox.spillThreadNum";
+const uint32_t kSpillThreadNumDefaultValue = 0;
+const std::string kAggregationSpillEnabled = 
"spark.gluten.sql.columnar.backend.velox.aggregationSpillEnabled";
+const std::string kJoinSpillEnabled = 
"spark.gluten.sql.columnar.backend.velox.joinSpillEnabled";
+const std::string kOrderBySpillEnabled = 
"spark.gluten.sql.columnar.backend.velox.orderBySpillEnabled";
+
+// spill config
+// refer to
+// 
https://github.com/facebookincubator/velox/blob/95f3e80e77d046c12fbc79dc529366be402e9c2b/velox/docs/configs.rst#spilling
+const std::string kMaxSpillLevel = 
"spark.gluten.sql.columnar.backend.velox.maxSpillLevel";
+const std::string kMaxSpillFileSize = 
"spark.gluten.sql.columnar.backend.velox.maxSpillFileSize";
+const std::string kSpillStartPartitionBit = 
"spark.gluten.sql.columnar.backend.velox.spillStartPartitionBit";
+const std::string kSpillPartitionBits = 
"spark.gluten.sql.columnar.backend.velox.spillPartitionBits";
+const std::string kMaxSpillRunRows = 
"spark.gluten.sql.columnar.backend.velox.MaxSpillRunRows";
+const std::string kMaxSpillBytes = 
"spark.gluten.sql.columnar.backend.velox.MaxSpillBytes";
+const std::string kSpillWriteBufferSize = 
"spark.gluten.sql.columnar.backend.velox.spillWriteBufferSize";
+const uint64_t kMaxSpillFileSizeDefault = 1L * 1024 * 1024 * 1024;
+
+const std::string kSpillableReservationGrowthPct =
+    "spark.gluten.sql.columnar.backend.velox.spillableReservationGrowthPct";
+const std::string kSpillCompressionKind = "spark.io.compression.codec";
+const std::string kMaxPartialAggregationMemoryRatio =
+    "spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio";
+const std::string kMaxExtendedPartialAggregationMemoryRatio =
+    
"spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio";
+const std::string kAbandonPartialAggregationMinPct =
+    "spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct";
+const std::string kAbandonPartialAggregationMinRows =
+    "spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows";
+
+// execution
+const std::string kBloomFilterExpectedNumItems = 
"spark.gluten.sql.columnar.backend.velox.bloomFilter.expectedNumItems";
+const std::string kBloomFilterNumBits = 
"spark.gluten.sql.columnar.backend.velox.bloomFilter.numBits";
+const std::string kBloomFilterMaxNumBits = 
"spark.gluten.sql.columnar.backend.velox.bloomFilter.maxNumBits";
+const std::string kVeloxSplitPreloadPerDriver = 
"spark.gluten.sql.columnar.backend.velox.SplitPreloadPerDriver";
+
+const std::string kEnableUserExceptionStacktrace =
+    "spark.gluten.sql.columnar.backend.velox.enableUserExceptionStacktrace";
+const bool kEnableUserExceptionStacktraceDefault = true;
+
+const std::string kEnableSystemExceptionStacktrace =
+    "spark.gluten.sql.columnar.backend.velox.enableSystemExceptionStacktrace";
+const bool kEnableSystemExceptionStacktraceDefault = true;
+
+const std::string kMemoryUseHugePages = 
"spark.gluten.sql.columnar.backend.velox.memoryUseHugePages";
+const bool kMemoryUseHugePagesDefault = false;
+
+const std::string kHiveConnectorId = "test-hive";
+const std::string kVeloxCacheEnabled = 
"spark.gluten.sql.columnar.backend.velox.cacheEnabled";
+
+// memory cache
+const std::string kVeloxMemCacheSize = 
"spark.gluten.sql.columnar.backend.velox.memCacheSize";
+const uint64_t kVeloxMemCacheSizeDefault = 1073741824; // 1G
+
+// ssd cache
+const std::string kVeloxSsdCacheSize = 
"spark.gluten.sql.columnar.backend.velox.ssdCacheSize";
+const uint64_t kVeloxSsdCacheSizeDefault = 1073741824; // 1G
+const std::string kVeloxSsdCachePath = 
"spark.gluten.sql.columnar.backend.velox.ssdCachePath";
+const std::string kVeloxSsdCachePathDefault = "/tmp/";
+const std::string kVeloxSsdCacheShards = 
"spark.gluten.sql.columnar.backend.velox.ssdCacheShards";
+const uint32_t kVeloxSsdCacheShardsDefault = 1;
+const std::string kVeloxSsdCacheIOThreads = 
"spark.gluten.sql.columnar.backend.velox.ssdCacheIOThreads";
+const uint32_t kVeloxSsdCacheIOThreadsDefault = 1;
+const std::string kVeloxSsdODirectEnabled = 
"spark.gluten.sql.columnar.backend.velox.ssdODirect";
+
+// async
+const std::string kVeloxIOThreads = 
"spark.gluten.sql.columnar.backend.velox.IOThreads";
+const uint32_t kVeloxIOThreadsDefault = 0;
+const std::string kVeloxAsyncTimeoutOnTaskStopping =
+    "spark.gluten.sql.columnar.backend.velox.asyncTimeoutOnTaskStopping";
+const int32_t kVeloxAsyncTimeoutOnTaskStoppingDefault = 30000; // 30s
+
+// udf
+const std::string kVeloxUdfLibraryPaths = 
"spark.gluten.sql.columnar.backend.velox.udfLibraryPaths";
+
+// backtrace allocation
+const std::string kBacktraceAllocation = "spark.gluten.backtrace.allocation";
+
+// VeloxShuffleReader print flag.
+const std::string kVeloxShuffleReaderPrintFlag = 
"spark.gluten.velox.shuffleReaderPrintFlag";
+
+const std::string kVeloxFileHandleCacheEnabled = 
"spark.gluten.sql.columnar.backend.velox.fileHandleCacheEnabled";
+const bool kVeloxFileHandleCacheEnabledDefault = false;
+
+/* configs for file read in velox*/
+const std::string kDirectorySizeGuess = 
"spark.gluten.sql.columnar.backend.velox.directorySizeGuess";
+const std::string kFilePreloadThreshold = 
"spark.gluten.sql.columnar.backend.velox.filePreloadThreshold";
+const std::string kPrefetchRowGroups = 
"spark.gluten.sql.columnar.backend.velox.prefetchRowGroups";
+const std::string kLoadQuantum = 
"spark.gluten.sql.columnar.backend.velox.loadQuantum";
+const std::string kMaxCoalescedDistanceBytes = 
"spark.gluten.sql.columnar.backend.velox.maxCoalescedDistanceBytes";
+const std::string kMaxCoalescedBytes = 
"spark.gluten.sql.columnar.backend.velox.maxCoalescedBytes";
+const std::string kCachePrefetchMinPct = 
"spark.gluten.sql.columnar.backend.velox.cachePrefetchMinPct";
+
+// write files
+const std::string kMaxPartitions = 
"spark.gluten.sql.columnar.backend.velox.maxPartitionsPerWritersSession";
+
+const std::string kGlogVerboseLevel = 
"spark.gluten.sql.columnar.backend.velox.glogVerboseLevel";
+const uint32_t kGlogVerboseLevelDefault = 0;
+const uint32_t kGlogVerboseLevelMaximum = 99;
+const std::string kGlogSeverityLevel = 
"spark.gluten.sql.columnar.backend.velox.glogSeverityLevel";
+const uint32_t kGlogSeverityLevelDefault = 1;
+} // namespace gluten


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org

(incubator-gluten) branch main updated: [VL] Move velox related configs to VeloxConfig.h (#5743)

Reply via email to