This is an automated email from the ASF dual-hosted git repository.

philo pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push: new 9d2a13bff [VL] Move velox related configs to VeloxConfig.h (#5743) 9d2a13bff is described below commit 9d2a13bffb4292f17714bfbba96638aeadb91062 Author: Yang Zhang <yangchuan...@alibaba-inc.com> AuthorDate: Thu May 16 11:06:18 2024 +0800 [VL] Move velox related configs to VeloxConfig.h (#5743) --- cpp/core/config/GlutenConfig.h | 7 - cpp/velox/compute/VeloxBackend.cc | 182 ++++++++------------------ cpp/velox/compute/VeloxBackend.h | 12 +- cpp/velox/compute/VeloxRuntime.cc | 4 +- cpp/velox/compute/WholeStageResultIterator.cc | 47 +------ cpp/velox/config/VeloxConfig.h | 127 ++++++++++++++++++ 6 files changed, 192 insertions(+), 187 deletions(-) diff --git a/cpp/core/config/GlutenConfig.h b/cpp/core/config/GlutenConfig.h index 3c47fb547..16a18f6be 100644 --- a/cpp/core/config/GlutenConfig.h +++ b/cpp/core/config/GlutenConfig.h @@ -61,13 +61,6 @@ const std::string kShuffleCompressionCodecBackend = "spark.gluten.sql.columnar.s const std::string kQatBackendName = "qat"; const std::string kIaaBackendName = "iaa"; -// Velox conf -const std::string kGlogVerboseLevel = "spark.gluten.sql.columnar.backend.velox.glogVerboseLevel"; -const uint32_t kGlogVerboseLevelDefault = 0; -const uint32_t kGlogVerboseLevelMaximum = 99; -const std::string kGlogSeverityLevel = "spark.gluten.sql.columnar.backend.velox.glogSeverityLevel"; -const uint32_t kGlogSeverityLevelDefault = 1; - std::unordered_map<std::string, std::string> parseConfMap(JNIEnv* env, const uint8_t* planData, const int32_t planDataLength); diff --git a/cpp/velox/compute/VeloxBackend.cc b/cpp/velox/compute/VeloxBackend.cc index 044c8aa0e..b2fb1c964 100644 --- a/cpp/velox/compute/VeloxBackend.cc +++ b/cpp/velox/compute/VeloxBackend.cc @@ -24,8 +24,6 @@ #include "operators/plannodes/RowVectorStream.h" #include "utils/ConfigExtractor.h" -#include "shuffle/VeloxShuffleReader.h" - #ifdef GLUTEN_ENABLE_QAT #include "utils/qat/QatCodec.h" #endif @@ -33,7 +31,7 
@@ #include "utils/qpl/qpl_codec.h" #endif #include "compute/VeloxRuntime.h" -#include "config/GlutenConfig.h" +#include "config/VeloxConfig.h" #include "jni/JniFileSystem.h" #include "operators/functions/SparkTokenizer.h" #include "udf/UdfLoader.h" @@ -54,71 +52,6 @@ DEFINE_int32(gluten_velox_aysnc_timeout_on_task_stopping, 30000, "Aysnc timout w using namespace facebook; -namespace { - -const std::string kEnableUserExceptionStacktrace = - "spark.gluten.sql.columnar.backend.velox.enableUserExceptionStacktrace"; -const bool kEnableUserExceptionStacktraceDefault = true; - -const std::string kEnableSystemExceptionStacktrace = - "spark.gluten.sql.columnar.backend.velox.enableSystemExceptionStacktrace"; -const bool kEnableSystemExceptionStacktraceDefault = true; - -const std::string kMemoryUseHugePages = "spark.gluten.sql.columnar.backend.velox.memoryUseHugePages"; -const bool kMemoryUseHugePagesDefault = false; - -const std::string kHiveConnectorId = "test-hive"; -const std::string kVeloxCacheEnabled = "spark.gluten.sql.columnar.backend.velox.cacheEnabled"; - -// memory cache -const std::string kVeloxMemCacheSize = "spark.gluten.sql.columnar.backend.velox.memCacheSize"; -const uint64_t kVeloxMemCacheSizeDefault = 1073741824; // 1G - -// ssd cache -const std::string kVeloxSsdCacheSize = "spark.gluten.sql.columnar.backend.velox.ssdCacheSize"; -const uint64_t kVeloxSsdCacheSizeDefault = 1073741824; // 1G -const std::string kVeloxSsdCachePath = "spark.gluten.sql.columnar.backend.velox.ssdCachePath"; -const std::string kVeloxSsdCachePathDefault = "/tmp/"; -const std::string kVeloxSsdCacheShards = "spark.gluten.sql.columnar.backend.velox.ssdCacheShards"; -const uint32_t kVeloxSsdCacheShardsDefault = 1; -const std::string kVeloxSsdCacheIOThreads = "spark.gluten.sql.columnar.backend.velox.ssdCacheIOThreads"; -const uint32_t kVeloxSsdCacheIOThreadsDefault = 1; -const std::string kVeloxSsdODirectEnabled = "spark.gluten.sql.columnar.backend.velox.ssdODirect"; - -// async -const 
std::string kVeloxIOThreads = "spark.gluten.sql.columnar.backend.velox.IOThreads"; -const uint32_t kVeloxIOThreadsDefault = 0; -const std::string kVeloxAsyncTimeoutOnTaskStopping = - "spark.gluten.sql.columnar.backend.velox.asyncTimeoutOnTaskStopping"; -const int32_t kVeloxAsyncTimeoutOnTaskStoppingDefault = 30000; // 30s - -// udf -const std::string kVeloxUdfLibraryPaths = "spark.gluten.sql.columnar.backend.velox.udfLibraryPaths"; - -// spill -const std::string kMaxSpillFileSize = "spark.gluten.sql.columnar.backend.velox.maxSpillFileSize"; -const uint64_t kMaxSpillFileSizeDefault = 1L * 1024 * 1024 * 1024; - -// backtrace allocation -const std::string kBacktraceAllocation = "spark.gluten.backtrace.allocation"; - -// VeloxShuffleReader print flag. -const std::string kVeloxShuffleReaderPrintFlag = "spark.gluten.velox.shuffleReaderPrintFlag"; - -const std::string kVeloxFileHandleCacheEnabled = "spark.gluten.sql.columnar.backend.velox.fileHandleCacheEnabled"; -const bool kVeloxFileHandleCacheEnabledDefault = false; - -/* configs for file read in velox*/ -const std::string kDirectorySizeGuess = "spark.gluten.sql.columnar.backend.velox.directorySizeGuess"; -const std::string kFilePreloadThreshold = "spark.gluten.sql.columnar.backend.velox.filePreloadThreshold"; -const std::string kPrefetchRowGroups = "spark.gluten.sql.columnar.backend.velox.prefetchRowGroups"; -const std::string kLoadQuantum = "spark.gluten.sql.columnar.backend.velox.loadQuantum"; -const std::string kMaxCoalescedDistanceBytes = "spark.gluten.sql.columnar.backend.velox.maxCoalescedDistanceBytes"; -const std::string kMaxCoalescedBytes = "spark.gluten.sql.columnar.backend.velox.maxCoalescedBytes"; -const std::string kCachePrefetchMinPct = "spark.gluten.sql.columnar.backend.velox.cachePrefetchMinPct"; - -} // namespace - namespace gluten { namespace { @@ -128,25 +61,22 @@ gluten::Runtime* veloxRuntimeFactory(const std::unordered_map<std::string, std:: } // namespace void VeloxBackend::init(const 
std::unordered_map<std::string, std::string>& conf) { - backendConf_ = conf; + backendConf_ = std::make_shared<facebook::velox::core::MemConfigMutable>(conf); // Register Velox runtime factory gluten::Runtime::registerFactory(gluten::kVeloxRuntimeKind, veloxRuntimeFactory); - std::shared_ptr<const facebook::velox::Config> veloxcfg = - std::make_shared<facebook::velox::core::MemConfigMutable>(conf); - - if (veloxcfg->get<bool>(kDebugModeEnabled, false)) { - LOG(INFO) << "VeloxBackend config:" << printConfig(veloxcfg->valuesCopy()); + if (backendConf_->get<bool>(kDebugModeEnabled, false)) { + LOG(INFO) << "VeloxBackend config:" << printConfig(backendConf_->valuesCopy()); } // Init glog and log level. - if (!veloxcfg->get<bool>(kDebugModeEnabled, false)) { - FLAGS_v = veloxcfg->get<uint32_t>(kGlogVerboseLevel, kGlogVerboseLevelDefault); - FLAGS_minloglevel = veloxcfg->get<uint32_t>(kGlogSeverityLevel, kGlogSeverityLevelDefault); + if (!backendConf_->get<bool>(kDebugModeEnabled, false)) { + FLAGS_v = backendConf_->get<uint32_t>(kGlogVerboseLevel, kGlogVerboseLevelDefault); + FLAGS_minloglevel = backendConf_->get<uint32_t>(kGlogSeverityLevel, kGlogSeverityLevelDefault); } else { - if (veloxcfg->isValueExists(kGlogVerboseLevel)) { - FLAGS_v = veloxcfg->get<uint32_t>(kGlogVerboseLevel, kGlogVerboseLevelDefault); + if (backendConf_->isValueExists(kGlogVerboseLevel)) { + FLAGS_v = backendConf_->get<uint32_t>(kGlogVerboseLevel, kGlogVerboseLevelDefault); } else { FLAGS_v = kGlogVerboseLevelMaximum; } @@ -159,27 +89,27 @@ void VeloxBackend::init(const std::unordered_map<std::string, std::string>& conf // Set velox_exception_user_stacktrace_enabled. FLAGS_velox_exception_user_stacktrace_enabled = - veloxcfg->get<bool>(kEnableUserExceptionStacktrace, kEnableUserExceptionStacktraceDefault); + backendConf_->get<bool>(kEnableUserExceptionStacktrace, kEnableUserExceptionStacktraceDefault); // Set velox_exception_system_stacktrace_enabled. 
FLAGS_velox_exception_system_stacktrace_enabled = - veloxcfg->get<bool>(kEnableSystemExceptionStacktrace, kEnableSystemExceptionStacktraceDefault); + backendConf_->get<bool>(kEnableSystemExceptionStacktrace, kEnableSystemExceptionStacktraceDefault); // Set velox_memory_use_hugepages. - FLAGS_velox_memory_use_hugepages = veloxcfg->get<bool>(kMemoryUseHugePages, kMemoryUseHugePagesDefault); + FLAGS_velox_memory_use_hugepages = backendConf_->get<bool>(kMemoryUseHugePages, kMemoryUseHugePagesDefault); // Async timeout. FLAGS_gluten_velox_aysnc_timeout_on_task_stopping = - veloxcfg->get<int32_t>(kVeloxAsyncTimeoutOnTaskStopping, kVeloxAsyncTimeoutOnTaskStoppingDefault); + backendConf_->get<int32_t>(kVeloxAsyncTimeoutOnTaskStopping, kVeloxAsyncTimeoutOnTaskStoppingDefault); // Set backtrace_allocation - gluten::backtrace_allocation = veloxcfg->get<bool>(kBacktraceAllocation, false); + gluten::backtrace_allocation = backendConf_->get<bool>(kBacktraceAllocation, false); // Setup and register. velox::filesystems::registerLocalFileSystem(); - initJolFilesystem(veloxcfg); - initCache(veloxcfg); - initConnector(veloxcfg); + initJolFilesystem(); + initCache(); + initConnector(); // Register Velox functions registerAllFunctions(); @@ -189,7 +119,7 @@ void VeloxBackend::init(const std::unordered_map<std::string, std::string>& conf } velox::exec::Operator::registerOperator(std::make_unique<RowVectorStreamOperatorTranslator>()); - initUdf(veloxcfg); + initUdf(); registerSparkTokenizer(); // initialize the global memory manager for current process @@ -201,8 +131,8 @@ facebook::velox::cache::AsyncDataCache* VeloxBackend::getAsyncDataCache() const } // JNI-or-local filesystem, for spilling-to-heap if we have extra JVM heap spaces -void VeloxBackend::initJolFilesystem(const std::shared_ptr<const facebook::velox::Config>& conf) { - int64_t maxSpillFileSize = conf->get<int64_t>(kMaxSpillFileSize, kMaxSpillFileSizeDefault); +void VeloxBackend::initJolFilesystem() { + int64_t 
maxSpillFileSize = backendConf_->get<int64_t>(kMaxSpillFileSize, kMaxSpillFileSizeDefault); // FIXME It's known that if spill compression is disabled, the actual spill file size may // in crease beyond this limit a little (maximum 64 rows which is by default @@ -210,18 +140,17 @@ void VeloxBackend::initJolFilesystem(const std::shared_ptr<const facebook::velox gluten::registerJolFileSystem(maxSpillFileSize); } -void VeloxBackend::initCache(const std::shared_ptr<const facebook::velox::Config>& conf) { - bool veloxCacheEnabled = conf->get<bool>(kVeloxCacheEnabled, false); - if (veloxCacheEnabled) { +void VeloxBackend::initCache() { + if (backendConf_->get<bool>(kVeloxCacheEnabled, false)) { FLAGS_ssd_odirect = true; - FLAGS_ssd_odirect = conf->get<bool>(kVeloxSsdODirectEnabled, false); + FLAGS_ssd_odirect = backendConf_->get<bool>(kVeloxSsdODirectEnabled, false); - uint64_t memCacheSize = conf->get<uint64_t>(kVeloxMemCacheSize, kVeloxMemCacheSizeDefault); - uint64_t ssdCacheSize = conf->get<uint64_t>(kVeloxSsdCacheSize, kVeloxSsdCacheSizeDefault); - int32_t ssdCacheShards = conf->get<int32_t>(kVeloxSsdCacheShards, kVeloxSsdCacheShardsDefault); - int32_t ssdCacheIOThreads = conf->get<int32_t>(kVeloxSsdCacheIOThreads, kVeloxSsdCacheIOThreadsDefault); - std::string ssdCachePathPrefix = conf->get<std::string>(kVeloxSsdCachePath, kVeloxSsdCachePathDefault); + uint64_t memCacheSize = backendConf_->get<uint64_t>(kVeloxMemCacheSize, kVeloxMemCacheSizeDefault); + uint64_t ssdCacheSize = backendConf_->get<uint64_t>(kVeloxSsdCacheSize, kVeloxSsdCacheSizeDefault); + int32_t ssdCacheShards = backendConf_->get<int32_t>(kVeloxSsdCacheShards, kVeloxSsdCacheShardsDefault); + int32_t ssdCacheIOThreads = backendConf_->get<int32_t>(kVeloxSsdCacheIOThreads, kVeloxSsdCacheIOThreadsDefault); + std::string ssdCachePathPrefix = backendConf_->get<std::string>(kVeloxSsdCachePath, kVeloxSsdCachePathDefault); cachePathPrefix_ = ssdCachePathPrefix; cacheFilePrefix_ = getCacheFilePrefix(); @@ 
-257,63 +186,64 @@ void VeloxBackend::initCache(const std::shared_ptr<const facebook::velox::Config } } -void VeloxBackend::initConnector(const std::shared_ptr<const facebook::velox::Config>& conf) { +void VeloxBackend::initConnector() { // The configs below are used at process level. - auto mutableConf = std::make_shared<facebook::velox::core::MemConfigMutable>(conf->valuesCopy()); + auto connectorConf = std::make_shared<facebook::velox::core::MemConfigMutable>(backendConf_->valuesCopy()); - auto hiveConf = getHiveConfig(conf); + auto hiveConf = getHiveConfig(backendConf_); for (auto& [k, v] : hiveConf->valuesCopy()) { - mutableConf->setValue(k, v); + connectorConf->setValue(k, v); } #ifdef ENABLE_ABFS - const auto& confValue = conf->valuesCopy(); + const auto& confValue = backendConf_->valuesCopy(); for (auto& [k, v] : confValue) { if (k.find("fs.azure.account.key") == 0) { - mutableConf->setValue(k, v); + connectorConf->setValue(k, v); } else if (k.find("spark.hadoop.fs.azure.account.key") == 0) { constexpr int32_t accountKeyPrefixLength = 13; - mutableConf->setValue(k.substr(accountKeyPrefixLength), v); + connectorConf->setValue(k.substr(accountKeyPrefixLength), v); } } #endif - mutableConf->setValue( + connectorConf->setValue( velox::connector::hive::HiveConfig::kEnableFileHandleCache, - conf->get<bool>(kVeloxFileHandleCacheEnabled, kVeloxFileHandleCacheEnabledDefault) ? "true" : "false"); + backendConf_->get<bool>(kVeloxFileHandleCacheEnabled, kVeloxFileHandleCacheEnabledDefault) ? 
"true" : "false"); - mutableConf->setValue( + connectorConf->setValue( velox::connector::hive::HiveConfig::kMaxCoalescedBytes, - conf->get<std::string>(kMaxCoalescedBytes, "67108864")); // 64M - mutableConf->setValue( + backendConf_->get<std::string>(kMaxCoalescedBytes, "67108864")); // 64M + connectorConf->setValue( velox::connector::hive::HiveConfig::kMaxCoalescedDistanceBytes, - conf->get<std::string>(kMaxCoalescedDistanceBytes, "1048576")); // 1M - mutableConf->setValue( - velox::connector::hive::HiveConfig::kPrefetchRowGroups, conf->get<std::string>(kPrefetchRowGroups, "1")); - mutableConf->setValue( - velox::connector::hive::HiveConfig::kLoadQuantum, conf->get<std::string>(kLoadQuantum, "268435456")); // 256M - mutableConf->setValue( + backendConf_->get<std::string>(kMaxCoalescedDistanceBytes, "1048576")); // 1M + connectorConf->setValue( + velox::connector::hive::HiveConfig::kPrefetchRowGroups, backendConf_->get<std::string>(kPrefetchRowGroups, "1")); + connectorConf->setValue( + velox::connector::hive::HiveConfig::kLoadQuantum, + backendConf_->get<std::string>(kLoadQuantum, "268435456")); // 256M + connectorConf->setValue( velox::connector::hive::HiveConfig::kFooterEstimatedSize, - conf->get<std::string>(kDirectorySizeGuess, "32768")); // 32K - mutableConf->setValue( + backendConf_->get<std::string>(kDirectorySizeGuess, "32768")); // 32K + connectorConf->setValue( velox::connector::hive::HiveConfig::kFilePreloadThreshold, - conf->get<std::string>(kFilePreloadThreshold, "1048576")); // 1M + backendConf_->get<std::string>(kFilePreloadThreshold, "1048576")); // 1M // set cache_prefetch_min_pct default as 0 to force all loads are prefetched in DirectBufferInput. 
- FLAGS_cache_prefetch_min_pct = conf->get<int>(kCachePrefetchMinPct, 0); + FLAGS_cache_prefetch_min_pct = backendConf_->get<int>(kCachePrefetchMinPct, 0); - auto ioThreads = conf->get<int32_t>(kVeloxIOThreads, kVeloxIOThreadsDefault); + auto ioThreads = backendConf_->get<int32_t>(kVeloxIOThreads, kVeloxIOThreadsDefault); if (ioThreads > 0) { ioExecutor_ = std::make_unique<folly::IOThreadPoolExecutor>(ioThreads); } velox::connector::registerConnector(std::make_shared<velox::connector::hive::HiveConnector>( kHiveConnectorId, - std::make_shared<facebook::velox::core::MemConfig>(mutableConf->valuesCopy()), + std::make_shared<facebook::velox::core::MemConfig>(connectorConf->valuesCopy()), ioExecutor_.get())); } -void VeloxBackend::initUdf(const std::shared_ptr<const facebook::velox::Config>& conf) { - auto got = conf->get<std::string>(kVeloxUdfLibraryPaths, ""); +void VeloxBackend::initUdf() { + auto got = backendConf_->get<std::string>(kVeloxUdfLibraryPaths, ""); if (!got.empty()) { auto udfLoader = gluten::UdfLoader::getInstance(); udfLoader->loadUdfLibraries(got); @@ -335,7 +265,7 @@ VeloxBackend* VeloxBackend::get() { return instance_.get(); } -const std::unordered_map<std::string, std::string>& VeloxBackend::getBackendConf() const { +const std::shared_ptr<const facebook::velox::Config> VeloxBackend::getBackendConf() const { return backendConf_; } diff --git a/cpp/velox/compute/VeloxBackend.h b/cpp/velox/compute/VeloxBackend.h index a601d715c..891bdd2cc 100644 --- a/cpp/velox/compute/VeloxBackend.h +++ b/cpp/velox/compute/VeloxBackend.h @@ -53,7 +53,7 @@ class VeloxBackend { facebook::velox::cache::AsyncDataCache* getAsyncDataCache() const; - const std::unordered_map<std::string, std::string>& getBackendConf() const; + const std::shared_ptr<const facebook::velox::Config> getBackendConf() const; void tearDown() { // Destruct IOThreadPoolExecutor will join all threads. 
@@ -68,11 +68,11 @@ class VeloxBackend { } void init(const std::unordered_map<std::string, std::string>& conf); - void initCache(const std::shared_ptr<const facebook::velox::Config>& conf); - void initConnector(const std::shared_ptr<const facebook::velox::Config>& conf); - void initUdf(const std::shared_ptr<const facebook::velox::Config>& conf); + void initCache(); + void initConnector(); + void initUdf(); - void initJolFilesystem(const std::shared_ptr<const facebook::velox::Config>& conf); + void initJolFilesystem(); std::string getCacheFilePrefix() { return "cache." + boost::lexical_cast<std::string>(boost::uuids::random_generator()()) + "."; @@ -90,7 +90,7 @@ class VeloxBackend { std::string cachePathPrefix_; std::string cacheFilePrefix_; - std::unordered_map<std::string, std::string> backendConf_{}; + std::shared_ptr<const facebook::velox::Config> backendConf_; }; } // namespace gluten diff --git a/cpp/velox/compute/VeloxRuntime.cc b/cpp/velox/compute/VeloxRuntime.cc index 8314d0bd2..a3e8c159c 100644 --- a/cpp/velox/compute/VeloxRuntime.cc +++ b/cpp/velox/compute/VeloxRuntime.cc @@ -26,7 +26,7 @@ #include "compute/ResultIterator.h" #include "compute/Runtime.h" #include "compute/VeloxPlanConverter.h" -#include "config/GlutenConfig.h" +#include "config/VeloxConfig.h" #include "operators/serializer/VeloxRowToColumnarConverter.h" #include "shuffle/VeloxShuffleReader.h" #include "shuffle/VeloxShuffleWriter.h" @@ -256,7 +256,7 @@ std::unique_ptr<ColumnarBatchSerializer> VeloxRuntime::createColumnarBatchSerial } void VeloxRuntime::dumpConf(const std::string& path) { - auto backendConf = VeloxBackend::get()->getBackendConf(); + auto backendConf = VeloxBackend::get()->getBackendConf()->valuesCopy(); auto allConf = backendConf; for (const auto& pair : confMap_) { diff --git a/cpp/velox/compute/WholeStageResultIterator.cc b/cpp/velox/compute/WholeStageResultIterator.cc index 83749061c..006b37588 100644 --- a/cpp/velox/compute/WholeStageResultIterator.cc +++ 
b/cpp/velox/compute/WholeStageResultIterator.cc @@ -17,13 +17,11 @@ #include "WholeStageResultIterator.h" #include "VeloxBackend.h" #include "VeloxRuntime.h" -#include "config/GlutenConfig.h" +#include "config/VeloxConfig.h" #include "velox/connectors/hive/HiveConfig.h" #include "velox/connectors/hive/HiveConnectorSplit.h" #include "velox/exec/PlanNodeStats.h" -#include "utils/ConfigExtractor.h" - #ifdef ENABLE_HDFS #include "utils/HdfsUtils.h" #endif @@ -33,49 +31,6 @@ using namespace facebook; namespace gluten { namespace { -// Velox configs -const std::string kHiveConnectorId = "test-hive"; - -// memory -const std::string kSpillStrategy = "spark.gluten.sql.columnar.backend.velox.spillStrategy"; -const std::string kSpillStrategyDefaultValue = "auto"; -const std::string kSpillThreadNum = "spark.gluten.sql.columnar.backend.velox.spillThreadNum"; -const uint32_t kSpillThreadNumDefaultValue = 0; -const std::string kAggregationSpillEnabled = "spark.gluten.sql.columnar.backend.velox.aggregationSpillEnabled"; -const std::string kJoinSpillEnabled = "spark.gluten.sql.columnar.backend.velox.joinSpillEnabled"; -const std::string kOrderBySpillEnabled = "spark.gluten.sql.columnar.backend.velox.orderBySpillEnabled"; - -// spill config -// refer to -// https://github.com/facebookincubator/velox/blob/95f3e80e77d046c12fbc79dc529366be402e9c2b/velox/docs/configs.rst#spilling -const std::string kMaxSpillLevel = "spark.gluten.sql.columnar.backend.velox.maxSpillLevel"; -const std::string kMaxSpillFileSize = "spark.gluten.sql.columnar.backend.velox.maxSpillFileSize"; -const std::string kSpillStartPartitionBit = "spark.gluten.sql.columnar.backend.velox.spillStartPartitionBit"; -const std::string kSpillPartitionBits = "spark.gluten.sql.columnar.backend.velox.spillPartitionBits"; -const std::string kMaxSpillRunRows = "spark.gluten.sql.columnar.backend.velox.MaxSpillRunRows"; -const std::string kMaxSpillBytes = "spark.gluten.sql.columnar.backend.velox.MaxSpillBytes"; -const std::string 
kSpillWriteBufferSize = "spark.gluten.sql.columnar.backend.velox.spillWriteBufferSize"; - -const std::string kSpillableReservationGrowthPct = - "spark.gluten.sql.columnar.backend.velox.spillableReservationGrowthPct"; -const std::string kSpillCompressionKind = "spark.io.compression.codec"; -const std::string kMaxPartialAggregationMemoryRatio = - "spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio"; -const std::string kMaxExtendedPartialAggregationMemoryRatio = - "spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio"; -const std::string kAbandonPartialAggregationMinPct = - "spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct"; -const std::string kAbandonPartialAggregationMinRows = - "spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows"; - -// execution -const std::string kBloomFilterExpectedNumItems = "spark.gluten.sql.columnar.backend.velox.bloomFilter.expectedNumItems"; -const std::string kBloomFilterNumBits = "spark.gluten.sql.columnar.backend.velox.bloomFilter.numBits"; -const std::string kBloomFilterMaxNumBits = "spark.gluten.sql.columnar.backend.velox.bloomFilter.maxNumBits"; -const std::string kVeloxSplitPreloadPerDriver = "spark.gluten.sql.columnar.backend.velox.SplitPreloadPerDriver"; - -// write fies -const std::string kMaxPartitions = "spark.gluten.sql.columnar.backend.velox.maxPartitionsPerWritersSession"; // metrics const std::string kDynamicFiltersProduced = "dynamicFiltersProduced"; diff --git a/cpp/velox/config/VeloxConfig.h b/cpp/velox/config/VeloxConfig.h new file mode 100644 index 000000000..a3112f83e --- /dev/null +++ b/cpp/velox/config/VeloxConfig.h @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "config/GlutenConfig.h" + +namespace gluten { +// memory +const std::string kSpillStrategy = "spark.gluten.sql.columnar.backend.velox.spillStrategy"; +const std::string kSpillStrategyDefaultValue = "auto"; +const std::string kSpillThreadNum = "spark.gluten.sql.columnar.backend.velox.spillThreadNum"; +const uint32_t kSpillThreadNumDefaultValue = 0; +const std::string kAggregationSpillEnabled = "spark.gluten.sql.columnar.backend.velox.aggregationSpillEnabled"; +const std::string kJoinSpillEnabled = "spark.gluten.sql.columnar.backend.velox.joinSpillEnabled"; +const std::string kOrderBySpillEnabled = "spark.gluten.sql.columnar.backend.velox.orderBySpillEnabled"; + +// spill config +// refer to +// https://github.com/facebookincubator/velox/blob/95f3e80e77d046c12fbc79dc529366be402e9c2b/velox/docs/configs.rst#spilling +const std::string kMaxSpillLevel = "spark.gluten.sql.columnar.backend.velox.maxSpillLevel"; +const std::string kMaxSpillFileSize = "spark.gluten.sql.columnar.backend.velox.maxSpillFileSize"; +const std::string kSpillStartPartitionBit = "spark.gluten.sql.columnar.backend.velox.spillStartPartitionBit"; +const std::string kSpillPartitionBits = "spark.gluten.sql.columnar.backend.velox.spillPartitionBits"; +const std::string kMaxSpillRunRows = "spark.gluten.sql.columnar.backend.velox.MaxSpillRunRows"; +const std::string kMaxSpillBytes = 
"spark.gluten.sql.columnar.backend.velox.MaxSpillBytes"; +const std::string kSpillWriteBufferSize = "spark.gluten.sql.columnar.backend.velox.spillWriteBufferSize"; +const uint64_t kMaxSpillFileSizeDefault = 1L * 1024 * 1024 * 1024; + +const std::string kSpillableReservationGrowthPct = + "spark.gluten.sql.columnar.backend.velox.spillableReservationGrowthPct"; +const std::string kSpillCompressionKind = "spark.io.compression.codec"; +const std::string kMaxPartialAggregationMemoryRatio = + "spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio"; +const std::string kMaxExtendedPartialAggregationMemoryRatio = + "spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio"; +const std::string kAbandonPartialAggregationMinPct = + "spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct"; +const std::string kAbandonPartialAggregationMinRows = + "spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows"; + +// execution +const std::string kBloomFilterExpectedNumItems = "spark.gluten.sql.columnar.backend.velox.bloomFilter.expectedNumItems"; +const std::string kBloomFilterNumBits = "spark.gluten.sql.columnar.backend.velox.bloomFilter.numBits"; +const std::string kBloomFilterMaxNumBits = "spark.gluten.sql.columnar.backend.velox.bloomFilter.maxNumBits"; +const std::string kVeloxSplitPreloadPerDriver = "spark.gluten.sql.columnar.backend.velox.SplitPreloadPerDriver"; + +const std::string kEnableUserExceptionStacktrace = + "spark.gluten.sql.columnar.backend.velox.enableUserExceptionStacktrace"; +const bool kEnableUserExceptionStacktraceDefault = true; + +const std::string kEnableSystemExceptionStacktrace = + "spark.gluten.sql.columnar.backend.velox.enableSystemExceptionStacktrace"; +const bool kEnableSystemExceptionStacktraceDefault = true; + +const std::string kMemoryUseHugePages = "spark.gluten.sql.columnar.backend.velox.memoryUseHugePages"; +const bool kMemoryUseHugePagesDefault = false; + +const std::string 
kHiveConnectorId = "test-hive"; +const std::string kVeloxCacheEnabled = "spark.gluten.sql.columnar.backend.velox.cacheEnabled"; + +// memory cache +const std::string kVeloxMemCacheSize = "spark.gluten.sql.columnar.backend.velox.memCacheSize"; +const uint64_t kVeloxMemCacheSizeDefault = 1073741824; // 1G + +// ssd cache +const std::string kVeloxSsdCacheSize = "spark.gluten.sql.columnar.backend.velox.ssdCacheSize"; +const uint64_t kVeloxSsdCacheSizeDefault = 1073741824; // 1G +const std::string kVeloxSsdCachePath = "spark.gluten.sql.columnar.backend.velox.ssdCachePath"; +const std::string kVeloxSsdCachePathDefault = "/tmp/"; +const std::string kVeloxSsdCacheShards = "spark.gluten.sql.columnar.backend.velox.ssdCacheShards"; +const uint32_t kVeloxSsdCacheShardsDefault = 1; +const std::string kVeloxSsdCacheIOThreads = "spark.gluten.sql.columnar.backend.velox.ssdCacheIOThreads"; +const uint32_t kVeloxSsdCacheIOThreadsDefault = 1; +const std::string kVeloxSsdODirectEnabled = "spark.gluten.sql.columnar.backend.velox.ssdODirect"; + +// async +const std::string kVeloxIOThreads = "spark.gluten.sql.columnar.backend.velox.IOThreads"; +const uint32_t kVeloxIOThreadsDefault = 0; +const std::string kVeloxAsyncTimeoutOnTaskStopping = + "spark.gluten.sql.columnar.backend.velox.asyncTimeoutOnTaskStopping"; +const int32_t kVeloxAsyncTimeoutOnTaskStoppingDefault = 30000; // 30s + +// udf +const std::string kVeloxUdfLibraryPaths = "spark.gluten.sql.columnar.backend.velox.udfLibraryPaths"; + +// backtrace allocation +const std::string kBacktraceAllocation = "spark.gluten.backtrace.allocation"; + +// VeloxShuffleReader print flag. 
+const std::string kVeloxShuffleReaderPrintFlag = "spark.gluten.velox.shuffleReaderPrintFlag"; + +const std::string kVeloxFileHandleCacheEnabled = "spark.gluten.sql.columnar.backend.velox.fileHandleCacheEnabled"; +const bool kVeloxFileHandleCacheEnabledDefault = false; + +/* configs for file read in velox*/ +const std::string kDirectorySizeGuess = "spark.gluten.sql.columnar.backend.velox.directorySizeGuess"; +const std::string kFilePreloadThreshold = "spark.gluten.sql.columnar.backend.velox.filePreloadThreshold"; +const std::string kPrefetchRowGroups = "spark.gluten.sql.columnar.backend.velox.prefetchRowGroups"; +const std::string kLoadQuantum = "spark.gluten.sql.columnar.backend.velox.loadQuantum"; +const std::string kMaxCoalescedDistanceBytes = "spark.gluten.sql.columnar.backend.velox.maxCoalescedDistanceBytes"; +const std::string kMaxCoalescedBytes = "spark.gluten.sql.columnar.backend.velox.maxCoalescedBytes"; +const std::string kCachePrefetchMinPct = "spark.gluten.sql.columnar.backend.velox.cachePrefetchMinPct"; + +// write fies +const std::string kMaxPartitions = "spark.gluten.sql.columnar.backend.velox.maxPartitionsPerWritersSession"; + +const std::string kGlogVerboseLevel = "spark.gluten.sql.columnar.backend.velox.glogVerboseLevel"; +const uint32_t kGlogVerboseLevelDefault = 0; +const uint32_t kGlogVerboseLevelMaximum = 99; +const std::string kGlogSeverityLevel = "spark.gluten.sql.columnar.backend.velox.glogSeverityLevel"; +const uint32_t kGlogSeverityLevelDefault = 1; +} // namespace gluten --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org