This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new b0a9aaf4943 [opt](file-meta-cache) reduce file meta cache size
(#32340) (#32367)
b0a9aaf4943 is described below
commit b0a9aaf49438e2a0eae9b139f0a8a188ead75588
Author: Mingyu Chen <[email protected]>
AuthorDate: Mon Mar 18 13:40:36 2024 +0800
[opt](file-meta-cache) reduce file meta cache size (#32340) (#32367)
pick part of #32340
2. Reduce the default capacity of the file meta cache from 20000 to 1000.
Also reduce the default capacity of the HDFS file handle cache from
20000 to 1000.
3. Change how we decide whether to enable the file meta cache when querying.
If the number of files to be read is not less than 1/3 of the
file meta cache's capacity, the file meta cache
will be disabled for this query, because the cache is useless if there are
too many files.
---
be/src/common/config.cpp | 5 +++--
be/src/vec/exec/scan/vfile_scanner.cpp | 5 ++---
be/src/vec/exec/scan/vfile_scanner.h | 9 +++++++++
3 files changed, 14 insertions(+), 5 deletions(-)
diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index f523649f649..27efa016fab 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -1051,9 +1051,10 @@ DEFINE_Bool(enable_feature_binlog, "false");
// enable set in BitmapValue
DEFINE_Bool(enable_set_in_bitmap_value, "false");
-DEFINE_Int64(max_hdfs_file_handle_cache_num, "20000");
+DEFINE_Int64(max_hdfs_file_handle_cache_num, "1000");
DEFINE_Int32(max_hdfs_file_handle_cache_time_sec, "3600");
-DEFINE_Int64(max_external_file_meta_cache_num, "20000");
+DEFINE_Int64(max_external_file_meta_cache_num, "1000");
+
// Apply delete pred in cumu compaction
DEFINE_mBool(enable_delete_when_cumu_compaction, "false");
diff --git a/be/src/vec/exec/scan/vfile_scanner.cpp
b/be/src/vec/exec/scan/vfile_scanner.cpp
index 988dab0a502..dc4d643a80d 100644
--- a/be/src/vec/exec/scan/vfile_scanner.cpp
+++ b/be/src/vec/exec/scan/vfile_scanner.cpp
@@ -737,9 +737,8 @@ Status VFileScanner::_get_next_reader() {
std::unique_ptr<ParquetReader> parquet_reader =
ParquetReader::create_unique(
_profile, *_params, range,
_state->query_options().batch_size, tz,
_io_ctx.get(), _state,
- config::max_external_file_meta_cache_num <= 0
- ? nullptr
- : ExecEnv::GetInstance()->file_meta_cache(),
+ _shoudl_enable_file_meta_cache() ?
ExecEnv::GetInstance()->file_meta_cache()
+ : nullptr,
_state->query_options().enable_parquet_lazy_mat);
{
SCOPED_TIMER(_open_reader_timer);
diff --git a/be/src/vec/exec/scan/vfile_scanner.h
b/be/src/vec/exec/scan/vfile_scanner.h
index 58355cdbe36..5261337f000 100644
--- a/be/src/vec/exec/scan/vfile_scanner.h
+++ b/be/src/vec/exec/scan/vfile_scanner.h
@@ -216,5 +216,14 @@ private:
_counter.num_rows_unselected = 0;
_counter.num_rows_filtered = 0;
}
+
+ // Enable the file meta cache only when
+ // 1. max_external_file_meta_cache_num is > 0, and
+ // 2. the number of files to read (_ranges.size()) is less than 1/3 of the cache's capacity.
+ // Otherwise, the cache miss rate will be high
+ bool _shoudl_enable_file_meta_cache() {
+ return config::max_external_file_meta_cache_num > 0 &&
+ _ranges.size() < config::max_external_file_meta_cache_num / 3;
+ }
};
} // namespace doris::vectorized
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]