This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 71645a391c [debug](FileCache) fail over to remote file reader if local
cache failed (#24097)
71645a391c is described below
commit 71645a391c2cf45d955cd66568117f3d46ea4984
Author: Ashin Gau <[email protected]>
AuthorDate: Sun Sep 10 12:26:17 2023 +0800
[debug](FileCache) fail over to remote file reader if local cache failed
(#24097)
Fail over to the remote file reader if reading from the local file cache
fails. This increases the robustness of the file cache.
---
be/src/common/config.cpp | 1 +
be/src/common/config.h | 2 +
.../io/cache/block/cached_remote_file_reader.cpp | 51 ++++++++++++++++------
be/src/io/cache/block/cached_remote_file_reader.h | 3 ++
4 files changed, 43 insertions(+), 14 deletions(-)
diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index eb03e00450..418fc64ff2 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -974,6 +974,7 @@ DEFINE_Validator(file_cache_min_file_segment_size, [](const
int64_t config) -> b
});
DEFINE_Bool(clear_file_cache, "false");
DEFINE_Bool(enable_file_cache_query_limit, "false");
+DEFINE_mInt32(file_cache_wait_sec_after_fail, "0"); // // zero for no waiting
and retrying
DEFINE_mInt32(index_cache_entry_stay_time_after_lookup_s, "1800");
DEFINE_mInt32(inverted_index_cache_stale_sweep_time_sec, "600");
diff --git a/be/src/common/config.h b/be/src/common/config.h
index c37c58ea98..83b7dbd95f 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -1013,6 +1013,8 @@ DECLARE_Int64(file_cache_min_file_segment_size);
DECLARE_Int64(file_cache_max_file_segment_size);
DECLARE_Bool(clear_file_cache);
DECLARE_Bool(enable_file_cache_query_limit);
+// only for debug, will be removed after finding out the root cause
+DECLARE_mInt32(file_cache_wait_sec_after_fail); // zero for no waiting and
retrying
// inverted index searcher cache
// cache entry stay time after lookup
diff --git a/be/src/io/cache/block/cached_remote_file_reader.cpp
b/be/src/io/cache/block/cached_remote_file_reader.cpp
index e2a629fd4c..59e8c40640 100644
--- a/be/src/io/cache/block/cached_remote_file_reader.cpp
+++ b/be/src/io/cache/block/cached_remote_file_reader.cpp
@@ -89,21 +89,9 @@ std::pair<size_t, size_t>
CachedRemoteFileReader::_align_size(size_t offset,
return std::make_pair(align_left, align_size);
}
-Status CachedRemoteFileReader::read_at_impl(size_t offset, Slice result,
size_t* bytes_read,
- const IOContext* io_ctx) {
- DCHECK(!closed());
- DCHECK(io_ctx);
- if (offset > size()) {
- return Status::IOError(
- fmt::format("offset exceeds file size(offset: {), file size:
{}, path: {})", offset,
- size(), path().native()));
- }
+Status CachedRemoteFileReader::_read_from_cache(size_t offset, Slice result,
size_t* bytes_read,
+ const IOContext* io_ctx) {
size_t bytes_req = result.size;
- bytes_req = std::min(bytes_req, size() - offset);
- if (UNLIKELY(bytes_req == 0)) {
- *bytes_read = 0;
- return Status::OK();
- }
ReadStatistics stats;
auto [align_left, align_size] = _align_size(offset, bytes_req);
CacheContext cache_context(io_ctx);
@@ -224,6 +212,41 @@ Status CachedRemoteFileReader::read_at_impl(size_t offset,
Slice result, size_t*
return Status::OK();
}
+Status CachedRemoteFileReader::read_at_impl(size_t offset, Slice result,
size_t* bytes_read,
+ const IOContext* io_ctx) {
+ DCHECK(!closed());
+ DCHECK(io_ctx);
+ if (offset > size()) {
+ return Status::IOError(
+ fmt::format("offset exceeds file size(offset: {), file size:
{}, path: {})", offset,
+ size(), path().native()));
+ }
+ size_t bytes_req = result.size;
+ bytes_req = std::min(bytes_req, size() - offset);
+ if (UNLIKELY(bytes_req == 0)) {
+ *bytes_read = 0;
+ return Status::OK();
+ }
+ Status cache_st = _read_from_cache(offset, result, bytes_read, io_ctx);
+ if (UNLIKELY(!cache_st.ok())) {
+ if (config::file_cache_wait_sec_after_fail > 0) {
+ // only for debug, wait and retry to load data from file cache
+ // return error if failed again
+ LOG(WARNING) << "Failed to read data from file cache, and wait "
+ << config::file_cache_wait_sec_after_fail
+ << " seconds to reload data: " <<
cache_st.to_string();
+ sleep(config::file_cache_wait_sec_after_fail);
+ cache_st = _read_from_cache(offset, result, bytes_read, io_ctx);
+ } else {
+ // fail over to remote file reader, and return the status of
remote read
+ LOG(WARNING) << "Failed to read data from file cache, and fail
over to remote file: "
+ << cache_st.to_string();
+ return _remote_file_reader->read_at(offset, result, bytes_read,
io_ctx);
+ }
+ }
+ return cache_st;
+}
+
void CachedRemoteFileReader::_update_state(const ReadStatistics& read_stats,
FileCacheStatistics* statis) const {
if (statis == nullptr) {
diff --git a/be/src/io/cache/block/cached_remote_file_reader.h
b/be/src/io/cache/block/cached_remote_file_reader.h
index 51e9e562a2..7a8e0e82ec 100644
--- a/be/src/io/cache/block/cached_remote_file_reader.h
+++ b/be/src/io/cache/block/cached_remote_file_reader.h
@@ -77,6 +77,9 @@ private:
int64_t local_write_timer = 0;
};
void _update_state(const ReadStatistics& stats, FileCacheStatistics*
state) const;
+
+ Status _read_from_cache(size_t offset, Slice result, size_t* bytes_read,
+ const IOContext* io_ctx);
};
} // namespace io
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]