This is an automated email from the ASF dual-hosted git repository.

jianliangqi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 3acf026172 [Enhancement](inverted index) add bkd index query cache to improve perf (#23952)
3acf026172 is described below

commit 3acf0261724af7add13ac52269d19eac2e192f18
Author: airborne12 <[email protected]>
AuthorDate: Thu Sep 7 10:24:27 2023 +0800

    [Enhancement](inverted index) add bkd index query cache to improve perf (#23952)
    
    Use the query cache to accelerate bkd query performance, especially under high concurrency.
---
 .../olap/rowset/segment_v2/inverted_index_cache.h  |   5 +-
 .../rowset/segment_v2/inverted_index_reader.cpp    | 105 +++++++++++++--------
 .../olap/rowset/segment_v2/inverted_index_reader.h |  13 ++-
 3 files changed, 77 insertions(+), 46 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/inverted_index_cache.h b/be/src/olap/rowset/segment_v2/inverted_index_cache.h
index 388ee02ee9..c67e17ddda 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_cache.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index_cache.h
@@ -199,7 +199,7 @@ public:
         io::Path index_path;               // index file path
         std::string column_name;           // column name
         InvertedIndexQueryType query_type; // query type
-        std::wstring value;                // query value
+        std::string value;                 // query value
 
         // Encode to a flat binary which can be used as LRUCache's key
         std::string encode() const {
@@ -213,8 +213,7 @@ public:
             }
             key_buf.append(query_type_str);
             key_buf.append("/");
-            auto str = lucene_wcstoutf8string(value.c_str(), value.length());
-            key_buf.append(str);
+            key_buf.append(value);
             return key_buf;
         }
     };
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
index c2a622699a..a521da394c 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
@@ -53,7 +53,6 @@
 #include "io/fs/file_system.h"
 #include "olap/key_coder.h"
 #include "olap/olap_common.h"
-#include "olap/rowset/segment_v2/inverted_index_cache.h"
 #include "olap/rowset/segment_v2/inverted_index_compound_directory.h"
 #include "olap/rowset/segment_v2/inverted_index_desc.h"
 #include "olap/types.h"
@@ -178,7 +177,7 @@ Status 
InvertedIndexReader::read_null_bitmap(InvertedIndexQueryCacheHandle* cach
                                                                             
_index_meta.index_id());
         auto index_file_path = index_dir / index_file_name;
         InvertedIndexQueryCache::CacheKey cache_key {
-                index_file_path, "", InvertedIndexQueryType::UNKNOWN_QUERY, 
L"null_bitmap"};
+                index_file_path, "", InvertedIndexQueryType::UNKNOWN_QUERY, 
"null_bitmap"};
         auto cache = InvertedIndexQueryCache::instance();
         if (cache->lookup(cache_key, cache_handle)) {
             return Status::OK();
@@ -331,9 +330,9 @@ Status FullTextIndexReader::query(OlapReaderStatistics* 
stats, const std::string
         roaring::Roaring query_match_bitmap;
         bool null_bitmap_already_read = false;
         if (query_type == InvertedIndexQueryType::MATCH_PHRASE_QUERY) {
-            std::wstring str_tokens;
+            std::wstring wstr_tokens;
             for (auto& token : analyse_result) {
-                str_tokens += token;
+                wstr_tokens += token;
             }
 
             auto cache = InvertedIndexQueryCache::instance();
@@ -341,6 +340,7 @@ Status FullTextIndexReader::query(OlapReaderStatistics* 
stats, const std::string
             cache_key.index_path = index_file_path;
             cache_key.column_name = column_name;
             cache_key.query_type = InvertedIndexQueryType::MATCH_PHRASE_QUERY;
+            auto str_tokens = lucene_wcstoutf8string(wstr_tokens.c_str(), 
wstr_tokens.length());
             cache_key.value.swap(str_tokens);
             InvertedIndexQueryCacheHandle cache_handle;
             std::shared_ptr<roaring::Roaring> term_match_bitmap = nullptr;
@@ -375,9 +375,10 @@ Status FullTextIndexReader::query(OlapReaderStatistics* 
stats, const std::string
                 // try to get term bitmap match result from cache to avoid 
query index on cache hit
                 auto cache = InvertedIndexQueryCache::instance();
                 // use EQUAL_QUERY type here since cache is for each term/token
-                InvertedIndexQueryCache::CacheKey cache_key {index_file_path, 
column_name,
-                                                             
InvertedIndexQueryType::EQUAL_QUERY,
-                                                             token_ws};
+                auto token = lucene_wcstoutf8string(token_ws.c_str(), 
token_ws.length());
+
+                InvertedIndexQueryCache::CacheKey cache_key {
+                        index_file_path, column_name, 
InvertedIndexQueryType::EQUAL_QUERY, token};
                 VLOG_DEBUG << "cache_key:" << cache_key.encode();
                 InvertedIndexQueryCacheHandle cache_handle;
                 if (cache->lookup(cache_key, &cache_handle)) {
@@ -474,7 +475,7 @@ Status 
StringTypeInvertedIndexReader::query(OlapReaderStatistics* stats,
 
     // try to get query bitmap result from cache and return immediately on 
cache hit
     InvertedIndexQueryCache::CacheKey cache_key {index_file_path, column_name, 
query_type,
-                                                 search_str_ws};
+                                                 search_str};
     auto cache = InvertedIndexQueryCache::instance();
     InvertedIndexQueryCacheHandle cache_handle;
     if (cache->lookup(cache_key, &cache_handle)) {
@@ -593,6 +594,7 @@ BkdIndexReader::BkdIndexReader(io::FileSystemSPtr fs, const 
std::string& path,
         LOG(WARNING) << "bkd index: " << index_file.string() << " not exist.";
         return;
     }
+    _file_full_path = index_file;
     _compoundReader = std::make_unique<DorisCompoundReader>(
             DorisCompoundDirectory::getDirectory(fs, index_dir.c_str()), 
index_file_name.c_str(),
             config::inverted_index_read_buffer_size);
@@ -606,9 +608,8 @@ Status BkdIndexReader::new_iterator(OlapReaderStatistics* 
stats,
 
 Status BkdIndexReader::bkd_query(OlapReaderStatistics* stats, const 
std::string& column_name,
                                  const void* query_value, 
InvertedIndexQueryType query_type,
-                                 
std::shared_ptr<lucene::util::bkd::bkd_reader>& r,
+                                 
std::shared_ptr<lucene::util::bkd::bkd_reader> r,
                                  InvertedIndexVisitor* visitor) {
-    RETURN_IF_ERROR(get_bkd_reader(r));
     char tmp[r->bytes_per_dim_];
     switch (query_type) {
     case InvertedIndexQueryType::EQUAL_QUERY: {
@@ -643,6 +644,20 @@ Status BkdIndexReader::try_query(OlapReaderStatistics* 
stats, const std::string&
                                  uint32_t* count) {
     auto visitor = std::make_unique<InvertedIndexVisitor>(nullptr, query_type, 
true);
     std::shared_ptr<lucene::util::bkd::bkd_reader> r;
+    RETURN_IF_ERROR(get_bkd_reader(&r));
+    std::string query_str;
+    _value_key_coder->full_encode_ascending(query_value, &query_str);
+
+    InvertedIndexQueryCache::CacheKey cache_key {_file_full_path, column_name, 
query_type,
+                                                 query_str};
+    auto cache = InvertedIndexQueryCache::instance();
+    InvertedIndexQueryCacheHandle cache_handler;
+    roaring::Roaring bit_map;
+    auto cache_status = handle_cache(cache, cache_key, &cache_handler, stats, 
&bit_map);
+    if (cache_status.ok()) {
+        *count = bit_map.cardinality();
+        return Status::OK();
+    }
     try {
         auto st = bkd_query(stats, column_name, query_value, query_type, r, 
visitor.get());
         if (!st.ok()) {
@@ -662,34 +677,42 @@ Status BkdIndexReader::try_query(OlapReaderStatistics* 
stats, const std::string&
     return Status::OK();
 }
 
+Status BkdIndexReader::handle_cache(InvertedIndexQueryCache* cache,
+                                    const InvertedIndexQueryCache::CacheKey& 
cache_key,
+                                    InvertedIndexQueryCacheHandle* 
cache_handler,
+                                    OlapReaderStatistics* stats, 
roaring::Roaring* bit_map) {
+    if (cache->lookup(cache_key, cache_handler)) {
+        stats->inverted_index_query_cache_hit++;
+        SCOPED_RAW_TIMER(&stats->inverted_index_query_bitmap_copy_timer);
+        *bit_map = *cache_handler->get_bitmap();
+        return Status::OK();
+    } else {
+        stats->inverted_index_query_cache_miss++;
+        return Status::Error<ErrorCode::KEY_NOT_FOUND>("cache miss");
+    }
+}
+
 Status BkdIndexReader::query(OlapReaderStatistics* stats, const std::string& 
column_name,
                              const void* query_value, InvertedIndexQueryType 
query_type,
                              roaring::Roaring* bit_map) {
     SCOPED_RAW_TIMER(&stats->inverted_index_query_timer);
 
-    io::Path path(_path);
-    auto index_dir = path.parent_path();
-    auto index_file_name =
-            InvertedIndexDescriptor::get_index_file_name(path.filename(), 
_index_meta.index_id());
-    auto index_file_path = index_dir / index_file_name;
-    // std::string query_str {(const char *)query_value};
-
-    // // try to get query bitmap result from cache and return immediately on 
cache hit
-    // InvertedIndexQueryCache::CacheKey cache_key
-    //     {index_file_path, column_name, query_type, 
std::wstring(query_str.begin(), query_str.end())};
-    // auto cache = InvertedIndexQueryCache::instance();
-    // InvertedIndexQueryCacheHandle cache_handle;
-    // if (cache->lookup(cache_key, &cache_handle)) {
-    //     stats->inverted_index_query_cache_hit++;
-    //     SCOPED_RAW_TIMER(&stats->inverted_index_query_bitmap_copy_timer);
-    //     *bit_map = *cache_handle.match_bitmap();
-    //     return Status::OK();
-    // } else {
-    //     stats->inverted_index_query_cache_miss++;
-    // }
-
     auto visitor = std::make_unique<InvertedIndexVisitor>(bit_map, query_type);
     std::shared_ptr<lucene::util::bkd::bkd_reader> r;
+    RETURN_IF_ERROR(get_bkd_reader(&r));
+
+    std::string query_str;
+    _value_key_coder->full_encode_ascending(query_value, &query_str);
+
+    InvertedIndexQueryCache::CacheKey cache_key {_file_full_path, column_name, 
query_type,
+                                                 query_str};
+    auto cache = InvertedIndexQueryCache::instance();
+    InvertedIndexQueryCacheHandle cache_handler;
+    auto cache_status = handle_cache(cache, cache_key, &cache_handler, stats, 
bit_map);
+    if (cache_status.ok()) {
+        return Status::OK();
+    }
+
     try {
         auto st = bkd_query(stats, column_name, query_value, query_type, r, 
visitor.get());
         if (!st.ok()) {
@@ -705,17 +728,17 @@ Status BkdIndexReader::query(OlapReaderStatistics* stats, 
const std::string& col
                 "BKD Query CLuceneError Occurred, error msg: {}", e.what());
     }
 
-    // // add to cache
-    // roaring::Roaring* term_match_bitmap = new roaring::Roaring(*bit_map);
-    // term_match_bitmap->runOptimize();
-    // cache->insert(cache_key, term_match_bitmap, &cache_handle);
+    std::shared_ptr<roaring::Roaring> query_bitmap = 
std::make_shared<roaring::Roaring>(*bit_map);
+    query_bitmap->runOptimize();
+    cache->insert(cache_key, query_bitmap, &cache_handler);
 
     VLOG_DEBUG << "BKD index search column: " << column_name
                << " result: " << bit_map->cardinality();
+
     return Status::OK();
 }
 
-Status 
BkdIndexReader::get_bkd_reader(std::shared_ptr<lucene::util::bkd::bkd_reader>& 
bkdReader) {
+Status 
BkdIndexReader::get_bkd_reader(std::shared_ptr<lucene::util::bkd::bkd_reader>* 
bkdReader) {
     // bkd file reader
     if (_compoundReader == nullptr) {
         return Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>(
@@ -739,18 +762,18 @@ Status 
BkdIndexReader::get_bkd_reader(std::shared_ptr<lucene::util::bkd::bkd_rea
                                                                        
err.what());
     }
 
-    bkdReader = 
std::make_shared<lucene::util::bkd::bkd_reader>(data_in.release());
-    if (0 == bkdReader->read_meta(meta_in.get())) {
+    *bkdReader = 
std::make_shared<lucene::util::bkd::bkd_reader>(data_in.release());
+    if (0 == (*bkdReader)->read_meta(meta_in.get())) {
         VLOG_NOTICE << "bkd index file is empty:" << 
_compoundReader->toString();
         return Status::EndOfFile("bkd index file is empty");
     }
 
-    bkdReader->read_index(index_in.get());
+    (*bkdReader)->read_index(index_in.get());
 
-    _type_info = get_scalar_type_info((FieldType)bkdReader->type);
+    _type_info = get_scalar_type_info((FieldType)(*bkdReader)->type);
     if (_type_info == nullptr) {
         return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>(
-                "unsupported typeinfo, type={}", bkdReader->type);
+                "unsupported typeinfo, type={}", (*bkdReader)->type);
     }
     _value_key_coder = get_key_coder(_type_info->type());
     return Status::OK();
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.h b/be/src/olap/rowset/segment_v2/inverted_index_reader.h
index c076756805..5f7b318825 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_reader.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.h
@@ -29,6 +29,7 @@
 #include "io/fs/file_system.h"
 #include "io/fs/path.h"
 #include "olap/inverted_index_parser.h"
+#include "olap/rowset/segment_v2/inverted_index_cache.h"
 #include "olap/rowset/segment_v2/inverted_index_compound_reader.h"
 #include "olap/rowset/segment_v2/inverted_index_query_type.h"
 #include "olap/tablet_schema.h"
@@ -192,6 +193,9 @@ public:
 class BkdIndexReader : public InvertedIndexReader {
     ENABLE_FACTORY_CREATOR(BkdIndexReader);
 
+private:
+    std::string _file_full_path;
+
 public:
     explicit BkdIndexReader(io::FileSystemSPtr fs, const std::string& path,
                             const TabletIndex* index_meta);
@@ -221,11 +225,16 @@ public:
                      uint32_t* count) override;
     Status bkd_query(OlapReaderStatistics* stats, const std::string& 
column_name,
                      const void* query_value, InvertedIndexQueryType 
query_type,
-                     std::shared_ptr<lucene::util::bkd::bkd_reader>& r,
+                     std::shared_ptr<lucene::util::bkd::bkd_reader> r,
                      InvertedIndexVisitor* visitor);
 
+    Status handle_cache(InvertedIndexQueryCache* cache,
+                        const InvertedIndexQueryCache::CacheKey& cache_key,
+                        InvertedIndexQueryCacheHandle* cache_handler, 
OlapReaderStatistics* stats,
+                        roaring::Roaring* bit_map);
+
     InvertedIndexReaderType type() override;
-    Status get_bkd_reader(std::shared_ptr<lucene::util::bkd::bkd_reader>& 
reader);
+    Status get_bkd_reader(std::shared_ptr<lucene::util::bkd::bkd_reader>* 
reader);
 
 private:
     const TypeInfo* _type_info {};


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to