This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
     new 358f0f172  KUDU-613: Cleanup of cache code
358f0f172 is described below

commit 358f0f172609e1a0ea359eb4e8118d0584926b3d
Author:     Mahesh Reddy <mre...@cloudera.com>
AuthorDate: Tue Feb 6 15:04:38 2024 -0500

    KUDU-613: Cleanup of cache code

    This patch moves some classes out of the anonymous namespace
    and into the headers of the cache and nvm_cache files.
    These classes will be used by the new SLRU cache.

    This patch also templatizes the HandleTable class to be used
    by both the cache and nvm_cache files.

    Change-Id: I506d4577c0ae873b01d7fa4f53846d6fd0f664cf
    Reviewed-on: http://gerrit.cloudera.org:8080/21018
    Tested-by: Kudu Jenkins
    Reviewed-by: Alexey Serbin <ale...@apache.org>
---
 src/kudu/util/cache.cc      | 147 +++-------------------------------------
 src/kudu/util/cache.h       | 151 +++++++++++++++++++++++++++++++++++++----
 src/kudu/util/file_cache.cc |   1 +
 src/kudu/util/nvm_cache.cc  | 162 +++++++-------------------------------------
 src/kudu/util/nvm_cache.h   |  32 +++++++++
 5 files changed, 206 insertions(+), 287 deletions(-)

diff --git a/src/kudu/util/cache.cc b/src/kudu/util/cache.cc
index d141caca7..c0156fe12 100644
--- a/src/kudu/util/cache.cc
+++ b/src/kudu/util/cache.cc
@@ -45,6 +45,7 @@ DEFINE_double(cache_memtracker_approximation_ratio, 0.01,
               "this ratio to improve performance. For tests.");
 TAG_FLAG(cache_memtracker_approximation_ratio, hidden);
 
+using RLHandle = kudu::Cache::RLHandle;
 using std::atomic;
 using std::shared_ptr;
 using std::string;
@@ -68,130 +69,6 @@ const Cache::IterationFunc Cache::kIterateOverAllEntriesFunc = [](
 
 namespace {
 
-// Recency list cache implementations (FIFO, LRU, etc.)
-
-// Recency list handle. An entry is a variable length heap-allocated structure.
-// Entries are kept in a circular doubly linked list ordered by some recency
-// criterion (e.g., access time for LRU policy, insertion time for FIFO policy).
-struct RLHandle {
-  Cache::EvictionCallback* eviction_callback;
-  RLHandle* next_hash;
-  RLHandle* next;
-  RLHandle* prev;
-  size_t charge;      // TODO(opt): Only allow uint32_t?
-  uint32_t key_length;
-  uint32_t val_length;
-  std::atomic<int32_t> refs;
-  uint32_t hash;      // Hash of key(); used for fast sharding and comparisons
-
-  // The storage for the key/value pair itself. The data is stored as:
-  //   [key bytes ...] [padding up to 8-byte boundary] [value bytes ...]
-  uint8_t kv_data[1];   // Beginning of key/value pair
-
-  Slice key() const {
-    return Slice(kv_data, key_length);
-  }
-
-  uint8_t* mutable_val_ptr() {
-    int val_offset = KUDU_ALIGN_UP(key_length, sizeof(void*));
-    return &kv_data[val_offset];
-  }
-
-  const uint8_t* val_ptr() const {
-    return const_cast<RLHandle*>(this)->mutable_val_ptr();
-  }
-
-  Slice value() const {
-    return Slice(val_ptr(), val_length);
-  }
-};
-
-// We provide our own simple hash table since it removes a whole bunch
-// of porting hacks and is also faster than some of the built-in hash
-// table implementations in some of the compiler/runtime combinations
-// we have tested. E.g., readrandom speeds up by ~5% over the g++
-// 4.4.3's builtin hashtable.
-class HandleTable {
- public:
-  HandleTable() : length_(0), elems_(0), list_(nullptr) { Resize(); }
-  ~HandleTable() { delete[] list_; }
-
-  RLHandle* Lookup(const Slice& key, uint32_t hash) {
-    return *FindPointer(key, hash);
-  }
-
-  RLHandle* Insert(RLHandle* h) {
-    RLHandle** ptr = FindPointer(h->key(), h->hash);
-    RLHandle* old = *ptr;
-    h->next_hash = (old == nullptr ? nullptr : old->next_hash);
-    *ptr = h;
-    if (old == nullptr) {
-      ++elems_;
-      if (elems_ > length_) {
-        // Since each cache entry is fairly large, we aim for a small
-        // average linked list length (<= 1).
-        Resize();
-      }
-    }
-    return old;
-  }
-
-  RLHandle* Remove(const Slice& key, uint32_t hash) {
-    RLHandle** ptr = FindPointer(key, hash);
-    RLHandle* result = *ptr;
-    if (result != nullptr) {
-      *ptr = result->next_hash;
-      --elems_;
-    }
-    return result;
-  }
-
- private:
-  // The table consists of an array of buckets where each bucket is
-  // a linked list of cache entries that hash into the bucket.
-  uint32_t length_;
-  uint32_t elems_;
-  RLHandle** list_;
-
-  // Return a pointer to slot that points to a cache entry that
-  // matches key/hash. If there is no such cache entry, return a
-  // pointer to the trailing slot in the corresponding linked list.
-  RLHandle** FindPointer(const Slice& key, uint32_t hash) {
-    RLHandle** ptr = &list_[hash & (length_ - 1)];
-    while (*ptr != nullptr &&
-           ((*ptr)->hash != hash || key != (*ptr)->key())) {
-      ptr = &(*ptr)->next_hash;
-    }
-    return ptr;
-  }
-
-  void Resize() {
-    uint32_t new_length = 16;
-    while (new_length < elems_ * 1.5) {
-      new_length *= 2;
-    }
-    auto new_list = new RLHandle*[new_length];
-    memset(new_list, 0, sizeof(new_list[0]) * new_length);
-    uint32_t count = 0;
-    for (uint32_t i = 0; i < length_; i++) {
-      RLHandle* h = list_[i];
-      while (h != nullptr) {
-        RLHandle* next = h->next_hash;
-        uint32_t hash = h->hash;
-        RLHandle** ptr = &new_list[hash & (new_length - 1)];
-        h->next_hash = *ptr;
-        *ptr = h;
-        h = next;
-        count++;
-      }
-    }
-    DCHECK_EQ(elems_, count);
-    delete[] list_;
-    list_ = new_list;
-    length_ = new_length;
-  }
-};
-
 string ToString(Cache::EvictionPolicy p) {
   switch (p) {
     case Cache::EvictionPolicy::FIFO:
@@ -212,7 +89,7 @@ class CacheShard {
   explicit CacheShard(MemTracker* tracker);
   ~CacheShard();
 
-  // Separate from constructor so caller can easily make an array of CacheShard
+  // Separate from constructor so caller can easily make an array of CacheShard.
   void SetCapacity(size_t capacity) {
     capacity_ = capacity;
     max_deferred_consumption_ = capacity * FLAGS_cache_memtracker_approximation_ratio;
@@ -270,10 +147,10 @@ class CacheShard {
   size_t usage_;
 
   // Dummy head of recency list.
-  // rl.prev is newest entry, rl.next is oldest entry.
+  // rl.prev is the newest entry, rl.next is the oldest entry.
   RLHandle rl_;
 
-  HandleTable table_;
+  Cache::HandleTable<RLHandle> table_;
 
   MemTracker* mem_tracker_;
   atomic<int64_t> deferred_consumption_ { 0 };
@@ -403,7 +280,7 @@ Cache::Handle* CacheShard<policy>::Lookup(const Slice& key,
     }
   }
 
-  // Do the metrics outside of the lock.
+  // Do the metrics outside the lock.
   UpdateMetricsLookup(e != nullptr, caching);
 
   return reinterpret_cast<Cache::Handle*>(e);
@@ -422,8 +299,7 @@ template<Cache::EvictionPolicy policy>
 Cache::Handle* CacheShard<policy>::Insert(
     RLHandle* handle,
     Cache::EvictionCallback* eviction_callback) {
-  // Set the remaining RLHandle members which were not already allocated during
-  // Allocate().
+  // Set the remaining RLHandle members which were not already allocated during Allocate().
   handle->eviction_callback = eviction_callback;
   // Two refs for the handle: one from CacheShard, one for the returned handle.
   handle->refs.store(2, std::memory_order_relaxed);
@@ -459,8 +335,7 @@ Cache::Handle* CacheShard<policy>::Insert(
     }
   }
 
-  // we free the entries here outside of mutex for
-  // performance reasons
+  // We free the entries here outside the mutex for performance reasons.
   while (to_remove_head != nullptr) {
     RLHandle* next = to_remove_head->next;
     FreeEntry(to_remove_head);
@@ -482,8 +357,8 @@ void CacheShard<policy>::Erase(const Slice& key, uint32_t hash) {
       last_reference = Unref(e);
     }
   }
-  // mutex not held here
-  // last_reference will only be true if e != NULL
+  // Mutex not held here.
+  // last_reference will only be true if e != NULL.
   if (last_reference) {
     FreeEntry(e);
   }
@@ -524,7 +399,7 @@ size_t CacheShard<policy>::Invalidate(const Cache::InvalidationControl& ctl) {
   }
   // Once removed from the lookup table and the recency list, the entries
   // with no references left must be deallocated because Cache::Release()
-  // wont be called for them from elsewhere.
+  // won't be called for them from elsewhere.
   while (to_remove_head != nullptr) {
     RLHandle* next = to_remove_head->next;
     FreeEntry(to_remove_head);
@@ -602,7 +477,7 @@ class ShardedCache : public Cache {
   }
 
   UniqueHandle Insert(UniquePendingHandle handle,
-                      Cache::EvictionCallback* eviction_callback) override {
+                      EvictionCallback* eviction_callback) override {
    RLHandle* h = reinterpret_cast<RLHandle*>(DCHECK_NOTNULL(handle.release()));
    return UniqueHandle(
        shards_[Shard(h->hash)]->Insert(h, eviction_callback),
diff --git a/src/kudu/util/cache.h b/src/kudu/util/cache.h
index b72f0b48c..61d664d04 100644
--- a/src/kudu/util/cache.h
+++ b/src/kudu/util/cache.h
@@ -17,15 +17,20 @@
 #pragma once
 
+#include <atomic>
 #include <cstddef>
 #include <cstdint>
+#include <cstring>
 #include <functional>
 #include <iosfwd>
 #include <memory>
 #include <string>
 #include <utility>
 
+#include <glog/logging.h>
+
 #include "kudu/gutil/macros.h"
+#include "kudu/util/alignment.h"
 #include "kudu/util/slice.h"
 
 namespace kudu {
@@ -41,8 +46,7 @@ class Cache {
   };
 
   // Supported eviction policies for the cache. Eviction policy determines what
-  // items to evict if the cache is at capacity when trying to accommodate
-  // an extra item.
+  // items to evict if the cache is at capacity when trying to accommodate an extra item.
   enum class EvictionPolicy {
     // The earliest added items are evicted (a.k.a. queue).
     FIFO,
@@ -51,14 +55,138 @@ class Cache {
     LRU,
   };
 
-  // Callback interface which is called when an entry is evicted from the
-  // cache.
+  // Callback interface which is called when an entry is evicted from the cache.
   class EvictionCallback {
    public:
     virtual ~EvictionCallback() = default;
     virtual void EvictedEntry(Slice key, Slice value) = 0;
   };
 
+  // Recency list cache implementations (FIFO, LRU, etc.)
+
+  // Recency list handle. An entry is a variable length heap-allocated structure.
+  // Entries are kept in a circular doubly linked list ordered by some recency
+  // criterion (e.g., access time for LRU policy, insertion time for FIFO policy).
+  struct RLHandle {
+    EvictionCallback* eviction_callback;
+    RLHandle* next_hash;
+    RLHandle* next;
+    RLHandle* prev;
+    size_t charge;      // TODO(opt): Only allow uint32_t?
+    uint32_t key_length;
+    uint32_t val_length;
+    std::atomic<int32_t> refs;
+    uint32_t hash;      // Hash of key(); used for fast sharding and comparisons
+
+    // The storage for the key/value pair itself. The data is stored as:
+    //   [key bytes ...] [padding up to 8-byte boundary] [value bytes ...]
+    uint8_t kv_data[1];   // Beginning of key/value pair
+
+    Slice key() const {
+      return Slice(kv_data, key_length);
+    }
+
+    uint8_t* mutable_val_ptr() {
+      int val_offset = KUDU_ALIGN_UP(key_length, sizeof(void*));
+      return &kv_data[val_offset];
+    }
+
+    const uint8_t* val_ptr() const {
+      return const_cast<RLHandle*>(this)->mutable_val_ptr();
+    }
+
+    Slice value() const {
+      return Slice(val_ptr(), val_length);
+    }
+  };
+
+  // We provide our own simple hash table since it removes a bunch
+  // of porting hacks and is also faster than some built-in hash
+  // table implementations in some compiler/runtime combinations
+  // we have tested. E.g., readrandom speeds up by ~5% over the g++
+  // 4.4.3's builtin hashtable.
+  template <typename HandleType>
+  class HandleTable {
+   public:
+    HandleTable() : length_(0), elems_(0), list_(nullptr) { Resize(); }
+    ~HandleTable() { delete[] list_; }
+
+    HandleType* Lookup(const Slice& key, uint32_t hash) {
+      return *FindPointer(key, hash);
+    }
+
+    HandleType* Insert(HandleType* h) {
+      HandleType** ptr = FindPointer(h->key(), h->hash);
+      HandleType* old = *ptr;
+      h->next_hash = (old == nullptr ? nullptr : old->next_hash);
+      *ptr = h;
+      if (old == nullptr) {
+        ++elems_;
+        if (elems_ > length_) {
+          // Since each cache entry is fairly large, we aim for a small
+          // average linked list length (<= 1).
+          Resize();
+        }
+      }
+      return old;
+    }
+
+    HandleType* Remove(const Slice& key, uint32_t hash) {
+      HandleType** ptr = FindPointer(key, hash);
+      HandleType* result = *ptr;
+      if (result != nullptr) {
+        *ptr = result->next_hash;
+        --elems_;
+      }
+      return result;
+    }
+
+   private:
+    // The table consists of an array of buckets where each bucket is
+    // a linked list of cache entries that hash into the bucket.
+    uint32_t length_;
+    uint32_t elems_;
+    HandleType** list_;
+
+    // Return a pointer to slot that points to a cache entry that
+    // matches key/hash. If there is no such cache entry, return a
+    // pointer to the trailing slot in the corresponding linked list.
+    HandleType** FindPointer(const Slice& key, uint32_t hash) {
+      HandleType** ptr = &list_[hash & (length_ - 1)];
+      while (*ptr != nullptr &&
+             ((*ptr)->hash != hash || key != (*ptr)->key())) {
+        ptr = &(*ptr)->next_hash;
+      }
+      return ptr;
+    }
+
+    void Resize() {
+      uint32_t new_length = 16;
+      while (new_length < elems_ * 1.5) {
+        new_length *= 2;
+      }
+      auto* new_list = new HandleType*[new_length];
+      memset(new_list, 0, sizeof(new_list[0]) * new_length);
+      uint32_t count = 0;
+      for (uint32_t i = 0; i < length_; i++) {
+        HandleType* h = list_[i];
+        while (h != nullptr) {
+          HandleType* next = h->next_hash;
+          uint32_t hash = h->hash;
+          HandleType** ptr = &new_list[hash & (new_length - 1)];
+          h->next_hash = *ptr;
+          *ptr = h;
+          h = next;
+          count++;
+        }
+      }
+      DCHECK_EQ(elems_, count);
+      delete[] list_;
+      list_ = new_list;
+      length_ = new_length;
+    }
+  };
+
   Cache() = default;
 
   // Destroys all existing entries by calling the "deleter"
@@ -94,7 +222,7 @@ class Cache {
         : c_(c) {
     }
 
-    void operator()(Cache::Handle* h) const {
+    void operator()(Handle* h) const {
       if (h != nullptr) {
         c_->Release(h);
       }
@@ -122,7 +250,7 @@ class Cache {
         : c_(c) {
     }
 
-    void operator()(Cache::PendingHandle* h) const {
+    void operator()(PendingHandle* h) const {
       if (h != nullptr) {
         c_->Free(h);
       }
@@ -141,7 +269,7 @@ class Cache {
   typedef std::unique_ptr<PendingHandle, PendingHandleDeleter> UniquePendingHandle;
 
   // Passing EXPECT_IN_CACHE will increment the hit/miss metrics that track the number of times
-  // blocks were requested that the users were hoping to get the block from the cache, along with
+  // blocks were requested that the users were hoping to get the block from the cache, along
   // with the basic metrics.
   // Passing NO_EXPECT_IN_CACHE will only increment the basic metrics.
   // This helps in determining if we are effectively caching the blocks that matter the most.
@@ -179,8 +307,7 @@ class Cache {
   // to it have been released.
   virtual void Erase(const Slice& key) = 0;
 
-  // Return the value encapsulated in a raw handle returned by a successful
-  // Lookup().
+  // Return the value encapsulated in a raw handle returned by a successful Lookup().
   virtual Slice Value(const UniqueHandle& handle) const = 0;
 
 
@@ -188,7 +315,7 @@ class Cache {
   // Insertion path
   // ------------------------------------------------------------
   //
-  // Because some cache implementations (eg NVM) manage their own memory, and because we'd
+  // Because some cache implementations (e.g. NVM) manage their own memory, and because we'd
   // like to read blocks directly into cache-managed memory rather than causing an extra
   // memcpy, the insertion of a new element into the cache requires two phases. First, a
   // PendingHandle is allocated with space for the value, and then it is later inserted.
@@ -216,7 +343,7 @@ class Cache {
   //
   // If 'charge' is not 'kAutomaticCharge', then the cache capacity will be charged
   // the explicit amount. This is useful when caching items that are small but need to
-  // maintain a bounded count (eg file descriptors) rather than caring about their actual
+  // maintain a bounded count (e.g. file descriptors) rather than caring about their actual
   // memory usage. It is also useful when caching items for whom calculating
   // memory usage is a complex affair (i.e. items containing pointers to
   // additional heap allocations).
@@ -258,7 +385,7 @@ class Cache {
 
   // Invalidate cache's entries, effectively evicting non-valid ones from the
   // cache. The invalidation process iterates over the cache's recency list(s),
-  // from best candidate for eviction to the worst.
+  // from the best candidate for eviction to the worst.
   //
   // The provided control structure 'ctl' is responsible for the following:
   //   * determine whether an entry is valid or not
diff --git a/src/kudu/util/file_cache.cc b/src/kudu/util/file_cache.cc
index a59274b32..61a7beecb 100644
--- a/src/kudu/util/file_cache.cc
+++ b/src/kudu/util/file_cache.cc
@@ -25,6 +25,7 @@
 #include <mutex>
 #include <ostream>
 #include <string>
+#include <type_traits>
 #include <utility>
 #include <vector>
 
diff --git a/src/kudu/util/nvm_cache.cc b/src/kudu/util/nvm_cache.cc
index 97ea5652f..7784a95ed 100644
--- a/src/kudu/util/nvm_cache.cc
+++ b/src/kudu/util/nvm_cache.cc
@@ -33,10 +33,7 @@
 #include <gflags/gflags.h>
 #include <glog/logging.h>
 
-#include "kudu/gutil/atomic_refcount.h"
-#include "kudu/gutil/atomicops.h"
 #include "kudu/gutil/bits.h"
-#include "kudu/gutil/dynamic_annotations.h"
 #include "kudu/gutil/hash/city.h"
 #include "kudu/gutil/macros.h"
 #include "kudu/gutil/port.h"
@@ -77,6 +74,7 @@ DEFINE_double(nvm_cache_usage_ratio, 1.25,
               "the ratio.");
 TAG_FLAG(nvm_cache_usage_ratio, advanced);
 
+using kudu::util::LRUHandle;
 using std::string;
 using std::unique_ptr;
 using std::vector;
@@ -188,126 +186,13 @@ typedef simple_spinlock MutexType;
 
 // LRU cache implementation
 
-// An entry is a variable length heap-allocated structure. Entries
-// are kept in a circular doubly linked list ordered by access time.
-struct LRUHandle {
-  Cache::EvictionCallback* eviction_callback;
-  LRUHandle* next_hash;
-  LRUHandle* next;
-  LRUHandle* prev;
-  size_t charge;      // TODO(opt): Only allow uint32_t?
-  uint32_t key_length;
-  uint32_t val_length;
-  Atomic32 refs;
-  uint32_t hash;      // Hash of key(); used for fast sharding and comparisons
-  uint8_t* kv_data;
-
-  Slice key() const {
-    return Slice(kv_data, key_length);
-  }
-
-  Slice value() const {
-    return Slice(&kv_data[key_length], val_length);
-  }
-
-  uint8_t* val_ptr() {
-    return &kv_data[key_length];
-  }
-};
-
-// We provide our own simple hash table since it removes a whole bunch
-// of porting hacks and is also faster than some of the built-in hash
-// table implementations in some of the compiler/runtime combinations
-// we have tested. E.g., readrandom speeds up by ~5% over the g++
-// 4.4.3's builtin hashtable.
-class HandleTable {
- public:
-  HandleTable() : length_(0), elems_(0), list_(nullptr) { Resize(); }
-  ~HandleTable() { delete[] list_; }
-
-  LRUHandle* Lookup(const Slice& key, uint32_t hash) {
-    return *FindPointer(key, hash);
-  }
-
-  LRUHandle* Insert(LRUHandle* h) {
-    LRUHandle** ptr = FindPointer(h->key(), h->hash);
-    LRUHandle* old = *ptr;
-    h->next_hash = (old == nullptr ? nullptr : old->next_hash);
-    *ptr = h;
-    if (old == nullptr) {
-      ++elems_;
-      if (elems_ > length_) {
-        // Since each cache entry is fairly large, we aim for a small
-        // average linked list length (<= 1).
-        Resize();
-      }
-    }
-    return old;
-  }
-
-  LRUHandle* Remove(const Slice& key, uint32_t hash) {
-    LRUHandle** ptr = FindPointer(key, hash);
-    LRUHandle* result = *ptr;
-    if (result != nullptr) {
-      *ptr = result->next_hash;
-      --elems_;
-    }
-    return result;
-  }
-
- private:
-  // The table consists of an array of buckets where each bucket is
-  // a linked list of cache entries that hash into the bucket.
-  uint32_t length_;
-  uint32_t elems_;
-  LRUHandle** list_;
-
-  // Return a pointer to slot that points to a cache entry that
-  // matches key/hash. If there is no such cache entry, return a
-  // pointer to the trailing slot in the corresponding linked list.
-  LRUHandle** FindPointer(const Slice& key, uint32_t hash) {
-    LRUHandle** ptr = &list_[hash & (length_ - 1)];
-    while (*ptr != nullptr &&
-           ((*ptr)->hash != hash || key != (*ptr)->key())) {
-      ptr = &(*ptr)->next_hash;
-    }
-    return ptr;
-  }
-
-  void Resize() {
-    uint32_t new_length = 16;
-    while (new_length < elems_ * 1.5) {
-      new_length *= 2;
-    }
-    LRUHandle** new_list = new LRUHandle*[new_length];
-    memset(new_list, 0, sizeof(new_list[0]) * new_length);
-    uint32_t count = 0;
-    for (uint32_t i = 0; i < length_; i++) {
-      LRUHandle* h = list_[i];
-      while (h != nullptr) {
-        LRUHandle* next = h->next_hash;
-        uint32_t hash = h->hash;
-        LRUHandle** ptr = &new_list[hash & (new_length - 1)];
-        h->next_hash = *ptr;
-        *ptr = h;
-        h = next;
-        count++;
-      }
-    }
-    DCHECK_EQ(elems_, count);
-    delete[] list_;
-    list_ = new_list;
-    length_ = new_length;
-  }
-};
-
 // A single shard of sharded cache.
 class NvmLRUCache {
  public:
   explicit NvmLRUCache(memkind *vmp);
   ~NvmLRUCache();
 
-  // Separate from constructor so caller can easily make an array of LRUCache
+  // Separate from constructor so caller can easily make an array of LRUCache.
   void SetCapacity(size_t capacity) { capacity_ = capacity; }
 
   void SetMetrics(CacheMetrics* metrics) { metrics_ = metrics; }
@@ -325,16 +210,15 @@ class NvmLRUCache {
   void NvmLRU_Remove(LRUHandle* e);
   void NvmLRU_Append(LRUHandle* e);
   // Just reduce the reference count by 1.
-  // Return true if last reference
-  bool Unref(LRUHandle* e);
+  // Return true if last reference.
+  static bool Unref(LRUHandle* e);
   void FreeEntry(LRUHandle* e);
 
   // Evict the LRU item in the cache, adding it to the linked list
   // pointed to by 'to_remove_head'.
   void EvictOldestUnlocked(LRUHandle** to_remove_head);
 
-  // Free all of the entries in the linked list that has to_free_head
-  // as its head.
+  // Free all the entries in the linked list that has to_free_head as its head.
   void FreeLRUEntries(LRUHandle* to_free_head);
 
   // Wrapper around memkind_malloc which injects failures based on a flag.
@@ -351,7 +235,7 @@ class NvmLRUCache {
   // lru.prev is newest entry, lru.next is oldest entry.
   LRUHandle lru_;
 
-  HandleTable table_;
+  Cache::HandleTable<LRUHandle> table_;
 
   memkind* vmp_;
 
@@ -362,7 +246,7 @@ NvmLRUCache::NvmLRUCache(memkind* vmp)
     : usage_(0),
       vmp_(vmp),
       metrics_(nullptr) {
-  // Make empty circular linked list
+  // Make empty circular linked list.
   lru_.next = &lru_;
   lru_.prev = &lru_;
 }
@@ -370,7 +254,8 @@ NvmLRUCache::NvmLRUCache(memkind* vmp)
 NvmLRUCache::~NvmLRUCache() {
   for (LRUHandle* e = lru_.next; e != &lru_; ) {
     LRUHandle* next = e->next;
-    DCHECK_EQ(e->refs, 1);  // Error if caller has an unreleased handle
+    // Error if caller has an unreleased handle.
+    DCHECK_EQ(e->refs.load(std::memory_order_relaxed), 1);
     if (Unref(e)) {
       FreeEntry(e);
     }
@@ -386,12 +271,12 @@ void* NvmLRUCache::MemkindMalloc(size_t size) {
 }
 
 bool NvmLRUCache::Unref(LRUHandle* e) {
-  DCHECK_GT(ANNOTATE_UNPROTECTED_READ(e->refs), 0);
-  return !base::RefCountDec(&e->refs);
+  DCHECK_GT(e->refs.load(std::memory_order_relaxed), 0);
+  return e->refs.fetch_sub(1) == 1;
 }
 
 void NvmLRUCache::FreeEntry(LRUHandle* e) {
-  DCHECK_EQ(ANNOTATE_UNPROTECTED_READ(e->refs), 0);
+  DCHECK_EQ(e->refs.load(std::memory_order_relaxed), 0);
   if (e->eviction_callback) {
     e->eviction_callback->EvictedEntry(e->key(), e->value());
   }
@@ -414,7 +299,7 @@ void NvmLRUCache::NvmLRU_Remove(LRUHandle* e) {
 }
 
 void NvmLRUCache::NvmLRU_Append(LRUHandle* e) {
-  // Make "e" newest entry by inserting just before lru_
+  // Make "e" newest entry by inserting just before lru_.
   e->next = &lru_;
   e->prev = lru_.prev;
   e->prev->next = e;
@@ -423,20 +308,20 @@ void NvmLRUCache::NvmLRU_Append(LRUHandle* e) {
 }
 
 Cache::Handle* NvmLRUCache::Lookup(const Slice& key, uint32_t hash, bool caching) {
-  LRUHandle* e;
+  LRUHandle* e;
   {
     std::lock_guard<MutexType> l(mutex_);
     e = table_.Lookup(key, hash);
     if (e != nullptr) {
       // If an entry exists, remove the old entry from the cache
      // and re-add to the end of the linked list.
-      base::RefCountInc(&e->refs);
+      e->refs.fetch_add(1, std::memory_order_relaxed);
       NvmLRU_Remove(e);
       NvmLRU_Append(e);
     }
   }
 
-  // Do the metrics outside of the lock.
+  // Do the metrics outside the lock.
   if (metrics_) {
     metrics_->lookups->Increment();
     bool was_hit = (e != nullptr);
@@ -489,7 +374,8 @@ Cache::Handle* NvmLRUCache::Insert(LRUHandle* e,
   DCHECK(e);
   LRUHandle* to_remove_head = nullptr;
 
-  e->refs = 2;  // One from LRUCache, one for the returned handle
+  // One from LRUCache, one for the returned handle.
+  e->refs.store(2, std::memory_order_relaxed);
   e->eviction_callback = eviction_callback;
   if (PREDICT_TRUE(metrics_)) {
     metrics_->cache_usage->IncrementBy(e->charge);
@@ -515,8 +401,7 @@ Cache::Handle* NvmLRUCache::Insert(LRUHandle* e,
     }
   }
 
-  // we free the entries here outside of mutex for
-  // performance reasons
+  // We free the entries here outside the mutex for performance reasons.
   FreeLRUEntries(to_remove_head);
 
   return reinterpret_cast<Cache::Handle*>(e);
@@ -533,8 +418,8 @@ void NvmLRUCache::Erase(const Slice& key, uint32_t hash) {
       last_reference = Unref(e);
     }
   }
-  // mutex not held here
-  // last_reference will only be true if e != nullptr
+  // Mutex not held here.
+  // last_reference will only be true if e != nullptr.
   if (last_reference) {
     FreeEntry(e);
   }
@@ -654,7 +539,7 @@ class ShardedLRUCache : public Cache {
     return reinterpret_cast<const LRUHandle*>(handle.get())->value();
   }
   uint8_t* MutableValue(UniquePendingHandle* handle) override {
-    return reinterpret_cast<LRUHandle*>(handle->get())->val_ptr();
+    return reinterpret_cast<LRUHandle*>(handle->get())->mutable_val_ptr();
   }
 
   void SetMetrics(unique_ptr<CacheMetrics> metrics,
@@ -674,8 +559,7 @@ class ShardedLRUCache : public Cache {
   DCHECK_GE(val_len, 0);
 
   // Try allocating from each of the shards -- if memkind is tight,
-  // this can cause eviction, so we might have better luck in different
-  // shards.
+  // this can cause eviction, so we might have better luck in different shards.
   for (const auto& shard : shards_) {
     UniquePendingHandle ph(static_cast<PendingHandle*>(
         shard->Allocate(sizeof(LRUHandle) + key_len + val_len)),
diff --git a/src/kudu/util/nvm_cache.h b/src/kudu/util/nvm_cache.h
index 4a302f2e6..d1a89801a 100644
--- a/src/kudu/util/nvm_cache.h
+++ b/src/kudu/util/nvm_cache.h
@@ -16,14 +16,46 @@
 // under the License.
 #pragma once
 
+#include <atomic>
 #include <cstddef>
+#include <cstdint>
 #include <string>
 
 #include <glog/logging.h>
 
 #include "kudu/util/cache.h"
+#include "kudu/util/slice.h"
 
 namespace kudu {
+namespace util {
+
+// An entry is a variable length heap-allocated structure. Entries
+// are kept in a circular doubly linked list ordered by access time.
+struct LRUHandle {
+  Cache::EvictionCallback* eviction_callback;
+  LRUHandle* next_hash;
+  LRUHandle* next;
+  LRUHandle* prev;
+  size_t charge;      // TODO(opt): Only allow uint32_t?
+  uint32_t key_length;
+  uint32_t val_length;
+  std::atomic<int32_t> refs;
+  uint32_t hash;      // Hash of key(); used for fast sharding and comparisons
+  uint8_t* kv_data;
+
+  Slice key() const {
+    return Slice(kv_data, key_length);
+  }
+
+  Slice value() const {
+    return Slice(&kv_data[key_length], val_length);
+  }
+
+  uint8_t* mutable_val_ptr() const {
+    return &kv_data[key_length];
+  }
+};
+}  // namespace util
 
 // Convenience macro for invoking CanUseNVMCacheForTests.
 #define RETURN_IF_NO_NVM_CACHE(memory_type) do { \
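
For readers who want to see, outside the diff, what the templatized HandleTable expects of its type parameter: the class imposes only a duck-typed contract -- a handle must expose key(), a public 'hash' field, and a 'next_hash' chaining pointer -- which is what lets RLHandle (cache.h) and LRUHandle (nvm_cache.h) share one implementation. Below is a minimal, hypothetical sketch of that contract, not Kudu code: ToyHandle and ToyHandleTable are invented names, std::string_view stands in for kudu::Slice, and the dynamic Resize() growth from the patch is omitted, so the bucket count stays fixed.

#include <cassert>
#include <cstdint>
#include <functional>
#include <string>
#include <string_view>

// Invented stand-in for a cache handle; only the three members the
// table touches are required: key(), 'hash', and 'next_hash'.
struct ToyHandle {
  ToyHandle* next_hash = nullptr;  // bucket-chain link managed by the table
  uint32_t hash = 0;               // precomputed hash of key()
  std::string k;
  std::string_view key() const { return k; }
};

// Condensed analogue of Cache::HandleTable<HandleType>: an array of
// singly linked bucket chains indexed by hash & (buckets - 1).
template <typename HandleType>
class ToyHandleTable {
 public:
  HandleType* Lookup(std::string_view key, uint32_t hash) {
    return *FindPointer(key, hash);
  }

  // Inserts 'h'; if an entry with the same key existed, it is unlinked
  // and returned so the caller can release it.
  HandleType* Insert(HandleType* h) {
    HandleType** ptr = FindPointer(h->key(), h->hash);
    HandleType* old = *ptr;
    h->next_hash = (old == nullptr ? nullptr : old->next_hash);
    *ptr = h;
    return old;
  }

  HandleType* Remove(std::string_view key, uint32_t hash) {
    HandleType** ptr = FindPointer(key, hash);
    HandleType* result = *ptr;
    if (result != nullptr) {
      *ptr = result->next_hash;
    }
    return result;
  }

 private:
  static constexpr uint32_t kBuckets = 16;  // must remain a power of two
  HandleType* list_[kBuckets] = {};

  // Returns the slot pointing at the matching entry, or the empty
  // trailing slot of the chain if no entry matches.
  HandleType** FindPointer(std::string_view key, uint32_t hash) {
    HandleType** ptr = &list_[hash & (kBuckets - 1)];
    while (*ptr != nullptr && ((*ptr)->hash != hash || key != (*ptr)->key())) {
      ptr = &(*ptr)->next_hash;
    }
    return ptr;
  }
};

int main() {
  ToyHandleTable<ToyHandle> table;
  ToyHandle h;
  h.k = "block-123";
  h.hash = static_cast<uint32_t>(std::hash<std::string_view>{}(h.key()));

  assert(table.Insert(&h) == nullptr);          // nothing displaced
  assert(table.Lookup(h.key(), h.hash) == &h);  // found by key + hash
  assert(table.Remove(h.key(), h.hash) == &h);  // unlinked and returned
  return 0;
}

The real table in cache.h additionally grows its bucket array (Resize()) whenever elems_ exceeds length_, keeping the average chain length at most one, as the comment in the diff notes.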