[ 
https://issues.apache.org/jira/browse/TS-4331?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15351259#comment-15351259
 ] 

ASF GitHub Bot commented on TS-4331:
------------------------------------

Github user JamesPeach commented on a diff in the pull request:

    https://github.com/apache/trafficserver/pull/653#discussion_r68599511
  
    --- Diff: iocore/hostdb/P_RefCountCache.h ---
    @@ -0,0 +1,896 @@
    +/** @file
    +
    +  A cache (with map-esque interface) for RefCountObjs
    +
    +  @section license License
    +
    +  Licensed to the Apache Software Foundation (ASF) under one
    +  or more contributor license agreements.  See the NOTICE file
    +  distributed with this work for additional information
    +  regarding copyright ownership.  The ASF licenses this file
    +  to you under the Apache License, Version 2.0 (the
    +  "License"); you may not use this file except in compliance
    +  with the License.  You may obtain a copy of the License at
    +
    +      http://www.apache.org/licenses/LICENSE-2.0
    +
    +  Unless required by applicable law or agreed to in writing, software
    +  distributed under the License is distributed on an "AS IS" BASIS,
    +  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    +  See the License for the specific language governing permissions and
    +  limitations under the License.
    + */
    +#ifndef _P_RefCountCache_h_
    +#define _P_RefCountCache_h_
    +
    +#include "I_EventSystem.h"
    +#include "P_EventSystem.h" // TODO: less? just need ET_TASK
    +
    +#include <ts/Map.h>
    +#include "ts/PriorityQueue.h"
    +
    +#include <ts/List.h>
    +#include <ts/ink_hrtime.h>
    +
    +#include "ts/Vec.h"
    +#include "ts/I_Version.h"
    +#include <unistd.h>
    +
    +#define REFCOUNT_CACHE_EVENT_SYNC REFCOUNT_CACHE_EVENT_EVENTS_START
    +
    +#define REFCOUNTCACHE_MAGIC_NUMBER 0x0BAD2D9
    +#define REFCOUNTCACHE_MAJOR_VERSION 1
    +#define REFCOUNTCACHE_MINOR_VERSION 0
    +
    +// Stats
    +enum RefCountCache_Stats {
    +  refcountcache_current_items_stat,        // current number of items
    +  refcountcache_current_size_stat,         // current size of cache
    +  refcountcache_total_inserts_stat,        // total items inserted
    +  refcountcache_total_failed_inserts_stat, // total items unable to insert
    +  refcountcache_total_lookups_stat,        // total get() calls
    +  refcountcache_total_hits_stat,           // total hits
    +
    +  // Persistence metrics
    +  refcountcache_last_sync_time,   // seconds since epoch of last 
successful sync
    +  refcountcache_last_total_items, // number of items sync last time
    +  refcountcache_last_total_size,  // total size at last sync
    +
    +  RefCountCache_Stat_Count
    +};
    +
    +struct RefCountCacheItemMeta {
    +  uint64_t key;
    +  unsigned int size;
    +  ink_time_t expiry_time; // expire time as seconds since epoch
    +  RefCountCacheItemMeta(uint64_t key, unsigned int size, int expire_time = 
-1) : key(key), size(size), expiry_time(expire_time) {}
    +};
    +
    +// Layer of indirection for the hashmap-- since it needs lots of things 
inside of it
    +// We'll also use this as the item header, for persisting objects to disk
    +class RefCountCacheHashEntry
    +{
    +public:
    +  Ptr<RefCountObj> item;
    +  LINK(RefCountCacheHashEntry, item_link);
    +
    +  PriorityQueueEntry<RefCountCacheHashEntry *> *expiry_entry;
    +
    +  RefCountCacheItemMeta meta;
    +  void
    +  set(RefCountObj *i, uint64_t key, unsigned int size, int expire_time)
    +  {
    +    this->item = make_ptr(i);
    +    this->meta = RefCountCacheItemMeta(key, size, expire_time);
    +  };
    +  // Need a no-argument constructor to use the classAllocator
    +  RefCountCacheHashEntry() : expiry_entry(NULL), meta(0, 0){};
    +
    +  // make these values comparable -- so we can sort them
    +  bool
    +  operator<(const RefCountCacheHashEntry &v2) const
    +  {
    +    return this->meta.expiry_time < v2.meta.expiry_time;
    +  };
    +};
    +// Since the hashing values are all fixed size, we can simply use a 
classAllocator to avoid mallocs
    +extern ClassAllocator<RefCountCacheHashEntry> 
refCountCacheHashingValueAllocator;
    +extern ClassAllocator<PriorityQueueEntry<RefCountCacheHashEntry *>> 
expiryQueueEntry;
    +
    +struct RefCountCacheHashing {
    +  typedef uint64_t ID;
    +  typedef uint64_t const Key;
    +  typedef RefCountCacheHashEntry Value;
    +  typedef DList(RefCountCacheHashEntry, item_link) ListHead;
    +
    +  static ID
    +  hash(Key key)
    +  {
    +    return key;
    +  }
    +  static Key
    +  key(Value const *value)
    +  {
    +    return value->meta.key;
    +  }
    +  static bool
    +  equal(Key lhs, Key rhs)
    +  {
    +    return lhs == rhs;
    +  }
    +};
    +
    +// The RefCountCachePartition is simply a map of key -> Ptr<YourClass>
    +// We partition the cache to reduce lock contention
    +template <class C> class RefCountCachePartition
    +{
    +public:
    +  RefCountCachePartition(unsigned int part_num, uint64_t max_size, 
unsigned int max_items, RecRawStatBlock *rsb = NULL);
    +  Ptr<C> get(uint64_t key);
    +  void put(uint64_t key, C *item, int size = 0, int expire_time = 0);
    +  void erase(uint64_t key, int expiry_time = -1);
    +
    +  void clear();
    +  bool is_full();
    +  bool reserve(unsigned int);
    +
    +  const size_t count();
    +  void copy(Vec<RefCountCacheHashEntry *> &items);
    +
    +  typedef typename TSHashTable<RefCountCacheHashing>::iterator 
iterator_type;
    +  typedef typename TSHashTable<RefCountCacheHashing>::self hash_type;
    +  typedef typename TSHashTable<RefCountCacheHashing>::Location 
location_type;
    +  TSHashTable<RefCountCacheHashing> *get_map();
    +
    +  Ptr<ProxyMutex> lock; // Lock
    +
    +private:
    +  void metric_inc(RefCountCache_Stats metric_enum, int64_t data);
    +
    +  unsigned int part_num;
    +
    +  uint64_t max_size;
    +  unsigned int max_items;
    +
    +  uint64_t size;
    +  unsigned int items;
    +
    +  hash_type item_map;
    +
    +  PriorityQueue<RefCountCacheHashEntry *> expiry_queue;
    +  RecRawStatBlock *rsb;
    +};
    +
    +template <class C>
    +RefCountCachePartition<C>::RefCountCachePartition(unsigned int part_num, 
uint64_t max_size, unsigned int max_items,
    +                                                  RecRawStatBlock *rsb)
    +{
    +  this->part_num  = part_num;
    +  this->max_size  = max_size;
    +  this->max_items = max_items;
    +  this->size      = 0;
    +  this->items     = 0;
    +  this->rsb       = rsb;
    +
    +  // Initialize lock
    +  this->lock = new_ProxyMutex();
    +}
    +
    +template <class C>
    +Ptr<C>
    +RefCountCachePartition<C>::get(uint64_t key)
    +{
    +  this->metric_inc(refcountcache_total_lookups_stat, 1);
    +  location_type l = this->item_map.find(key);
    +  if (l.isValid()) {
    +    // found
    +    this->metric_inc(refcountcache_total_hits_stat, 1);
    +    return make_ptr((C *)l.m_value->item.get());
    +  } else {
    +    return Ptr<C>();
    +  }
    +}
    +
    +template <class C>
    +void
    +RefCountCachePartition<C>::put(uint64_t key, C *item, int size, int 
expire_time)
    +{
    +  this->metric_inc(refcountcache_total_inserts_stat, 1);
    +  size += sizeof(C);
    +  // Remove any colliding entries
    +  this->erase(key);
    +
    +  // if we are full, and can't make space-- then don't store the item
    +  if (this->is_full() && !this->reserve(size)) {
    +    Debug("refcountcache", "partition %d is full-- not storing item 
key=%ld", this->part_num, key);
    +    this->metric_inc(refcountcache_total_failed_inserts_stat, 1);
    +    return;
    +  }
    +
    +  // Create our value-- which has a ref to the `item`
    +  RefCountCacheHashEntry *val = refCountCacheHashingValueAllocator.alloc();
    +  val->set(item, key, size, expire_time);
    +
    +  // add expiry_entry to expiry queue, if the expire time is positive 
(otherwise it means don't expire)
    +  if (expire_time >= 0) {
    --- End diff --
    
    But ``0`` is not a valid expiration time and tests should not depend on 
that (though you can clearly test that 0 is invalid). AFAICT valid expirations 
are ``-1`` and ``now() + secs``.


> Hostdb consistency problems due to MultiCache
> ---------------------------------------------
>
>                 Key: TS-4331
>                 URL: https://issues.apache.org/jira/browse/TS-4331
>             Project: Traffic Server
>          Issue Type: Bug
>          Components: HostDB
>            Reporter: Thomas Jackson
>            Assignee: Thomas Jackson
>             Fix For: 7.0.0
>
>
> This ticket is for the correct long term fix to TS-4207
> pulled from a comment, which wraps up the issue
> {quote}
> Leif Hedstrom I have spent a decent amount of time on this while I was OOO on 
> vacation the last couple of weeks. It seems that the root cause of this issue 
> has always existed, and that the addition of always doing hostname storing 
> (https://github.com/apache/trafficserver/commit/0e703e1e) we are just causing 
> the issue to happen all the time.
> To understand the issue I'll give a little background in how hostdb is 
> currently working. Basically hostdb is just a wrapper around this templated 
> struct called MultiCache. MultiCache is "multi" not because it is templated, 
> but because it has two types of storage (static-- blocks and dynamic-- 
> alloc). The static side of the cache can hold N HostDBInfo structs (the 
> results of DNS queries). The dynamic side is used to store the round robin 
> records and various strings associated with the record. The size of this 
> dynamic space is defined as (N x [estimated_heap_bytes_per_entry. The basic 
> problem we are running into is that we are putting too much preassure on the 
> dynamic heap-- such that the heap is getting re-used while people still have 
> references to items in that space.
> So, I've actually been working on re-writing MultiCache to allocate the 
> entire required block at once (so we don't have this problem where the parent 
> exists but not the children), but I'm not certain if we want such a change to 
> go into the 6.x branch (I'm willing to discuss if we want). If we aren't 
> comfortable with such a large change I suggest just accounting for the 
> hostname size in the estimated_heap_bytes_per_entry as a stopgap solution. 
> The maximum allowable size is 253 (so 254 with null terminator), but we could 
> pick a smaller number (~120 or so seems to be more reasonable). Alternatively 
> you can increase the number of records in hostdb (and the size accordingly) 
> to increase the dynamic heap size.
> TLDR; almost done with the long term solution, but I'm not sure if we want to 
> merge that into 6.x-- alternatively we can do a simple workaround in 6.x 
> (https://github.com/apache/trafficserver/pull/553)
> {quote}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to