[ https://issues.apache.org/jira/browse/TS-4331?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15351295#comment-15351295 ]
ASF GitHub Bot commented on TS-4331: ------------------------------------ Github user jacksontj commented on a diff in the pull request: https://github.com/apache/trafficserver/pull/653#discussion_r68602238 --- Diff: iocore/hostdb/HostDB.cc --- @@ -300,131 +237,77 @@ HostDBProcessor::cache() return &hostDB; } -struct HostDBTestRR : public Continuation { - int fd; - char b[512]; - int nb; - int outstanding, success, failure; - int in; - - int - mainEvent(int event, Event *e) - { - if (event == EVENT_INTERVAL) { - printf("HostDBTestRR: %d outstanding %d succcess %d failure\n", outstanding, success, failure); - } - if (event == EVENT_HOST_DB_LOOKUP) { - --outstanding; - if (e) - ++success; - else - ++failure; - } - if (in) - return EVENT_CONT; - in = 1; - while (outstanding < 40) { - if (!nb) - goto Lreturn; - char *end = (char *)memchr(b, '\n', nb); - if (!end) - read_some(); - end = (char *)memchr(b, '\n', nb); - if (!end) - nb = 0; - else { - *end = 0; - outstanding++; - hostDBProcessor.getbyname_re(this, b, 0); - nb -= ((end + 1) - b); - memcpy(b, end + 1, nb); - if (!nb) - read_some(); - } - } - Lreturn: - in = 0; - return EVENT_CONT; - } - - void - read_some() - { - nb = read(fd, b + nb, 512 - nb); - ink_release_assert(nb >= 0); - } - - HostDBTestRR() : Continuation(new_ProxyMutex()), nb(0), outstanding(0), success(0), failure(0), in(0) - { - printf("starting HostDBTestRR....\n"); - fd = open("hostdb_test.config", O_RDONLY, 0); - ink_release_assert(fd >= 0); - read_some(); - SET_HANDLER(&HostDBTestRR::mainEvent); - } - - ~HostDBTestRR() { close(fd); } -}; - -struct HostDBSyncer : public Continuation { +struct HostDBBackgroundTask : public Continuation { int frequency; ink_hrtime start_time; - int sync_event(int event, void *edata); + virtual int sync_event(int event, void *edata) = 0; int wait_event(int event, void *edata); - HostDBSyncer(); + HostDBBackgroundTask(int frequency); }; -HostDBSyncer::HostDBSyncer() : Continuation(new_ProxyMutex()), frequency(0), start_time(0) +HostDBBackgroundTask::HostDBBackgroundTask(int frequency) : Continuation(new_ProxyMutex()), frequency(frequency), start_time(0) { - SET_HANDLER(&HostDBSyncer::sync_event); + SET_HANDLER(&HostDBBackgroundTask::sync_event); } int -HostDBSyncer::sync_event(int, void *) +HostDBBackgroundTask::wait_event(int, void *) { - SET_HANDLER(&HostDBSyncer::wait_event); - start_time = Thread::get_hrtime(); - hostDBProcessor.cache()->sync_partitions(this); - return EVENT_DONE; -} + ink_hrtime next_sync = HRTIME_SECONDS(this->frequency) - (Thread::get_hrtime() - start_time); -int -HostDBSyncer::wait_event(int, void *) -{ - ink_hrtime next_sync = HRTIME_SECONDS(hostdb_sync_frequency) - (Thread::get_hrtime() - start_time); - - SET_HANDLER(&HostDBSyncer::sync_event); + SET_HANDLER(&HostDBBackgroundTask::sync_event); if (next_sync > HRTIME_MSECONDS(100)) eventProcessor.schedule_in(this, next_sync, ET_TASK); else eventProcessor.schedule_imm(this, ET_TASK); return EVENT_DONE; } +struct HostDBSync : public HostDBBackgroundTask { + std::string storage_path; + std::string full_path; + HostDBSync(int frequency, std::string storage_path, std::string full_path) + : HostDBBackgroundTask(frequency), storage_path(storage_path), full_path(full_path){}; + int + sync_event(int, void *) + { + SET_HANDLER(&HostDBSync::wait_event); + start_time = Thread::get_hrtime(); + + new RefCountCacheSync<RefCountCache<HostDBInfo>>(this, hostDBProcessor.cache()->refcountcache, this->frequency, + this->storage_path, this->full_path); + return EVENT_DONE; + } +}; + int HostDBCache::start(int flags) { - Store *hostDBStore; - Span *hostDBSpan; + (void)flags; // unused char storage_path[PATH_NAME_MAX]; - int storage_size = 33554432; // 32MB default - - bool reconfigure = ((flags & PROCESSOR_RECONFIGURE) ? true : false); - bool fix = ((flags & PROCESSOR_FIX) ? true : false); + int hostdb_max_size = 33554432; // 32MB default + int hostdb_partitions = 64; storage_path[0] = '\0'; // Read configuration // Command line overrides manager configuration. // REC_ReadConfigInt32(hostdb_enable, "proxy.config.hostdb"); - REC_ReadConfigString(hostdb_filename, "proxy.config.hostdb.filename", sizeof(hostdb_filename)); - REC_ReadConfigInt32(hostdb_size, "proxy.config.hostdb.size"); REC_ReadConfigInt32(hostdb_srv_enabled, "proxy.config.srv_enabled"); REC_ReadConfigString(storage_path, "proxy.config.hostdb.storage_path", sizeof(storage_path)); - REC_ReadConfigInt32(storage_size, "proxy.config.hostdb.storage_size"); + REC_ReadConfigString(hostdb_filename, "proxy.config.hostdb.filename", sizeof(hostdb_filename)); + + // Max number of items + REC_ReadConfigInt32(hostdb_max_count, "proxy.config.hostdb.max_count"); + // max size allowed to use + REC_ReadConfigInt32(hostdb_max_size, "proxy.config.hostdb.max_size"); --- End diff -- Well, these are actually set in the default config options. So i'll set the default here in the code to -1, but either way the config option will overwrite it. > Hostdb consistency problems due to MultiCache > --------------------------------------------- > > Key: TS-4331 > URL: https://issues.apache.org/jira/browse/TS-4331 > Project: Traffic Server > Issue Type: Bug > Components: HostDB > Reporter: Thomas Jackson > Assignee: Thomas Jackson > Fix For: 7.0.0 > > > This ticket is for the correct long term fix to TS-4207 > pulled from a comment, which wraps up the issue > {quote} > Leif Hedstrom I have spent a decent amount of time on this while I was OOO on > vacation the last couple of weeks. It seems that the root cause of this issue > has always existed, and that the addition of always doing hostname storing > (https://github.com/apache/trafficserver/commit/0e703e1e) we are just causing > the issue to happen all the time. > To understand the issue I'll give a little background in how hostdb is > currently working. Basically hostdb is just a wrapper around this templated > struct called MultiCache. MultiCache is "multi" not because it is templated, > but because it has two types of storage (static-- blocks and dynamic-- > alloc). The static side of the cache can hold N HostDBInfo structs (the > results of DNS queries). The dynamic side is used to store the round robin > records and various strings associated with the record. The size of this > dynamic space is defined as (N x [estimated_heap_bytes_per_entry. The basic > problem we are running into is that we are putting too much preassure on the > dynamic heap-- such that the heap is getting re-used while people still have > references to items in that space. > So, I've actually been working on re-writing MultiCache to allocate the > entire required block at once (so we don't have this problem where the parent > exists but not the children), but I'm not certain if we want such a change to > go into the 6.x branch (I'm willing to discuss if we want). If we aren't > comfortable with such a large change I suggest just accounting for the > hostname size in the estimated_heap_bytes_per_entry as a stopgap solution. > The maximum allowable size is 253 (so 254 with null terminator), but we could > pick a smaller number (~120 or so seems to be more reasonable). Alternatively > you can increase the number of records in hostdb (and the size accordingly) > to increase the dynamic heap size. > TLDR; almost done with the long term solution, but I'm not sure if we want to > merge that into 6.x-- alternatively we can do a simple workaround in 6.x > (https://github.com/apache/trafficserver/pull/553) > {quote} -- This message was sent by Atlassian JIRA (v6.3.4#6332)