[ https://issues.apache.org/jira/browse/TS-3455?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Luca Bruno updated TS-3455: --------------------------- Description: I've written a simple test case plugin for demonstrating this problem, not sure if it's a problem on my side, but that would also mean that the regex invalidate plugin would also abort(). What the plugin does: in LOOKUP_COMPLETE, if the cache status is FRESH then set it to STALE. To reproduce: 1) Send a first cacheable request to ATS, which gets cached. 2) Request again the same url, the plugin triggers and set the cache to STALE. Then ATS does abort(). Plugin code: {noformat} #include <ts/ts.h> #include <ts/remap.h> #include <ts/experimental.h> #include <stdlib.h> #include <stdio.h> #include <getopt.h> #include <string.h> #include <string> #include <iterator> #include <map> const char PLUGIN_NAME[] = "maybebug"; static int Handler(TSCont cont, TSEvent event, void *edata); struct PluginState { PluginState() { cont = TSContCreate(Handler, NULL); TSContDataSet(cont, this); } ~PluginState() { TSContDestroy(cont); } TSCont cont; }; static int Handler(TSCont cont, TSEvent event, void* edata) { TSHttpTxn txn = (TSHttpTxn)edata; if (event == TS_EVENT_HTTP_CACHE_LOOKUP_COMPLETE) { int lookup_status; if (TS_SUCCESS == TSHttpTxnCacheLookupStatusGet(txn, &lookup_status)) { TSDebug(PLUGIN_NAME, "lookup complete: %d", lookup_status); if (lookup_status == TS_CACHE_LOOKUP_HIT_FRESH) { TSDebug(PLUGIN_NAME, "set stale"); TSHttpTxnCacheLookupStatusSet(txn, TS_CACHE_LOOKUP_HIT_STALE); } } } TSHttpTxnReenable(txn, TS_EVENT_HTTP_CONTINUE); return TS_EVENT_NONE; } void TSPluginInit (int argc, const char *argv[]) { TSPluginRegistrationInfo info; info.plugin_name = strdup("cappello"); info.vendor_name = strdup("foo"); info.support_email = strdup("f...@bar.com"); if (TSPluginRegister(TS_SDK_VERSION_3_0 , &info) != TS_SUCCESS) { TSError("Plugin registration failed"); } PluginState* state = new PluginState(); TSHttpHookAdd(TS_HTTP_CACHE_LOOKUP_COMPLETE_HOOK, state->cont); } {noformat} Output: {noformat} [Mar 19 18:40:36.254] Server {0x7f6df0b4f740} DIAG: (maybebug) lookup complete: 0 [Mar 19 18:40:40.854] Server {0x7f6decfee700} DIAG: (maybebug) lookup complete: 2 [Mar 19 18:40:40.854] Server {0x7f6decfee700} DIAG: (maybebug) set stale FATAL: HttpTransact.cc:433: failed assert `s->pending_work == NULL` traffic_server - STACK TRACE: /usr/local/lib/libtsutil.so.5(ink_fatal+0xa3)[0x7f6df072186d] /usr/local/lib/libtsutil.so.5(_Z12ink_get_randv+0x0)[0x7f6df071f3a0] traffic_server[0x60d0aa] traffic_server(_ZN12HttpTransact22HandleCacheOpenReadHitEPNS_5StateE+0xf82)[0x619206] ... {noformat} What happens in gdb is that HandleCacheOpenReadHit is called twice in the same request. The first time s->pending_work is NULL, the second time it's not NULL. The patch below fixes the problem: {noformat} diff --git a/proxy/http/HttpTransact.cc b/proxy/http/HttpTransact.cc index 0078ef1..852f285 100644 --- a/proxy/http/HttpTransact.cc +++ b/proxy/http/HttpTransact.cc @@ -2641,11 +2641,6 @@ HttpTransact::HandleCacheOpenReadHit(State* s) // ink_release_assert(s->current.request_to == PARENT_PROXY || // s->http_config_param->no_dns_forward_to_parent != 0); - // Set ourselves up to handle pending revalidate issues - // after the PP DNS lookup - ink_assert(s->pending_work == NULL); - s->pending_work = issue_revalidate; - // We must be going a PARENT PROXY since so did // origin server DNS lookup right after state Start // @@ -2654,6 +2649,11 @@ HttpTransact::HandleCacheOpenReadHit(State* s) // missing ip but we won't take down the system // if (s->current.request_to == PARENT_PROXY) { + // Set ourselves up to handle pending revalidate issues + // after the PP DNS lookup + ink_assert(s->pending_work == NULL); + s->pending_work = issue_revalidate; + TRANSACT_RETURN(SM_ACTION_DNS_LOOKUP, PPDNSLookup); } else if (s->current.request_to == ORIGIN_SERVER) { TRANSACT_RETURN(SM_ACTION_DNS_LOOKUP, OSDNSLookup); {noformat} was: I've written a simple test case plugin for demonstrating this problem, not sure if it's a problem on my side, but that would also mean that the regex invalidate plugin would also abort(). What the plugin does: in LOOKUP_COMPLETE, if the cache status is FRESH then set it to STALE. To reproduce: 1) First cacheable request to ATS gets cached 2) Request again the same url, the plugin triggers and set the cache to STALE. Then ATS does abort(). Plugin code: {noformat} #include <ts/ts.h> #include <ts/remap.h> #include <ts/experimental.h> #include <stdlib.h> #include <stdio.h> #include <getopt.h> #include <string.h> #include <string> #include <iterator> #include <map> const char PLUGIN_NAME[] = "maybebug"; static int Handler(TSCont cont, TSEvent event, void *edata); struct PluginState { PluginState() { cont = TSContCreate(Handler, NULL); TSContDataSet(cont, this); } ~PluginState() { TSContDestroy(cont); } TSCont cont; }; static int Handler(TSCont cont, TSEvent event, void* edata) { TSHttpTxn txn = (TSHttpTxn)edata; if (event == TS_EVENT_HTTP_CACHE_LOOKUP_COMPLETE) { int lookup_status; if (TS_SUCCESS == TSHttpTxnCacheLookupStatusGet(txn, &lookup_status)) { TSDebug(PLUGIN_NAME, "lookup complete: %d", lookup_status); if (lookup_status == TS_CACHE_LOOKUP_HIT_FRESH) { TSDebug(PLUGIN_NAME, "set stale"); TSHttpTxnCacheLookupStatusSet(txn, TS_CACHE_LOOKUP_HIT_STALE); } } } TSHttpTxnReenable(txn, TS_EVENT_HTTP_CONTINUE); return TS_EVENT_NONE; } void TSPluginInit (int argc, const char *argv[]) { TSPluginRegistrationInfo info; info.plugin_name = strdup("cappello"); info.vendor_name = strdup("foo"); info.support_email = strdup("f...@bar.com"); if (TSPluginRegister(TS_SDK_VERSION_3_0 , &info) != TS_SUCCESS) { TSError("Plugin registration failed"); } PluginState* state = new PluginState(); TSHttpHookAdd(TS_HTTP_CACHE_LOOKUP_COMPLETE_HOOK, state->cont); } {noformat} Output: {noformat} [Mar 19 18:40:36.254] Server {0x7f6df0b4f740} DIAG: (maybebug) lookup complete: 0 [Mar 19 18:40:40.854] Server {0x7f6decfee700} DIAG: (maybebug) lookup complete: 2 [Mar 19 18:40:40.854] Server {0x7f6decfee700} DIAG: (maybebug) set stale FATAL: HttpTransact.cc:433: failed assert `s->pending_work == NULL` traffic_server - STACK TRACE: /usr/local/lib/libtsutil.so.5(ink_fatal+0xa3)[0x7f6df072186d] /usr/local/lib/libtsutil.so.5(_Z12ink_get_randv+0x0)[0x7f6df071f3a0] traffic_server[0x60d0aa] traffic_server(_ZN12HttpTransact22HandleCacheOpenReadHitEPNS_5StateE+0xf82)[0x619206] ... {noformat} What happens in gdb is that HandleCacheOpenReadHit is called twice in the same request. The first time s->pending_work is NULL, the second time it's not NULL. The patch below fixes the problem: {noformat} diff --git a/proxy/http/HttpTransact.cc b/proxy/http/HttpTransact.cc index 0078ef1..852f285 100644 --- a/proxy/http/HttpTransact.cc +++ b/proxy/http/HttpTransact.cc @@ -2641,11 +2641,6 @@ HttpTransact::HandleCacheOpenReadHit(State* s) // ink_release_assert(s->current.request_to == PARENT_PROXY || // s->http_config_param->no_dns_forward_to_parent != 0); - // Set ourselves up to handle pending revalidate issues - // after the PP DNS lookup - ink_assert(s->pending_work == NULL); - s->pending_work = issue_revalidate; - // We must be going a PARENT PROXY since so did // origin server DNS lookup right after state Start // @@ -2654,6 +2649,11 @@ HttpTransact::HandleCacheOpenReadHit(State* s) // missing ip but we won't take down the system // if (s->current.request_to == PARENT_PROXY) { + // Set ourselves up to handle pending revalidate issues + // after the PP DNS lookup + ink_assert(s->pending_work == NULL); + s->pending_work = issue_revalidate; + TRANSACT_RETURN(SM_ACTION_DNS_LOOKUP, PPDNSLookup); } else if (s->current.request_to == ORIGIN_SERVER) { TRANSACT_RETURN(SM_ACTION_DNS_LOOKUP, OSDNSLookup); {noformat} > Marking the cache STALE in lookup-complete causes abort() > --------------------------------------------------------- > > Key: TS-3455 > URL: https://issues.apache.org/jira/browse/TS-3455 > Project: Traffic Server > Issue Type: Bug > Reporter: Luca Bruno > > I've written a simple test case plugin for demonstrating this problem, not > sure if it's a problem on my side, but that would also mean that the regex > invalidate plugin would also abort(). > What the plugin does: in LOOKUP_COMPLETE, if the cache status is FRESH then > set it to STALE. > To reproduce: > 1) Send a first cacheable request to ATS, which gets cached. > 2) Request again the same url, the plugin triggers and set the cache to > STALE. Then ATS does abort(). > Plugin code: > {noformat} > #include <ts/ts.h> > #include <ts/remap.h> > #include <ts/experimental.h> > #include <stdlib.h> > #include <stdio.h> > #include <getopt.h> > #include <string.h> > #include <string> > #include <iterator> > #include <map> > const char PLUGIN_NAME[] = "maybebug"; > static int Handler(TSCont cont, TSEvent event, void *edata); > struct PluginState { > PluginState() > { > cont = TSContCreate(Handler, NULL); > TSContDataSet(cont, this); > } > ~PluginState() > { > TSContDestroy(cont); > } > TSCont cont; > }; > static int Handler(TSCont cont, TSEvent event, void* edata) { > TSHttpTxn txn = (TSHttpTxn)edata; > if (event == TS_EVENT_HTTP_CACHE_LOOKUP_COMPLETE) { > int lookup_status; > if (TS_SUCCESS == TSHttpTxnCacheLookupStatusGet(txn, &lookup_status)) { > TSDebug(PLUGIN_NAME, "lookup complete: %d", lookup_status); > if (lookup_status == TS_CACHE_LOOKUP_HIT_FRESH) { > TSDebug(PLUGIN_NAME, "set stale"); > TSHttpTxnCacheLookupStatusSet(txn, TS_CACHE_LOOKUP_HIT_STALE); > } > } > } > TSHttpTxnReenable(txn, TS_EVENT_HTTP_CONTINUE); > return TS_EVENT_NONE; > } > void TSPluginInit (int argc, const char *argv[]) { > TSPluginRegistrationInfo info; > info.plugin_name = strdup("cappello"); > info.vendor_name = strdup("foo"); > info.support_email = strdup("f...@bar.com"); > if (TSPluginRegister(TS_SDK_VERSION_3_0 , &info) != TS_SUCCESS) { > TSError("Plugin registration failed"); > } > PluginState* state = new PluginState(); > TSHttpHookAdd(TS_HTTP_CACHE_LOOKUP_COMPLETE_HOOK, state->cont); > } > {noformat} > Output: > {noformat} > [Mar 19 18:40:36.254] Server {0x7f6df0b4f740} DIAG: (maybebug) lookup > complete: 0 > [Mar 19 18:40:40.854] Server {0x7f6decfee700} DIAG: (maybebug) lookup > complete: 2 > [Mar 19 18:40:40.854] Server {0x7f6decfee700} DIAG: (maybebug) set stale > FATAL: HttpTransact.cc:433: failed assert `s->pending_work == NULL` > traffic_server - STACK TRACE: > /usr/local/lib/libtsutil.so.5(ink_fatal+0xa3)[0x7f6df072186d] > /usr/local/lib/libtsutil.so.5(_Z12ink_get_randv+0x0)[0x7f6df071f3a0] > traffic_server[0x60d0aa] > traffic_server(_ZN12HttpTransact22HandleCacheOpenReadHitEPNS_5StateE+0xf82)[0x619206] > ... > {noformat} > What happens in gdb is that HandleCacheOpenReadHit is called twice in the > same request. The first time s->pending_work is NULL, the second time it's > not NULL. > The patch below fixes the problem: > {noformat} > diff --git a/proxy/http/HttpTransact.cc b/proxy/http/HttpTransact.cc > index 0078ef1..852f285 100644 > --- a/proxy/http/HttpTransact.cc > +++ b/proxy/http/HttpTransact.cc > @@ -2641,11 +2641,6 @@ HttpTransact::HandleCacheOpenReadHit(State* s) > // ink_release_assert(s->current.request_to == PARENT_PROXY || > // s->http_config_param->no_dns_forward_to_parent != 0); > > - // Set ourselves up to handle pending revalidate issues > - // after the PP DNS lookup > - ink_assert(s->pending_work == NULL); > - s->pending_work = issue_revalidate; > - > // We must be going a PARENT PROXY since so did > // origin server DNS lookup right after state Start > // > @@ -2654,6 +2649,11 @@ HttpTransact::HandleCacheOpenReadHit(State* s) > // missing ip but we won't take down the system > // > if (s->current.request_to == PARENT_PROXY) { > + // Set ourselves up to handle pending revalidate issues > + // after the PP DNS lookup > + ink_assert(s->pending_work == NULL); > + s->pending_work = issue_revalidate; > + > TRANSACT_RETURN(SM_ACTION_DNS_LOOKUP, PPDNSLookup); > } else if (s->current.request_to == ORIGIN_SERVER) { > TRANSACT_RETURN(SM_ACTION_DNS_LOOKUP, OSDNSLookup); > {noformat} -- This message was sent by Atlassian JIRA (v6.3.4#6332)