I was finally able to (temporarily) expose my SSL lazy loader plugin to
production traffic. After several minutes (a random interval between 8 and 30
minutes), I get a segfault at the dynamic_cast of the vc inside
TSVConnSSLConnectionGet:
#0 0x00007ffff6e443b9 in __dynamic_cast () from /usr/lib64/libstdc++.so.6
#1 0x00000000005836fe in TSVConnSSLConnectionGet (sslp=0x6330004b1480) at
InkAPI.cc:8746
#2 0x00007fffe32a8a9e in (anonymous namespace)::Loader_Thread
(cont=0x62800001b6d0, event=TS_EVENT_TIMEOUT, arg=0x62b00016ec60) at
ssl-lazy-loader.cc:704
#3 0x000000000055eedb in INKContInternal::handle_event (this=0x62800001b6d0,
event=2, edata=0x62b00016ec60) at InkAPI.cc:1003
#4 0x00000000005311fd in Continuation::handleEvent (this=0x62800001b6d0,
event=2, data=0x62b00016ec60) at ../iocore/eventsystem/I_Continuation.h:145
#5 0x0000000000a35595 in EThread::process_event (this=0x7fffca063800,
e=0x62b00016ec60, calling_code=2) at UnixEThread.cc:128
#6 0x0000000000a35ebc in EThread::execute (this=0x7fffca063800) at
UnixEThread.cc:207
#7 0x0000000000a33a93 in spawn_thread_internal (a=0x6040003c2a90) at
Thread.cc:85
#8 0x00007ffff74a2aa1 in start_thread () from /lib64/libpthread.so.0
#9 0x00007ffff6643aad in clone () from /lib64/libc.so.6
(gdb) fr 1
#1 0x00000000005836fe in TSVConnSSLConnectionGet (sslp=0x6330004b1480) at
InkAPI.cc:8746
8746 SSLNetVConnection *ssl_vc = dynamic_cast<SSLNetVConnection *>(vc);
(gdb) l
8741 TSVConnSSLConnectionGet(TSVConn sslp)
8742 {
8743 TSSslConnection ssl = NULL;
8744 NetVConnection *vc = reinterpret_cast<NetVConnection *>(sslp);
8745 if (vc != NULL) {
8746 SSLNetVConnection *ssl_vc = dynamic_cast<SSLNetVConnection *>(vc);
8747 if (ssl_vc != NULL) {
8748 ssl = reinterpret_cast<TSSslConnection>(ssl_vc->ssl);
8749 }
8750 }
(gdb) p vc
$1 = (NetVConnection *) 0x6330004b1480
(gdb) p sslp
$2 = (TSVConn) 0x6330004b1480
(gdb) p ssl_vc
$3 = (SSLNetVConnection *) 0x60e00026aa70
I am also running a build linked with ASAN. The following report is from a
different session than the gdb listing above:
ASAN:SIGSEGV
=================================================================
==16299==ERROR: AddressSanitizer: SEGV on unknown address 0x000000000000 (pc
0x2b624393f3b9 sp 0x2b624ca9d7f0 bp 0x000000000000 T14)
[Jul 17 17:30:59.765] Server {0x2b624dec1700} DIAG: (ssl-cert-loader-backlog)
PUSHing the vc for iwoneworleans.org off the waitingVConns while creating the
new entry in CB_servername
#0 0x2b624393f3b8 in __dynamic_cast (/usr/lib64/libstdc++.so.6+0xbb3b8)
#1 0x58379d in TSVConnSSLConnectionGet
/home/sfeltner/projects/trafficserver/proxy/InkAPI.cc:8746
#2 0x2b6256deda9d in Loader_Thread
/home/sfeltner/projects/trafficserver/plugins/experimental/ssl_cert_loader/ssl-cert-loader.cc:704
#3 0x55ef7a in INKContInternal::handle_event(int, void*)
/home/sfeltner/projects/trafficserver/proxy/InkAPI.cc:1003
#4 0x53129c in Continuation::handleEvent(int, void*)
/home/sfeltner/projects/trafficserver/iocore/eventsystem/I_Continuation.h:145
#5 0xa3579e in EThread::process_event(Event*, int)
/home/sfeltner/projects/trafficserver/iocore/eventsystem/UnixEThread.cc:128
#6 0xa360c5 in EThread::execute()
/home/sfeltner/projects/trafficserver/iocore/eventsystem/UnixEThread.cc:207
#7 0xa33c9c in spawn_thread_internal
/home/sfeltner/projects/trafficserver/iocore/eventsystem/Thread.cc:85
#8 0x2b6243261aa0 in start_thread (/lib64/libpthread.so.0+0x7aa0)
#9 0x2b624410daac in __clone (/lib64/libc.so.6+0xe8aac)
AddressSanitizer can not provide additional info.
SUMMARY: AddressSanitizer: SEGV ??:0 __dynamic_cast
Thread T14 ([ET_NET 13]) created by T0 ([ET_NET 0]) here:
#0 0x3e0cc23c7a in pthread_create (/usr/lib64/libasan.so.1+0x3e0cc23c7a)
#1 0xa33778 in ink_thread_create ../../lib/ts/ink_thread.h:150
#2 0xa33e25 in Thread::start(char const*, unsigned long, void* (*)(void*),
void*) /home/sfeltner/projects/trafficserver/iocore/eventsystem/Thread.cc:100
#3 0xa3959d in EventProcessor::start(int, unsigned long)
/home/sfeltner/projects/trafficserver/iocore/eventsystem/UnixEventProcessor.cc:140
#4 0x597735 in main /home/sfeltner/projects/trafficserver/proxy/Main.cc:1647
#5 0x2b6244043d1c in __libc_start_main (/lib64/libc.so.6+0x1ed1c)
==16299==ABORTING
Here is the original code that was seg faulting:
// Returns the SSL connection object held by the VConn `sslp`, or NULL when
// `sslp` does not refer to an SSLNetVConnection.
TSSslConnection
TSVConnSSLConnectionGet(TSVConn sslp)
{
TSSslConnection ssl = NULL;
// Reinterpret the TSVConn handle as a NetVConnection pointer.
NetVConnection *vc = reinterpret_cast<NetVConnection *>(sslp);
// NOTE(review): dynamic_cast of a null pointer is well defined and yields
// null, so a NULL `vc` is harmless here. For a non-null pointer the cast has
// to read the object's RTTI, so a stale or already-released `vc` can fault
// inside the cast itself -- presumably the crash being reported; confirm.
SSLNetVConnection *ssl_vc = dynamic_cast<SSLNetVConnection *>(vc);
if (ssl_vc != NULL) {
// Hand the underlying `ssl` member back as the plugin-facing handle type.
ssl = reinterpret_cast<TSSslConnection>(ssl_vc->ssl);
}
return ssl;
}
I have added some checks for NULL and this is the resulting updated code:
// Returns the SSL connection object held by the VConn `sslp`, or NULL when
// `sslp` is NULL or does not refer to an SSLNetVConnection.
TSSslConnection
TSVConnSSLConnectionGet(TSVConn sslp)
{
TSSslConnection ssl = NULL;
// Reinterpret the TSVConn handle as a NetVConnection pointer.
NetVConnection *vc = reinterpret_cast<NetVConnection *>(sslp);
// NOTE(review): this guard only rejects a NULL handle. dynamic_cast already
// yields null for a null pointer, so the guard does not change the result,
// and it cannot detect a non-null pointer to an object that is no longer
// valid -- presumably the failing case; confirm against the VConn lifetime.
if (vc != NULL) {
// Checked downcast: null unless `vc` really is an SSLNetVConnection.
SSLNetVConnection *ssl_vc = dynamic_cast<SSLNetVConnection *>(vc);
if (ssl_vc != NULL) {
// Hand the underlying `ssl` member back as the plugin-facing handle type.
ssl = reinterpret_cast<TSSslConnection>(ssl_vc->ssl);
}
}
return ssl;
}
and here is the code that calls that function:
TSMutexLock(entry->mutex);
while (entry->waitingVConns.begin() != entry->waitingVConns.end()) {
TSVConn vc = entry->waitingVConns.back();
entry->waitingVConns.pop_back();
if (vc != NULL){
TSSslConnection sslobj = TSVConnSSLConnectionGet(vc);
SSL *ssl = reinterpret_cast<SSL *>(sslobj);
SSL_set_SSL_CTX(ssl, entry->ctx);
TSDebug("redis-loader-thread", "Resolving the SSL ctx for %s in the "
"Loader_Thread", entry->redis_CN.c_str());
TSVConnReenable(vc);
}
How can I be getting a NULL dereference (according to ASAN) with all of these
checks in place? Why is the dynamic_cast dereferencing NULL? What am I
missing?
I would appreciate any and all feedback or advice...
Thanks,
Steven