[ 
https://issues.apache.org/jira/browse/TS-3486?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14603354#comment-14603354
 ] 

ASF GitHub Bot commented on TS-3486:
------------------------------------

GitHub user shinrich opened a pull request:

    https://github.com/apache/trafficserver/pull/235

    TS-3486: Crashes due to race condition on server sessions moving betw…

    …een threads.
    
    Addressing crash issues disucssed in TS-3266 and TS-3486.  This fix has 
been running on Y! 5.3.x on one production machine for over a day and on three 
other production machines for several hours.
    
    There is a race condition when server sessions reused from a global pool 
are moved between threads.  This patch addresses the issue by holding both the 
session pool lock and the activating state machine lock while setting up the 
first read.vio on the reactivated server.  This prevents conflicting IO using 
one lock or the other on another thread.

You can merge this pull request into a Git repository by running:

    $ git pull https://github.com/shinrich/trafficserver ts-3486

Alternatively you can review and apply these changes as the patch at:

    https://github.com/apache/trafficserver/pull/235.patch

To close this pull request, make a commit to your master/trunk branch
with (at least) the following in the commit message:

    This closes #235
    
----
commit 8c1a8631b92f8c7b1bbe2e47835575524f1fba8c
Author: shinrich <shinr...@yahoo-inc.com>
Date:   2015-06-26T18:11:40Z

    TS-3486: Crashes due to race condition on server sessions moving between 
threads.

----


> Segfault in do_io_write with plugin (??)
> ----------------------------------------
>
>                 Key: TS-3486
>                 URL: https://issues.apache.org/jira/browse/TS-3486
>             Project: Traffic Server
>          Issue Type: Bug
>    Affects Versions: 5.2.0, 5.3.0
>            Reporter: Qiang Li
>            Assignee: Phil Sorber
>              Labels: crash
>             Fix For: sometime
>
>         Attachments: ts-3266-2.diff, ts-3266-complete.diff, 
> ts3486-ptrace.txt.gz
>
>
> {code}
> (gdb) bt
> #0  0x00000000005bdb8b in HttpServerSession::do_io_write (this=<value 
> optimized out>, c=0x2aaadccc4bf0, nbytes=576, buf=0x2aaafc2ffee8, owner=false)
>     at HttpServerSession.cc:104
> #1  0x00000000005acc1d in HttpSM::setup_server_send_request 
> (this=0x2aaadccc4bf0) at HttpSM.cc:5686
> #2  0x00000000005b3f85 in HttpSM::handle_api_return (this=0x2aaadccc4bf0) at 
> HttpSM.cc:1520
> #3  0x00000000005b45f8 in HttpSM::state_api_callout (this=0x2aaadccc4bf0, 
> event=60000, data=0x0) at HttpSM.cc:1455
> #4  0x00000000005b980b in HttpSM::state_api_callback (this=0x2aaadccc4bf0, 
> event=60000, data=0x0) at HttpSM.cc:1275
> #5  0x00000000004d7a1b in TSHttpTxnReenable (txnp=0x2aaadccc4bf0, 
> event=TS_EVENT_HTTP_CONTINUE) at InkAPI.cc:5614
> #6  0x00002ba118441c89 in cachefun (contp=<value optimized out>, event=<value 
> optimized out>, edata=0x2aaadccc4bf0) at main.cpp:1876
> #7  0x00000000005b4466 in HttpSM::state_api_callout (this=0x2aaadccc4bf0, 
> event=<value optimized out>, data=<value optimized out>) at HttpSM.cc:1381
> #8  0x00000000005b627d in HttpSM::do_http_server_open (this=0x2aaadccc4bf0, 
> raw=<value optimized out>) at HttpSM.cc:4639
> #9  0x00000000005baa04 in HttpSM::set_next_state (this=0x2aaadccc4bf0) at 
> HttpSM.cc:7021
> #10 0x00000000005b25a3 in HttpSM::state_cache_open_write 
> (this=0x2aaadccc4bf0, event=1108, data=0x2aab1c3b6800) at HttpSM.cc:2442
> #11 0x00000000005b5b28 in HttpSM::main_handler (this=0x2aaadccc4bf0, 
> event=1108, data=0x2aab1c3b6800) at HttpSM.cc:2554
> #12 0x000000000059338a in handleEvent (this=0x2aaadccc6618, event=<value 
> optimized out>, data=0x2aab1c3b6800) at 
> ../../iocore/eventsystem/I_Continuation.h:145
> #13 HttpCacheSM::state_cache_open_write (this=0x2aaadccc6618, event=<value 
> optimized out>, data=0x2aab1c3b6800) at HttpCacheSM.cc:167
> #14 0x0000000000697223 in handleEvent (this=0x2aab1c3b6800, event=<value 
> optimized out>) at ../../iocore/eventsystem/I_Continuation.h:145
> #15 CacheVC::callcont (this=0x2aab1c3b6800, event=<value optimized out>) at 
> ../../iocore/cache/P_CacheInternal.h:662
> #16 0x0000000000715940 in Cache::open_write (this=<value optimized out>, 
> cont=<value optimized out>, key=0x2ba0ff762d70, info=<value optimized out>, 
>     apin_in_cache=46914401429576, type=CACHE_FRAG_TYPE_HTTP, 
>     hostname=0x2aaadd281078 
> "www.mifangba.comhttpapi.phpwww.mifangba.comhttp://www.mifangba.com/api.php?op=count&id=4&modelid=12";,
>  host_len=16) at CacheWrite.cc:1788
> #17 0x00000000006e5765 in open_write (this=<value optimized out>, 
> cont=0x2aaadccc6618, expected_size=<value optimized out>, url=0x2aaadccc5310, 
>     cluster_cache_local=<value optimized out>, request=<value optimized out>, 
> old_info=0x0, pin_in_cache=0, type=CACHE_FRAG_TYPE_HTTP) at 
> P_CacheInternal.h:1093
> #18 CacheProcessor::open_write (this=<value optimized out>, 
> cont=0x2aaadccc6618, expected_size=<value optimized out>, url=0x2aaadccc5310, 
>     cluster_cache_local=<value optimized out>, request=<value optimized out>, 
> old_info=0x0, pin_in_cache=0, type=CACHE_FRAG_TYPE_HTTP) at Cache.cc:3622
> #19 0x00000000005936f0 in HttpCacheSM::open_write (this=0x2aaadccc6618, 
> url=<value optimized out>, request=<value optimized out>, old_info=<value 
> optimized out>, 
>     pin_in_cache=<value optimized out>, retry=<value optimized out>, 
> allow_multiple=false) at HttpCacheSM.cc:298
> #20 0x00000000005a022e in HttpSM::do_cache_prepare_action 
> (this=0x2aaadccc4bf0, c_sm=0x2aaadccc6618, object_read_info=0x0, retry=true, 
> allow_multiple=false) at HttpSM.cc:4511
> #21 0x00000000005babd9 in do_cache_prepare_write (this=0x2aaadccc4bf0) at 
> HttpSM.cc:4436
> #22 HttpSM::set_next_state (this=0x2aaadccc4bf0) at HttpSM.cc:7098
> #23 0x00000000005b3f5f in HttpSM::handle_api_return (this=0x2aaadccc4bf0) at 
> HttpSM.cc:1517
> #24 0x00000000005b45f8 in HttpSM::state_api_callout (this=0x2aaadccc4bf0, 
> event=0, data=0x0) at HttpSM.cc:1455
> #25 0x00000000005ba712 in HttpSM::set_next_state (this=0x2aaadccc4bf0) at 
> HttpSM.cc:6876
> #26 0x00000000005ba702 in HttpSM::set_next_state (this=0x2aaadccc4bf0) at 
> HttpSM.cc:6919
> #27 0x00000000005b3f5f in HttpSM::handle_api_return (this=0x2aaadccc4bf0) at 
> HttpSM.cc:1517
> #28 0x00000000005b45f8 in HttpSM::state_api_callout (this=0x2aaadccc4bf0, 
> event=60000, data=0x0) at HttpSM.cc:1455
> #29 0x00000000005b980b in HttpSM::state_api_callback (this=0x2aaadccc4bf0, 
> event=60000, data=0x0) at HttpSM.cc:1275
> #30 0x00000000004d7a1b in TSHttpTxnReenable (txnp=0x2aaadccc4bf0, 
> event=TS_EVENT_HTTP_CONTINUE) at InkAPI.cc:5614
> #31 0x00002ba11844215d in service_from_this (contp=<value optimized out>, 
> event=<value optimized out>, edata=0x2aaadccc4bf0) at main.cpp:1498
> #32 cachefun (contp=<value optimized out>, event=<value optimized out>, 
> edata=0x2aaadccc4bf0) at main.cpp:1811
> #33 0x00000000005b4466 in HttpSM::state_api_callout (this=0x2aaadccc4bf0, 
> event=<value optimized out>, data=<value optimized out>) at HttpSM.cc:1381
> #34 0x00000000005b980b in HttpSM::state_api_callback (this=0x2aaadccc4bf0, 
> event=60000, data=0x0) at HttpSM.cc:1275
> #35 0x00000000004d7a1b in TSHttpTxnReenable (txnp=0x2aaadccc4bf0, 
> event=TS_EVENT_HTTP_CONTINUE) at InkAPI.cc:5614
> #36 0x00002ba1070e3275 in refresh_hook (contp=<value optimized out>, 
> event=<value optimized out>, edata=0x2aaadccc4bf0) at main.cpp:533
> #37 0x00000000005b4466 in HttpSM::state_api_callout (this=0x2aaadccc4bf0, 
> event=<value optimized out>, data=<value optimized out>) at HttpSM.cc:1381
> #38 0x00000000005b7a68 in do_api_callout (this=0x2aaadccc4bf0, event=1103, 
> data=0xffffffffffffb050) at HttpSM.cc:442
> #39 setup_cache_lookup_complete_api (this=0x2aaadccc4bf0, event=1103, 
> data=0xffffffffffffb050) at HttpSM.cc:2451
> #40 HttpSM::state_cache_open_read (this=0x2aaadccc4bf0, event=1103, 
> data=0xffffffffffffb050) at HttpSM.cc:2512
> ---Type <return> to continue, or q <return> to quit---
> #41 0x00000000005b5b28 in HttpSM::main_handler (this=0x2aaadccc4bf0, 
> event=1103, data=0xffffffffffffb050) at HttpSM.cc:2554
> #42 0x0000000000593a42 in handleEvent (this=0x2aaadccc6618, event=1103, 
> data=0xffffffffffffb050) at ../../iocore/eventsystem/I_Continuation.h:145
> #43 HttpCacheSM::state_cache_open_read (this=0x2aaadccc6618, event=1103, 
> data=0xffffffffffffb050) at HttpCacheSM.cc:131
> #44 0x0000000000703cae in Cache::open_read (this=<value optimized out>, 
> cont=0x2aaadccc6618, key=<value optimized out>, request=0x2aaadccc52f8, 
> params=0x2aaadccc4cd0, 
>     type=<value optimized out>, hostname=0x2aaadd281078 
> "www.mifangba.comhttpapi.phpwww.mifangba.comhttp://www.mifangba.com/api.php?op=count&id=4&modelid=12";,
>  host_len=16)
>     at CacheRead.cc:149
> #45 0x00000000006e153d in open_read (this=<value optimized out>, 
> cont=0x2aaadccc6618, url=0x2aaadccc5310, cluster_cache_local=<value optimized 
> out>, request=0x2aaadccc52f8, 
>     params=0x2aaadccc4cd0, pin_in_cache=0, type=CACHE_FRAG_TYPE_HTTP) at 
> P_CacheInternal.h:1074
> #46 CacheProcessor::open_read (this=<value optimized out>, 
> cont=0x2aaadccc6618, url=0x2aaadccc5310, cluster_cache_local=<value optimized 
> out>, request=0x2aaadccc52f8, 
>     params=0x2aaadccc4cd0, pin_in_cache=0, type=CACHE_FRAG_TYPE_HTTP) at 
> Cache.cc:3598
> #47 0x00000000005934f4 in do_cache_open_read (this=<value optimized out>, 
> url=<value optimized out>, hdr=<value optimized out>, params=<value optimized 
> out>, 
>     pin_in_cache=<value optimized out>) at HttpCacheSM.cc:211
> #48 HttpCacheSM::open_read (this=<value optimized out>, url=<value optimized 
> out>, hdr=<value optimized out>, params=<value optimized out>, 
> pin_in_cache=<value optimized out>)
>     at HttpCacheSM.cc:243
> #49 0x00000000005a0533 in HttpSM::do_cache_lookup_and_read 
> (this=0x2aaadccc4bf0) at HttpSM.cc:4389
> #50 0x00000000005bac3f in HttpSM::set_next_state (this=0x2aaadccc4bf0) at 
> HttpSM.cc:6986
> #51 0x00000000005b3f5f in HttpSM::handle_api_return (this=0x2aaadccc4bf0) at 
> HttpSM.cc:1517
> #52 0x00000000005b45f8 in HttpSM::state_api_callout (this=0x2aaadccc4bf0, 
> event=0, data=0x0) at HttpSM.cc:1455
> #53 0x00000000005ba712 in HttpSM::set_next_state (this=0x2aaadccc4bf0) at 
> HttpSM.cc:6876
> #54 0x000000000059fa6e in HttpSM::state_remap_request (this=0x2aaadccc4bf0, 
> event=63002) at HttpSM.cc:3880
> #55 0x00000000005b5b28 in HttpSM::main_handler (this=0x2aaadccc4bf0, 
> event=63002, data=0x0) at HttpSM.cc:2554
> #56 0x000000000061b15a in handleEvent (this=0x2aaadcfc8080, event=1, 
> e=0x2aaaf9350d80) at ../../../iocore/eventsystem/I_Continuation.h:145
> #57 RemapPlugins::run_remap (this=0x2aaadcfc8080, event=1, e=0x2aaaf9350d80) 
> at RemapPlugins.cc:179
> #58 0x0000000000765165 in handleEvent (this=0x2ba0ff262010, e=0x2aaaf9350d80, 
> calling_code=1) at I_Continuation.h:145
> #59 EThread::process_event (this=0x2ba0ff262010, e=0x2aaaf9350d80, 
> calling_code=1) at UnixEThread.cc:128
> #60 0x00000000007659fb in EThread::execute (this=0x2ba0ff262010) at 
> UnixEThread.cc:179
> #61 0x00000000007645aa in spawn_thread_internal (a=0x2919020) at Thread.cc:85
> #62 0x00000035966079d1 in start_thread () from /lib64/libpthread.so.0
> #63 0x0000003595ee88fd in clone () from /lib64/libc.so.6
> (gdb) f 0
> #0  0x00000000005bdb8b in HttpServerSession::do_io_write (this=<value 
> optimized out>, c=0x2aaadccc4bf0, nbytes=576, buf=0x2aaafc2ffee8, owner=false)
>     at HttpServerSession.cc:104
> 104     return server_vc->do_io_write(c, nbytes, buf, owner);
> (gdb) l
> 99    }
> 100   
> 101   VIO *
> 102   HttpServerSession::do_io_write(Continuation *c, int64_t nbytes, 
> IOBufferReader *buf, bool owner)
> 103   {
> 104     return server_vc->do_io_write(c, nbytes, buf, owner);
> 105   }
> 106   
> 107   void
> 108   HttpServerSession::do_io_shutdown(ShutdownHowTo_t howto)
> (gdb) p server_vc
> value has been optimized out
> (gdb) f 1
> #1  0x00000000005acc1d in HttpSM::setup_server_send_request 
> (this=0x2aaadccc4bf0) at HttpSM.cc:5686
> 5686    server_entry->write_vio = server_entry->vc->do_io_write(this, 
> hdr_length, buf_start);
> (gdb) l
> 5681                                                 
> MIME_PRESENCE_WWW_AUTHENTICATE)) {
> 5682      DebugSM("http_ss", "Setting server session to private for 
> authorization header");
> 5683      set_server_session_private(true);
> 5684    }
> 5685    milestones.server_begin_write = ink_get_hrtime();
> 5686    server_entry->write_vio = server_entry->vc->do_io_write(this, 
> hdr_length, buf_start);
> 5687  }
> 5688  
> 5689  void
> 5690  HttpSM::setup_server_read_response_header()
> (gdb) p server_entry
> $1 = (HttpVCTableEntry *) 0x0
> (gdb) 
> {code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to