Thanks, the backtraces really helped. Could you try the following patch? -Stefan
h2_beam_locks_v1.diff
Description: Binary data
> Am 19.04.2017 um 08:32 schrieb Stefan Priebe - Profihost AG > <s.pri...@profihost.ag>: > > Hi Stefan, > > this is now with v1.10.2 but it does not help. > > PID old gen: > > 2 15608 yes (old gen) 2 no 0 0 0 0 > 0 > > > requests in G state: > > 2-2 15608 4/36/36 G 252.75 30695 41 22.1 0.87 0.87 > A.B.C.D h2 XYZ:443 > GET /themes/Frontend/Responsive/frontend/_public/src/js/vendors > > 2-2 15608 2/5/5 G 69.62 39645 36 104.0 0.18 0.18 > A.B.C.D h2 XYZ:443 GET > /web/cache/1492526985_9e591f3c4f420ebca36f3c1be30bd5ee.css > > thread all bt: > https://pastebin.com/raw/Cq9ymt9u > > I think the interesting thread is: > Thread 6 > > Greets, > Stefan > > Am 18.04.2017 um 15:06 schrieb Stefan Priebe - Profihost AG: >> >> Am 18.04.2017 um 15:03 schrieb Stefan Eissing: >>> Stefan, >>> >>> that is a 1.10.0, right? That was the first version without nested locking >>> and I fixed 2 possible dead locks in 1.10.1. >>> >>> I am about to release a 1.10.2 with added conformity checks and a fix for >>> client omitting EOF flags. Could you give that one a try? >> >> Yes sure where is 1.10.2? >> >> Stefan >> >>> >>> -Stefan >>> >>>> Am 18.04.2017 um 14:57 schrieb Stefan Priebe - Profihost AG >>>> <s.pri...@profihost.ag>: >>>> >>>> Hi, >>>> >>>> i saw that all of them are still serving one h2 connection. >>>> >>>> server-status: >>>> 0-3 32375 42/64/181 G 30.09 1020776 214 1285.1 >>>> 2.40 5.81 >>>> h081217236127.dyn.cm.kabsi.at h2 XXXX:443 GET >>>> /wp-content/uploads/0000Bloglr212222-8003asd.jpg HTTP/2.0 >>>> >>>> And they all have Mode of operation => G >>>> >>>> thread apply all bt full shows: >>>> #0 0x00007f8e80b687fc in __lll_lock_wait () from >>>> /lib/x86_64-linux-gnu/libpthread.so.0 >>>> No symbol table info available. >>>> #1 0x00007f8e80b6b6f8 in _L_cond_lock_886 () from >>>> /lib/x86_64-linux-gnu/libpthread.so.0 >>>> No symbol table info available. >>>> #2 0x00007f8e80b6b464 in __pthread_mutex_cond_lock () from >>>> /lib/x86_64-linux-gnu/libpthread.so.0 >>>> No symbol table info available. >>>> #3 0x00007f8e80b6650a in pthread_cond_timedwait@@GLIBC_2.3.2 () from >>>> /lib/x86_64-linux-gnu/libpthread.so.0 >>>> No symbol table info available. >>>> #4 0x00007f8e80d92322 in apr_thread_cond_timedwait >>>> (cond=0x7f8d7846bc70, mutex=0x7f8d7846bc20, timeout=60000000) at >>>> locks/unix/thread_cond.c:89 >>>> rv = <optimized out> >>>> then = <optimized out> >>>> abstime = {tv_sec = 1491843095, tv_nsec = 700198000} >>>> #5 0x000055e62c0951c5 in r_wait_space (premain=<synthetic pointer>, >>>> pbl=0x7f8de853da60, block=APR_BLOCK_READ, beam=0x7f8d7846bb08) at >>>> h2_bucket_beam.c:337 >>>> status = <optimized out> >>>> #6 append_bucket (pbl=0x7f8de853da60, block=APR_BLOCK_READ, >>>> b=0x7f8d7846dd38, beam=0x7f8d7846bb08) at h2_bucket_beam.c:776 >>>> data = 0x7f8de853dac8 "\240\037" >>>> len = 0 >>>> space_left = 0 >>>> status = <optimized out> >>>> #7 h2_beam_send (beam=0x7f8d7846bb08, sender_bb=0x7f8d7864ba90, >>>> block=APR_BLOCK_READ) at h2_bucket_beam.c:906 >>>> force_report = 1 >>>> b = 0x7f8d7846dd38 >>>> status = 0 >>>> bl = {mutex = 0x7f8d7846bc20, leave = 0x55e62c094250 >>>> <mutex_leave>, leave_ctx = 0x0} >>>> #8 0x000055e62c08f3aa in send_out (task=0x7f8d7846f740, >>>> bb=0x7f8d7864ba90, block=1) at h2_task.c:100 >>>> written = 8096 >>>> left = 140245585033024 >>>> status = 1 >>>> #9 0x000055e62c08f976 in slave_out (task=0x7f8d7846f740, >>>> f=0x7f8d7846bdd8, bb=0x7f8d7864ba90) at h2_task.c:177 >>>> status = -512 >>>> flush = 0 >>>> blocking = 1 >>>> #10 0x000055e62c08fbfd in h2_filter_slave_output (filter=0x7f8d7846bdd8, >>>> brigade=0x7f8d7864ba90) at h2_task.c:370 >>>> task = 0x7f8d7846f740 >>>> status = <optimized out> >>>> #11 0x000055e62bffc01c in ap_content_length_filter (f=0x7f8d78472ca8, >>>> b=0x7f8d7864ba90) at protocol.c:1713 >>>> r = 0x7f8d78471750 >>>> ctx = 0x7f8d7864bc90 >>>> e = 0x7f8d7864ba98 >>>> eblock = (unknown: 2159446012) >>>> #12 0x000055e62c04853d in deflate_out_filter (f=0x7f8d7864b650, >>>> bb=0x7f8d7864b8f8) at mod_deflate.c:961 >>>> rv = -512 >>>> r = 0x7f8d78471750 >>>> ctx = 0x7f8d7864b9a8 >>>> len = 8096 >>>> blen = 1157663 >>>> data = 0x7f8d7e40b000 "/*! jQuery v2.1.4 | (c) 2005, 2015 jQuery >>>> Foundation, Inc. | jquery.org/license >>>> */\n!function(a,b){\"object\"==typeof module&&\"object\"==typeof >>>> module.exports?module.exports=a.document?b(a,!0):function(a)"... >>>> c = 0x55e62c7a7498 >>>> #13 0x000055e62c044b1d in filter_harness (f=0x7f8d7846bc28, bb=0x80) at >>>> mod_filter.c:323 >>>> ret = -512 >>>> cachecontrol = 0xfffffffffffffe00 <error: Cannot access memory >>>> at address 0xfffffffffffffe00> >>>> filter = 0xe >>>> #14 0x000055e62c002242 in default_handler (r=0x7f8d78471750) at core.c:4746 >>>> c = 0x7f8d7846b720 >>>> bb = 0x7f8d7864b8f8 >>>> e = 0xfffffffffffffe00 >>>> d = 0x7f8d7864dfa0 >>>> fd = 0x7f8d7864b7b8 >>>> bld_content_md5 = 2019866872 >>>> #15 0x000055e62c014130 in ap_run_handler (r=r@entry=0x7f8d78471750) at >>>> config.c:170 >>>> pHook = <optimized out> >>>> n = 11 >>>> rv = -1 >>>> #16 0x000055e62c014679 in ap_invoke_handler (r=0x7f8d78471750) at >>>> config.c:434 >>>> handler = <optimized out> >>>> p = <optimized out> >>>> result = <optimized out> >>>> old_handler = 0x0 >>>> ignore = <optimized out> >>>> #17 0x000055e62c04dcb2 in ap_process_async_request (r=0x7f8d78471750) at >>>> http_request.c:436 >>>> c = 0x7f8d7846b720 >>>> access_status = 0 >>>> #18 0x000055e62c04de50 in ap_process_request (r=0x7f8d78471750) at >>>> http_request.c:471 >>>> bb = 0x7f8d7846bd80 >>>> c = 0x7f8d7846b720 >>>> rv = -512 >>>> #19 0x000055e62c08ec32 in h2_task_process_request (c=<optimized out>, >>>> task=<optimized out>) at h2_task.c:666 >>>> req = 0x7f8d78471750 >>>> cs = 0x7f8d7846bd80 >>>> r = 0x7f8d78471750 >>>> #20 h2_task_process_conn (c=0x7f8d7846bc28) at h2_task.c:713 >>>> ctx = 0x7f8d7846bd80 >>>> #21 0x000055e62c01e130 in ap_run_process_connection (c=0x7f8d7846b720) >>>> at connection.c:42 >>>> pHook = <optimized out> >>>> n = 0 >>>> rv = -1 >>>> #22 0x000055e62c09012b in h2_task_do (task=0x7f8d7846f740, >>>> thread=0x55e62cb0ce70, worker_id=0) at h2_task.c:623 >>>> c = 0x7f8d7846b720 >>>> #23 0x000055e62c093619 in slot_run (thread=0x55e62cb0ce70, >>>> wctx=0x55e62c8b9268) at h2_workers.c:217 >>>> slot = 0x55e62c8b9268 >>>> #24 0x00007f8e80b620a4 in start_thread () from >>>> /lib/x86_64-linux-gnu/libpthread.so.0 >>>> No symbol table info available. >>>> #25 0x00007f8e8089762d in clone () from /lib/x86_64-linux-gnu/libc.so.6 >>>> No symbol table info available. >>>> >>>> Thread 3 (Thread 0x7f8da678c700 (LWP 28251)): >>>> #0 0x00007f8e80b687fc in __lll_lock_wait () from >>>> /lib/x86_64-linux-gnu/libpthread.so.0 >>>> No symbol table info available. >>>> #1 0x00007f8e80b644b9 in _L_lock_909 () from >>>> /lib/x86_64-linux-gnu/libpthread.so.0 >>>> No symbol table info available. >>>> #2 0x00007f8e80b642e0 in pthread_mutex_lock () from >>>> /lib/x86_64-linux-gnu/libpthread.so.0 >>>> No symbol table info available. >>>> #3 0x000055e62c0942a8 in enter_yellow (pbl=0x7f8da678a8d0, >>>> beam=0x7f8d7846bb08) at h2_bucket_beam.c:217 >>>> ctx = <optimized out> >>>> enter = <optimized out> >>>> #4 h2_beam_emitted (proxy=0x7f8cd435bd38, beam=0x7f8d7846bb08) at >>>> h2_bucket_beam.c:352 >>>> bl = {mutex = 0x7f8d7846bc20, leave = 0x55e62c094250 >>>> <mutex_leave>, leave_ctx = 0x7f8c9806b3ed} >>>> b = <optimized out> >>>> next = <optimized out> >>>> #5 beam_bucket_destroy (data=0x7f8cd435bd38) at h2_bucket_beam.c:106 >>>> data = 0x7f8cd435bd38 >>>> d = 0x7f8cd435bd38 >>>> #6 0x00007f8e80fb584c in apr_brigade_cleanup (data=0x7f8d1002d220) at >>>> buckets/apr_brigade.c:44 >>>> b = 0x7f8d1002d220 >>>> e = 0x7f8cd435bab8 >>>> #7 0x000055e62c094be4 in h2_beam_leave (beam=0x7f8d7846bb08) at >>>> h2_bucket_beam.c:701 >>>> bl = {mutex = 0x7f8d7846bc20, leave = 0x55e62c094250 >>>> <mutex_leave>, leave_ctx = 0x4} >>>> #8 0x000055e62c08a7e2 in h2_stream_rst (stream=0x7f8d1002cc00, >>>> error_code=8) at h2_stream.c:608 >>>> No locals. >>>> #9 0x000055e62c084870 in on_stream_close_cb (ngh2=<optimized out>, >>>> stream_id=<optimized out>, error_code=8, userp=0x7f8d10135620) at >>>> h2_session.c:266 >>>> #10 0x00007f8e81666c22 in ?? () from >>>> /usr/lib/x86_64-linux-gnu/libnghttp2.so.14 >>>> No symbol table info available. >>>> #11 0x00007f8e8166823d in ?? () from >>>> /usr/lib/x86_64-linux-gnu/libnghttp2.so.14 >>>> No symbol table info available. >>>> #12 0x00007f8e8166c7cb in nghttp2_session_mem_recv () from >>>> /usr/lib/x86_64-linux-gnu/libnghttp2.so.14 >>>> No symbol table info available. >>>> #13 0x000055e62c079695 in recv_RAW_DATA (c=<optimized out>, >>>> cin=0x7f8d10135cb0, block=APR_NONBLOCK_READ, b=0x7f8ca80e4a88) at >>>> h2_filter.c:59 >>>> status = 0 >>>> len = 13 >>>> data = 0x7f8c9806b3e0 "" >>>> session = 0x7f8d10135620 >>>> n = <optimized out> >>>> #14 recv_RAW_brigade (c=<optimized out>, cin=0x7f8d10135cb0, >>>> bb=0x7f8d10135e18, block=APR_NONBLOCK_READ) at h2_filter.c:100 >>>> status = 0 >>>> b = 0x7f8ca80e4a88 >>>> consumed = <optimized out> >>>> #15 0x000055e62c07a29c in h2_filter_core_input (f=0x7f8c9806d738, >>>> brigade=0x7f8d10135ce0, mode=AP_MODE_READBYTES, block=APR_NONBLOCK_READ, >>>> readbytes=0) at h2_filter.c:182 >>>> cin = 0x7f8d10135cb0 >>>> saved_timeout = -1 >>>> #16 0x000055e62c084b82 in session_read (block=<optimized out>, >>>> readlen=491520, session=0x7f8d10135620) at h2_session.c:1481 >>>> status = <optimized out> >>>> rstatus = 0 >>>> c = 0x7f8d7808e858 >>>> read_start = 1919 >>>> #17 h2_session_read (session=0x7f8d10135620, block=<optimized out>) at >>>> h2_session.c:1538 >>>> No locals. >>>> #18 0x000055e62c08857f in h2_session_process (session=0x7f8d10135620, >>>> async=128) at h2_session.c:2126 >>>> c = 0x7f8d7808e858 >>>> mpm_state = 1 >>>> trace = 0 >>>> #19 0x000055e62c0789ce in h2_conn_run (ctx=<optimized out>, >>>> c=0x7f8d7808e858) at h2_conn.c:203 >>>> status = <optimized out> >>>> mpm_state = 0 >>>> session = 0x7f8d10135620 >>>> #20 0x000055e62c07bb21 in h2_h2_process_conn (c=0x7f8d7808e858) at >>>> h2_h2.c:659 >>>> ctx = 0x7f8c9806d668 >>>> #21 0x000055e62c01e130 in ap_run_process_connection (c=0x7f8d7808e858) >>>> at connection.c:42 >>>> pHook = <optimized out> >>>> n = 2 >>>> rv = -1 >>>> #22 0x000055e62c0b8830 in process_socket (my_thread_num=<optimized out>, >>>> my_child_num=<optimized out>, cs=0x7f8d7808e7c8, sock=<optimized out>, >>>> p=<optimized out>, thd=<optimized out>) at event.c:1134 >>>> c = 0x55e62c0e3748 >>>> sbh = 0x7f8d7808e7b0 >>>> #23 worker_thread (thd=0x7f8d7846bc28, dummy=0x80) at event.c:2137 >>>> process_slot = 5 >>>> thread_slot = 121 >>>> csd = 0x7f8d7808e5c0 >>>> cs = 0x0 >>>> ptrans = 0x7f8d7808e538 >>>> is_idle = 2013849544 >>>> te = 0x0 >>>> #24 0x00007f8e80b620a4 in start_thread () from >>>> /lib/x86_64-linux-gnu/libpthread.so.0 >>>> No symbol table info available. >>>> #25 0x00007f8e8089762d in clone () from /lib/x86_64-linux-gnu/libc.so.6 >>>> No symbol table info available. >>>> >>>> Thread 2 (Thread 0x7f8d7ef3d700 (LWP 28405)): >>>> #0 0x00007f8e80897c03 in epoll_wait () from >>>> /lib/x86_64-linux-gnu/libc.so.6 >>>> No symbol table info available. >>>> #1 0x00007f8e80d99a93 in impl_pollset_poll (pollset=0x55e62c8459f0, >>>> timeout=<optimized out>, num=0x7f8d7ef3ce6c, descriptors=0x7f8d7ef3ce88) >>>> at poll/unix/epoll.c:266 >>>> ret = <optimized out> >>>> i = <optimized out> >>>> j = <optimized out> >>>> rv = 0 >>>> fdptr = <optimized out> >>>> #2 0x000055e62c0ba297 in listener_thread (thd=0x2c, dummy=0x0) at >>>> event.c:1738 >>>> workers_were_busy = 0 >>>> ep = 0x54d70667c9d88 >>>> te = 0x0 >>>> rc = 4 >>>> tpool = 0x55e62c328eb8 <ap_server_conf> >>>> csd = 0x7f8d78006ce0 >>>> ptrans = 0x7f8d78006c58 >>>> lr = 0x54d70667c9d83 >>>> have_idle_worker = 0 >>>> out_pfd = 0x7f8de0002918 >>>> num = 0 >>>> timeout_interval = 140247325214952 >>>> pt = 0x1 >>>> closed = 1 >>>> #3 0x00007f8e80b620a4 in start_thread () from >>>> /lib/x86_64-linux-gnu/libpthread.so.0 >>>> No symbol table info available. >>>> #4 0x00007f8e8089762d in clone () from /lib/x86_64-linux-gnu/libc.so.6 >>>> No symbol table info available. >>>> >>>> Thread 1 (Thread 0x7f8e8215a780 (LWP 27400)): >>>> #0 0x00007f8e80b634db in pthread_join () from >>>> /lib/x86_64-linux-gnu/libpthread.so.0 >>>> No symbol table info available. >>>> #1 0x00007f8e80d9e4ab in apr_thread_join (retval=0x7ffda735e794, >>>> thd=0x7f8de00dbb48) at threadproc/unix/thread.c:217 >>>> stat = <optimized out> >>>> thread_stat = 0x1 >>>> #2 0x000055e62bfedeb3 in join_workers (listener=0x7f8de00dbb48, >>>> threads=0x55e62cb2f960) at event.c:2376 >>>> thread_rv = 21990 >>>> #3 0x000055e62bfee25a in child_main (child_num_arg=5, >>>> child_bucket=749926752) at event.c:2574 >>>> threads = 0x55e62cb2f960 >>>> ts = 0x55e62c844430 >>>> thread_attr = 0x55e62c844450 >>>> start_thread_id = 0x55e62c8444b0 >>>> #4 0x000055e62c0b8fe0 in make_child (s=0x7f8d7ef3d9d0, slot=5, >>>> bucket=0) at event.c:2646 >>>> No locals. >>>> #5 0x000055e62c0b950c in server_main_loop (num_buckets=<optimized out>, >>>> remaining_children_to_start=<optimized out>) at event.c:2916 >>>> ps = 0xfffffffffffffe00 >>>> status = 0 >>>> pid = {pid = 21894, in = 0x7f8e80d89d76 <find_entry+134>, out = >>>> 0x0, err = 0xc} >>>> child_slot = 5 >>>> exitwhy = APR_PROC_EXIT >>>> processed_status = 0 >>>> #6 event_run (_pconf=0x7f8d7ef3d9d0, plog=0x5, s=0xc8) at event.c:3035 >>>> remaining_children_to_start = 0 >>>> #7 0x000055e62bff642e in ap_run_mpm (pconf=0x55e62c74f138, >>>> plog=0x55e62c790bb8, s=0x55e62c7805a8) at mpm_common.c:94 >>>> pHook = <optimized out> >>>> n = 0 >>>> rv = -1 >>>> #8 0x000055e62bfeed15 in main (argc=2, argv=0x7ffda735eb98) at main.c:783 >>>> c = 68 'D' >>>> error = 0xfffffffffffffe00 <error: Cannot access memory at >>>> address 0xfffffffffffffe00> >>>> process = 0x55e62c74d218 >>>> pconf = 0x55e62c74f138 >>>> plog = 0x55e62c790bb8 >>>> ptemp = 0x55e62c7a8d98 >>>> pcommands = 0x55e62c771248 >>>> opt = 0x55e62c771338 >>>> mod = 0x55e62c3235d0 <ap_prelinked_modules+336> >>>> opt_arg = 0x7ffda735eef1 "FOREGROUND" >>>> signal_server = 0xfffffffffffffe00 >>>> >>>> Greets, >>>> Stefan >>>> >>>> >>>> Am 18.04.2017 um 14:47 schrieb Eric Covener: >>>>> On Tue, Apr 18, 2017 at 8:43 AM, Stefan Priebe - Profihost AG >>>>> <s.pri...@profihost.ag> wrote: >>>>>> bt of such a process shows: >>>>>> (gdb) bt >>>>>> #0 0x00007f5df74f64db in pthread_join () from >>>>>> /lib/x86_64-linux-gnu/libpthread.so.0 >>>>> >>>>> Need to see the other threads in the process, this one is just waiting >>>>> for the others to complete. >>>>> >>>>> "thread apply all bt full" >>>>> >>>>> >>>