Hi All,

We have hit a VPP Worker Thread Deadlock issue. And from the call stacks it
looks like the main thread is waiting for workers to come back to their
main loop( ie has taken the barrier lock) and one of the two workers is on
spin lock to make an rpc to the main thread.
I believe this lock is held by the main thread.

We are using *19.08 version* and complete bt is pasted below. Also, Can
someone explain the purpose of* vl_api_clnt_node*.

414 <http://rajith/lxr/http/source/src/vlibmemory/vlib_api.c?sn=vpp-19-08#L414>
*/* *INDENT-OFF* */*415
<http://rajith/lxr/http/source/src/vlibmemory/vlib_api.c?sn=vpp-19-08#L415>
VLIB_REGISTER_NODE
<http://rajith/lxr/http/ident?sn=vpp-19-08;i=VLIB_REGISTER_NODE>
(vl_api_clnt_node
<http://rajith/lxr/http/ident?sn=vpp-19-08;i=vl_api_clnt_node>) =416
<http://rajith/lxr/http/source/src/vlibmemory/vlib_api.c?sn=vpp-19-08#L416>
{417 <http://rajith/lxr/http/source/src/vlibmemory/vlib_api.c?sn=vpp-19-08#L417>
  .function <http://rajith/lxr/http/ident?sn=vpp-19-08;i=function> =
vl_api_clnt_process
<http://rajith/lxr/http/ident?sn=vpp-19-08;i=vl_api_clnt_process>,418
<http://rajith/lxr/http/source/src/vlibmemory/vlib_api.c?sn=vpp-19-08#L418>
  .type <http://rajith/lxr/http/ident?sn=vpp-19-08;i=type> =
VLIB_NODE_TYPE_PROCESS,419
<http://rajith/lxr/http/source/src/vlibmemory/vlib_api.c?sn=vpp-19-08#L419>
  .name <http://rajith/lxr/http/ident?sn=vpp-19-08;i=name> =
*"api-rx-from-ring"*,420
<http://rajith/lxr/http/source/src/vlibmemory/vlib_api.c?sn=vpp-19-08#L420>
  .state <http://rajith/lxr/http/ident?sn=vpp-19-08;i=state> =
VLIB_NODE_STATE_DISABLED,421
<http://rajith/lxr/http/source/src/vlibmemory/vlib_api.c?sn=vpp-19-08#L421>
};422 
<http://rajith/lxr/http/source/src/vlibmemory/vlib_api.c?sn=vpp-19-08#L422>
*/* *INDENT-ON* */*

*Complete Backtrace:*

Thread 3 (Thread 0x7ffa511c9700 (LWP 448)):
---Type <return> to continue, or q <return> to quit---
#0  0x00007ffa9f6bc276 in vlib_worker_thread_barrier_check () at
/development/libvpp/src/vlib/threads.h:430
#1  0x00007ffa9f6c3f19 in vlib_main_or_worker_loop (vm=0x7ffa8797adc0,
is_main=0) at /development/libvpp/src/vlib/main.c:1757
#2  0x00007ffa9f6c4fbd in vlib_worker_loop (vm=0x7ffa8797adc0) at
/development/libvpp/src/vlib/main.c:1988
#3  0x00007ffa9f703ff1 in vlib_worker_thread_fn (arg=0x7ffa6ccc8640)
at /development/libvpp/src/vlib/threads.c:1803
#4  0x00007ffa9f383560 in clib_calljmp () from
/usr/local/lib/libvppinfra.so.1.0.1
#5  0x00007ffa511c8ec0 in ?? ()
#6  0x00007ffa9f6fe588 in vlib_worker_thread_bootstrap_fn
(arg=0x7ffa6ccc8640) at /development/libvpp/src/vlib/threads.c:573
Backtrace stopped: previous frame inner to this frame (corrupt stack?)

Thread 2 (Thread 0x7ffa519ca700 (LWP 447)):
#0  0x00007ffaaae87ef7 in sched_yield () at
../sysdeps/unix/syscall-template.S:78
#1  0x00007ffa9f40fb49 in spin_acquire_lock (sl=0x130046384) at
/development/libvpp/src/vppinfra/dlmalloc.c:466
#2  0x00007ffa9f4173a4 in mspace_malloc (msp=0x130046010, bytes=66) at
/development/libvpp/src/vppinfra/dlmalloc.c:4347
#3  0x00007ffa9f4170de in mspace_get_aligned (msp=0x130046010,
n_user_data_bytes=66, align=16, align_offset=8)
    at /development/libvpp/src/vppinfra/dlmalloc.c:4233
#4  0x00007ffa9f4036fc in clib_mem_alloc_aligned_at_offset (size=46,
align=8, align_offset=8, os_out_of_memory_on_failure=1)
    at /development/libvpp/src/vppinfra/mem.h:139
#5  0x00007ffa9f403947 in vec_resize_allocate_memory (v=0x0,
length_increment=38, data_bytes=46, header_bytes=8, data_align=8)
    at /development/libvpp/src/vppinfra/vec.c:59
#6  0x00007ffa9f370aa5 in _vec_resize_inline (v=0x0,
length_increment=38, data_bytes=38, header_bytes=0, data_align=1)
    at /development/libvpp/src/vppinfra/vec.h:147
#7  0x00007ffa9f371a93 in do_percent (_s=0x7ffa6f6e4b58,
fmt=0x7ffa9f4244e8 "%s:%d (%s) assertion `%s' fails",
va=0x7ffa6f6e4c08)
    at /development/libvpp/src/vppinfra/format.c:341
#8  0x00007ffa9f371edb in va_format (s=0x0, fmt=0x7ffa9f4244e8 "%s:%d
(%s) assertion `%s' fails", va=0x7ffa6f6e4c08)
    at /development/libvpp/src/vppinfra/format.c:404
#9  0x00007ffa9f3629d0 in _clib_error (how_to_die=2,
function_name=0x0, line_number=0, fmt=0x7ffa9f4244e8 "%s:%d (%s)
assertion `%s' fails")
    at /development/libvpp/src/vppinfra/error.c:127
#10 0x00007ffa9f370a3c in _vec_resize_inline (v=0x7ffa6dd00470,
length_increment=16, data_bytes=16, header_bytes=0, data_align=1)
    at /development/libvpp/src/vppinfra/vec.h:136
#11 0x00007ffa9f371ea2 in va_format (s=0x7ffa6dd00470 "received signal
SIGABRT, PC 0x7ffaaadc2fb7",
    fmt=0x7ffa9f74923b "received signal %U, PC %U", va=0x7ffa6f6e4e08)
at /development/libvpp/src/vppinfra/format.c:403
#12 0x00007ffa9f372048 in format (s=0x7ffa6dd00470 "received signal
SIGABRT, PC 0x7ffaaadc2fb7",
    fmt=0x7ffa9f74923b "received signal %U, PC %U") at
/development/libvpp/src/vppinfra/format.c:428
#13 0x00007ffa9f72adf5 in unix_signal_handler (signum=7,
si=0x7ffa6f6e50f0, uc=0x7ffa6f6e4fc0)
    at /development/libvpp/src/vlib/unix/main.c:127
#14 <signal handler called>
#15 0x00007ffa9f417c03 in mspace_malloc (msp=0x130046010, bytes=77) at
/development/libvpp/src/vppinfra/dlmalloc.c:4437
---Type <return> to continue, or q <return> to quit---
#16 0x00007ffa9f416f6f in mspace_get_aligned (msp=0x130046010,
n_user_data_bytes=77, align=1, align_offset=0)
    at /development/libvpp/src/vppinfra/dlmalloc.c:4186
#17 0x00007ffaa0c7d04f in clib_mem_alloc_aligned_at_offset (size=73,
align=1, align_offset=0, os_out_of_memory_on_failure=1)
    at /development/libvpp/src/vppinfra/mem.h:139
#18 0x00007ffaa0c7d0a2 in clib_mem_alloc (size=73) at
/development/libvpp/src/vppinfra/mem.h:155
#19 0x00007ffaa0c7da0a in vl_msg_api_alloc_internal (nbytes=73,
pool=0, may_return_null=0)
    at /development/libvpp/src/vlibmemory/memory_shared.c:177
#20 0x00007ffaa0c7db6f in vl_msg_api_alloc_as_if_client (nbytes=57) at
/development/libvpp/src/vlibmemory/memory_shared.c:236
#21 0x00007ffaa0c994fd in vl_api_rpc_call_main_thread_inline
(fp=0x7ffa9ffee83c <ip6_neighbor_set_unset_rpc_callback>,
    data=0x7ffa6f6e58c0 "\001", data_length=32, force_rpc=0 '\000') at
/development/libvpp/src/vlibmemory/vlib_api.c:578
#22 0x00007ffaa0c99673 in vl_api_rpc_call_main_thread
(fp=0x7ffa9ffee83c <ip6_neighbor_set_unset_rpc_callback>,
data=0x7ffa6f6e58c0 "\001",
    data_length=32) at /development/libvpp/src/vlibmemory/vlib_api.c:602
#23 0x00007ffa9ffebb32 in set_unset_ip6_neighbor_rpc
(vm=0x7ffa8797a640, sw_if_index=53, a=0x7ffa6f6e5c18,
mac=0x7ffa6f6e5c34, is_add=1,
    flags=IP_NEIGHBOR_FLAG_NONE) at
/development/libvpp/src/vnet/ip/ip6_neighbor.c:441
#24 0x00007ffa9ffed406 in vnet_set_ip6_ethernet_neighbor
(vm=0x7ffa8797a640, sw_if_index=53, a=0x7ffa6f6e5c18,
mac=0x7ffa6f6e5c34,
    flags=IP_NEIGHBOR_FLAG_NONE) at
/development/libvpp/src/vnet/ip/ip6_neighbor.c:809
#25 0x00007ffa9fff2226 in icmp6_router_advertisement
(vm=0x7ffa8797a640, node=0x7ffa890f65c0, frame=0x7ffa88d14380)
    at /development/libvpp/src/vnet/ip/ip6_neighbor.c:2298
#26 0x00007ffa9f6c2011 in dispatch_node (vm=0x7ffa8797a640,
node=0x7ffa890f65c0, type=VLIB_NODE_TYPE_INTERNAL,
    dispatch_state=VLIB_NODE_STATE_POLLING, frame=0x7ffa88d14380,
last_time_stamp=203681032322150)
    at /development/libvpp/src/vlib/main.c:1213
#27 0x00007ffa9f6c27d2 in dispatch_pending_node (vm=0x7ffa8797a640,
pending_frame_index=27, last_time_stamp=203681032322150)
    at /development/libvpp/src/vlib/main.c:1381
#28 0x00007ffa9f6c446c in vlib_main_or_worker_loop (vm=0x7ffa8797a640,
is_main=0) at /development/libvpp/src/vlib/main.c:1839
#29 0x00007ffa9f6c4fbd in vlib_worker_loop (vm=0x7ffa8797a640) at
/development/libvpp/src/vlib/main.c:1988
#30 0x00007ffa9f703ff1 in vlib_worker_thread_fn (arg=0x7ffa6ccc8540)
at /development/libvpp/src/vlib/threads.c:1803
#31 0x00007ffa9f383560 in clib_calljmp () from
/usr/local/lib/libvppinfra.so.1.0.1

#32 0x00007ffa519c9ec0 in ?? ()
#33 0x00007ffa9f6fe588 in vlib_worker_thread_bootstrap_fn
(arg=0x7ffa6ccc8540) at /development/libvpp/src/vlib/threads.c:573
Backtrace stopped: previous frame inner to this frame (corrupt stack?)

Thread 1 (Thread 0x7ffaabd62d00 (LWP 257)):
#0  0x00007ffaaae68492 in __GI___waitpid (pid=17980,
stat_loc=stat_loc@entry=0x7ffa6d60fe18, options=options@entry=0)
    at ../sysdeps/unix/sysv/linux/waitpid.c:30
#1  0x00007ffaaadd3177 in do_system (line=<optimized out>) at
../sysdeps/posix/system.c:149
#2  0x00007ffaab8f5457 in bd_signal_handler_cb (signo=6) at
/development/librtbrickinfra/bd/src/bd.c:770
#3  0x00007ffa9f72abf7 in rtb_bd_signal_handler (signo=6) at
/development/libvpp/src/vlib/unix/main.c:80
---Type <return> to continue, or q <return> to quit---
#4  0x00007ffa9f72af92 in unix_signal_handler (signum=6,
si=0x7ffa6d6106f0, uc=0x7ffa6d6105c0)
    at /development/libvpp/src/vlib/unix/main.c:180
#5  <signal handler called>
#6  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
#7  0x00007ffaaadc4921 in __GI_abort () at abort.c:79
#8  0x00007ffaa0ed8086 in os_panic () at
/development/libvpp/src/vpp/vnet/main.c:559
#9  0x00007ffa9f703342 in vlib_worker_thread_barrier_sync_int
(vm=0x7ffa9f964e80 <vlib_global_main>,
    func_name=0x7ffaa105c2e0 <__FUNCTION__.40056>
"vl_msg_api_barrier_sync") at
/development/libvpp/src/vlib/threads.c:1489
#10 0x00007ffaa0ed80ed in vl_msg_api_barrier_sync () at
/development/libvpp/src/vpp/vnet/main.c:599
#11 0x00007ffaa0c7bd52 in vl_mem_api_handle_rpc (vm=0x7ffa9f964e80
<vlib_global_main>, node=0x7ffa6d608000)
    at /development/libvpp/src/vlibmemory/memory_api.c:757
#12 0x00007ffaa0c981f5 in vl_api_clnt_process (vm=0x7ffa9f964e80
<vlib_global_main>, node=0x7ffa6d608000, f=0x0)
    at /development/libvpp/src/vlibmemory/vlib_api.c:326
#13 0x00007ffa9f6c2a57 in vlib_process_bootstrap (_a=140713942780864)
at /development/libvpp/src/vlib/main.c:1476
#14 0x00007ffa9f383560 in clib_calljmp () from
/usr/local/lib/libvppinfra.so.1.0.1
#15 0x00007ffa84930b90 in ?? ()
#16 0x00007ffa9f6c2b5c in vlib_process_startup (vm=0xffffffffffffffff,
p=0xdf00000000, f=0x7ffa6d608000)
    at /development/libvpp/src/vlib/main.c:1498
#17 0x0000000000000000 in ?? ()
(gdb)


Thanks,

Rajith
-=-=-=-=-=-=-=-=-=-=-=-
Links: You receive all messages sent to this group.
View/Reply Online (#18245): https://lists.fd.io/g/vpp-dev/message/18245
Mute This Topic: https://lists.fd.io/mt/78681829/21656
Group Owner: vpp-dev+ow...@lists.fd.io
Unsubscribe: https://lists.fd.io/g/vpp-dev/unsub [arch...@mail-archive.com]
-=-=-=-=-=-=-=-=-=-=-=-

Reply via email to