Dear openvswitch developers,
We are using OVN for OpenStack and have set up a bond port in balance-tcp mode 
for Geneve. After running for 7 days, ovs-vswitchd locks up. 
The Open vSwitch version and the threads' backtraces are shown below:

version
---------
$ ovs-vsctl list open

_uuid               : 6a13ea5a-8d73-43d8-9310-8de6e9f13549
bridges             : [074eb20c-771c-40a1-8834-8a0d07b6856c, 
07d6f063-0d29-4483-b583-3b097446a6f1, 0b9f66c5-a8a4-4c9d-8b96-1a611127641c, 
17af48a0-b7a2-4b57-8e29-ce7e5ee72561, 37ff52bc-7c8e-4573-b4a9-a76c98fe52ad, 
49642ab1-1541-4df6-acd8-0811bf1296f7, 63720517-d2d5-4ef7-b697-c88b0a4a9cf3, 
726ea6b1-495e-46a3-97ee-e05416a0eb6e, 83b39602-5166-4d84-b568-7ca32ea9e413, 
905e43b6-fc09-45af-a971-46677efab160, bad4b831-e3d7-4cf5-b745-b6f21f750cb7, 
d3fbdc77-5522-465d-94a5-7932dc907784, dcf51e64-e8aa-4baa-9076-dad5d6380ad1, 
e2e9d5e6-874f-4f31-8a66-a4c97af88013, f6092afa-dd50-463b-abc3-1f99d64a6ef1, 
fd28b135-cb1a-4bad-b7ed-29d737032f2f]
cur_cfg             : 2350
datapath_types      : [netdev, system]
datapaths           : {netdev=07d89e11-507d-4a13-96c6-aa3d706a3b69, 
system=e3b59398-737e-441c-bf4e-92b523c5a190}
db_version          : "8.3.0"
dpdk_initialized    : true
dpdk_version        : "DPDK 20.11.3"
external_ids        : {hostname=node-4.domain.tld, 
ovn-bridge-mappings="physnet1:br-ex,physnet2:br-prv,physnet20:br-vm-roller,physnet21:br-vm-mgmt,physnet22:br-vm-storpub",
 ovn-clear-flows="false", ovn-clear-patch="true", 
ovn-cms-options="enable-chassis-as-gw,availability-zones=default-az", 
ovn-encap-ip="33.168.20.5", ovn-encap-type=geneve, 
ovn-remote="tcp:ovn-ovsdb-sb-relay.openstack.svc.cluster.local:6642", 
ovn-remote-probe-interval="60000", rundir="/var/run/openvswitch", 
system-id="2887acd2-36d4-4f24-83cc-43de0c399d6c"}
iface_types         : [bareudp, dpdk, dpdkvhostuser, dpdkvhostuserclient, 
erspan, geneve, gre, gtpu, internal, ip6erspan, ip6gre, lisp, patch, stt, 
system, tap, vxlan]
manager_options     : [8531b436-797f-4a96-98f8-e1016a1a50b5]
next_cfg            : 2350
other_config        : {dpdk-extra="--iova-mode=pa", dpdk-init=try, 
dpdk-socket-limit="0,0,0,0,0,0,0,0", 
dpdk-socket-mem="8192,8192,8192,8192,8192,8192,8192,8192", 
emc-insert-inv-prob="10", enable-statistics="false", pmd-auto-lb="true", 
pmd-auto-lb-improvement-threshold="50", pmd-auto-lb-load-threshold="80", 
pmd-auto-lb-rebal-interval="1", pmd-cpu-mask="0xF0F0F0F0F0F0F0F0", 
pmd-perf-metrics="false", smc-enable="true", tx-flush-interval="50", 
vlan-limit="0"}
ovs_version         : "2.16.2.20220801"
ssl                 : []
statistics          : {}
system_type         : escore
system_version      : "8.4"


ovs-vswitchd
-------------
$ echo y | gdb -p 41906 -ex 'thread 1' -ex bt -ex quit

[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib64/libthread_db.so.1".
0x00007f6ce5e472f5 in pthread_rwlock_wrlock () from /usr/lib64/libpthread.so.0
[Switching to thread 1 (Thread 0x7f6ce69ecc40 (LWP 41906))]
#0  0x00007f6ce5e472f5 in pthread_rwlock_wrlock () from 
/usr/lib64/libpthread.so.0
#0  0x00007f6ce5e472f5 in pthread_rwlock_wrlock () from 
/usr/lib64/libpthread.so.0
#1  0x00007f6ce61bebac in ovs_rwlock_wrlock_at () from 
/usr/lib64/libopenvswitch-2.16.so.0
#2  0x00007f6ce657cc22 in bond_update_post_recirc_rules () from 
/usr/lib64/libofproto-2.16.so.0
#3  0x00007f6ce65bc423 in output_normal () from /usr/lib64/libofproto-2.16.so.0
#4  0x00007f6ce65bd208 in xlate_output_action () from 
/usr/lib64/libofproto-2.16.so.0
#5  0x00007f6ce65bdc58 in do_xlate_actions () from 
/usr/lib64/libofproto-2.16.so.0
#6  0x00007f6ce65c1c21 in clone_xlate_actions () from 
/usr/lib64/libofproto-2.16.so.0
#7  0x00007f6ce65b7d2e in xlate_table_action () from 
/usr/lib64/libofproto-2.16.so.0
#8  0x00007f6ce65c1441 in patch_port_output.isra () from 
/usr/lib64/libofproto-2.16.so.0
#9  0x00007f6ce65ba773 in compose_output_action..constprop () from 
/usr/lib64/libofproto-2.16.so.0
#10 0x00007f6ce65bc31d in output_normal () from /usr/lib64/libofproto-2.16.so.0
#11 0x00007f6ce65bd208 in xlate_output_action () from 
/usr/lib64/libofproto-2.16.so.0
#12 0x00007f6ce65bdc58 in do_xlate_actions () from 
/usr/lib64/libofproto-2.16.so.0
#13 0x00007f6ce65c1c21 in clone_xlate_actions () from 
/usr/lib64/libofproto-2.16.so.0
#14 0x00007f6ce65b7d2e in xlate_table_action () from 
/usr/lib64/libofproto-2.16.so.0
#15 0x00007f6ce65c1441 in patch_port_output.isra () from 
/usr/lib64/libofproto-2.16.so.0
#16 0x00007f6ce65bb74e in compose_output_action..constprop () from 
/usr/lib64/libofproto-2.16.so.0
#17 0x00007f6ce65bcfbc in xlate_output_action () from 
/usr/lib64/libofproto-2.16.so.0
#18 0x00007f6ce65bfe08 in do_xlate_actions () from 
/usr/lib64/libofproto-2.16.so.0
#19 0x00007f6ce65b7d2e in xlate_table_action () from 
/usr/lib64/libofproto-2.16.so.0
#20 0x00007f6ce65bedac in do_xlate_actions () from 
/usr/lib64/libofproto-2.16.so.0
#21 0x00007f6ce65c3c03 in xlate_actions () from /usr/lib64/libofproto-2.16.so.0
#22 0x00007f6ce659bb71 in packet_xlate () from /usr/lib64/libofproto-2.16.so.0
#23 0x00007f6ce658e7c9 in handle_packet_out () from 
/usr/lib64/libofproto-2.16.so.0
#24 0x00007f6ce6593004 in handle_openflow () from 
/usr/lib64/libofproto-2.16.so.0
#25 0x00007f6ce6581a44 in connmgr_run () from /usr/lib64/libofproto-2.16.so.0
#26 0x00007f6ce658b5a8 in ofproto_run () from /usr/lib64/libofproto-2.16.so.0
#27 0x000055a8f50ad81c in bridge_run__ ()
#28 0x000055a8f50afa55 in bridge_reconfigure ()
#29 0x000055a8f50b3da2 in bridge_run ()
#30 0x000055a8f50aa235 in main ()
A debugging session is active.


revalidator
-----------
$ echo y | gdb -p 41906 -ex 'thread 128' -ex bt -ex quit


[New LWP 46936]
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib64/libthread_db.so.1".
0x00007f6ce5e472f5 in pthread_rwlock_wrlock () from /usr/lib64/libpthread.so.0
[Switching to thread 128 (Thread 0x7f696e3f1700 (LWP 46411))]
#0  0x00007f6ce5e4b65d in __lll_lock_wait () from /usr/lib64/libpthread.so.0
#0  0x00007f6ce5e4b65d in __lll_lock_wait () from /usr/lib64/libpthread.so.0
#1  0x00007f6ce5e44979 in pthread_mutex_lock () from /usr/lib64/libpthread.so.0
#2  0x00007f6ce61beaac in ovs_mutex_lock_at () from 
/usr/lib64/libopenvswitch-2.16.so.0
#3  0x00007f6ce65920b6 in handle_flow_mod__ () from 
/usr/lib64/libofproto-2.16.so.0
#4  0x00007f6ce65a3d7d in ofproto_dpif_add_internal_flow () from 
/usr/lib64/libofproto-2.16.so.0
#5  0x00007f6ce657c426 in update_recirc_rules__ () from 
/usr/lib64/libofproto-2.16.so.0
#6  0x00007f6ce657cc5f in bond_update_post_recirc_rules () from 
/usr/lib64/libofproto-2.16.so.0
#7  0x00007f6ce65bc423 in output_normal () from /usr/lib64/libofproto-2.16.so.0
#8  0x00007f6ce65bd208 in xlate_output_action () from 
/usr/lib64/libofproto-2.16.so.0
#9  0x00007f6ce65bdc58 in do_xlate_actions () from 
/usr/lib64/libofproto-2.16.so.0
#10 0x00007f6ce65c1c21 in clone_xlate_actions () from 
/usr/lib64/libofproto-2.16.so.0
#11 0x00007f6ce65b7d2e in xlate_table_action () from 
/usr/lib64/libofproto-2.16.so.0
#12 0x00007f6ce65c1441 in patch_port_output.isra () from 
/usr/lib64/libofproto-2.16.so.0
#13 0x00007f6ce65ba773 in compose_output_action..constprop () from 
/usr/lib64/libofproto-2.16.so.0
#14 0x00007f6ce65bc31d in output_normal () from /usr/lib64/libofproto-2.16.so.0
#15 0x00007f6ce65bd208 in xlate_output_action () from 
/usr/lib64/libofproto-2.16.so.0
#16 0x00007f6ce65bdc58 in do_xlate_actions () from 
/usr/lib64/libofproto-2.16.so.0
#17 0x00007f6ce65c3c03 in xlate_actions () from /usr/lib64/libofproto-2.16.so.0
#18 0x00007f6ce65b0135 in xlate_key.isra () from /usr/lib64/libofproto-2.16.so.0
#19 0x00007f6ce65b044c in revalidate_ukey__ () from 
/usr/lib64/libofproto-2.16.so.0
#20 0x00007f6ce65b0785 in revalidate_ukey () from 
/usr/lib64/libofproto-2.16.so.0
#21 0x00007f6ce65b3810 in revalidate.isra () from 
/usr/lib64/libofproto-2.16.so.0
#22 0x00007f6ce65b48d1 in udpif_revalidator () from 
/usr/lib64/libofproto-2.16.so.0
#23 0x00007f6ce61bfa03 in ovsthread_wrapper () from 
/usr/lib64/libopenvswitch-2.16.so.0
#24 0x00007f6ce5e4214a in start_thread () from /usr/lib64/libpthread.so.0
#25 0x00007f6ce5b71dc3 in clone () from /usr/lib64/libc.so.6
A debugging session is active.


Does anyone have an idea how to fix it?

Best Regards,
Daniel Ding


_______________________________________________
discuss mailing list
disc...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-discuss

Reply via email to