Hi OVS experts,

Our ovs-vswitchd crashes and dumps core at the ovs_mutex_trylock(&ukey->mutex)
call in the function revalidator_sweep__.

I sent a mail about this before but have not received a response:
https://mail.openvswitch.org/pipermail/ovs-discuss/2023-August/052604.html

So I am sending this mail again. I apologize in advance for its length: I
would like to post as much useful information as possible to help identify
potential issues, so this mail contains a really long text.

Since the earlier mail (2023-August/052604.html), we have upgraded OVS to
2.17.8 and DPDK to 22.11, hoping that the community might already have a fix
for this issue. Unfortunately, ovs-vswitchd still crashes and dumps core.

Here is some local debug information:

(gdb) bt
#0  0x00007f8751bbf337 in raise () from /lib64/libc.so.6
#1  0x00007f8751bc0a28 in abort () from /lib64/libc.so.6
#2  0x000055c52ed06c7e in ovs_abort_valist (err_no=<optimized out>,
format=<optimized out>, args=args@entry=0x7f8744249370) at
lib/util.c:499
#3  0x000055c52ed06d14 in ovs_abort (err_no=err_no@entry=0,
format=format@entry=0x55c52f01b1e8 "%s: %s() passed uninitialized
ovs_mutex") at lib/util.c:491
#4  0x000055c52ecd17e1 in ovs_mutex_trylock_at
(l_=l_@entry=0x7f8718dcc098, where=where@entry=0x55c52eff5c60
"ofproto/ofproto-dpif-upcall.c:3044") at lib/ovs-thread.c:106
#5  0x000055c52ebf25f9 in revalidator_sweep__
(revalidator=revalidator@entry=0x55c533082c70,
purge=purge@entry=false) at ofproto/ofproto-dpif-upcall.c:3044
#6  0x000055c52ebf640f in revalidator_sweep
(revalidator=0x55c533082c70) at ofproto/ofproto-dpif-upcall.c:3102
#7  udpif_revalidator (arg=0x55c533082c70) at ofproto/ofproto-dpif-upcall.c:1101
#8  0x000055c52ecd239f in ovsthread_wrapper (aux_=<optimized out>) at
lib/ovs-thread.c:422
#9  0x00007f8753d16e65 in start_thread () from /lib64/libpthread.so.0
#10 0x00007f8751c8788d in clone () from /lib64/libc.so.6

Full backtrace ("bt full") output with pretty print enabled:
(gdb) bt full
#0  0x00007f8751bbf337 in raise () from /lib64/libc.so.6
No symbol table info available.
#1  0x00007f8751bc0a28 in abort () from /lib64/libc.so.6
No symbol table info available.
#2  0x000055c52ed06c7e in ovs_abort_valist (err_no=<optimized out>,
format=<optimized out>, args=args@entry=0x7f8744249370) at
lib/util.c:499
No locals.
#3  0x000055c52ed06d14 in ovs_abort (err_no=err_no@entry=0,
format=format@entry=0x55c52f01b1e8 "%s: %s() passed uninitialized
ovs_mutex") at lib/util.c:491
        args = {{
            gp_offset = 32,
            fp_offset = 48,
            overflow_arg_area = 0x7f8744249450,
            reg_save_area = 0x7f8744249390
          }}
#4  0x000055c52ecd17e1 in ovs_mutex_trylock_at
(l_=l_@entry=0x7f8718dcc098, where=where@entry=0x55c52eff5c60
"ofproto/ofproto-dpif-upcall.c:3044") at lib/ovs-thread.c:106
        l = 0x7f8718dcc098
        error = <optimized out>
        __func__ = "ovs_mutex_trylock_at"
#5  0x000055c52ebf25f9 in revalidator_sweep__
(revalidator=revalidator@entry=0x55c533082c70,
purge=purge@entry=false) at ofproto/ofproto-dpif-upcall.c:3044
        ukey_state = <optimized out>
        cursor_52 = {
          impl = 0x7f86f826c8c0,
          bucket_idx = 2,
          entry_idx = 3,
          node = 0x7f86f8dc8020
        }
        odp_actions_stub = {140218217949376, 9951266880679575560,
55834640392, 10323741882, 0, 2, 140218217949592, 168, 140218217949416,
12885426177, 140218940560728, 281510948569217, 1125934266581005,
281509421383809,
          1099511627785, 0, 140218226622112, 4, 819, 10323556447, 0,
2, 140218226622328, 148, 140218226622152, 12885753857,
140218940560856, 2984229694, 18446744069414584320,
18446462603027808255, 4294967295, 1407392063422464,
          140218257906000, 3, 248, 10322875848, 0, 2, 140218257906216,
168, 140218257906040, 12884901889, 140218940560984, 0, 0, 0, 0, 0,
140218228630736, 5, 1088, 10292016731, 24, 2, 140218228630952, 168,
140218228630776,
          12884901889, 140218940561112, 0, 0, 0, 0, 0,
140218255617104, 2, 120, 10124105851, 0, 2, 140218255617320, 148,
140218255617144, 12884901889, 140218940561240, 0, 0, 0, 0, 0,
140218243823504, 0, 0, 0, 0, 2,
          140218243823720, 140, 140218243823544, 12884901889,
140218940561368, 0 <repeats 37 times>}
        odp_actions = {
          base = 0x7f8744249550,
          data = 0x7f8744249550,
          size = 0,
          allocated = 1024,
          header = 0x0,
          msg = 0x0,
          list_node = {
            prev = 0xcccccccccccccccc,
            next = 0xcccccccccccccccc
          },
          source = OFPBUF_STUB
        }
        ukey = 0x7f8718dcc050
        n_ops = 0
        ops = {
            ... a really long list ....
           {
            ukey = 0x0,
            stats = {

              n_packets = 0,
              n_bytes = 0,
              used = 0,
              tcp_flags = 0
            },
            dop = {
              type = 0,
              error = 0,
              {
                flow_put = {
                  flags = (DPIF_FP_CREATE | unknown: 905450480),
                  key = 0x50178a28944953ad,
                  key_len = 12884901889,
                  mask = 0x0,
                  mask_len = 0,
                  actions = 0x0,
                  actions_len = 0,
                  ufid = 0x1,
                  pmd_id = 788417076,
                  stats = 0x55c52ecd15f8 <ovs_mutex_lock_at+24>
                },
                flow_del = {
                  key = 0x2694413835f813f1,
                  key_len = 5771233354389738413,
                  ufid = 0x300000001,
                  terse = false,
                  pmd_id = 0,
                  stats = 0x0
                },
                execute = {
                  actions = 0x2694413835f813f1,
                  actions_len = 5771233354389738413,
                  needs_help = true,
                  probe = false,
                  mtu = 3,
                  hash = 0,
                  flow = 0x0,
                  packet = 0x0
                },
                flow_get = {
                  key = 0x2694413835f813f1,
                  key_len = 5771233354389738413,
                  ufid = 0x300000001,
                  pmd_id = 0,
                  buffer = 0x0,
                  flow = 0x0
                }
              }
            }
          }
            ... a really long list ....
}
        umap = 0x55c53301f998
        cur = <optimized out>
        i = 39
        udpif = 0x55c53301ee20
        dump_seq = 3090869337
        reval_seq = 3090869356
        slice = <optimized out>
        __func__ = "revalidator_sweep__"
#6  0x000055c52ebf640f in revalidator_sweep
(revalidator=0x55c533082c70) at ofproto/ofproto-dpif-upcall.c:3102

No locals.
#7  udpif_revalidator (arg=0x55c533082c70) at ofproto/ofproto-dpif-upcall.c:1101
        revalidator = 0x55c533082c70
        udpif = 0x55c53301ee20
        leader = true
        start_time = 10324370685
        last_reval_seq = 3090867551
        n_flows = 14393
#8  0x000055c52ecd239f in ovsthread_wrapper (aux_=<optimized out>) at
lib/ovs-thread.c:422
        auxp = <optimized out>
        aux = {
          start = 0x55c52ebf6350 <udpif_revalidator>,
          arg = 0x55c533082c70,
          name = "revalidator\000\000\000\000"
        }
        id = 7
        subprogram_name = 0x7f87280008c0 "pN\314(\207\177"
#9  0x00007f8753d16e65 in start_thread () from /lib64/libpthread.so.0
No symbol table info available.
#10 0x00007f8751c8788d in clone () from /lib64/libc.so.6
No symbol table info available.

The umap being swept in frame #5 (loop iteration i = 39, i.e.
udpif->ukeys[39], umap = 0x55c53301f998) contains the following ukeys
(output of ovs_dump_udpif_keys):

(struct umap *) 0x55c53301f998:
  (struct udpif_key *) 0x7f86f8df3930: key_len = 148, mask_len = 152
                                       ufid =
a18d9eac-9718-21db-7f97-4a287638e2ef
                                       hash = 0x7b0c4227, pmd_id = 3
                                       state = UKEY_OPERATIONAL
                                       state_where = 0x55c52eff6358
"ofproto/ofproto-dpif-upcall.c:2957"
                                       n_packets = 1, n_bytes = 115
                                       used = 10324368860, tcp_flags = 0x0000
  (struct udpif_key *) 0x7f871a6db4e0: key_len = 140, mask_len = 152
                                       ufid =
4e04d989-8729-49fc-1d0a-46ef2449ef75
                                       hash = 0x2fe84627, pmd_id = 3
                                       state = UKEY_VISIBLE
                                       state_where = 0x55c52eff5da8
"ofproto/ofproto-dpif-upcall.c:2036"
                                       n_packets = 0, n_bytes = 0
                                       used = 0, tcp_flags = 0x0000
  (struct udpif_key *) 0x7f8719909d50: key_len = 160, mask_len = 172
                                       ufid =
aa5170a7-818e-c902-44bf-4f60ff18f0f7
                                       hash = 0x f037027, pmd_id = 3
                                       state = UKEY_OPERATIONAL
                                       state_where = 0x55c52eff6358
"ofproto/ofproto-dpif-upcall.c:2957"
                                       n_packets = 1, n_bytes = 66
                                       used = 10324370390, tcp_flags = 0x0000
  (struct udpif_key *) 0x7f871a5af4c0: key_len = 168, mask_len = 172
                                       ufid =
f9773957-96f7-8c4e-7ba3-46cf73f7b3e0
                                       hash = 0x32da0a27, pmd_id = 3
                                       state = UKEY_EVICTED
                                       state_where = 0x55c52eff5b48
"ofproto/ofproto-dpif-upcall.c:2608"
                                       n_packets = 5, n_bytes = 1243
                                       used = 10324368587, tcp_flags = 0x0018
  (struct udpif_key *) 0x7f8718834ff0: key_len = 148, mask_len = 152
                                       ufid =
0d733731-a642-a60f-4f4f-4c6425dd398e
                                       hash = 0x61e7a027, pmd_id = 3
                                       state = UKEY_EVICTED
                                       state_where = 0x55c52eff5b48
"ofproto/ofproto-dpif-upcall.c:2608"
                                       n_packets = 2, n_bytes = 120
                                       used = 10324367306, tcp_flags = 0x0000
  (struct udpif_key *) 0x7f871aafb0c0: key_len = 168, mask_len = 172
                                       ufid =
c9a3ab02-8f78-44ee-6583-45e778fa98a5
                                       hash = 0x3e3fbc27, pmd_id = 3
                                       state = UKEY_OPERATIONAL
                                       state_where = 0x55c52eff6358
"ofproto/ofproto-dpif-upcall.c:2957"
                                       n_packets = 1, n_bytes = 60
                                       used = 10324370056, tcp_flags = 0x0000
  (struct udpif_key *) 0x7f8718c5d660: key_len = 160, mask_len = 172
                                       ufid =
01a3b747-909a-8077-c2d1-4e3542235fd6
                                       hash = 0x7cff9c27, pmd_id = 3
                                       state = UKEY_EVICTED
                                       state_where = 0x55c52eff5b48
"ofproto/ofproto-dpif-upcall.c:2608"
                                       n_packets = 8, n_bytes = 492
                                       used = 10324368705, tcp_flags = 0x0000
  (struct udpif_key *) 0x7f8718d04340: key_len = 168, mask_len = 172
                                       ufid =
d3965df7-bb19-f997-e736-41e7c701b5f7
                                       hash = 0x8db4d027, pmd_id = 3
                                       state = UKEY_EVICTED
                                       state_where = 0x55c52eff5b48
"ofproto/ofproto-dpif-upcall.c:2608"
                                       n_packets = 7, n_bytes = 3781
                                       used = 10324367542, tcp_flags = 0x0000
  (struct udpif_key *) 0x7f871aa85860: key_len = 160, mask_len = 172
                                       ufid =
44c1e2ec-b297-fa54-851c-41bf0c255865
                                       hash = 0xec913027, pmd_id = 3
                                       state = UKEY_EVICTED
                                       state_where = 0x55c52eff5b48
"ofproto/ofproto-dpif-upcall.c:2608"
                                       n_packets = 1, n_bytes = 66
                                       used = 10324367277, tcp_flags = 0x0000
  (struct udpif_key *) 0x7f871a260390: key_len = 168, mask_len = 172
                                       ufid =
b6c15cd7-907d-8695-a503-4253a3cb0b7a
                                       hash = 0xd35a2c27, pmd_id = 3
                                       state = UKEY_EVICTED
                                       state_where = 0x55c52eff5b48
"ofproto/ofproto-dpif-upcall.c:2608"
                                       n_packets = 7, n_bytes = 462
                                       used = 10324367339, tcp_flags = 0x0000
  (struct udpif_key *) 0x7f871bfae340: key_len = 140, mask_len = 152
                                       ufid =
84068184-aed0-3d90-5923-4d6f53fc309c
                                       hash = 0xaa917227, pmd_id = 3
                                       state = UKEY_OPERATIONAL
                                       state_where = 0x55c52eff6358
"ofproto/ofproto-dpif-upcall.c:2957"
                                       n_packets = 5, n_bytes = 322
                                       used = 10324369800, tcp_flags = 0x0000
  (struct udpif_key *) 0x7f8718d91730: key_len = 148, mask_len = 152
                                       ufid =
70833d4f-afe7-4c55-6267-4458ebddf3fa
                                       hash = 0x7ad61a27, pmd_id = 3
                                       state = UKEY_EVICTED
                                       state_where = 0x55c52eff5b48
"ofproto/ofproto-dpif-upcall.c:2608"
                                       n_packets = 1, n_bytes = 140
                                       used = 10324365702, tcp_flags = 0x0018
  (struct udpif_key *) 0x7f871879ad50: key_len = 168, mask_len = 172
                                       ufid =
d6b2900e-adbe-02b5-e7d5-48f216c1710c
                                       hash = 0xde1a0e27, pmd_id = 3
                                       state = UKEY_OPERATIONAL
                                       state_where = 0x55c52eff6358
"ofproto/ofproto-dpif-upcall.c:2957"
                                       n_packets = 0, n_bytes = 0
                                       used = 10324369841, tcp_flags = 0x0000
  (struct udpif_key *) 0x7f871a061ec0: key_len = 160, mask_len = 172
                                       ufid =
47dea4f7-9248-e065-2764-438e3d727cd0
                                       hash = 0x55b18227, pmd_id = 3
                                       state = UKEY_OPERATIONAL
                                       state_where = 0x55c52eff6358
"ofproto/ofproto-dpif-upcall.c:2957"
                                       n_packets = 0, n_bytes = 0
                                       used = 10324369527, tcp_flags = 0x0000
  (struct udpif_key *) 0x7f871ab964b0: key_len = 168, mask_len = 172
                                       ufid =
286cf642-9ca0-8b63-f6ad-443081df74e2
                                       hash = 0x8d81d627, pmd_id = 3
                                       state = UKEY_EVICTED
                                       state_where = 0x55c52eff5b48
"ofproto/ofproto-dpif-upcall.c:2608"
                                       n_packets = 187, n_bytes = 41849
                                       used = 10324366880, tcp_flags = 0x0000
  (struct udpif_key *) 0x7f86f93190e0: key_len = 168, mask_len = 172
                                       ufid =
9de8567b-b3f2-f4e9-cc00-4279bc9be36a
                                       hash = 0xfb47e827, pmd_id = 3
                                       state = UKEY_VISIBLE
                                       state_where = 0x55c52eff5e18
"ofproto/ofproto-dpif-upcall.c:2089"
                                       n_packets = 0, n_bytes = 0
                                       used = 0, tcp_flags = 0x0000
  (struct udpif_key *) 0x7f86f8feceb0: key_len = 168, mask_len = 172
                                       ufid =
3f87d43a-8835-1833-1d24-4dbe0213ace2
                                       hash = 0x 7274627, pmd_id = 3
                                       state = UKEY_OPERATIONAL
                                       state_where = 0x55c52eff6358
"ofproto/ofproto-dpif-upcall.c:2957"
                                       n_packets = 0, n_bytes = 0
                                       used = 10324370446, tcp_flags = 0x0000
  (struct udpif_key *) 0x7f871a74abc0: key_len = 148, mask_len = 152
                                       ufid =
d33fdfff-bed2-5328-2f23-4d76ec8b406e
                                       hash = 0xad0aba27, pmd_id = 3
                                       state = UKEY_OPERATIONAL
                                       state_where = 0x55c52eff6358
"ofproto/ofproto-dpif-upcall.c:2957"
                                       n_packets = 1149, n_bytes = 73127
                                       used = 10324370702, tcp_flags = 0x0000
  (struct udpif_key *) 0x7f871966d0d0: key_len = 168, mask_len = 172
                                       ufid =
b410e4d7-86cb-f77e-e4ff-4f7e42c2c28a
                                       hash = 0xc5ac4227, pmd_id = 3
                                       state = UKEY_OPERATIONAL
                                       state_where = 0x55c52eff6358
"ofproto/ofproto-dpif-upcall.c:2957"
                                       n_packets = 0, n_bytes = 0
                                       used = 10324369984, tcp_flags = 0x0000
  (struct udpif_key *) 0x7f871b49ddb0: key_len = 168, mask_len = 172
                                       ufid =
c9fd9fa3-b621-d37b-507c-4e22b318ce81
                                       hash = 0xa0cdd627, pmd_id = 3
                                       state = UKEY_OPERATIONAL
                                       state_where = 0x55c52eff6358
"ofproto/ofproto-dpif-upcall.c:2957"
                                       n_packets = 9, n_bytes = 2957
                                       used = 10324370136, tcp_flags = 0x0000
  (struct udpif_key *) 0x7f871a9a7940: key_len = 160, mask_len = 172
                                       ufid =
4c0b3bce-8bde-4621-6393-4935fe209d85
                                       hash = 0x d0d6e27, pmd_id = 3
                                       state = UKEY_OPERATIONAL
                                       state_where = 0x55c52eff6358
"ofproto/ofproto-dpif-upcall.c:2957"
                                       n_packets = 3, n_bytes = 206
                                       used = 10324370325, tcp_flags = 0x0000
  (struct udpif_key *) 0x7f8719fae470: key_len = 148, mask_len = 152
                                       ufid =
a6a3c8f2-8de5-14eb-110b-4ed2bf989a23
                                       hash = 0xeda41827, pmd_id = 3
                                       state = UKEY_OPERATIONAL
                                       state_where = 0x55c52eff6358
"ofproto/ofproto-dpif-upcall.c:2957"
                                       n_packets = 43, n_bytes = 31311
                                       used = 10324368839, tcp_flags = 0x0018
  (struct udpif_key *) 0x7f86f8d1adb0: key_len = 148, mask_len = 152
                                       ufid =
80b9fe38-8aba-d4b3-c119-410418fe1092
                                       hash = 0x137d4027, pmd_id = 3
                                       state = UKEY_EVICTED
                                       state_where = 0x55c52eff5b48
"ofproto/ofproto-dpif-upcall.c:2608"
                                       n_packets = 2, n_bytes = 124
                                       used = 10324367255, tcp_flags = 0x0000
  (struct udpif_key *) 0x7f871ba370d0: key_len = 168, mask_len = 172
                                       ufid =
7e5c26fd-af10-ba15-653c-454a828c068d
                                       hash = 0x9306ba27, pmd_id = 3
                                       state = UKEY_EVICTED
                                       state_where = 0x55c52eff5b48
"ofproto/ofproto-dpif-upcall.c:2608"
                                       n_packets = 5, n_bytes = 820
                                       used = 10324368550, tcp_flags = 0x0000
The umap contains 24 ukeys in total.

The umap details:
(gdb) p *(struct umap *) 0x55c53301f998
$12 = {
  mutex = {
    lock = {
      __data = {
        __lock = 0,
        __count = 0,
        __owner = 0,
        __nusers = 0,
        __kind = 2,
        __spins = 0,
        __elision = 0,
        __list = {
          __prev = 0x0,
          __next = 0x0
        }
      },
      __size = '\000' <repeats 16 times>, "\002", '\000' <repeats 22 times>,
      __align = 0
    },
    where = 0x55c52efef4be "<unlocked>"
  },
  cmap = {
    impl = {
      p = 0x7f86f826c8c0
    }
  }
}


As we can see, the umap 0x55c53301f998 does not contain the ukey
0x7f8718dcc050, even though the "bt full" output shows ukey = 0x7f8718dcc050
in frame #5. The mutex of this ukey indeed has an uninitialized (NULL)
'where' pointer. Maybe the ukey pointer itself is just invalid.

(gdb) p *(struct udpif_key *)0x7f8718dcc050
$11 = {
   ...
  mutex = {
    lock = {
      __data = {
        __lock = 0,
        __count = 0,
        __owner = 0,
        __nusers = 0,
        __kind = -1,
        __spins = 0,
        __elision = 0,
        __list = {
          __prev = 0x0,
          __next = 0x0
        }
      },
      __size = '\000' <repeats 16 times>, "\377\377\377\377", '\000'
<repeats 19 times>,
      __align = 0
    },
    where = 0x0
  },
...
}

There seems to be an out-of-bounds access to the linked list of ukeys here.

So, I would greatly appreciate your help, as it is crucial for OVS to operate
in our production environment.

I can provide further debugging output at any time.
I look forward to your response.
Thank you very much in advance.

Best regards,
LIU Yulong
_______________________________________________
discuss mailing list
disc...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-discuss

Reply via email to