When a tunnel_key for a datapath was changed, the local_datapaths hmap was not 
properly updated
as the old/initial entry was not removed.
- If the datapath was not deleted at the same time, a new entry (for the same 
dp) was created
  in the local_datapaths as the previous entry was not found (wrong hash).
- If the datapath was deleted at the same time, the former entry also remained 
(as, again, the hash
  was wrong). So, we kept a deleted (dp) entry in the hmap, and might crash 
when we used it later.

When tunnel_key is updated for an existing datapath, we now clean the 
local_datapaths,
removing bad entries (i.e entries for which the hash is not the tunnel_key).

This issue was identified by flaky failures of test "ovn-ic -- route deletion 
upon TS deletion".

Backtrace:
0  0x0000000000504a9a in hmap_first_with_hash (hmap=0x20f4d90, 
hmap@entry=0x5d5634, hmap=0x20f4d90, hmap@entry=0x5d5634, hash=1956414673) at 
./include/openvswitch/hmap.h:360
1  smap_find__ (smap=smap@entry=0x20f4d90, key=key@entry=0x5d5634 
"snat-ct-zone", key_len=key_len@entry=12, hash=1956414673) at lib/smap.c:421
2  0x0000000000505073 in smap_get_node (key=0x5d5634 "snat-ct-zone", 
smap=0x20f4d90) at lib/smap.c:217
3  smap_get_def (def=0x0, key=0x5d5634 "snat-ct-zone", smap=0x20f4d90) at 
lib/smap.c:208
4  smap_get (smap=0x20f4d90, key=key@entry=0x5d5634 "snat-ct-zone") at 
lib/smap.c:200
5  0x0000000000419898 in get_common_nat_zone (ldp=0x20a8c40, ldp=0x20a8c40) at 
controller/lflow.c:831
6  add_matches_to_flow_table (lflow=lflow@entry=0x21ddf90, 
ldp=ldp@entry=0x20a8c40, matches=matches@entry=0x211bb50, 
ptable=ptable@entry=10 '\n', output_ptable=output_ptable@entry=37 '%', 
ovnacts=ovnacts@entry=0x7ffd19b99de0,
    ingress=true, l_ctx_in=0x7ffd19b9a300, l_ctx_out=0x7ffd19b9a2c0) at 
controller/lflow.c:892
7  0x000000000041a29b in consider_logical_flow__ (lflow=lflow@entry=0x21ddf90, 
dp=0x20eb720, l_ctx_in=l_ctx_in@entry=0x7ffd19b9a300, 
l_ctx_out=l_ctx_out@entry=0x7ffd19b9a2c0) at controller/lflow.c:1207
8  0x000000000041a587 in consider_logical_flow (lflow=lflow@entry=0x21ddf90, 
is_recompute=is_recompute@entry=false, l_ctx_in=l_ctx_in@entry=0x7ffd19b9a300, 
l_ctx_out=l_ctx_out@entry=0x7ffd19b9a2c0) at controller/lflow.c:1276
9  0x000000000041e30f in lflow_handle_changed_flows 
(l_ctx_in=l_ctx_in@entry=0x7ffd19b9a300, 
l_ctx_out=l_ctx_out@entry=0x7ffd19b9a2c0) at controller/lflow.c:271
10 0x0000000000439fc7 in lflow_output_sb_logical_flow_handler 
(node=0x7ffd19ba5b70, data=<optimized out>) at controller/ovn-controller.c:4045
11 0x00000000004682fe in engine_compute (recompute_allowed=<optimized out>, 
node=<optimized out>) at lib/inc-proc-eng.c:441
12 engine_run_node (recompute_allowed=true, node=0x7ffd19ba5b70) at 
lib/inc-proc-eng.c:503
13 engine_run (recompute_allowed=recompute_allowed@entry=true) at 
lib/inc-proc-eng.c:528
14 0x000000000040ade2 in main (argc=<optimized out>, argv=<optimized out>) at 
controller/ovn-controller.c:5709

Signed-off-by: Xavier Simonart <xsimo...@redhat.com>

---
v2: Fixed potential memory leak.
---
 controller/local_data.c     | 14 +++++++++++++
 controller/local_data.h     |  4 ++++
 controller/ovn-controller.c | 22 +++++++++++++++++++
 tests/ovn.at                | 42 +++++++++++++++++++++++++++++++++++++
 4 files changed, 82 insertions(+)

diff --git a/controller/local_data.c b/controller/local_data.c
index 3a7d3afeb..8f0890a14 100644
--- a/controller/local_data.c
+++ b/controller/local_data.c
@@ -56,6 +56,20 @@ static bool datapath_is_transit_switch(const struct 
sbrec_datapath_binding *);
 
 static uint64_t local_datapath_usage;
 
+/* To be used when hmap_node.hash might be wrong e.g. tunnel_key got updated */
+struct local_datapath *
+get_local_datapath_no_hash(const struct hmap *local_datapaths,
+                                uint32_t tunnel_key)
+{
+    struct local_datapath *ld;
+    HMAP_FOR_EACH (ld, hmap_node, local_datapaths) {
+        if (ld->datapath->tunnel_key == tunnel_key) {
+            return ld;
+        }
+    }
+    return NULL;
+}
+
 struct local_datapath *
 get_local_datapath(const struct hmap *local_datapaths, uint32_t tunnel_key)
 {
diff --git a/controller/local_data.h b/controller/local_data.h
index f6d8f725f..a1bf0790b 100644
--- a/controller/local_data.h
+++ b/controller/local_data.h
@@ -69,6 +69,10 @@ struct local_datapath *local_datapath_alloc(
     const struct sbrec_datapath_binding *);
 struct local_datapath *get_local_datapath(const struct hmap *,
                                           uint32_t tunnel_key);
+struct local_datapath
+*get_local_datapath_no_hash(const struct hmap *local_datapaths,
+                                 uint32_t tunnel_key);
+
 bool
 need_add_peer_to_local(
     struct ovsdb_idl_index *sbrec_port_binding_by_name,
diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index da7d145ed..ca8e383d8 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -1902,6 +1902,7 @@ runtime_data_sb_datapath_binding_handler(struct 
engine_node *node OVS_UNUSED,
             engine_get_input("SB_datapath_binding", node));
     const struct sbrec_datapath_binding *dp;
     struct ed_type_runtime_data *rt_data = data;
+    struct local_datapath *ld;
 
     SBREC_DATAPATH_BINDING_TABLE_FOR_EACH_TRACKED (dp, dp_table) {
         if (sbrec_datapath_binding_is_deleted(dp)) {
@@ -1909,6 +1910,27 @@ runtime_data_sb_datapath_binding_handler(struct 
engine_node *node OVS_UNUSED,
                                    dp->tunnel_key)) {
                 return false;
             }
+            /* If the tunnel key got updated, get_local_datapath will not find
+             * the ld. Use get_local_datapath_no_hash which does not
+             * rely on the hash.
+             */
+            if (sbrec_datapath_binding_is_updated(dp,
+                SBREC_DATAPATH_BINDING_COL_TUNNEL_KEY)) {
+                if (get_local_datapath_no_hash(&rt_data->local_datapaths,
+                                                    dp->tunnel_key)) {
+                    return false;
+                }
+            }
+        } else if (sbrec_datapath_binding_is_updated(dp,
+                SBREC_DATAPATH_BINDING_COL_TUNNEL_KEY) &&
+            !sbrec_datapath_binding_is_new(dp)) {
+            /* If the tunnel key is updated, remove the entry (with a wrong
+             * hash) from the map. It will be (properly) added back later.
+             */
+            if ((ld = get_local_datapath_no_hash(&rt_data->local_datapaths,
+                                                 dp->tunnel_key))) {
+                hmap_remove(&rt_data->local_datapaths, &ld->hmap_node);
+            }
         }
     }
 
diff --git a/tests/ovn.at b/tests/ovn.at
index 637d92bed..aba08856b 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -36957,3 +36957,45 @@ AT_CHECK([grep -c "NXT_CT_FLUSH_ZONE" 
hv1/ovs-vswitchd.log], [0], [dnl
 OVN_CLEANUP([hv1])
 AT_CLEANUP
 ])
+
+OVN_FOR_EACH_NORTHD([
+AT_SETUP([Delete ls after changing tunnel_key])
+ovn_start
+
+net_add n1
+
+sim_add hv1
+as hv1
+check ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.11
+
+check ovn-nbctl --wait=hv ls-add ls \
+          -- lsp-add ls lsp1 \
+          -- lsp-add ls ls-lr \
+          -- lr-add lr \
+          -- lrp-add lr lr-ls f0:00:00:00:00:f1 192.168.1.1/24 \
+          -- set Logical_Switch_Port ls-lr \
+               type=router \
+               options:router-port=lr-ls \
+               addresses=router \
+          -- lrp-set-gateway-chassis lr-ls hv1
+
+sleep_controller hv1
+
+check ovn-nbctl --wait=sb set Logical_Switch ls 
other_config:requested-tnl-key=1000
+check ovn-nbctl --wait=sb ls-del ls
+wake_up_controller hv1
+
+check ovn-nbctl --wait=hv sync
+
+check ovn-nbctl --wait=hv ls-add ls1 \
+      -- lsp-add ls1 ls1-lr \
+      -- lrp-add lr lr-ls1 f0:00:00:00:00:f2 192.168.2.1/24 \
+      -- set Logical_Switch_Port ls1-lr type=router options:router-port=lr-ls1 
addresses=router \
+      -- lrp-set-gateway-chassis lr-ls1 hv1
+
+OVN_CLEANUP([hv1])
+
+AT_CLEANUP
+])
+
-- 
2.31.1

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to