From: Han Zhou <hz...@ovn.org>

There were problems observed occasionally after OVS restart, the
OVS flow bundle installation from ovn-controller was failed because of
"GROUP_EXISTS" error, which end up with missing flows/groups/meters
in OVS until ovn-controller is restarted.

Example error logs in OVS:
2022-07-08T01:38:22.837Z|00676|connmgr|INFO|br-int<->unix#0: sending 
OFPGMFC_GROUP_EXISTS error reply to OFPT_BUNDLE_ADD_MESSAGE message
2022-07-08T01:38:22.913Z|00677|connmgr|INFO|br-int<->unix#0: sending 
OFPBFC_MSG_FAILED error reply to OFPT_BUNDLE_CONTROL message

The root cause is that with ovn-ofctrl-wait-before-clear set, ofctrl
module would call ovn_extend_table_clear() to clear the "existing" group
table AFTER ovn-controller finished computing the desired
flows/groups/meters in the state S_CLEAR. However, the function
ovn_extend_table_clear() clears the bitmap of the group IDs, while the
IDs are still being used by the "desired" group table. This is not a
problem if a recompute happens soon, the desired group table will be
cleared first and IDs will be reallocated and the bitmap will reflect
the actual allocations. However, if there are any group creation changes
(related to LB, ECMP, etc.) happen before the recompute, new IDs may be
allocated to be conflict with existing IDs because the cleared bitmap
status doesn't reflect the real IDs being used. The conflict IDs finally
causes the "GROUP_EXIST" error replied by OVS when ovn-controller tries
to install the desired groups to OVS. Even worse, because the group
modifications are now wrapped in a bundle with flow modifications, it
would end up with not only missing groups but also missing flows.

Both desired table and existing table share the same bitmap, which is to
avoid reallocating an ID that still exists in OVS, but the current
logic seems to have an assumption that the "existing" table entries are
deleted always AFTER the "desired" entries. This assumption is not true
after the introduction of ovn-ofctrl-wait-before-clear feature.

The fix here is to introduce a reference between the desired and
existing entries, so that when deleting an entry in either of the tables
it knows if the ID is still in use by its peer and decide if it is the
right time to clear the bit from the bitmap, without depending on the
order of deletion.

Fixes: 896adfd2d8b3 ("ofctrl: Support ovn-ofctrl-wait-before-clear to reduce 
down time during upgrade.")
Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2112111
Signed-off-by: Han Zhou <hz...@ovn.org>
Acked-by: Numan Siddique <num...@ovn.org>
(cherry picked from commit db15cf29a1f9857b55389f424c5d747406550cb7)
---
 lib/extend-table.c      | 67 +++++++++++++++++++++--------------------
 lib/extend-table.h      | 17 ++++++++---
 tests/ovn-controller.at | 26 +++++++++++++++-
 3 files changed, 73 insertions(+), 37 deletions(-)

diff --git a/lib/extend-table.c b/lib/extend-table.c
index 32d541b55..ee8b0d898 100644
--- a/lib/extend-table.c
+++ b/lib/extend-table.c
@@ -40,13 +40,17 @@ ovn_extend_table_init(struct ovn_extend_table *table)
 }
 
 static struct ovn_extend_table_info *
-ovn_extend_table_info_alloc(const char *name, uint32_t id, bool is_new_id,
+ovn_extend_table_info_alloc(const char *name, uint32_t id,
+                            struct ovn_extend_table_info *peer,
                             uint32_t hash)
 {
     struct ovn_extend_table_info *e = xmalloc(sizeof *e);
     e->name = xstrdup(name);
     e->table_id = id;
-    e->new_table_id = is_new_id;
+    e->peer = peer;
+    if (peer) {
+        peer->peer = e;
+    }
     e->hmap_node.hash = hash;
     hmap_init(&e->references);
     return e;
@@ -184,9 +188,10 @@ ovn_extend_table_clear(struct ovn_extend_table *table, 
bool existing)
     /* Clear the target table. */
     HMAP_FOR_EACH_SAFE (g, next, hmap_node, target) {
         hmap_remove(target, &g->hmap_node);
-        /* Don't unset bitmap for desired group_info if the group_id
-         * was not freshly reserved. */
-        if (existing || g->new_table_id) {
+        if (g->peer) {
+            g->peer->peer = NULL;
+        } else {
+            /* Unset the bitmap because the peer is deleted already. */
             bitmap_set0(table->table_ids, g->table_id);
         }
         ovn_extend_table_info_destroy(g);
@@ -209,11 +214,15 @@ void
 ovn_extend_table_remove_existing(struct ovn_extend_table *table,
                                  struct ovn_extend_table_info *existing)
 {
-    /* Remove 'existing' from 'groups->existing' */
+    /* Remove 'existing' from 'table->existing' */
     hmap_remove(&table->existing, &existing->hmap_node);
 
-    /* Dealloc group_id. */
-    bitmap_set0(table->table_ids, existing->table_id);
+    if (existing->peer) {
+        existing->peer->peer = NULL;
+    } else {
+        /* Dealloc the ID. */
+        bitmap_set0(table->table_ids, existing->table_id);
+    }
     ovn_extend_table_info_destroy(existing);
 }
 
@@ -230,7 +239,9 @@ ovn_extend_table_delete_desired(struct ovn_extend_table 
*table,
             VLOG_DBG("%s: %s, "UUID_FMT, __func__,
                      e->name, UUID_ARGS(&l->lflow_uuid));
             hmap_remove(&table->desired, &e->hmap_node);
-            if (e->new_table_id) {
+            if (e->peer) {
+                e->peer->peer = NULL;
+            } else {
                 bitmap_set0(table->table_ids, e->table_id);
             }
             ovn_extend_table_info_destroy(e);
@@ -254,17 +265,6 @@ ovn_extend_table_remove_desired(struct ovn_extend_table 
*table,
     ovn_extend_table_delete_desired(table, l);
 }
 
-static struct ovn_extend_table_info*
-ovn_extend_info_clone(struct ovn_extend_table_info *source)
-{
-    struct ovn_extend_table_info *clone =
-        ovn_extend_table_info_alloc(source->name,
-                                    source->table_id,
-                                    source->new_table_id,
-                                    source->hmap_node.hash);
-    return clone;
-}
-
 void
 ovn_extend_table_sync(struct ovn_extend_table *table)
 {
@@ -273,11 +273,13 @@ ovn_extend_table_sync(struct ovn_extend_table *table)
     /* Copy the contents of desired to existing. */
     HMAP_FOR_EACH_SAFE (desired, next, hmap_node, &table->desired) {
         if (!ovn_extend_table_lookup(&table->existing, desired)) {
-            desired->new_table_id = false;
-            struct ovn_extend_table_info *clone =
-                ovn_extend_info_clone(desired);
-            hmap_insert(&table->existing, &clone->hmap_node,
-                        clone->hmap_node.hash);
+            struct ovn_extend_table_info *existing =
+                ovn_extend_table_info_alloc(desired->name,
+                                            desired->table_id,
+                                            desired,
+                                            desired->hmap_node.hash);
+            hmap_insert(&table->existing, &existing->hmap_node,
+                        existing->hmap_node.hash);
         }
     }
 }
@@ -289,7 +291,7 @@ ovn_extend_table_assign_id(struct ovn_extend_table *table, 
const char *name,
                            struct uuid lflow_uuid)
 {
     uint32_t table_id = 0, hash;
-    struct ovn_extend_table_info *table_info;
+    struct ovn_extend_table_info *table_info, *existing_info;
 
     hash = hash_string(name, 0);
 
@@ -307,17 +309,18 @@ ovn_extend_table_assign_id(struct ovn_extend_table 
*table, const char *name,
 
     /* Check whether we already have an installed entry for this
      * combination. */
+    existing_info = NULL;
     HMAP_FOR_EACH_WITH_HASH (table_info, hmap_node, hash, &table->existing) {
         if (!strcmp(table_info->name, name)) {
-            table_id = table_info->table_id;
+            existing_info = table_info;
+            table_id = existing_info->table_id;
+            break;
         }
     }
 
-    bool new_table_id = false;
-    if (!table_id) {
-        /* Reserve a new group_id. */
+    if (!existing_info) {
+        /* Reserve a new id. */
         table_id = bitmap_scan(table->table_ids, 0, 1, MAX_EXT_TABLE_ID + 1);
-        new_table_id = true;
     }
 
     if (table_id == MAX_EXT_TABLE_ID + 1) {
@@ -327,7 +330,7 @@ ovn_extend_table_assign_id(struct ovn_extend_table *table, 
const char *name,
     }
     bitmap_set1(table->table_ids, table_id);
 
-    table_info = ovn_extend_table_info_alloc(name, table_id, new_table_id,
+    table_info = ovn_extend_table_info_alloc(name, table_id, existing_info,
                                              hash);
 
     hmap_insert(&table->desired,
diff --git a/lib/extend-table.h b/lib/extend-table.h
index 6240b946e..44152c940 100644
--- a/lib/extend-table.h
+++ b/lib/extend-table.h
@@ -28,8 +28,12 @@
  * such as the Group Table or Meter Table. */
 struct ovn_extend_table {
     unsigned long *table_ids;  /* Used as a bitmap with value set
-                                * for allocated group ids in either
-                                * desired or existing. */
+                                * for allocated ids in either desired or
+                                * existing (or both).  If the same "name"
+                                * exists in both desired and existing tables,
+                                * they must share the same ID.  The "peer"
+                                * pointer would tell if the ID is still used by
+                                * the same item in the peer table. */
     struct hmap desired;
     struct hmap lflow_to_desired; /* Index for looking up desired table
                                    * items from given lflow uuid, with
@@ -48,8 +52,13 @@ struct ovn_extend_table_info {
     struct hmap_node hmap_node;
     char *name;         /* Name for the table entity. */
     uint32_t table_id;
-    bool new_table_id;  /* 'True' if 'table_id' was reserved from
-                         * ovn_extend_table's 'table_ids' bitmap. */
+    struct ovn_extend_table_info *peer; /* The extend tables exist as pairs,
+                                           one for desired items and one for
+                                           existing items. "peer" maintains the
+                                           link between a pair of items in
+                                           these tables. If "peer" is NULL, it
+                                           means the counterpart is not created
+                                           yet or deleted already. */
     struct hmap references; /* The lflows that are using this item, with
                              * ovn_extend_table_lflow_ref nodes. Only useful
                              * for items in ovn_extend_table.desired. */
diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at
index 94e3aa71e..c23355300 100644
--- a/tests/ovn-controller.at
+++ b/tests/ovn-controller.at
@@ -2118,9 +2118,20 @@ check ovs-vsctl -- add-port br-int hv1-vif1 -- \
 
 check ovn-nbctl ls-add ls1
 
-check ovn-nbctl --wait=hv lsp-add ls1 ls1-lp1 \
+check ovn-nbctl lsp-add ls1 ls1-lp1 \
 -- lsp-set-addresses ls1-lp1 "f0:00:00:00:00:01 10.1.2.3"
 
+check ovn-nbctl lb-add lb1 1.1.1.1 10.1.2.3 \
+-- ls-lb-add ls1 lb1
+
+check ovn-nbctl lb-add lb2 2.2.2.2 10.1.2.4 \
+-- ls-lb-add ls1 lb2
+
+check ovn-nbctl --wait=hv sync
+# There should be 2 group IDs allocated
+AT_CHECK([ovn-appctl -t ovn-controller group-table-list | awk '{print $2}' | 
sort | uniq | wc -l], [0], [2
+])
+
 # Set 5 seconds wait time before clearing OVS flows.
 check ovs-vsctl set open . external_ids:ovn-ofctrl-wait-before-clear=5000
 
@@ -2153,6 +2164,19 @@ lflow_run_2=$(ovn-appctl -t ovn-controller 
coverage/read-counter lflow_run)
 AT_CHECK_UNQUOTED([echo $lflow_run_1], [0], [$lflow_run_2
 ])
 
+# Restart OVS this time, and wait until flows are reinstalled
+OVS_APP_EXIT_AND_WAIT([ovs-vswitchd])
+start_daemon ovs-vswitchd --enable-dummy=system -vvconn -vofproto_dpif 
-vunixctl
+OVS_WAIT_UNTIL([ovs-ofctl dump-flows br-int | grep 10.1.2.4], [0], [ignore])
+
+check ovn-nbctl --wait=hv lb-add lb3 2.2.2.2 10.1.2.5 \
+-- ls-lb-add ls1 lb3
+
+# There should be 3 group IDs allocated (this is to ensure the group ID
+# allocation is correct after ofctrl state reset.
+AT_CHECK([ovn-appctl -t ovn-controller group-table-list | awk '{print $2}' | 
sort | uniq | wc -l], [0], [3
+])
+
 OVN_CLEANUP([hv1])
 AT_CLEANUP
 
-- 
2.37.1

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to