For kernel datapath, when a new tunnel port is created in the same
transaction in which an old tunnel port with the same tunnel
configuration is deleted, the new tunnel port creation will fail and
left in an error state. This can be easily reproduced in OVN by
triggering a chassis deletion and addition with the same encap in the
same SB DB transaction, such as:

ovn-sbctl chassis-add aaaaaa geneve 1.2.3.4
ovn-sbctl chassis-del aaaaaa -- chassis-add bbbbbb 1.2.3.4

ovs-vsctl show | grep error
error: "could not add network device ovn-bbbbbb-0 to ofproto (File exists)"

Related logs in OVS:
—
2024-02-23T05:41:49.978Z|405933|bridge|INFO|bridge br-int: deleted interface 
ovn-aaaaaa-0 on port 113
2024-02-23T05:41:49.989Z|405935|tunnel|WARN|ovn-bbbbbb-0: attempting to add 
tunnel port with same config as port 'ovn-aaaaaa-0' (::->1.2.3.4, key=flow, 
legacy_l2, dp port=9)
2024-02-23T05:41:49.989Z|405936|ofproto|WARN|br-int: could not add port 
ovn-bbbbbb-0 (File exists)
2024-02-23T05:41:49.989Z|405937|bridge|WARN|could not add network device 
ovn-bbbbbb-0 to ofproto (File exists)
—

Depending on when there are other OVSDB changes, it may take a long time
for the device to be added successfully, triggered by the next OVS
iteration.

(note: native tunnel ports do not have this problem)

The problem is how the tunnel port deletion and creation are handled. In
bridge_reconfigure(), port deletion is handled before creation, to avoid
such resource conflict. However, for kernel tunnel ports, the real clean
up is performed at the end of the bridge_reconfigure() in the:
bridge_run__()->ofproto_run()->...->ofproto_dpif:port_destruct()

We cannot call bridge_run__() at an earlier point before all
reconfigurations are done, so this patch tries a generic approach to
just re-run the bridge_reconfigure() when there are any port creations
encountered "File exists" error, which indicates a possible resource
conflict may have happened due to a later deleted port, and retry may
succeed.

Signed-off-by: Han Zhou <hz...@ovn.org>
---
This is RFC because I am not sure if there is a better way to solve the problem
more specifically by executing the port_destruct for the old port before trying
to create the new port. The fix may be more complex though.

 tests/tunnel.at   |  1 +
 vswitchd/bridge.c | 47 ++++++++++++++++++++++++++++++++---------------
 2 files changed, 33 insertions(+), 15 deletions(-)

diff --git a/tests/tunnel.at b/tests/tunnel.at
index 71e7c2df4eae..d570c78790c7 100644
--- a/tests/tunnel.at
+++ b/tests/tunnel.at
@@ -1059,6 +1059,7 @@ AT_CHECK([ovs-vsctl add-port br0 p2 -- set Interface p2 
type=vxlan \
 
 AT_CHECK([grep 'p2: could not set configuration (File exists)' 
ovs-vswitchd.log | sed "s/^.*\(p2:.*\)$/\1/"], [0],
   [p2: could not set configuration (File exists)
+p2: could not set configuration (File exists)
 ])
 
 OVS_VSWITCHD_STOP(["/p2: VXLAN-GBP, and non-VXLAN-GBP tunnels can't be 
configured on the same dst_port/d
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index 95a65fcdcd5e..9057da98e6c0 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -278,7 +278,7 @@ static void bridge_delete_ofprotos(void);
 static void bridge_delete_or_reconfigure_ports(struct bridge *);
 static void bridge_del_ports(struct bridge *,
                              const struct shash *wanted_ports);
-static void bridge_add_ports(struct bridge *,
+static bool bridge_add_ports(struct bridge *,
                              const struct shash *wanted_ports);
 
 static void bridge_configure_datapath_id(struct bridge *);
@@ -333,8 +333,8 @@ static void mirror_refresh_stats(struct mirror *);
 
 static void iface_configure_lacp(struct iface *,
                                  struct lacp_member_settings *);
-static bool iface_create(struct bridge *, const struct ovsrec_interface *,
-                         const struct ovsrec_port *);
+static int iface_create(struct bridge *, const struct ovsrec_interface *,
+                        const struct ovsrec_port *);
 static bool iface_is_internal(const struct ovsrec_interface *iface,
                               const struct ovsrec_bridge *br);
 static const char *iface_get_type(const struct ovsrec_interface *,
@@ -858,7 +858,9 @@ datapath_reconfigure(const struct ovsrec_open_vswitch *cfg)
     }
 }
 
-static void
+/* Returns true if any ports addition failed and may need retry. Otherwise
+ * return false. */
+static bool
 bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg)
 {
     struct sockaddr_in *managers;
@@ -943,8 +945,11 @@ bridge_reconfigure(const struct ovsrec_open_vswitch 
*ovs_cfg)
 
     config_ofproto_types(&ovs_cfg->other_config);
 
+    bool need_retry = false;
     HMAP_FOR_EACH (br, node, &all_bridges) {
-        bridge_add_ports(br, &br->wanted_ports);
+        if (bridge_add_ports(br, &br->wanted_ports)) {
+            need_retry = true;
+        }
         shash_destroy(&br->wanted_ports);
     }
 
@@ -1003,6 +1008,7 @@ bridge_reconfigure(const struct ovsrec_open_vswitch 
*ovs_cfg)
      * narrow race window in which e.g. ofproto/trace will not recognize the
      * new configuration (sometimes this causes unit test failures). */
     bridge_run__();
+    return need_retry;
 }
 
 /* Delete ofprotos which aren't configured or have the wrong type.  Create
@@ -1197,11 +1203,14 @@ bridge_delete_or_reconfigure_ports(struct bridge *br)
     sset_destroy(&ofproto_ports);
 }
 
-static void
+/* Returns true if any ports addition failed and may need retry. Otherwise
+ * return false. */
+static bool
 bridge_add_ports__(struct bridge *br, const struct shash *wanted_ports,
                    bool with_requested_port)
 {
     struct shash_node *port_node;
+    bool need_retry = false;
 
     SHASH_FOR_EACH (port_node, wanted_ports) {
         const struct ovsrec_port *port_cfg = port_node->data;
@@ -1216,23 +1225,29 @@ bridge_add_ports__(struct bridge *br, const struct 
shash *wanted_ports,
                 struct iface *iface = iface_lookup(br, iface_cfg->name);
 
                 if (!iface) {
-                    iface_create(br, iface_cfg, port_cfg);
+                    if (EEXIST == iface_create(br, iface_cfg, port_cfg)) {
+                        need_retry = true;
+                    }
                 }
             }
         }
     }
+    return need_retry;
 }
 
-static void
+/* Returns true if any ports addition failed and may need retry. Otherwise
+ * return false. */
+static bool
 bridge_add_ports(struct bridge *br, const struct shash *wanted_ports)
 {
     /* First add interfaces that request a particular port number. */
-    bridge_add_ports__(br, wanted_ports, true);
+    bool ret1 = bridge_add_ports__(br, wanted_ports, true);
 
     /* Then add interfaces that want automatic port number assignment.
      * We add these afterward to avoid accidentally taking a specifically
      * requested port number. */
-    bridge_add_ports__(br, wanted_ports, false);
+    bool ret2 = bridge_add_ports__(br, wanted_ports, false);
+    return ret1 || ret2;
 }
 
 static void
@@ -2157,8 +2172,8 @@ error:
  * automatically allocated for the iface.  Takes ownership of and
  * deallocates 'if_cfg'.
  *
- * Return true if an iface is successfully created, false otherwise. */
-static bool
+ * Return 0 if an iface is successfully created, error otherwise. */
+static int
 iface_create(struct bridge *br, const struct ovsrec_interface *iface_cfg,
              const struct ovsrec_port *port_cfg)
 {
@@ -2175,7 +2190,7 @@ iface_create(struct bridge *br, const struct 
ovsrec_interface *iface_cfg,
     if (error) {
         iface_clear_db_record(iface_cfg, errp);
         free(errp);
-        return false;
+        return error;
     }
 
     /* Get or create the port structure. */
@@ -2223,7 +2238,7 @@ iface_create(struct bridge *br, const struct 
ovsrec_interface *iface_cfg,
         }
     }
 
-    return true;
+    return 0;
 }
 
 /* Set forward BPDU option. */
@@ -3364,7 +3379,9 @@ bridge_run(void)
 
         idl_seqno = ovsdb_idl_get_seqno(idl);
         txn = ovsdb_idl_txn_create(idl);
-        bridge_reconfigure(cfg ? cfg : &null_cfg);
+        if (bridge_reconfigure(cfg ? cfg : &null_cfg)) {
+            bridge_reconfigure(cfg ? cfg : &null_cfg);
+        }
 
         if (cfg) {
             ovsrec_open_vswitch_set_cur_cfg(cfg, cfg->next_cfg);
-- 
2.38.1

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to