>From 04f9b903646cc934bbd863174697b9343ed17e49 Mon Sep 17 00:00:00 2001
From: Qingzhou Hu <[email protected]>
Date: Wed, 4 Mar 2026 09:09:53 +0800
Subject: [PATCH OVN] controller/binding: Fix missing local_datapath updates in
OVS iface handler.
When a VIF tap interface first appears after ovs-vswitchd restart, OVN's
incremental engine handles it via:
binding_handle_ovs_interface_changes()
-> consider_iface_claim()
-> consider_vif_lport_()
-> add_local_datapath() /* creates a new local_datapath */
add_local_datapath() allocates the local_datapath struct with all
per-datapath fields (localnet_port, external_ports, vtep_port,
multichassis_ports) left NULL/empty. The full binding_run() path
avoids this by doing a second pass over all relevant lports and calling
update_ld_localnet_port(), update_ld_external_ports(), etc. for each
newly added local_datapath. The incremental path in
binding_handle_ovs_interface_changes() never performs that pass, so
these fields remain uninitialised.
The most visible consequence is in physical.c consider_mc_group(): when
get_localnet_port() returns NULL it adds all remote chassis to the
_MC_flood group and installs Geneve tunnel output actions. Any
broadcast or GARP sent by the just-claimed VM is therefore tunnelled to
every remote chassis instead of being forwarded out through the localnet
patch port.
The window is:
ovs-vswitchd starts -> ovn-controller first binding_run (no VIFs yet,
localnet update is a no-op) -> nova-compute recreates tap ->
incremental claim -> wrong OpenFlow installed -> VM sends GARP burst
-> tunnelled.
This is exacerbated on large deployments (many IDL cells / OVS ports)
because the longer initialisation delay makes the race much more likely.
Restarting ovn-controller closes the window by triggering a fresh full
binding_run() with the tap already present.
If two hypervisors simultaneously hit this race on the same provider
network, each chassis includes the other in its _MC_flood tunnel group,
turning a single broadcast into an overlay-level broadcast storm until
ovn-controller is restarted on at least one node.
Fix: add a post-processing block at the end of
binding_handle_ovs_interface_changes(), mirroring the equivalent block
that commit 50b3af8938c9 added to binding_handle_port_binding_changes().
That commit fixed the same class of problem for the port-binding handler
but did not cover this path. The new block iterates over all
newly-added local datapaths recorded in tracked_dp_bindings and calls
consider_localnet_lport(), update_ld_localnet_port(),
update_ld_external_ports(), update_ld_vtep_port(), and
update_ld_multichassis_ports() as appropriate for each port binding on
those datapaths.
Fixes: 50b3af8938c9 ("binding.c: Missing local_datapath update in runtime_data
port_binding handler.")
Reported-by: Qingzhou Hu <[email protected]>
Signed-off-by: Qingzhou Hu <[email protected]>
---
controller/binding.c | 39 +++++++++++++++++++++++++++++++++++++++
1 file changed, 39 insertions(+)
diff --git a/controller/binding.c b/controller/binding.c
index 0712d7030..2e3a54f13 100644
--- a/controller/binding.c
+++ b/controller/binding.c
@@ -2666,6 +2666,45 @@ binding_handle_ovs_interface_changes(struct
binding_ctx_in *b_ctx_in,
}
}
+ if (handled) {
+ /* There may be new local datapaths added by the above handling, so
+ * go through each port_binding of newly added local datapaths to
+ * update related local_datapaths if needed. */
+ struct shash bridge_mappings = SHASH_INITIALIZER(&bridge_mappings);
+ add_ovs_bridge_mappings(b_ctx_in->ovs_table,
+ b_ctx_in->bridge_table,
+ &bridge_mappings);
+ struct tracked_datapath *t_dp;
+ HMAP_FOR_EACH (t_dp, node, b_ctx_out->tracked_dp_bindings) {
+ if (t_dp->tracked_type != TRACKED_RESOURCE_NEW) {
+ continue;
+ }
+ struct sbrec_port_binding *target =
+ sbrec_port_binding_index_init_row(
+ b_ctx_in->sbrec_port_binding_by_datapath);
+ sbrec_port_binding_index_set_datapath(target, t_dp->dp);
+ const struct sbrec_port_binding *pb;
+ SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, target,
+ b_ctx_in->sbrec_port_binding_by_datapath) {
+ enum en_lport_type lport_type = get_lport_type(pb);
+ if (lport_type == LP_LOCALNET) {
+ consider_localnet_lport(pb, b_ctx_out);
+ update_ld_localnet_port(pb, &bridge_mappings,
+ b_ctx_out->local_datapaths);
+ } else if (lport_type == LP_EXTERNAL) {
+ update_ld_external_ports(pb, b_ctx_out->local_datapaths);
+ } else if (lport_type == LP_VTEP) {
+ update_ld_vtep_port(pb, b_ctx_out->local_datapaths);
+ } else if (pb->n_additional_chassis) {
+ update_ld_multichassis_ports(pb,
+ b_ctx_out->local_datapaths);
+ }
+ }
+ sbrec_port_binding_index_destroy_row(target);
+ }
+ shash_destroy(&bridge_mappings);
+ }
+
return handled;
}
--
2.50.1 (Apple Git-155)
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev