On Thu, Mar 12, 2026 at 12:23 PM Felix Moebius <[email protected]>
wrote:

> The mechanism for refreshing mac binding entries will send out an ARP/ND
> request using the logical port's mac address, regardless of whether the
> port is local to a given chassis.
>
> For gateway ports this will make a given LRP incorrectly appear to be
> claimed on the chassis that sends out the request, thereby confusing
> physical switches and other gateway chassis, which ultimately results in
> broken connectivity for that LRP.
> In particular, when the chassis that has actually claimed the LRP, sees
> the request from the other chassis with the mac address from its LRP,
> the OVS bridge that provides the external connectivity will incorrectly
> relearn the LRP's mac address on the port that is connected to the
> physical network. This causes it to drop all further traffic destined
> to the LRP until egress traffic coming from the LRP causes it to move
> the mac back to the correct port or until the FDB entry times out.
>
> Add a check to verify that a logical port from a mac binding is local to
> a given chassis before trying to refresh it.
>
> Fixes: 1e4d4409f391 ("controller: Send ARP/ND for stale mac_bindings
> entries.")
> Signed-off-by: Felix Moebius <[email protected]>
> Signed-off-by: Ales Musil <[email protected]>
> (cherry picked from commit 215b0a1e314e941b309e9b84c7679cc3e8b28a48)
> ---
> controller/lport.c | 27 ++++++++++++++
> controller/lport.h | 3 ++
> controller/mac-cache.c | 9 +++++
> controller/mac-cache.h | 1 +
> controller/ovn-controller.c | 2 +-
> controller/statctrl.c | 2 ++
> controller/statctrl.h | 1 +
> tests/ovn.at | 72 +++++++++++++++++++++++++++++++++++++
> 8 files changed, 116 insertions(+), 1 deletion(-)
>
> diff --git a/controller/lport.c b/controller/lport.c
> index b3721024b..10a59f33b 100644
> --- a/controller/lport.c
> +++ b/controller/lport.c
> @@ -98,6 +98,33 @@ lport_is_chassis_resident(struct ovsdb_idl_index
> *sbrec_port_binding_by_name,
> }
> }
>
> +bool
> +lport_pb_is_local(struct ovsdb_idl_index *sbrec_port_binding_by_name,
> + const struct sbrec_chassis *chassis,
> + const struct sbrec_port_binding *pb)
> +{
> + if (!pb || !chassis) {
> + return false;
> + }
> +
> + if (pb->chassis == chassis) {
> + return true;
> + }
> +
> + const char *crp_name = smap_get(&pb->options, "chassis-redirect-port");
> + const struct sbrec_port_binding *cr_pb = NULL;
> + if (crp_name) {
> + cr_pb = lport_lookup_by_name(sbrec_port_binding_by_name, crp_name);
> + }
> +
> + if (cr_pb) {
> + return cr_pb->chassis == chassis;
> + } else {
> + /* Patch ports that are not redirect ports are always local. */
> + return get_lport_type(pb) == LP_PATCH;
> + }
> +}
> +
> const struct sbrec_port_binding *
> lport_get_peer(const struct sbrec_port_binding *pb,
> struct ovsdb_idl_index *sbrec_port_binding_by_name)
> diff --git a/controller/lport.h b/controller/lport.h
> index 2f72aef5e..fc4589bed 100644
> --- a/controller/lport.h
> +++ b/controller/lport.h
> @@ -68,6 +68,9 @@ lport_is_chassis_resident(struct ovsdb_idl_index
> *sbrec_port_binding_by_name,
> const struct sbrec_chassis *chassis,
> const struct sset *active_tunnels,
> const char *port_name);
> +bool lport_pb_is_local(struct ovsdb_idl_index *sbrec_port_binding_by_name,
> + const struct sbrec_chassis *chassis,
> + const struct sbrec_port_binding *pb);
> const struct sbrec_port_binding *lport_get_peer(
> const struct sbrec_port_binding *,
> struct ovsdb_idl_index *sbrec_port_binding_by_name);
> diff --git a/controller/mac-cache.c b/controller/mac-cache.c
> index 732a5db17..3d29ffb8c 100644
> --- a/controller/mac-cache.c
> +++ b/controller/mac-cache.c
> @@ -953,6 +953,15 @@ mac_binding_probe_stats_run(struct ovs_list
> *stats_list, uint64_t *req_delay,
> continue;
> }
>
> + if (!lport_pb_is_local(probe_data->sbrec_port_binding_by_name,
> + probe_data->chassis, pb)) {
> + mac_binding_update_log("Not sending ARP/ND request for non-local",
> + &mb->data, true, threshold,
> + stats->idle_age_ms, since_updated_ms);
> + free(stats);
> + continue;
> + }
> +
> struct lport_addresses laddr;
> if (!extract_lsp_addresses(pb->mac[0], &laddr)) {
> free(stats);
> diff --git a/controller/mac-cache.h b/controller/mac-cache.h
> index caf062e2d..95f46fc19 100644
> --- a/controller/mac-cache.h
> +++ b/controller/mac-cache.h
> @@ -66,6 +66,7 @@ struct mac_binding_probe_data {
> struct mac_cache_data *cache_data;
> struct rconn *swconn;
> struct ovsdb_idl_index *sbrec_port_binding_by_name;
> + const struct sbrec_chassis *chassis;
> };
>
> struct mac_binding {
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index 94837410d..6df14db62 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -5834,7 +5834,7 @@ main(int argc, char *argv[])
> mac_cache_data = engine_get_data(&en_mac_cache);
> if (mac_cache_data) {
> statctrl_run(ovnsb_idl_txn, sbrec_port_binding_by_name,
> - mac_cache_data);
> + chassis, mac_cache_data);
> }
>
> ofctrl_seqno_update_create(
> diff --git a/controller/statctrl.c b/controller/statctrl.c
> index e1e2cae2b..fb2a9814a 100644
> --- a/controller/statctrl.c
> +++ b/controller/statctrl.c
> @@ -175,6 +175,7 @@ statctrl_init(void)
> void
> statctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
> struct ovsdb_idl_index *sbrec_port_binding_by_name,
> + const struct sbrec_chassis *chassis,
> struct mac_cache_data *mac_cache_data)
> {
> if (!ovnsb_idl_txn) {
> @@ -185,6 +186,7 @@ statctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
> .cache_data = mac_cache_data,
> .sbrec_port_binding_by_name = sbrec_port_binding_by_name,
> .swconn = statctrl_ctx.swconn,
> + .chassis = chassis,
> };
>
> void *node_data[STATS_MAX] = {
> diff --git a/controller/statctrl.h b/controller/statctrl.h
> index 69a0b6f35..3e56b4a54 100644
> --- a/controller/statctrl.h
> +++ b/controller/statctrl.h
> @@ -21,6 +21,7 @@
> void statctrl_init(void);
> void statctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
> struct ovsdb_idl_index *sbrec_port_binding_by_name,
> + const struct sbrec_chassis *chassis,
> struct mac_cache_data *mac_cache_data);
>
> void statctrl_update_swconn(const char *target, int probe_interval);
> diff --git a/tests/ovn.at b/tests/ovn.at
> index 7b6faa2ae..1496ce69e 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -35446,6 +35446,78 @@ OVN_CLEANUP([hv1])
> AT_CLEANUP
> ])
>
> +OVN_FOR_EACH_NORTHD([
> +AT_SETUP([MAC binding aging - probing distributed GW router])
> +AT_SKIP_IF([test $HAVE_SCAPY = no])
> +ovn_start
> +
> +# Check that during mac probing only the chassis that has currently
> claimed
> +# a given port tries to refresh the mac bindings associated with that
> port.
> +# Create a router connected to a switch with a localnet port on br-phys.
> +# Create two hypervisors hv1 and hv2 and bind the lrp on hv1. hv1 should
> +# then try to refresh mac bindings for the lrp whereas hv2 should not.
> +
> +net_add n1
> +for i in 1 2; do
> + sim_add hv$i
> + as hv$i
> + ovn-appctl -t ovn-controller vlog/set mac_cache:file:dbg pinctrl:file:dbg
> + check ovs-vsctl set open .
> external-ids:ovn-bridge-mappings=physnet:br-phys
> + check ovs-vsctl add-br br-phys
> + check ovs-vsctl add-port br-phys snoopvif -- \
> + set Interface snoopvif \
> + options:tx_pcap=hv$i/snoopvif-tx.pcap \
> + options:rxq_pcap=hv$i/snoopvif-rx.pcap
> + check ovs-vsctl add-br br-underlay
> + ovn_attach n1 br-underlay 192.168.0.$i
> +done
> +
> +lrp_mac=00:00:00:00:00:0a
> +lrp_ip=192.168.1.10
> +aging_th=10
> +
> +check ovn-nbctl lr-add lr
> +check ovn-nbctl set logical_router lr
> options:mac_binding_age_threshold=$aging_th
> +check ovn-nbctl lrp-add lr lr-ls $lrp_mac $lrp_ip/24
> +check ovn-nbctl ls-add ls
> +check ovn-nbctl lsp-add-router-port ls ls-lr lr-ls
> +check ovn-nbctl lsp-add-localnet-port ls ls-phys physnet
> +check ovn-nbctl lrp-set-gateway-chassis lr-ls hv1 20
> +check ovn-nbctl lrp-set-gateway-chassis lr-ls hv2 10
> +
> +check ovn-nbctl --wait=hv sync
> +wait_for_ports_up
> +
> +# Wait for hv1 to claim the lrp
> +hv1_chassis=$(fetch_column Chassis _uuid name=hv1)
> +wait_row_count Port_Binding 1 logical_port=cr-lr-ls chassis=$hv1_chassis
> +
> +# Let hv1 receive an external garp to create a mac binding on the lrp
> +ext_mac=00:00:00:00:00:14
> +ext_ip=192.168.1.20
> +send_garp hv1 snoopvif 1 $ext_mac "ff:ff:ff:ff:ff:ff" $ext_ip $ext_ip
> +
> +# Wait for mac binding to be created
> +wait_row_count mac_binding 1 ip="$ext_ip" logical_port="lr-ls"
> +
> +# Wait for mac binding to be removed
> +wait_row_count mac_binding 0 ip="$ext_ip" logical_port="lr-ls"
> +
> +# hv1 should have sent out arps for mac binding refresh
> +arp_req=$(fmt_pkt \
> + "Ether(dst='ff:ff:ff:ff:ff:ff', src='$lrp_mac')/ \
> + ARP(hwsrc='$lrp_mac', hwdst='00:00:00:00:00:00', psrc='$lrp_ip',
> pdst='$ext_ip')")
> +echo $arp_req >> hv1_snoopvif.expected
> +OVN_CHECK_PACKETS_CONTAIN([hv1/snoopvif-tx.pcap], [hv1_snoopvif.expected])
> +
> +# hv2 should have sent nothing
> +: > hv2_snoopvif.expected
> +OVN_CHECK_PACKETS([hv2/snoopvif-tx.pcap], [hv2_snoopvif.expected])
> +
> +OVN_CLEANUP([hv1], [hv2])
> +AT_CLEANUP
> +])
> +
> OVN_FOR_EACH_NORTHD([
> AT_SETUP([MAC binding aging - probing GW router Dynamic Neigh])
> AT_SKIP_IF([test $HAVE_SCAPY = no])
> --
> 2.52.0
>
>
>

Thank you Felix,

I rearranged `lport_pb_is_local()` slightly to align it more closely with
the version in 25.03 onwards, and I applied that change to 24.09 and 24.03.

Regards,
Ales

>
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to