On Fri, Aug 29, 2025 at 10:40 AM Xavier Simonart <xsimo...@redhat.com> wrote:
> If BFD is down between two gateways in a chassis group, while both gateways > are alive, both ovn-controller were fighting to claim the router port. > There was a 500 msec, hardcoded, grace time preventing both controller > to run at 100% CPU. > Depending of sb dynamics, either gw1 or gw2 ended up claiming the port > most of the time. > For instance: > - gw1 claims the port at time T > - gw2 claims the port at time T (+ few micros) > - gw1 postpone the claims. > - gw1 claims the port at time T+500. > - gw2 claims the port at time T+500 (+ few micros) > - gw1 postpone the claims > However, computes still send their traffic towards the highest priority > gateway. > > This patch guarantees that, if the highest priority gateway is alive, it > always claims > the port. > This patch does not change the behavior if the highest priority chassis > "dies". > > Note: computes send their traffic towards ha gateways based on BFD status > and priority, i.e. > without ovn-controller and sb handling. > We do not want to base the traffic on which gateway claims the router > port, as this would require > ovn-controller & sb handling, potentially delaying migration of the > traffic. > > Signed-off-by: Xavier Simonart <xsimo...@redhat.com> > Acked-by: Ales Musil <amu...@redhat.com> > Signed-off-by: Ales Musil <amu...@redhat.com> > (cherry picked from commit 9c2f897bcebb3d89afa223854ca07b76996bc870) > --- > v2: - Rebased. > - Added missing cherry-pick and Acked-by/Signed-off-by from origin > commit. > --- > controller/binding.c | 24 +++++++++++++++++++----- > controller/binding.h | 4 +++- > controller/ha-chassis.c | 2 +- > controller/ha-chassis.h | 4 ++++ > controller/if-status.c | 14 ++++++++++---- > 5 files changed, 37 insertions(+), 11 deletions(-) > > diff --git a/controller/binding.c b/controller/binding.c > index 2217080b4..c22f26b66 100644 > --- a/controller/binding.c > +++ b/controller/binding.c > @@ -1351,16 +1351,30 @@ remove_additional_chassis(const struct > sbrec_port_binding *pb, > } > > bool > -lport_maybe_postpone(const char *port_name, long long int now, > +lport_maybe_postpone(const struct sbrec_port_binding *pb, > + const struct sbrec_chassis *chassis_rec, > + long long int now, > struct sset *postponed_ports) > { > - long long int last_claimed = get_claim_timestamp(port_name); > + if (pb->ha_chassis_group) { > + struct ha_chassis_ordered *ordered_ha_ch = > + get_ordered_ha_chassis_list(pb->ha_chassis_group, NULL, > + chassis_rec); > + if (ordered_ha_ch && > + ordered_ha_ch->ha_ch[0].chassis == chassis_rec) { > + ha_chassis_destroy_ordered(ordered_ha_ch); > + return false; > + } else { > + ha_chassis_destroy_ordered(ordered_ha_ch); > + } > + } > + long long int last_claimed = get_claim_timestamp(pb->logical_port); > if (now - last_claimed >= CLAIM_TIME_THRESHOLD_MS) { > return false; > } > > - sset_add(postponed_ports, port_name); > - VLOG_DBG("Postponed claim on logical port %s.", port_name); > + sset_add(postponed_ports, pb->logical_port); > + VLOG_DBG("Postponed claim on logical port %s.", pb->logical_port); > > return true; > } > @@ -1391,7 +1405,7 @@ claim_lport(const struct sbrec_port_binding *pb, > if (pb->chassis != chassis_rec) { > long long int now = time_msec(); > if (pb->chassis) { > - if (lport_maybe_postpone(pb->logical_port, now, > + if (lport_maybe_postpone(pb, chassis_rec, now, > postponed_ports)) { > return true; > } > diff --git a/controller/binding.h b/controller/binding.h > index 00dd9dbf5..5bc4734e6 100644 > --- a/controller/binding.h > +++ b/controller/binding.h > @@ -276,7 +276,9 @@ void update_qos(struct ovsdb_idl_index * > sbrec_port_binding_by_name, > const struct ovsrec_open_vswitch_table *ovs_table, > const struct ovsrec_bridge_table *bridge_table); > > -bool lport_maybe_postpone(const char *port_name, long long int now, > +bool lport_maybe_postpone(const struct sbrec_port_binding *pb, > + const struct sbrec_chassis *chassis_rec, > + long long int now, > struct sset *postponed_ports); > > void claimed_lport_set_up(const struct sbrec_port_binding *pb, > diff --git a/controller/ha-chassis.c b/controller/ha-chassis.c > index 945c306b6..ad0b3ef0b 100644 > --- a/controller/ha-chassis.c > +++ b/controller/ha-chassis.c > @@ -59,7 +59,7 @@ compare_chassis_prio_(const void *a_, const void *b_) > * If active_tunnels is set is empty and local_chassis is HA3, > * then it returns NULL. > */ > -static struct ha_chassis_ordered * > +struct ha_chassis_ordered * > get_ordered_ha_chassis_list(const struct sbrec_ha_chassis_group > *ha_ch_grp, > const struct sset *active_tunnels, > const struct sbrec_chassis *local_chassis) > diff --git a/controller/ha-chassis.h b/controller/ha-chassis.h > index 3768c2a5c..c7c91e001 100644 > --- a/controller/ha-chassis.h > +++ b/controller/ha-chassis.h > @@ -46,5 +46,9 @@ struct ha_chassis_ordered *ha_chassis_get_ordered( > > void ha_chassis_destroy_ordered( > struct ha_chassis_ordered *ordered_ha_ch); > +struct ha_chassis_ordered *get_ordered_ha_chassis_list( > + const struct sbrec_ha_chassis_group *ha_ch_grp, > + const struct sset *active_tunnels, > + const struct sbrec_chassis *local_chassis); > > #endif /* OVN_HA_CHASSIS_H */ > diff --git a/controller/if-status.c b/controller/if-status.c > index 32b6064ba..9b0f2bcdf 100644 > --- a/controller/if-status.c > +++ b/controller/if-status.c > @@ -515,8 +515,11 @@ if_status_mgr_update(struct if_status_mgr *mgr, > chassis_rec)) { > if (!sb_readonly) { > long long int now = time_msec(); > - if (lport_maybe_postpone(iface->id, now, > - get_postponed_ports())) { > + const struct sbrec_port_binding *pb = > + sbrec_port_binding_table_get_for_uuid(pb_table, > + > &iface->pb_uuid); > + if (pb && lport_maybe_postpone(pb, chassis_rec, now, > + > get_postponed_ports())) { > continue; > } > local_binding_set_pb(bindings, iface->id, chassis_rec, > @@ -572,8 +575,11 @@ if_status_mgr_update(struct if_status_mgr *mgr, > if (!local_bindings_pb_chassis_is_set(bindings, iface->id, > chassis_rec)) { > long long int now = time_msec(); > - if (lport_maybe_postpone(iface->id, now, > - get_postponed_ports())) { > + const struct sbrec_port_binding *pb = > + sbrec_port_binding_table_get_for_uuid(pb_table, > + > &iface->pb_uuid); > + if (pb && lport_maybe_postpone(pb, chassis_rec, now, > + get_postponed_ports())) { > continue; > } > local_binding_set_pb(bindings, iface->id, chassis_rec, > -- > 2.47.1 > > Thank you Xavier, I went ahead and merged the series into 25.03. Regards, Ales _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev