On Wed, Aug 30, 2023 at 7:24 PM Lorenzo Bianconi <lorenzo.bianc...@redhat.com> wrote: > > When using VLAN backed networks and OVN routers leveraging the > 'ovn-chassis-mac-mappings' option for east-west traffic, the eth.src field is > replaced by the chassis mac address in order to not expose the router mac > address from different nodes and confuse the TOR switch. However doing so > the TOR switch is not able to learn the port/mac bindings for routed E/W > traffic and it is force to always flood it. Fix this issue adding the > capability to configure a given timeout for garp sent by ovn-controller > and not disable it after the exponential backoff in order to keep > refreshing the entries in TOR swtich fdb table. > More into about the issue can be found here [0]. > > [0] > https://mail.openvswitch.org/pipermail/ovs-discuss/2020-September/050678.html > Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2087779 > > Signed-off-by: Lorenzo Bianconi <lorenzo.bianc...@redhat.com> > --- > Changes since v3: > - simplify code logic > > Changes since v2: > - cap exponential backoff timeout to garp_max_timeout > > Changes since v1: > - add uni-test > - add documentation > --- > controller/ovn-controller.8.xml | 11 +++++ > controller/ovn-controller.c | 4 +- > controller/pinctrl.c | 73 +++++++++++++++++++++++++++------ > controller/pinctrl.h | 4 +- > tests/ovn.at | 16 ++++++++ > 5 files changed, 93 insertions(+), 15 deletions(-) > > diff --git a/controller/ovn-controller.8.xml b/controller/ovn-controller.8.xml > index 0883d8da9..7b4100592 100644 > --- a/controller/ovn-controller.8.xml > +++ b/controller/ovn-controller.8.xml > @@ -365,6 +365,17 @@ > heplful to pin source outer IP for the tunnel when multiple > interfaces > are used on the host for overlay traffic. > </dd> > + <dt><code>external_ids:garp-max-timeout-sec</code></dt> > + <dd> > + When used, this configuration value specifies the maximum timeout > + (in seconds) between two consecutive GARP packets sent by > + <code>ovn-controller</code>. > + <code>ovn-controller</code> by default sends just 4 GARP packets > + with an exponential backoff timeout. > + Setting <code>external_ids:garp-max-timeout-sec</code> allows to > + cap for the exponential backoff used by <code>ovn-controller</code> > + to send GARPs packets. > + </dd> > </dl> > > <p> > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c > index 1e49f423f..90bb80d56 100644 > --- a/controller/ovn-controller.c > +++ b/controller/ovn-controller.c > @@ -5787,7 +5787,9 @@ main(int argc, char *argv[]) > &runtime_data->local_datapaths, > &runtime_data->active_tunnels, > > &runtime_data->local_active_ports_ipv6_pd, > - &runtime_data->local_active_ports_ras); > + &runtime_data->local_active_ports_ras, > + ovsrec_open_vswitch_table_get( > + ovs_idl_loop.idl)); > stopwatch_stop(PINCTRL_RUN_STOPWATCH_NAME, > time_msec()); > mirror_run(ovs_idl_txn, > diff --git a/controller/pinctrl.c b/controller/pinctrl.c > index bed90fe0b..c280d2256 100644 > --- a/controller/pinctrl.c > +++ b/controller/pinctrl.c > @@ -166,6 +166,10 @@ static struct ovs_mutex pinctrl_mutex = > OVS_MUTEX_INITIALIZER; > static struct seq *pinctrl_handler_seq; > static struct seq *pinctrl_main_seq; > > +#define GARP_RARP_DEF_MAX_TIMEOUT 16000 > +static long long int garp_rarp_max_timeout = GARP_RARP_DEF_MAX_TIMEOUT; > +static bool garp_rarp_continuous; > + > static void *pinctrl_handler(void *arg); > > struct pinctrl { > @@ -227,7 +231,8 @@ static void send_garp_rarp_prepare( > const struct ovsrec_bridge *, > const struct sbrec_chassis *, > const struct hmap *local_datapaths, > - const struct sset *active_tunnels) > + const struct sset *active_tunnels, > + const struct ovsrec_open_vswitch_table *ovs_table) > OVS_REQUIRES(pinctrl_mutex); > static void send_garp_rarp_run(struct rconn *swconn, > long long int *send_garp_rarp_time) > @@ -3492,7 +3497,8 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn, > const struct hmap *local_datapaths, > const struct sset *active_tunnels, > const struct shash *local_active_ports_ipv6_pd, > - const struct shash *local_active_ports_ras) > + const struct shash *local_active_ports_ras, > + const struct ovsrec_open_vswitch_table *ovs_table) > { > ovs_mutex_lock(&pinctrl_mutex); > run_put_mac_bindings(ovnsb_idl_txn, sbrec_datapath_binding_by_key, > @@ -3503,7 +3509,7 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn, > send_garp_rarp_prepare(ovnsb_idl_txn, sbrec_port_binding_by_datapath, > sbrec_port_binding_by_name, > sbrec_mac_binding_by_lport_ip, br_int, chassis, > - local_datapaths, active_tunnels); > + local_datapaths, active_tunnels, ovs_table); > prepare_ipv6_ras(local_active_ports_ras, sbrec_port_binding_by_name); > prepare_ipv6_prefixd(ovnsb_idl_txn, sbrec_port_binding_by_name, > local_active_ports_ipv6_pd, chassis, > @@ -4379,7 +4385,8 @@ struct garp_rarp_data { > struct eth_addr ea; /* Ethernet address of port. */ > ovs_be32 ipv4; /* Ipv4 address of port. */ > long long int announce_time; /* Next announcement in ms. */ > - int backoff; /* Backoff for the next announcement. */ > + int backoff; /* Backoff timeout for the next > + * announcement (in msecs). */ > uint32_t dp_key; /* Datapath used to output this GARP. */ > uint32_t port_key; /* Port to inject the GARP into. */ > }; > @@ -4408,7 +4415,7 @@ add_garp_rarp(const char *name, const struct eth_addr > ea, ovs_be32 ip, > garp_rarp->ea = ea; > garp_rarp->ipv4 = ip; > garp_rarp->announce_time = time_msec() + 1000; > - garp_rarp->backoff = 1; > + garp_rarp->backoff = 1000; /* msec. */ > garp_rarp->dp_key = dp_key; > garp_rarp->port_key = port_key; > shash_add(&send_garp_rarp_data, name, garp_rarp); > @@ -4424,7 +4431,9 @@ send_garp_rarp_update(struct ovsdb_idl_txn > *ovnsb_idl_txn, > struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip, > const struct hmap *local_datapaths, > const struct sbrec_port_binding *binding_rec, > - struct shash *nat_addresses) > + struct shash *nat_addresses, > + long long int garp_max_timeout, > + bool garp_continuous) > { > volatile struct garp_rarp_data *garp_rarp = NULL; > > @@ -4450,6 +4459,12 @@ send_garp_rarp_update(struct ovsdb_idl_txn > *ovnsb_idl_txn, > if (garp_rarp) { > garp_rarp->dp_key = binding_rec->datapath->tunnel_key; > garp_rarp->port_key = binding_rec->tunnel_key; > + if (garp_max_timeout != garp_rarp_max_timeout || > + garp_continuous != garp_rarp_continuous) { > + /* reset backoff */ > + garp_rarp->announce_time = time_msec() + 1000; > + garp_rarp->backoff = 1000; /* msec. */ > + } > } else { > add_garp_rarp(name, laddrs->ea, > laddrs->ipv4_addrs[i].addr, > @@ -4474,6 +4489,12 @@ send_garp_rarp_update(struct ovsdb_idl_txn > *ovnsb_idl_txn, > if (garp_rarp) { > garp_rarp->dp_key = > binding_rec->datapath->tunnel_key; > garp_rarp->port_key = binding_rec->tunnel_key; > + if (garp_max_timeout != garp_rarp_max_timeout || > + garp_continuous != garp_rarp_continuous) { > + /* reset backoff */ > + garp_rarp->announce_time = time_msec() + 1000; > + garp_rarp->backoff = 1000; /* msec. */ > + } > } else { > add_garp_rarp(name, laddrs->ea, > 0, binding_rec->datapath->tunnel_key, > @@ -4493,6 +4514,12 @@ send_garp_rarp_update(struct ovsdb_idl_txn > *ovnsb_idl_txn, > if (garp_rarp) { > garp_rarp->dp_key = binding_rec->datapath->tunnel_key; > garp_rarp->port_key = binding_rec->tunnel_key; > + if (garp_max_timeout != garp_rarp_max_timeout || > + garp_continuous != garp_rarp_continuous) { > + /* reset backoff */ > + garp_rarp->announce_time = time_msec() + 1000; > + garp_rarp->backoff = 1000; /* msec. */ > + } > return; > } > > @@ -4578,13 +4605,15 @@ send_garp_rarp(struct rconn *swconn, struct > garp_rarp_data *garp_rarp, > ofpbuf_uninit(&ofpacts); > > /* Set the next announcement. At most 5 announcements are sent for a > - * vif. */ > - if (garp_rarp->backoff < 16) { > - garp_rarp->backoff *= 2; > - garp_rarp->announce_time = current_time + garp_rarp->backoff * 1000; > + * vif if garp_rarp_max_timeout is not specified otherwise cap the max > + * timeout to garp_rarp_max_timeout. */ > + if (garp_rarp_continuous || garp_rarp->backoff < garp_rarp_max_timeout) { > + garp_rarp->announce_time = current_time + garp_rarp->backoff; > } else { > garp_rarp->announce_time = LLONG_MAX; > } > + garp_rarp->backoff = MIN(garp_rarp_max_timeout, garp_rarp->backoff * 2); > + > return garp_rarp->announce_time; > } > > @@ -5881,13 +5910,26 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn > *ovnsb_idl_txn, > const struct ovsrec_bridge *br_int, > const struct sbrec_chassis *chassis, > const struct hmap *local_datapaths, > - const struct sset *active_tunnels) > + const struct sset *active_tunnels, > + const struct ovsrec_open_vswitch_table *ovs_table) > OVS_REQUIRES(pinctrl_mutex) > { > struct sset localnet_vifs = SSET_INITIALIZER(&localnet_vifs); > struct sset local_l3gw_ports = SSET_INITIALIZER(&local_l3gw_ports); > struct sset nat_ip_keys = SSET_INITIALIZER(&nat_ip_keys); > struct shash nat_addresses; > + unsigned long long garp_max_timeout = GARP_RARP_DEF_MAX_TIMEOUT; > + bool garp_continuous = false; > + const struct ovsrec_open_vswitch *cfg = > + ovsrec_open_vswitch_table_first(ovs_table); > + if (cfg) { > + garp_max_timeout = smap_get_ullong( > + &cfg->external_ids, "garp-max-timeout-sec", 0) * 1000; > + garp_continuous = !!garp_max_timeout; > + if (!garp_max_timeout) { > + garp_max_timeout = GARP_RARP_DEF_MAX_TIMEOUT; > + } > + } > > shash_init(&nat_addresses); > > @@ -5918,7 +5960,8 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn > *ovnsb_idl_txn, > if (pb) { > send_garp_rarp_update(ovnsb_idl_txn, > sbrec_mac_binding_by_lport_ip, > - local_datapaths, pb, &nat_addresses); > + local_datapaths, pb, &nat_addresses, > + garp_max_timeout, garp_continuous); > } > } > > @@ -5929,7 +5972,8 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn > *ovnsb_idl_txn, > = lport_lookup_by_name(sbrec_port_binding_by_name, gw_port); > if (pb) { > send_garp_rarp_update(ovnsb_idl_txn, > sbrec_mac_binding_by_lport_ip, > - local_datapaths, pb, &nat_addresses); > + local_datapaths, pb, &nat_addresses, > + garp_max_timeout, garp_continuous); > } > } > > @@ -5947,6 +5991,9 @@ send_garp_rarp_prepare(struct ovsdb_idl_txn > *ovnsb_idl_txn, > shash_destroy(&nat_addresses); > > sset_destroy(&nat_ip_keys); > + > + garp_rarp_max_timeout = garp_max_timeout; > + garp_rarp_continuous = garp_continuous; > } > > static bool > diff --git a/controller/pinctrl.h b/controller/pinctrl.h > index 279a49fbc..23343f097 100644 > --- a/controller/pinctrl.h > +++ b/controller/pinctrl.h > @@ -30,6 +30,7 @@ struct ovsdb_idl; > struct ovsdb_idl_index; > struct ovsdb_idl_txn; > struct ovsrec_bridge; > +struct ovsrec_open_vswitch_table; > struct sbrec_chassis; > struct sbrec_dns_table; > struct sbrec_controller_event_table; > @@ -57,7 +58,8 @@ void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn, > const struct hmap *local_datapaths, > const struct sset *active_tunnels, > const struct shash *local_active_ports_ipv6_pd, > - const struct shash *local_active_ports_ras); > + const struct shash *local_active_ports_ras, > + const struct ovsrec_open_vswitch_table *ovs_table); > void pinctrl_wait(struct ovsdb_idl_txn *ovnsb_idl_txn); > void pinctrl_destroy(void); > void pinctrl_set_br_int_name(const char *br_int_name); > diff --git a/tests/ovn.at b/tests/ovn.at > index bb5cbf0b9..72420f2bb 100644 > --- a/tests/ovn.at > +++ b/tests/ovn.at > @@ -9098,6 +9098,7 @@ AT_CLEANUP > > OVN_FOR_EACH_NORTHD([ > AT_SETUP([send gratuitous arp for l3gateway only on selected chassis]) > +AT_SKIP_IF([test $HAVE_TCPDUMP = no]) > ovn_start > > # Create logical switch > @@ -9187,6 +9188,21 @@ sleep 2 > OVN_CHECK_PACKETS_CONTAIN([hv2/snoopvif-tx.pcap], [arp_expected]) > OVN_CHECK_PACKETS([hv1/snoopvif-tx.pcap], [empty_expected]) > > +# Temporarily remove lr0 chassis > +AT_CHECK([ovn-nbctl --wait=hv remove logical_router lr0 options chassis]) > + > +as hv1 reset_pcap_file snoopvif hv1/snoopvif > +as hv2 reset_pcap_file snoopvif hv2/snoopvif > + > +AT_CHECK([ovn-nbctl --wait=hv set logical_router lr0 options:chassis=hv1]) > +# set garp max timeout to 2s > +AT_CHECK([as hv1 ovs-vsctl set Open_vSwitch . > external-ids:garp-max-timeout-sec=2]) > + > +OVS_WAIT_UNTIL([ > +n_arp=$(tcpdump -c 10 -ner hv1/snoopvif-tx.pcap arp | wc -l) > +test "$n_arp" = 10 > +]) > + > OVN_CLEANUP([hv1],[hv2]) > > AT_CLEANUP > -- > 2.41.0 >
Looks good to me, thanks. Acked-by: Ales Musil <amu...@redhat.com> -- Ales Musil Senior Software Engineer - OVN Core Red Hat EMEA amu...@redhat.com IM: amusil _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev