On 5/30/23 11:59, Vladislav Odintsov wrote: > Hi Dumitru, > > thanks for the review! > My answers are inline. > >> On 30 May 2023, at 12:27, Dumitru Ceara <dce...@redhat.com> wrote: >> >> On 5/19/23 20:18, Vladislav Odintsov wrote: >>> This patch is intended to make next two changes: >>> >>> 1. Support create/update of MAC_Binding for GARP/ND from HW VTEP. >>> >>> Prior to this patch MAC_Binding records were created only when LRP issued >>> ARP request/Neighbor solicitation. If IP to MAC in network attached via >>> vtep lport was changed the old MAC_Binding prevented normal >>> communication. Now router port (chassisredirect) in logical switch with >>> vtep lport catches GARP or Neighbor Solicitation and updates MAC_Binding. >>> >>> New flow with max priority was added in ls_in_arp_nd_resp and additional >>> OF rule in table 37 (OFTABLE_REMOTE_OUTPUT) to process multicast packets >>> received from RAMP_SWITCH. >>> >>> 2. Answer to ARP/ND on requests coming from HW VTEP in lrouter pipeline. >>> >>> This is needed to reduce duplicated arp-responses, which were generated >>> by OVN arp-responder flows in node, where chassisredirect port located >>> with responses coming directly from VM interfaces. >>> >>> As a result of this change, now vifs located on same chassis, where >>> chassisredirect ports are located receive ARP/ND mcast packets, which >>> previously were suppressed by arp-responder on the node. >>> >>> Tests and documentation were updated to conform to new behaviour. >>> >>> Signed-off-by: Vladislav Odintsov <odiv...@gmail.com> >>> --- >> >> Hi Vladislav, >> >> Thanks for the patch. >> >>> controller/binding.c | 48 ++++++++++++++++++++ >>> controller/local_data.h | 3 ++ >>> controller/physical.c | 46 +++++++++++++++++-- >>> northd/northd.c | 10 +++++ >>> northd/ovn-northd.8.xml | 6 +++ >>> tests/ovn.at <http://ovn.at/> | 99 >>> ++++++++++++++++++++++++++++++++++++++++- >> >> Could you please add a NEWS entry for this behavior change? > > Sure, will do it in v2. > >> >>> 6 files changed, 207 insertions(+), 5 deletions(-) >>> >>> diff --git a/controller/binding.c b/controller/binding.c >>> index c7a2b3f10..5932ad785 100644 >>> --- a/controller/binding.c >>> +++ b/controller/binding.c >>> @@ -509,6 +509,30 @@ update_ld_multichassis_ports(const struct >>> sbrec_port_binding *binding_rec, >>> } >>> } >>> >>> +static void >>> +update_ld_vtep_port(const struct sbrec_port_binding *binding_rec, >>> + struct hmap *local_datapaths) >>> +{ >>> + struct local_datapath *ld >>> + = get_local_datapath(local_datapaths, >>> + binding_rec->datapath->tunnel_key); >>> + if (!ld) { >>> + return; >>> + } >>> + >>> + if (ld->vtep_port && strcmp(ld->vtep_port->logical_port, >>> + binding_rec->logical_port)) { >>> + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); >>> + VLOG_WARN_RL(&rl, "vtep port '%s' already set for datapath " >>> + "'%"PRId64"', skipping the new port '%s'.", >>> + ld->vtep_port->logical_port, >>> + binding_rec->datapath->tunnel_key, >>> + binding_rec->logical_port); >>> + return; >>> + } >>> + ld->vtep_port = binding_rec; >>> +} >>> + >>> static void >>> update_ld_localnet_port(const struct sbrec_port_binding *binding_rec, >>> struct shash *bridge_mappings, >>> @@ -1987,6 +2011,7 @@ binding_run(struct binding_ctx_in *b_ctx_in, >>> struct binding_ctx_out *b_ctx_out) >>> struct ovs_list external_lports = >>> OVS_LIST_INITIALIZER(&external_lports); >>> struct ovs_list multichassis_ports = OVS_LIST_INITIALIZER( >>> >>> &multichassis_ports); >>> + struct ovs_list vtep_lports = OVS_LIST_INITIALIZER(&vtep_lports); >>> >>> struct lport { >>> struct ovs_list list_node; >>> @@ -2010,8 +2035,13 @@ binding_run(struct binding_ctx_in *b_ctx_in, >>> struct binding_ctx_out *b_ctx_out) >>> >>> switch (lport_type) { >>> case LP_PATCH: >>> + update_related_lport(pb, b_ctx_out); >>> + break; >>> case LP_VTEP: >>> update_related_lport(pb, b_ctx_out); >>> + struct lport *vtep_lport = xmalloc(sizeof *vtep_lport); >>> + vtep_lport->pb = pb; >>> + ovs_list_push_back(&vtep_lports, &vtep_lport->list_node); >>> break; >>> >>> case LP_LOCALPORT: >>> @@ -2111,6 +2141,16 @@ binding_run(struct binding_ctx_in *b_ctx_in, >>> struct binding_ctx_out *b_ctx_out) >>> free(multichassis_lport); >>> } >>> >>> + /* Run through vtep lport list to see if there are vtep ports >>> + * on local datapaths discovered from above loop, and update the >>> + * corresponding local datapath accordingly. */ >>> + struct lport *vtep_lport; >>> + ovs_list_size(&vtep_lports); >> >> I guess this is a leftover. > > Oops, will fix it in v2. > >> >>> + LIST_FOR_EACH_POP (vtep_lport, list_node, &vtep_lports) { >>> + update_ld_vtep_port(vtep_lport->pb, b_ctx_out->local_datapaths); >>> + free(vtep_lport); >>> + } >>> + >>> shash_destroy(&bridge_mappings); >>> /* Remove stale QoS entries. */ >>> ovs_qos_entries_gc(b_ctx_in->ovs_idl_txn, >>> b_ctx_in->ovsrec_port_by_qos, >>> @@ -2175,6 +2215,11 @@ remove_pb_from_local_datapath(const struct >>> sbrec_port_binding *pb, >>> } >>> } else if (!strcmp(pb->type, "external")) { >>> remove_local_datapath_external_port(ld, pb->logical_port); >>> + } else if (!strcmp(pb->type, "vtep")) { >>> + if (ld->vtep_port && !strcmp(ld->vtep_port->logical_port, >>> + pb->logical_port)) { >>> + ld->vtep_port = NULL; >>> + } >>> } >>> remove_local_datapath_multichassis_port(ld, pb->logical_port); >>> } >>> @@ -2836,6 +2881,7 @@ handle_updated_port(struct binding_ctx_in >>> *b_ctx_in, >>> >>> case LP_VTEP: >>> update_related_lport(pb, b_ctx_out); >>> + update_ld_vtep_port(pb, b_ctx_out->local_datapaths); >>> /* VTEP lports are claimed/released by ovn-controller-vteps. >>> * We are not sure what changed. */ >>> b_ctx_out->non_vif_ports_changed = true; >>> @@ -3091,6 +3137,8 @@ delete_done: >>> } else if (pb->n_additional_chassis) { >>> update_ld_multichassis_ports(pb, >>> >>> b_ctx_out->local_datapaths); >>> + } else if (lport_type == LP_VTEP) { >>> + update_ld_vtep_port(pb, b_ctx_out->local_datapaths); >>> } >> >> Nit: I think I'd move this "else if (lport_type == LP_VTEP) {" just >> under the LP_EXTERNAL case. > > I’ll move it, but I guess it’s not possible to have additional chassis > for vtep pb. > >> >>> } >>> sbrec_port_binding_index_destroy_row(target); >>> diff --git a/controller/local_data.h b/controller/local_data.h >>> index 748f009aa..19d13a558 100644 >>> --- a/controller/local_data.h >>> +++ b/controller/local_data.h >>> @@ -50,6 +50,9 @@ struct local_datapath { >>> /* The localnet port in this datapath, if any (at most one is >>> allowed). */ >>> const struct sbrec_port_binding *localnet_port; >>> >>> + /* The vtep port in this datapath, if any (at most one is >>> allowed). */ >>> + const struct sbrec_port_binding *vtep_port; >>> + >>> struct { >>> const struct sbrec_port_binding *local; >>> const struct sbrec_port_binding *remote; >>> diff --git a/controller/physical.c b/controller/physical.c >>> index 2925dcd1d..b75e120c7 100644 >>> --- a/controller/physical.c >>> +++ b/controller/physical.c >>> @@ -194,6 +194,15 @@ get_localnet_port(const struct hmap >>> *local_datapaths, int64_t tunnel_key) >>> } >>> >>> >>> +static const struct sbrec_port_binding * >>> +get_vtep_port(const struct hmap *local_datapaths, int64_t tunnel_key) >>> +{ >>> + const struct local_datapath *ld = >>> get_local_datapath(local_datapaths, >>> + tunnel_key); >>> + return ld ? ld->vtep_port : NULL; >>> +} >>> + >>> + >>> static struct zone_ids >>> get_zone_ids(const struct sbrec_port_binding *binding, >>> const struct simap *ct_zones) >>> @@ -1732,7 +1741,7 @@ consider_mc_group(struct ovsdb_idl_index >>> *sbrec_port_binding_by_name, >>> struct sset remote_chassis = SSET_INITIALIZER(&remote_chassis); >>> struct sset vtep_chassis = SSET_INITIALIZER(&vtep_chassis); >>> >>> - struct match match; >>> + struct match match, match_ramp; >>> match_outport_dp_and_port_keys(&match, dp_key, mc->tunnel_key); >>> >>> /* Go through all of the ports in the multicast group: >>> @@ -1755,10 +1764,10 @@ consider_mc_group(struct ovsdb_idl_index >>> *sbrec_port_binding_by_name, >>> * set the output port to be the router patch port for which >>> * the redirect port was added. >>> */ >>> - struct ofpbuf ofpacts; >>> + struct ofpbuf ofpacts, remote_ofpacts, remote_ofpacts_ramp; >>> ofpbuf_init(&ofpacts, 0); >>> - struct ofpbuf remote_ofpacts; >>> ofpbuf_init(&remote_ofpacts, 0); >>> + ofpbuf_init(&remote_ofpacts_ramp, 0); >>> for (size_t i = 0; i < mc->n_ports; i++) { >>> struct sbrec_port_binding *port = mc->ports[i]; >>> >>> @@ -1783,6 +1792,7 @@ consider_mc_group(struct ovsdb_idl_index >>> *sbrec_port_binding_by_name, >>> local_output_pb(port->tunnel_key, &ofpacts); >>> } else { >>> local_output_pb(port->tunnel_key, &remote_ofpacts); >>> + local_output_pb(port->tunnel_key, &remote_ofpacts_ramp); >>> } >>> } if (!strcmp(port->type, "remote")) { >>> if (port->chassis) { >>> @@ -1866,6 +1876,35 @@ consider_mc_group(struct ovsdb_idl_index >>> *sbrec_port_binding_by_name, >>> * multicast group as the logical output port. */ >>> put_load(mc->tunnel_key, MFF_LOG_OUTPORT, 0, 32, >>> &remote_ofpacts); >>> + >>> + if (get_vtep_port(local_datapaths, >>> mc->datapath->tunnel_key)) { >> >> I'd declare 'match_ramp' here as it's only used on this branch of the if >> statement. > > Okay, will be fixed in v2. > >> >>> + match_set_reg_masked(&match, MFF_LOG_FLAGS - >>> MFF_REG0, 0, >>> + MLF_RCV_FROM_RAMP); >>> + >> >> Are we going to match less traffic now? We add this "flags & >> MLF_RCV_FROM_RAMP" condition to the already existing priority-100 >> OFTABLE_REMOTE_OUTPUT flow. Does this break multicast traffic not >> received from RAMP (vtep) ports? > > Yes, this limits traffic matching on this flow. > Such flow matches only traffic, which was received from non-RAMP lports. > We need to flood traffic to all multicast group ports only if it was > received from non-RAMP ports. > This is due to fact that flood on the directon "from vtep" is done on > the vtep (ramp) switch: it floods BUM to all hypervisors. > If we don’t do it, we’ll duplicate BUM traffic (first packet is > originally sent from VTEP switch to all hypervisors) and others are > replicated on all hypervisors and re-sent to other hypervisors. > > Please, let me know if this makes sense. >
It does, sorry for the noise. I missed the value = 0 argument. Thanks for the clarification! I'm planning to apply the first 4 patches today and I'll look forward for a v2 of this patch afterwards. Regards, Dumitru _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev