On 5/30/23 11:59, Vladislav Odintsov wrote:
> Hi Dumitru,
> 
> thanks for the review!
> My answers are inline.
> 
>> On 30 May 2023, at 12:27, Dumitru Ceara <dce...@redhat.com> wrote:
>>
>> On 5/19/23 20:18, Vladislav Odintsov wrote:
>>> This patch is intended to make next two changes:
>>>
>>> 1. Support create/update of MAC_Binding for GARP/ND from HW VTEP.
>>>
>>> Prior to this patch MAC_Binding records were created only when LRP issued
>>> ARP request/Neighbor solicitation.  If IP to MAC in network attached via
>>> vtep lport was changed the old MAC_Binding prevented normal
>>> communication.  Now router port (chassisredirect) in logical switch with
>>> vtep lport catches GARP or Neighbor Solicitation and updates MAC_Binding.
>>>
>>> New flow with max priority was added in ls_in_arp_nd_resp and additional
>>> OF rule in table 37 (OFTABLE_REMOTE_OUTPUT) to process multicast packets
>>> received from RAMP_SWITCH.
>>>
>>> 2. Answer to ARP/ND on requests coming from HW VTEP in lrouter pipeline.
>>>
>>> This is needed to reduce duplicated arp-responses, which were generated
>>> by OVN arp-responder flows in node, where chassisredirect port located
>>> with responses coming directly from VM interfaces.
>>>
>>> As a result of this change, now vifs located on same chassis, where
>>> chassisredirect ports are located receive ARP/ND mcast packets, which
>>> previously were suppressed by arp-responder on the node.
>>>
>>> Tests and documentation were updated to conform to new behaviour.
>>>
>>> Signed-off-by: Vladislav Odintsov <odiv...@gmail.com>
>>> ---
>>
>> Hi Vladislav,
>>
>> Thanks for the patch.
>>
>>> controller/binding.c    | 48 ++++++++++++++++++++
>>> controller/local_data.h |  3 ++
>>> controller/physical.c   | 46 +++++++++++++++++--
>>> northd/northd.c         | 10 +++++
>>> northd/ovn-northd.8.xml |  6 +++
>>> tests/ovn.at <http://ovn.at/>            | 99
>>> ++++++++++++++++++++++++++++++++++++++++-
>>
>> Could you please add a NEWS entry for this behavior change?
> 
> Sure, will do it in v2.
> 
>>
>>> 6 files changed, 207 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/controller/binding.c b/controller/binding.c
>>> index c7a2b3f10..5932ad785 100644
>>> --- a/controller/binding.c
>>> +++ b/controller/binding.c
>>> @@ -509,6 +509,30 @@ update_ld_multichassis_ports(const struct
>>> sbrec_port_binding *binding_rec,
>>>     }
>>> }
>>>
>>> +static void
>>> +update_ld_vtep_port(const struct sbrec_port_binding *binding_rec,
>>> +                    struct hmap *local_datapaths)
>>> +{
>>> +    struct local_datapath *ld
>>> +        = get_local_datapath(local_datapaths,
>>> +                             binding_rec->datapath->tunnel_key);
>>> +    if (!ld) {
>>> +        return;
>>> +    }
>>> +
>>> +    if (ld->vtep_port && strcmp(ld->vtep_port->logical_port,
>>> +                                binding_rec->logical_port)) {
>>> +        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
>>> +        VLOG_WARN_RL(&rl, "vtep port '%s' already set for datapath "
>>> +                     "'%"PRId64"', skipping the new port '%s'.",
>>> +                     ld->vtep_port->logical_port,
>>> +                     binding_rec->datapath->tunnel_key,
>>> +                     binding_rec->logical_port);
>>> +        return;
>>> +    }
>>> +    ld->vtep_port = binding_rec;
>>> +}
>>> +
>>> static void
>>> update_ld_localnet_port(const struct sbrec_port_binding *binding_rec,
>>>                         struct shash *bridge_mappings,
>>> @@ -1987,6 +2011,7 @@ binding_run(struct binding_ctx_in *b_ctx_in,
>>> struct binding_ctx_out *b_ctx_out)
>>>     struct ovs_list external_lports =
>>> OVS_LIST_INITIALIZER(&external_lports);
>>>     struct ovs_list multichassis_ports = OVS_LIST_INITIALIZER(
>>>                                                         
>>> &multichassis_ports);
>>> +    struct ovs_list vtep_lports = OVS_LIST_INITIALIZER(&vtep_lports);
>>>
>>>     struct lport {
>>>         struct ovs_list list_node;
>>> @@ -2010,8 +2035,13 @@ binding_run(struct binding_ctx_in *b_ctx_in,
>>> struct binding_ctx_out *b_ctx_out)
>>>
>>>         switch (lport_type) {
>>>         case LP_PATCH:
>>> +            update_related_lport(pb, b_ctx_out);
>>> +            break;
>>>         case LP_VTEP:
>>>             update_related_lport(pb, b_ctx_out);
>>> +            struct lport *vtep_lport = xmalloc(sizeof *vtep_lport);
>>> +            vtep_lport->pb = pb;
>>> +            ovs_list_push_back(&vtep_lports, &vtep_lport->list_node);
>>>             break;
>>>
>>>         case LP_LOCALPORT:
>>> @@ -2111,6 +2141,16 @@ binding_run(struct binding_ctx_in *b_ctx_in,
>>> struct binding_ctx_out *b_ctx_out)
>>>         free(multichassis_lport);
>>>     }
>>>
>>> +    /* Run through vtep lport list to see if there are vtep ports
>>> +     * on local datapaths discovered from above loop, and update the
>>> +     * corresponding local datapath accordingly. */
>>> +    struct lport *vtep_lport;
>>> +    ovs_list_size(&vtep_lports);
>>
>> I guess this is a leftover.
> 
> Oops, will fix it in v2.
> 
>>
>>> +    LIST_FOR_EACH_POP (vtep_lport, list_node, &vtep_lports) {
>>> +        update_ld_vtep_port(vtep_lport->pb, b_ctx_out->local_datapaths);
>>> +        free(vtep_lport);
>>> +    }
>>> +
>>>     shash_destroy(&bridge_mappings);
>>>     /* Remove stale QoS entries. */
>>>     ovs_qos_entries_gc(b_ctx_in->ovs_idl_txn,
>>> b_ctx_in->ovsrec_port_by_qos,
>>> @@ -2175,6 +2215,11 @@ remove_pb_from_local_datapath(const struct
>>> sbrec_port_binding *pb,
>>>         }
>>>     } else if (!strcmp(pb->type, "external")) {
>>>         remove_local_datapath_external_port(ld, pb->logical_port);
>>> +    } else if (!strcmp(pb->type, "vtep")) {
>>> +        if (ld->vtep_port && !strcmp(ld->vtep_port->logical_port,
>>> +                                     pb->logical_port)) {
>>> +            ld->vtep_port = NULL;
>>> +        }
>>>     }
>>>     remove_local_datapath_multichassis_port(ld, pb->logical_port);
>>> }
>>> @@ -2836,6 +2881,7 @@ handle_updated_port(struct binding_ctx_in
>>> *b_ctx_in,
>>>
>>>     case LP_VTEP:
>>>         update_related_lport(pb, b_ctx_out);
>>> +        update_ld_vtep_port(pb, b_ctx_out->local_datapaths);
>>>         /* VTEP lports are claimed/released by ovn-controller-vteps.
>>>          * We are not sure what changed. */
>>>         b_ctx_out->non_vif_ports_changed = true;
>>> @@ -3091,6 +3137,8 @@ delete_done:
>>>                 } else if (pb->n_additional_chassis) {
>>>                     update_ld_multichassis_ports(pb,
>>>                                                  
>>> b_ctx_out->local_datapaths);
>>> +                } else if (lport_type == LP_VTEP) {
>>> +                    update_ld_vtep_port(pb, b_ctx_out->local_datapaths);
>>>                 }
>>
>> Nit: I think I'd move this "else if (lport_type == LP_VTEP) {" just
>> under the LP_EXTERNAL case.
> 
> I’ll move it, but I guess it’s not possible to have additional chassis
> for vtep pb.
> 
>>
>>>             }
>>>             sbrec_port_binding_index_destroy_row(target);
>>> diff --git a/controller/local_data.h b/controller/local_data.h
>>> index 748f009aa..19d13a558 100644
>>> --- a/controller/local_data.h
>>> +++ b/controller/local_data.h
>>> @@ -50,6 +50,9 @@ struct local_datapath {
>>>     /* The localnet port in this datapath, if any (at most one is
>>> allowed). */
>>>     const struct sbrec_port_binding *localnet_port;
>>>
>>> +    /* The vtep port in this datapath, if any (at most one is
>>> allowed). */
>>> +    const struct sbrec_port_binding *vtep_port;
>>> +
>>>     struct {
>>>         const struct sbrec_port_binding *local;
>>>         const struct sbrec_port_binding *remote;
>>> diff --git a/controller/physical.c b/controller/physical.c
>>> index 2925dcd1d..b75e120c7 100644
>>> --- a/controller/physical.c
>>> +++ b/controller/physical.c
>>> @@ -194,6 +194,15 @@ get_localnet_port(const struct hmap
>>> *local_datapaths, int64_t tunnel_key)
>>> }
>>>
>>>
>>> +static const struct sbrec_port_binding *
>>> +get_vtep_port(const struct hmap *local_datapaths, int64_t tunnel_key)
>>> +{
>>> +    const struct local_datapath *ld =
>>> get_local_datapath(local_datapaths,
>>> +                                                         tunnel_key);
>>> +    return ld ? ld->vtep_port : NULL;
>>> +}
>>> +
>>> +
>>> static struct zone_ids
>>> get_zone_ids(const struct sbrec_port_binding *binding,
>>>              const struct simap *ct_zones)
>>> @@ -1732,7 +1741,7 @@ consider_mc_group(struct ovsdb_idl_index
>>> *sbrec_port_binding_by_name,
>>>     struct sset remote_chassis = SSET_INITIALIZER(&remote_chassis);
>>>     struct sset vtep_chassis = SSET_INITIALIZER(&vtep_chassis);
>>>
>>> -    struct match match;
>>> +    struct match match, match_ramp;
>>>     match_outport_dp_and_port_keys(&match, dp_key, mc->tunnel_key);
>>>
>>>     /* Go through all of the ports in the multicast group:
>>> @@ -1755,10 +1764,10 @@ consider_mc_group(struct ovsdb_idl_index
>>> *sbrec_port_binding_by_name,
>>>      *      set the output port to be the router patch port for which
>>>      *      the redirect port was added.
>>>      */
>>> -    struct ofpbuf ofpacts;
>>> +    struct ofpbuf ofpacts, remote_ofpacts, remote_ofpacts_ramp;
>>>     ofpbuf_init(&ofpacts, 0);
>>> -    struct ofpbuf remote_ofpacts;
>>>     ofpbuf_init(&remote_ofpacts, 0);
>>> +    ofpbuf_init(&remote_ofpacts_ramp, 0);
>>>     for (size_t i = 0; i < mc->n_ports; i++) {
>>>         struct sbrec_port_binding *port = mc->ports[i];
>>>
>>> @@ -1783,6 +1792,7 @@ consider_mc_group(struct ovsdb_idl_index
>>> *sbrec_port_binding_by_name,
>>>                 local_output_pb(port->tunnel_key, &ofpacts);
>>>             } else {
>>>                 local_output_pb(port->tunnel_key, &remote_ofpacts);
>>> +                local_output_pb(port->tunnel_key, &remote_ofpacts_ramp);
>>>             }
>>>         } if (!strcmp(port->type, "remote")) {
>>>             if (port->chassis) {
>>> @@ -1866,6 +1876,35 @@ consider_mc_group(struct ovsdb_idl_index
>>> *sbrec_port_binding_by_name,
>>>              * multicast group as the logical output port. */
>>>             put_load(mc->tunnel_key, MFF_LOG_OUTPORT, 0, 32,
>>>                      &remote_ofpacts);
>>> +
>>> +            if (get_vtep_port(local_datapaths,
>>> mc->datapath->tunnel_key)) {
>>
>> I'd declare 'match_ramp' here as it's only used on this branch of the if
>> statement.
> 
> Okay, will be fixed in v2.
> 
>>
>>> +                match_set_reg_masked(&match, MFF_LOG_FLAGS -
>>> MFF_REG0, 0,
>>> +                                     MLF_RCV_FROM_RAMP);
>>> +
>>
>> Are we going to match less traffic now?  We add this "flags &
>> MLF_RCV_FROM_RAMP" condition to the already existing priority-100
>> OFTABLE_REMOTE_OUTPUT flow.  Does this break multicast traffic not
>> received from RAMP (vtep) ports?
> 
> Yes, this limits traffic matching on this flow.
> Such flow matches only traffic, which was received from non-RAMP lports.
> We need to flood traffic to all multicast group ports only if it was
> received from non-RAMP ports.
> This is due to fact that flood on the directon "from vtep" is done on
> the vtep (ramp) switch: it floods BUM to all hypervisors.
> If we don’t do it, we’ll duplicate BUM traffic (first packet is
> originally sent from VTEP switch to all hypervisors) and others are
> replicated on all hypervisors and re-sent to other hypervisors.
> 
> Please, let me know if this makes sense.
> 

It does, sorry for the noise.  I missed the value = 0 argument.

Thanks for the clarification!

I'm planning to apply the first 4 patches today and I'll look forward
for a v2 of this patch afterwards.

Regards,
Dumitru

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to