The 15-bit port key range used for multicast groups can't be covered by 12-bit key space available for port keys in VXLAN. To make multicast keys work, we have to transform 16-bit multicast port keys to 12-bit keys before fanning out packets through VXLAN tunnels. Otherwise significant bits are not retained, and multicast / broadcast traffic does not reach ports located on other chassis.
This patch introduces a mapping scheme between core 16-bit multicast port keys and 12-bit key range available in VXLAN. The scheme is as follows: 1) Before sending a packet through VXLAN tunnel, the most significant bit of a 16-bit port key is copied into the most significant bit of 12-bit VXLAN key. The 11 least significant bits of a 16-bit port key are copied to the least significant bits of 12-bit VXLAN key. 2) When receiving a packet through VXLAN tunnel, the most significant bit of a VXLAN 12-bit port key is copied into the most significant bit of 16-bit port key used in core. The 11 least significant bits of a VXLAN 12-bit port key are copied into the least significant bits of a 16-bit port key used in core. This change also implies that the range available for multicast port keys is more limited and fits into 11-bit space. The same rule should be enforced for unicast port keys, like we already do for tunnel keys when a VXLAN encap is present in a cluster. This enforcement is implied here but missing in master and will be implemented in a separate patch. (The missing enforcement is an oversight of the original patch that added support for VXLAN tunnels.) Fixes: b07f1bc3d068 ("Add VXLAN support for non-VTEP datapath bindings") Signed-off-by: Ihar Hrachyshka <ihrac...@redhat.com> --- controller-vtep/gateway.c | 2 + controller/physical.c | 101 ++++++++++++++++++++++++++++++-------- lib/mcast-group-index.h | 15 ++++++ tests/ovn.at | 23 +++++++-- 4 files changed, 116 insertions(+), 25 deletions(-) diff --git a/controller-vtep/gateway.c b/controller-vtep/gateway.c index e9419138b..288772dc4 100644 --- a/controller-vtep/gateway.c +++ b/controller-vtep/gateway.c @@ -61,6 +61,8 @@ create_chassis_rec(struct ovsdb_idl_txn *txn, const char *name, sbrec_encap_set_options(encap_rec, &options); sbrec_encap_set_chassis_name(encap_rec, name); sbrec_chassis_set_encaps(chassis_rec, &encap_rec, 1); + const struct smap oc = SMAP_CONST1(&oc, "is-vtep", "true"); + sbrec_chassis_set_other_config(chassis_rec, &oc); return chassis_rec; } diff --git a/controller/physical.c b/controller/physical.c index 6f2c1cea0..aa2942dd4 100644 --- a/controller/physical.c +++ b/controller/physical.c @@ -37,6 +37,7 @@ #include "openvswitch/ofp-parse.h" #include "ovn-controller.h" #include "lib/chassis-index.h" +#include "lib/mcast-group-index.h" #include "lib/ovn-sb-idl.h" #include "lib/ovn-util.h" #include "physical.h" @@ -63,6 +64,7 @@ static void load_logical_ingress_metadata(const struct sbrec_port_binding *binding, const struct zone_ids *zone_ids, struct ofpbuf *ofpacts_p); +static int64_t get_vxlan_port_key(int64_t port_key); /* UUID to identify OF flows not associated with ovsdb rows. */ static struct uuid *hc_uuid = NULL; @@ -160,8 +162,9 @@ put_encapsulation(enum mf_field_id mff_ovn_geneve, } else if (tun->type == VXLAN) { uint64_t vni = datapath->tunnel_key; if (!is_ramp_switch) { - /* Only some bits are used for regular tunnels. */ - vni |= (uint64_t) outport << 12; + /* Map southbound 16-bit port key to limited 12-bit range + * available for VXLAN, which differs for multicast groups. */ + vni |= get_vxlan_port_key(outport) << 12; } put_load(vni, MFF_TUN_ID, 0, 24, ofpacts); } else { @@ -1372,6 +1375,43 @@ out: } } +static int64_t +get_vxlan_port_key(int64_t port_key) +{ + if (port_key >= OVN_MIN_MULTICAST) { + /* 0b1<11 least significant bits> */ + return OVN_VXLAN_MIN_MULTICAST | + (port_key & (OVN_VXLAN_MIN_MULTICAST - 1)); + } + return port_key; +} + +static void +fanout_to_chassis(enum mf_field_id mff_ovn_geneve, + struct sset *remote_chassis, + const struct hmap *chassis_tunnels, + const struct sbrec_datapath_binding *datapath, + uint16_t outport, bool is_ramp_switch, + struct ofpbuf *remote_ofpacts) +{ + const char *chassis_name; + const struct chassis_tunnel *prev = NULL; + SSET_FOR_EACH (chassis_name, remote_chassis) { + const struct chassis_tunnel *tun + = chassis_tunnel_find(chassis_tunnels, chassis_name, NULL); + if (!tun) { + continue; + } + + if (!prev || tun->type != prev->type) { + put_encapsulation(mff_ovn_geneve, tun, datapath, + outport, is_ramp_switch, remote_ofpacts); + prev = tun; + } + ofpact_put_OUTPUT(remote_ofpacts)->port = tun->ofport; + } +} + static void consider_mc_group(enum mf_field_id mff_ovn_geneve, const struct simap *ct_zones, @@ -1389,6 +1429,7 @@ consider_mc_group(enum mf_field_id mff_ovn_geneve, } struct sset remote_chassis = SSET_INITIALIZER(&remote_chassis); + struct sset vtep_chassis = SSET_INITIALIZER(&vtep_chassis); struct match match; match_init_catchall(&match); @@ -1397,7 +1438,8 @@ consider_mc_group(enum mf_field_id mff_ovn_geneve, /* Go through all of the ports in the multicast group: * - * - For remote ports, add the chassis to 'remote_chassis'. + * - For remote ports, add the chassis to 'remote_chassis' or + * 'vtep_chassis'. * * - For local ports (other than logical patch ports), add actions * to 'ofpacts' to set the output port and resubmit. @@ -1445,7 +1487,12 @@ consider_mc_group(enum mf_field_id mff_ovn_geneve, /* Add remote chassis only when localnet port not exist, * otherwise multicast will reach remote ports through localnet * port. */ - sset_add(&remote_chassis, port->chassis->name); + if (smap_get_bool(&port->chassis->other_config, + "is-vtep", false)) { + sset_add(&vtep_chassis, port->chassis->name); + } else { + sset_add(&remote_chassis, port->chassis->name); + } } } @@ -1470,7 +1517,8 @@ consider_mc_group(enum mf_field_id mff_ovn_geneve, * * Handle output to the remote chassis in the multicast group, if * any. */ - if (!sset_is_empty(&remote_chassis) || remote_ofpacts.size > 0) { + if (!sset_is_empty(&remote_chassis) || + !sset_is_empty(&vtep_chassis) || remote_ofpacts.size > 0) { if (remote_ofpacts.size > 0) { /* Following delivery to logical patch ports, restore the * multicast group as the logical output port. */ @@ -1478,22 +1526,12 @@ consider_mc_group(enum mf_field_id mff_ovn_geneve, &remote_ofpacts); } - const char *chassis_name; - const struct chassis_tunnel *prev = NULL; - SSET_FOR_EACH (chassis_name, &remote_chassis) { - const struct chassis_tunnel *tun - = chassis_tunnel_find(chassis_tunnels, chassis_name, NULL); - if (!tun) { - continue; - } - - if (!prev || tun->type != prev->type) { - put_encapsulation(mff_ovn_geneve, tun, mc->datapath, - mc->tunnel_key, true, &remote_ofpacts); - prev = tun; - } - ofpact_put_OUTPUT(&remote_ofpacts)->port = tun->ofport; - } + fanout_to_chassis(mff_ovn_geneve, &remote_chassis, chassis_tunnels, + mc->datapath, mc->tunnel_key, false, + &remote_ofpacts); + fanout_to_chassis(mff_ovn_geneve, &vtep_chassis, chassis_tunnels, + mc->datapath, mc->tunnel_key, true, + &remote_ofpacts); if (remote_ofpacts.size) { if (local_ports) { @@ -1660,6 +1698,27 @@ physical_run(struct physical_ctx *p_ctx, &ofpacts, hc_uuid); } + /* Add VXLAN specific rules to transform port keys + * from 12 bits to 16 bits used elsewhere. */ + HMAP_FOR_EACH (tun, hmap_node, p_ctx->chassis_tunnels) { + if (tun->type == VXLAN) { + ofpbuf_clear(&ofpacts); + + struct match match = MATCH_CATCHALL_INITIALIZER; + match_set_in_port(&match, tun->ofport); + ovs_be64 mcast_bits = htonll((OVN_VXLAN_MIN_MULTICAST << 12)); + match_set_tun_id_masked(&match, mcast_bits, mcast_bits); + + put_load(1, MFF_LOG_OUTPORT, 15, 1, &ofpacts); + put_move(MFF_TUN_ID, 12, MFF_LOG_OUTPORT, 0, 11, &ofpacts); + put_move(MFF_TUN_ID, 0, MFF_LOG_DATAPATH, 0, 12, &ofpacts); + put_resubmit(OFTABLE_LOCAL_OUTPUT, &ofpacts); + + ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 105, 0, + &match, &ofpacts, hc_uuid); + } + } + /* Handle ramp switch encapsulations. */ HMAP_FOR_EACH (tun, hmap_node, p_ctx->chassis_tunnels) { if (tun->type != VXLAN) { diff --git a/lib/mcast-group-index.h b/lib/mcast-group-index.h index 72af117a4..4ebfc9520 100644 --- a/lib/mcast-group-index.h +++ b/lib/mcast-group-index.h @@ -35,6 +35,21 @@ enum ovn_mcast_tunnel_keys { OVN_MAX_IP_MULTICAST = OVN_MAX_MULTICAST, }; +#define OVN_VXLAN_MIN_MULTICAST 2048 +#define OVN_VXLAN_MAX_MULTICAST 4095 + +enum ovn_vxlan_mcast_tunnel_keys { + + OVN_VXLAN_MCAST_FLOOD_TUNNEL_KEY = OVN_VXLAN_MIN_MULTICAST, + OVN_VXLAN_MCAST_UNKNOWN_TUNNEL_KEY, + OVN_VXLAN_MCAST_MROUTER_FLOOD_TUNNEL_KEY, + OVN_VXLAN_MCAST_MROUTER_STATIC_TUNNEL_KEY, + OVN_VXLAN_MCAST_STATIC_TUNNEL_KEY, + OVN_VXLAN_MCAST_FLOOD_L2_TUNNEL_KEY, + OVN_VXLAN_MIN_IP_MULTICAST, + OVN_VXLAN_MAX_IP_MULTICAST = OVN_VXLAN_MAX_MULTICAST, +}; + struct ovsdb_idl_index *mcast_group_index_create(struct ovsdb_idl *); const struct sbrec_multicast_group * mcast_group_lookup(struct ovsdb_idl_index *mcgroup_index, diff --git a/tests/ovn.at b/tests/ovn.at index 30625ec37..5ad5511f3 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -3271,8 +3271,13 @@ AT_SETUP([VLAN transparency, passthru=true, ARP responder disabled]) ovn_start net_add net -check ovs-vsctl add-br br-phys -ovn_attach net br-phys 192.168.0.1 +for i in 1 2; do + sim_add hv-$i + as hv-$i + check ovs-vsctl add-br br-phys + check ovs-vsctl set open . external-ids:ovn-bridge-mappings=phys:br-phys + ovn_attach net br-phys 192.168.0.$i 24 vxlan +done check ovn-nbctl ls-add ls check ovn-nbctl --wait=sb add Logical-Switch ls other_config vlan-passthru=true @@ -3283,6 +3288,7 @@ for i in 1 2; do done for i in 1 2; do + as hv-$i check ovs-vsctl add-port br-int vif$i -- set Interface vif$i external-ids:iface-id=lsp$i \ options:tx_pcap=vif$i-tx.pcap \ options:rxq_pcap=vif$i-rx.pcap \ @@ -3298,18 +3304,27 @@ AT_CHECK([grep -w "ls_in_arp_rsp" lsflows | sort], [0], [dnl table=16(ls_in_arp_rsp ), priority=0 , match=(1), action=(next;) ]) +for i in 1 2; do + : > $i.expected +done + test_arp() { local inport=$1 outport=$2 sha=$3 spa=$4 tpa=$5 reply_ha=$6 tag=8100fefe local request=ffffffffffff${sha}${tag}08060001080006040001${sha}${spa}ffffffffffff${tpa} - ovs-appctl netdev-dummy/receive vif$inport $request + as hv-$inport ovs-appctl netdev-dummy/receive vif$inport $request echo $request >> $outport.expected local reply=${sha}${reply_ha}${tag}08060001080006040002${reply_ha}${tpa}${sha}${spa} - ovs-appctl netdev-dummy/receive vif$outport $reply + as hv-$outport ovs-appctl netdev-dummy/receive vif$outport $reply echo $reply >> $inport.expected } +# Pre-populate the hypervisors' ARP tables so that we don't lose any +# packets for ARP resolution (native tunneling doesn't queue packets +# for ARP resolution). +OVN_POPULATE_ARP + test_arp 1 2 f00000000001 0a000001 0a000002 f00000000002 test_arp 2 1 f00000000002 0a000002 0a000001 f00000000001 -- 2.31.1 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev