On 1/22/17, 3:40 AM, Nikolay Aleksandrov wrote: > On 21/01/17 06:46, Roopa Prabhu wrote: >> From: Roopa Prabhu <ro...@cumulusnetworks.com> >> >> This patch series makes vxlan COLLECT_METADATA mode bridge >> and layer2 network friendly. Vxlan COLLECT_METADATA mode today >> solves the per-vni netdev scalability problem in l3 networks. >> When a vxlan collect metadata device participates in bridging >> vlan to vn-segments, it can only get the vlan mapped vni in >> the xmit tunnel dst metadata. It will need the vxlan driver to >> continue to learn, hold forwarding state and remote destination >> information similar to how it already does for non COLLECT_METADATA >> vxlan netdevices today. >> >> Changes introduced by this patch: >> - allow learning and forwarding database state to vxlan netdev in >> COLLECT_METADATA mode. Current behaviour is not changed >> by default. tunnel info flag IP_TUNNEL_INFO_BRIDGE is used >> to support the new bridge friendly mode. >> - A single fdb table hashed by (mac, vni) to allow fdb entries with >> multiple vnis in the same fdb table >> - rx path already has the vni >> - tx path expects a vni in the packet with dst_metadata >> - prior to this series, fdb remote_dsts carried remote vni and >> the vxlan device carrying the fdb table represented the >> source vni. With the vxlan device now representing multiple vnis, >> this patch adds a src vni attribute to the fdb entry. The remote >> vni already uses NDA_VNI attribute. This patch introduces >> NDA_SRC_VNI netlink attribute to represent the src vni in a multi >> vni fdb table. 
>> >> Signed-off-by: Roopa Prabhu <ro...@cumulusnetworks.com> >> --- > [snip] >> @@ -2173,23 +2221,29 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, >> struct net_device *dev) >> bool did_rsc = false; >> struct vxlan_rdst *rdst, *fdst = NULL; >> struct vxlan_fdb *f; >> + __be32 vni = 0; >> >> info = skb_tunnel_info(skb); >> >> skb_reset_mac_header(skb); >> >> if (vxlan->flags & VXLAN_F_COLLECT_METADATA) { >> - if (info && info->mode & IP_TUNNEL_INFO_TX) >> - vxlan_xmit_one(skb, dev, NULL, false); >> - else >> - kfree_skb(skb); >> - return NETDEV_TX_OK; >> + if (info && info->mode & IP_TUNNEL_INFO_BRIDGE && >> + info->mode & IP_TUNNEL_INFO_TX) { > nit: parentheses around the IP_TUNNEL_INFO_TX check > >> + vni = tunnel_id_to_key32(info->key.tun_id); >> + } else { >> + if (info && info->mode & IP_TUNNEL_INFO_TX) > nit: parentheses around the IP_TUNNEL_INFO_TX check
ack >> + vxlan_xmit_one(skb, dev, vni, NULL, false); >> + else >> + kfree_skb(skb); >> + return NETDEV_TX_OK; >> + } >> } >> >> if (vxlan->flags & VXLAN_F_PROXY) { >> eth = eth_hdr(skb); >> if (ntohs(eth->h_proto) == ETH_P_ARP) >> - return arp_reduce(dev, skb); >> + return arp_reduce(dev, skb, vni); >> #if IS_ENABLED(CONFIG_IPV6) >> else if (ntohs(eth->h_proto) == ETH_P_IPV6 && >> pskb_may_pull(skb, sizeof(struct ipv6hdr) >> @@ -2200,13 +2254,13 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, >> struct net_device *dev) >> msg = (struct nd_msg >> *)skb_transport_header(skb); >> if (msg->icmph.icmp6_code == 0 && >> msg->icmph.icmp6_type == >> NDISC_NEIGHBOUR_SOLICITATION) >> - return neigh_reduce(dev, skb); >> + return neigh_reduce(dev, skb, vni); >> } >> #endif >> } >> >> eth = eth_hdr(skb); >> - f = vxlan_find_mac(vxlan, eth->h_dest); >> + f = vxlan_find_mac(vxlan, eth->h_dest, vni); >> did_rsc = false; >> >> if (f && (f->flags & NTF_ROUTER) && (vxlan->flags & VXLAN_F_RSC) && >> @@ -2214,11 +2268,11 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, >> struct net_device *dev) >> ntohs(eth->h_proto) == ETH_P_IPV6)) { >> did_rsc = route_shortcircuit(dev, skb); >> if (did_rsc) >> - f = vxlan_find_mac(vxlan, eth->h_dest); >> + f = vxlan_find_mac(vxlan, eth->h_dest, vni); >> } >> >> if (f == NULL) { >> - f = vxlan_find_mac(vxlan, all_zeros_mac); >> + f = vxlan_find_mac(vxlan, all_zeros_mac, vni); >> if (f == NULL) { >> if ((vxlan->flags & VXLAN_F_L2MISS) && >> !is_multicast_ether_addr(eth->h_dest)) >> @@ -2239,11 +2293,11 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, >> struct net_device *dev) >> } >> skb1 = skb_clone(skb, GFP_ATOMIC); >> if (skb1) >> - vxlan_xmit_one(skb1, dev, rdst, did_rsc); >> + vxlan_xmit_one(skb1, dev, vni, rdst, did_rsc); >> } >> >> if (fdst) >> - vxlan_xmit_one(skb, dev, fdst, did_rsc); >> + vxlan_xmit_one(skb, dev, vni, fdst, did_rsc); >> else >> kfree_skb(skb); >> return NETDEV_TX_OK; >> @@ -2307,12 +2361,12 @@ static int 
vxlan_init(struct net_device *dev) >> return 0; >> } >> >> -static void vxlan_fdb_delete_default(struct vxlan_dev *vxlan) >> +static void vxlan_fdb_delete_default(struct vxlan_dev *vxlan, __be32 vni) >> { >> struct vxlan_fdb *f; >> >> spin_lock_bh(&vxlan->hash_lock); >> - f = __vxlan_find_mac(vxlan, all_zeros_mac); >> + f = __vxlan_find_mac(vxlan, all_zeros_mac, vni); >> if (f) >> vxlan_fdb_destroy(vxlan, f); >> spin_unlock_bh(&vxlan->hash_lock); >> @@ -2322,7 +2376,7 @@ static void vxlan_uninit(struct net_device *dev) >> { >> struct vxlan_dev *vxlan = netdev_priv(dev); >> >> - vxlan_fdb_delete_default(vxlan); >> + vxlan_fdb_delete_default(vxlan, vxlan->cfg.vni); >> >> free_percpu(dev->tstats); >> } >> @@ -2536,6 +2590,8 @@ static void vxlan_setup(struct net_device *dev) >> dev->vlan_features = dev->features; >> dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM; >> dev->hw_features |= NETIF_F_GSO_SOFTWARE; >> + dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX; >> + dev->features |= dev->hw_features; >> netif_keep_dst(dev); >> dev->priv_flags |= IFF_NO_QUEUE; >> >> @@ -2921,6 +2977,7 @@ static int vxlan_dev_configure(struct net *src_net, >> struct net_device *dev, >> NLM_F_EXCL|NLM_F_CREATE, >> vxlan->cfg.dst_port, >> vxlan->default_dst.remote_vni, >> + vxlan->default_dst.remote_vni, >> vxlan->default_dst.remote_ifindex, >> NTF_SELF); >> if (err) >> @@ -2929,7 +2986,7 @@ static int vxlan_dev_configure(struct net *src_net, >> struct net_device *dev, >> >> err = register_netdevice(dev); >> if (err) { >> - vxlan_fdb_delete_default(vxlan); >> + vxlan_fdb_delete_default(vxlan, vxlan->cfg.vni); >> return err; >> } >> >> @@ -3023,19 +3080,19 @@ static int vxlan_newlink(struct net *src_net, struct >> net_device *dev, >> conf.flags |= VXLAN_F_UDP_ZERO_CSUM_TX; >> >> if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX] && >> - nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX])) >> + !nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX])) >> conf.flags |= VXLAN_F_UDP_ZERO_CSUM6_TX; >> >> if 
(data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX] && >> - nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX])) >> + !nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX])) >> conf.flags |= VXLAN_F_UDP_ZERO_CSUM6_RX; >> >> if (data[IFLA_VXLAN_REMCSUM_TX] && >> - nla_get_u8(data[IFLA_VXLAN_REMCSUM_TX])) >> + !nla_get_u8(data[IFLA_VXLAN_REMCSUM_TX])) >> conf.flags |= VXLAN_F_REMCSUM_TX; >> >> if (data[IFLA_VXLAN_REMCSUM_RX] && >> - nla_get_u8(data[IFLA_VXLAN_REMCSUM_RX])) >> + !nla_get_u8(data[IFLA_VXLAN_REMCSUM_RX])) >> conf.flags |= VXLAN_F_REMCSUM_RX; > Aren't these going to break user-space ? Correct... ignore these. Not intentional. These were from an incorrect merge with an earlier changelink patch I had. Did not realize these had crept in. Thanks.