The current Linux kernel git tree already includes a VXLAN-GPE implementation:

author  Jiri Benc <jb...@redhat.com>
committer       David S. Miller <da...@davemloft.net>
commit  e1e5314de08ba6003b358125eafc9ad9e75a950c (patch)
tree    1e18cdabf1c9d9ef17e26c6480e629465447f77f /drivers/net/vxlan.c
parent  a6d5bbf34efa8330af7b0b1dba0f38148516ed97 (diff)
vxlan: implement GPE

This patch ports that implementation to the OVS out-of-tree (compat) datapath,
so that users can take advantage of VXLAN-GPE without having to upgrade to the
latest Linux kernel.

Signed-off-by: Johnson Li <johnson...@intel.com>
Signed-off-by: Yi Yang <yi.y.y...@intel.com>
---
 datapath/linux/compat/include/linux/if_link.h     |   4 +
 datapath/linux/compat/include/linux/openvswitch.h |   1 +
 datapath/linux/compat/include/net/vxlan.h         |  73 ++++
 datapath/linux/compat/vxlan.c                     | 461 ++++++++++++++++++++--
 lib/dpif-netlink.c                                |   5 +
 lib/netdev-vport.c                                |   4 +-
 6 files changed, 512 insertions(+), 36 deletions(-)

diff --git a/datapath/linux/compat/include/linux/if_link.h 
b/datapath/linux/compat/include/linux/if_link.h
index 6209dcb..de87769 100644
--- a/datapath/linux/compat/include/linux/if_link.h
+++ b/datapath/linux/compat/include/linux/if_link.h
@@ -100,6 +100,10 @@ enum {
        IFLA_VXLAN_REMCSUM_NOPARTIAL,
 #define IFLA_VXLAN_COLLECT_METADATA rpl_IFLA_VXLAN_COLLECT_METADATA
        IFLA_VXLAN_COLLECT_METADATA,
+#define IFLA_VXLAN_LABEL rpl_IFLA_VXLAN_LABEL
+        IFLA_VXLAN_LABEL,
+#define IFLA_VXLAN_GPE rpl_IFLA_VXLAN_GPE
+        IFLA_VXLAN_GPE,
 #define __IFLA_VXLAN_MAX rpl___IFLA_VXLAN_MAX
        __IFLA_VXLAN_MAX
 };
diff --git a/datapath/linux/compat/include/linux/openvswitch.h 
b/datapath/linux/compat/include/linux/openvswitch.h
index edfa7a1..761d9c6 100644
--- a/datapath/linux/compat/include/linux/openvswitch.h
+++ b/datapath/linux/compat/include/linux/openvswitch.h
@@ -287,6 +287,7 @@ enum ovs_vport_attr {
 enum {
        OVS_VXLAN_EXT_UNSPEC,
        OVS_VXLAN_EXT_GBP,      /* Flag or __u32 */
+       OVS_VXLAN_EXT_GPE,      /* Flag, Generic Protocol Extension */
        __OVS_VXLAN_EXT_MAX,
 };
 
diff --git a/datapath/linux/compat/include/net/vxlan.h 
b/datapath/linux/compat/include/net/vxlan.h
index 75a5a7a..b3f45c4 100644
--- a/datapath/linux/compat/include/net/vxlan.h
+++ b/datapath/linux/compat/include/net/vxlan.h
@@ -84,6 +84,66 @@ struct vxlanhdr_gbp {
 #define VXLAN_GBP_POLICY_APPLIED       (BIT(3) << 16)
 #define VXLAN_GBP_ID_MASK              (0xFFFF)
 
+/*
+ * VXLAN Generic Protocol Extension (VXLAN_F_GPE):
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |R|R|Ver|I|P|R|O|       Reserved                |Next Protocol  |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                VXLAN Network Identifier (VNI) |   Reserved    |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * Ver = Version. Indicates VXLAN GPE protocol version.
+ *
+ * P = Next Protocol Bit. The P bit is set to indicate that the
+ *     Next Protocol field is present.
+ *
+ * O = OAM Flag Bit. The O bit is set to indicate that the packet
+ *     is an OAM packet.
+ *
+ * Next Protocol = This 8 bit field indicates the protocol header
+ * immediately following the VXLAN GPE header.
+ *
+ * https://tools.ietf.org/html/draft-ietf-nvo3-vxlan-gpe-01
+ */
+
+struct vxlanhdr_gpe {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+       u8      oam_flag:1,
+               reserved_flags1:1,
+               np_applied:1,
+               instance_applied:1,
+               version:2,
+               reserved_flags2:2;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+       u8      reserved_flags2:2,
+               version:2,
+               instance_applied:1,
+               np_applied:1,
+               reserved_flags1:1,
+               oam_flag:1;
+#endif
+       u8      reserved_flags3;
+       u8      reserved_flags4;
+       u8      next_protocol;
+       __be32  vx_vni;
+};
+
+/* VXLAN-GPE header flags. */
+#define VXLAN_HF_VER   (BIT(29) | BIT(28))
+#define VXLAN_HF_NP    (BIT(26))
+#define VXLAN_HF_OAM   (BIT(24))
+#define VXLAN_HF_GPE   (BIT(26))
+
+#define VXLAN_GPE_USED_BITS (VXLAN_HF_VER | VXLAN_HF_NP | VXLAN_HF_OAM | \
+                            (0xFF))
+
+/* VXLAN-GPE header Next Protocol. */
+#define VXLAN_GPE_NP_IPV4      0x01
+#define VXLAN_GPE_NP_IPV6      0x02
+#define VXLAN_GPE_NP_ETHERNET  0x03
+#define VXLAN_GPE_NP_NSH       0x04
+#ifndef ETH_P_NSH
+#define ETH_P_NSH              0x894f
+#endif
+
 /* VXLAN protocol header:
  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  * |G|R|R|R|I|R|R|C|               Reserved                        |
@@ -167,6 +227,7 @@ struct vxlan_config {
        __u16                   port_max;
        __u8                    tos;
        __u8                    ttl;
+       __be32                  label;
        u32                     flags;
        unsigned long           age_interval;
        unsigned int            addrmax;
@@ -205,15 +266,27 @@ struct vxlan_dev {
 #define VXLAN_F_GBP                    0x800
 #define VXLAN_F_REMCSUM_NOPARTIAL      0x1000
 #define VXLAN_F_COLLECT_METADATA       0x2000
+#define VXLAN_F_GPE                     0x4000
+#define VXLAN_F_UDP_ZERO_CSUM_TX VXLAN_F_UDP_CSUM
 
 /* Flags that are used in the receive path. These flags must match in
  * order for a socket to be shareable
  */
 #define VXLAN_F_RCV_FLAGS              (VXLAN_F_GBP |                  \
+                                         VXLAN_F_GPE |                  \
                                         VXLAN_F_UDP_ZERO_CSUM6_RX |    \
                                         VXLAN_F_REMCSUM_RX |           \
                                         VXLAN_F_REMCSUM_NOPARTIAL |    \
                                         VXLAN_F_COLLECT_METADATA)
+
+/* Flags that can be set together with VXLAN_F_GPE. */
+#define VXLAN_F_ALLOWED_GPE             (VXLAN_F_GPE |                  \
+                                         VXLAN_F_IPV6 |                 \
+                                         VXLAN_F_UDP_CSUM |             \
+                                         VXLAN_F_UDP_ZERO_CSUM6_TX |    \
+                                         VXLAN_F_UDP_ZERO_CSUM6_RX |    \
+                                         VXLAN_F_COLLECT_METADATA)
+
 #define vxlan_dev_create rpl_vxlan_dev_create
 struct net_device *rpl_vxlan_dev_create(struct net *net, const char *name,
                                    u8 name_assign_type, struct vxlan_config 
*conf);
diff --git a/datapath/linux/compat/vxlan.c b/datapath/linux/compat/vxlan.c
index 4faa18f..570d2d9 100644
--- a/datapath/linux/compat/vxlan.c
+++ b/datapath/linux/compat/vxlan.c
@@ -812,6 +812,45 @@ static struct vxlanhdr *vxlan_remcsum(struct sk_buff *skb, 
struct vxlanhdr *vh,
 }
 #endif
 
+static bool vxlan_parse_gpe_hdr(struct vxlanhdr *unparsed,
+                              __be32 *protocol,
+                              struct sk_buff *skb, u32 vxflags)
+{
+       struct vxlanhdr_gpe *gpe = (struct vxlanhdr_gpe *)unparsed;
+
+       /* Need to have Next Protocol set for interfaces in GPE mode. */
+       if (!gpe->np_applied)
+              return false;
+       /* "The initial version is 0. If a receiver does not support the
+        * version indicated it MUST drop the packet."
+        */
+       if (gpe->version != 0)
+              return false;
+       /* "When the O bit is set to 1, the packet is an OAM packet and OAM
+        * processing MUST occur." However, we don't implement OAM
+        * processing, thus drop the packet.
+        */
+       if (gpe->oam_flag)
+              return false;
+
+       switch (gpe->next_protocol) {
+       case VXLAN_GPE_NP_IPV4:
+              *protocol = htons(ETH_P_IP);
+              break;
+       case VXLAN_GPE_NP_IPV6:
+              *protocol = htons(ETH_P_IPV6);
+              break;
+       case VXLAN_GPE_NP_ETHERNET:
+              *protocol = htons(ETH_P_TEB);
+              break;
+       default:
+              return false;
+       }
+
+       /* vx_flags is __be32; the mask is defined in host order. */
+       unparsed->vx_flags &= ~htonl(VXLAN_GPE_USED_BITS);
+       return true;
+}
+
 static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
                      struct vxlan_metadata *md, u32 vni,
                      struct metadata_dst *tun_dst)
@@ -822,6 +861,9 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff 
*skb,
        struct pcpu_sw_netstats *stats;
        union vxlan_addr saddr;
        int err = 0;
+       struct vxlanhdr unparsed;
+       __be32 protocol = htons(ETH_P_TEB);
+       bool raw_proto = false;
 
        /* For flow based devices, map all packets to VNI 0 */
        if (vs->flags & VXLAN_F_COLLECT_METADATA)
@@ -832,14 +874,35 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct 
sk_buff *skb,
        if (!vxlan)
                goto drop;
 
-       skb_reset_mac_header(skb);
-       skb_scrub_packet(skb, !net_eq(vxlan->net, dev_net(vxlan->dev)));
-       skb->protocol = eth_type_trans(skb, vxlan->dev);
-       skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
+       /* For backwards compatibility, only allow reserved fields to be
+        * used by VXLAN extensions if explicitly requested.
+        */
+       if (vs->flags & VXLAN_F_GPE) {
+               unparsed = *(struct vxlanhdr *)(udp_hdr(skb) + 1);
+               if (!vxlan_parse_gpe_hdr(&unparsed, &protocol, skb, vs->flags))
+                       goto drop;
+               if (protocol != htons(ETH_P_TEB))
+                       raw_proto = true;
+       }
 
-       /* Ignore packet loops (and multicast echo) */
-       if (ether_addr_equal(eth_hdr(skb)->h_source, vxlan->dev->dev_addr))
-               goto drop;
+       if (!raw_proto) {
+               skb_reset_mac_header(skb);
+               skb_scrub_packet(skb, !net_eq(vxlan->net, dev_net(vxlan->dev)));
+               skb->protocol = eth_type_trans(skb, vxlan->dev);
+               skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
+
+               /* Ignore packet loops (and multicast echo) */
+               if (ether_addr_equal(eth_hdr(skb)->h_source, 
vxlan->dev->dev_addr))
+                       goto drop;
+
+               if ((vxlan->flags & VXLAN_F_LEARN) &&
+                   vxlan_snoop(skb->dev, &saddr, eth_hdr(skb)->h_source))
+                       goto drop;
+       } else {
+               skb->dev = vxlan->dev;
+               skb->pkt_type = PACKET_HOST;
+       }
 
        /* Get data from the outer IP header */
        if (vxlan_get_sk_family(vs) == AF_INET) {
@@ -861,10 +924,6 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct 
sk_buff *skb,
                goto drop;
        }
 
-       if ((vxlan->flags & VXLAN_F_LEARN) &&
-           vxlan_snoop(skb->dev, &saddr, eth_hdr(skb)->h_source))
-               goto drop;
-
        skb_reset_network_header(skb);
        /* In flow-based mode, GBP is carried in dst_metadata */
        if (!(vs->flags & VXLAN_F_COLLECT_METADATA))
@@ -908,6 +967,8 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct 
sk_buff *skb)
                struct metadata_dst dst;
                char buf[sizeof(struct metadata_dst) + sizeof(*md)];
        } buf;
+       struct vxlanhdr unparsed;
+       __be32 protocol = htons(ETH_P_TEB);
 
        /* Need Vxlan and inner Ethernet header to be present */
        if (!pskb_may_pull(skb, VXLAN_HLEN))
@@ -924,14 +985,25 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct 
sk_buff *skb)
                goto bad_flags;
        }
 
-       if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB)))
-               goto drop;
-       vxh = (struct vxlanhdr *)(udp_hdr(skb) + 1);
-
        vs = rcu_dereference_sk_user_data(sk);
        if (!vs)
                goto drop;
 
+       /* For backwards compatibility, only allow reserved fields to be
+        * used by VXLAN extensions if explicitly requested.
+        */
+       if (vs->flags & VXLAN_F_GPE) {
+               unparsed = *(struct vxlanhdr *)(udp_hdr(skb) + 1);
+               if (!vxlan_parse_gpe_hdr(&unparsed, &protocol, skb, vs->flags))
+                       goto drop;
+               buf.dst.u.tun_info.key.tun_flags |= TUNNEL_VXLAN_OPT;
+               flags &= ~VXLAN_GPE_USED_BITS;
+       }
+
+       if (iptunnel_pull_header(skb, VXLAN_HLEN, protocol))
+               goto drop;
+       vxh = (struct vxlanhdr *)(udp_hdr(skb) + 1);
+
 #ifdef HAVE_VXLAN_HF_RCO
        if ((flags & VXLAN_HF_RCO) && (vs->flags & VXLAN_F_REMCSUM_RX)) {
                vxh = vxlan_remcsum(skb, vxh, sizeof(struct vxlanhdr), vni,
@@ -1023,6 +1095,33 @@ static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, 
u32 vxflags,
        gbp->policy_id = htons(md->gbp & VXLAN_GBP_ID_MASK);
 }
 
+static int vxlan_build_gpe_hdr(struct vxlanhdr *vxh, u32 vxflags,
+                              __be16 protocol)
+{
+       struct vxlanhdr_gpe *gpe = (struct vxlanhdr_gpe *)vxh;
+
+       vxh->vx_flags |= htonl(VXLAN_HF_GPE);
+       gpe->np_applied = 1;
+       gpe->version = 0;
+       gpe->oam_flag = 0;
+
+       switch (protocol) {
+       case htons(ETH_P_IP):
+               gpe->next_protocol = VXLAN_GPE_NP_IPV4;
+               return 0;
+       case htons(ETH_P_IPV6):
+               gpe->next_protocol = VXLAN_GPE_NP_IPV6;
+               return 0;
+       case htons(ETH_P_TEB):
+               gpe->next_protocol = VXLAN_GPE_NP_ETHERNET;
+               return 0;
+       case htons(ETH_P_NSH):
+               gpe->next_protocol = VXLAN_GPE_NP_NSH;
+               return 0;
+       }
+       return -EPFNOSUPPORT;
+}
+
 #if IS_ENABLED(CONFIG_IPV6)
 static int vxlan6_xmit_skb(struct dst_entry *dst, struct sock *sk,
                           struct sk_buff *skb,
@@ -1036,6 +1135,7 @@ static int vxlan6_xmit_skb(struct dst_entry *dst, struct 
sock *sk,
        int err;
        bool udp_sum = !(vxflags & VXLAN_F_UDP_ZERO_CSUM6_TX);
        int type = 0;
+       __be16 inner_protocol = htons(ETH_P_TEB);
 
        if ((vxflags & VXLAN_F_REMCSUM_TX) &&
            skb->ip_summed == CHECKSUM_PARTIAL) {
@@ -1106,8 +1206,14 @@ static int vxlan6_xmit_skb(struct dst_entry *dst, struct 
sock *sk,
 
        if (vxflags & VXLAN_F_GBP)
                vxlan_build_gbp_hdr(vxh, vxflags, md);
+       if (vxflags & VXLAN_F_GPE) {
+               err = vxlan_build_gpe_hdr(vxh, vxflags, skb->protocol);
+               if (err < 0)
+                       goto err;
+               inner_protocol = skb->protocol;
+       }
 
-       ovs_skb_set_inner_protocol(skb, htons(ETH_P_TEB));
+       ovs_skb_set_inner_protocol(skb, inner_protocol);
 
        udp_tunnel6_xmit_skb(dst, sk, skb, dev, saddr, daddr, prio,
                             ttl, src_port, dst_port,
@@ -1129,6 +1235,7 @@ static int vxlan_xmit_skb(struct rtable *rt, struct sock 
*sk, struct sk_buff *sk
        int err;
        bool udp_sum = !!(vxflags & VXLAN_F_UDP_CSUM);
        int type = 0;
+       __be16 inner_protocol = htons(ETH_P_TEB);
 
        if ((vxflags & VXLAN_F_REMCSUM_TX) &&
            skb->ip_summed == CHECKSUM_PARTIAL) {
@@ -1191,8 +1298,14 @@ static int vxlan_xmit_skb(struct rtable *rt, struct sock 
*sk, struct sk_buff *sk
        }
        if (vxflags & VXLAN_F_GBP)
                vxlan_build_gbp_hdr(vxh, vxflags, md);
+       if (vxflags & VXLAN_F_GPE) {
+               err = vxlan_build_gpe_hdr(vxh, vxflags, skb->protocol);
+               if (err < 0)
+                       return err;
+               inner_protocol = skb->protocol;
+       }
 
-       ovs_skb_set_inner_protocol(skb, htons(ETH_P_TEB));
+       ovs_skb_set_inner_protocol(skb, inner_protocol);
 
        return udp_tunnel_xmit_skb(rt, sk, skb, src, dst, tos,
                                   ttl, df, src_port, dst_port, xnet,
@@ -1419,7 +1532,7 @@ tx_free:
  *
  * Outer IP header inherits ECN and DF from inner header.
  * Outer UDP destination is the VXLAN assigned port.
- *           source port is based on hash of flow
+ *        source port is based on hash of flow
  */
 netdev_tx_t rpl_vxlan_xmit(struct sk_buff *skb)
 {
@@ -1648,7 +1761,7 @@ static netdev_tx_t vxlan_dev_xmit(struct sk_buff *skb, 
struct net_device *dev)
        return NETDEV_TX_OK;
 }
 
-static const struct net_device_ops vxlan_netdev_ops = {
+static const struct net_device_ops vxlan_netdev_ether_ops = {
        .ndo_init               = vxlan_init,
        .ndo_uninit             = vxlan_uninit,
        .ndo_get_stats64        = ip_tunnel_get_stats64,
@@ -1661,6 +1774,16 @@ static const struct net_device_ops vxlan_netdev_ops = {
        .ndo_set_mac_address    = eth_mac_addr,
 };
 
+static const struct net_device_ops vxlan_netdev_raw_ops = {
+       .ndo_init               = vxlan_init,
+       .ndo_uninit             = vxlan_uninit,
+       .ndo_get_stats64        = ip_tunnel_get_stats64,
+       .ndo_open               = vxlan_open,
+       .ndo_stop               = vxlan_stop,
+       .ndo_start_xmit         = vxlan_dev_xmit,
+       .ndo_change_mtu         = vxlan_change_mtu,
+};
+
 /* Info for udev, that this is a virtual tunnel endpoint */
 static struct device_type vxlan_type = {
        .name = "vxlan",
@@ -1675,7 +1798,7 @@ static void vxlan_setup(struct net_device *dev)
        eth_hw_addr_random(dev);
        ether_setup(dev);
 
-       dev->netdev_ops = &vxlan_netdev_ops;
+       dev->netdev_ops = &vxlan_netdev_ether_ops;
        dev->destructor = free_netdev;
        SET_NETDEV_DEVTYPE(dev, &vxlan_type);
 
@@ -1712,8 +1835,51 @@ static void vxlan_setup(struct net_device *dev)
                INIT_HLIST_HEAD(&vxlan->fdb_head[h]);
 }
 
+static void vxlan_ether_setup(struct net_device *dev)
+{
+       dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+       dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+       dev->netdev_ops = &vxlan_netdev_ether_ops;
+}
+
+static void vxlan_raw_setup(struct net_device *dev)
+{
+       dev->header_ops = NULL;
+       dev->type = ARPHRD_NONE;
+       dev->hard_header_len = 0;
+       dev->addr_len = 0;
+       dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
+       dev->netdev_ops = &vxlan_netdev_raw_ops;
+}
+
 static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
-       [IFLA_VXLAN_PORT]       = { .type = NLA_U16 },
+       [IFLA_VXLAN_ID]  = { .type = NLA_U32 },
+       [IFLA_VXLAN_GROUP]      = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
+       [IFLA_VXLAN_GROUP6]     = { .len = sizeof(struct in6_addr) },
+       [IFLA_VXLAN_LINK]       = { .type = NLA_U32 },
+       [IFLA_VXLAN_LOCAL]      = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
+       [IFLA_VXLAN_LOCAL6]     = { .len = sizeof(struct in6_addr) },
+       [IFLA_VXLAN_TOS]        = { .type = NLA_U8 },
+       [IFLA_VXLAN_TTL]        = { .type = NLA_U8 },
+       [IFLA_VXLAN_LABEL]      = { .type = NLA_U32 },
+       [IFLA_VXLAN_LEARNING]   = { .type = NLA_U8 },
+       [IFLA_VXLAN_AGEING]     = { .type = NLA_U32 },
+       [IFLA_VXLAN_LIMIT]      = { .type = NLA_U32 },
+       [IFLA_VXLAN_PORT_RANGE] = { .len  = sizeof(struct 
ifla_vxlan_port_range) },
+       [IFLA_VXLAN_PROXY]      = { .type = NLA_U8 },
+       [IFLA_VXLAN_RSC]        = { .type = NLA_U8 },
+       [IFLA_VXLAN_L2MISS]     = { .type = NLA_U8 },
+       [IFLA_VXLAN_L3MISS]     = { .type = NLA_U8 },
+       [IFLA_VXLAN_COLLECT_METADATA]   = { .type = NLA_U8 },
+       [IFLA_VXLAN_PORT]       = { .type = NLA_U16 },
+       [IFLA_VXLAN_UDP_CSUM]   = { .type = NLA_U8 },
+       [IFLA_VXLAN_UDP_ZERO_CSUM6_TX]  = { .type = NLA_U8 },
+       [IFLA_VXLAN_UDP_ZERO_CSUM6_RX]  = { .type = NLA_U8 },
+       [IFLA_VXLAN_REMCSUM_TX] = { .type = NLA_U8 },
+       [IFLA_VXLAN_REMCSUM_RX] = { .type = NLA_U8 },
+       [IFLA_VXLAN_GBP]        = { .type = NLA_FLAG, },
+       [IFLA_VXLAN_GPE]        = { .type = NLA_FLAG, },
+       [IFLA_VXLAN_REMCSUM_NOPARTIAL]  = { .type = NLA_FLAG },
 };
 
 static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[])
@@ -1897,6 +2063,21 @@ static int vxlan_dev_configure(struct net *src_net, 
struct net_device *dev,
        __be16 default_port = vxlan->cfg.dst_port;
        struct net_device *lowerdev = NULL;
 
+       if (conf->flags & VXLAN_F_GPE) {
+               if (conf->flags & ~VXLAN_F_ALLOWED_GPE)
+                       return -EINVAL;
+               /* For now, allow GPE only together with COLLECT_METADATA.
+                * This can be relaxed later; in such case, the other side
+                * of the PtP link will have to be provided.
+                */
+               if (!(conf->flags & VXLAN_F_COLLECT_METADATA))
+                       return -EINVAL;
+
+               vxlan_raw_setup(dev);
+       } else {
+               vxlan_ether_setup(dev);
+       }
+
        vxlan->net = src_net;
 
        dst->remote_vni = conf->vni;
@@ -2023,7 +2204,136 @@ static int vxlan_newlink(struct net_device *dev,
                         struct nlattr *tb[], struct nlattr *data[])
 #endif
 {
-       return -EINVAL;
+       struct vxlan_config conf;
+       int err;
+
+       memset(&conf, 0, sizeof(conf));
+
+       if (data[IFLA_VXLAN_ID])
+               conf.vni = cpu_to_be32(nla_get_u32(data[IFLA_VXLAN_ID]));
+
+       if (data[IFLA_VXLAN_GROUP]) {
+               conf.remote_ip.sin.sin_addr.s_addr = 
nla_get_in_addr(data[IFLA_VXLAN_GROUP]);
+       } else if (data[IFLA_VXLAN_GROUP6]) {
+               if (!IS_ENABLED(CONFIG_IPV6))
+                       return -EPFNOSUPPORT;
+
+               conf.remote_ip.sin6.sin6_addr = 
nla_get_in6_addr(data[IFLA_VXLAN_GROUP6]);
+               conf.remote_ip.sa.sa_family = AF_INET6;
+       }
+
+       if (data[IFLA_VXLAN_LOCAL]) {
+               conf.saddr.sin.sin_addr.s_addr = 
nla_get_in_addr(data[IFLA_VXLAN_LOCAL]);
+               conf.saddr.sa.sa_family = AF_INET;
+       } else if (data[IFLA_VXLAN_LOCAL6]) {
+               if (!IS_ENABLED(CONFIG_IPV6))
+                       return -EPFNOSUPPORT;
+
+               /* TODO: respect scope id */
+               conf.saddr.sin6.sin6_addr = 
nla_get_in6_addr(data[IFLA_VXLAN_LOCAL6]);
+               conf.saddr.sa.sa_family = AF_INET6;
+       }
+
+       if (data[IFLA_VXLAN_LINK])
+               conf.remote_ifindex = nla_get_u32(data[IFLA_VXLAN_LINK]);
+
+       if (data[IFLA_VXLAN_TOS])
+               conf.tos  = nla_get_u8(data[IFLA_VXLAN_TOS]);
+
+       if (data[IFLA_VXLAN_TTL])
+               conf.ttl = nla_get_u8(data[IFLA_VXLAN_TTL]);
+
+       if (data[IFLA_VXLAN_LABEL])
+               conf.label = nla_get_be32(data[IFLA_VXLAN_LABEL]) &
+                            IPV6_FLOWLABEL_MASK;
+
+       if (!data[IFLA_VXLAN_LEARNING] || nla_get_u8(data[IFLA_VXLAN_LEARNING]))
+               conf.flags |= VXLAN_F_LEARN;
+
+       if (data[IFLA_VXLAN_AGEING])
+               conf.age_interval = nla_get_u32(data[IFLA_VXLAN_AGEING]);
+
+       if (data[IFLA_VXLAN_PROXY] && nla_get_u8(data[IFLA_VXLAN_PROXY]))
+               conf.flags |= VXLAN_F_PROXY;
+
+       if (data[IFLA_VXLAN_RSC] && nla_get_u8(data[IFLA_VXLAN_RSC]))
+               conf.flags |= VXLAN_F_RSC;
+
+       if (data[IFLA_VXLAN_L2MISS] && nla_get_u8(data[IFLA_VXLAN_L2MISS]))
+               conf.flags |= VXLAN_F_L2MISS;
+
+       if (data[IFLA_VXLAN_L3MISS] && nla_get_u8(data[IFLA_VXLAN_L3MISS]))
+               conf.flags |= VXLAN_F_L3MISS;
+
+       if (data[IFLA_VXLAN_LIMIT])
+               conf.addrmax = nla_get_u32(data[IFLA_VXLAN_LIMIT]);
+
+       if (data[IFLA_VXLAN_COLLECT_METADATA] &&
+           nla_get_u8(data[IFLA_VXLAN_COLLECT_METADATA]))
+               conf.flags |= VXLAN_F_COLLECT_METADATA;
+
+       if (data[IFLA_VXLAN_PORT_RANGE]) {
+               const struct ifla_vxlan_port_range *p
+                       = nla_data(data[IFLA_VXLAN_PORT_RANGE]);
+               conf.port_min = ntohs(p->low);
+               conf.port_max = ntohs(p->high);
+       }
+
+       if (data[IFLA_VXLAN_PORT])
+               conf.dst_port = nla_get_be16(data[IFLA_VXLAN_PORT]);
+
+       /* In this compat layer VXLAN_F_UDP_ZERO_CSUM_TX aliases
+        * VXLAN_F_UDP_CSUM, which means "transmit checksums enabled", so
+        * set it when the attribute requests checksums, not when it
+        * disables them.
+        */
+       if (data[IFLA_VXLAN_UDP_CSUM] &&
+           nla_get_u8(data[IFLA_VXLAN_UDP_CSUM]))
+               conf.flags |= VXLAN_F_UDP_CSUM;
+
+       if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX] &&
+           nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX]))
+               conf.flags |= VXLAN_F_UDP_ZERO_CSUM6_TX;
+
+       if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX] &&
+           nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX]))
+               conf.flags |= VXLAN_F_UDP_ZERO_CSUM6_RX;
+
+       if (data[IFLA_VXLAN_REMCSUM_TX] &&
+           nla_get_u8(data[IFLA_VXLAN_REMCSUM_TX]))
+               conf.flags |= VXLAN_F_REMCSUM_TX;
+
+       if (data[IFLA_VXLAN_REMCSUM_RX] &&
+           nla_get_u8(data[IFLA_VXLAN_REMCSUM_RX]))
+               conf.flags |= VXLAN_F_REMCSUM_RX;
+
+       if (data[IFLA_VXLAN_GBP])
+               conf.flags |= VXLAN_F_GBP;
+
+       if (data[IFLA_VXLAN_GPE])
+               conf.flags |= VXLAN_F_GPE;
+
+       if (data[IFLA_VXLAN_REMCSUM_NOPARTIAL])
+               conf.flags |= VXLAN_F_REMCSUM_NOPARTIAL;
+
+       if (tb[IFLA_MTU])
+               conf.mtu = nla_get_u32(tb[IFLA_MTU]);
+
+       err = vxlan_dev_configure(src_net, dev, &conf);
+       switch (err) {
+       case -ENODEV:
+               pr_info("ifindex %d does not exist\n", conf.remote_ifindex);
+               break;
+
+       case -EPERM:
+               pr_info("IPv6 is disabled via sysctl\n");
+               break;
+
+       case -EEXIST:
+               pr_info("duplicate VNI %u\n", be32_to_cpu(conf.vni));
+               break;
+
+       case -EINVAL:
+               pr_info("unsupported combination of extensions\n");
+               break;
+       }
+
+       return err;
 }
 
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,39)
@@ -2047,20 +2357,21 @@ static void vxlan_dellink(struct net_device *dev)
 static size_t vxlan_get_size(const struct net_device *dev)
 {
 
-       return nla_total_size(sizeof(__u32)) +  /* IFLA_VXLAN_ID */
+       return nla_total_size(sizeof(__u32)) +  /* IFLA_VXLAN_ID */
                nla_total_size(sizeof(struct in6_addr)) + /* 
IFLA_VXLAN_GROUP{6} */
-               nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_LINK */
+               nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_LINK */
                nla_total_size(sizeof(struct in6_addr)) + /* 
IFLA_VXLAN_LOCAL{6} */
-               nla_total_size(sizeof(__u8)) +  /* IFLA_VXLAN_TTL */
-               nla_total_size(sizeof(__u8)) +  /* IFLA_VXLAN_TOS */
-               nla_total_size(sizeof(__u8)) +  /* IFLA_VXLAN_LEARNING */
-               nla_total_size(sizeof(__u8)) +  /* IFLA_VXLAN_PROXY */
-               nla_total_size(sizeof(__u8)) +  /* IFLA_VXLAN_RSC */
-               nla_total_size(sizeof(__u8)) +  /* IFLA_VXLAN_L2MISS */
-               nla_total_size(sizeof(__u8)) +  /* IFLA_VXLAN_L3MISS */
-               nla_total_size(sizeof(__u8)) +  /* IFLA_VXLAN_COLLECT_METADATA 
*/
-               nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_AGEING */
-               nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_LIMIT */
+               nla_total_size(sizeof(__u8)) +  /* IFLA_VXLAN_TTL */
+               nla_total_size(sizeof(__u8)) +  /* IFLA_VXLAN_TOS */
+               nla_total_size(sizeof(__be32)) + /* IFLA_VXLAN_LABEL */
+               nla_total_size(sizeof(__u8)) +  /* IFLA_VXLAN_LEARNING */
+               nla_total_size(sizeof(__u8)) +  /* IFLA_VXLAN_PROXY */
+               nla_total_size(sizeof(__u8)) +  /* IFLA_VXLAN_RSC */
+               nla_total_size(sizeof(__u8)) +  /* IFLA_VXLAN_L2MISS */
+               nla_total_size(sizeof(__u8)) +  /* IFLA_VXLAN_L3MISS */
+               nla_total_size(sizeof(__u8)) +  /* IFLA_VXLAN_COLLECT_METADATA 
*/
+               nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_AGEING */
+               nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_LIMIT */
                nla_total_size(sizeof(struct ifla_vxlan_port_range)) +
                nla_total_size(sizeof(__be16)) + /* IFLA_VXLAN_PORT */
                nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_CSUM */
@@ -2074,8 +2385,88 @@ static size_t vxlan_get_size(const struct net_device 
*dev)
 static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
 {
        const struct vxlan_dev *vxlan = netdev_priv(dev);
+       const struct vxlan_rdst *dst = &vxlan->default_dst;
+       struct ifla_vxlan_port_range ports = {
+               .low =  htons(vxlan->cfg.port_min),
+               .high = htons(vxlan->cfg.port_max),
+       };
+
+       if (nla_put_u32(skb, IFLA_VXLAN_ID, be32_to_cpu(dst->remote_vni)))
+               goto nla_put_failure;
+
+       if (!vxlan_addr_any(&dst->remote_ip)) {
+               if (dst->remote_ip.sa.sa_family == AF_INET) {
+                       if (nla_put_in_addr(skb, IFLA_VXLAN_GROUP,
+                                           dst->remote_ip.sin.sin_addr.s_addr))
+                               goto nla_put_failure;
+#if IS_ENABLED(CONFIG_IPV6)
+               } else {
+                       if (nla_put_in6_addr(skb, IFLA_VXLAN_GROUP6,
+                                            &dst->remote_ip.sin6.sin6_addr))
+                               goto nla_put_failure;
+#endif
+               }
+       }
+
+       if (dst->remote_ifindex && nla_put_u32(skb, IFLA_VXLAN_LINK, 
dst->remote_ifindex))
+               goto nla_put_failure;
+
+       if (!vxlan_addr_any(&vxlan->cfg.saddr)) {
+               if (vxlan->cfg.saddr.sa.sa_family == AF_INET) {
+                       if (nla_put_in_addr(skb, IFLA_VXLAN_LOCAL,
+                                           
vxlan->cfg.saddr.sin.sin_addr.s_addr))
+                               goto nla_put_failure;
+#if IS_ENABLED(CONFIG_IPV6)
+               } else {
+                       if (nla_put_in6_addr(skb, IFLA_VXLAN_LOCAL6,
+                                            &vxlan->cfg.saddr.sin6.sin6_addr))
+                               goto nla_put_failure;
+#endif
+               }
+       }
+
+       if (nla_put_u8(skb, IFLA_VXLAN_TTL, vxlan->cfg.ttl) ||
+           nla_put_u8(skb, IFLA_VXLAN_TOS, vxlan->cfg.tos) ||
+           nla_put_be32(skb, IFLA_VXLAN_LABEL, vxlan->cfg.label) ||
+           nla_put_u8(skb, IFLA_VXLAN_LEARNING,
+                       !!(vxlan->flags & VXLAN_F_LEARN)) ||
+           nla_put_u8(skb, IFLA_VXLAN_PROXY,
+                       !!(vxlan->flags & VXLAN_F_PROXY)) ||
+           nla_put_u8(skb, IFLA_VXLAN_RSC, !!(vxlan->flags & VXLAN_F_RSC)) ||
+           nla_put_u8(skb, IFLA_VXLAN_L2MISS,
+                       !!(vxlan->flags & VXLAN_F_L2MISS)) ||
+           nla_put_u8(skb, IFLA_VXLAN_L3MISS,
+                       !!(vxlan->flags & VXLAN_F_L3MISS)) ||
+           nla_put_u8(skb, IFLA_VXLAN_COLLECT_METADATA,
+                      !!(vxlan->flags & VXLAN_F_COLLECT_METADATA)) ||
+           nla_put_u32(skb, IFLA_VXLAN_AGEING, vxlan->cfg.age_interval) ||
+           nla_put_u32(skb, IFLA_VXLAN_LIMIT, vxlan->cfg.addrmax) ||
+           nla_put_be16(skb, IFLA_VXLAN_PORT, vxlan->cfg.dst_port) ||
+           nla_put_u8(skb, IFLA_VXLAN_UDP_CSUM,
+                       !!(vxlan->flags & VXLAN_F_UDP_CSUM)) ||
+           nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_TX,
+                       !!(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM6_TX)) ||
+           nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_RX,
+                       !!(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM6_RX)) ||
+           nla_put_u8(skb, IFLA_VXLAN_REMCSUM_TX,
+                       !!(vxlan->flags & VXLAN_F_REMCSUM_TX)) ||
+           nla_put_u8(skb, IFLA_VXLAN_REMCSUM_RX,
+                       !!(vxlan->flags & VXLAN_F_REMCSUM_RX)))
+               goto nla_put_failure;
+
+       if (nla_put(skb, IFLA_VXLAN_PORT_RANGE, sizeof(ports), &ports))
+               goto nla_put_failure;
+
+       if (vxlan->flags & VXLAN_F_GBP &&
+           nla_put_flag(skb, IFLA_VXLAN_GBP))
+               goto nla_put_failure;
+
+       if (vxlan->flags & VXLAN_F_GPE &&
+           nla_put_flag(skb, IFLA_VXLAN_GPE))
+               goto nla_put_failure;
 
-       if (nla_put_be16(skb, IFLA_VXLAN_PORT, vxlan->cfg.dst_port))
+       if (vxlan->flags & VXLAN_F_REMCSUM_NOPARTIAL &&
+           nla_put_flag(skb, IFLA_VXLAN_REMCSUM_NOPARTIAL))
                goto nla_put_failure;
 
        return 0;
diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index 1e88c13..2b07e54 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -988,6 +988,8 @@ netdev_geneve_destroy(const char *name)
 #define IFLA_VXLAN_UDP_ZERO_CSUM6_RX 20
 #define IFLA_VXLAN_GBP 23
 #define IFLA_VXLAN_COLLECT_METADATA 25
+#define IFLA_VXLAN_LABEL 26
+#define IFLA_VXLAN_GPE 27
 #endif
 
 #if IFLA_GRE_MAX < 18
@@ -1037,6 +1039,9 @@ netdev_vxlan_create(struct netdev *netdev)
             if (tnl_cfg->exts & (1 << OVS_VXLAN_EXT_GBP)) {
                 nl_msg_put_flag(&request, IFLA_VXLAN_GBP);
             }
+            else if (tnl_cfg->exts & (1 << OVS_VXLAN_EXT_GPE)) {
+                nl_msg_put_flag(&request, IFLA_VXLAN_GPE);
+            }
             nl_msg_put_be16(&request, IFLA_VXLAN_PORT, tnl_cfg->dst_port);
         nl_msg_end_nested(&request, infodata_off);
     nl_msg_end_nested(&request, linkinfo_off);
diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
index ec5c44e..fa56af5 100644
--- a/lib/netdev-vport.c
+++ b/lib/netdev-vport.c
@@ -541,7 +541,9 @@ set_tunnel_config(struct netdev *dev_, const struct smap 
*args)
             while (ext) {
                 if (!strcmp(type, "vxlan") && !strcmp(ext, "gbp")) {
                     tnl_cfg.exts |= (1 << OVS_VXLAN_EXT_GBP);
-                } else {
+                } else if (!strcmp(type, "vxlan") && !strcmp(ext, "gpe")) {
+                    tnl_cfg.exts |= (1 << OVS_VXLAN_EXT_GPE);
+                } else {
                     VLOG_WARN("%s: unknown extension '%s'", name, ext);
                 }
 
-- 
1.9.3

_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to