On Wed, Mar 16, 2016 at 9:51 AM, Jiri Benc wrote:
> Implement VXLAN-GPE. Only COLLECT_METADATA is supported for now (it is
> possible to support static configuration, too, if there is demand for it).
>
> The GPE header parsing has to be moved before iptunnel_pull_header, as we
> need to know the protocol.
>
> v2: Removed what was called "L2 mode" in v1 of the patchset. Only "L3 mode"
> (now called "raw mode") is added by this patch. This mode does not allow
> Ethernet header to be encapsulated in VXLAN-GPE when using ip route to
> specify the encapsulation, IP header is encapsulated instead. The patch
> does support Ethernet to be encapsulated, though, using ETH_P_TEB in
> skb->protocol. This will be utilized by other COLLECT_METADATA users
> (openvswitch in particular).
>
> If there is ever demand for Ethernet encapsulation with VXLAN-GPE using
> ip route, it's easy to add a new flag switching the interface to
> "Ethernet mode" (called "L2 mode" in v1 of this patchset). For now,
> leave this out, it seems we don't need it.
>
> Disallowed more flag combinations, especially RCO with GPE.
> Added comment explaining that GBP and GPE cannot be set together.
>
> Signed-off-by: Jiri Benc
> ---
> drivers/net/vxlan.c | 170
> ++-
> include/net/vxlan.h | 68 +
> include/uapi/linux/if_link.h | 1 +
> 3 files changed, 222 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
> index f75786c4bd40..419e155c7153 100644
> --- a/drivers/net/vxlan.c
> +++ b/drivers/net/vxlan.c
> @@ -1192,6 +1192,45 @@ out:
> unparsed->vx_flags &= ~VXLAN_GBP_USED_BITS;
> }
>
> +static bool vxlan_parse_gpe_hdr(struct vxlanhdr *unparsed,
> + __be32 *protocol,
> + struct sk_buff *skb, u32 vxflags)
> +{
> + struct vxlanhdr_gpe *gpe = (struct vxlanhdr_gpe *)unparsed;
> +
> + /* Need to have Next Protocol set for interfaces in GPE mode. */
> + if (!gpe->np_applied)
> + return false;
> + /* "The initial version is 0. If a receiver does not support the
> +* version indicated it MUST drop the packet.
> +*/
> + if (gpe->version != 0)
> + return false;
> + /* "When the O bit is set to 1, the packet is an OAM packet and OAM
> +* processing MUST occur." However, we don't implement OAM
> +* processing, thus drop the packet.
> +*/
> + if (gpe->oam_flag)
> + return false;
> +
> + switch (gpe->next_protocol) {
> + case VXLAN_GPE_NP_IPV4:
> + *protocol = htons(ETH_P_IP);
> + break;
> + case VXLAN_GPE_NP_IPV6:
> + *protocol = htons(ETH_P_IPV6);
> + break;
> + case VXLAN_GPE_NP_ETHERNET:
> + *protocol = htons(ETH_P_TEB);
> + break;
> + default:
> + return false;
> + }
> +
> + unparsed->vx_flags &= ~VXLAN_GPE_USED_BITS;
> + return true;
> +}
> +
> static bool vxlan_set_mac(struct vxlan_dev *vxlan,
> struct vxlan_sock *vs,
> struct sk_buff *skb)
> @@ -1257,9 +1296,11 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff
> *skb)
> struct vxlanhdr unparsed;
> struct vxlan_metadata _md;
> struct vxlan_metadata *md = &_md;
> + __be32 protocol = htons(ETH_P_TEB);
> + bool raw_proto = false;
> void *oiph;
>
> - /* Need Vxlan and inner Ethernet header to be present */
> + /* Need UDP and VXLAN header to be present */
> if (!pskb_may_pull(skb, VXLAN_HLEN))
> return 1;
>
> @@ -1283,9 +1324,18 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff
> *skb)
> if (!vxlan)
> goto drop;
>
> - if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB),
> -!net_eq(vxlan->net, dev_net(vxlan->dev
> - goto drop;
> + /* For backwards compatibility, only allow reserved fields to be
> +* used by VXLAN extensions if explicitly requested.
> +*/
> + if (vs->flags & VXLAN_F_GPE) {
> + if (!vxlan_parse_gpe_hdr(, , skb,
> vs->flags))
> + goto drop;
> + raw_proto = true;
> + }
> +
> + if (__iptunnel_pull_header(skb, VXLAN_HLEN, protocol, raw_proto,
> + !net_eq(vxlan->net, dev_net(vxlan->dev
> + goto drop;
>
> if (vxlan_collect_metadata(vs)) {
> __be32 vni = vxlan_vni(vxlan_hdr(skb)->vx_vni);
> @@ -1304,14 +1354,14 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff
> *skb)
> memset(md, 0, sizeof(*md));
> }
>
> - /* For backwards