[PATCH net-next v2 4/4] vxlan: implement GPE

2016-03-20 Thread Jiri Benc
Implement VXLAN-GPE. Only COLLECT_METADATA is supported for now (it is
possible to support static configuration, too, if there is demand for it).

The GPE header parsing has to be moved before iptunnel_pull_header, as we
need to know the protocol.

v2: Removed what was called "L2 mode" in v1 of the patchset. Only "L3 mode"
(now called "raw mode") is added by this patch. This mode does not allow
Ethernet header to be encapsulated in VXLAN-GPE when using ip route to
specify the encapsulation, IP header is encapsulated instead. The patch
does support Ethernet to be encapsulated, though, using ETH_P_TEB in
skb->protocol. This will be utilized by other COLLECT_METADATA users
(openvswitch in particular).

If there is ever demand for Ethernet encapsulation with VXLAN-GPE using
ip route, it's easy to add a new flag switching the interface to
"Ethernet mode" (called "L2 mode" in v1 of this patchset). For now,
leave this out, it seems we don't need it.

Disallowed more flag combinations, especially RCO with GPE.
Added comment explaining that GBP and GPE cannot be set together.

Signed-off-by: Jiri Benc 
---
 drivers/net/vxlan.c  | 170 ++-
 include/net/vxlan.h  |  68 +
 include/uapi/linux/if_link.h |   1 +
 3 files changed, 222 insertions(+), 17 deletions(-)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index f75786c4bd40..419e155c7153 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1192,6 +1192,45 @@ out:
unparsed->vx_flags &= ~VXLAN_GBP_USED_BITS;
 }
 
+static bool vxlan_parse_gpe_hdr(struct vxlanhdr *unparsed,
+   __be32 *protocol,
+   struct sk_buff *skb, u32 vxflags)
+{
+   struct vxlanhdr_gpe *gpe = (struct vxlanhdr_gpe *)unparsed;
+
+   /* Need to have Next Protocol set for interfaces in GPE mode. */
+   if (!gpe->np_applied)
+   return false;
+   /* "The initial version is 0. If a receiver does not support the
+* version indicated it MUST drop the packet.
+*/
+   if (gpe->version != 0)
+   return false;
+   /* "When the O bit is set to 1, the packet is an OAM packet and OAM
+* processing MUST occur." However, we don't implement OAM
+* processing, thus drop the packet.
+*/
+   if (gpe->oam_flag)
+   return false;
+
+   switch (gpe->next_protocol) {
+   case VXLAN_GPE_NP_IPV4:
+   *protocol = htons(ETH_P_IP);
+   break;
+   case VXLAN_GPE_NP_IPV6:
+   *protocol = htons(ETH_P_IPV6);
+   break;
+   case VXLAN_GPE_NP_ETHERNET:
+   *protocol = htons(ETH_P_TEB);
+   break;
+   default:
+   return false;
+   }
+
+   unparsed->vx_flags &= ~VXLAN_GPE_USED_BITS;
+   return true;
+}
+
 static bool vxlan_set_mac(struct vxlan_dev *vxlan,
  struct vxlan_sock *vs,
  struct sk_buff *skb)
@@ -1257,9 +1296,11 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff 
*skb)
struct vxlanhdr unparsed;
struct vxlan_metadata _md;
struct vxlan_metadata *md = &_md;
+   __be32 protocol = htons(ETH_P_TEB);
+   bool raw_proto = false;
void *oiph;
 
-   /* Need Vxlan and inner Ethernet header to be present */
+   /* Need UDP and VXLAN header to be present */
if (!pskb_may_pull(skb, VXLAN_HLEN))
return 1;
 
@@ -1283,9 +1324,18 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff 
*skb)
if (!vxlan)
goto drop;
 
-   if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB),
-!net_eq(vxlan->net, dev_net(vxlan->dev
-   goto drop;
+   /* For backwards compatibility, only allow reserved fields to be
+* used by VXLAN extensions if explicitly requested.
+*/
+   if (vs->flags & VXLAN_F_GPE) {
+   if (!vxlan_parse_gpe_hdr(, , skb, vs->flags))
+   goto drop;
+   raw_proto = true;
+   }
+
+   if (__iptunnel_pull_header(skb, VXLAN_HLEN, protocol, raw_proto,
+  !net_eq(vxlan->net, dev_net(vxlan->dev
+   goto drop;
 
if (vxlan_collect_metadata(vs)) {
__be32 vni = vxlan_vni(vxlan_hdr(skb)->vx_vni);
@@ -1304,14 +1354,14 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff 
*skb)
memset(md, 0, sizeof(*md));
}
 
-   /* For backwards compatibility, only allow reserved fields to be
-* used by VXLAN extensions if explicitly requested.
-*/
if (vs->flags & VXLAN_F_REMCSUM_RX)
if (!vxlan_remcsum(, skb, vs->flags))
goto drop;
if (vs->flags & VXLAN_F_GBP)

Re: [PATCH net-next v2 4/4] vxlan: implement GPE

2016-03-19 Thread Tom Herbert
On Wed, Mar 16, 2016 at 9:51 AM, Jiri Benc  wrote:
> Implement VXLAN-GPE. Only COLLECT_METADATA is supported for now (it is
> possible to support static configuration, too, if there is demand for it).
>
> The GPE header parsing has to be moved before iptunnel_pull_header, as we
> need to know the protocol.
>
> v2: Removed what was called "L2 mode" in v1 of the patchset. Only "L3 mode"
> (now called "raw mode") is added by this patch. This mode does not allow
> Ethernet header to be encapsulated in VXLAN-GPE when using ip route to
> specify the encapsulation, IP header is encapsulated instead. The patch
> does support Ethernet to be encapsulated, though, using ETH_P_TEB in
> skb->protocol. This will be utilized by other COLLECT_METADATA users
> (openvswitch in particular).
>
> If there is ever demand for Ethernet encapsulation with VXLAN-GPE using
> ip route, it's easy to add a new flag switching the interface to
> "Ethernet mode" (called "L2 mode" in v1 of this patchset). For now,
> leave this out, it seems we don't need it.
>
> Disallowed more flag combinations, especially RCO with GPE.
> Added comment explaining that GBP and GPE cannot be set together.
>
> Signed-off-by: Jiri Benc 
> ---
>  drivers/net/vxlan.c  | 170 
> ++-
>  include/net/vxlan.h  |  68 +
>  include/uapi/linux/if_link.h |   1 +
>  3 files changed, 222 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
> index f75786c4bd40..419e155c7153 100644
> --- a/drivers/net/vxlan.c
> +++ b/drivers/net/vxlan.c
> @@ -1192,6 +1192,45 @@ out:
> unparsed->vx_flags &= ~VXLAN_GBP_USED_BITS;
>  }
>
> +static bool vxlan_parse_gpe_hdr(struct vxlanhdr *unparsed,
> +   __be32 *protocol,
> +   struct sk_buff *skb, u32 vxflags)
> +{
> +   struct vxlanhdr_gpe *gpe = (struct vxlanhdr_gpe *)unparsed;
> +
> +   /* Need to have Next Protocol set for interfaces in GPE mode. */
> +   if (!gpe->np_applied)
> +   return false;
> +   /* "The initial version is 0. If a receiver does not support the
> +* version indicated it MUST drop the packet.
> +*/
> +   if (gpe->version != 0)
> +   return false;
> +   /* "When the O bit is set to 1, the packet is an OAM packet and OAM
> +* processing MUST occur." However, we don't implement OAM
> +* processing, thus drop the packet.
> +*/
> +   if (gpe->oam_flag)
> +   return false;
> +
> +   switch (gpe->next_protocol) {
> +   case VXLAN_GPE_NP_IPV4:
> +   *protocol = htons(ETH_P_IP);
> +   break;
> +   case VXLAN_GPE_NP_IPV6:
> +   *protocol = htons(ETH_P_IPV6);
> +   break;
> +   case VXLAN_GPE_NP_ETHERNET:
> +   *protocol = htons(ETH_P_TEB);
> +   break;
> +   default:
> +   return false;
> +   }
> +
> +   unparsed->vx_flags &= ~VXLAN_GPE_USED_BITS;
> +   return true;
> +}
> +
>  static bool vxlan_set_mac(struct vxlan_dev *vxlan,
>   struct vxlan_sock *vs,
>   struct sk_buff *skb)
> @@ -1257,9 +1296,11 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff 
> *skb)
> struct vxlanhdr unparsed;
> struct vxlan_metadata _md;
> struct vxlan_metadata *md = &_md;
> +   __be32 protocol = htons(ETH_P_TEB);
> +   bool raw_proto = false;
> void *oiph;
>
> -   /* Need Vxlan and inner Ethernet header to be present */
> +   /* Need UDP and VXLAN header to be present */
> if (!pskb_may_pull(skb, VXLAN_HLEN))
> return 1;
>
> @@ -1283,9 +1324,18 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff 
> *skb)
> if (!vxlan)
> goto drop;
>
> -   if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB),
> -!net_eq(vxlan->net, dev_net(vxlan->dev
> -   goto drop;
> +   /* For backwards compatibility, only allow reserved fields to be
> +* used by VXLAN extensions if explicitly requested.
> +*/
> +   if (vs->flags & VXLAN_F_GPE) {
> +   if (!vxlan_parse_gpe_hdr(, , skb, 
> vs->flags))
> +   goto drop;
> +   raw_proto = true;
> +   }
> +
> +   if (__iptunnel_pull_header(skb, VXLAN_HLEN, protocol, raw_proto,
> +  !net_eq(vxlan->net, dev_net(vxlan->dev
> +   goto drop;
>
> if (vxlan_collect_metadata(vs)) {
> __be32 vni = vxlan_vni(vxlan_hdr(skb)->vx_vni);
> @@ -1304,14 +1354,14 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff 
> *skb)
> memset(md, 0, sizeof(*md));
> }
>
> -   /* For backwards 

Re: [PATCH net-next v2 4/4] vxlan: implement GPE

2016-03-18 Thread Jiri Benc
On Wed, 16 Mar 2016 10:31:10 -0700, Tom Herbert wrote:
> Sorry, I still don't like this. For VXLAN-GPE packets the above two
> conditionals are a complete waste of time and I shouldn't have to go
> pawing through configuration to determine what protocol has actually
> be implemented.  Please, at least move these into the else block of
> "if (vs->flags & VXLAN_F_GPE) {" above. This saves two conditionals in
> the data path, makes the parsing code more readable, and you don't
> need to reference configuration to figure things out.

As I already wrote, this is not possible. GPE parsing needs to occur
before iptunnel_pull_header (because we need to know the protocol), GBP
parsing needs to happen after it (after udp_tun_rx_dst specifically).

Believe me, I would do it that way if it was possible.

I also considered splitting rx path for GPE and non-GPE case and the
result was much uglier and longer code.

 Jiri