[PATCH net-next 22/22] openvswitch: Use regular VXLAN net_device device

2015-07-17 Thread Thomas Graf
This gets rid of all OVS-specific VXLAN code in the receive and
transmit paths by using a VXLAN net_device to represent the vport.
Only a small shim layer remains, which handles the VXLAN-specific
OVS Netlink configuration.

Unexport vxlan_sock_add(), vxlan_sock_release(), and vxlan_xmit_skb(),
since the exports are no longer needed.

Signed-off-by: Thomas Graf 
Signed-off-by: Pravin B Shelar 
---
 drivers/net/vxlan.c| 242 +++
 include/net/rtnetlink.h|   1 +
 include/net/vxlan.h|  24 +--
 net/core/rtnetlink.c   |  26 ++--
 net/openvswitch/Kconfig|  12 --
 net/openvswitch/Makefile   |   1 -
 net/openvswitch/flow_netlink.c |   6 +-
 net/openvswitch/vport-netdev.c | 201 -
 net/openvswitch/vport-vxlan.c  | 322 -
 net/openvswitch/vport-vxlan.h  |  11 --
 10 files changed, 339 insertions(+), 507 deletions(-)
 delete mode 100644 net/openvswitch/vport-vxlan.c
 delete mode 100644 net/openvswitch/vport-vxlan.h

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 5ae6c0c..76466ef 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -75,6 +75,9 @@ static struct rtnl_link_ops vxlan_link_ops;
 
 static const u8 all_zeros_mac[ETH_ALEN];
 
+static struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
+bool no_share, u32 flags);
+
 /* per-network namespace private data for this module */
 struct vxlan_net {
struct list_head  vxlan_list;
@@ -1027,7 +1030,7 @@ static bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev)
return false;
 }
 
-void vxlan_sock_release(struct vxlan_sock *vs)
+static void vxlan_sock_release(struct vxlan_sock *vs)
 {
struct sock *sk = vs->sock->sk;
struct net *net = sock_net(sk);
@@ -1043,7 +1046,6 @@ void vxlan_sock_release(struct vxlan_sock *vs)
 
queue_work(vxlan_wq, &vs->del_work);
 }
-EXPORT_SYMBOL_GPL(vxlan_sock_release);
 
 /* Update multicast group membership when first VNI on
  * multicast address is brought up
@@ -1126,6 +1128,102 @@ static struct vxlanhdr *vxlan_remcsum(struct sk_buff *skb, struct vxlanhdr *vh,
return vh;
 }
 
+static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
+ struct vxlan_metadata *md, u32 vni,
+ struct metadata_dst *tun_dst)
+{
+   struct iphdr *oip = NULL;
+   struct ipv6hdr *oip6 = NULL;
+   struct vxlan_dev *vxlan;
+   struct pcpu_sw_netstats *stats;
+   union vxlan_addr saddr;
+   int err = 0;
+   union vxlan_addr *remote_ip;
+
+   /* For flow based devices, map all packets to VNI 0 */
+   if (vs->flags & VXLAN_F_FLOW_BASED)
+   vni = 0;
+
+   /* Is this VNI defined? */
+   vxlan = vxlan_vs_find_vni(vs, vni);
+   if (!vxlan)
+   goto drop;
+
+   remote_ip = &vxlan->default_dst.remote_ip;
+   skb_reset_mac_header(skb);
+   skb_scrub_packet(skb, !net_eq(vxlan->net, dev_net(vxlan->dev)));
+   skb->protocol = eth_type_trans(skb, vxlan->dev);
+   skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
+
+   /* Ignore packet loops (and multicast echo) */
+   if (ether_addr_equal(eth_hdr(skb)->h_source, vxlan->dev->dev_addr))
+   goto drop;
+
+   /* Re-examine inner Ethernet packet */
+   if (remote_ip->sa.sa_family == AF_INET) {
+   oip = ip_hdr(skb);
+   saddr.sin.sin_addr.s_addr = oip->saddr;
+   saddr.sa.sa_family = AF_INET;
+#if IS_ENABLED(CONFIG_IPV6)
+   } else {
+   oip6 = ipv6_hdr(skb);
+   saddr.sin6.sin6_addr = oip6->saddr;
+   saddr.sa.sa_family = AF_INET6;
+#endif
+   }
+
+   if (tun_dst) {
+   skb_dst_set(skb, (struct dst_entry *)tun_dst);
+   tun_dst = NULL;
+   }
+
+   if ((vxlan->flags & VXLAN_F_LEARN) &&
+   vxlan_snoop(skb->dev, &saddr, eth_hdr(skb)->h_source))
+   goto drop;
+
+   skb_reset_network_header(skb);
+   /* In flow-based mode, GBP is carried in dst_metadata */
+   if (!(vs->flags & VXLAN_F_FLOW_BASED))
+   skb->mark = md->gbp;
+
+   if (oip6)
+   err = IP6_ECN_decapsulate(oip6, skb);
+   if (oip)
+   err = IP_ECN_decapsulate(oip, skb);
+
+   if (unlikely(err)) {
+   if (log_ecn_error) {
+   if (oip6)
+   net_info_ratelimited("non-ECT from %pI6\n",
+&oip6->saddr);
+   if (oip)
+   net_info_ratelimited("non-ECT from %pI4 with 
TOS=%#x\n",
+&oip->saddr, oip->tos);
+   }
+   if (err > 1) {
+   ++vxlan->dev->stats.rx_frame_errors;
+   ++vxlan->d

[PATCH net-next 22/22] openvswitch: Use regular VXLAN net_device device

2015-07-21 Thread Thomas Graf
This gets rid of all OVS-specific VXLAN code in the receive and
transmit paths by using a VXLAN net_device to represent the vport.
Only a small shim layer remains, which handles the VXLAN-specific
OVS Netlink configuration.

Unexport vxlan_sock_add(), vxlan_sock_release(), and vxlan_xmit_skb(),
since the exports are no longer needed.

Signed-off-by: Thomas Graf 
Signed-off-by: Pravin B Shelar 
---
 drivers/net/vxlan.c| 242 +++
 include/net/rtnetlink.h|   1 +
 include/net/vxlan.h|  24 +--
 net/core/rtnetlink.c   |  26 ++--
 net/openvswitch/Kconfig|  12 --
 net/openvswitch/Makefile   |   1 -
 net/openvswitch/flow_netlink.c |   6 +-
 net/openvswitch/vport-netdev.c | 201 -
 net/openvswitch/vport-vxlan.c  | 322 -
 net/openvswitch/vport-vxlan.h  |  11 --
 10 files changed, 339 insertions(+), 507 deletions(-)
 delete mode 100644 net/openvswitch/vport-vxlan.c
 delete mode 100644 net/openvswitch/vport-vxlan.h

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 30e1f21..e9feefb 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -75,6 +75,9 @@ static struct rtnl_link_ops vxlan_link_ops;
 
 static const u8 all_zeros_mac[ETH_ALEN];
 
+static struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
+bool no_share, u32 flags);
+
 /* per-network namespace private data for this module */
 struct vxlan_net {
struct list_head  vxlan_list;
@@ -1027,7 +1030,7 @@ static bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev)
return false;
 }
 
-void vxlan_sock_release(struct vxlan_sock *vs)
+static void vxlan_sock_release(struct vxlan_sock *vs)
 {
struct sock *sk = vs->sock->sk;
struct net *net = sock_net(sk);
@@ -1043,7 +1046,6 @@ void vxlan_sock_release(struct vxlan_sock *vs)
 
queue_work(vxlan_wq, &vs->del_work);
 }
-EXPORT_SYMBOL_GPL(vxlan_sock_release);
 
 /* Update multicast group membership when first VNI on
  * multicast address is brought up
@@ -1126,6 +1128,102 @@ static struct vxlanhdr *vxlan_remcsum(struct sk_buff *skb, struct vxlanhdr *vh,
return vh;
 }
 
+static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
+ struct vxlan_metadata *md, u32 vni,
+ struct metadata_dst *tun_dst)
+{
+   struct iphdr *oip = NULL;
+   struct ipv6hdr *oip6 = NULL;
+   struct vxlan_dev *vxlan;
+   struct pcpu_sw_netstats *stats;
+   union vxlan_addr saddr;
+   int err = 0;
+   union vxlan_addr *remote_ip;
+
+   /* For flow based devices, map all packets to VNI 0 */
+   if (vs->flags & VXLAN_F_FLOW_BASED)
+   vni = 0;
+
+   /* Is this VNI defined? */
+   vxlan = vxlan_vs_find_vni(vs, vni);
+   if (!vxlan)
+   goto drop;
+
+   remote_ip = &vxlan->default_dst.remote_ip;
+   skb_reset_mac_header(skb);
+   skb_scrub_packet(skb, !net_eq(vxlan->net, dev_net(vxlan->dev)));
+   skb->protocol = eth_type_trans(skb, vxlan->dev);
+   skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
+
+   /* Ignore packet loops (and multicast echo) */
+   if (ether_addr_equal(eth_hdr(skb)->h_source, vxlan->dev->dev_addr))
+   goto drop;
+
+   /* Re-examine inner Ethernet packet */
+   if (remote_ip->sa.sa_family == AF_INET) {
+   oip = ip_hdr(skb);
+   saddr.sin.sin_addr.s_addr = oip->saddr;
+   saddr.sa.sa_family = AF_INET;
+#if IS_ENABLED(CONFIG_IPV6)
+   } else {
+   oip6 = ipv6_hdr(skb);
+   saddr.sin6.sin6_addr = oip6->saddr;
+   saddr.sa.sa_family = AF_INET6;
+#endif
+   }
+
+   if (tun_dst) {
+   skb_dst_set(skb, (struct dst_entry *)tun_dst);
+   tun_dst = NULL;
+   }
+
+   if ((vxlan->flags & VXLAN_F_LEARN) &&
+   vxlan_snoop(skb->dev, &saddr, eth_hdr(skb)->h_source))
+   goto drop;
+
+   skb_reset_network_header(skb);
+   /* In flow-based mode, GBP is carried in dst_metadata */
+   if (!(vs->flags & VXLAN_F_FLOW_BASED))
+   skb->mark = md->gbp;
+
+   if (oip6)
+   err = IP6_ECN_decapsulate(oip6, skb);
+   if (oip)
+   err = IP_ECN_decapsulate(oip, skb);
+
+   if (unlikely(err)) {
+   if (log_ecn_error) {
+   if (oip6)
+   net_info_ratelimited("non-ECT from %pI6\n",
+&oip6->saddr);
+   if (oip)
+   net_info_ratelimited("non-ECT from %pI4 with 
TOS=%#x\n",
+&oip->saddr, oip->tos);
+   }
+   if (err > 1) {
+   ++vxlan->dev->stats.rx_frame_errors;
+   ++vxlan->d