[work-in-progress, works but needs changes]
[v2: refactored lots of things, e.g. dst_metadata, no more genetlink]

Signed-off-by: David Lamparter <equi...@diac24.net>
---
 include/net/dst_metadata.h |  21 ++
 include/net/vpls.h         |   8 +
 net/mpls/Kconfig           |  11 ++
 net/mpls/Makefile          |   1 +
 net/mpls/vpls.c            | 469 +++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 510 insertions(+)
 create mode 100644 include/net/vpls.h
 create mode 100644 net/mpls/vpls.c

diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h
index 8858dc441458..aeee4ce3b654 100644
--- a/include/net/dst_metadata.h
+++ b/include/net/dst_metadata.h
@@ -3,11 +3,13 @@
 
 #include <linux/skbuff.h>
 #include <net/ip_tunnels.h>
+#include <net/vpls.h>
 #include <net/dst.h>
 
 enum metadata_type {
        METADATA_IP_TUNNEL,
        METADATA_HW_PORT_MUX,
+       METADATA_VPLS,
 };
 
 struct hw_port_info {
@@ -21,6 +23,7 @@ struct metadata_dst {
        union {
                struct ip_tunnel_info   tun_info;
                struct hw_port_info     port_info;
+               struct vpls_info        vpls_info;
        } u;
 };
 
@@ -49,6 +52,15 @@ static inline struct ip_tunnel_info *skb_tunnel_info(struct sk_buff *skb)
        return NULL;
 }
 
+/* Return the VPLS metadata attached to @skb's dst, or NULL when the skb has
+ * no metadata dst or its metadata is not of type METADATA_VPLS.
+ */
+static inline struct vpls_info *skb_vpls_info(struct sk_buff *skb)
+{
+       struct metadata_dst *md = skb_metadata_dst(skb);
+
+       if (!md || md->type != METADATA_VPLS)
+               return NULL;
+
+       return &md->u.vpls_info;
+}
+
+
 static inline bool skb_valid_dst(const struct sk_buff *skb)
 {
        struct dst_entry *dst = skb_dst(skb);
@@ -73,6 +85,9 @@ static inline int dst_metadata_cmp(const struct dst_entry *dst_a,
        case METADATA_HW_PORT_MUX:
                return memcmp(&a->u.port_info, &b->u.port_info,
                              sizeof(a->u.port_info));
+       case METADATA_VPLS:
+               return memcmp(&a->u.vpls_info, &b->u.vpls_info,
+                             sizeof(a->u.vpls_info));
        case METADATA_IP_TUNNEL:
                return memcmp(&a->u.tun_info, &b->u.tun_info,
                              sizeof(a->u.tun_info) +
@@ -218,4 +233,10 @@ static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb,
                                  0, ip6_flowlabel(ip6h), flags, tunnel_id,
                                  md_size);
 }
+
+/* Allocate a metadata dst of type METADATA_VPLS with GFP_ATOMIC, passing 0
+ * as the first argument of metadata_dst_alloc().  The result of that call is
+ * returned unchanged; vpls_rcv() treats a NULL result as allocation failure.
+ */
+static inline struct metadata_dst *vpls_rx_dst(void)
+{
+       struct metadata_dst *md_dst;
+
+       md_dst = metadata_dst_alloc(0, METADATA_VPLS, GFP_ATOMIC);
+       return md_dst;
+}
+
 #endif /* __NET_DST_METADATA_H */
diff --git a/include/net/vpls.h b/include/net/vpls.h
new file mode 100644
index 000000000000..b261e2d97734
--- /dev/null
+++ b/include/net/vpls.h
@@ -0,0 +1,8 @@
+#ifndef __NET_VPLS_H
+#define __NET_VPLS_H 1
+
+/* u32 is used below; include its definition so this header is
+ * self-contained regardless of include order.
+ */
+#include <linux/types.h>
+
+/* VPLS metadata carried in the union of struct metadata_dst when the
+ * metadata type is METADATA_VPLS.
+ */
+struct vpls_info {
+       /* MPLS label of the pseudowire: filled in from the received label on
+        * the receive path, and used to select the outgoing pseudowire on
+        * the transmit path.
+        */
+       u32             pw_label;
+};
+
+#endif /* __NET_VPLS_H */
diff --git a/net/mpls/Kconfig b/net/mpls/Kconfig
index 5c467ef97311..c15ba73efb34 100644
--- a/net/mpls/Kconfig
+++ b/net/mpls/Kconfig
@@ -27,6 +27,17 @@ config MPLS_ROUTING
        ---help---
         Add support for forwarding of mpls packets.
 
+config MPLS_VPLS
+       bool "VPLS support"
+       default y
+       depends on MPLS_ROUTING && BRIDGE_NETFILTER=n
+       ---help---
+        Add support for VPLS encapsulation and decapsulation.  Not
+        compatible with bridge netfilter, because the latter overwrites
+        the dst metadata that VPLS relies on.
+
+comment "disable 'Bridged IP/ARP packets filtering' for VPLS support"
+       depends on BRIDGE_NETFILTER
+
 config MPLS_IPTUNNEL
        tristate "MPLS: IP over MPLS tunnel support"
        depends on LWTUNNEL && MPLS_ROUTING
diff --git a/net/mpls/Makefile b/net/mpls/Makefile
index 9ca923625016..3c028600a980 100644
--- a/net/mpls/Makefile
+++ b/net/mpls/Makefile
@@ -6,3 +6,4 @@ obj-$(CONFIG_MPLS_ROUTING) += mpls_router.o
 obj-$(CONFIG_MPLS_IPTUNNEL) += mpls_iptunnel.o
 
 mpls_router-y := af_mpls.o
+mpls_router-$(CONFIG_MPLS_VPLS) += vpls.o
diff --git a/net/mpls/vpls.c b/net/mpls/vpls.c
new file mode 100644
index 000000000000..28ac810da6e9
--- /dev/null
+++ b/net/mpls/vpls.c
@@ -0,0 +1,469 @@
+/*
+ *  net/mpls/vpls.c
+ *
+ *  Copyright (C) 2016 David Lamparter
+ *
+ */
+
+#include <linux/netdevice.h>
+#include <linux/slab.h>
+#include <linux/ethtool.h>
+#include <linux/etherdevice.h>
+#include <linux/u64_stats_sync.h>
+#include <linux/mpls.h>
+
+#include <net/rtnetlink.h>
+#include <net/dst.h>
+#include <net/xfrm.h>
+#include <net/mpls.h>
+#include <linux/module.h>
+#include <net/dst_metadata.h>
+#include <net/ip_tunnels.h>
+
+#include "internal.h"
+
+#define DRV_NAME       "vpls"
+
+#define MIN_MTU 68             /* Min L3 MTU */
+#define MAX_MTU 65535          /* Max L3 MTU (arbitrary) */
+
+/* List of pseudowire labels attached to one VPLS device.
+ *
+ * Readers access the list through RCU.  Writers never modify a published
+ * list: they allocate a new one, publish it with rcu_assign_pointer() and
+ * release the old one with kfree_rcu().
+ */
+struct vpls_wirelist {
+       struct rcu_head rcu;    /* for kfree_rcu() */
+       size_t count;           /* number of valid entries in wires[] */
+       unsigned wires[];       /* pseudowire labels (flexible array member) */
+};
+
+/* Per-device private data of a VPLS net_device (see netdev_priv() users). */
+struct vpls_priv {
+       /* Namespace used for the MPLS route lookup on transmit; a reference
+        * is taken in vpls_newlink() and dropped in vpls_dev_free().
+        */
+       struct net *encap_net;
+       /* Current pseudowire label list; allocated in vpls_dev_init() and
+        * replaced wholesale by vpls_label_update().
+        */
+       struct vpls_wirelist __rcu *wires;
+};
+
+/* Transmit @skb on the pseudowire identified by MPLS label @wire.
+ *
+ * The label is looked up in vpls->encap_net and must resolve to a route
+ * that is bound to @dev.  Must be called inside an RCU read-side critical
+ * section (the route is looked up with mpls_route_input_rcu()).
+ *
+ * Returns -ENOENT if no route exists for @wire and -EINVAL if the route
+ * belongs to a different VPLS device; in both cases @skb has not been
+ * touched and remains owned by the caller.  Otherwise returns the result
+ * of mpls_rt_xmit().
+ */
+static int vpls_xmit_wire(struct sk_buff *skb, struct net_device *dev,
+                         struct vpls_priv *vpls, u32 wire)
+{
+       /* The designated initializer zeroes every field not listed, so no
+        * uninitialized stack contents are passed on to mpls_rt_xmit().
+        */
+       struct mpls_entry_decoded dec = {
+               .ttl = 255,
+               .bos = 1,
+       };
+       struct mpls_route *rt;
+
+       rt = mpls_route_input_rcu(vpls->encap_net, wire);
+       if (!rt)
+               return -ENOENT;
+       if (rt->rt_vpls_dev != dev)
+               return -EINVAL;
+
+       return mpls_rt_xmit(skb, rt, dec);
+}
+
+/* ndo_start_xmit handler for VPLS devices.
+ *
+ * If @skb carries VPLS metadata, it is sent only on the pseudowire named
+ * by that metadata.  Otherwise a clone of @skb is sent on every pseudowire
+ * in the device's wire list and the original is released afterwards.
+ *
+ * Returns NETDEV_TX_OK when the frame went out on at least one wire, or a
+ * negative errno after the frame has been dropped.  @skb is consumed in
+ * all cases.
+ */
+static netdev_tx_t vpls_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+       int err = -EINVAL, ok_count = 0;
+       struct vpls_priv *priv = netdev_priv(dev);
+       struct vpls_info *vi;
+       struct pcpu_sw_netstats *stats;
+       size_t len = skb->len;  /* sampled before the skb is handed off */
+
+       rcu_read_lock();
+       vi = skb_vpls_info(skb);
+
+       skb_orphan(skb);
+       skb_forward_csum(skb);
+
+       if (vi) {
+               err = vpls_xmit_wire(skb, dev, priv, vi->pw_label);
+               if (err)
+                       goto out_err;
+       } else {
+               struct sk_buff *cloned;
+               struct vpls_wirelist *wl;
+               size_t i;
+
+               wl = rcu_dereference(priv->wires);
+               if (wl->count == 0) {
+                       dev->stats.tx_carrier_errors++;
+                       goto out_err;
+               }
+
+               for (i = 0; i < wl->count; i++) {
+                       /* We are in the transmit path and inside an RCU
+                        * read-side section, so the allocation must not
+                        * sleep.
+                        */
+                       cloned = skb_clone(skb, GFP_ATOMIC);
+                       if (!cloned) {
+                               dev->stats.tx_dropped++;
+                               continue;
+                       }
+                       if (vpls_xmit_wire(cloned, dev, priv, wl->wires[i]))
+                               kfree_skb(cloned);
+                       else
+                               ok_count++;
+               }
+               if (!ok_count)
+                       goto out_err;
+
+               /* every wire got its own clone; the original is done */
+               consume_skb(skb);
+       }
+
+       stats = this_cpu_ptr(dev->tstats);
+       u64_stats_update_begin(&stats->syncp);
+       stats->tx_packets++;
+       stats->tx_bytes += len;
+       u64_stats_update_end(&stats->syncp);
+
+       rcu_read_unlock();
+       return NETDEV_TX_OK;
+
+out_err:
+       dev->stats.tx_errors++;
+
+       /* this is a drop, not a normal completion */
+       kfree_skb(skb);
+       rcu_read_unlock();
+       return err;
+}
+
+/* Receive an MPLS packet whose route @rt points at a VPLS device.
+ *
+ * The MPLS header @hdr is stripped, the payload is treated as an Ethernet
+ * frame, VPLS metadata holding the received label is attached as the skb's
+ * dst, and the frame is handed to netif_rx() as if received on the VPLS
+ * device.  @pt and @orig_dev are not used here.
+ *
+ * Returns 0 once the skb has been passed to netif_rx(), or NET_RX_DROP if
+ * it was dropped.  @skb is consumed in either case.
+ */
+int vpls_rcv(struct sk_buff *skb, struct net_device *in_dev,
+            struct packet_type *pt, struct mpls_route *rt,
+            struct mpls_shim_hdr *hdr, struct net_device *orig_dev)
+{
+       struct net_device *dev = rt->rt_vpls_dev;
+       struct mpls_entry_decoded dec;
+       struct metadata_dst *md_dst;
+       struct pcpu_sw_netstats *stats;
+
+       if (!dev)
+               goto drop_nodev;
+
+       /* only accept the label if it is the bottom of the label stack */
+       dec = mpls_entry_decode(hdr);
+       if (!dec.bos) {
+               dev->stats.rx_frame_errors++;
+               goto drop;
+       }
+
+       skb_pull(skb, sizeof(*hdr));
+
+       /* the payload must at least hold an Ethernet header */
+       if (unlikely(!pskb_may_pull(skb, ETH_HLEN))) {
+               dev->stats.rx_length_errors++;
+               goto drop;
+       }
+
+       md_dst = vpls_rx_dst();
+       if (unlikely(!md_dst)) {
+               netdev_err(dev, "failed to allocate dst metadata\n");
+               goto drop;
+       }
+       /* remember which pseudowire the frame arrived on */
+       md_dst->u.vpls_info.pw_label = dec.label;
+
+       skb->dev = dev;
+
+       skb_reset_mac_header(skb);
+       skb->protocol = eth_type_trans(skb, dev);
+       skb->ip_summed = CHECKSUM_NONE;
+       /* NOTE(review): this assignment comes after eth_type_trans() and so
+        * replaces whatever packet type that call may have chosen - confirm
+        * this is intended.
+        */
+       skb->pkt_type = PACKET_HOST;
+
+       skb_clear_hash(skb);
+       skb->vlan_tci = 0;
+       skb_set_queue_mapping(skb, 0);
+       /* second argument is true when crossing network namespaces */
+       skb_scrub_packet(skb, !net_eq(dev_net(in_dev), dev_net(dev)));
+
+       skb_reset_network_header(skb);
+       skb_probe_transport_header(skb, 0);
+
+       /* replace any existing dst with the VPLS metadata dst */
+       skb_dst_drop(skb);
+       skb_dst_set(skb, &md_dst->dst);
+
+       stats = this_cpu_ptr(dev->tstats);
+       u64_stats_update_begin(&stats->syncp);
+       stats->rx_packets++;
+       stats->rx_bytes += skb->len;
+       u64_stats_update_end(&stats->syncp);
+
+       netif_rx(skb);
+       return 0;
+
+drop:
+       dev->stats.rx_errors++;
+drop_nodev:
+       kfree_skb(skb);
+       return NET_RX_DROP;
+}
+
+/* Remove @label from the wire list of VPLS device @dev.
+ *
+ * Caller must hold RTNL.  A new list without @label is published and the
+ * old one is freed after a grace period.  Carrier is turned off when the
+ * last pseudowire goes away.  Failures are logged and leave the list
+ * unchanged.
+ */
+static void vpls_wire_remove(struct net_device *dev, unsigned label)
+{
+       struct vpls_priv *priv = netdev_priv(dev);
+       struct vpls_wirelist *wl, *wl_new;
+       size_t i;
+
+       /* update side: protected by RTNL, not by rcu_read_lock() */
+       wl = rtnl_dereference(priv->wires);
+
+       for (i = 0; i < wl->count; i++)
+               if (wl->wires[i] == label)
+                       break;
+
+       if (i == wl->count) {
+               netdev_err(dev, "can't find pseudowire to remove!\n");
+               return;
+       }
+
+       wl_new = kmalloc(sizeof(*wl_new) +
+                        (wl->count - 1) * sizeof(wl->wires[0]),
+                        GFP_ATOMIC);
+       if (!wl_new) {
+               netdev_err(dev, "out of memory for pseudowire delete!\n");
+               return;
+       }
+
+       /* copy everything except entry i */
+       wl_new->count = wl->count - 1;
+       memcpy(wl_new->wires, wl->wires, i * sizeof(wl->wires[0]));
+       memcpy(wl_new->wires + i, wl->wires + i + 1,
+              (wl->count - i - 1) * sizeof(wl->wires[0]));
+
+       rcu_assign_pointer(priv->wires, wl_new);
+       kfree_rcu(wl, rcu);
+
+       if (wl_new->count == 0)
+               netif_carrier_off(dev);
+}
+
+/* Append @label to the wire list of VPLS device @dev.
+ *
+ * Caller must hold RTNL.  A new list with @label at the end is published
+ * and the old one is freed after a grace period.  Carrier is turned on
+ * when the first pseudowire is added.  Allocation failure is logged and
+ * leaves the list unchanged.
+ */
+static void vpls_wire_add(struct net_device *dev, unsigned label)
+{
+       struct vpls_priv *priv = netdev_priv(dev);
+       struct vpls_wirelist *wl, *wl_new;
+
+       /* update side: protected by RTNL, not by rcu_read_lock() */
+       wl = rtnl_dereference(priv->wires);
+
+       wl_new = kmalloc(sizeof(*wl_new) +
+                        (wl->count + 1) * sizeof(wl->wires[0]),
+                        GFP_ATOMIC);
+       if (!wl_new) {
+               netdev_err(dev, "out of memory for pseudowire add!\n");
+               return;
+       }
+
+       wl_new->count = wl->count + 1;
+       memcpy(wl_new->wires, wl->wires, wl->count * sizeof(wl->wires[0]));
+       wl_new->wires[wl->count] = label;
+
+       rcu_assign_pointer(priv->wires, wl_new);
+       kfree_rcu(wl, rcu);
+
+       if (wl_new->count == 1)
+               netif_carrier_on(dev);
+}
+
+/* Keep the per-device pseudowire lists in sync with an MPLS route change.
+ *
+ * @label:  the MPLS label whose route is being changed
+ * @rt_old: previous route for @label, or NULL
+ * @rt_new: new route for @label, or NULL
+ *
+ * If the VPLS device bound to the route changed, @label is removed from
+ * the old device's wire list and added to the new device's list.  A failed
+ * removal does not prevent the addition.  Must be called with RTNL held.
+ */
+void vpls_label_update(unsigned label, struct mpls_route *rt_old,
+                      struct mpls_route *rt_new)
+{
+       ASSERT_RTNL();
+
+       /* same device before and after: the wire lists need no change */
+       if (rt_old && rt_new && rt_old->rt_vpls_dev == rt_new->rt_vpls_dev)
+               return;
+
+       if (rt_old && rt_old->rt_vpls_dev)
+               vpls_wire_remove(rt_old->rt_vpls_dev, label);
+
+       if (rt_new && rt_new->rt_vpls_dev)
+               vpls_wire_add(rt_new->rt_vpls_dev, label);
+}
+
+/* Fake multicast ability: installed as ndo_set_rx_mode and intentionally
+ * empty, there is nothing to program for this device.
+ */
+static void vpls_set_multicast_list(struct net_device *dev)
+{
+}
+
+/* ndo_open handler: report carrier if at least one pseudowire is already
+ * attached to the device.  Always returns 0.
+ */
+static int vpls_open(struct net_device *dev)
+{
+       struct vpls_priv *priv = netdev_priv(dev);
+       struct vpls_wirelist *wl;
+
+       /* ndo_open runs under RTNL, which also serializes wire list updates
+        * in vpls_label_update(); no RCU read-side section is held here.
+        */
+       wl = rtnl_dereference(priv->wires);
+       if (wl->count > 0)
+               netif_carrier_on(dev);
+
+       return 0;
+}
+
+/* ndo_stop handler: drop carrier.  Always returns 0. */
+static int vpls_close(struct net_device *dev)
+{
+       netif_carrier_off(dev);
+       return 0;
+}
+
+/* Return 1 if @new_mtu lies within [MIN_MTU, MAX_MTU], 0 otherwise. */
+static int is_valid_vpls_mtu(int new_mtu)
+{
+       if (new_mtu < MIN_MTU)
+               return 0;
+       if (new_mtu > MAX_MTU)
+               return 0;
+
+       return 1;
+}
+
+/* ndo_change_mtu handler: store @new_mtu in the device if it passes
+ * is_valid_vpls_mtu().  Returns 0 on success, -EINVAL otherwise.
+ */
+static int vpls_change_mtu(struct net_device *dev, int new_mtu)
+{
+       if (is_valid_vpls_mtu(new_mtu)) {
+               dev->mtu = new_mtu;
+               return 0;
+       }
+
+       return -EINVAL;
+}
+
+/* ndo_init handler: allocate the (initially empty) pseudowire list and the
+ * per-CPU statistics.
+ *
+ * Returns 0 on success or -ENOMEM.  On failure nothing is left allocated
+ * and priv->wires is never set, so no dangling pointer remains.
+ */
+static int vpls_dev_init(struct net_device *dev)
+{
+       struct vpls_priv *priv = netdev_priv(dev);
+       struct vpls_wirelist *wl;
+
+       /* zeroed, so count == 0 */
+       wl = kzalloc(sizeof(*wl), GFP_KERNEL);
+       if (!wl)
+               return -ENOMEM;
+
+       dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+       if (!dev->tstats) {
+               kfree(wl);
+               return -ENOMEM;
+       }
+
+       /* publish only once everything succeeded; priv->wires is an __rcu
+        * pointer and must not be assigned directly
+        */
+       RCU_INIT_POINTER(priv->wires, wl);
+       return 0;
+}
+
+/* priv_destructor: release the resources owned by the private area - the
+ * per-CPU stats, the pseudowire list and the encap namespace reference.
+ *
+ * This must NOT free the net_device itself: the core frees it because
+ * vpls_setup() sets needs_free_netdev.  Calling free_netdev() here would
+ * free the device twice when registration fails after the destructor ran.
+ */
+static void vpls_dev_free(struct net_device *dev)
+{
+       struct vpls_priv *priv = netdev_priv(dev);
+
+       free_percpu(dev->tstats);
+
+       /* device is going away, no readers left; kfree(NULL) is a no-op */
+       kfree(rcu_dereference_protected(priv->wires, 1));
+
+       /* encap_net is only set once vpls_newlink() has registered us */
+       if (priv->encap_net)
+               put_net(priv->encap_net);
+}
+
+static const struct net_device_ops vpls_netdev_ops = {
+       .ndo_init               = vpls_dev_init,
+       .ndo_open               = vpls_open,
+       .ndo_stop               = vpls_close,
+       .ndo_start_xmit         = vpls_xmit,
+       .ndo_change_mtu         = vpls_change_mtu,
+       .ndo_get_stats64        = ip_tunnel_get_stats64,
+       .ndo_set_rx_mode        = vpls_set_multicast_list,
+       .ndo_set_mac_address    = eth_mac_addr,
+       .ndo_features_check     = passthru_features_check,
+};
+
+/* Return non-zero if @dev is a VPLS device, i.e. uses vpls_netdev_ops. */
+int is_vpls_dev(struct net_device *dev)
+{
+       return dev->netdev_ops == &vpls_netdev_ops;
+}
+
+#define VPLS_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | \
+                      NETIF_F_HW_CSUM | NETIF_F_RXCSUM | NETIF_F_HIGHDMA)
+
+/* rtnl_link_ops.setup: initialize a freshly allocated VPLS net_device as a
+ * queueless Ethernet-type device.
+ */
+static void vpls_setup(struct net_device *dev)
+{
+       ether_setup(dev);
+
+       /* ether_setup() limits the MTU to the Ethernet default range; widen
+        * it so the core accepts everything is_valid_vpls_mtu() allows.
+        */
+       dev->min_mtu = MIN_MTU;
+       dev->max_mtu = MAX_MTU;
+
+       dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+       dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+       dev->priv_flags |= IFF_NO_QUEUE;
+
+       dev->netdev_ops = &vpls_netdev_ops;
+       dev->features |= NETIF_F_LLTX;
+       dev->features |= VPLS_FEATURES;
+       dev->vlan_features = dev->features;
+
+       /* the core runs vpls_dev_free() and then frees the device itself */
+       dev->needs_free_netdev = true;
+       dev->priv_destructor = vpls_dev_free;
+
+       dev->hw_features = VPLS_FEATURES;
+       dev->hw_enc_features = VPLS_FEATURES;
+
+       /* vpls_xmit() reads the VPLS metadata from the skb's dst */
+       netif_keep_dst(dev);
+}
+
+/*
+ * netlink interface
+ */
+
+/* rtnl_link_ops.validate: check the optional IFLA_ADDRESS and IFLA_MTU
+ * attributes of a link request.
+ *
+ * Returns 0 if acceptable, -EINVAL for a wrong address length or an MTU
+ * rejected by is_valid_vpls_mtu(), and -EADDRNOTAVAIL for an address that
+ * is not a valid Ethernet address.  The offending attribute and a message
+ * are reported through @extack.  @data is not used.
+ */
+static int vpls_validate(struct nlattr *tb[], struct nlattr *data[],
+                        struct netlink_ext_ack *extack)
+{
+       struct nlattr *addr = tb[IFLA_ADDRESS];
+       struct nlattr *mtu = tb[IFLA_MTU];
+
+       if (addr && nla_len(addr) != ETH_ALEN) {
+               NL_SET_ERR_MSG_ATTR(extack, addr,
+                                   "Invalid Ethernet address length");
+               return -EINVAL;
+       }
+
+       if (addr && !is_valid_ether_addr(nla_data(addr))) {
+               NL_SET_ERR_MSG_ATTR(extack, addr,
+                                   "Invalid Ethernet address");
+               return -EADDRNOTAVAIL;
+       }
+
+       if (mtu && !is_valid_vpls_mtu(nla_get_u32(mtu))) {
+               NL_SET_ERR_MSG_ATTR(extack, mtu,
+                                   "Invalid MTU");
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static struct rtnl_link_ops vpls_link_ops;
+
+/* rtnl_link_ops.newlink: finish setting up and register a new VPLS device.
+ *
+ * A random MAC address is chosen unless IFLA_ADDRESS was supplied, and the
+ * name defaults to the "vpls%d" template unless IFLA_IFNAME was supplied.
+ * After successful registration a reference on @src_net is stored as the
+ * encap namespace and the device starts with carrier off.
+ *
+ * Returns 0 on success or the negative error from register_netdevice().
+ * @data and @extack are not used.
+ */
+static int vpls_newlink(struct net *src_net, struct net_device *dev,
+                       struct nlattr *tb[], struct nlattr *data[],
+                       struct netlink_ext_ack *extack)
+{
+       struct vpls_priv *priv = netdev_priv(dev);
+       int ret;
+
+       if (!tb[IFLA_ADDRESS])
+               eth_hw_addr_random(dev);
+
+       if (!tb[IFLA_IFNAME])
+               snprintf(dev->name, IFNAMSIZ, DRV_NAME "%%d");
+       else
+               nla_strlcpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ);
+
+       ret = register_netdevice(dev);
+       if (ret < 0)
+               return ret;
+
+       priv->encap_net = get_net(src_net);
+       netif_carrier_off(dev);
+
+       return 0;
+}
+
+/* rtnl_link_ops.dellink: queue @dev for unregistration on @head. */
+static void vpls_dellink(struct net_device *dev, struct list_head *head)
+{
+       unregister_netdevice_queue(dev, head);
+}
+
+
+/* rtnetlink link type "vpls": ties the link kind name to the setup,
+ * validate, newlink and dellink handlers above; each device gets a
+ * struct vpls_priv as its private area.
+ */
+static struct rtnl_link_ops vpls_link_ops = {
+       .kind           = DRV_NAME,
+       .priv_size      = sizeof(struct vpls_priv),
+       .setup          = vpls_setup,
+       .validate       = vpls_validate,
+       .newlink        = vpls_newlink,
+       .dellink        = vpls_dellink,
+};
+
+/*
+ * init/fini
+ */
+
+/* Register the "vpls" rtnetlink link type.  Returns 0 on success or the
+ * error reported by rtnl_link_register().
+ */
+__init int vpls_init(void)
+{
+       return rtnl_link_register(&vpls_link_ops);
+}
+
+/* Unregister the "vpls" rtnetlink link type registered by vpls_init(). */
+__exit void vpls_exit(void)
+{
+       rtnl_link_unregister(&vpls_link_ops);
+}
+
+#if 0
+/* not currently available as a separate module... */
+
+module_init(vpls_init);
+module_exit(vpls_exit);
+
+MODULE_DESCRIPTION("Virtual Private LAN Service");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_RTNL_LINK(DRV_NAME);
+#endif
-- 
2.13.0

Reply via email to