From: Roopa Prabhu <ro...@cumulusnetworks.com> Still in the works and not fully functional. In some cases I am still going back and forth on a few things.
Started with thinking the tunnel hdr can be shared and refcounted. Its somewhere mid way now. Signed-off-by: Roopa Prabhu <ro...@cumulusnetworks.com> --- include/linux/if_lwtunnel.h | 8 ++ include/net/lwtunnel.h | 61 +++++++++++++ include/uapi/linux/if_lwtunnel.h | 12 +++ include/uapi/linux/rtnetlink.h | 8 +- net/Makefile | 2 +- net/lwtunnel.c | 177 ++++++++++++++++++++++++++++++++++++++ 6 files changed, 266 insertions(+), 2 deletions(-) create mode 100644 include/linux/if_lwtunnel.h create mode 100644 include/net/lwtunnel.h create mode 100644 include/uapi/linux/if_lwtunnel.h create mode 100644 net/lwtunnel.c diff --git a/include/linux/if_lwtunnel.h b/include/linux/if_lwtunnel.h new file mode 100644 index 0000000..5fa7ca2 --- /dev/null +++ b/include/linux/if_lwtunnel.h @@ -0,0 +1,8 @@ +#ifndef _IF_LWTUNNEL_H_ +#define _IF_LWTUNNEL_H_ + +#include <linux/ip.h> +#include <linux/in6.h> +#include <uapi/linux/if_lwtunnel.h> + +#endif /* _IF_LWTUNNEL_H_ */ diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h new file mode 100644 index 0000000..3964f15 --- /dev/null +++ b/include/net/lwtunnel.h @@ -0,0 +1,61 @@ +#ifndef __NET_LW_TUNNELS_H +#define __NET_LW_TUNNELS_H 1 + +#include <linux/if_lwtunnel.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <linux/types.h> +#include <linux/u64_stats_sync.h> +#include <net/dsfield.h> +#include <net/gro_cells.h> +#include <net/inet_ecn.h> +#include <net/ip.h> +#include <net/netns/generic.h> +#include <net/rtnetlink.h> + +#define LW_TNL_HASH_BITS 7 +#define LW_TNL_HASH_SIZE (1 << LW_TNL_HASH_BITS) + +struct lwtunnel_hdr { + int len; + __u8 data[0]; +}; + +struct lwtunnel_state { + __u16 type; + atomic_t refcnt; + spinlock_t lock; + struct lwtunnel_hdr tunnel; +}; + +struct lwtunnel_net { + struct hlist_head tunnels[LW_TNL_HASH_SIZE]; +}; + +struct lwtunnel_encap_ops { + size_t (*encap_hlen)(struct rtencap *e); + int (*build_state)(struct rtencap *e, int len, struct lwtunnel_state **ts); + int 
(*output)(struct sock *sk, struct sk_buff *skb); + int (*dump_encap_hdr)(struct sk_buff *skb, int attr, struct lwtunnel_state *lwtstate); +}; + +#define MAX_LWTUN_ENCAP_OPS 8 +extern const struct lwtunnel_encap_ops __rcu * + lwtun_encaps[MAX_LWTUN_ENCAP_OPS]; + +int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op, + unsigned int num); +int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op, + unsigned int num); +int lwtunnel_dst_set_output(struct lwtunnel_state *lwtstate, + struct dst_entry *dst); +int lwtunnel_build_state(struct rtencap *encap, int len, + struct lwtunnel_state **lws); +int lwtunnel_dump_encap(struct sk_buff *skb, + struct lwtunnel_state *lwtstate); +int lwtunnel_encap_size(struct lwtunnel_state *lwtstate); + +struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len); +void lwtunnel_state_free(struct lwtunnel_state *lws); + +#endif /* __NET_LW_TUNNELS_H */ diff --git a/include/uapi/linux/if_lwtunnel.h b/include/uapi/linux/if_lwtunnel.h new file mode 100644 index 0000000..28b8497 --- /dev/null +++ b/include/uapi/linux/if_lwtunnel.h @@ -0,0 +1,12 @@ +#ifndef _UAPI_IF_LW_TUNNEL_H_ +#define _UAPI_IF_LW_TUNNEL_H_ + +#include <linux/types.h> +#include <asm/byteorder.h> + +enum tunnel_encap_types { + LW_TUNNEL_ENCAP_NONE, + LW_TUNNEL_ENCAP_MPLS, +}; + +#endif /* _UAPI_IF_LW_TUNNEL_H_ */ diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 17fb02f..47e5de1 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -308,6 +308,7 @@ enum rtattr_type_t { RTA_VIA, RTA_NEWDST, RTA_PREF, + RTA_ENCAP, __RTA_MAX }; @@ -356,8 +357,13 @@ struct rtvia { __u8 rtvia_addr[0]; }; -/* RTM_CACHEINFO */ +/* RTA_ENCAP */ +struct rtencap { + __u16 type; + __u8 dst[0]; +}; +/* RTM_CACHEINFO */ struct rta_cacheinfo { __u32 rta_clntref; __u32 rta_lastuse; diff --git a/net/Makefile b/net/Makefile index 3995613..6d51a9d 100644 --- a/net/Makefile +++ b/net/Makefile @@ -5,7 +5,7 @@ # Rewritten to use lists 
instead of if-statements. # -obj-$(CONFIG_NET) := socket.o core/ +obj-$(CONFIG_NET) := socket.o lwtunnel.o core/ tmp-$(CONFIG_COMPAT) := compat.o obj-$(CONFIG_NET) += $(tmp-y) diff --git a/net/lwtunnel.c b/net/lwtunnel.c new file mode 100644 index 0000000..e367a60 --- /dev/null +++ b/net/lwtunnel.c @@ -0,0 +1,177 @@ +/* + * Copyright (c) 2015 Cumulus Networks, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/capability.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/uaccess.h> +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/in.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include <linux/if_arp.h> +#include <linux/mroute.h> +#include <linux/init.h> +#include <linux/in6.h> +#include <linux/inetdevice.h> +#include <linux/igmp.h> +#include <linux/netfilter_ipv4.h> +#include <linux/etherdevice.h> +#include <linux/if_ether.h> +#include <linux/if_vlan.h> +#include <linux/rculist.h> +#include <linux/err.h> + +#include <net/sock.h> +#include <net/ip.h> +#include <net/icmp.h> +#include <net/protocol.h> +#include <net/lwtunnel.h> +#include <net/arp.h> +#include <net/checksum.h> +#include <net/dsfield.h> +#include <net/inet_ecn.h> +#include <net/rtnetlink.h> +#include <net/udp.h> + 
+
+/*
+ * Allocate a zero-filled lwtunnel_state with hdr_len extra bytes placed
+ * directly after it (reachable through lws->tunnel.data).
+ *
+ * Returns the new state, or NULL if hdr_len is negative or the
+ * allocation fails.  All fields, including tunnel.len, are left zeroed
+ * for the caller to fill in.
+ */
+struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len)
+{
+	/* A negative length would shrink or wrap the allocation size. */
+	if (hdr_len < 0)
+		return NULL;
+
+	return kzalloc(sizeof(struct lwtunnel_state) + hdr_len, GFP_KERNEL);
+}
+EXPORT_SYMBOL(lwtunnel_state_alloc);
+
+/*
+ * Release a state obtained from lwtunnel_state_alloc().
+ * NOTE(review): refcnt is not consulted here -- confirm whether callers
+ * are expected to drop the last reference before calling this.
+ */
+void lwtunnel_state_free(struct lwtunnel_state *lws)
+{
+	kfree(lws);
+}
+EXPORT_SYMBOL(lwtunnel_state_free);
+
+/* Registered encap handlers, indexed by encap type. */
+const struct lwtunnel_encap_ops __rcu *
+		lwtun_encaps[MAX_LWTUN_ENCAP_OPS] __read_mostly;
+
+/*
+ * Register @ops as the handler for encap type @num.
+ *
+ * Returns 0 on success, -ERANGE if @num does not fit in lwtun_encaps[],
+ * or -1 if the slot is already occupied.
+ */
+int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *ops,
+			   unsigned int num)
+{
+	if (num >= MAX_LWTUN_ENCAP_OPS)
+		return -ERANGE;
+
+	/* Only claim the slot if nobody else has registered for it. */
+	return !cmpxchg((const struct lwtunnel_encap_ops **)
+			&lwtun_encaps[num],
+			NULL, ops) ? 0 : -1;
+}
+EXPORT_SYMBOL(lwtunnel_encap_add_ops);
+
+/*
+ * Unregister @ops from encap type @num.
+ *
+ * Returns 0 on success, -ERANGE if @num does not fit in lwtun_encaps[],
+ * or -1 if the slot does not currently hold @ops.
+ */
+int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops,
+			   unsigned int num)
+{
+	int ret;
+
+	if (num >= MAX_LWTUN_ENCAP_OPS)
+		return -ERANGE;
+
+	/* Only clear the slot if it still points at the caller's ops. */
+	ret = (cmpxchg((const struct lwtunnel_encap_ops **)
+		       &lwtun_encaps[num],
+		       ops, NULL) == ops) ? 0 : -1;
+
+	/* Wait for readers that may still be using the old pointer. */
+	synchronize_net();
+
+	return ret;
+}
+EXPORT_SYMBOL(lwtunnel_encap_del_ops);
+
+/*
+ * Build tunnel state for @encap by calling the build_state handler
+ * registered for encap->type.
+ *
+ * Returns the handler's result, or -EINVAL if the type is
+ * LW_TUNNEL_ENCAP_NONE, is out of range, or has no build_state handler.
+ */
+int lwtunnel_build_state(struct rtencap *encap, int len,
+			 struct lwtunnel_state **lws)
+{
+	const struct lwtunnel_encap_ops *ops;
+	int ret = -EINVAL;
+
+	/* type indexes lwtun_encaps[], so reject everything past its end. */
+	if (encap->type == LW_TUNNEL_ENCAP_NONE ||
+	    encap->type >= MAX_LWTUN_ENCAP_OPS)
+		return ret;
+
+	rcu_read_lock();
+	ops = rcu_dereference(lwtun_encaps[encap->type]);
+	if (likely(ops && ops->build_state))
+		ret = ops->build_state(encap, len, lws);
+	rcu_read_unlock();
+
+	/* Propagate the handler's result instead of reporting success. */
+	return ret;
+}
+EXPORT_SYMBOL(lwtunnel_build_state);
+
+/*
+ * Point dst->output at the output handler registered for the encap
+ * type of @lwstate.
+ *
+ * Returns 0 if the handler was installed, or if the type is
+ * LW_TUNNEL_ENCAP_NONE or out of range (dst is left untouched);
+ * -EINVAL if no output handler is registered for the type.
+ */
+int lwtunnel_dst_set_output(struct lwtunnel_state *lwstate,
+			    struct dst_entry *dst)
+{
+	const struct lwtunnel_encap_ops *ops;
+	int ret = -EINVAL;
+
+	/* type indexes lwtun_encaps[], so skip everything past its end. */
+	if (lwstate->type == LW_TUNNEL_ENCAP_NONE ||
+	    lwstate->type >= MAX_LWTUN_ENCAP_OPS)
+		return 0;
+
+	rcu_read_lock();
+	ops = rcu_dereference(lwtun_encaps[lwstate->type]);
+	if (likely(ops && ops->output)) {
+		dst->output = ops->output;
+		ret = 0;
+	}
+	rcu_read_unlock();
+
+	return ret;
+}
+EXPORT_SYMBOL(lwtunnel_dst_set_output);
+
+/*
+ * Dump the encap header of @lwstate into @skb as an RTA_ENCAP attribute
+ * using the dump_encap_hdr handler registered for its type.
+ *
+ * Returns the handler's result; 0 if the type is LW_TUNNEL_ENCAP_NONE
+ * or out of range (nothing is dumped); -EINVAL if no dump_encap_hdr
+ * handler is registered for the type.
+ */
+int lwtunnel_dump_encap(struct sk_buff *skb, struct lwtunnel_state *lwstate)
+{
+	const struct lwtunnel_encap_ops *ops;
+	int ret = -EINVAL;
+
+	/* type indexes lwtun_encaps[], so skip everything past its end. */
+	if (lwstate->type == LW_TUNNEL_ENCAP_NONE ||
+	    lwstate->type >= MAX_LWTUN_ENCAP_OPS)
+		return 0;
+
+	rcu_read_lock();
+	ops = rcu_dereference(lwtun_encaps[lwstate->type]);
+	if (likely(ops && ops->dump_encap_hdr))
+		ret = ops->dump_encap_hdr(skb, RTA_ENCAP, lwstate);
+	rcu_read_unlock();
+
+	return ret;
+}
+EXPORT_SYMBOL(lwtunnel_dump_encap);
+
+/* Return the length recorded in the tunnel header of @lwstate. */
+int lwtunnel_encap_size(struct lwtunnel_state *lwstate)
+{
+	return lwstate->tunnel.len;
+}
+EXPORT_SYMBOL(lwtunnel_encap_size);
+
+MODULE_LICENSE("GPL");
--
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html