Currently only one protocol handler for the GREPROTO_CISCO protocol is allowed. Soon the OVS tunnel will register for the same protocol as the GRE device. The following patch extends the GRE de-multiplexer so that multiple GRE modules can register a GRE protocol handler.
Signed-off-by: Pravin B Shelar <[email protected]> --- include/net/gre.h | 12 +++ include/net/ipip.h | 9 ++ net/ipv4/gre.c | 281 ++++++++++++++++++++++++++++++++++++++++++++++++++++ net/ipv4/ip_gre.c | 139 ++++---------------------- 4 files changed, 323 insertions(+), 118 deletions(-) diff --git a/include/net/gre.h b/include/net/gre.h index 8266547..82e9276 100644 --- a/include/net/gre.h +++ b/include/net/gre.h @@ -2,6 +2,7 @@ #define __LINUX_GRE_H #include <linux/skbuff.h> +#include <net/ipip.h> #define GREPROTO_CISCO 0 #define GREPROTO_PPTP 1 @@ -15,4 +16,15 @@ struct gre_protocol { int gre_add_protocol(const struct gre_protocol *proto, u8 version); int gre_del_protocol(const struct gre_protocol *proto, u8 version); +struct gre_protocol_v0 { + int (*handler)(struct sk_buff *skb, struct tnl_ptk_info *tpi); + int (*err_handler)(struct sk_buff *skb, u32 info, + struct tnl_ptk_info *tpi); +}; + +int gre_add_protocol_v0(const struct gre_protocol_v0 *proto, u8 priority); +int gre_del_protocol_v0(const struct gre_protocol_v0 *proto, u8 priority); +void build_gre_header(struct sk_buff *skb, int hlen, __be16 flags, + __be16 proto, __be32 key, u32 seqno); + #endif diff --git a/include/net/ipip.h b/include/net/ipip.h index a93cf6d..4b77671 100644 --- a/include/net/ipip.h +++ b/include/net/ipip.h @@ -38,6 +38,15 @@ struct ip_tunnel { unsigned int prl_count; /* # of entries in PRL */ }; +struct tnl_ptk_info { + __be16 flags; + __be16 proto; + __be32 key; + __be32 seq; + int hdr_len; + __be16 csum; +}; + struct ip_tunnel_prl_entry { struct ip_tunnel_prl_entry __rcu *next; __be32 addr; diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c index 42a4910..2c384c7 100644 --- a/net/ipv4/gre.c +++ b/net/ipv4/gre.c @@ -16,15 +16,27 @@ #include <linux/kernel.h> #include <linux/kmod.h> #include <linux/skbuff.h> +#include <linux/if.h> +#include <linux/icmp.h> #include <linux/in.h> #include <linux/ip.h> +#include <linux/if_tunnel.h> #include <linux/netdevice.h> #include <linux/spinlock.h> 
#include <net/protocol.h> #include <net/gre.h> +#include <net/icmp.h> +#define GREPROTO_V0_MAX 2 +#define GRE_HEADER_SECTION 4 + +struct gre_base_hdr { + __be16 flags; + __be16 protocol; +}; static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly; +static const struct gre_protocol_v0 __rcu *gre_proto_v0[GREPROTO_V0_MAX] __read_mostly; static DEFINE_SPINLOCK(gre_proto_lock); int gre_add_protocol(const struct gre_protocol *proto, u8 version) @@ -112,12 +124,273 @@ static void gre_err(struct sk_buff *skb, u32 info) rcu_read_unlock(); } +int gre_add_protocol_v0(const struct gre_protocol_v0 *proto, u8 priority) +{ + if (priority >= GREPROTO_V0_MAX) + goto err_out; + + spin_lock(&gre_proto_lock); + if (gre_proto_v0[priority]) + goto err_out_unlock; + + RCU_INIT_POINTER(gre_proto_v0[priority], proto); + spin_unlock(&gre_proto_lock); + return 0; + +err_out_unlock: + spin_unlock(&gre_proto_lock); +err_out: + return -1; +} +EXPORT_SYMBOL_GPL(gre_add_protocol_v0); + +int gre_del_protocol_v0(const struct gre_protocol_v0 *proto, u8 priority) +{ + if (priority >= GREPROTO_V0_MAX) + goto err_out; + + spin_lock(&gre_proto_lock); + if (rcu_dereference_protected(gre_proto_v0[priority], + lockdep_is_held(&gre_proto_lock)) != proto) + goto err_out_unlock; + RCU_INIT_POINTER(gre_proto_v0[priority], NULL); + spin_unlock(&gre_proto_lock); + synchronize_rcu(); + return 0; + +err_out_unlock: + spin_unlock(&gre_proto_lock); +err_out: + return -1; +} +EXPORT_SYMBOL_GPL(gre_del_protocol_v0); + +void build_gre_header(struct sk_buff *skb, int hlen, __be16 flags, + __be16 proto, __be32 key, u32 seqno) +{ + struct iphdr *iph = ip_hdr(skb); + + ((__be16 *)(iph + 1))[0] = flags; + ((__be16 *)(iph + 1))[1] = proto; + + if (flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) { + __be32 *ptr = (__be32 *)(((u8 *)iph) + hlen - 4); + + if (flags&GRE_SEQ) { + *ptr = htonl(seqno); + ptr--; + } + if (flags&GRE_KEY) { + *ptr = key; + ptr--; + } + if (flags&GRE_CSUM) { + *ptr = 0; + *(__sum16 *)ptr = 
ip_compute_csum((void *)(iph+1), + skb->len - sizeof(struct iphdr)); + } + } +} +EXPORT_SYMBOL(build_gre_header); + +static __sum16 check_checksum(struct sk_buff *skb) +{ + struct iphdr *iph = ip_hdr(skb); + struct gre_base_hdr *greh = (struct gre_base_hdr *)(iph + 1); + __sum16 csum = 0; + + if (greh->flags & GRE_CSUM) { + switch (skb->ip_summed) { + case CHECKSUM_COMPLETE: + csum = csum_fold(skb->csum); + + if (!csum) + break; + /* Fall through. */ + + case CHECKSUM_NONE: + skb->csum = 0; + csum = __skb_checksum_complete(skb); + skb->ip_summed = CHECKSUM_COMPLETE; + break; + } + } + + return csum; +} + +static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi) +{ + + /* IP and ICMP protocol handlers check that the IHL is valid. */ + struct gre_base_hdr *greh = (struct gre_base_hdr *)skb->data; + __be32 *options = (__be32 *)(greh + 1); + + if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING))) + return -EINVAL; + + tpi->flags = greh->flags; + tpi->proto = greh->protocol; + + tpi->hdr_len = GRE_HEADER_SECTION; + tpi->csum = check_checksum(skb); + + if (greh->flags & GRE_CSUM) { + tpi->hdr_len += GRE_HEADER_SECTION; + options++; + } + + if (greh->flags & GRE_KEY) { + if ((void *)(options + 1) > (void *)skb_tail_pointer(skb)) + return -1; + tpi->hdr_len += GRE_HEADER_SECTION; + tpi->key = *options; + options++; + } else + tpi->key = 0; + + if (unlikely(greh->flags & GRE_SEQ)) { + if ((void *) (options + 1) > (void *)skb_tail_pointer(skb)) + return -1; + + tpi->seq = *options; + tpi->hdr_len += GRE_HEADER_SECTION; + options++; + } else + tpi->seq = 0; + + /* WCCP version 1 and 2 protocol decoding. 
+ * - Change protocol to IP + * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header + */ + if (tpi->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) { + tpi->proto = htons(ETH_P_IP); + if ((*(u8 *)options & 0xF0) != 0x40) + tpi->hdr_len += 4; + } + + return 0; +} + +static int ipgre_rcv_v0(struct sk_buff *skb) +{ + struct tnl_ptk_info tpi; + int i; + + if (!pskb_may_pull(skb, 16)) + goto drop; + + if (parse_gre_header(skb, &tpi) < 0) + goto drop; + + rcu_read_lock(); + for (i = 0; i < GREPROTO_V0_MAX; i++) { + if (gre_proto_v0[i]->handler) { + int ret; + + ret = gre_proto_v0[i]->handler(skb, &tpi); + if (ret <= 0) { + rcu_read_unlock(); + return ret; + } + } + + } + rcu_read_unlock(); + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); + +drop: + kfree_skb(skb); + return 0; +} + +static void ipgre_err_v0(struct sk_buff *skb, u32 info) +{ + + /* All the routers (except for Linux) return only + * 8 bytes of packet payload. It means, that precise relaying of + * ICMP in the real Internet is absolutely infeasible. + * + * Moreover, Cisco "wise men" put GRE key to the third word + * in GRE header. It makes impossible maintaining even soft + * state for keyed + * GRE tunnels with enabled checksum. Tell them "thank you". + * + * Well, I wonder, rfc1812 was written by Cisco employee, + * what the hell these idiots break standards established + * by themselves??? 
+ **/ + + const int type = icmp_hdr(skb)->type; + const int code = icmp_hdr(skb)->code; + struct tnl_ptk_info tpi; + int i; + + if (!pskb_may_pull(skb, sizeof(struct gre_base_hdr) + ETH_HLEN)) + return; + + parse_gre_header(skb, &tpi); + + /* If only 8 bytes returned, keyed message will be dropped here */ + if (tpi.flags & GRE_KEY) { + if ((tpi.flags & GRE_CSUM) && (tpi.hdr_len < 12)) + return; + if (tpi.hdr_len < 8) + return; + } + + switch (type) { + default: + case ICMP_PARAMETERPROB: + return; + + case ICMP_DEST_UNREACH: + switch (code) { + case ICMP_SR_FAILED: + case ICMP_PORT_UNREACH: + /* Impossible event. */ + return; + default: + /* All others are translated to HOST_UNREACH. + rfc2003 contains "deep thoughts" about NET_UNREACH, + I believe they are just ether pollution. --ANK + */ + break; + } + break; + case ICMP_TIME_EXCEEDED: + if (code != ICMP_EXC_TTL) + return; + break; + + case ICMP_REDIRECT: + break; + } + + rcu_read_lock(); + for (i = 0; i < GREPROTO_V0_MAX; i++) { + if (gre_proto_v0[i]->err_handler) { + if (gre_proto_v0[i]->err_handler(skb, info, &tpi) <= 0) { + rcu_read_unlock(); + return; + } + } + + } + rcu_read_unlock(); +} + static const struct net_protocol net_gre_protocol = { .handler = gre_rcv, .err_handler = gre_err, .netns_ok = 1, }; +static const struct gre_protocol ipgre_protocol = { + .handler = ipgre_rcv_v0, + .err_handler = ipgre_err_v0, +}; + static int __init gre_init(void) { pr_info("GRE over IPv4 demultiplexor driver\n"); @@ -126,12 +399,20 @@ static int __init gre_init(void) pr_err("can't add protocol\n"); return -EAGAIN; } + if (gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0) { + pr_info("%s: can't add ipgre handler\n", __func__); + inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); + return -EAGAIN; + } return 0; } static void __exit gre_exit(void) { + if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0) + pr_info("%s: can't remove protocol\n", __func__); + inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); } 
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index f233c1d..3bda6e2 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -466,7 +466,7 @@ static void ipgre_tunnel_uninit(struct net_device *dev) } -static void ipgre_err(struct sk_buff *skb, u32 info) +static int ipgre_err(struct sk_buff *skb, u32 info, struct tnl_ptk_info *tnl_ptk_info) { /* All the routers (except for Linux) return only @@ -483,63 +483,16 @@ static void ipgre_err(struct sk_buff *skb, u32 info) */ const struct iphdr *iph = (const struct iphdr *)skb->data; - __be16 *p = (__be16 *)(skb->data+(iph->ihl<<2)); - int grehlen = (iph->ihl<<2) + 4; const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; struct ip_tunnel *t; - __be16 flags; - - flags = p[0]; - if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { - if (flags&(GRE_VERSION|GRE_ROUTING)) - return; - if (flags&GRE_KEY) { - grehlen += 4; - if (flags&GRE_CSUM) - grehlen += 4; - } - } - - /* If only 8 bytes returned, keyed message will be dropped here */ - if (skb_headlen(skb) < grehlen) - return; - - switch (type) { - default: - case ICMP_PARAMETERPROB: - return; - - case ICMP_DEST_UNREACH: - switch (code) { - case ICMP_SR_FAILED: - case ICMP_PORT_UNREACH: - /* Impossible event. */ - return; - default: - /* All others are translated to HOST_UNREACH. - rfc2003 contains "deep thoughts" about NET_UNREACH, - I believe they are just ether pollution. --ANK - */ - break; - } - break; - case ICMP_TIME_EXCEEDED: - if (code != ICMP_EXC_TTL) - return; - break; - - case ICMP_REDIRECT: - break; - } rcu_read_lock(); t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr, - flags & GRE_KEY ? 
- *(((__be32 *)p) + (grehlen / 4) - 1) : 0, - p[1]); - if (t == NULL) - goto out; + tnl_ptk_info->key, tnl_ptk_info->proto); + if (t == NULL) { + return 1; + } if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { ipv4_update_pmtu(skb, dev_net(skb->dev), info, @@ -565,6 +518,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info) t->err_time = jiffies; out: rcu_read_unlock(); + return 0; } static inline void ipgre_ecn_decapsulate(const struct iphdr *iph, struct sk_buff *skb) @@ -589,80 +543,29 @@ ipgre_ecn_encapsulate(u8 tos, const struct iphdr *old_iph, struct sk_buff *skb) return INET_ECN_encapsulate(tos, inner); } -static int ipgre_rcv(struct sk_buff *skb) +static int ipgre_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi) { const struct iphdr *iph; - u8 *h; - __be16 flags; - __sum16 csum = 0; - __be32 key = 0; - u32 seqno = 0; struct ip_tunnel *tunnel; - int offset = 4; - __be16 gre_proto; if (!pskb_may_pull(skb, 16)) goto drop_nolock; iph = ip_hdr(skb); - h = skb->data; - flags = *(__be16 *)h; - - if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) { - /* - Version must be 0. - - We do not support routing headers. - */ - if (flags&(GRE_VERSION|GRE_ROUTING)) - goto drop_nolock; - - if (flags&GRE_CSUM) { - switch (skb->ip_summed) { - case CHECKSUM_COMPLETE: - csum = csum_fold(skb->csum); - if (!csum) - break; - /* fall through */ - case CHECKSUM_NONE: - skb->csum = 0; - csum = __skb_checksum_complete(skb); - skb->ip_summed = CHECKSUM_COMPLETE; - } - offset += 4; - } - if (flags&GRE_KEY) { - key = *(__be32 *)(h + offset); - offset += 4; - } - if (flags&GRE_SEQ) { - seqno = ntohl(*(__be32 *)(h + offset)); - offset += 4; - } - } - - gre_proto = *(__be16 *)(h + 2); rcu_read_lock(); if ((tunnel = ipgre_tunnel_lookup(skb->dev, - iph->saddr, iph->daddr, key, - gre_proto))) { + iph->saddr, iph->daddr, tpi->key, + tpi->proto))) { struct pcpu_tstats *tstats; secpath_reset(skb); - skb->protocol = gre_proto; - /* WCCP version 1 and 2 protocol decoding. 
- * - Change protocol to IP - * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header - */ - if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) { - skb->protocol = htons(ETH_P_IP); - if ((*(h + offset) & 0xF0) != 0x40) - offset += 4; - } + skb->protocol = tpi->proto; skb->mac_header = skb->network_header; - __pskb_pull(skb, offset); - skb_postpull_rcsum(skb, skb_transport_header(skb), offset); + __pskb_pull(skb, tpi->hdr_len); + skb_postpull_rcsum(skb, skb_transport_header(skb), tpi->hdr_len); skb->pkt_type = PACKET_HOST; #ifdef CONFIG_NET_IPGRE_BROADCAST if (ipv4_is_multicast(iph->daddr)) { @@ -674,20 +577,20 @@ static int ipgre_rcv(struct sk_buff *skb) } #endif - if (((flags&GRE_CSUM) && csum) || - (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) { + if (((tpi->flags&GRE_CSUM) && tpi->csum) || + (!(tpi->flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) { tunnel->dev->stats.rx_crc_errors++; tunnel->dev->stats.rx_errors++; goto drop; } if (tunnel->parms.i_flags&GRE_SEQ) { - if (!(flags&GRE_SEQ) || - (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) { + if (!(tpi->flags&GRE_SEQ) || + (tunnel->i_seqno && (s32)(tpi->seq - tunnel->i_seqno) < 0)) { tunnel->dev->stats.rx_fifo_errors++; tunnel->dev->stats.rx_errors++; goto drop; } - tunnel->i_seqno = seqno + 1; + tunnel->i_seqno = tpi->seq + 1; } /* Warning: All skb pointers will be invalidated! 
*/ @@ -1373,7 +1276,7 @@ static void ipgre_fb_tunnel_init(struct net_device *dev) } -static const struct gre_protocol ipgre_protocol = { +static const struct gre_protocol_v0 ipgre_protocol = { .handler = ipgre_rcv, .err_handler = ipgre_err, }; @@ -1771,7 +1674,7 @@ static int __init ipgre_init(void) if (err < 0) return err; - err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO); + err = gre_add_protocol_v0(&ipgre_protocol, 0); if (err < 0) { pr_info("%s: can't add protocol\n", __func__); goto add_proto_failed; @@ -1791,7 +1694,7 @@ out: tap_ops_failed: rtnl_link_unregister(&ipgre_link_ops); rtnl_link_failed: - gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); + gre_del_protocol_v0(&ipgre_protocol, 0); add_proto_failed: unregister_pernet_device(&ipgre_net_ops); goto out; @@ -1801,7 +1704,7 @@ static void __exit ipgre_fini(void) { rtnl_link_unregister(&ipgre_tap_ops); rtnl_link_unregister(&ipgre_link_ops); - if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0) + if (gre_del_protocol_v0(&ipgre_protocol, 0) < 0) pr_info("%s: can't remove protocol\n", __func__); unregister_pernet_device(&ipgre_net_ops); } -- 1.7.10 _______________________________________________ dev mailing list [email protected] http://openvswitch.org/mailman/listinfo/dev
