Currently only one protocol handler is allowed for the GREPROTO_CISCO
protocol. Soon the OVS tunnel will register for the same protocol as
the GRE device.
The following patch extends the GRE de-multiplexer so that multiple GRE
modules can register a GRE protocol handler.

Signed-off-by: Pravin B Shelar <[email protected]>
---
 include/net/gre.h  |   12 +++
 include/net/ipip.h |    9 ++
 net/ipv4/gre.c     |  281 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 net/ipv4/ip_gre.c  |  139 ++++----------------------
 4 files changed, 323 insertions(+), 118 deletions(-)

diff --git a/include/net/gre.h b/include/net/gre.h
index 8266547..82e9276 100644
--- a/include/net/gre.h
+++ b/include/net/gre.h
@@ -2,6 +2,7 @@
 #define __LINUX_GRE_H
 
 #include <linux/skbuff.h>
+#include <net/ipip.h>
 
 #define GREPROTO_CISCO         0
 #define GREPROTO_PPTP          1
@@ -15,4 +16,15 @@ struct gre_protocol {
 int gre_add_protocol(const struct gre_protocol *proto, u8 version);
 int gre_del_protocol(const struct gre_protocol *proto, u8 version);
 
+struct gre_protocol_v0 {       /* callbacks one module registers for GRE version 0 traffic */
+       int (*handler)(struct sk_buff *skb, struct tnl_ptk_info *tpi);  /* receive hook; a result <= 0 ends the demux walk, > 0 lets the next slot try */
+       int (*err_handler)(struct sk_buff *skb, u32 info,       /* error hook; same return convention as handler */
+                          struct tnl_ptk_info *tpi);
+};
+
+int gre_add_protocol_v0(const struct gre_protocol_v0 *proto, u8 priority);
+int gre_del_protocol_v0(const struct gre_protocol_v0 *proto, u8 priority);
+void build_gre_header(struct sk_buff *skb, int hlen, __be16 flags,
+                     __be16 proto, __be32 key, u32 seqno);
+
 #endif
diff --git a/include/net/ipip.h b/include/net/ipip.h
index a93cf6d..4b77671 100644
--- a/include/net/ipip.h
+++ b/include/net/ipip.h
@@ -38,6 +38,15 @@ struct ip_tunnel {
        unsigned int                    prl_count;      /* # of entries in PRL 
*/
 };
 
+struct tnl_ptk_info {  /* GRE header fields as decoded by parse_gre_header() in net/ipv4/gre.c */
+       __be16 flags;   /* flag bits copied from the GRE header */
+       __be16 proto;   /* GRE protocol field; rewritten to ETH_P_IP for WCCP */
+       __be32 key;     /* key option as read from the header, 0 if absent */
+       __be32 seq;     /* sequence option as read from the header (not byte-swapped), 0 if absent */
+       int hdr_len;    /* bytes occupied by the GRE header and its options */
+       __be16 csum;    /* result of the receive checksum check; ipgre_rcv() counts non-zero as a CRC error */
+};
+
 struct ip_tunnel_prl_entry {
        struct ip_tunnel_prl_entry __rcu *next;
        __be32                          addr;
diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c
index 42a4910..2c384c7 100644
--- a/net/ipv4/gre.c
+++ b/net/ipv4/gre.c
@@ -16,15 +16,27 @@
 #include <linux/kernel.h>
 #include <linux/kmod.h>
 #include <linux/skbuff.h>
+#include <linux/if.h>
+#include <linux/icmp.h>
 #include <linux/in.h>
 #include <linux/ip.h>
+#include <linux/if_tunnel.h>
 #include <linux/netdevice.h>
 #include <linux/spinlock.h>
 #include <net/protocol.h>
 #include <net/gre.h>
+#include <net/icmp.h>
 
+#define GREPROTO_V0_MAX 2
+#define GRE_HEADER_SECTION 4
+
+struct gre_base_hdr {  /* fixed leading part of the GRE header; options follow it */
+       __be16 flags;           /* tested against GRE_CSUM, GRE_KEY, GRE_SEQ, GRE_ROUTING and GRE_VERSION */
+       __be16 protocol;        /* protocol of the encapsulated payload, e.g. ETH_P_WCCP */
+};
 
 static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly;
+static const struct gre_protocol_v0 __rcu *gre_proto_v0[GREPROTO_V0_MAX] 
__read_mostly;
 static DEFINE_SPINLOCK(gre_proto_lock);
 
 int gre_add_protocol(const struct gre_protocol *proto, u8 version)
@@ -112,12 +124,273 @@ static void gre_err(struct sk_buff *skb, u32 info)
        rcu_read_unlock();
 }
 
+/*
+ * Register @proto as the GRE version 0 handler in slot @priority.
+ *
+ * The receive and error paths walk the slots in ascending index order, so
+ * a lower @priority is offered each packet first.
+ *
+ * Returns 0 on success, -EINVAL if @priority is out of range, or -EBUSY
+ * if the slot already holds a handler.
+ */
+int gre_add_protocol_v0(const struct gre_protocol_v0 *proto, u8 priority)
+{
+       int err = 0;
+
+       if (priority >= GREPROTO_V0_MAX)
+               return -EINVAL;
+
+       spin_lock(&gre_proto_lock);
+       if (rcu_dereference_protected(gre_proto_v0[priority],
+                       lockdep_is_held(&gre_proto_lock))) {
+               err = -EBUSY;
+       } else {
+               /* Readers only hold rcu_read_lock(), so publish properly. */
+               rcu_assign_pointer(gre_proto_v0[priority], proto);
+       }
+       spin_unlock(&gre_proto_lock);
+
+       return err;
+}
+EXPORT_SYMBOL_GPL(gre_add_protocol_v0);
+
+/*
+ * Unregister @proto from GRE version 0 slot @priority.
+ *
+ * The slot is cleared only if it currently holds @proto.  On success the
+ * function calls synchronize_rcu() before returning.
+ *
+ * Returns 0 on success, -EINVAL if @priority is out of range, or -ENOENT
+ * if the slot does not hold @proto.
+ */
+int gre_del_protocol_v0(const struct gre_protocol_v0 *proto, u8 priority)
+{
+       if (priority >= GREPROTO_V0_MAX)
+               return -EINVAL;
+
+       spin_lock(&gre_proto_lock);
+       if (rcu_dereference_protected(gre_proto_v0[priority],
+                       lockdep_is_held(&gre_proto_lock)) != proto) {
+               spin_unlock(&gre_proto_lock);
+               return -ENOENT;
+       }
+       RCU_INIT_POINTER(gre_proto_v0[priority], NULL);
+       spin_unlock(&gre_proto_lock);
+
+       /* Wait outside the spinlock for readers that may still see @proto. */
+       synchronize_rcu();
+       return 0;
+}
+EXPORT_SYMBOL_GPL(gre_del_protocol_v0);
+
+void build_gre_header(struct sk_buff *skb, int hlen, __be16 flags,
+                     __be16 proto, __be32 key, u32 seqno)
+{      /* writes a GRE header directly behind the struct iphdr returned by ip_hdr(skb) */
+       struct iphdr *iph = ip_hdr(skb);
+
+       ((__be16 *)(iph + 1))[0] = flags;       /* first 16 bits: flags, stored as passed in */
+       ((__be16 *)(iph + 1))[1] = proto;       /* next 16 bits: protocol, stored as passed in */
+
+       if (flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
+               __be32 *ptr = (__be32 *)(((u8 *)iph) + hlen - 4);       /* last 32-bit word of the hlen bytes counted from iph; presumably hlen = IP + GRE header length, confirm with callers */
+
+               if (flags&GRE_SEQ) {    /* options are filled back to front: sequence number last in the header */
+                       *ptr = htonl(seqno);
+                       ptr--;
+               }
+               if (flags&GRE_KEY) {    /* key sits in the word before the sequence number */
+                       *ptr = key;
+                       ptr--;
+               }
+               if (flags&GRE_CSUM) {
+                       *ptr = 0;       /* clear the whole checksum word before summing over it */
+                       *(__sum16 *)ptr = ip_compute_csum((void *)(iph+1),      /* sum starts at the GRE header */
+                                          skb->len - sizeof(struct iphdr));    /* and covers skb->len minus one struct iphdr */
+               }
+       }
+}
+EXPORT_SYMBOL(build_gre_header);
+
+/*
+ * Verify the GRE checksum of a received packet, if the header carries one.
+ *
+ * The flags are read from the GRE header at skb->data, the same place
+ * parse_gre_header() decodes, so an outer IP header that carries options
+ * does not shift the lookup.
+ *
+ * Returns 0 when the checksum flag is clear, when the checksum verifies, or
+ * when skb->ip_summed is neither CHECKSUM_COMPLETE nor CHECKSUM_NONE;
+ * otherwise returns the non-zero folded sum.
+ */
+static __sum16 check_checksum(struct sk_buff *skb)
+{
+       const struct gre_base_hdr *greh = (struct gre_base_hdr *)skb->data;
+       __sum16 csum = 0;
+
+       if (!(greh->flags & GRE_CSUM))
+               return 0;
+
+       switch (skb->ip_summed) {
+       case CHECKSUM_COMPLETE:
+               csum = csum_fold(skb->csum);
+               if (!csum)
+                       break;
+               /* Supplied sum did not verify: fall through and recompute. */
+
+       case CHECKSUM_NONE:
+               skb->csum = 0;
+               csum = __skb_checksum_complete(skb);
+               skb->ip_summed = CHECKSUM_COMPLETE;
+               break;
+
+       default:
+               /* Any other ip_summed state is accepted without a check. */
+               break;
+       }
+
+       return csum;
+}
+
+/*
+ * Decode the GRE version 0 header at skb->data into @tpi.
+ *
+ * Fills in flags, proto, key, seq, hdr_len and csum.  key and seq are
+ * copied exactly as they appear in the header (no byte swapping) and are 0
+ * when the corresponding option is absent.  hdr_len is the number of bytes
+ * the GRE header occupies, including the extra 4 bytes skipped for WCCPv2.
+ *
+ * Returns 0 on success, or -EINVAL if the header has a non-zero version,
+ * has the routing flag set, or needs bytes beyond the skb's linear data.
+ * On failure @tpi may be partly or wholly unwritten and must not be used.
+ */
+static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi)
+{
+       /* IP and ICMP protocol handlers check that the IHL is valid. */
+       struct gre_base_hdr *greh = (struct gre_base_hdr *)skb->data;
+       __be32 *options = (__be32 *)(greh + 1);
+       void *tail = skb_tail_pointer(skb);
+
+       if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
+               return -EINVAL;
+
+       tpi->flags = greh->flags;
+       tpi->proto = greh->protocol;
+
+       tpi->hdr_len = GRE_HEADER_SECTION;
+       tpi->csum = check_checksum(skb);
+
+       if (greh->flags & GRE_CSUM) {
+               /* Skip the checksum word; its value is not read here. */
+               tpi->hdr_len += GRE_HEADER_SECTION;
+               options++;
+       }
+
+       if (greh->flags & GRE_KEY) {
+               if ((void *)(options + 1) > tail)
+                       return -EINVAL;
+               tpi->hdr_len += GRE_HEADER_SECTION;
+               tpi->key = *options;
+               options++;
+       } else {
+               tpi->key = 0;
+       }
+
+       if (unlikely(greh->flags & GRE_SEQ)) {
+               if ((void *)(options + 1) > tail)
+                       return -EINVAL;
+               tpi->seq = *options;
+               tpi->hdr_len += GRE_HEADER_SECTION;
+               options++;
+       } else {
+               tpi->seq = 0;
+       }
+
+       /* WCCP version 1 and 2 protocol decoding.
+        * - Change protocol to IP
+        * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
+        */
+       if (tpi->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
+               /* The byte holding the version nibble must be readable. */
+               if ((void *)((u8 *)options + 1) > tail)
+                       return -EINVAL;
+               tpi->proto = htons(ETH_P_IP);
+               if ((*(u8 *)options & 0xF0) != 0x40)
+                       tpi->hdr_len += 4;
+       }
+
+       return 0;
+}
+
+/*
+ * Receive entry point for GRE version 0 packets.
+ *
+ * Decodes the GRE header once and offers the packet to each registered
+ * gre_proto_v0[] handler in slot order.  A handler result <= 0 ends the
+ * walk and is returned unchanged; the skb is not touched here afterwards.
+ * If no handler ends the walk, an ICMP port-unreachable is sent and the
+ * skb is freed.  Packets that fail the length check or header parsing are
+ * freed without an ICMP reply.
+ *
+ * Returns the result of the handler that ended the walk, otherwise 0.
+ */
+static int ipgre_rcv_v0(struct sk_buff *skb)
+{
+       struct tnl_ptk_info tpi;
+       int i;
+
+       if (!pskb_may_pull(skb, 16))
+               goto drop;
+
+       if (parse_gre_header(skb, &tpi) < 0)
+               goto drop;
+
+       rcu_read_lock();
+       for (i = 0; i < GREPROTO_V0_MAX; i++) {
+               const struct gre_protocol_v0 *proto;
+               int ret;
+
+               /* A slot stays NULL until a module registers for it. */
+               proto = rcu_dereference(gre_proto_v0[i]);
+               if (!proto || !proto->handler)
+                       continue;
+
+               ret = proto->handler(skb, &tpi);
+               if (ret <= 0) {
+                       rcu_read_unlock();
+                       return ret;
+               }
+       }
+       rcu_read_unlock();
+       icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
+
+drop:
+       kfree_skb(skb);
+       return 0;
+}
+
+/*
+ * Error entry point for GRE version 0.
+ *
+ * Decodes the GRE header, discards ICMP types and codes that are never
+ * relayed, then offers the error to each registered gre_proto_v0[]
+ * err_handler in slot order until one returns <= 0.
+ *
+ * NOTE(review): ipgre_err() in ip_gre.c reads skb->data as an IP header,
+ * whereas parse_gre_header() decodes skb->data as the GRE header itself.
+ * Confirm which offset the skb really has when this function runs.
+ */
+static void ipgre_err_v0(struct sk_buff *skb, u32 info)
+{
+
+       /* All the routers (except for Linux) return only
+        * 8 bytes of packet payload. It means, that precise relaying of
+        * ICMP in the real Internet is absolutely infeasible.
+        *
+        * Moreover, Cisco "wise men" put GRE key to the third word
+        * in GRE header. It makes impossible maintaining even soft
+        * state for keyed
+        * GRE tunnels with enabled checksum. Tell them "thank you".
+        *
+        * Well, I wonder, rfc1812 was written by Cisco employee,
+        * what the hell these idiots break standards established
+        * by themselves???
+        **/
+
+       const int type = icmp_hdr(skb)->type;
+       const int code = icmp_hdr(skb)->code;
+       struct tnl_ptk_info tpi;
+       int i;
+
+       if (!pskb_may_pull(skb, sizeof(struct gre_base_hdr) + ETH_HLEN))
+               return;
+
+       /*
+        * tpi is only meaningful when parsing succeeds.  The parser fails
+        * when the key or sequence option runs past the linear data, so a
+        * truncated keyed message is dropped here.
+        */
+       if (parse_gre_header(skb, &tpi) < 0)
+               return;
+
+       switch (type) {
+       default:
+       case ICMP_PARAMETERPROB:
+               return;
+
+       case ICMP_DEST_UNREACH:
+               switch (code) {
+               case ICMP_SR_FAILED:
+               case ICMP_PORT_UNREACH:
+                       /* Impossible event. */
+                       return;
+               default:
+                       /* All others are translated to HOST_UNREACH.
+                          rfc2003 contains "deep thoughts" about NET_UNREACH,
+                          I believe they are just ether pollution. --ANK
+                        */
+                       break;
+               }
+               break;
+       case ICMP_TIME_EXCEEDED:
+               if (code != ICMP_EXC_TTL)
+                       return;
+               break;
+
+       case ICMP_REDIRECT:
+               break;
+       }
+
+       rcu_read_lock();
+       for (i = 0; i < GREPROTO_V0_MAX; i++) {
+               const struct gre_protocol_v0 *proto;
+
+               /* A slot stays NULL until a module registers for it. */
+               proto = rcu_dereference(gre_proto_v0[i]);
+               if (!proto || !proto->err_handler)
+                       continue;
+
+               if (proto->err_handler(skb, info, &tpi) <= 0)
+                       break;
+       }
+       rcu_read_unlock();
+}
+
 static const struct net_protocol net_gre_protocol = {  /* inet protocol hooks; removed for IPPROTO_GRE via inet_del_protocol() in gre_exit() */
        .handler     = gre_rcv,         /* receive entry point */
        .err_handler = gre_err,         /* error entry point, called as gre_err(skb, info) */
        .netns_ok    = 1,
 };
 
+static const struct gre_protocol ipgre_protocol = {    /* version 0 demux; gre_init() registers it under GREPROTO_CISCO */
+       .handler     = ipgre_rcv_v0,    /* offers each packet to the gre_proto_v0[] handlers */
+       .err_handler = ipgre_err_v0,    /* offers each error to the gre_proto_v0[] err_handlers */
+};
+
 static int __init gre_init(void)
 {
        pr_info("GRE over IPv4 demultiplexor driver\n");
@@ -126,12 +399,20 @@ static int __init gre_init(void)
                pr_err("can't add protocol\n");
                return -EAGAIN;
        }
+       if (gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0) {
+               pr_info("%s: can't add ipgre handler\n", __func__);
+               inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
+               return -EAGAIN;
+       }
 
        return 0;
 }
 
 static void __exit gre_exit(void)
 {
+       if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0)
+               pr_info("%s: can't remove protocol\n", __func__);
+
        inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
 }
 
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index f233c1d..3bda6e2 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -466,7 +466,7 @@ static void ipgre_tunnel_uninit(struct net_device *dev)
 }
 
 
-static void ipgre_err(struct sk_buff *skb, u32 info)
+static int ipgre_err(struct sk_buff *skb, u32 info, struct tnl_ptk_info 
*tnl_ptk_info)
 {
 
 /* All the routers (except for Linux) return only
@@ -483,63 +483,16 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
  */
 
        const struct iphdr *iph = (const struct iphdr *)skb->data;
-       __be16       *p = (__be16 *)(skb->data+(iph->ihl<<2));
-       int grehlen = (iph->ihl<<2) + 4;
        const int type = icmp_hdr(skb)->type;
        const int code = icmp_hdr(skb)->code;
        struct ip_tunnel *t;
-       __be16 flags;
-
-       flags = p[0];
-       if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
-               if (flags&(GRE_VERSION|GRE_ROUTING))
-                       return;
-               if (flags&GRE_KEY) {
-                       grehlen += 4;
-                       if (flags&GRE_CSUM)
-                               grehlen += 4;
-               }
-       }
-
-       /* If only 8 bytes returned, keyed message will be dropped here */
-       if (skb_headlen(skb) < grehlen)
-               return;
-
-       switch (type) {
-       default:
-       case ICMP_PARAMETERPROB:
-               return;
-
-       case ICMP_DEST_UNREACH:
-               switch (code) {
-               case ICMP_SR_FAILED:
-               case ICMP_PORT_UNREACH:
-                       /* Impossible event. */
-                       return;
-               default:
-                       /* All others are translated to HOST_UNREACH.
-                          rfc2003 contains "deep thoughts" about NET_UNREACH,
-                          I believe they are just ether pollution. --ANK
-                        */
-                       break;
-               }
-               break;
-       case ICMP_TIME_EXCEEDED:
-               if (code != ICMP_EXC_TTL)
-                       return;
-               break;
-
-       case ICMP_REDIRECT:
-               break;
-       }
 
        rcu_read_lock();
        t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
-                               flags & GRE_KEY ?
-                               *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
-                               p[1]);
-       if (t == NULL)
-               goto out;
+                               tnl_ptk_info->key, tnl_ptk_info->proto);
+       if (t == NULL) {
+               /* No tunnel matched: release the RCU read lock taken above
+                * before telling the demultiplexer to try the next handler.
+                */
+               rcu_read_unlock();
+               return 1;
+       }
 
        if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
                ipv4_update_pmtu(skb, dev_net(skb->dev), info,
@@ -565,6 +518,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
        t->err_time = jiffies;
 out:
        rcu_read_unlock();
+       return 0;
 }
 
 static inline void ipgre_ecn_decapsulate(const struct iphdr *iph, struct 
sk_buff *skb)
@@ -589,80 +543,29 @@ ipgre_ecn_encapsulate(u8 tos, const struct iphdr 
*old_iph, struct sk_buff *skb)
        return INET_ECN_encapsulate(tos, inner);
 }
 
-static int ipgre_rcv(struct sk_buff *skb)
+static int ipgre_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi)
 {
        const struct iphdr *iph;
-       u8     *h;
-       __be16    flags;
-       __sum16   csum = 0;
-       __be32 key = 0;
-       u32    seqno = 0;
        struct ip_tunnel *tunnel;
-       int    offset = 4;
-       __be16 gre_proto;
 
        if (!pskb_may_pull(skb, 16))
                goto drop_nolock;
 
        iph = ip_hdr(skb);
-       h = skb->data;
-       flags = *(__be16 *)h;
-
-       if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
-               /* - Version must be 0.
-                  - We do not support routing headers.
-                */
-               if (flags&(GRE_VERSION|GRE_ROUTING))
-                       goto drop_nolock;
-
-               if (flags&GRE_CSUM) {
-                       switch (skb->ip_summed) {
-                       case CHECKSUM_COMPLETE:
-                               csum = csum_fold(skb->csum);
-                               if (!csum)
-                                       break;
-                               /* fall through */
-                       case CHECKSUM_NONE:
-                               skb->csum = 0;
-                               csum = __skb_checksum_complete(skb);
-                               skb->ip_summed = CHECKSUM_COMPLETE;
-                       }
-                       offset += 4;
-               }
-               if (flags&GRE_KEY) {
-                       key = *(__be32 *)(h + offset);
-                       offset += 4;
-               }
-               if (flags&GRE_SEQ) {
-                       seqno = ntohl(*(__be32 *)(h + offset));
-                       offset += 4;
-               }
-       }
-
-       gre_proto = *(__be16 *)(h + 2);
 
        rcu_read_lock();
        if ((tunnel = ipgre_tunnel_lookup(skb->dev,
-                                         iph->saddr, iph->daddr, key,
-                                         gre_proto))) {
+                                         iph->saddr, iph->daddr, tpi->key,
+                                         tpi->proto))) {
                struct pcpu_tstats *tstats;
 
                secpath_reset(skb);
 
-               skb->protocol = gre_proto;
-               /* WCCP version 1 and 2 protocol decoding.
-                * - Change protocol to IP
-                * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
-                */
-               if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
-                       skb->protocol = htons(ETH_P_IP);
-                       if ((*(h + offset) & 0xF0) != 0x40)
-                               offset += 4;
-               }
+               skb->protocol = tpi->proto;
 
                skb->mac_header = skb->network_header;
-               __pskb_pull(skb, offset);
-               skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
+               __pskb_pull(skb, tpi->hdr_len);
+               skb_postpull_rcsum(skb, skb_transport_header(skb), 
tpi->hdr_len);
                skb->pkt_type = PACKET_HOST;
 #ifdef CONFIG_NET_IPGRE_BROADCAST
                if (ipv4_is_multicast(iph->daddr)) {
@@ -674,20 +577,20 @@ static int ipgre_rcv(struct sk_buff *skb)
                }
 #endif
 
-               if (((flags&GRE_CSUM) && csum) ||
-                   (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
+               if (((tpi->flags&GRE_CSUM) && tpi->csum) ||
+                   (!(tpi->flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) 
{
                        tunnel->dev->stats.rx_crc_errors++;
                        tunnel->dev->stats.rx_errors++;
                        goto drop;
                }
                if (tunnel->parms.i_flags&GRE_SEQ) {
-                       if (!(flags&GRE_SEQ) ||
-                           (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) 
< 0)) {
+                       if (!(tpi->flags&GRE_SEQ) ||
+                           (tunnel->i_seqno && (s32)(tpi->seq - 
tunnel->i_seqno) < 0)) {
                                tunnel->dev->stats.rx_fifo_errors++;
                                tunnel->dev->stats.rx_errors++;
                                goto drop;
                        }
-                       tunnel->i_seqno = seqno + 1;
+                       tunnel->i_seqno = tpi->seq + 1;
                }
 
                /* Warning: All skb pointers will be invalidated! */
@@ -1373,7 +1276,7 @@ static void ipgre_fb_tunnel_init(struct net_device *dev)
 }
 
 
-static const struct gre_protocol ipgre_protocol = {
+static const struct gre_protocol_v0 ipgre_protocol = {
        .handler     = ipgre_rcv,
        .err_handler = ipgre_err,
 };
@@ -1771,7 +1674,7 @@ static int __init ipgre_init(void)
        if (err < 0)
                return err;
 
-       err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
+       err = gre_add_protocol_v0(&ipgre_protocol, 0);
        if (err < 0) {
                pr_info("%s: can't add protocol\n", __func__);
                goto add_proto_failed;
@@ -1791,7 +1694,7 @@ out:
 tap_ops_failed:
        rtnl_link_unregister(&ipgre_link_ops);
 rtnl_link_failed:
-       gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
+       gre_del_protocol_v0(&ipgre_protocol, 0);
 add_proto_failed:
        unregister_pernet_device(&ipgre_net_ops);
        goto out;
@@ -1801,7 +1704,7 @@ static void __exit ipgre_fini(void)
 {
        rtnl_link_unregister(&ipgre_tap_ops);
        rtnl_link_unregister(&ipgre_link_ops);
-       if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0)
+       if (gre_del_protocol_v0(&ipgre_protocol, 0) < 0)
                pr_info("%s: can't remove protocol\n", __func__);
        unregister_pernet_device(&ipgre_net_ops);
 }
-- 
1.7.10

_______________________________________________
dev mailing list
[email protected]
http://openvswitch.org/mailman/listinfo/dev

Reply via email to