[PATCH 11/12] [IPSEC]: Reinject packet instead of calling netfilter directly on input

2007-10-16 Thread Herbert Xu
[IPSEC]: Reinject packet instead of calling netfilter directly on input

Currently we call netfilter directly on input after a series of transport
mode transforms (and BEET but that's a separate bug).  This is inconsistent
because other parts of the stack such AF_PACKET cannot see the decapsulated
packet.  In fact this is a common complaint about the Linux IPsec stack.

Another problem is that there is a potential for stack overflow if we
encounter a DNAT rule which turns a foreign packet into a local one that
contains another transport mode SA.

This patch introduces a major behavioural change by reinjecting the
packet instead of calling netfilter directly.

This solves both of the aformentioned problems.

It is still inconsistent with how we do things on output since we don't
pass things through AF_PACKET there either but the same inconsistency
exists for tunnel mode too so it's not a new problem.

To make things easier I've added a new function called netif_rerx which
resets netfilter and the dst before reinjecting the packet using netif_rx.
This can be used by other tunnel code as well.

I haven't added a reinject function for RO mode since it can never be
called on that path and if it does we want to know about it through an
OOPS.

Signed-off-by: Herbert Xu [EMAIL PROTECTED]
---

 include/linux/netdevice.h   |1 +
 include/net/xfrm.h  |8 
 net/core/dev.c  |   12 
 net/ipv4/xfrm4_input.c  |   24 ++--
 net/ipv4/xfrm4_mode_beet.c  |7 +++
 net/ipv4/xfrm4_mode_transport.c |   11 +++
 net/ipv4/xfrm4_mode_tunnel.c|7 +++
 net/ipv6/xfrm6_input.c  |   23 ++-
 net/ipv6/xfrm6_mode_beet.c  |7 +++
 net/ipv6/xfrm6_mode_transport.c |   10 ++
 net/ipv6/xfrm6_mode_tunnel.c|7 +++
 11 files changed, 74 insertions(+), 43 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 39dd83b..097f911 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1039,6 +1039,7 @@ extern void dev_kfree_skb_any(struct sk_buff *skb);
 #define HAVE_NETIF_RX 1
 extern int netif_rx(struct sk_buff *skb);
 extern int netif_rx_ni(struct sk_buff *skb);
+extern int netif_rerx(struct sk_buff *skb);
 #define HAVE_NETIF_RECEIVE_SKB 1
 extern int netif_receive_skb(struct sk_buff *skb);
 extern int dev_valid_name(const char *name);
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index a9e8247..e5ae5fa 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -311,6 +311,14 @@ struct xfrm_mode {
 */
int (*output)(struct xfrm_state *x,struct sk_buff *skb);
 
+   /*
+* Reinject packet into stack.
+*
+* On entry, the packet is in the state as on exit from the
+* input function above.
+*/
+   int (*reinject)(struct xfrm_state *x,struct sk_buff *skb);
+
struct xfrm_state_afinfo *afinfo;
struct module *owner;
unsigned int encap;
diff --git a/net/core/dev.c b/net/core/dev.c
index 38b03da..b753ec8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1808,6 +1808,18 @@ int netif_rx_ni(struct sk_buff *skb)
 
 EXPORT_SYMBOL(netif_rx_ni);
 
+/* Reinject a packet that has previously been processed, e.g., by tunneling. */
+int netif_rerx(struct sk_buff *skb)
+{
+   nf_reset(skb);
+
+   dst_release(skb-dst);
+   skb-dst = NULL;
+
+   return netif_rx(skb);
+}
+EXPORT_SYMBOL(netif_rerx);
+
 static inline struct net_device *skb_bond(struct sk_buff *skb)
 {
struct net_device *dev = skb-dev;
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 5cb0b59..f5576d5 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -41,7 +41,6 @@ int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 
spi,
struct xfrm_state *xfrm_vec[XFRM_MAX_DEPTH];
struct xfrm_state *x;
int xfrm_nr = 0;
-   int decaps = 0;
unsigned int nhoff = offsetof(struct iphdr, protocol);
 
seq = 0;
@@ -95,7 +94,6 @@ int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 
spi,
goto drop;
 
if (x-props.mode == XFRM_MODE_TUNNEL) {
-   decaps = 1;
break;
}
 
@@ -122,26 +120,8 @@ int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, 
__be32 spi,
   xfrm_nr * sizeof(xfrm_vec[0]));
skb-sp-len += xfrm_nr;
 
-   nf_reset(skb);
-
-   if (decaps) {
-   dst_release(skb-dst);
-   skb-dst = NULL;
-   netif_rx(skb);
-   return 0;
-   } else {
-#ifdef CONFIG_NETFILTER
-   __skb_push(skb, skb-data - skb_network_header(skb));
-   ip_hdr(skb)-tot_len = htons(skb-len);
-   ip_send_check(ip_hdr(skb));
-
-   NF_HOOK(PF_INET, 

Re: [PATCH 11/12] [IPSEC]: Reinject packet instead of calling netfilter directly on input

2007-10-16 Thread YOSHIFUJI Hideaki / 吉藤英明
Herbert,

I think with this change, we parse extension headers, twice.
We really do not want to do this.

--yoshfuji

In article [EMAIL PROTECTED] (at Tue, 16 Oct 2007 22:33:19 +0800), Herbert Xu 
[EMAIL PROTECTED] says:

 [IPSEC]: Reinject packet instead of calling netfilter directly on input
 
 Currently we call netfilter directly on input after a series of transport
 mode transforms (and BEET but that's a separate bug).  This is inconsistent
 because other parts of the stack such AF_PACKET cannot see the decapsulated
 packet.  In fact this is a common complaint about the Linux IPsec stack.
 
 Another problem is that there is a potential for stack overflow if we
 encounter a DNAT rule which turns a foreign packet into a local one that
 contains another transport mode SA.
 
 This patch introduces a major behavioural change by reinjecting the
 packet instead of calling netfilter directly.
 
 This solves both of the aformentioned problems.
 
 It is still inconsistent with how we do things on output since we don't
 pass things through AF_PACKET there either but the same inconsistency
 exists for tunnel mode too so it's not a new problem.
 
 To make things easier I've added a new function called netif_rerx which
 resets netfilter and the dst before reinjecting the packet using netif_rx.
 This can be used by other tunnel code as well.
 
 I haven't added a reinject function for RO mode since it can never be
 called on that path and if it does we want to know about it through an
 OOPS.
 
 Signed-off-by: Herbert Xu [EMAIL PROTECTED]
 ---
 
  include/linux/netdevice.h   |1 +
  include/net/xfrm.h  |8 
  net/core/dev.c  |   12 
  net/ipv4/xfrm4_input.c  |   24 ++--
  net/ipv4/xfrm4_mode_beet.c  |7 +++
  net/ipv4/xfrm4_mode_transport.c |   11 +++
  net/ipv4/xfrm4_mode_tunnel.c|7 +++
  net/ipv6/xfrm6_input.c  |   23 ++-
  net/ipv6/xfrm6_mode_beet.c  |7 +++
  net/ipv6/xfrm6_mode_transport.c |   10 ++
  net/ipv6/xfrm6_mode_tunnel.c|7 +++
  11 files changed, 74 insertions(+), 43 deletions(-)
 
 diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
 index 39dd83b..097f911 100644
 --- a/include/linux/netdevice.h
 +++ b/include/linux/netdevice.h
 @@ -1039,6 +1039,7 @@ extern void dev_kfree_skb_any(struct sk_buff *skb);
  #define HAVE_NETIF_RX 1
  extern int   netif_rx(struct sk_buff *skb);
  extern int   netif_rx_ni(struct sk_buff *skb);
 +extern int   netif_rerx(struct sk_buff *skb);
  #define HAVE_NETIF_RECEIVE_SKB 1
  extern int   netif_receive_skb(struct sk_buff *skb);
  extern int   dev_valid_name(const char *name);
 diff --git a/include/net/xfrm.h b/include/net/xfrm.h
 index a9e8247..e5ae5fa 100644
 --- a/include/net/xfrm.h
 +++ b/include/net/xfrm.h
 @@ -311,6 +311,14 @@ struct xfrm_mode {
*/
   int (*output)(struct xfrm_state *x,struct sk_buff *skb);
  
 + /*
 +  * Reinject packet into stack.
 +  *
 +  * On entry, the packet is in the state as on exit from the
 +  * input function above.
 +  */
 + int (*reinject)(struct xfrm_state *x,struct sk_buff *skb);
 +
   struct xfrm_state_afinfo *afinfo;
   struct module *owner;
   unsigned int encap;
 diff --git a/net/core/dev.c b/net/core/dev.c
 index 38b03da..b753ec8 100644
 --- a/net/core/dev.c
 +++ b/net/core/dev.c
 @@ -1808,6 +1808,18 @@ int netif_rx_ni(struct sk_buff *skb)
  
  EXPORT_SYMBOL(netif_rx_ni);
  
 +/* Reinject a packet that has previously been processed, e.g., by tunneling. 
 */
 +int netif_rerx(struct sk_buff *skb)
 +{
 + nf_reset(skb);
 +
 + dst_release(skb-dst);
 + skb-dst = NULL;
 +
 + return netif_rx(skb);
 +}
 +EXPORT_SYMBOL(netif_rerx);
 +
  static inline struct net_device *skb_bond(struct sk_buff *skb)
  {
   struct net_device *dev = skb-dev;
 diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
 index 5cb0b59..f5576d5 100644
 --- a/net/ipv4/xfrm4_input.c
 +++ b/net/ipv4/xfrm4_input.c
 @@ -41,7 +41,6 @@ int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, 
 __be32 spi,
   struct xfrm_state *xfrm_vec[XFRM_MAX_DEPTH];
   struct xfrm_state *x;
   int xfrm_nr = 0;
 - int decaps = 0;
   unsigned int nhoff = offsetof(struct iphdr, protocol);
  
   seq = 0;
 @@ -95,7 +94,6 @@ int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, 
 __be32 spi,
   goto drop;
  
   if (x-props.mode == XFRM_MODE_TUNNEL) {
 - decaps = 1;
   break;
   }
  
 @@ -122,26 +120,8 @@ int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, 
 __be32 spi,
  xfrm_nr * sizeof(xfrm_vec[0]));
   skb-sp-len += xfrm_nr;
  
 - nf_reset(skb);
 -
 - if (decaps) {
 - dst_release(skb-dst);
 - skb-dst = NULL;
 - netif_rx(skb);
 

Re: [PATCH 11/12] [IPSEC]: Reinject packet instead of calling netfilter directly on input

2007-10-16 Thread Herbert Xu
On Wed, Oct 17, 2007 at 12:05:47AM +0900, YOSHIFUJI Hideaki / 吉藤英明 wrote:
 
 I think with this change, we parse extension headers, twice.
 We really do not want to do this.

Good point.  I'll need to think of some other way to do this then.

Thanks,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED]
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html