Re: IP_PKTINFO broken by acf568ee859f0 (xfrm: Reinject transport-mode packets through tasklet)

2018-05-01 Thread Steffen Klassert
On Fri, Apr 27, 2018 at 11:16:53PM +0200, Maxime Bizon wrote:
> 
> Hello Herbert,
> 
> That patch just went into stable 4.14 and is causing a regression on my
> setup.
> 
> Basically, IP_PKTINFO does not work anymore on transport-mode packets,
> because skb->cb is now used to store the finish callback.
> 
> Was that expected or is it an unforeseen side effect?

This should be fixed by:

commit 9a3fb9fb84cc ("xfrm: Fix transport mode skb control buffer usage.")



IP_PKTINFO broken by acf568ee859f0 (xfrm: Reinject transport-mode packets through tasklet)

2018-04-27 Thread Maxime Bizon

Hello Herbert,

That patch just went into stable 4.14 and is causing a regression on my
setup.

Basically, IP_PKTINFO does not work anymore on transport-mode packets,
because skb->cb is now used to store the finish callback.

Was that expected or is it an unforeseen side effect?

Thanks,

-- 
Maxime




[PATCH 8/8] xfrm: Reinject transport-mode packets through tasklet

2017-12-22 Thread Steffen Klassert
From: Herbert Xu 

This is an old bugbear of mine:

https://www.mail-archive.com/netdev@vger.kernel.org/msg03894.html

By crafting special packets, it is possible to cause recursion
in our kernel when processing transport-mode packets at levels
that are only limited by packet size.

The easiest one is with DNAT, but an even worse one is where
UDP encapsulation is used, in which case you just have to insert
a UDP encapsulation header in between each level of recursion.

This patch avoids this problem by reinjecting transport-mode packets
through a tasklet.

Fixes: b05e106698d9 ("[IPV4/6]: Netfilter IPsec input hooks")
Signed-off-by: Herbert Xu 
Signed-off-by: Steffen Klassert 
---
 include/net/xfrm.h |  3 +++
 net/ipv4/xfrm4_input.c | 12 ++-
 net/ipv6/xfrm6_input.c | 10 -
 net/xfrm/xfrm_input.c  | 57 ++
 4 files changed, 80 insertions(+), 2 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index dc28a98ce97c..ae35991b5877 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1570,6 +1570,9 @@ int xfrm_init_state(struct xfrm_state *x);
 int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb);
 int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type);
 int xfrm_input_resume(struct sk_buff *skb, int nexthdr);
+int xfrm_trans_queue(struct sk_buff *skb,
+int (*finish)(struct net *, struct sock *,
+  struct sk_buff *));
 int xfrm_output_resume(struct sk_buff *skb, int err);
 int xfrm_output(struct sock *sk, struct sk_buff *skb);
 int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb);
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index e50b7fea57ee..bcfc00e88756 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -23,6 +23,12 @@ int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff 
*skb)
return xfrm4_extract_header(skb);
 }
 
+static int xfrm4_rcv_encap_finish2(struct net *net, struct sock *sk,
+  struct sk_buff *skb)
+{
+   return dst_input(skb);
+}
+
 static inline int xfrm4_rcv_encap_finish(struct net *net, struct sock *sk,
 struct sk_buff *skb)
 {
@@ -33,7 +39,11 @@ static inline int xfrm4_rcv_encap_finish(struct net *net, 
struct sock *sk,
 iph->tos, skb->dev))
goto drop;
}
-   return dst_input(skb);
+
+   if (xfrm_trans_queue(skb, xfrm4_rcv_encap_finish2))
+   goto drop;
+
+   return 0;
 drop:
kfree_skb(skb);
return NET_RX_DROP;
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index fe04e23af986..841f4a07438e 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -32,6 +32,14 @@ int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 
spi,
 }
 EXPORT_SYMBOL(xfrm6_rcv_spi);
 
+static int xfrm6_transport_finish2(struct net *net, struct sock *sk,
+  struct sk_buff *skb)
+{
+   if (xfrm_trans_queue(skb, ip6_rcv_finish))
+   __kfree_skb(skb);
+   return -1;
+}
+
 int xfrm6_transport_finish(struct sk_buff *skb, int async)
 {
struct xfrm_offload *xo = xfrm_offload(skb);
@@ -56,7 +64,7 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
 
NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
dev_net(skb->dev), NULL, skb, skb->dev, NULL,
-   ip6_rcv_finish);
+   xfrm6_transport_finish2);
return -1;
 }
 
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index da6447389ffb..3f6f6f8c9fa5 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -8,15 +8,29 @@
  *
  */
 
+#include 
+#include 
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
 #include 
 #include 
 
+struct xfrm_trans_tasklet {
+   struct tasklet_struct tasklet;
+   struct sk_buff_head queue;
+};
+
+struct xfrm_trans_cb {
+   int (*finish)(struct net *net, struct sock *sk, struct sk_buff *skb);
+};
+
+#define XFRM_TRANS_SKB_CB(__skb) ((struct xfrm_trans_cb *)&((__skb)->cb[0]))
+
 static struct kmem_cache *secpath_cachep __read_mostly;
 
 static DEFINE_SPINLOCK(xfrm_input_afinfo_lock);
@@ -25,6 +39,8 @@ static struct xfrm_input_afinfo const __rcu 
*xfrm_input_afinfo[AF_INET6 + 1];
 static struct gro_cells gro_cells;
 static struct net_device xfrm_napi_dev;
 
+static DEFINE_PER_CPU(struct xfrm_trans_tasklet, xfrm_trans_tasklet);
+
 int xfrm_input_register_afinfo(const struct xfrm_input_afinfo *afinfo)
 {
int err = 0;
@@ -477,9 +493,41 @@ int xfrm_input_resume(struct sk_buff *skb, int nexthdr)
 }
 EXPORT_SYMBOL(xfrm_input_resume);
 
+static void xfrm_trans_reinject(unsigned long data)
+{
+   struct xfrm_trans_tasklet *trans = (void *)data;
+   struct sk_buff_head queue;
+   struct sk_buff *skb;
+
+   __skb_queue_head_ini

Re: xfrm: Reinject transport-mode packets through tasklet

2017-12-19 Thread Steffen Klassert
On Fri, Dec 15, 2017 at 04:40:44PM +1100, Herbert Xu wrote:
> This is an old bugbear of mine:
> 
> https://www.mail-archive.com/netdev@vger.kernel.org/msg03894.html
> 
> By crafting special packets, it is possible to cause recursion
> in our kernel when processing transport-mode packets at levels
> that are only limited by packet size.
> 
> The easiest one is with DNAT, but an even worse one is where
> UDP encapsulation is used, in which case you just have to insert
> a UDP encapsulation header in between each level of recursion.
> 
> This patch avoids this problem by reinjecting transport-mode packets
> through a tasklet.
> 
> Fixes: b05e106698d9 ("[IPV4/6]: Netfilter IPsec input hooks")
> Signed-off-by: Herbert Xu 

Applied to the ipsec tree, thanks Herbert!


xfrm: Reinject transport-mode packets through tasklet

2017-12-14 Thread Herbert Xu
This is an old bugbear of mine:

https://www.mail-archive.com/netdev@vger.kernel.org/msg03894.html

By crafting special packets, it is possible to cause recursion
in our kernel when processing transport-mode packets at levels
that are only limited by packet size.

The easiest one is with DNAT, but an even worse one is where
UDP encapsulation is used, in which case you just have to insert
a UDP encapsulation header in between each level of recursion.

This patch avoids this problem by reinjecting transport-mode packets
through a tasklet.

Fixes: b05e106698d9 ("[IPV4/6]: Netfilter IPsec input hooks")
Signed-off-by: Herbert Xu 

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index dc28a98..ae35991 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1570,6 +1570,9 @@ struct xfrmk_spdinfo {
 int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb);
 int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type);
 int xfrm_input_resume(struct sk_buff *skb, int nexthdr);
+int xfrm_trans_queue(struct sk_buff *skb,
+int (*finish)(struct net *, struct sock *,
+  struct sk_buff *));
 int xfrm_output_resume(struct sk_buff *skb, int err);
 int xfrm_output(struct sock *sk, struct sk_buff *skb);
 int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb);
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index e50b7fe..bcfc00e 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -23,6 +23,12 @@ int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff 
*skb)
return xfrm4_extract_header(skb);
 }
 
+static int xfrm4_rcv_encap_finish2(struct net *net, struct sock *sk,
+  struct sk_buff *skb)
+{
+   return dst_input(skb);
+}
+
 static inline int xfrm4_rcv_encap_finish(struct net *net, struct sock *sk,
 struct sk_buff *skb)
 {
@@ -33,7 +39,11 @@ static inline int xfrm4_rcv_encap_finish(struct net *net, 
struct sock *sk,
 iph->tos, skb->dev))
goto drop;
}
-   return dst_input(skb);
+
+   if (xfrm_trans_queue(skb, xfrm4_rcv_encap_finish2))
+   goto drop;
+
+   return 0;
 drop:
kfree_skb(skb);
return NET_RX_DROP;
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index fe04e23..841f4a0 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -32,6 +32,14 @@ int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 
spi,
 }
 EXPORT_SYMBOL(xfrm6_rcv_spi);
 
+static int xfrm6_transport_finish2(struct net *net, struct sock *sk,
+  struct sk_buff *skb)
+{
+   if (xfrm_trans_queue(skb, ip6_rcv_finish))
+   __kfree_skb(skb);
+   return -1;
+}
+
 int xfrm6_transport_finish(struct sk_buff *skb, int async)
 {
struct xfrm_offload *xo = xfrm_offload(skb);
@@ -56,7 +64,7 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
 
NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
dev_net(skb->dev), NULL, skb, skb->dev, NULL,
-   ip6_rcv_finish);
+   xfrm6_transport_finish2);
return -1;
 }
 
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 347ab31..444fa37 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -8,15 +8,29 @@
  *
  */
 
+#include 
+#include 
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
 #include 
 #include 
 
+struct xfrm_trans_tasklet {
+   struct tasklet_struct tasklet;
+   struct sk_buff_head queue;
+};
+
+struct xfrm_trans_cb {
+   int (*finish)(struct net *net, struct sock *sk, struct sk_buff *skb);
+};
+
+#define XFRM_TRANS_SKB_CB(__skb) ((struct xfrm_trans_cb *)&((__skb)->cb[0]))
+
 static struct kmem_cache *secpath_cachep __read_mostly;
 
 static DEFINE_SPINLOCK(xfrm_input_afinfo_lock);
@@ -25,6 +39,8 @@
 static struct gro_cells gro_cells;
 static struct net_device xfrm_napi_dev;
 
+static DEFINE_PER_CPU(struct xfrm_trans_tasklet, xfrm_trans_tasklet);
+
 int xfrm_input_register_afinfo(const struct xfrm_input_afinfo *afinfo)
 {
int err = 0;
@@ -467,9 +483,41 @@ int xfrm_input_resume(struct sk_buff *skb, int nexthdr)
 }
 EXPORT_SYMBOL(xfrm_input_resume);
 
+static void xfrm_trans_reinject(unsigned long data)
+{
+   struct xfrm_trans_tasklet *trans = (void *)data;
+   struct sk_buff_head queue;
+   struct sk_buff *skb;
+
+   __skb_queue_head_init(&queue);
+   skb_queue_splice_init(&trans->queue, &queue);
+
+   while ((skb = __skb_dequeue(&queue)))
+   XFRM_TRANS_SKB_CB(skb)->finish(dev_net(skb->dev), NULL, skb);
+}
+
+int xfrm_trans_queue(struct sk_buff *skb,
+int (*finish)(struct net *, struct sock *,
+  struct sk_buff *))
+{
+   struct xfrm_trans_tasklet *trans;
+
+   trans = this_cpu_ptr(&xfrm_