Re: [PATCH net-next] tcp: enhance tcp_collapse_retrans() with skb_shift()

2016-11-24 Thread Eric Dumazet
On Thu, 2016-11-24 at 15:42 -0500, David Miller wrote:
> From: Eric Dumazet 
> Date: Tue, 22 Nov 2016 10:57:33 -0800
> 
> > David, patch is marked 'Superseded' in
> > https://patchwork.ozlabs.org/patch/695264/
> > 
> > Not sure what this means exactly ?
> > Did I miss a mail/feedback/something ?
> 
> I must have mistakenly marked it that way, sorry.
> 
> Applied to net-next, thanks Eric.

Thanks David !




Re: [PATCH net-next] tcp: enhance tcp_collapse_retrans() with skb_shift()

2016-11-24 Thread David Miller
From: Eric Dumazet 
Date: Tue, 22 Nov 2016 10:57:33 -0800

> David, patch is marked 'Superseded' in
> https://patchwork.ozlabs.org/patch/695264/
> 
> Not sure what this means exactly ?
> Did I miss a mail/feedback/something ?

I must have mistakenly marked it that way, sorry.

Applied to net-next, thanks Eric.


Re: [PATCH net-next] tcp: enhance tcp_collapse_retrans() with skb_shift()

2016-11-23 Thread Yuchung Cheng
On Tue, Nov 15, 2016 at 12:51 PM, Eric Dumazet  wrote:
>
> From: Eric Dumazet 
>
> In commit 2331ccc5b323 ("tcp: enhance tcp collapsing"),
> we made a first step allowing copying right skb to left skb head.
>
> Since all skbs in socket write queue are headless (but possibly the very
> first one), this strategy often does not work.
>
> This patch extends tcp_collapse_retrans() to perform frag shifting,
> thanks to skb_shift() helper.
>
> This helper needs to not BUG on non headless skbs, as callers are ok
> with that.
>
> Tested:
>
> Following packetdrill test now passes :
>
> 0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
>+0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
>+0 bind(3, ..., ...) = 0
>+0 listen(3, 1) = 0
>
>+0 < S 0:0(0) win 32792 
>+0 > S. 0:0(0) ack 1 
> +.100 < . 1:1(0) ack 1 win 257
>+0 accept(3, ..., ...) = 4
>
>+0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0
>+0 write(4, ..., 200) = 200
>+0 > P. 1:201(200) ack 1
> +.001 write(4, ..., 200) = 200
>+0 > P. 201:401(200) ack 1
> +.001 write(4, ..., 200) = 200
>+0 > P. 401:601(200) ack 1
> +.001 write(4, ..., 200) = 200
>+0 > P. 601:801(200) ack 1
> +.001 write(4, ..., 200) = 200
>+0 > P. 801:1001(200) ack 1
> +.001 write(4, ..., 100) = 100
>+0 > P. 1001:1101(100) ack 1
> +.001 write(4, ..., 100) = 100
>+0 > P. 1101:1201(100) ack 1
> +.001 write(4, ..., 100) = 100
>+0 > P. 1201:1301(100) ack 1
> +.001 write(4, ..., 100) = 100
>+0 > P. 1301:1401(100) ack 1
>
> +.099 < . 1:1(0) ack 201 win 257
> +.001 < . 1:1(0) ack 201 win 257 
>+0 > P. 201:1001(800) ack 1
>
> Signed-off-by: Eric Dumazet 
> Cc: Neal Cardwell 
> Cc: Yuchung Cheng 
Acked-by: Yuchung Cheng 

Nice follow-up patch. This also works well with RACK loss detection
since RACK only cares about time (skb_mstamp) not sequence so
collapsing sequences is not a problem.

> ---
>  net/core/skbuff.c |4 +++-
>  net/ipv4/tcp_output.c |   22 +++---
>  2 files changed, 14 insertions(+), 12 deletions(-)
>
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index 
> 0b2a6e94af2de73ed638634c47a0fb71e2cbc1cb..a9cb81a10c4ba895587727aa4cf098e9a38424ea
>  100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -2656,7 +2656,9 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, 
> int shiftlen)
> struct skb_frag_struct *fragfrom, *fragto;
>
> BUG_ON(shiftlen > skb->len);
> -   BUG_ON(skb_headlen(skb));   /* Would corrupt stream */
> +
> +   if (skb_headlen(skb))
> +   return 0;
>
> todo = shiftlen;
> from = 0;
> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> index 
> f57b5aa51b59cf0a58975fe34a7dcdb886ea8c50..19105b46a30436ebb85fe97ee43089e77aa028bb
>  100644
> --- a/net/ipv4/tcp_output.c
> +++ b/net/ipv4/tcp_output.c
> @@ -2514,7 +2514,7 @@ void tcp_skb_collapse_tstamp(struct sk_buff *skb,
>  }
>
>  /* Collapses two adjacent SKB's during retransmission. */
> -static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
> +static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
>  {
> struct tcp_sock *tp = tcp_sk(sk);
> struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
> @@ -2525,14 +2525,17 @@ static void tcp_collapse_retrans(struct sock *sk, 
> struct sk_buff *skb)
>
> BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);
>
> +   if (next_skb_size) {
> +   if (next_skb_size <= skb_availroom(skb))
> +   skb_copy_bits(next_skb, 0, skb_put(skb, 
> next_skb_size),
> + next_skb_size);
> +   else if (!skb_shift(skb, next_skb, next_skb_size))
> +   return false;
> +   }
> tcp_highest_sack_combine(sk, next_skb, skb);
>
> tcp_unlink_write_queue(next_skb, sk);
>
> -   if (next_skb_size)
> -   skb_copy_bits(next_skb, 0, skb_put(skb, next_skb_size),
> - next_skb_size);
> -
> if (next_skb->ip_summed == CHECKSUM_PARTIAL)
> skb->ip_summed = CHECKSUM_PARTIAL;
>
> @@ -2561,6 +2564,7 @@ static void tcp_collapse_retrans(struct sock *sk, 
> struct sk_buff *skb)
> tcp_skb_collapse_tstamp(skb, next_skb);
>
> sk_wmem_free_skb(sk, next_skb);
> +   return true;
>  }
>
>  /* Check if coalescing SKBs is legal. */
> @@ -2610,16 +2614,12 @@ static void tcp_retrans_try_collapse(struct sock *sk, 
> struct sk_buff *to,
>
> if (space < 0)
> break;
> -   /* Punt if not enough space exists in the first SKB for
> -* the data in the second
> -*/
> -   if (skb->len > skb_availroom(to))
> -   break;
>
>  

Re: [PATCH net-next] tcp: enhance tcp_collapse_retrans() with skb_shift()

2016-11-22 Thread Eric Dumazet
On Tue, 2016-11-15 at 12:51 -0800, Eric Dumazet wrote:
> From: Eric Dumazet 
> 
> In commit 2331ccc5b323 ("tcp: enhance tcp collapsing"),
> we made a first step allowing copying right skb to left skb head.
> 
> Since all skbs in socket write queue are headless (but possibly the very
> first one), this strategy often does not work.
> 
> This patch extends tcp_collapse_retrans() to perform frag shifting,
> thanks to skb_shift() helper.
> 
> This helper needs to not BUG on non headless skbs, as callers are ok
> with that.
> 
> Tested:
> 
> Following packetdrill test now passes :
> 
> 0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
>+0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
>+0 bind(3, ..., ...) = 0
>+0 listen(3, 1) = 0
> 
>+0 < S 0:0(0) win 32792 
>+0 > S. 0:0(0) ack 1 
> +.100 < . 1:1(0) ack 1 win 257
>+0 accept(3, ..., ...) = 4
> 
>+0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0
>+0 write(4, ..., 200) = 200
>+0 > P. 1:201(200) ack 1
> +.001 write(4, ..., 200) = 200
>+0 > P. 201:401(200) ack 1
> +.001 write(4, ..., 200) = 200
>+0 > P. 401:601(200) ack 1
> +.001 write(4, ..., 200) = 200
>+0 > P. 601:801(200) ack 1
> +.001 write(4, ..., 200) = 200
>+0 > P. 801:1001(200) ack 1
> +.001 write(4, ..., 100) = 100
>+0 > P. 1001:1101(100) ack 1
> +.001 write(4, ..., 100) = 100
>+0 > P. 1101:1201(100) ack 1
> +.001 write(4, ..., 100) = 100
>+0 > P. 1201:1301(100) ack 1
> +.001 write(4, ..., 100) = 100
>+0 > P. 1301:1401(100) ack 1
> 
> +.099 < . 1:1(0) ack 201 win 257
> +.001 < . 1:1(0) ack 201 win 257 
>+0 > P. 201:1001(800) ack 1
> 
> Signed-off-by: Eric Dumazet 
> Cc: Neal Cardwell 
> Cc: Yuchung Cheng 
> ---
>  net/core/skbuff.c |4 +++-
>  net/ipv4/tcp_output.c |   22 +++---
>  2 files changed, 14 insertions(+), 12 deletions(-)
> 
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index 
> 0b2a6e94af2de73ed638634c47a0fb71e2cbc1cb..a9cb81a10c4ba895587727aa4cf098e9a38424ea
>  100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -2656,7 +2656,9 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, 
> int shiftlen)
>   struct skb_frag_struct *fragfrom, *fragto;
>  
>   BUG_ON(shiftlen > skb->len);
> - BUG_ON(skb_headlen(skb));   /* Would corrupt stream */
> +
> + if (skb_headlen(skb))
> + return 0;
>  
>   todo = shiftlen;
>   from = 0;
> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> index 
> f57b5aa51b59cf0a58975fe34a7dcdb886ea8c50..19105b46a30436ebb85fe97ee43089e77aa028bb
>  100644
> --- a/net/ipv4/tcp_output.c
> +++ b/net/ipv4/tcp_output.c
> @@ -2514,7 +2514,7 @@ void tcp_skb_collapse_tstamp(struct sk_buff *skb,
>  }
>  
>  /* Collapses two adjacent SKB's during retransmission. */
> -static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
> +static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
>  {
>   struct tcp_sock *tp = tcp_sk(sk);
>   struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
> @@ -2525,14 +2525,17 @@ static void tcp_collapse_retrans(struct sock *sk, 
> struct sk_buff *skb)
>  
>   BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);
>  
> + if (next_skb_size) {
> + if (next_skb_size <= skb_availroom(skb))
> + skb_copy_bits(next_skb, 0, skb_put(skb, next_skb_size),
> +   next_skb_size);
> + else if (!skb_shift(skb, next_skb, next_skb_size))
> + return false;
> + }
>   tcp_highest_sack_combine(sk, next_skb, skb);
>  
>   tcp_unlink_write_queue(next_skb, sk);
>  
> - if (next_skb_size)
> - skb_copy_bits(next_skb, 0, skb_put(skb, next_skb_size),
> -   next_skb_size);
> -
>   if (next_skb->ip_summed == CHECKSUM_PARTIAL)
>   skb->ip_summed = CHECKSUM_PARTIAL;
>  
> @@ -2561,6 +2564,7 @@ static void tcp_collapse_retrans(struct sock *sk, 
> struct sk_buff *skb)
>   tcp_skb_collapse_tstamp(skb, next_skb);
>  
>   sk_wmem_free_skb(sk, next_skb);
> + return true;
>  }
>  
>  /* Check if coalescing SKBs is legal. */
> @@ -2610,16 +2614,12 @@ static void tcp_retrans_try_collapse(struct sock *sk, 
> struct sk_buff *to,
>  
>   if (space < 0)
>   break;
> - /* Punt if not enough space exists in the first SKB for
> -  * the data in the second
> -  */
> - if (skb->len > skb_availroom(to))
> - break;
>  
>   if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp)))
>   break;
>  
> - tcp_collapse_retrans(sk, to);
> + if (!tcp_collapse_retrans(sk, to))
> + break;
>   }
>  }
>  


David, patch is marked 'Superseded' in https://patchwork.ozlabs.org/patch/695264/

[PATCH net-next] tcp: enhance tcp_collapse_retrans() with skb_shift()

2016-11-15 Thread Eric Dumazet
From: Eric Dumazet 

In commit 2331ccc5b323 ("tcp: enhance tcp collapsing"),
we made a first step allowing copying right skb to left skb head.

Since all skbs in socket write queue are headless (but possibly the very
first one), this strategy often does not work.

This patch extends tcp_collapse_retrans() to perform frag shifting,
thanks to skb_shift() helper.

This helper needs to not BUG on non headless skbs, as callers are ok
with that.

Tested:

Following packetdrill test now passes :

0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
   +0 bind(3, ..., ...) = 0
   +0 listen(3, 1) = 0

   +0 < S 0:0(0) win 32792 
   +0 > S. 0:0(0) ack 1 
+.100 < . 1:1(0) ack 1 win 257
   +0 accept(3, ..., ...) = 4

   +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0
   +0 write(4, ..., 200) = 200
   +0 > P. 1:201(200) ack 1
+.001 write(4, ..., 200) = 200
   +0 > P. 201:401(200) ack 1
+.001 write(4, ..., 200) = 200
   +0 > P. 401:601(200) ack 1
+.001 write(4, ..., 200) = 200
   +0 > P. 601:801(200) ack 1
+.001 write(4, ..., 200) = 200
   +0 > P. 801:1001(200) ack 1
+.001 write(4, ..., 100) = 100
   +0 > P. 1001:1101(100) ack 1
+.001 write(4, ..., 100) = 100
   +0 > P. 1101:1201(100) ack 1
+.001 write(4, ..., 100) = 100
   +0 > P. 1201:1301(100) ack 1
+.001 write(4, ..., 100) = 100
   +0 > P. 1301:1401(100) ack 1

+.099 < . 1:1(0) ack 201 win 257
+.001 < . 1:1(0) ack 201 win 257 
   +0 > P. 201:1001(800) ack 1

Signed-off-by: Eric Dumazet 
Cc: Neal Cardwell 
Cc: Yuchung Cheng 
---
 net/core/skbuff.c |4 +++-
 net/ipv4/tcp_output.c |   22 +++---
 2 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 
0b2a6e94af2de73ed638634c47a0fb71e2cbc1cb..a9cb81a10c4ba895587727aa4cf098e9a38424ea
 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2656,7 +2656,9 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, 
int shiftlen)
struct skb_frag_struct *fragfrom, *fragto;
 
BUG_ON(shiftlen > skb->len);
-   BUG_ON(skb_headlen(skb));   /* Would corrupt stream */
+
+   if (skb_headlen(skb))
+   return 0;
 
todo = shiftlen;
from = 0;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 
f57b5aa51b59cf0a58975fe34a7dcdb886ea8c50..19105b46a30436ebb85fe97ee43089e77aa028bb
 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2514,7 +2514,7 @@ void tcp_skb_collapse_tstamp(struct sk_buff *skb,
 }
 
 /* Collapses two adjacent SKB's during retransmission. */
-static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
+static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
 {
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
@@ -2525,14 +2525,17 @@ static void tcp_collapse_retrans(struct sock *sk, 
struct sk_buff *skb)
 
BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);
 
+   if (next_skb_size) {
+   if (next_skb_size <= skb_availroom(skb))
+   skb_copy_bits(next_skb, 0, skb_put(skb, next_skb_size),
+ next_skb_size);
+   else if (!skb_shift(skb, next_skb, next_skb_size))
+   return false;
+   }
tcp_highest_sack_combine(sk, next_skb, skb);
 
tcp_unlink_write_queue(next_skb, sk);
 
-   if (next_skb_size)
-   skb_copy_bits(next_skb, 0, skb_put(skb, next_skb_size),
- next_skb_size);
-
if (next_skb->ip_summed == CHECKSUM_PARTIAL)
skb->ip_summed = CHECKSUM_PARTIAL;
 
@@ -2561,6 +2564,7 @@ static void tcp_collapse_retrans(struct sock *sk, struct 
sk_buff *skb)
tcp_skb_collapse_tstamp(skb, next_skb);
 
sk_wmem_free_skb(sk, next_skb);
+   return true;
 }
 
 /* Check if coalescing SKBs is legal. */
@@ -2610,16 +2614,12 @@ static void tcp_retrans_try_collapse(struct sock *sk, 
struct sk_buff *to,
 
if (space < 0)
break;
-   /* Punt if not enough space exists in the first SKB for
-* the data in the second
-*/
-   if (skb->len > skb_availroom(to))
-   break;
 
if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp)))
break;
 
-   tcp_collapse_retrans(sk, to);
+   if (!tcp_collapse_retrans(sk, to))
+   break;
}
 }