[PATCH] TCP Compound: dwnd=0 on ssthresh

2006-07-05 Thread Angelo P. Castellani

In the TCP Compound article used as a reference for the implementation, we read:
"If a retransmission timeout occurs, dwnd should be reset to zero and
the delay-based component is disabled." at page 5 of
ftp://ftp.research.microsoft.com/pub/tr/TR-2005-86.pdf

The attached patch implements this requirement.

Regards,
Angelo P. Castellani
diff -urd a/net/ipv4/tcp_compound.c b/net/ipv4/tcp_compound.c
--- a/net/ipv4/tcp_compound.c	2006-07-05 17:19:28.0 +0200
+++ b/net/ipv4/tcp_compound.c	2006-07-05 17:20:42.0 +0200
@@ -221,12 +221,9 @@
 		tcp_compound_init(sk);
 }
 
-static void tcp_compound_cong_avoid(struct sock *sk, u32 ack,
-u32 seq_rtt, u32 in_flight, int flag)
-{
+static inline void tcp_compound_synch(struct sock *sk) {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct compound *vegas = inet_csk_ca(sk);
-	u8 inc = 0;
 
 	if (vegas->cwnd + vegas->dwnd > tp->snd_cwnd) {
 		if (vegas->cwnd > tp->snd_cwnd || vegas->dwnd > tp->snd_cwnd) {
@@ -234,9 +231,19 @@
 			vegas->dwnd = 0;
 		} else
 			vegas->cwnd = tp->snd_cwnd - vegas->dwnd;
-
 	}
 
+}
+
+static void tcp_compound_cong_avoid(struct sock *sk, u32 ack,
+u32 seq_rtt, u32 in_flight, int flag)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct compound *vegas = inet_csk_ca(sk);
+	u8 inc = 0;
+	
+	tcp_compound_synch(sk);
+
 	if (!tcp_is_cwnd_limited(sk, in_flight))
 		return;
 
@@ -415,9 +422,21 @@
 	}
 }
 
+static u32 tcp_compound_ssthresh(struct sock *sk) {
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct compound *vegas = inet_csk_ca(sk);
+
+	tcp_compound_synch(sk);
+	
+	vegas->dwnd = 0;
+	tp->snd_cwnd = vegas->cwnd;
+	
+	return tcp_reno_ssthresh(sk);
+}
+
 static struct tcp_congestion_ops tcp_compound = {
 	.init		= tcp_compound_init,
-	.ssthresh	= tcp_reno_ssthresh,
+	.ssthresh	= tcp_compound_ssthresh,
 	.cong_avoid	= tcp_compound_cong_avoid,
 	.min_cwnd	= tcp_reno_min_cwnd,
 	.rtt_sample	= tcp_compound_rtt_calc,


Re: [PATCH] TCP Compound

2006-05-25 Thread Stephen Hemminger
The existing code did a 64 bit divide directly, which won't work on
32 bit platforms.  This is what I am testing, it uses math similar to
TCP CUBIC to do a quad root. It seemed more efficient to just do
one operation rather than two square roots.

-
diff --git a/net/ipv4/tcp_compound.c b/net/ipv4/tcp_compound.c
index 01048e2..74c26a0 100644
--- a/net/ipv4/tcp_compound.c
+++ b/net/ipv4/tcp_compound.c
@@ -52,8 +52,6 @@
 
 #define TCP_COMPOUND_ALPHA  3U
 #define TCP_COMPOUND_BETA   1U
-#define TCP_COMPOUND_KAPPA_POW  3
-#define TCP_COMPOUND_KAPPA_NSQRT 2
 #define TCP_COMPOUND_GAMMA 30
 #define TCP_COMPOUND_ZETA   1
 
@@ -156,6 +154,58 @@ static void tcp_compound_state(struct so
vegas_disable(sk);
 }
 
+
+/* 64bit divisor, dividend and result. dynamic precision */
+static inline u64 div64_64(u64 dividend, u64 divisor)
+{
+   u32 d = divisor;
+
+   if (divisor > 0xffffffffULL) {
+   unsigned int shift = fls(divisor >> 32);
+
+   d = divisor >> shift;
+   dividend >>= shift;
+   }
+
+   /* avoid 64 bit division if possible */
+   if (dividend >> 32)
+   do_div(dividend, d);
+   else
+   dividend = (u32) dividend / d;
+
+   return dividend;
+}
+
+/* calculate the quartic root of "a" using Newton-Raphson */
+static u32 qroot(u64 a)
+{
+   u32 x, x1;
+
+   /* Initial estimate is based on:
+* qrt(x) = exp(log(x) / 4)
+*/
+   x = 1u << (fls64(a) >> 2);
+
+   /*
+* Iteration based on:
+* 3
+* x= ( 3 * x  +  a / x  ) / 4
+*  k+1  k k
+*/
+   do {
+   u64 x3 = x;
+
+   x1 = x;
+   x3 *= x;
+   x3 *= x;
+
+   x = (3 * x + (u32) div64_64(a, x3)) / 4;
+   } while (abs(x1 - x) > 1);
+
+   return x;
+}
+
+
 /*
  * If the connection is idle and we are restarting,
  * then we don't want to do any Vegas calculations
@@ -307,29 +357,23 @@ static void tcp_compound_cong_avoid(stru
dwnd = vegas->dwnd;
 
if (diff < (TCP_COMPOUND_GAMMA << V_PARAM_SHIFT)) {
-   u32 i, j, x, x2;
-   u64 v;
-
-   v = 1;
-
-   for (i = 0; i < TCP_COMPOUND_KAPPA_POW; i++)
-   v *= old_wnd;
-
-   for (i = 0; i < TCP_COMPOUND_KAPPA_NSQRT; i++) {
-   x = 1;
-   for (j = 0; j < 200; j++) {
-   x2 = (x + v / x) / 2;
-
-   if (x2 == x || !x2)
-   break;
-
-   x = x2;
-   }
-   v = x;
-   }
+   u64 win3;
 
-   x = (u32) v >> TCP_COMPOUND_ALPHA;
+   /*
+* The TCP Compound paper describes how the choice
+* of "k" determines the aggressiveness,
+* ie. slope of the response function.
+*
+* For the same value as HSTCP it would be 0.8,
+* but for computational reasons, both the
+* original authors and this implementation
+* use 0.75.
+*/
+   win3 = old_wnd;
+   win3 *= old_wnd;
+   win3 *= old_wnd;
 
+   x = qroot(win3) >> TCP_COMPOUND_ALPHA;
if (x > 1)
dwnd = x - 1;
else
-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] TCP Compound

2006-05-25 Thread Angelo P. Castellani

From: Angelo P. Castellani <[EMAIL PROTECTED]>

TCP Compound is a sender-side only change to TCP that uses
a mixed Reno/Vegas approach to calculate the cwnd.

For further details look here:
 ftp://ftp.research.microsoft.com/pub/tr/TR-2005-86.pdf

Signed-off-by: Angelo P. Castellani <[EMAIL PROTECTED]>

---

This new revision of the TCP Compound implementation fixes some issues
present in the previous patch and has been reverted to a stand-alone
file (thanks to Stephen suggestion).

Regards,
Angelo P. Castellani
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 326676b..e577eb8 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -542,6 +542,16 @@ config TCP_CONG_LP
 	``fair share`` of bandwidth as targeted by TCP.
 	See http://www-ece.rice.edu/networks/TCP-LP/
 
+config TCP_CONG_COMPOUND
+	tristate "TCP Compound"
+	depends on EXPERIMENTAL
+	default n
+	---help---
+	TCP Compound is a sender-side only change to TCP that uses 
+	a mixed Reno/Vegas approach to calculate the cwnd.
+	For further details look here:
+	  ftp://ftp.research.microsoft.com/pub/tr/TR-2005-86.pdf
+
 endmenu
 
 config TCP_CONG_BIC
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 5c65487..f0697c4 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -43,6 +43,7 @@ obj-$(CONFIG_TCP_CONG_HTCP) += tcp_htcp.
 obj-$(CONFIG_TCP_CONG_VEGAS) += tcp_vegas.o
 obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
 obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
+obj-$(CONFIG_TCP_CONG_COMPOUND) += tcp_compound.o
 
 obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
 		  xfrm4_output.o










/*
 * TCP Vegas congestion control
 *
 * This is based on the congestion detection/avoidance scheme described in
 *Lawrence S. Brakmo and Larry L. Peterson.
 *"TCP Vegas: End to end congestion avoidance on a global internet."
 *IEEE Journal on Selected Areas in Communication, 13(8):1465--1480,
 *October 1995. Available from:
 *	ftp://ftp.cs.arizona.edu/xkernel/Papers/jsac.ps
 *
 * See http://www.cs.arizona.edu/xkernel/ for their implementation.
 * The main aspects that distinguish this implementation from the
 * Arizona Vegas implementation are:
 *   o We do not change the loss detection or recovery mechanisms of
 * Linux in any way. Linux already recovers from losses quite well,
 * using fine-grained timers, NewReno, and FACK.
 *   o To avoid the performance penalty imposed by increasing cwnd
 * only every-other RTT during slow start, we increase during
 * every RTT during slow start, just like Reno.
 *   o Largely to allow continuous cwnd growth during slow start,
 * we use the rate at which ACKs come back as the "actual"
 * rate, rather than the rate at which data is sent.
 *   o To speed convergence to the right rate, we set the cwnd
 * to achieve the right ("actual") rate when we exit slow start.
 *   o To filter out the noise caused by delayed ACKs, we use the
 * minimum RTT sample observed during the last RTT to calculate
 * the actual rate.
 *   o When the sender re-starts from idle, it waits until it has
 * received ACKs for an entire flight of new data before making
 * a cwnd adjustment decision. The original Vegas implementation
 * assumed senders never went idle.
 * 
 *
 *   TCP Compound based on TCP Vegas
 *
 *   further details can be found here:
 *  ftp://ftp.research.microsoft.com/pub/tr/TR-2005-86.pdf
 */

#include <linux/config.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/inet_diag.h>

#include <net/tcp.h>

/* Default values of the Vegas variables, in fixed-point representation
 * with V_PARAM_SHIFT bits to the right of the binary point.
 */
#define V_PARAM_SHIFT 1

#define TCP_COMPOUND_ALPHA  3U
#define TCP_COMPOUND_BETA   1U
#define TCP_COMPOUND_KAPPA_POW  3
#define TCP_COMPOUND_KAPPA_NSQRT 2
#define TCP_COMPOUND_GAMMA 30
#define TCP_COMPOUND_ZETA   1

/* TCP compound variables
 *
 * Per-socket state for Compound TCP: the Vegas-style RTT sampling
 * fields plus the two window components whose sum is tracked by
 * tp->snd_cwnd (see the synchronisation logic in cong_avoid).
 */
struct compound {
	u32 beg_snd_nxt;	/* right edge during last RTT */
	u32 beg_snd_una;	/* left edge  during last RTT */
	u32 beg_snd_cwnd;	/* saves the size of the cwnd */
	u8 doing_vegas_now;	/* if true, do vegas for this RTT */
	u16 cntRTT;		/* # of RTTs measured within last RTT */
	u32 minRTT;		/* min of RTTs measured within last RTT (in usec) */
	u32 baseRTT;		/* the min of all Vegas RTT measurements seen (in usec) */

	u32 cwnd;		/* loss-based (Reno-like) window component */
	u32 dwnd;		/* delay-based window component; reset to 0 on ssthresh/RTO
				 * per the Compound TCP paper (TR-2005-86, p. 5) */
};

/* There are several situations when we must "re-start" Vegas:
 *
 *  o when a connection is established
 *  o after an RTO
 *  o after fast recovery
 *  o when we send a packet and there is no outstanding
 *unacknowledged data (restarting an idle connection)
 *
 * In these circumstances we cannot do a Vegas calculation at the
 * end of the first RTT, because any calculation we do is using
 * stale info -- both the saved cwnd and congestion feedback are
 * stale.
 *
 * Instead we must wait until the completion of an RTT during
 * which we actually receive ACKs.
 */
static inline void vegas_enable(struct sock *sk)
{
	const str