[PATCH] TCP Compound: dwnd=0 on ssthresh
In the TCP Compound article used as a reference for the implementation, we read: "If a retransmission timeout occurs, dwnd should be reset to zero and the delay-based component is disabled." at page 5 of ftp://ftp.research.microsoft.com/pub/tr/TR-2005-86.pdf The attached patch implements this requirement. Regards, Angelo P. Castellani diff -urd a/net/ipv4/tcp_compound.c b/net/ipv4/tcp_compound.c --- a/net/ipv4/tcp_compound.c 2006-07-05 17:19:28.0 +0200 +++ b/net/ipv4/tcp_compound.c 2006-07-05 17:20:42.0 +0200 @@ -221,12 +221,9 @@ tcp_compound_init(sk); } -static void tcp_compound_cong_avoid(struct sock *sk, u32 ack, -u32 seq_rtt, u32 in_flight, int flag) -{ +static inline void tcp_compound_synch(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct compound *vegas = inet_csk_ca(sk); - u8 inc = 0; if (vegas->cwnd + vegas->dwnd > tp->snd_cwnd) { if (vegas->cwnd > tp->snd_cwnd || vegas->dwnd > tp->snd_cwnd) { @@ -234,9 +231,19 @@ vegas->dwnd = 0; } else vegas->cwnd = tp->snd_cwnd - vegas->dwnd; - } +} + +static void tcp_compound_cong_avoid(struct sock *sk, u32 ack, +u32 seq_rtt, u32 in_flight, int flag) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct compound *vegas = inet_csk_ca(sk); + u8 inc = 0; + + tcp_compound_synch(sk); + if (!tcp_is_cwnd_limited(sk, in_flight)) return; @@ -415,9 +422,21 @@ } } +static u32 tcp_compound_ssthresh(struct sock *sk) { + struct tcp_sock *tp = tcp_sk(sk); + struct compound *vegas = inet_csk_ca(sk); + + tcp_compound_synch(sk); + + vegas->dwnd = 0; + tp->snd_cwnd = vegas->cwnd; + + return tcp_reno_ssthresh(sk); +} + static struct tcp_congestion_ops tcp_compound = { .init = tcp_compound_init, - .ssthresh = tcp_reno_ssthresh, + .ssthresh = tcp_compound_ssthresh, .cong_avoid = tcp_compound_cong_avoid, .min_cwnd = tcp_reno_min_cwnd, .rtt_sample = tcp_compound_rtt_calc,
Re: [PATCH] TCP Compound
The existing code did a 64 bit divide directly, which won't work on 32 bit platforms. This is what I am testing, it uses math similar to TCP CUBIC to do a quad root. It seemed more efficient to just do one operation rather than two square roots. - diff --git a/net/ipv4/tcp_compound.c b/net/ipv4/tcp_compound.c index 01048e2..74c26a0 100644 --- a/net/ipv4/tcp_compound.c +++ b/net/ipv4/tcp_compound.c @@ -52,8 +52,6 @@ #define TCP_COMPOUND_ALPHA 3U #define TCP_COMPOUND_BETA 1U -#define TCP_COMPOUND_KAPPA_POW 3 -#define TCP_COMPOUND_KAPPA_NSQRT2 #define TCP_COMPOUND_GAMMA 30 #define TCP_COMPOUND_ZETA 1 @@ -156,6 +154,58 @@ static void tcp_compound_state(struct so vegas_disable(sk); } + +/* 64bit divisor, dividend and result. dynamic precision */ +static inline u64 div64_64(u64 dividend, u64 divisor) +{ + u32 d = divisor; + + if (divisor > 0xULL) { + unsigned int shift = fls(divisor >> 32); + + d = divisor >> shift; + dividend >>= shift; + } + + /* avoid 64 bit division if possible */ + if (dividend >> 32) + do_div(dividend, d); + else + dividend = (u32) dividend / d; + + return dividend; +} + +/* calculate the quartic root of "a" using Newton-Raphson */ +static u32 qroot(u64 a) +{ + u32 x, x1; + + /* Initial estimate is based on: +* qrt(x) = exp(log(x) / 4) +*/ + x = 1u << (fls64(a) >> 2); + + /* +* Iteration based on: +* 3 +* x= ( 3 * x + a / x ) / 4 +* k+1 k k +*/ + do { + u64 x3 = x; + + x1 = x; + x3 *= x; + x3 *= x; + + x = (3 * x + (u32) div64_64(a, x3)) / 4; + } while (abs(x1 - x) > 1); + + return x; +} + + /* * If the connection is idle and we are restarting, * then we don't want to do any Vegas calculations @@ -307,29 +357,23 @@ static void tcp_compound_cong_avoid(stru dwnd = vegas->dwnd; if (diff < (TCP_COMPOUND_GAMMA << V_PARAM_SHIFT)) { - u32 i, j, x, x2; - u64 v; - - v = 1; - - for (i = 0; i < TCP_COMPOUND_KAPPA_POW; i++) - v *= old_wnd; - - for (i = 0; i < TCP_COMPOUND_KAPPA_NSQRT; i++) { - x = 1; - for (j = 0; j < 200; j++) { - x2 = (x + v / x) / 2; - - if (x2 == x || !x2) - break; - - x = x2; - } - v = x; - } + u64 win3; - x = (u32) v >> TCP_COMPOUND_ALPHA; + /* +* The TCP Compound paper describes the choice +* of "k" determines the agressiveness, +* ie. slope of the response function. +* +* For same value as HSTCP would be 0.8 +* but for computaional reasons, both the +* original authors and this implementation +* use 0.75. +*/ + win3 = old_wnd; + win3 *= old_wnd; + win3 *= old_wnd; + x = qroot(win3) >> TCP_COMPOUND_ALPHA; if (x > 1) dwnd = x - 1; else - To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] TCP Compound
From: Angelo P. Castellani <[EMAIL PROTECTED]> TCP Compound is a sender-side only change to TCP that uses a mixed Reno/Vegas approach to calculate the cwnd. For further details look here: ftp://ftp.research.microsoft.com/pub/tr/TR-2005-86.pdf Signed-off-by: Angelo P. Castellani <[EMAIL PROTECTED]> --- This new revision of the TCP Compound implementation fixes some issues present in the previous patch and has been reverted to a stand-alone file (thanks to Stephen suggestion). Regards, Angelo P. Castellani diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 326676b..e577eb8 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -542,6 +542,16 @@ config TCP_CONG_LP ``fair share`` of bandwidth as targeted by TCP. See http://www-ece.rice.edu/networks/TCP-LP/ +config TCP_CONG_COMPOUND + tristate "TCP Compound" + depends on EXPERIMENTAL + default n + ---help--- + TCP Compound is a sender-side only change to TCP that uses + a mixed Reno/Vegas approach to calculate the cwnd. + For further details look here: + ftp://ftp.research.microsoft.com/pub/tr/TR-2005-86.pdf + endmenu config TCP_CONG_BIC diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 5c65487..f0697c4 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -43,6 +43,7 @@ obj-$(CONFIG_TCP_CONG_HTCP) += tcp_htcp. obj-$(CONFIG_TCP_CONG_VEGAS) += tcp_vegas.o obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o +obj-$(CONFIG_TCP_CONG_COMPOUND) += tcp_compound.o obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ xfrm4_output.o /* * TCP Vegas congestion control * * This is based on the congestion detection/avoidance scheme described in *Lawrence S. Brakmo and Larry L. Peterson. *"TCP Vegas: End to end congestion avoidance on a global internet." *IEEE Journal on Selected Areas in Communication, 13(8):1465--1480, *October 1995. Available from: * ftp://ftp.cs.arizona.edu/xkernel/Papers/jsac.ps * * See http://www.cs.arizona.edu/xkernel/ for their implementation. * The main aspects that distinguish this implementation from the * Arizona Vegas implementation are: * o We do not change the loss detection or recovery mechanisms of * Linux in any way. Linux already recovers from losses quite well, * using fine-grained timers, NewReno, and FACK. * o To avoid the performance penalty imposed by increasing cwnd * only every-other RTT during slow start, we increase during * every RTT during slow start, just like Reno. * o Largely to allow continuous cwnd growth during slow start, * we use the rate at which ACKs come back as the "actual" * rate, rather than the rate at which data is sent. * o To speed convergence to the right rate, we set the cwnd * to achieve the right ("actual") rate when we exit slow start. * o To filter out the noise caused by delayed ACKs, we use the * minimum RTT sample observed during the last RTT to calculate * the actual rate. * o When the sender re-starts from idle, it waits until it has * received ACKs for an entire flight of new data before making * a cwnd adjustment decision. The original Vegas implementation * assumed senders never went idle. * * * TCP Compound based on TCP Vegas * * further details can be found here: * ftp://ftp.research.microsoft.com/pub/tr/TR-2005-86.pdf */ #include #include #include #include #include #include /* Default values of the Vegas variables, in fixed-point representation * with V_PARAM_SHIFT bits to the right of the binary point. */ #define V_PARAM_SHIFT 1 #define TCP_COMPOUND_ALPHA 3U #define TCP_COMPOUND_BETA 1U #define TCP_COMPOUND_KAPPA_POW 3 #define TCP_COMPOUND_KAPPA_NSQRT2 #define TCP_COMPOUND_GAMMA 30 #define TCP_COMPOUND_ZETA 1 /* TCP compound variables */ struct compound { u32 beg_snd_nxt; /* right edge during last RTT */ u32 beg_snd_una; /* left edge during last RTT */ u32 beg_snd_cwnd; /* saves the size of the cwnd */ u8 doing_vegas_now; /* if true, do vegas for this RTT */ u16 cntRTT; /* # of RTTs measured within last RTT */ u32 minRTT; /* min of RTTs measured within last RTT (in usec) */ u32 baseRTT; /* the min of all Vegas RTT measurements seen (in usec) */ u32 cwnd; u32 dwnd; }; /* There are several situations when we must "re-start" Vegas: * * o when a connection is established * o after an RTO * o after fast recovery * o when we send a packet and there is no outstanding *unacknowledged data (restarting an idle connection) * * In these circumstances we cannot do a Vegas calculation at the * end of the first RTT, because any calculation we do is using * stale info -- both the saved cwnd and congestion feedback are * stale. * * Instead we must wait until the completion of an RTT during * which we actually receive ACKs. */ static inline void vegas_enable(struct sock *sk) { const str