[RFC PATCH v2 net-next 3/3] tcp: add NV congestion control
This is a request for comments. TCP-NV (New Vegas) is a major update to TCP-Vegas. An earlier version of NV was presented at 2010's LPC (slides). It is a delay-based congestion avoidance algorithm for the data center. This version has been tested within a 10G rack where the HW RTTs are 20-50us. A description of TCP-NV, including implementation and experimental results, can be found at: http://www.brakmo.org/networking/tcp-nv/TCPNV.html The current version includes many module parameters to support experimentation with the parameters. Signed-off-by: Lawrence Brakmo --- include/net/tcp.h | 1 + net/ipv4/Kconfig | 16 ++ net/ipv4/Makefile | 1 + net/ipv4/sysctl_net_ipv4.c | 9 + net/ipv4/tcp_input.c | 2 + net/ipv4/tcp_nv.c | 479 + 6 files changed, 508 insertions(+) create mode 100644 net/ipv4/tcp_nv.c diff --git a/include/net/tcp.h b/include/net/tcp.h index 2e62efe..c0690ae 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -281,6 +281,7 @@ extern unsigned int sysctl_tcp_notsent_lowat; extern int sysctl_tcp_min_tso_segs; extern int sysctl_tcp_autocorking; extern int sysctl_tcp_invalid_ratelimit; +extern int sysctl_tcp_nv_enable; extern atomic_long_t tcp_memory_allocated; extern struct percpu_counter tcp_sockets_allocated; diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 6fb3c90..c37b374 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -539,6 +539,22 @@ config TCP_CONG_VEGAS window. TCP Vegas should provide less packet loss, but it is not as aggressive as TCP Reno. +config TCP_CONG_NV + tristate "TCP NV" + default m + ---help--- + TCP NV is a follow up to TCP Vegas. It has been modified to deal with + 10G networks, measurement noise introduced by LRO, GRO and interrupt + coalescence. In addition, it will decrease its cwnd multiplicative + instead of linearly. + + Note that in general congestion avoidance (cwnd decreased when # packets + queued grows) cannot coexist with congestion control (cwnd decreased only + when there is packet loss) due to fairness issues. 
One scenario when the + can coexist safely is when the CA flows have RTTs << CC flows RTTs. + + For further details see http://www.brakmo.org/networking/tcp-nv/ + config TCP_CONG_SCALABLE tristate "Scalable TCP" default n diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index efc43f3..06f335f 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -50,6 +50,7 @@ obj-$(CONFIG_TCP_CONG_HSTCP) += tcp_highspeed.o obj-$(CONFIG_TCP_CONG_HYBLA) += tcp_hybla.o obj-$(CONFIG_TCP_CONG_HTCP) += tcp_htcp.o obj-$(CONFIG_TCP_CONG_VEGAS) += tcp_vegas.o +obj-$(CONFIG_TCP_CONG_NV) += tcp_nv.o obj-$(CONFIG_TCP_CONG_VENO) += tcp_veno.o obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 433231c..31846d5 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -730,6 +730,15 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec_ms_jiffies, }, { + .procname = "tcp_nv_enable", + .data = &sysctl_tcp_nv_enable, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, + { .procname = "icmp_msgs_per_sec", .data = &sysctl_icmp_msgs_per_sec, .maxlen = sizeof(int), diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index aca4ae5..87560d9 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -101,6 +101,8 @@ int sysctl_tcp_thin_dupack __read_mostly; int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; int sysctl_tcp_early_retrans __read_mostly = 3; int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2; +int sysctl_tcp_nv_enable __read_mostly = 1; +EXPORT_SYMBOL(sysctl_tcp_nv_enable); #define FLAG_DATA 0x01 /* Incoming frame contained data. */ #define FLAG_WIN_UPDATE0x02 /* Incoming ACK was a window update. 
*/ diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c new file mode 100644 index 000..af451b6 --- /dev/null +++ b/net/ipv4/tcp_nv.c @@ -0,0 +1,479 @@ +/* + * TCP NV: TCP with Congestion Avoidance + * + * TCP-NV is a successor of TCP-Vegas that has been developed to + * deal with the issues that occur in modern networks. + * Like TCP-Vegas, TCP-NV supports true congestion avoidance, + * the ability to detect congestion before packet losses occur. + * When congestion (queue buildup) starts to occur, TCP-NV + * predicts what the cwnd size should be for the current + * throughput and it re
Re: [RFC PATCH v2 net-next 3/3] tcp: add NV congestion control
On Tue, Jul 21, 2015 at 9:21 PM, Lawrence Brakmo wrote: > This is a request for comments. > > TCP-NV (New Vegas) is a major update to TCP-Vegas. An earlier version of > NV was presented at 2010's LPC (slides). It is a delayed based > congestion avoidance for the data center. This version has been tested > within a 10G rack where the HW RTTs are 20-50us. > > A description of TCP-NV, including implementation and experimental > results, can be found at: > http://www.brakmo.org/networking/tcp-nv/TCPNV.html > > The current version includes many module parameters to support > experimentation with the parameters. > > Signed-off-by: Lawrence Brakmo > --- > include/net/tcp.h | 1 + > net/ipv4/Kconfig | 16 ++ > net/ipv4/Makefile | 1 + > net/ipv4/sysctl_net_ipv4.c | 9 + > net/ipv4/tcp_input.c | 2 + > net/ipv4/tcp_nv.c | 479 > + > 6 files changed, 508 insertions(+) > create mode 100644 net/ipv4/tcp_nv.c > > diff --git a/include/net/tcp.h b/include/net/tcp.h > index 2e62efe..c0690ae 100644 > --- a/include/net/tcp.h > +++ b/include/net/tcp.h > @@ -281,6 +281,7 @@ extern unsigned int sysctl_tcp_notsent_lowat; > extern int sysctl_tcp_min_tso_segs; > extern int sysctl_tcp_autocorking; > extern int sysctl_tcp_invalid_ratelimit; > +extern int sysctl_tcp_nv_enable; > > extern atomic_long_t tcp_memory_allocated; > extern struct percpu_counter tcp_sockets_allocated; > diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig > index 6fb3c90..c37b374 100644 > --- a/net/ipv4/Kconfig > +++ b/net/ipv4/Kconfig > @@ -539,6 +539,22 @@ config TCP_CONG_VEGAS > window. TCP Vegas should provide less packet loss, but it is > not as aggressive as TCP Reno. > > +config TCP_CONG_NV > + tristate "TCP NV" > + default m > + ---help--- > + TCP NV is a follow up to TCP Vegas. It has been modified to deal with > + 10G networks, measurement noise introduced by LRO, GRO and interrupt > + coalescence. In addition, it will decrease its cwnd multiplicative multiplicatively > + instead of linearly. 
> + > + Note that in general congestion avoidance (cwnd decreased when # > packets > + queued grows) cannot coexist with congestion control (cwnd decreased > only > + when there is packet loss) due to fairness issues. One scenario when > the s/the/they > + can coexist safely is when the CA flows have RTTs << CC flows RTTs. > + > + For further details see http://www.brakmo.org/networking/tcp-nv/ > + > config TCP_CONG_SCALABLE > tristate "Scalable TCP" > default n > diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile > index efc43f3..06f335f 100644 > --- a/net/ipv4/Makefile > +++ b/net/ipv4/Makefile > @@ -50,6 +50,7 @@ obj-$(CONFIG_TCP_CONG_HSTCP) += tcp_highspeed.o > obj-$(CONFIG_TCP_CONG_HYBLA) += tcp_hybla.o > obj-$(CONFIG_TCP_CONG_HTCP) += tcp_htcp.o > obj-$(CONFIG_TCP_CONG_VEGAS) += tcp_vegas.o > +obj-$(CONFIG_TCP_CONG_NV) += tcp_nv.o > obj-$(CONFIG_TCP_CONG_VENO) += tcp_veno.o > obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o > obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o > diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c > index 433231c..31846d5 100644 > --- a/net/ipv4/sysctl_net_ipv4.c > +++ b/net/ipv4/sysctl_net_ipv4.c > @@ -730,6 +730,15 @@ static struct ctl_table ipv4_table[] = { > .proc_handler = proc_dointvec_ms_jiffies, > }, > { > + .procname = "tcp_nv_enable", > + .data = &sysctl_tcp_nv_enable, > + .maxlen = sizeof(int), > + .mode = 0644, > + .proc_handler = proc_dointvec_minmax, > + .extra1 = &zero, > + .extra2 = &one, > + }, > + { > .procname = "icmp_msgs_per_sec", > .data = &sysctl_icmp_msgs_per_sec, > .maxlen = sizeof(int), > diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c > index aca4ae5..87560d9 100644 > --- a/net/ipv4/tcp_input.c > +++ b/net/ipv4/tcp_input.c > @@ -101,6 +101,8 @@ int sysctl_tcp_thin_dupack __read_mostly; > int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; > int sysctl_tcp_early_retrans __read_mostly = 3; > int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2; > +int sysctl_tcp_nv_enable __read_mostly = 1; 
> +EXPORT_SYMBOL(sysctl_tcp_nv_enable); > > #define FLAG_DATA 0x01 /* Incoming frame contained data. > */ > #define FLAG_WIN_UPDATE0x02 /* Incoming ACK was a window > update. */ > diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c > new file mode 100644 > index 000..af451b6 > --- /dev/null > +++ b/net/ipv4/tcp_nv.c > @@ -0,0 +1,479 @@ > +/* > + * TCP NV: TCP with Congestion Avoidance > + * > + * TCP-NV is a successor of TCP-Vegas that has been developed to > + * deal wi
Re: [RFC PATCH v2 net-next 3/3] tcp: add NV congestion control
On Wed, Jul 22, 2015 at 2:50 AM, Yuchung Cheng wrote: > On Tue, Jul 21, 2015 at 9:21 PM, Lawrence Brakmo wrote: >> This is a request for comments. >> >> TCP-NV (New Vegas) is a major update to TCP-Vegas. An earlier version of >> NV was presented at 2010's LPC (slides). It is a delayed based >> congestion avoidance for the data center. This version has been tested >> within a 10G rack where the HW RTTs are 20-50us. >> >> A description of TCP-NV, including implementation and experimental >> results, can be found at: >> http://www.brakmo.org/networking/tcp-nv/TCPNV.html >> >> The current version includes many module parameters to support >> experimentation with the parameters. ... >> +extern int sysctl_tcp_nv_enable; The sysctl_tcp_nv_enable only seems to be used within the NV module. Can it be a module parameter instead of sysctl? neal -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC PATCH v2 net-next 3/3] tcp: add NV congestion control
Thank you all for your comments, I'm currently testing the changes. Other comments inline. On 7/21/15, 11:50 PM, "Yuchung Cheng" wrote: >On Tue, Jul 21, 2015 at 9:21 PM, Lawrence Brakmo wrote: >> This is a request for comments. >> >> TCP-NV (New Vegas) is a major update to TCP-Vegas. An earlier version of >> NV was presented at 2010's LPC (slides). It is a delayed based >> congestion avoidance for the data center. This version has been tested >> within a 10G rack where the HW RTTs are 20-50us. >> >> A description of TCP-NV, including implementation and experimental >> results, can be found at: >> >>https://urldefense.proofpoint.com/v1/url?u=http://www.brakmo.org/networki >>ng/tcp-nv/TCPNV.html&k=ZVNjlDMF0FElm4dQtryO4A%3D%3D%0A&r=m30SgjN07T%2FK%2 >>FdV1ZIt1iA%3D%3D%0A&m=XeELWxnafKynbNgkHg6RW%2F85hv1bPWlufUn2Dh4cOH4%3D%0A >>&s=0029c47e62d84d6ffd22bd33e1895a3f61eaa21d88cbfb553aa1df780bbbdcf9 >> >> The current version includes many module parameters to support >> experimentation with the parameters. >> >> Signed-off-by: Lawrence Brakmo >> --- >> include/net/tcp.h | 1 + >> net/ipv4/Kconfig | 16 ++ >> net/ipv4/Makefile | 1 + >> net/ipv4/sysctl_net_ipv4.c | 9 + >> net/ipv4/tcp_input.c | 2 + >> net/ipv4/tcp_nv.c | 479 >>+ >> 6 files changed, 508 insertions(+) >> create mode 100644 net/ipv4/tcp_nv.c >> >> diff --git a/include/net/tcp.h b/include/net/tcp.h >> index 2e62efe..c0690ae 100644 >> --- a/include/net/tcp.h >> +++ b/include/net/tcp.h >> @@ -281,6 +281,7 @@ extern unsigned int sysctl_tcp_notsent_lowat; >> extern int sysctl_tcp_min_tso_segs; >> extern int sysctl_tcp_autocorking; >> extern int sysctl_tcp_invalid_ratelimit; >> +extern int sysctl_tcp_nv_enable; >> >> extern atomic_long_t tcp_memory_allocated; >> extern struct percpu_counter tcp_sockets_allocated; >> diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig >> index 6fb3c90..c37b374 100644 >> --- a/net/ipv4/Kconfig >> +++ b/net/ipv4/Kconfig >> @@ -539,6 +539,22 @@ config TCP_CONG_VEGAS >> window. 
TCP Vegas should provide less packet loss, but it is >> not as aggressive as TCP Reno. >> >> +config TCP_CONG_NV >> + tristate "TCP NV" >> + default m >> + ---help--- >> + TCP NV is a follow up to TCP Vegas. It has been modified to >>deal with >> + 10G networks, measurement noise introduced by LRO, GRO and >>interrupt >> + coalescence. In addition, it will decrease its cwnd >>multiplicative >multiplicatively > >> + instead of linearly. >> + >> + Note that in general congestion avoidance (cwnd decreased when >># packets >> + queued grows) cannot coexist with congestion control (cwnd >>decreased only >> + when there is packet loss) due to fairness issues. One scenario >>when the >s/the/they >> + can coexist safely is when the CA flows have RTTs << CC flows >>RTTs. >> + >> + For further details see >>https://urldefense.proofpoint.com/v1/url?u=http://www.brakmo.org/networki >>ng/tcp-nv/&k=ZVNjlDMF0FElm4dQtryO4A%3D%3D%0A&r=m30SgjN07T%2FK%2FdV1ZIt1iA >>%3D%3D%0A&m=XeELWxnafKynbNgkHg6RW%2F85hv1bPWlufUn2Dh4cOH4%3D%0A&s=3441162 >>a0eefcad01003dbf0ba478e00a2080f76cd460eaf12213eb74f2eedbd >> + >> config TCP_CONG_SCALABLE >> tristate "Scalable TCP" >> default n >> diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile >> index efc43f3..06f335f 100644 >> --- a/net/ipv4/Makefile >> +++ b/net/ipv4/Makefile >> @@ -50,6 +50,7 @@ obj-$(CONFIG_TCP_CONG_HSTCP) += tcp_highspeed.o >> obj-$(CONFIG_TCP_CONG_HYBLA) += tcp_hybla.o >> obj-$(CONFIG_TCP_CONG_HTCP) += tcp_htcp.o >> obj-$(CONFIG_TCP_CONG_VEGAS) += tcp_vegas.o >> +obj-$(CONFIG_TCP_CONG_NV) += tcp_nv.o >> obj-$(CONFIG_TCP_CONG_VENO) += tcp_veno.o >> obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o >> obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o >> diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c >> index 433231c..31846d5 100644 >> --- a/net/ipv4/sysctl_net_ipv4.c >> +++ b/net/ipv4/sysctl_net_ipv4.c >> @@ -730,6 +730,15 @@ static struct ctl_table ipv4_table[] = { >> .proc_handler = proc_dointvec_ms_jiffies, >> }, >> { >> 
+ .procname = "tcp_nv_enable", >> + .data = &sysctl_tcp_nv_enable, >> + .maxlen = sizeof(int), >> + .mode = 0644, >> + .proc_handler = proc_dointvec_minmax, >> + .extra1 = &zero, >> + .extra2 = &one, >> + }, >> + { >> .procname = "icmp_msgs_per_sec", >> .data = &sysctl_icmp_msgs_per_sec, >> .maxlen = sizeof(int), >> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c >> index aca4ae5..87560d9 100644 >> --- a/net/ipv4/tcp_input.c >> +++ b/net/ipv4/tcp_input.c >> @@ -101,6 +101,8 @@ int sysctl_tcp_thin_dupack __read_