[RFC PATCH v2 net-next 3/3] tcp: add NV congestion control

2015-07-21 Thread Lawrence Brakmo
This is a request for comments.

TCP-NV (New Vegas) is a major update to TCP-Vegas. An earlier version of
NV was presented at 2010's LPC (slides). It is a delay-based
congestion avoidance for the data center. This version has been tested
within a 10G rack where the HW RTTs are 20-50us.

A description of TCP-NV, including implementation and experimental
results, can be found at:
http://www.brakmo.org/networking/tcp-nv/TCPNV.html

The current version includes many module parameters to support
experimentation with the parameters.

Signed-off-by: Lawrence Brakmo 
---
 include/net/tcp.h  |   1 +
 net/ipv4/Kconfig   |  16 ++
 net/ipv4/Makefile  |   1 +
 net/ipv4/sysctl_net_ipv4.c |   9 +
 net/ipv4/tcp_input.c   |   2 +
 net/ipv4/tcp_nv.c  | 479 +
 6 files changed, 508 insertions(+)
 create mode 100644 net/ipv4/tcp_nv.c

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 2e62efe..c0690ae 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -281,6 +281,7 @@ extern unsigned int sysctl_tcp_notsent_lowat;
 extern int sysctl_tcp_min_tso_segs;
 extern int sysctl_tcp_autocorking;
 extern int sysctl_tcp_invalid_ratelimit;
+extern int sysctl_tcp_nv_enable;
 
 extern atomic_long_t tcp_memory_allocated;
 extern struct percpu_counter tcp_sockets_allocated;
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 6fb3c90..c37b374 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -539,6 +539,22 @@ config TCP_CONG_VEGAS
window. TCP Vegas should provide less packet loss, but it is
not as aggressive as TCP Reno.
 
+config TCP_CONG_NV
+   tristate "TCP NV"
+   default m
+   ---help---
+   TCP NV is a follow up to TCP Vegas. It has been modified to deal with
+   10G networks, measurement noise introduced by LRO, GRO and interrupt
+   coalescence. In addition, it will decrease its cwnd multiplicative
+   instead of linearly.
+
+   Note that in general congestion avoidance (cwnd decreased when # packets
+   queued grows) cannot coexist with congestion control (cwnd decreased only
+   when there is packet loss) due to fairness issues. One scenario when the
+   can coexist safely is when the CA flows have RTTs << CC flows RTTs.
+
+   For further details see http://www.brakmo.org/networking/tcp-nv/
+
 config TCP_CONG_SCALABLE
tristate "Scalable TCP"
default n
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index efc43f3..06f335f 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -50,6 +50,7 @@ obj-$(CONFIG_TCP_CONG_HSTCP) += tcp_highspeed.o
 obj-$(CONFIG_TCP_CONG_HYBLA) += tcp_hybla.o
 obj-$(CONFIG_TCP_CONG_HTCP) += tcp_htcp.o
 obj-$(CONFIG_TCP_CONG_VEGAS) += tcp_vegas.o
+obj-$(CONFIG_TCP_CONG_NV) += tcp_nv.o
 obj-$(CONFIG_TCP_CONG_VENO) += tcp_veno.o
 obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
 obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 433231c..31846d5 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -730,6 +730,15 @@ static struct ctl_table ipv4_table[] = {
.proc_handler   = proc_dointvec_ms_jiffies,
},
{
+   .procname   = "tcp_nv_enable",
+   .data   = &sysctl_tcp_nv_enable,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec_minmax,
+   .extra1 = &zero,
+   .extra2 = &one,
+   },  
+   {
.procname   = "icmp_msgs_per_sec",
.data   = &sysctl_icmp_msgs_per_sec,
.maxlen = sizeof(int),
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index aca4ae5..87560d9 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -101,6 +101,8 @@ int sysctl_tcp_thin_dupack __read_mostly;
 int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
 int sysctl_tcp_early_retrans __read_mostly = 3;
 int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
+int sysctl_tcp_nv_enable __read_mostly = 1;
+EXPORT_SYMBOL(sysctl_tcp_nv_enable);
 
 #define FLAG_DATA  0x01 /* Incoming frame contained data.  
*/
 #define FLAG_WIN_UPDATE0x02 /* Incoming ACK was a window 
update.   */
diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c
new file mode 100644
index 000..af451b6
--- /dev/null
+++ b/net/ipv4/tcp_nv.c
@@ -0,0 +1,479 @@
+/*
+ * TCP NV: TCP with Congestion Avoidance
+ *
+ * TCP-NV is a successor of TCP-Vegas that has been developed to
+ * deal with the issues that occur in modern networks. 
+ * Like TCP-Vegas, TCP-NV supports true congestion avoidance,
+ * the ability to detect congestion before packet losses occur.
+ * When congestion (queue buildup) starts to occur, TCP-NV
+ * predicts what the cwnd size should be for the current
+ * throughput and it re

Re: [RFC PATCH v2 net-next 3/3] tcp: add NV congestion control

2015-07-21 Thread Yuchung Cheng
On Tue, Jul 21, 2015 at 9:21 PM, Lawrence Brakmo  wrote:
> This is a request for comments.
>
> TCP-NV (New Vegas) is a major update to TCP-Vegas. An earlier version of
> NV was presented at 2010's LPC (slides). It is a delayed based
> congestion avoidance for the data center. This version has been tested
> within a 10G rack where the HW RTTs are 20-50us.
>
> A description of TCP-NV, including implementation and experimental
> results, can be found at:
> http://www.brakmo.org/networking/tcp-nv/TCPNV.html
>
> The current version includes many module parameters to support
> experimentation with the parameters.
>
> Signed-off-by: Lawrence Brakmo 
> ---
>  include/net/tcp.h  |   1 +
>  net/ipv4/Kconfig   |  16 ++
>  net/ipv4/Makefile  |   1 +
>  net/ipv4/sysctl_net_ipv4.c |   9 +
>  net/ipv4/tcp_input.c   |   2 +
>  net/ipv4/tcp_nv.c  | 479 
> +
>  6 files changed, 508 insertions(+)
>  create mode 100644 net/ipv4/tcp_nv.c
>
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index 2e62efe..c0690ae 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -281,6 +281,7 @@ extern unsigned int sysctl_tcp_notsent_lowat;
>  extern int sysctl_tcp_min_tso_segs;
>  extern int sysctl_tcp_autocorking;
>  extern int sysctl_tcp_invalid_ratelimit;
> +extern int sysctl_tcp_nv_enable;
>
>  extern atomic_long_t tcp_memory_allocated;
>  extern struct percpu_counter tcp_sockets_allocated;
> diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
> index 6fb3c90..c37b374 100644
> --- a/net/ipv4/Kconfig
> +++ b/net/ipv4/Kconfig
> @@ -539,6 +539,22 @@ config TCP_CONG_VEGAS
> window. TCP Vegas should provide less packet loss, but it is
> not as aggressive as TCP Reno.
>
> +config TCP_CONG_NV
> +   tristate "TCP NV"
> +   default m
> +   ---help---
> +   TCP NV is a follow up to TCP Vegas. It has been modified to deal with
> +   10G networks, measurement noise introduced by LRO, GRO and interrupt
> +   coalescence. In addition, it will decrease its cwnd multiplicative
multiplicatively

> +   instead of linearly.
> +
> +   Note that in general congestion avoidance (cwnd decreased when # 
> packets
> +   queued grows) cannot coexist with congestion control (cwnd decreased 
> only
> +   when there is packet loss) due to fairness issues. One scenario when 
> the
s/the/they
> +   can coexist safely is when the CA flows have RTTs << CC flows RTTs.
> +
> +   For further details see http://www.brakmo.org/networking/tcp-nv/
> +
>  config TCP_CONG_SCALABLE
> tristate "Scalable TCP"
> default n
> diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
> index efc43f3..06f335f 100644
> --- a/net/ipv4/Makefile
> +++ b/net/ipv4/Makefile
> @@ -50,6 +50,7 @@ obj-$(CONFIG_TCP_CONG_HSTCP) += tcp_highspeed.o
>  obj-$(CONFIG_TCP_CONG_HYBLA) += tcp_hybla.o
>  obj-$(CONFIG_TCP_CONG_HTCP) += tcp_htcp.o
>  obj-$(CONFIG_TCP_CONG_VEGAS) += tcp_vegas.o
> +obj-$(CONFIG_TCP_CONG_NV) += tcp_nv.o
>  obj-$(CONFIG_TCP_CONG_VENO) += tcp_veno.o
>  obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
>  obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
> diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
> index 433231c..31846d5 100644
> --- a/net/ipv4/sysctl_net_ipv4.c
> +++ b/net/ipv4/sysctl_net_ipv4.c
> @@ -730,6 +730,15 @@ static struct ctl_table ipv4_table[] = {
> .proc_handler   = proc_dointvec_ms_jiffies,
> },
> {
> +   .procname   = "tcp_nv_enable",
> +   .data   = &sysctl_tcp_nv_enable,
> +   .maxlen = sizeof(int),
> +   .mode   = 0644,
> +   .proc_handler   = proc_dointvec_minmax,
> +   .extra1 = &zero,
> +   .extra2 = &one,
> +   },
> +   {
> .procname   = "icmp_msgs_per_sec",
> .data   = &sysctl_icmp_msgs_per_sec,
> .maxlen = sizeof(int),
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index aca4ae5..87560d9 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -101,6 +101,8 @@ int sysctl_tcp_thin_dupack __read_mostly;
>  int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
>  int sysctl_tcp_early_retrans __read_mostly = 3;
>  int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
> +int sysctl_tcp_nv_enable __read_mostly = 1;
> +EXPORT_SYMBOL(sysctl_tcp_nv_enable);
>
>  #define FLAG_DATA  0x01 /* Incoming frame contained data.
>   */
>  #define FLAG_WIN_UPDATE0x02 /* Incoming ACK was a window 
> update.   */
> diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c
> new file mode 100644
> index 000..af451b6
> --- /dev/null
> +++ b/net/ipv4/tcp_nv.c
> @@ -0,0 +1,479 @@
> +/*
> + * TCP NV: TCP with Congestion Avoidance
> + *
> + * TCP-NV is a successor of TCP-Vegas that has been developed to
> + * deal wi

Re: [RFC PATCH v2 net-next 3/3] tcp: add NV congestion control

2015-07-22 Thread Neal Cardwell
On Wed, Jul 22, 2015 at 2:50 AM, Yuchung Cheng  wrote:
> On Tue, Jul 21, 2015 at 9:21 PM, Lawrence Brakmo  wrote:
>> This is a request for comments.
>>
>> TCP-NV (New Vegas) is a major update to TCP-Vegas. An earlier version of
>> NV was presented at 2010's LPC (slides). It is a delayed based
>> congestion avoidance for the data center. This version has been tested
>> within a 10G rack where the HW RTTs are 20-50us.
>>
>> A description of TCP-NV, including implementation and experimental
>> results, can be found at:
>> http://www.brakmo.org/networking/tcp-nv/TCPNV.html
>>
>> The current version includes many module parameters to support
>> experimentation with the parameters.
...
>> +extern int sysctl_tcp_nv_enable;

The sysctl_tcp_nv_enable only seems to be used within the NV module.
Can it be a module parameter instead of sysctl?

neal
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH v2 net-next 3/3] tcp: add NV congestion control

2015-07-23 Thread Lawrence Brakmo
Thank you all for your comments, I'm currently testing the changes.
Other comments inline.

On 7/21/15, 11:50 PM, "Yuchung Cheng"  wrote:

>On Tue, Jul 21, 2015 at 9:21 PM, Lawrence Brakmo  wrote:
>> This is a request for comments.
>>
>> TCP-NV (New Vegas) is a major update to TCP-Vegas. An earlier version of
>> NV was presented at 2010's LPC (slides). It is a delayed based
>> congestion avoidance for the data center. This version has been tested
>> within a 10G rack where the HW RTTs are 20-50us.
>>
>> A description of TCP-NV, including implementation and experimental
>> results, can be found at:
>> 
>>https://urldefense.proofpoint.com/v1/url?u=http://www.brakmo.org/networki
>>ng/tcp-nv/TCPNV.html&k=ZVNjlDMF0FElm4dQtryO4A%3D%3D%0A&r=m30SgjN07T%2FK%2
>>FdV1ZIt1iA%3D%3D%0A&m=XeELWxnafKynbNgkHg6RW%2F85hv1bPWlufUn2Dh4cOH4%3D%0A
>>&s=0029c47e62d84d6ffd22bd33e1895a3f61eaa21d88cbfb553aa1df780bbbdcf9
>>
>> The current version includes many module parameters to support
>> experimentation with the parameters.
>>
>> Signed-off-by: Lawrence Brakmo 
>> ---
>>  include/net/tcp.h  |   1 +
>>  net/ipv4/Kconfig   |  16 ++
>>  net/ipv4/Makefile  |   1 +
>>  net/ipv4/sysctl_net_ipv4.c |   9 +
>>  net/ipv4/tcp_input.c   |   2 +
>>  net/ipv4/tcp_nv.c  | 479
>>+
>>  6 files changed, 508 insertions(+)
>>  create mode 100644 net/ipv4/tcp_nv.c
>>
>> diff --git a/include/net/tcp.h b/include/net/tcp.h
>> index 2e62efe..c0690ae 100644
>> --- a/include/net/tcp.h
>> +++ b/include/net/tcp.h
>> @@ -281,6 +281,7 @@ extern unsigned int sysctl_tcp_notsent_lowat;
>>  extern int sysctl_tcp_min_tso_segs;
>>  extern int sysctl_tcp_autocorking;
>>  extern int sysctl_tcp_invalid_ratelimit;
>> +extern int sysctl_tcp_nv_enable;
>>
>>  extern atomic_long_t tcp_memory_allocated;
>>  extern struct percpu_counter tcp_sockets_allocated;
>> diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
>> index 6fb3c90..c37b374 100644
>> --- a/net/ipv4/Kconfig
>> +++ b/net/ipv4/Kconfig
>> @@ -539,6 +539,22 @@ config TCP_CONG_VEGAS
>> window. TCP Vegas should provide less packet loss, but it is
>> not as aggressive as TCP Reno.
>>
>> +config TCP_CONG_NV
>> +   tristate "TCP NV"
>> +   default m
>> +   ---help---
>> +   TCP NV is a follow up to TCP Vegas. It has been modified to
>>deal with
>> +   10G networks, measurement noise introduced by LRO, GRO and
>>interrupt
>> +   coalescence. In addition, it will decrease its cwnd
>>multiplicative
>multiplicatively
>
>> +   instead of linearly.
>> +
>> +   Note that in general congestion avoidance (cwnd decreased when
>># packets
>> +   queued grows) cannot coexist with congestion control (cwnd
>>decreased only
>> +   when there is packet loss) due to fairness issues. One scenario
>>when the
>s/the/they
>> +   can coexist safely is when the CA flows have RTTs << CC flows
>>RTTs.
>> +
>> +   For further details see
>>https://urldefense.proofpoint.com/v1/url?u=http://www.brakmo.org/networki
>>ng/tcp-nv/&k=ZVNjlDMF0FElm4dQtryO4A%3D%3D%0A&r=m30SgjN07T%2FK%2FdV1ZIt1iA
>>%3D%3D%0A&m=XeELWxnafKynbNgkHg6RW%2F85hv1bPWlufUn2Dh4cOH4%3D%0A&s=3441162
>>a0eefcad01003dbf0ba478e00a2080f76cd460eaf12213eb74f2eedbd
>> +
>>  config TCP_CONG_SCALABLE
>> tristate "Scalable TCP"
>> default n
>> diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
>> index efc43f3..06f335f 100644
>> --- a/net/ipv4/Makefile
>> +++ b/net/ipv4/Makefile
>> @@ -50,6 +50,7 @@ obj-$(CONFIG_TCP_CONG_HSTCP) += tcp_highspeed.o
>>  obj-$(CONFIG_TCP_CONG_HYBLA) += tcp_hybla.o
>>  obj-$(CONFIG_TCP_CONG_HTCP) += tcp_htcp.o
>>  obj-$(CONFIG_TCP_CONG_VEGAS) += tcp_vegas.o
>> +obj-$(CONFIG_TCP_CONG_NV) += tcp_nv.o
>>  obj-$(CONFIG_TCP_CONG_VENO) += tcp_veno.o
>>  obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
>>  obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
>> diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
>> index 433231c..31846d5 100644
>> --- a/net/ipv4/sysctl_net_ipv4.c
>> +++ b/net/ipv4/sysctl_net_ipv4.c
>> @@ -730,6 +730,15 @@ static struct ctl_table ipv4_table[] = {
>> .proc_handler   = proc_dointvec_ms_jiffies,
>> },
>> {
>> +   .procname   = "tcp_nv_enable",
>> +   .data   = &sysctl_tcp_nv_enable,
>> +   .maxlen = sizeof(int),
>> +   .mode   = 0644,
>> +   .proc_handler   = proc_dointvec_minmax,
>> +   .extra1 = &zero,
>> +   .extra2 = &one,
>> +   },
>> +   {
>> .procname   = "icmp_msgs_per_sec",
>> .data   = &sysctl_icmp_msgs_per_sec,
>> .maxlen = sizeof(int),
>> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
>> index aca4ae5..87560d9 100644
>> --- a/net/ipv4/tcp_input.c
>> +++ b/net/ipv4/tcp_input.c
>> @@ -101,6 +101,8 @@ int sysctl_tcp_thin_dupack __read_