The current code changes txhash (flowlables) on every retransmitted SYN/ACK, but only after the 2nd retransmitted SYN and only after tcp_retries1 RTO retransmits.
With this patch: 1) txhash is changed with every SYN retransmist 2) adds the option for the txhash to be changed before tcp_retries1 RTO retransmits. A new sysctl tcp_rto_txhash_prob represents the probability that txhash will be changed. The default value is 0 which maintains previous behavior and a value of 100 will always change it. Tested with packetdrill tests Signed-off-by: Lawrence Brakmo <bra...@fb.com> --- include/net/tcp.h | 1 + net/ipv4/sysctl_net_ipv4.c | 10 ++++++++++ net/ipv4/tcp_input.c | 2 ++ net/ipv4/tcp_timer.c | 7 +++++++ 4 files changed, 20 insertions(+) diff --git a/include/net/tcp.h b/include/net/tcp.h index f83b7f2..406d474 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -271,6 +271,7 @@ extern int sysctl_tcp_autocorking; extern int sysctl_tcp_invalid_ratelimit; extern int sysctl_tcp_pacing_ss_ratio; extern int sysctl_tcp_pacing_ca_ratio; +extern int sysctl_tcp_rto_txhash_prob; extern atomic_long_t tcp_memory_allocated; extern struct percpu_counter tcp_sockets_allocated; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 1cb67de..0b185a1 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -28,6 +28,7 @@ static int zero; static int one = 1; static int four = 4; +static int hundred = 100; static int thousand = 1000; static int gso_max_segs = GSO_MAX_SEGS; static int tcp_retr1_max = 255; @@ -624,6 +625,15 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec_ms_jiffies, }, { + .procname = "tcp_rto_txhash_prob", + .data = &sysctl_tcp_rto_txhash_prob, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &hundred, + }, + { .procname = "icmp_msgs_per_sec", .data = &sysctl_icmp_msgs_per_sec, .maxlen = sizeof(int), diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8c6ad2d..2fea29d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -101,6 +101,8 @@ int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; int sysctl_tcp_early_retrans __read_mostly = 3; int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2; +int sysctl_tcp_rto_txhash_prob __read_mostly = 100; + #define FLAG_DATA 0x01 /* Incoming frame contained data. */ #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ #define FLAG_DATA_ACKED 0x04 /* This ACK acknowledged new data. */ diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index f712b41..0df8c5f 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -192,6 +192,8 @@ static int tcp_write_timeout(struct sock *sk) if (tp->syn_data && icsk->icsk_retransmits == 1) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVEFAIL); + } else if (!tp->syn_data && !tp->syn_fastopen) { + sk_rethink_txhash(sk); } retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries; syn_set = true; @@ -213,6 +215,11 @@ static int tcp_write_timeout(struct sock *sk) tcp_mtu_probing(icsk, sk); dst_negative_advice(sk); + } else if (sk->sk_txhash && sysctl_tcp_rto_txhash_prob > 0) { + u32 v = prandom_u32() % 100; + + if (v < sysctl_tcp_rto_txhash_prob) + sk_set_txhash(sk); } retry_until = net->ipv4.sysctl_tcp_retries2; -- 2.9.3