The original patch that changes TCP's congestion control via eBPF only re-initializes the new congestion control, if the BPF op is set to an (invalid) value beyond BPF_SOCK_OPS_NEEDS_ECN. Consequently TCP will run the new congestion control from random states. This patch fixes the issue by always re-init the congestion control like other means such as setsockopt and sysctl changes.
Fixes: 91b5b21c7c16 ("bpf: Add support for changing congestion control") Signed-off-by: Yuchung Cheng <ych...@google.com> Signed-off-by: Eric Dumazet <eduma...@google.com> Signed-off-by: Neal Cardwell <ncardw...@google.com> Signed-off-by: Soheil Hassas Yeganeh <soh...@google.com> --- include/net/tcp.h | 2 +- net/core/filter.c | 3 +-- net/ipv4/tcp.c | 2 +- net/ipv4/tcp_cong.c | 11 ++--------- 4 files changed, 5 insertions(+), 13 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 6da880d2f022..f94a71b62ba5 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1006,7 +1006,7 @@ void tcp_get_default_congestion_control(struct net *net, char *name); void tcp_get_available_congestion_control(char *buf, size_t len); void tcp_get_allowed_congestion_control(char *buf, size_t len); int tcp_set_allowed_congestion_control(char *allowed); -int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, bool reinit); +int tcp_set_congestion_control(struct sock *sk, const char *name, bool load); u32 tcp_slow_start(struct tcp_sock *tp, u32 acked); void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked); diff --git a/net/core/filter.c b/net/core/filter.c index 6a85e67fafce..757d52adccfc 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3233,12 +3233,11 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, sk->sk_prot->setsockopt == tcp_setsockopt) { if (optname == TCP_CONGESTION) { char name[TCP_CA_NAME_MAX]; - bool reinit = bpf_sock->op > BPF_SOCK_OPS_NEEDS_ECN; strncpy(name, optval, min_t(long, optlen, TCP_CA_NAME_MAX-1)); name[TCP_CA_NAME_MAX-1] = 0; - ret = tcp_set_congestion_control(sk, name, false, reinit); + ret = tcp_set_congestion_control(sk, name, false); } else { struct tcp_sock *tp = tcp_sk(sk); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f08eebe60446..21e2a07e857e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2550,7 +2550,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, name[val] = 0; lock_sock(sk); - err = tcp_set_congestion_control(sk, name, true, true); + err = tcp_set_congestion_control(sk, name, true); release_sock(sk); return err; } diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index bc6c02f16243..70895bee3026 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -332,7 +332,7 @@ int tcp_set_allowed_congestion_control(char *val) * tcp_reinit_congestion_control (if the current congestion control was * already initialized. */ -int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, bool reinit) +int tcp_set_congestion_control(struct sock *sk, const char *name, bool load) { struct inet_connection_sock *icsk = inet_csk(sk); const struct tcp_congestion_ops *ca; @@ -356,15 +356,8 @@ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, boo if (!ca) { err = -ENOENT; } else if (!load) { - const struct tcp_congestion_ops *old_ca = icsk->icsk_ca_ops; - if (try_module_get(ca->owner)) { - if (reinit) { - tcp_reinit_congestion_control(sk, ca); - } else { - icsk->icsk_ca_ops = ca; - module_put(old_ca->owner); - } + tcp_reinit_congestion_control(sk, ca); } else { err = -EBUSY; } -- 2.16.0.rc1.238.g530d649a79-goog