The original patch that changes TCP's congestion control via eBPF only
re-initializes the new congestion control if the BPF op is set to an
(invalid) value beyond BPF_SOCK_OPS_NEEDS_ECN. Consequently, TCP will
run the new congestion control from random states. This patch fixes
the issue by always re-initializing the congestion control, as other
means such as setsockopt and sysctl changes do.

Fixes: 91b5b21c7c16 ("bpf: Add support for changing congestion control")
Signed-off-by: Yuchung Cheng <ych...@google.com>
Signed-off-by: Eric Dumazet <eduma...@google.com>
Signed-off-by: Neal Cardwell <ncardw...@google.com>
Signed-off-by: Soheil Hassas Yeganeh <soh...@google.com>
---
 include/net/tcp.h   |  2 +-
 net/core/filter.c   |  3 +--
 net/ipv4/tcp.c      |  2 +-
 net/ipv4/tcp_cong.c | 11 ++---------
 4 files changed, 5 insertions(+), 13 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 6da880d2f022..f94a71b62ba5 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1006,7 +1006,7 @@ void tcp_get_default_congestion_control(struct net *net, char *name);
 void tcp_get_available_congestion_control(char *buf, size_t len);
 void tcp_get_allowed_congestion_control(char *buf, size_t len);
 int tcp_set_allowed_congestion_control(char *allowed);
-int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, bool reinit);
+int tcp_set_congestion_control(struct sock *sk, const char *name, bool load);
 u32 tcp_slow_start(struct tcp_sock *tp, u32 acked);
 void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked);
 
diff --git a/net/core/filter.c b/net/core/filter.c
index 6a85e67fafce..757d52adccfc 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3233,12 +3233,11 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
                   sk->sk_prot->setsockopt == tcp_setsockopt) {
                if (optname == TCP_CONGESTION) {
                        char name[TCP_CA_NAME_MAX];
-                       bool reinit = bpf_sock->op > BPF_SOCK_OPS_NEEDS_ECN;
 
                        strncpy(name, optval, min_t(long, optlen,
                                                    TCP_CA_NAME_MAX-1));
                        name[TCP_CA_NAME_MAX-1] = 0;
-                       ret = tcp_set_congestion_control(sk, name, false, reinit);
+                       ret = tcp_set_congestion_control(sk, name, false);
                } else {
                        struct tcp_sock *tp = tcp_sk(sk);
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f08eebe60446..21e2a07e857e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2550,7 +2550,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
                name[val] = 0;
 
                lock_sock(sk);
-               err = tcp_set_congestion_control(sk, name, true, true);
+               err = tcp_set_congestion_control(sk, name, true);
                release_sock(sk);
                return err;
        }
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index bc6c02f16243..70895bee3026 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -332,7 +332,7 @@ int tcp_set_allowed_congestion_control(char *val)
  * tcp_reinit_congestion_control (if the current congestion control was
  * already initialized.
  */
-int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, bool reinit)
+int tcp_set_congestion_control(struct sock *sk, const char *name, bool load)
 {
        struct inet_connection_sock *icsk = inet_csk(sk);
        const struct tcp_congestion_ops *ca;
@@ -356,15 +356,8 @@ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, boo
        if (!ca) {
                err = -ENOENT;
        } else if (!load) {
-               const struct tcp_congestion_ops *old_ca = icsk->icsk_ca_ops;
-
                if (try_module_get(ca->owner)) {
-                       if (reinit) {
-                               tcp_reinit_congestion_control(sk, ca);
-                       } else {
-                               icsk->icsk_ca_ops = ca;
-                               module_put(old_ca->owner);
-                       }
+                       tcp_reinit_congestion_control(sk, ca);
                } else {
                        err = -EBUSY;
                }
-- 
2.16.0.rc1.238.g530d649a79-goog

Reply via email to