Added support for changing congestion control for SOCK_OPS bpf
programs through the setsockopt bpf helper function. It also adds
a new SOCK_OPS op, BPF_SOCK_OPS_NEEDS_ECN, that is needed for
congestion controls, like dctcp, that need to enable ECN in the
SYN packets.
Signed-off-by: Lawrence Brakmo
---
include/net/tcp.h| 9 -
include/uapi/linux/bpf.h | 3 +++
net/core/filter.c| 11 +--
net/ipv4/tcp.c | 2 +-
net/ipv4/tcp_cong.c | 32 ++--
net/ipv4/tcp_input.c | 3 ++-
net/ipv4/tcp_output.c| 8 +---
7 files changed, 50 insertions(+), 18 deletions(-)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index ff806d7..58d67be 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1003,7 +1003,9 @@ void tcp_get_default_congestion_control(char *name);
void tcp_get_available_congestion_control(char *buf, size_t len);
void tcp_get_allowed_congestion_control(char *buf, size_t len);
int tcp_set_allowed_congestion_control(char *allowed);
-int tcp_set_congestion_control(struct sock *sk, const char *name);
+int tcp_set_congestion_control(struct sock *sk, const char *name, bool load);
+void tcp_reinit_congestion_control(struct sock *sk,
+ const struct tcp_congestion_ops *ca);
u32 tcp_slow_start(struct tcp_sock *tp, u32 acked);
void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked);
@@ -2072,4 +2074,9 @@ static inline u32 tcp_rwnd_init_bpf(struct sock *sk, bool
is_req_sock)
rwnd = 0;
return rwnd;
}
+
+static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk)
+{
+ return (tcp_call_bpf(sk, true, BPF_SOCK_OPS_NEEDS_ECN) == 1);
+}
#endif /* _TCP_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 4856d16..c222059 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -776,6 +776,9 @@ enum {
* passive connection is
* established
*/
+ BPF_SOCK_OPS_NEEDS_ECN, /* If connection's congestion control
+* needs ECN
+*/
};
#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/net/core/filter.c b/net/core/filter.c
index b114ae1..bbf8f78 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2716,8 +2716,15 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *,
bpf_sock,
}
} else if (level == SOL_TCP &&
sk->sk_prot->setsockopt == tcp_setsockopt) {
- /* Place holder */
- ret = -EINVAL;
+ if (optname == TCP_CONGESTION) {
+ ret = tcp_set_congestion_control(sk, optval, false);
+ if (!ret && bpf_sock->op > BPF_SOCK_OPS_NEEDS_ECN)
+ /* replacing an existing ca */
+ tcp_reinit_congestion_control(sk,
+ inet_csk(sk)->icsk_ca_ops);
+ } else {
+ ret = -EINVAL;
+ }
} else {
ret = -EINVAL;
}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 058f509..9476fd6 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2479,7 +2479,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
name[val] = 0;
lock_sock(sk);
- err = tcp_set_congestion_control(sk, name);
+ err = tcp_set_congestion_control(sk, name, true);
release_sock(sk);
return err;
}
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 324c9bc..fde983f 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -189,8 +189,8 @@ void tcp_init_congestion_control(struct sock *sk)
INET_ECN_dontxmit(sk);
}
-static void tcp_reinit_congestion_control(struct sock *sk,
- const struct tcp_congestion_ops *ca)
+void tcp_reinit_congestion_control(struct sock *sk,
+ const struct tcp_congestion_ops *ca)
{
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -333,8 +333,12 @@ int tcp_set_allowed_congestion_control(char *val)
return ret;
}
-/* Change congestion control for socket */
-int tcp_set_congestion_control(struct sock *sk, const char *name)
+/* Change congestion control for socket. If load is false, then it is the
+ * responsibility of the caller to call tcp_init_congestion_control or
+ * tcp_reinit_congestion_control (if the current congestion control was
+ * already initialized.
+ */
+int tcp_set_congestion_control(struct sock *sk, const char *name, bool load)
{
struct inet_connection_sock *icsk = inet_csk(sk);
const struct tcp_congestion_ops *ca;
@@ -344,21 +348,29 @@ int