We want to be able to set up the monitoring application so that it can
be asynchronously notified when "interesting" events happen, e.g., when
application-determined thresholds on parameters like RTT estimate, number
of retransmissions, RTO are reached.

The bpf_sock_ops infrastructure provided as part of Commit 40304b2a1567
("bpf: BPF support for sock_ops") provides an elegant way to trigger
this asynchronous notification. The BPF program can examine the
current TCP state reported in the bpf_sock_ops and conditionally
return a (new) status BPF_TCP_INFO_NOTIFY. The return status is used
by the caller to queue up a tcp_info notification for the application.

Signed-off-by: Sowmini Varadhan <sowmini.varad...@oracle.com>
---
 include/net/tcp.h        |   15 +++++++++++++--
 include/uapi/linux/bpf.h |    4 ++++
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 0d29292..df06a9f 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -47,6 +47,7 @@
 #include <linux/seq_file.h>
 #include <linux/memcontrol.h>
 #include <linux/bpf-cgroup.h>
+#include <linux/sock_diag.h>
 
 extern struct inet_hashinfo tcp_hashinfo;
 
@@ -2065,6 +2066,12 @@ struct tcp_ulp_ops {
        __MODULE_INFO(alias, alias_userspace, name);            \
        __MODULE_INFO(alias, alias_tcp_ulp, "tcp-ulp-" name)
 
+#define        TCPDIAG_CB(sk)                                          \
+do {                                                                   \
+       if (unlikely(sk->sk_net_refcnt && sock_diag_has_listeners(sk))) \
+               sock_diag_broadcast(sk);                                \
+} while (0)
+
 /* Call BPF_SOCK_OPS program that returns an int. If the return value
  * is < 0, then the BPF op failed (for example if the loaded BPF
  * program does not support the chosen operation or there is no BPF
@@ -2088,9 +2095,13 @@ static inline int tcp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args)
                memcpy(sock_ops.args, args, nargs * sizeof(*args));
 
        ret = BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops);
-       if (ret == 0)
+       if (ret == 0) {
                ret = sock_ops.reply;
-       else
+
+               /* XXX would be nice if we could use replylong[1] here */
+               if (ret == BPF_TCP_INFO_NOTIFY)
+                       TCPDIAG_CB(sk);
+       } else
                ret = -1;
        return ret;
 }
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index aa5ccd2..bc45e5e 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2678,6 +2678,10 @@ enum {
        BPF_TCP_MAX_STATES      /* Leave at the end! */
 };
 
+enum {
+       BPF_TCP_INFO_NOTIFY = 2
+};
+
 #define TCP_BPF_IW             1001    /* Set TCP initial congestion window */
 #define TCP_BPF_SNDCWND_CLAMP  1002    /* Set sndcwnd_clamp */
 
-- 
1.7.1

Reply via email to