Adds field bpf_sock_ops_cb_flags to tcp_sock and bpf_sock_ops. Its primary
use is to determine if there should be calls to sock_ops bpf program at
various points in the TCP code. The field is initialized to zero,
disabling the calls. A sock_ops BPF program can set it, per connection and
as necessary, when the connection is established.

It also adds support for reading and writting the field within a
sock_ops BPF program. Reading is done by accessing the field directly.
However, writing is done through the helper function
bpf_sock_ops_cb_flags_set, in order to return an error if a BPF program
is trying to set a callback that is not supported in the current kernel
(i.e. running an older kernel). The helper function returns 0 if it was
able to set all of the bits set in the argument, a positive number
containing the bits that could not be set, or -EINVAL if the socket is
not a full TCP socket.

Examples of where one could call the bpf program:

1) When RTO fires
2) When a packet is retransmitted
3) When the connection terminates
4) When a packet is sent
5) When a packet is received

Signed-off-by: Lawrence Brakmo <bra...@fb.com>
Acked-by: Alexei Starovoitov <a...@kernel.org>
---
 include/linux/tcp.h      | 11 +++++++++++
 include/uapi/linux/bpf.h | 12 +++++++++++-
 include/uapi/linux/tcp.h |  5 +++++
 net/core/filter.c        | 34 ++++++++++++++++++++++++++++++++++
 4 files changed, 61 insertions(+), 1 deletion(-)

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 4f93f095..8f4c549 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -335,6 +335,17 @@ struct tcp_sock {
 
        int                     linger2;
 
+
+/* Sock_ops bpf program related variables */
+#ifdef CONFIG_BPF
+       u8      bpf_sock_ops_cb_flags;  /* Control calling BPF programs
+                                        * values defined in uapi/linux/tcp.h
+                                        */
+#define BPF_SOCK_OPS_TEST_FLAG(TP, ARG) (TP->bpf_sock_ops_cb_flags & ARG)
+#else
+#define BPF_SOCK_OPS_TEST_FLAG(TP, ARG) 0
+#endif
+
 /* Receiver side RTT estimation */
        struct {
                u32     rtt_us;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 8d5874c..7573f5b 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -642,6 +642,14 @@ union bpf_attr {
  *     @optlen: length of optval in bytes
  *     Return: 0 or negative error
  *
+ * int bpf_sock_ops_cb_flags_set(bpf_sock_ops, flags)
+ *     Set callback flags for sock_ops
+ *     @bpf_sock_ops: pointer to bpf_sock_ops_kern struct
+ *     @flags: flags value
+ *     Return: 0 for no error
+ *             -EINVAL if there is no full tcp socket
+ *             bits in flags that are not supported by current kernel
+ *
  * int bpf_skb_adjust_room(skb, len_diff, mode, flags)
  *     Grow or shrink room in sk_buff.
  *     @skb: pointer to skb
@@ -748,7 +756,8 @@ union bpf_attr {
        FN(perf_event_read_value),      \
        FN(perf_prog_read_value),       \
        FN(getsockopt),                 \
-       FN(override_return),
+       FN(override_return),            \
+       FN(sock_ops_cb_flags_set),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -969,6 +978,7 @@ struct bpf_sock_ops {
                                 */
        __u32 snd_cwnd;
        __u32 srtt_us;          /* Averaged RTT << 3 in usecs */
+       __u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */
 };
 
 /* List of known BPF sock_ops operators.
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index b4a4f64..d1df2f6 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -268,4 +268,9 @@ struct tcp_diag_md5sig {
        __u8    tcpm_key[TCP_MD5SIG_MAXKEYLEN];
 };
 
+/* Definitions for bpf_sock_ops_cb_flags */
+#define BPF_SOCK_OPS_ALL_CB_FLAGS       0              /* Mask of all currently
+                                                        * supported cb flags
+                                                        */
+
 #endif /* _UAPI_LINUX_TCP_H */
diff --git a/net/core/filter.c b/net/core/filter.c
index c356ec0..6936d19 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3328,6 +3328,33 @@ static const struct bpf_func_proto bpf_getsockopt_proto 
= {
        .arg5_type      = ARG_CONST_SIZE,
 };
 
+BPF_CALL_2(bpf_sock_ops_cb_flags_set, struct bpf_sock_ops_kern *, bpf_sock,
+          int, argval)
+{
+       struct sock *sk = bpf_sock->sk;
+       int val = argval & BPF_SOCK_OPS_ALL_CB_FLAGS;
+
+       if (!sk_fullsock(sk))
+               return -EINVAL;
+
+#ifdef CONFIG_INET
+       if (val)
+               tcp_sk(sk)->bpf_sock_ops_cb_flags = val;
+
+       return argval & (~BPF_SOCK_OPS_ALL_CB_FLAGS);
+#else
+       return -EINVAL;
+#endif
+}
+
+static const struct bpf_func_proto bpf_sock_ops_cb_flags_set_proto = {
+       .func           = bpf_sock_ops_cb_flags_set,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+       .arg2_type      = ARG_ANYTHING,
+};
+
 static const struct bpf_func_proto *
 bpf_base_func_proto(enum bpf_func_id func_id)
 {
@@ -3510,6 +3537,8 @@ static const struct bpf_func_proto *
                return &bpf_setsockopt_proto;
        case BPF_FUNC_getsockopt:
                return &bpf_getsockopt_proto;
+       case BPF_FUNC_sock_ops_cb_flags_set:
+               return &bpf_sock_ops_cb_flags_set_proto;
        case BPF_FUNC_sock_map_update:
                return &bpf_sock_map_update_proto;
        default:
@@ -4546,6 +4575,11 @@ static u32 sock_ops_convert_ctx_access(enum 
bpf_access_type type,
        case offsetof(struct bpf_sock_ops, srtt_us):
                SOCK_OPS_GET_FIELD(srtt_us, srtt_us, struct tcp_sock);
                break;
+
+       case offsetof(struct bpf_sock_ops, bpf_sock_ops_cb_flags):
+               SOCK_OPS_GET_FIELD(bpf_sock_ops_cb_flags, bpf_sock_ops_cb_flags,
+                                  struct tcp_sock);
+               break;
        }
        return insn - insn_buf;
 }
-- 
2.9.5

Reply via email to