From: Geliang Tang <tanggeli...@kylinos.cn>

This patch adds a "cgroup/getsockopt" way to inspect the subflows of an
MPTCP socket, and verify the modifications done by the same BPF program
in the previous commit: a different mark per subflow, and a different
TCP CC set on the second one. This new hook will be used by the next
commit to verify the socket options set on each subflow.

This extra "cgroup/getsockopt" prog walks msk->conn_list and uses
bpf_core_cast to cast a pointer for read-only access. This allows
inspecting all the fields of a structure.

Note that on the kernel side, the MPTCP socket stores a list of subflows
under 'msk->conn_list'. They can be iterated using the generic 'list'
helpers. They have been imported here, with a small difference:
list_for_each_entry() uses 'can_loop' to limit the number of iterations,
and to ease its use. Because the data only needs to be read here, this
technique is sufficient. The plan is to switch to bpf_iter once BPF
programs are used to modify data from the different subflows.
The mptcp_subflow_tcp_sock() and mptcp_for_each_subflow() helpers have
also been imported.

Suggested-by: Martin KaFai Lau <martin....@kernel.org>
Signed-off-by: Geliang Tang <tanggeli...@kylinos.cn>
Reviewed-by: Matthieu Baerts (NGI0) <matt...@kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matt...@kernel.org>
---
Notes:
  - v5: new patch, instead of using 'ss' in the following patch
  - v7: use 'can_loop' instead of 'cond_break'. (Martin)
---
 MAINTAINERS                                       |  2 +-
 tools/testing/selftests/bpf/progs/mptcp_bpf.h     | 42 ++++++++++++++
 tools/testing/selftests/bpf/progs/mptcp_subflow.c | 69 +++++++++++++++++++++++
 3 files changed, 112 insertions(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 3bce6cc05553dad53db5f06d36e6957061886dd0..8817aa26b2fc0ba3581576d040f5093124cc60a7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -16097,7 +16097,7 @@ F:      include/net/mptcp.h
 F:     include/trace/events/mptcp.h
 F:     include/uapi/linux/mptcp*.h
 F:     net/mptcp/
-F:     tools/testing/selftests/bpf/*/*mptcp*.c
+F:     tools/testing/selftests/bpf/*/*mptcp*.[ch]
 F:     tools/testing/selftests/net/mptcp/
 
 NETWORKING [TCP]
diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
new file mode 100644
index 0000000000000000000000000000000000000000..3b188ccdcc4041acb4f7ed38ae8ddf5a7305466a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __MPTCP_BPF_H__
+#define __MPTCP_BPF_H__
+
+#include "bpf_experimental.h"
+
+/* list helpers from include/linux/list.h */
+static inline int list_is_head(const struct list_head *list,
+                              const struct list_head *head)
+{
+       return list == head;
+}
+
+#define list_entry(ptr, type, member)                                  \
+       container_of(ptr, type, member)
+
+#define list_first_entry(ptr, type, member)                            \
+       list_entry((ptr)->next, type, member)
+
+#define list_next_entry(pos, member)                                   \
+       list_entry((pos)->member.next, typeof(*(pos)), member)
+
+#define list_entry_is_head(pos, head, member)                          \
+       list_is_head(&pos->member, (head))
+
+/* small difference: 'can_loop' has been added in the conditions */
+#define list_for_each_entry(pos, head, member)                         \
+       for (pos = list_first_entry(head, typeof(*pos), member);        \
+            !list_entry_is_head(pos, head, member) && can_loop;        \
+            pos = list_next_entry(pos, member))
+
+/* mptcp helpers from protocol.h */
+#define mptcp_for_each_subflow(__msk, __subflow)                       \
+       list_for_each_entry(__subflow, &((__msk)->conn_list), node)
+
+static __always_inline struct sock *
+mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow)
+{
+       return subflow->tcp_sock;
+}
+
+#endif
diff --git a/tools/testing/selftests/bpf/progs/mptcp_subflow.c b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
index 2e28f4a215b5469fcbc31168071887687ca34792..70302477e326eecaef6aad4ecf899aa3d6606f23 100644
--- a/tools/testing/selftests/bpf/progs/mptcp_subflow.c
+++ b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
@@ -4,10 +4,12 @@
 
 /* vmlinux.h, bpf_helpers.h and other 'define' */
 #include "bpf_tracing_net.h"
+#include "mptcp_bpf.h"
 
 char _license[] SEC("license") = "GPL";
 
 char cc[TCP_CA_NAME_MAX] = "reno";
+int pid;
 
 /* Associate a subflow counter to each token */
 struct {
@@ -57,3 +59,70 @@ int mptcp_subflow(struct bpf_sock_ops *skops)
 
        return 1;
 }
+
+static int _check_getsockopt_subflow_mark(struct mptcp_sock *msk, struct 
bpf_sockopt *ctx)
+{
+       struct mptcp_subflow_context *subflow;
+       int i = 0;
+
+       mptcp_for_each_subflow(msk, subflow) {
+               struct sock *ssk;
+
+               ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow,
+                                                          struct 
mptcp_subflow_context));
+
+               if (ssk->sk_mark != ++i) {
+                       ctx->retval = -2;
+                       break;
+               }
+       }
+
+       return 1;
+}
+
+static int _check_getsockopt_subflow_cc(struct mptcp_sock *msk, struct 
bpf_sockopt *ctx)
+{
+       struct mptcp_subflow_context *subflow;
+
+       mptcp_for_each_subflow(msk, subflow) {
+               struct inet_connection_sock *icsk;
+               struct sock *ssk;
+
+               ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow,
+                                                          struct 
mptcp_subflow_context));
+               icsk = bpf_core_cast(ssk, struct inet_connection_sock);
+
+               if (ssk->sk_mark == 2 &&
+                   __builtin_memcmp(icsk->icsk_ca_ops->name, cc, 
TCP_CA_NAME_MAX)) {
+                       ctx->retval = -2;
+                       break;
+               }
+       }
+
+       return 1;
+}
+
+SEC("cgroup/getsockopt")
+int _getsockopt_subflow(struct bpf_sockopt *ctx)
+{
+       struct bpf_sock *sk = ctx->sk;
+       struct mptcp_sock *msk;
+
+       if (bpf_get_current_pid_tgid() >> 32 != pid)
+               return 1;
+
+       if (!sk || sk->protocol != IPPROTO_MPTCP ||
+           (!(ctx->level == SOL_SOCKET && ctx->optname == SO_MARK) &&
+            !(ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION)))
+               return 1;
+
+       msk = bpf_core_cast(sk, struct mptcp_sock);
+       if (msk->pm.subflows != 1) {
+               ctx->retval = -1;
+               return 1;
+       }
+
+       if (ctx->optname == SO_MARK)
+               return _check_getsockopt_subflow_mark(msk, ctx);
+       return _check_getsockopt_subflow_cc(msk, ctx);
+}

-- 
2.45.2


Reply via email to