When run in server mode, the sample RDS program opens a PF_RDS socket
and attaches an eBPF program to it. The program then uses the
bpf_next_skb_frag helper together with BPF tail calls to inspect the
skb's linear and non-linear data.

To ease testing, RDS client functionality is also added so that users
can generate RDS packets.

Run server:
[root@lab71 bpf]# ./rds_skb -s 192.168.3.71
running server in a loop
transport tcp
server bound to address: 192.168.3.71 port 4000
server listening on 192.168.3.71
192.168.3.71 received a packet from 192.168.3.71 of len 8192 cmsg len 0,
on port 52287
payload contains:30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f 40 41
42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f 50 51 52 53 54 55 56 57 58 59
5a 5b 5c 5d 5e 5f 60 61 62 63 64 65 66 67 68 69 6a 6b ...
server listening on 192.168.3.71

Run client:
[root@lab70 bpf]# ./rds_skb -s 192.168.3.71 -c 192.168.3.70
transport tcp
client bound to address: 192.168.3.71 port 47437
client sending 8192 byte message from 192.168.3.71 to 192.168.3.70 on
port 47437

bpf program output:
[root@lab71]# cat /sys/kernel/debug/tracing/trace_pipe
          <idle>-0     [000] ..s. 218923.839673: 0: 30 31 32
          <idle>-0     [000] ..s. 218923.839682: 0: 33 34 35
          <idle>-0     [000] ..s. 218923.845133: 0: be bf c0
          <idle>-0     [000] ..s. 218923.845135: 0: c1 c2 c3
          <idle>-0     [000] ..s. 218923.850581: 0: be bf c0
          <idle>-0     [000] ..s. 218923.850582: 0: c1 c2 c3
          <idle>-0     [000] ..s. 218923.850582: 0: no more skb frag

Note: changing the MTU to 9000 helps ensure that RDS gets skbs with
fragments.

Signed-off-by: Tushar Dave <tushar.n.d...@oracle.com>
Reviewed-by: Shannon Nelson <shannon.nel...@oracle.com>
Reviewed-by: Sowmini Varadhan <sowmini.varad...@oracle.com>
---
 samples/bpf/Makefile       |   3 +
 samples/bpf/rds_skb_kern.c |  87 +++++++++++++
 samples/bpf/rds_skb_user.c | 311 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 401 insertions(+)
 create mode 100644 samples/bpf/rds_skb_kern.c
 create mode 100644 samples/bpf/rds_skb_user.c

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 62a99ab..a05c3b2 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -51,6 +51,7 @@ hostprogs-y += cpustat
 hostprogs-y += xdp_adjust_tail
 hostprogs-y += xdpsock
 hostprogs-y += xdp_fwd
+hostprogs-y += rds_skb
 
 # Libbpf dependencies
 LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a
@@ -105,6 +106,7 @@ cpustat-objs := bpf_load.o cpustat_user.o
 xdp_adjust_tail-objs := xdp_adjust_tail_user.o
 xdpsock-objs := bpf_load.o xdpsock_user.o
 xdp_fwd-objs := bpf_load.o xdp_fwd_user.o
+rds_skb-objs := bpf_load.o rds_skb_user.o
 
 # Tell kbuild to always build the programs
 always := $(hostprogs-y)
@@ -160,6 +162,7 @@ always += cpustat_kern.o
 always += xdp_adjust_tail_kern.o
 always += xdpsock_kern.o
 always += xdp_fwd_kern.o
+always += rds_skb_kern.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 HOSTCFLAGS += -I$(srctree)/tools/lib/
diff --git a/samples/bpf/rds_skb_kern.c b/samples/bpf/rds_skb_kern.c
new file mode 100644
index 0000000..c8832d4
--- /dev/null
+++ b/samples/bpf/rds_skb_kern.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/filter.h>
+#include <linux/ptrace.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include <linux/rds.h>
+#include "bpf_helpers.h"
+
+/* Declare program bpf_func_<F>, placed in ELF section "socket/<F>". */
+#define PROG(F) SEC("socket/"__stringify(F)) int bpf_func_##F
+
+#define bpf_printk(fmt, ...)                           \
+({                                                     \
+       char ____fmt[] = fmt;                           \
+       bpf_trace_printk(____fmt, sizeof(____fmt),      \
+                       ##__VA_ARGS__);                 \
+})     /* fmt goes into a local array passed to bpf_trace_printk() */
+
+/* Program array that populate_skb_frags() tail-calls into. */
+struct bpf_map_def SEC("maps") jmp_table = {
+       .type = BPF_MAP_TYPE_PROG_ARRAY,
+       .key_size = sizeof(u32),
+       .value_size = sizeof(u32),
+       .max_entries = 2,
+};
+
+#define FRAG 1 /* number used in the section name emitted by PROG(FRAG) */
+
+/* Trace the first six bytes found at skb->data_meta, three per line. */
+static inline void dump_skb(struct __sk_buff *skb)
+{
+       /* NOTE(review): main_prog starts at skb->data instead - confirm */
+       void *start = (void *)(long) skb->data_meta;
+       void *end = (void *)(long) skb->data_end;
+       unsigned char *b;
+
+       if (start + 6 > end)    /* fewer than six readable bytes */
+               return;
+       b = start;
+       bpf_printk("%x %x %x\n", b[0], b[1], b[2]);
+       bpf_printk("%x %x %x\n", b[3], b[4], b[5]);
+}
+
+/* Ask for the next skb frag; tail-call jmp_table[FRAG] unless -ENODATA. */
+static inline void populate_skb_frags(struct __sk_buff *skb)
+{
+       int ret = bpf_next_skb_frag(skb);
+
+       if (ret == -ENODATA) {
+               bpf_printk("no more skb frag\n");
+               return;
+       }
+       /* FRAG (1) matches the "socket/1" section that PROG(FRAG) emits */
+       bpf_tail_call(skb, &jmp_table, FRAG);
+}
+
+/* walk skb frag: dump the data now visible, then request the next frag */
+
+PROG(FRAG)(struct __sk_buff *skb)
+{
+       dump_skb(skb);
+       populate_skb_frags(skb);
+       return 0;
+}
+
+/* Trace the first six bytes at skb->data, then start the frag walk. */
+SEC("socket/0")
+int main_prog(struct __sk_buff *skb)
+{
+       void *data = (void *)(long) skb->data;
+       void *data_end = (void *)(long) skb->data_end;
+       unsigned char *d;
+
+       if (data + 6 > data_end) {
+               bpf_printk("out\n");
+               return 0;
+       }
+
+       d = (unsigned char *)data;
+       bpf_printk("%x %x %x\n", d[0], d[1], d[2]);
+       bpf_printk("%x %x %x\n", d[3], d[4], d[5]);
+
+       populate_skb_frags(skb);
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/rds_skb_user.c b/samples/bpf/rds_skb_user.c
new file mode 100644
index 0000000..9f73dc3
--- /dev/null
+++ b/samples/bpf/rds_skb_user.c
@@ -0,0 +1,311 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <arpa/inet.h>
+#include <assert.h>
+#include "bpf_load.h"
+#include <getopt.h>
+#include <errno.h>
+#include <netinet/in.h>
+#include <limits.h>
+#include <linux/sockios.h>
+#include <linux/rds.h>
+#include <linux/errqueue.h>
+#include <linux/bpf.h>
+#include <strings.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#define TESTPORT       4000
+#define BUFSIZE                8192
+
+/* Map an RDS_TRANS_* transport id to a printable name. */
+static const char *trans2str(int trans)
+{
+       if (trans == RDS_TRANS_TCP)
+               return "tcp";
+       if (trans == RDS_TRANS_NONE)
+               return "none";
+
+       /* any other transport id */
+       return "unknown";
+}
+
+/* Return the transport set on @sock, or a negative value on error. */
+static int gettransport(int sock)
+{
+       socklen_t len;
+       int val, err;
+
+       len = sizeof(val);      /* value is int-sized; a char would overflow */
+       err = getsockopt(sock, SOL_RDS, SO_RDS_TRANSPORT, &val, &len);
+       if (err < 0) {
+               fprintf(stderr, "%s: getsockopt %s\n",
+                       __func__, strerror(errno));
+               return err;
+       }
+       return val;
+}
+
+/* Select @transport on @sock; returns setsockopt()'s result (< 0 on error). */
+static int settransport(int sock, int transport)
+{
+       int rc = setsockopt(sock, SOL_RDS, SO_RDS_TRANSPORT,
+                           (char *)&transport, sizeof(transport));
+
+       if (rc >= 0)
+               return rc;
+       fprintf(stderr, "could not set transport %s, %s\n",
+               trans2str(transport), strerror(errno));
+       return rc;
+}
+
+/* Print @fd's local address/port after @str; also store it in @ret if set. */
+static void print_sock_local_info(int fd, char *str, struct sockaddr_in *ret)
+{
+       struct sockaddr_in local;
+       socklen_t len = sizeof(local);
+
+       if (getsockname(fd, (struct sockaddr *)&local, &len) < 0) {
+               fprintf(stderr, "%s getsockname %s\n",
+                       __func__, strerror(errno));
+               return;
+       }
+
+       printf("%s address: %s port %d\n", str ? str : "",
+              inet_ntoa(local.sin_addr), ntohs(local.sin_port));
+
+       if (ret)
+               *ret = local;
+}
+
+/* RDS/TCP server: bind, attach the eBPF program, dump received messages. */
+static void server(char *address, in_port_t port)
+{
+       char laddr[INET_ADDRSTRLEN], raddr[INET_ADDRSTRLEN];
+       struct sockaddr_in sin, din;
+       struct msghdr msg;
+       struct iovec iov;
+       int rc, sock, i;
+       char *buf;
+
+       buf = calloc(BUFSIZE, sizeof(char));
+       if (!buf) {
+               fprintf(stderr, "%s: calloc %s\n", __func__, strerror(errno));
+               return;
+       }
+
+       sock = socket(PF_RDS, SOCK_SEQPACKET, 0);
+       if (sock < 0) {
+               fprintf(stderr, "%s: socket %s\n", __func__, strerror(errno));
+               goto out;
+       }
+       if (settransport(sock, RDS_TRANS_TCP) < 0)
+               goto out_close;
+
+       printf("transport %s\n", trans2str(gettransport(sock)));
+
+       memset(&sin, 0, sizeof(sin));
+       sin.sin_family = AF_INET;
+       sin.sin_addr.s_addr = inet_addr(address);
+       sin.sin_port = htons(port);
+
+       rc = bind(sock, (struct sockaddr *)&sin, sizeof(sin));
+       if (rc < 0) {
+               fprintf(stderr, "%s: bind %s\n", __func__, strerror(errno));
+               goto out_close;
+       }
+
+       /* attach eBPF program; kept out of assert() so NDEBUG cannot drop it */
+       rc = setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd[1],
+                       sizeof(prog_fd[1]));
+       if (rc < 0) {
+               fprintf(stderr, "%s: setsockopt %s\n",
+                       __func__, strerror(errno));
+               goto out_close;
+       }
+
+       print_sock_local_info(sock, "server bound to", NULL);
+       /* inet_ntoa() shares one static buffer, so format into own buffers */
+       inet_ntop(AF_INET, &sin.sin_addr, laddr, sizeof(laddr));
+
+       while (1) {
+               memset(buf, 0, BUFSIZE);
+               iov.iov_base = buf;
+               iov.iov_len = BUFSIZE;
+
+               memset(&msg, 0, sizeof(msg));
+               msg.msg_name = &din;
+               msg.msg_namelen = sizeof(din);
+               msg.msg_iov = &iov;
+               msg.msg_iovlen = 1;
+
+               printf("server listening on %s\n", laddr);
+
+               rc = recvmsg(sock, &msg, 0);
+               if (rc < 0) {
+                       fprintf(stderr, "%s: recvmsg %s\n",
+                               __func__, strerror(errno));
+                       break;
+               }
+               inet_ntop(AF_INET, &din.sin_addr, raddr, sizeof(raddr));
+
+               /* rc, not iov_len, is the number of bytes received */
+               printf("%s received a packet from %s of len %d cmsg len %d, on port %d\n",
+                       laddr, raddr, rc,
+                       (uint32_t) msg.msg_controllen,
+                       ntohs(din.sin_port));
+
+               printf("payload contains:");
+               for (i = 0; i < 60; i++)
+                       printf("%x ", (unsigned char)buf[i]);
+               printf("...\n");
+       }
+out_close:
+       close(sock);
+out:
+       free(buf);
+}
+
+/* Fill @buf (BUFSIZE bytes) with a counting pattern that starts at 0x30. */
+static void create_message(char *buf)
+{
+       unsigned int off;
+
+       for (off = 0; off < BUFSIZE; off++)
+               buf[off] = 0x30 + off;
+}
+
+/* Point @msg at a new one-entry iovec over @buf; -1 if calloc() fails. */
+static int build_rds_packet(struct msghdr *msg, char *buf)
+{
+       struct iovec *vec = calloc(1, sizeof(*vec));
+
+       if (!vec) {
+               fprintf(stderr, "%s: calloc %s\n", __func__, strerror(errno));
+               return -1;
+       }
+
+       /* the single entry spans the whole BUFSIZE-byte message buffer */
+       vec->iov_base = buf;
+       vec->iov_len = BUFSIZE * sizeof(char);
+
+       msg->msg_iov = vec;
+       msg->msg_iovlen = 1;
+       return 0;
+}
+
+/* RDS/TCP client: send one BUFSIZE-byte message to @remoteaddr. */
+static void client(char *localaddr, char *remoteaddr, in_port_t server_port)
+{
+       struct sockaddr_in sin, din;
+       struct msghdr msg;
+       int rc, sock;
+       char *buf;
+
+       buf = calloc(BUFSIZE, sizeof(char));
+       if (!buf) {
+               fprintf(stderr, "%s: calloc %s\n", __func__, strerror(errno));
+               return;
+       }
+
+       create_message(buf);
+
+       sock = socket(PF_RDS, SOCK_SEQPACKET, 0);
+       if (sock < 0) {
+               fprintf(stderr, "%s: socket %s\n", __func__, strerror(errno));
+               goto out;
+       }
+
+       if (settransport(sock, RDS_TRANS_TCP) < 0)
+               goto out_close;
+
+       printf("transport %s\n", trans2str(gettransport(sock)));
+
+       memset(&sin, 0, sizeof(sin));
+       sin.sin_family = AF_INET;
+       sin.sin_addr.s_addr = inet_addr(localaddr);
+       sin.sin_port = 0;       /* no fixed local port requested */
+
+       rc = bind(sock, (struct sockaddr *)&sin, sizeof(sin));
+       if (rc < 0) {
+               fprintf(stderr, "%s: bind %s\n", __func__, strerror(errno));
+               goto out_close;
+       }
+       /* refreshes sin with the address/port reported by getsockname() */
+       print_sock_local_info(sock, "client bound to",  &sin);
+
+       memset(&din, 0, sizeof(din));
+       din.sin_family = AF_INET;
+       din.sin_addr.s_addr = inet_addr(remoteaddr);
+       din.sin_port = htons(server_port);
+
+       memset(&msg, 0, sizeof(msg));
+       msg.msg_name = &din;
+       msg.msg_namelen = sizeof(din);
+
+       rc = build_rds_packet(&msg, buf);
+       if (rc < 0)
+               goto out_close;
+
+       printf("client sending %d byte message from %s to %s on port %d\n",
+               (uint32_t) msg.msg_iov->iov_len, localaddr,
+               remoteaddr, ntohs(sin.sin_port));
+
+       rc = sendmsg(sock, &msg, 0);
+       if (rc < 0)
+               fprintf(stderr, "%s: sendmsg %s\n", __func__, strerror(errno));
+
+       /* iovec came from build_rds_packet(); msg_control is never set here */
+       free(msg.msg_iov);
+out_close:
+       close(sock);
+out:
+       free(buf);
+}
+
+static void usage(char *progname)      /* synopsis goes to stderr */
+{
+       fprintf(stderr, "Usage: %s -s srvaddr [-c clientaddr]\n", progname);
+}
+
+/* Parse -s/-c, load the eBPF object, then run as client or as server. */
+int main(int argc, char **argv)
+{
+       char *serveraddr = NULL, *clientaddr = NULL;
+       in_port_t server_port = TESTPORT;
+       char filename[256];
+       int opt;
+
+       while ((opt = getopt(argc, argv, "s:c:")) != -1) {
+               switch (opt) {
+               case 's':
+                       serveraddr = optarg;
+                       break;
+               case 'c':
+                       clientaddr = optarg;
+                       break;
+               default:
+                       usage(argv[0]);
+                       return 1;
+               }
+       }
+       if (!serveraddr) {      /* both modes need the server address */
+               usage(argv[0]);
+               return 1;
+       }
+       snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+       if (load_bpf_file(filename)) {
+               fprintf(stderr, "Error: load_bpf_file %s", bpf_log_buf);
+               return 1;
+       }
+       if (clientaddr) {
+               client(clientaddr, serveraddr, server_port);
+       } else {
+               printf("running server in a loop\n");
+               server(serveraddr, server_port);
+       }
+       return 0;
+}
-- 
1.8.3.1

Reply via email to