Today socket filter programs can only access the linear part of an skb.
This change adds a bpf_next_skb_frag() helper that lets eBPF programs
look into the non-linear part of an skb, e.g. skb frags. This is useful
when the data of interest is not contained in the linear part of the skb.

Signed-off-by: Tushar Dave <tushar.n.d...@oracle.com>
Reviewed-by: Shannon Nelson <shannon.nel...@oracle.com>
Reviewed-by: Sowmini Varadhan <sowmini.varad...@oracle.com>
---
 include/linux/filter.h                    |  2 ++
 include/uapi/linux/bpf.h                  | 10 ++++++-
 net/core/filter.c                         | 44 +++++++++++++++++++++++++++++--
 tools/include/uapi/linux/bpf.h            | 10 ++++++-
 tools/testing/selftests/bpf/bpf_helpers.h |  2 ++
 5 files changed, 64 insertions(+), 4 deletions(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 9dbcb9d..603b8bf 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -500,6 +500,7 @@ struct sk_filter {
 
 struct bpf_skb_data_end {
        struct qdisc_skb_cb qdisc_cb;
+       u8 index;       /* next frag bpf_next_skb_frag() will expose */
        void *data_meta;
        void *data_end;
 };
@@ -534,6 +535,7 @@ static inline void bpf_compute_data_pointers(struct sk_buff *skb)
        BUILD_BUG_ON(sizeof(*cb) > FIELD_SIZEOF(struct sk_buff, cb));
        cb->data_meta = skb->data - skb_metadata_len(skb);
        cb->data_end  = skb->data + skb_headlen(skb);
+       cb->index = 0;
 }
 
 static inline u8 *bpf_skb_cb(struct sk_buff *skb)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index d94d333..5fe9668 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1902,6 +1902,13 @@ struct bpf_stack_build_id {
  *             egress otherwise). This is the only flag supported for now.
  *     Return
  *             **SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_next_skb_frag(struct sk_buff *skb)
+ *     Description
+ *             Expose the next page fragment of a non-linear *skb* through
+ *             *data_meta* (fragment start) and *data_end* (fragment end).
+ *     Return
+ *             0, **-ENODATA** if no frag is left, or another negative error.
  */
 #define __BPF_FUNC_MAPPER(FN)          \
        FN(unspec),                     \
@@ -1976,7 +1983,8 @@ struct bpf_stack_build_id {
        FN(fib_lookup),                 \
        FN(sock_hash_update),           \
        FN(msg_redirect_hash),          \
-       FN(sk_redirect_hash),
+       FN(sk_redirect_hash),           \
+       FN(next_skb_frag),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/net/core/filter.c b/net/core/filter.c
index 51ea7dd..fd8e90f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3752,6 +3752,38 @@ static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
        .arg1_type      = ARG_PTR_TO_CTX,
 };
 
+/* Step to the next skb frag; 0 on success, negative errno otherwise. */
+BPF_CALL_1(bpf_next_skb_frag, struct sk_buff *, skb)
+{
+       struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb;
+       const skb_frag_t *frag;
+       void *start;
+
+       /* no paged data, or every frag has already been handed out */
+       if (!skb->data_len || cb->index >= skb_shinfo(skb)->nr_frags)
+               return -ENODATA;
+
+       /* an unmapped (highmem) page has no address we could hand out */
+       frag = &skb_shinfo(skb)->frags[cb->index];
+       start = skb_frag_address_safe(frag);
+       if (!start)
+               return -EFAULT;
+
+       /* publish the frag bounds through data_meta (start) and data_end */
+       cb->data_meta = start;
+       cb->data_end = start + skb_frag_size(frag);
+       cb->index++;
+
+       return 0;
+}
+
+static const struct bpf_func_proto bpf_next_skb_frag_proto = {
+       .func           = bpf_next_skb_frag,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+};
+
 BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
           int, level, int, optname, char *, optval, int, optlen)
 {
@@ -4415,6 +4447,8 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
                return &bpf_get_socket_cookie_proto;
        case BPF_FUNC_get_socket_uid:
                return &bpf_get_socket_uid_proto;
+       case BPF_FUNC_next_skb_frag:
+               return &bpf_next_skb_frag_proto;
        default:
                return bpf_base_func_proto(func_id);
        }
@@ -4698,10 +4732,16 @@ static bool sk_filter_is_valid_access(int off, int size,
                                      struct bpf_insn_access_aux *info)
 {
        switch (off) {
-       case bpf_ctx_range(struct __sk_buff, tc_classid):
        case bpf_ctx_range(struct __sk_buff, data):
-       case bpf_ctx_range(struct __sk_buff, data_meta):
+               info->reg_type = PTR_TO_PACKET;
+               break;
        case bpf_ctx_range(struct __sk_buff, data_end):
+               info->reg_type = PTR_TO_PACKET_END;
+               break;
+       case bpf_ctx_range(struct __sk_buff, data_meta):
+               info->reg_type = PTR_TO_PACKET;
+               break;
+       case bpf_ctx_range(struct __sk_buff, tc_classid):
        case bpf_ctx_range_till(struct __sk_buff, family, local_port):
                return false;
        }
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index d94d333..5fe9668 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1902,6 +1902,13 @@ struct bpf_stack_build_id {
  *             egress otherwise). This is the only flag supported for now.
  *     Return
  *             **SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_next_skb_frag(struct sk_buff *skb)
+ *     Description
+ *             Expose the next page fragment of a non-linear *skb* through
+ *             *data_meta* (fragment start) and *data_end* (fragment end).
+ *     Return
+ *             0, **-ENODATA** if no frag is left, or another negative error.
  */
 #define __BPF_FUNC_MAPPER(FN)          \
        FN(unspec),                     \
@@ -1976,7 +1983,8 @@ struct bpf_stack_build_id {
        FN(fib_lookup),                 \
        FN(sock_hash_update),           \
        FN(msg_redirect_hash),          \
-       FN(sk_redirect_hash),
+       FN(sk_redirect_hash),           \
+       FN(next_skb_frag),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index 8f143df..51f2153 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -114,6 +114,8 @@ static int (*bpf_get_stack)(void *ctx, void *buf, int size, int flags) =
 static int (*bpf_fib_lookup)(void *ctx, struct bpf_fib_lookup *params,
                             int plen, __u32 flags) =
        (void *) BPF_FUNC_fib_lookup;
+static int (*bpf_next_skb_frag)(void *ctx) =
+       (void *) BPF_FUNC_next_skb_frag;
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
-- 
1.8.3.1

Reply via email to