BPF programs of BPF_PROG_TYPE_CGROUP_SKB need to access headers in the
skb. This patch enables direct access of skb for these programs.

Two helper functions bpf_compute_and_save_data_pointers() and
bpf_restore_data_pointers() are introduced. There are used in
__cgroup_bpf_run_filter_skb(), to compute proper data_end for the
BPF program, and restore original data afterwards.

Signed-off-by: Song Liu <songliubrav...@fb.com>
---
 include/linux/filter.h | 24 ++++++++++++++++++++++++
 kernel/bpf/cgroup.c    |  6 ++++++
 net/core/filter.c      | 36 +++++++++++++++++++++++++++++++++++-
 3 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 5771874bc01e..96b3ee7f14c9 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -548,6 +548,30 @@ static inline void bpf_compute_data_pointers(struct 
sk_buff *skb)
        cb->data_end  = skb->data + skb_headlen(skb);
 }
 
+/* Similar to bpf_compute_data_pointers(), except that save orginal
+ * data in cb->data and cb->meta_data for restore.
+ */
+static inline void bpf_compute_and_save_data_pointers(
+       struct sk_buff *skb, void *saved_pointers[2])
+{
+       struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb;
+
+       saved_pointers[0] = cb->data_meta;
+       saved_pointers[1] = cb->data_end;
+       cb->data_meta = skb->data - skb_metadata_len(skb);
+       cb->data_end  = skb->data + skb_headlen(skb);
+}
+
+/* Restore data saved by bpf_compute_data_pointers(). */
+static inline void bpf_restore_data_pointers(
+       struct sk_buff *skb, void *saved_pointers[2])
+{
+       struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb;
+
+       cb->data_meta = saved_pointers[0];
+       cb->data_end = saved_pointers[1];;
+}
+
 static inline u8 *bpf_skb_cb(struct sk_buff *skb)
 {
        /* eBPF programs may read/write skb->cb[] area to transfer meta
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 00f6ed2e4f9a..5f5180104ddc 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -554,6 +554,7 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
        unsigned int offset = skb->data - skb_network_header(skb);
        struct sock *save_sk;
        struct cgroup *cgrp;
+       void *saved_pointers[2];
        int ret;
 
        if (!sk || !sk_fullsock(sk))
@@ -566,8 +567,13 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
        save_sk = skb->sk;
        skb->sk = sk;
        __skb_push(skb, offset);
+
+       /* compute pointers for the bpf prog */
+       bpf_compute_and_save_data_pointers(skb, saved_pointers);
+
        ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
                                 bpf_prog_run_save_cb);
+       bpf_restore_data_pointers(skb, saved_pointers);
        __skb_pull(skb, offset);
        skb->sk = save_sk;
        return ret == 1 ? 0 : -EPERM;
diff --git a/net/core/filter.c b/net/core/filter.c
index 1a3ac6c46873..e3ca30bd6840 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5346,6 +5346,40 @@ static bool sk_filter_is_valid_access(int off, int size,
        return bpf_skb_is_valid_access(off, size, type, prog, info);
 }
 
+static bool cg_skb_is_valid_access(int off, int size,
+                                  enum bpf_access_type type,
+                                  const struct bpf_prog *prog,
+                                  struct bpf_insn_access_aux *info)
+{
+       switch (off) {
+       case bpf_ctx_range(struct __sk_buff, tc_classid):
+       case bpf_ctx_range(struct __sk_buff, data_meta):
+       case bpf_ctx_range(struct __sk_buff, flow_keys):
+               return false;
+       }
+       if (type == BPF_WRITE) {
+               switch (off) {
+               case bpf_ctx_range(struct __sk_buff, mark):
+               case bpf_ctx_range(struct __sk_buff, priority):
+               case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
+                       break;
+               default:
+                       return false;
+               }
+       }
+
+       switch (off) {
+       case bpf_ctx_range(struct __sk_buff, data):
+               info->reg_type = PTR_TO_PACKET;
+               break;
+       case bpf_ctx_range(struct __sk_buff, data_end):
+               info->reg_type = PTR_TO_PACKET_END;
+               break;
+       }
+
+       return bpf_skb_is_valid_access(off, size, type, prog, info);
+}
+
 static bool lwt_is_valid_access(int off, int size,
                                enum bpf_access_type type,
                                const struct bpf_prog *prog,
@@ -7038,7 +7072,7 @@ const struct bpf_prog_ops xdp_prog_ops = {
 
 const struct bpf_verifier_ops cg_skb_verifier_ops = {
        .get_func_proto         = cg_skb_func_proto,
-       .is_valid_access        = sk_filter_is_valid_access,
+       .is_valid_access        = cg_skb_is_valid_access,
        .convert_ctx_access     = bpf_convert_ctx_access,
 };
 
-- 
2.17.1

Reply via email to