Adds a bpf helper, bpf_skb_in_cgroup, to decide if a skb->sk
belongs to a descendant of a cgroup2.  It is similar to the
feature added in netfilter:
commit c38c4597e4bf ("netfilter: implement xt_cgroup cgroup2 path match")

The user is expected to populate a BPF_MAP_TYPE_CGROUP_ARRAY
map, which bpf_skb_in_cgroup() then consults.

The bpf verifier is modified to ensure that BPF_MAP_TYPE_CGROUP_ARRAY
and bpf_skb_in_cgroup() are always used together.

Signed-off-by: Martin KaFai Lau <ka...@fb.com>
Cc: Alexei Starovoitov <a...@fb.com>
Cc: Daniel Borkmann <dan...@iogearbox.net>
Cc: Tejun Heo <t...@kernel.org>
Acked-by: Alexei Starovoitov <a...@kernel.org>
---
 include/uapi/linux/bpf.h | 12 ++++++++++++
 kernel/bpf/verifier.c    |  8 ++++++++
 net/core/filter.c        | 40 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 60 insertions(+)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index ef4e386..bad309f 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -314,6 +314,18 @@ enum bpf_func_id {
         */
        BPF_FUNC_skb_get_tunnel_opt,
        BPF_FUNC_skb_set_tunnel_opt,
+
+       /**
+        * bpf_skb_in_cgroup(skb, map, index) - Check cgroup2 membership of skb
+        * @skb: pointer to skb
+        * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
+        * @index: index of the cgroup in the bpf_map
+        * Return:
+        *   == 0 skb failed the cgroup2 descendant test
+        *   == 1 skb succeeded the cgroup2 descendant test
+        *    < 0 error
+        */
+       BPF_FUNC_skb_in_cgroup,
        __BPF_FUNC_MAX_ID,
 };
 
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 668e079..68753e0 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1062,6 +1062,10 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
                if (func_id != BPF_FUNC_get_stackid)
                        goto error;
                break;
+       case BPF_MAP_TYPE_CGROUP_ARRAY:
+               if (func_id != BPF_FUNC_skb_in_cgroup)
+                       goto error;
+               break;
        default:
                break;
        }
@@ -1081,6 +1085,10 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
                if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
                        goto error;
                break;
+       case BPF_FUNC_skb_in_cgroup:
+               if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
+                       goto error;
+               break;
        default:
                break;
        }
diff --git a/net/core/filter.c b/net/core/filter.c
index df6860c..a16f7d2 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2024,6 +2024,42 @@ bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
        }
 }
 
+#ifdef CONFIG_CGROUPS
+static u64 bpf_skb_in_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) /* r4, r5 unused */
+{
+       struct sk_buff *skb = (struct sk_buff *)(long)r1; /* r1: skb */
+       struct bpf_map *map = (struct bpf_map *)(long)r2; /* r2: map */
+       struct bpf_array *array = container_of(map, struct bpf_array, map);
+       struct cgroup *cgrp;
+       struct sock *sk;
+       u32 i = (u32)r3; /* r3: index into array->ptrs[] */
+
+       WARN_ON_ONCE(!rcu_read_lock_held()); /* warn once if the RCU read lock is not held */
+
+       sk = skb->sk;
+       if (!sk || !sk_fullsock(sk)) /* no socket on the skb, or sk_fullsock() rejects it */
+               return -ENOENT;
+
+       if (unlikely(i >= array->map.max_entries)) /* index past the end of the map */
+               return -E2BIG;
+
+       cgrp = READ_ONCE(array->ptrs[i]); /* NOTE(review): presumably the slot can change concurrently - confirm */
+       if (unlikely(!cgrp)) /* slot i holds no cgroup */
+               return -ENOENT;
+
+       return cgroup_is_descendant(sock_cgroup_ptr(&sk->sk_cgrp_data), cgrp);
+}
+
+static const struct bpf_func_proto bpf_skb_in_cgroup_proto = { /* prototype for bpf_skb_in_cgroup() */
+       .func           = bpf_skb_in_cgroup,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER, /* helper returns -ENOENT, -E2BIG or the descendant-test result */
+       .arg1_type      = ARG_PTR_TO_CTX, /* r1: the skb */
+       .arg2_type      = ARG_CONST_MAP_PTR, /* r2: the map */
+       .arg3_type      = ARG_ANYTHING, /* r3: index; range-checked inside the helper */
+};
+#endif
+
 static const struct bpf_func_proto *
 sk_filter_func_proto(enum bpf_func_id func_id)
 {
@@ -2086,6 +2122,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
                return &bpf_get_route_realm_proto;
        case BPF_FUNC_perf_event_output:
                return bpf_get_event_output_proto();
+#ifdef CONFIG_CGROUPS
+       case BPF_FUNC_skb_in_cgroup:
+               return &bpf_skb_in_cgroup_proto;
+#endif
        default:
                return sk_filter_func_proto(func_id);
        }
-- 
2.5.1

Reply via email to