BPF is becoming an OS all by itself. If we were to rewrite cake to be one instance per cpu across all subscribers, we might schedule packets via the rbtree facility, or perhaps a timerwheel.
---------- Forwarded message --------- From: Dave Marchevsky <davemarchev...@fb.com> Date: Sat, Dec 17, 2022 at 12:42 AM Subject: [PATCH v2 bpf-next 09/13] bpf: Special verifier handling for bpf_rbtree_{remove, first} To: <b...@vger.kernel.org> Cc: Alexei Starovoitov <a...@kernel.org>, Daniel Borkmann <dan...@iogearbox.net>, Andrii Nakryiko <and...@kernel.org>, Kernel Team <kernel-t...@fb.com>, Kumar Kartikeya Dwivedi <mem...@gmail.com>, Tejun Heo <t...@kernel.org>, Dave Marchevsky <davemarchev...@fb.com> Newly-added bpf_rbtree_{remove,first} kfuncs have some special properties that require handling in the verifier: * both bpf_rbtree_remove and bpf_rbtree_first return the type containing the bpf_rb_node field, with the offset set to that field's offset, instead of a struct bpf_rb_node * * mark_reg_graph_node helper added in previous patch generalizes this logic, use it * bpf_rbtree_remove's node input is a node that's been inserted in the tree - a non-owning reference. * bpf_rbtree_remove must invalidate non-owning references in order to avoid aliasing issue. Add KF_INVALIDATE_NON_OWN flag, which indicates that the marked kfunc is a non-owning ref invalidation point, and associated verifier logic using previously-added invalidate_non_owning_refs helper. * Unlike other functions, which convert one of their input arg regs to non-owning reference, bpf_rbtree_first takes no arguments and just returns a non-owning reference (possibly null) * For now verifier logic for this is special-cased instead of adding new kfunc flag. This patch, along with the previous one, complete special verifier handling for all rbtree API functions added in this series. 
Signed-off-by: Dave Marchevsky <davemarchev...@fb.com> --- include/linux/btf.h | 1 + kernel/bpf/helpers.c | 2 +- kernel/bpf/verifier.c | 34 ++++++++++++++++++++++++++++------ 3 files changed, 30 insertions(+), 7 deletions(-) diff --git a/include/linux/btf.h b/include/linux/btf.h index 8aee3f7f4248..3663911bb7c0 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -72,6 +72,7 @@ #define KF_DESTRUCTIVE (1 << 6) /* kfunc performs destructive actions */ #define KF_RCU (1 << 7) /* kfunc only takes rcu pointer arguments */ #define KF_RELEASE_NON_OWN (1 << 8) /* kfunc converts its referenced arg into non-owning ref */ +#define KF_INVALIDATE_NON_OWN (1 << 9) /* kfunc invalidates non-owning refs after return */ /* * Return the name of the passed struct, if exists, or halt the build if for diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index de4523c777b7..0e6d010e6423 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2121,7 +2121,7 @@ BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_task_acquire_not_zero, KF_ACQUIRE | KF_RCU | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_task_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_task_release, KF_RELEASE) -BTF_ID_FLAGS(func, bpf_rbtree_remove, KF_ACQUIRE) +BTF_ID_FLAGS(func, bpf_rbtree_remove, KF_ACQUIRE | KF_INVALIDATE_NON_OWN) BTF_ID_FLAGS(func, bpf_rbtree_add, KF_RELEASE | KF_RELEASE_NON_OWN) BTF_ID_FLAGS(func, bpf_rbtree_first, KF_RET_NULL) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 75979f78399d..b4bf3701de7f 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -8393,6 +8393,11 @@ static bool is_kfunc_release_non_own(struct bpf_kfunc_call_arg_meta *meta) return meta->kfunc_flags & KF_RELEASE_NON_OWN; } +static bool is_kfunc_invalidate_non_own(struct bpf_kfunc_call_arg_meta *meta) +{ + return meta->kfunc_flags & KF_INVALIDATE_NON_OWN; +} + static bool is_kfunc_trusted_args(struct bpf_kfunc_call_arg_meta 
*meta) { return meta->kfunc_flags & KF_TRUSTED_ARGS; @@ -9425,10 +9430,20 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ verbose(env, "arg#%d expected pointer to allocated object\n", i); return -EINVAL; } - if (!reg->ref_obj_id) { + if (meta->func_id == special_kfunc_list[KF_bpf_rbtree_remove]) { + if (reg->ref_obj_id) { + verbose(env, "rbtree_remove node input must be non-owning ref\n"); + return -EINVAL; + } + if (in_rbtree_lock_required_cb(env)) { + verbose(env, "rbtree_remove not allowed in rbtree cb\n"); + return -EINVAL; + } + } else if (!reg->ref_obj_id) { verbose(env, "allocated object must be referenced\n"); return -EINVAL; } + ret = process_kf_arg_ptr_to_rbtree_node(env, reg, regno, meta); if (ret < 0) return ret; @@ -9665,11 +9680,12 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, meta.func_id == special_kfunc_list[KF_bpf_list_pop_back]) { struct btf_field *field = meta.arg_list_head.field; - mark_reg_known_zero(env, regs, BPF_REG_0); - regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC; - regs[BPF_REG_0].btf = field->graph_root.btf; - regs[BPF_REG_0].btf_id = field->graph_root.value_btf_id; - regs[BPF_REG_0].off = field->graph_root.node_offset; + mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root); + } else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_remove] || + meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) { + struct btf_field *field = meta.arg_rbtree_root.field; + + mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root); } else if (meta.func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) { mark_reg_known_zero(env, regs, BPF_REG_0); regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_TRUSTED; @@ -9735,7 +9751,13 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, if (is_kfunc_ret_null(&meta)) regs[BPF_REG_0].id = id; regs[BPF_REG_0].ref_obj_id = id; + } else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) { + 
ref_set_non_owning_lock(env, &regs[BPF_REG_0]); } + + if (is_kfunc_invalidate_non_own(&meta)) + invalidate_non_owning_refs(env, &env->cur_state->active_lock); + + if (reg_may_point_to_spin_lock(&regs[BPF_REG_0]) && !regs[BPF_REG_0].id) regs[BPF_REG_0].id = ++env->id_gen; } /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */ -- 2.30.2 -- This song goes out to all the folk that thought Stadia would work: https://www.linkedin.com/posts/dtaht_the-mushroom-song-activity-6981366665607352320-FXtz Dave Täht CEO, TekLibre, LLC _______________________________________________ LibreQoS mailing list LibreQoS@lists.bufferbloat.net https://lists.bufferbloat.net/listinfo/libreqos