Re: [PATCH v2 net-next] bpf/verifier: track liveness for pruning
On 15/08/17 17:33, Daniel Borkmann wrote: > On 08/15/2017 03:53 PM, Edward Cree wrote: >> State of a register doesn't matter if it wasn't read in reaching an exit; >> a write screens off all reads downstream of it from all explored_states >> upstream of it. >> This allows us to prune many more branches; here are some processed insn >> counts for some Cilium programs: >> Program before after >> bpf_lb_opt_-DLB_L3.o 6515 3361 >> bpf_lb_opt_-DLB_L4.o 8976 5176 >> bpf_lb_opt_-DUNKNOWN.o 2960 1137 >> bpf_lxc_opt_-DDROP_ALL.o 95412 48537 >> bpf_lxc_opt_-DUNKNOWN.o 141706 78718 >> bpf_netdev.o 24251 17995 >> bpf_overlay.o 10999 9385 >> >> The runtime is also improved; here are 'time' results in ms: >> Program before after >> bpf_lb_opt_-DLB_L3.o 24 6 >> bpf_lb_opt_-DLB_L4.o 26 11 >> bpf_lb_opt_-DUNKNOWN.o 11 2 >> bpf_lxc_opt_-DDROP_ALL.o 1288139 >> bpf_lxc_opt_-DUNKNOWN.o1768234 >> bpf_netdev.o 62 31 >> bpf_overlay.o15 13 >> >> Signed-off-by: Edward Cree > [...] >> @@ -1639,10 +1675,13 @@ static int check_call(struct bpf_verifier_env *env, >> int func_id, int insn_idx) >> } >> >> /* reset caller saved regs */ >> -for (i = 0; i < CALLER_SAVED_REGS; i++) >> +for (i = 0; i < CALLER_SAVED_REGS; i++) { >> mark_reg_not_init(regs, caller_saved[i]); >> +check_reg_arg(env, i, DST_OP_NO_MARK); > > Ah, I oversaw that earlier, this needs to be: s/i/caller_saved[i]/ So it does. Of course testing didn't spot this, because caller_saved[i] == i currently. >> +} >> >> /* update return register */ >> +check_reg_arg(env, BPF_REG_0, DST_OP_NO_MARK); > > We could leave this for clarity, but ... Yeah, I'll replace it with a comment, like the other one. Thanks for review :) -Ed
Re: [PATCH v2 net-next] bpf/verifier: track liveness for pruning
On 08/15/2017 03:53 PM, Edward Cree wrote: State of a register doesn't matter if it wasn't read in reaching an exit; a write screens off all reads downstream of it from all explored_states upstream of it. This allows us to prune many more branches; here are some processed insn counts for some Cilium programs: Program before after bpf_lb_opt_-DLB_L3.o 6515 3361 bpf_lb_opt_-DLB_L4.o 8976 5176 bpf_lb_opt_-DUNKNOWN.o 2960 1137 bpf_lxc_opt_-DDROP_ALL.o 95412 48537 bpf_lxc_opt_-DUNKNOWN.o 141706 78718 bpf_netdev.o 24251 17995 bpf_overlay.o 10999 9385 The runtime is also improved; here are 'time' results in ms: Program before after bpf_lb_opt_-DLB_L3.o 24 6 bpf_lb_opt_-DLB_L4.o 26 11 bpf_lb_opt_-DUNKNOWN.o 11 2 bpf_lxc_opt_-DDROP_ALL.o 1288139 bpf_lxc_opt_-DUNKNOWN.o1768234 bpf_netdev.o 62 31 bpf_overlay.o15 13 Signed-off-by: Edward Cree [...] @@ -1639,10 +1675,13 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx) } /* reset caller saved regs */ - for (i = 0; i < CALLER_SAVED_REGS; i++) + for (i = 0; i < CALLER_SAVED_REGS; i++) { mark_reg_not_init(regs, caller_saved[i]); + check_reg_arg(env, i, DST_OP_NO_MARK); Ah, I oversaw that earlier, this needs to be: s/i/caller_saved[i]/ + } /* update return register */ + check_reg_arg(env, BPF_REG_0, DST_OP_NO_MARK); We could leave this for clarity, but ... if (fn->ret_type == RET_INTEGER) { /* sets type to SCALAR_VALUE */ mark_reg_unknown(regs, BPF_REG_0); [...] /* reset caller saved regs to unreadable */ - for (i = 0; i < CALLER_SAVED_REGS; i++) + for (i = 0; i < CALLER_SAVED_REGS; i++) { mark_reg_not_init(regs, caller_saved[i]); + check_reg_arg(env, i, DST_OP_NO_MARK); caller_saved[i] + } /* mark destination R0 register as readable, since it contains -* the value fetched from the packet +* the value fetched from the packet. +* Already marked as written above. ... then it should be here as well. Other option is to leave out both BPF_REG_0 since covered by caller_saved[] already. */ mark_reg_unknown(regs, BPF_REG_0); return 0; @@ -3194,7 +3236,11 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, bool varlen_map_access, struct idpair *idmap) { - if (memcmp(rold, rcur, sizeof(*rold)) == 0) + if (!(rold->live & REG_LIVE_READ)) + /* explored state didn't use this */ + return true;
[PATCH v2 net-next] bpf/verifier: track liveness for pruning
State of a register doesn't matter if it wasn't read in reaching an exit; a write screens off all reads downstream of it from all explored_states upstream of it. This allows us to prune many more branches; here are some processed insn counts for some Cilium programs: Program before after bpf_lb_opt_-DLB_L3.o 6515 3361 bpf_lb_opt_-DLB_L4.o 8976 5176 bpf_lb_opt_-DUNKNOWN.o 2960 1137 bpf_lxc_opt_-DDROP_ALL.o 95412 48537 bpf_lxc_opt_-DUNKNOWN.o 141706 78718 bpf_netdev.o 24251 17995 bpf_overlay.o 10999 9385 The runtime is also improved; here are 'time' results in ms: Program before after bpf_lb_opt_-DLB_L3.o 24 6 bpf_lb_opt_-DLB_L4.o 26 11 bpf_lb_opt_-DUNKNOWN.o 11 2 bpf_lxc_opt_-DDROP_ALL.o 1288139 bpf_lxc_opt_-DUNKNOWN.o1768234 bpf_netdev.o 62 31 bpf_overlay.o15 13 Signed-off-by: Edward Cree --- v2: update liveness in LD_ABS|IND, as pointed out by Daniel Borkmann. The numbers are mostly unchanged; bpf_lxc_opt_-DUNKNOWN.o dropped about 300 insns and 20ms, while bpf_lxc_opt_-DDROP_ALL.o (despite not changing its #insns) also dropped 13ms. include/linux/bpf_verifier.h | 11 ++- kernel/bpf/verifier.c| 188 +-- 2 files changed, 156 insertions(+), 43 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index c61c3033..91d07ef 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -21,6 +21,12 @@ */ #define BPF_MAX_VAR_SIZINT_MAX +enum bpf_reg_liveness { + REG_LIVE_NONE = 0, /* reg hasn't been read or written this branch */ + REG_LIVE_READ, /* reg was read, so we're sensitive to initial value */ + REG_LIVE_WRITTEN, /* reg was written first, screening off later reads */ +}; + struct bpf_reg_state { enum bpf_reg_type type; union { @@ -40,7 +46,7 @@ struct bpf_reg_state { * came from, when one is tested for != NULL. */ u32 id; - /* These five fields must be last. See states_equal() */ + /* Ordering of fields matters. See states_equal() */ /* For scalar types (SCALAR_VALUE), this represents our knowledge of * the actual value. * For pointer types, this represents the variable part of the offset @@ -57,6 +63,8 @@ struct bpf_reg_state { s64 smax_value; /* maximum possible (s64)value */ u64 umin_value; /* minimum possible (u64)value */ u64 umax_value; /* maximum possible (u64)value */ + /* This field must be last, for states_equal() reasons. */ + enum bpf_reg_liveness live; }; enum bpf_stack_slot_type { @@ -74,6 +82,7 @@ struct bpf_verifier_state { struct bpf_reg_state regs[MAX_BPF_REG]; u8 stack_slot_type[MAX_BPF_STACK]; struct bpf_reg_state spilled_regs[MAX_BPF_STACK / BPF_REG_SIZE]; + struct bpf_verifier_state *parent; }; /* linked list of verifier states used to prune search */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index ecc590e..3affb8d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -629,8 +629,10 @@ static void init_reg_state(struct bpf_reg_state *regs) { int i; - for (i = 0; i < MAX_BPF_REG; i++) + for (i = 0; i < MAX_BPF_REG; i++) { mark_reg_not_init(regs, i); + regs[i].live = REG_LIVE_NONE; + } /* frame pointer */ regs[BPF_REG_FP].type = PTR_TO_STACK; @@ -647,9 +649,26 @@ enum reg_arg_type { DST_OP_NO_MARK /* same as above, check only, don't mark */ }; -static int check_reg_arg(struct bpf_reg_state *regs, u32 regno, +static void mark_reg_read(const struct bpf_verifier_state *state, u32 regno) +{ + struct bpf_verifier_state *parent = state->parent; + + while (parent) { + /* if read wasn't screened by an earlier write ... */ + if (state->regs[regno].live & REG_LIVE_WRITTEN) + break; + /* ... then we depend on parent's value */ + parent->regs[regno].live |= REG_LIVE_READ; + state = parent; + parent = state->parent; + } +} + +static int check_reg_arg(struct bpf_verifier_env *env, u32 regno, enum reg_arg_type t) { + struct bpf_reg_state *regs = env->cur_state.regs; + if (regno >= MAX_BPF_REG) { verbose("R%d is invalid\n", regno); return -EINVAL; @@ -661,12 +680,14 @@ static int check_reg_arg(struct bpf_reg_state *regs, u32 regno, verbose("R%d !read_ok\n", regno); return -EACCES; } + mark_reg_read(&env->cur_state, regno); } else { /* check whether register used as dest operand can be written to */ if (reg