Hi, to step into and out of pf(4) anchors, pf(4) uses a temporary stack that is a global variable. Once pf_test_rule() runs in parallel and an anchor is evaluated concurrently, that global stack is used by multiple CPUs at the same time, which can lead to panics. To solve this issue, this diff allocates a per-CPU stack using the cpumem API.
Opinions? Patrick diff --git a/sys/net/pf.c b/sys/net/pf.c index 3ddcd7726d2..78316ae0009 100644 --- a/sys/net/pf.c +++ b/sys/net/pf.c @@ -119,12 +119,7 @@ u_char pf_tcp_secret[16]; int pf_tcp_secret_init; int pf_tcp_iss_off; -struct pf_anchor_stackframe { - struct pf_ruleset *rs; - struct pf_rule *r; - struct pf_anchor_node *parent; - struct pf_anchor *child; -} pf_anchor_stack[64]; +struct cpumem *pf_anchor_stacks; struct pool pf_src_tree_pl, pf_rule_pl, pf_queue_pl; struct pool pf_state_pl, pf_state_key_pl, pf_state_item_pl; @@ -3003,22 +2998,26 @@ void pf_step_into_anchor(int *depth, struct pf_ruleset **rs, struct pf_rule **r, struct pf_rule **a) { + struct pf_anchor_stack *s; struct pf_anchor_stackframe *f; - if (*depth >= sizeof(pf_anchor_stack) / - sizeof(pf_anchor_stack[0])) { + s = cpumem_enter(pf_anchor_stacks); + + if (*depth >= sizeof(*s) / sizeof(s->frame[0])) { log(LOG_ERR, "pf: anchor stack overflow\n"); *r = TAILQ_NEXT(*r, entries); + cpumem_leave(pf_anchor_stacks, s); return; } else if (a != NULL) *a = *r; - f = pf_anchor_stack + (*depth)++; + f = s->frame + (*depth)++; f->rs = *rs; f->r = *r; if ((*r)->anchor_wildcard) { f->parent = &(*r)->anchor->children; if ((f->child = RB_MIN(pf_anchor_node, f->parent)) == NULL) { *r = NULL; + cpumem_leave(pf_anchor_stacks, s); return; } *rs = &f->child->ruleset; @@ -3028,19 +3027,23 @@ pf_step_into_anchor(int *depth, struct pf_ruleset **rs, *rs = &(*r)->anchor->ruleset; } *r = TAILQ_FIRST((*rs)->rules.active.ptr); + cpumem_leave(pf_anchor_stacks, s); } int pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, struct pf_rule **r, struct pf_rule **a, int *match) { + struct pf_anchor_stack *s; struct pf_anchor_stackframe *f; int quick = 0; + s = cpumem_enter(pf_anchor_stacks); + do { if (*depth <= 0) break; - f = pf_anchor_stack + *depth - 1; + f = s->frame + *depth - 1; if (f->parent != NULL && f->child != NULL) { f->child = RB_NEXT(pf_anchor_node, f->parent, f->child); if (f->child != NULL) { @@ -3066,6 
+3069,7 @@ pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, *r = TAILQ_NEXT(f->r, entries); } while (*r == NULL); + cpumem_leave(pf_anchor_stacks, s); return (quick); } diff --git a/sys/net/pf_ioctl.c b/sys/net/pf_ioctl.c index 56a43a55ab8..869ca3eaa1d 100644 --- a/sys/net/pf_ioctl.c +++ b/sys/net/pf_ioctl.c @@ -161,6 +161,10 @@ pfattach(int num) IPL_SOFTNET, 0, "pfruleitem", NULL); pool_init(&pf_queue_pl, sizeof(struct pf_queuespec), 0, IPL_SOFTNET, 0, "pfqueue", NULL); + + pf_anchor_stacks = cpumem_malloc(sizeof(struct pf_anchor_stack), + M_TEMP); + hfsc_initialize(); pfr_initialize(); pfi_initialize(); diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h index 00e9b790a91..62a183727b3 100644 --- a/sys/net/pfvar.h +++ b/sys/net/pfvar.h @@ -1872,6 +1872,19 @@ void pf_send_tcp(const struct pf_rule *, sa_family_t, u_int8_t, u_int16_t, u_int16_t, u_int8_t, int, u_int16_t, u_int); +struct pf_anchor_stackframe { + struct pf_ruleset *rs; + struct pf_rule *r; + struct pf_anchor_node *parent; + struct pf_anchor *child; +}; + +struct pf_anchor_stack { + struct pf_anchor_stackframe frame[64]; +}; + +struct cpumem; +extern struct cpumem *pf_anchor_stacks; #endif /* _KERNEL */ #endif /* _NET_PFVAR_H_ */