Hello, David (dlg@) asked me to shrink the scope of the change. The new diff introduces a mutex to pf_state and adjust pf_detach_state() to utilize the newly introduced mutex. I've also added annotation to pf_state members. The members with '?' needs our attention. We need to verify if they are either protected by global state lock, or if we cover them by newly introduced mutex.
new diff also adds mtx_init() to pf_state_import(). This was missing in my earlier diff. It was found and reported by Hrvoje@. thanks and regards sashan --------8<---------------8<---------------8<------------------8<-------- diff --git a/sys/net/pf.c b/sys/net/pf.c index 2c13c99baac..c6022867dd4 100644 --- a/sys/net/pf.c +++ b/sys/net/pf.c @@ -185,7 +185,8 @@ int pf_translate_icmp_af(struct pf_pdesc*, int, void *); void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, int, sa_family_t, struct pf_rule *, u_int); void pf_detach_state(struct pf_state *); -void pf_state_key_detach(struct pf_state *, int); +void pf_state_key_detach(struct pf_state *, + struct pf_state_key *); u_int32_t pf_tcp_iss(struct pf_pdesc *); void pf_rule_to_actions(struct pf_rule *, struct pf_rule_actions *); @@ -779,7 +780,8 @@ pf_state_key_attach(struct pf_state_key *sk, struct pf_state *s, int idx) s->key[idx] = sk; if ((si = pool_get(&pf_state_item_pl, PR_NOWAIT)) == NULL) { - pf_state_key_detach(s, idx); + pf_state_key_detach(s, s->key[idx]); + s->key[idx] = NULL; return (-1); } si->s = s; @@ -799,42 +801,50 @@ pf_state_key_attach(struct pf_state_key *sk, struct pf_state *s, int idx) void pf_detach_state(struct pf_state *s) { - if (s->key[PF_SK_WIRE] == s->key[PF_SK_STACK]) - s->key[PF_SK_WIRE] = NULL; + struct pf_state_key *key[2]; + + mtx_enter(&s->mtx); + key[PF_SK_WIRE] = s->key[PF_SK_WIRE]; + key[PF_SK_STACK] = s->key[PF_SK_STACK]; + s->key[PF_SK_WIRE] = NULL; + s->key[PF_SK_STACK] = NULL; + mtx_leave(&s->mtx); + + if (key[PF_SK_WIRE] == key[PF_SK_STACK]) + key[PF_SK_WIRE] = NULL; - if (s->key[PF_SK_STACK] != NULL) - pf_state_key_detach(s, PF_SK_STACK); + if (key[PF_SK_STACK] != NULL) + pf_state_key_detach(s, key[PF_SK_STACK]); - if (s->key[PF_SK_WIRE] != NULL) - pf_state_key_detach(s, PF_SK_WIRE); + if (key[PF_SK_WIRE] != NULL) + pf_state_key_detach(s, key[PF_SK_WIRE]); } void -pf_state_key_detach(struct pf_state *s, int idx) +pf_state_key_detach(struct pf_state *s, struct pf_state_key *key) { struct pf_state_item *si; - struct pf_state_key *sk; - if (s->key[idx] == NULL) + PF_STATE_ASSERT_LOCKED(); + + if (key == NULL) return; - si = TAILQ_FIRST(&s->key[idx]->states); + si = TAILQ_FIRST(&key->states); while (si && si->s != s) si = TAILQ_NEXT(si, entry); if (si) { - TAILQ_REMOVE(&s->key[idx]->states, si, entry); + TAILQ_REMOVE(&key->states, si, entry); pool_put(&pf_state_item_pl, si); } - sk = s->key[idx]; - s->key[idx] = NULL; - if (TAILQ_EMPTY(&sk->states)) { - RB_REMOVE(pf_state_tree, &pf_statetbl, sk); - sk->removed = 1; - pf_state_key_unlink_reverse(sk); - pf_state_key_unlink_inpcb(sk); - pf_state_key_unref(sk); + if (TAILQ_EMPTY(&key->states)) { + RB_REMOVE(pf_state_tree, &pf_statetbl, key); + key->removed = 1; + pf_state_key_unlink_reverse(key); + pf_state_key_unlink_inpcb(key); + pf_state_key_unref(key); } } @@ -997,7 +1007,9 @@ pf_state_insert(struct pfi_kif *kif, struct pf_state_key **skw, } *skw = s->key[PF_SK_WIRE]; if (pf_state_key_attach(*sks, s, PF_SK_STACK)) { - pf_state_key_detach(s, PF_SK_WIRE); + pf_state_key_detach(s, s->key[PF_SK_WIRE]); + s->key[PF_SK_WIRE] = NULL; + *skw = NULL; PF_STATE_EXIT_WRITE(); return (-1); } @@ -1429,6 +1441,7 @@ pf_state_import(const struct pfsync_state *sp, int flags) st->sync_state = PFSYNC_S_NONE; refcnt_init(&st->refcnt); + mtx_init(&st->mtx, IPL_NET); /* XXX when we have anchors, use STATE_INC_COUNTERS */ r->states_cur++; @@ -4348,6 +4361,7 @@ pf_create_state(struct pf_pdesc *pd, struct pf_rule *r, struct pf_rule *a, * pf_state_inserts() grabs reference for pfsync! */ refcnt_init(&s->refcnt); + mtx_init(&s->mtx, IPL_NET); switch (pd->proto) { case IPPROTO_TCP: diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h index cc6c9f3dedc..9a75f9a1e6c 100644 --- a/sys/net/pfvar.h +++ b/sys/net/pfvar.h @@ -743,37 +743,49 @@ struct pf_state_cmp { u_int8_t pad[3]; }; +/* + * pf_state structure members require protection + * either by state lock (a global rw-lock) or + * needs to be protected by mutex which is part + * of state. The annotation goes as follows: + * - stateRW: means protected by exclusive state lock + * - mtx: protected by mutex (pf_state::state_mtx) + * - syncMTX: protected by mutex in pfsync + * - I: immutable since creation + * - ?: needs to be revisited (require mtx). + */ struct pf_state { - u_int64_t id; - u_int32_t creatorid; - u_int8_t direction; + u_int64_t id; /* I */ + u_int32_t creatorid; /* I */ + u_int8_t direction; /* I */ u_int8_t pad[3]; - TAILQ_ENTRY(pf_state) sync_list; - TAILQ_ENTRY(pf_state) sync_snap; - TAILQ_ENTRY(pf_state) entry_list; - SLIST_ENTRY(pf_state) gc_list; - RB_ENTRY(pf_state) entry_id; + TAILQ_ENTRY(pf_state) sync_list; /* syncMTX */ + TAILQ_ENTRY(pf_state) sync_snap; /* syncMTX */ + TAILQ_ENTRY(pf_state) entry_list; /* stateRW */ + SLIST_ENTRY(pf_state) gc_list; /* syncMTX */ + RB_ENTRY(pf_state) entry_id; /* stateRW */ struct pf_state_peer src; struct pf_state_peer dst; - struct pf_rule_slist match_rules; - union pf_rule_ptr rule; - union pf_rule_ptr anchor; - union pf_rule_ptr natrule; - struct pf_addr rt_addr; - struct pf_sn_head src_nodes; - struct pf_state_key *key[2]; /* addresses stack and wire */ - struct pfi_kif *kif; - u_int64_t packets[2]; - u_int64_t bytes[2]; - int32_t creation; - int32_t expire; - int32_t pfsync_time; - int rtableid[2]; /* rtables stack and wire */ - u_int16_t qid; - u_int16_t pqid; - u_int16_t tag; - u_int16_t state_flags; + struct pf_rule_slist match_rules; /* I */ + union pf_rule_ptr rule; /* I */ + union pf_rule_ptr anchor; /* I */ + union pf_rule_ptr natrule; /* I */ + struct pf_addr rt_addr; /* I */ + struct pf_sn_head src_nodes; /* I */ + struct pf_state_key *key[2]; /* mtx, stack and wire */ + struct pfi_kif *kif; /* I */ + struct mutex mtx; + u_int64_t packets[2]; /* ? */ + u_int64_t bytes[2]; /* ? */ + int32_t creation; /* I */ + int32_t expire; /* ? */ + int32_t pfsync_time; /* ? */ + int rtableid[2]; /* I rtables stack and wire */ + u_int16_t qid; /* I */ + u_int16_t pqid; /* I */ + u_int16_t tag; /* I */ + u_int16_t state_flags; /* ? */ #define PFSTATE_ALLOWOPTS 0x0001 #define PFSTATE_SLOPPY 0x0002 #define PFSTATE_PFLOW 0x0004 @@ -787,20 +799,20 @@ struct pf_state { #define PFSTATE_INP_UNLINKED 0x0400 #define PFSTATE_SCRUBMASK (PFSTATE_NODF|PFSTATE_RANDOMID|PFSTATE_SCRUB_TCP) #define PFSTATE_SETMASK (PFSTATE_SETTOS|PFSTATE_SETPRIO) - u_int8_t log; - u_int8_t timeout; - u_int8_t sync_state; /* PFSYNC_S_x */ - u_int8_t sync_updates; - u_int8_t min_ttl; - u_int8_t set_tos; - u_int8_t set_prio[2]; - u_int16_t max_mss; - u_int16_t if_index_in; - u_int16_t if_index_out; + u_int8_t log; /* I */ + u_int8_t timeout; /* ? */ + u_int8_t sync_state; /* ? PFSYNC_S_x */ + u_int8_t sync_updates; /* ? */ + u_int8_t min_ttl; /* I */ + u_int8_t set_tos; /* I */ + u_int8_t set_prio[2]; /* I */ + u_int16_t max_mss; /* I */ + u_int16_t if_index_in; /* I */ + u_int16_t if_index_out; /* I */ pf_refcnt_t refcnt; - u_int16_t delay; - u_int8_t rt; - u_int8_t snapped; + u_int16_t delay; /* I */ + u_int8_t rt; /* I */ + u_int8_t snapped; /* syncMTX */ }; /*