Hello,

below is a diff which hopes to fix the issue. Although diff is fairly
large the change itself is kind of straightforward. Let me briefly
explain what's going on here. Diff introduces a mutex to pf_state,
which protects array of keys (pf_state::key) bound to state.

The panic which diff below hopes to fix is caused by a race between timer
thread, which expires state and pfsync dispatch task, which updates a peer.
According to data provided by Hrvoje we panic due to NULL pointer dereference
in pf_state_export(), which finds sk->key[] to be NULL. This may happen because
purge state mechanism detaches state key from state under protection
of PF_STATE_LOCK, while pfsync dispatch task just keeps a reference to state
without using a PF_STATE_LOCK to access a state instance.

In order to synchronize access to pf_statey::key between purge thread
and pfsync dispatch task diff below introduces pf_state::mtx.
pfsync uses pf_state::mtx to attempt to grab references to keys bound
to state, while purge task uses mtx to safely invalidate state keys
in pf_detach_state().

Such change requires pfsync(4) to deal with situation when state
got detached while waiting in dispatch queue to update a peer.
We have to a .write() operation on sync-queue to indicate a failure
so pfsync_sendout() will just skip the state when processing dispatch
queue.

Also diff changes pf_state_key_detach() such caller must pass pointer to state
key instead of key index to be detached from state.  It also requires caller to
invalidate a state key entry in pf_state::key member.

I've just smoked tested the diff _without_ pfsync.

thanks and
regards
sashan

--------8<---------------8<---------------8<------------------8<--------
diff --git a/sys/net/if_pfsync.c b/sys/net/if_pfsync.c
index c78ca62766e..02d45673e20 100644
--- a/sys/net/if_pfsync.c
+++ b/sys/net/if_pfsync.c
@@ -157,16 +157,16 @@ struct {
 };
 
 struct pfsync_q {
-       void            (*write)(struct pf_state *, void *);
+       int             (*write)(struct pf_state *, void *);
        size_t          len;
        u_int8_t        action;
 };
 
 /* we have one of these for every PFSYNC_S_ */
-void   pfsync_out_state(struct pf_state *, void *);
-void   pfsync_out_iack(struct pf_state *, void *);
-void   pfsync_out_upd_c(struct pf_state *, void *);
-void   pfsync_out_del(struct pf_state *, void *);
+int    pfsync_out_state(struct pf_state *, void *);
+int    pfsync_out_iack(struct pf_state *, void *);
+int    pfsync_out_upd_c(struct pf_state *, void *);
+int    pfsync_out_del(struct pf_state *, void *);
 
 struct pfsync_q pfsync_qs[] = {
        { pfsync_out_iack,  sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK },
@@ -516,10 +516,10 @@ pfsync_alloc_scrub_memory(struct pfsync_state_peer *s,
        return (0);
 }
 
-void
+int
 pfsync_state_export(struct pfsync_state *sp, struct pf_state *st)
 {
-       pf_state_export(sp, st);
+       return (pf_state_export(sp, st));
 }
 
 int
@@ -1529,24 +1529,25 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
        return (0);
 }
 
-void
+int
 pfsync_out_state(struct pf_state *st, void *buf)
 {
        struct pfsync_state *sp = buf;
 
-       pfsync_state_export(sp, st);
+       return (pfsync_state_export(sp, st));
 }
 
-void
+int
 pfsync_out_iack(struct pf_state *st, void *buf)
 {
        struct pfsync_ins_ack *iack = buf;
 
        iack->id = st->id;
        iack->creatorid = st->creatorid;
+       return (0);
 }
 
-void
+int
 pfsync_out_upd_c(struct pf_state *st, void *buf)
 {
        struct pfsync_upd_c *up = buf;
@@ -1557,9 +1558,10 @@ pfsync_out_upd_c(struct pf_state *st, void *buf)
        pf_state_peer_hton(&st->dst, &up->dst);
        up->creatorid = st->creatorid;
        up->timeout = st->timeout;
+       return (0);
 }
 
-void
+int
 pfsync_out_del(struct pf_state *st, void *buf)
 {
        struct pfsync_del_c *dp = buf;
@@ -1568,6 +1570,7 @@ pfsync_out_del(struct pf_state *st, void *buf)
        dp->creatorid = st->creatorid;
 
        SET(st->state_flags, PFSTATE_NOSYNC);
+       return (0);
 }
 
 void
@@ -1881,8 +1884,8 @@ pfsync_sendout(void)
                        KASSERT(st->snapped == 1);
                        st->sync_state = PFSYNC_S_NONE;
                        st->snapped = 0;
-                       pfsync_qs[q].write(st, m->m_data + offset);
-                       offset += pfsync_qs[q].len;
+                       if (pfsync_qs[q].write(st, m->m_data + offset) == 0)
+                               offset += pfsync_qs[q].len;
 
                        pf_state_unref(st);
                        count++;
diff --git a/sys/net/if_pfsync.h b/sys/net/if_pfsync.h
index bee6c77f228..4440a561854 100644
--- a/sys/net/if_pfsync.h
+++ b/sys/net/if_pfsync.h
@@ -326,7 +326,7 @@ int                 pfsync_sysctl(int *, u_int,  void *, 
size_t *,
 #define        PFSYNC_SI_CKSUM         0x02
 #define        PFSYNC_SI_ACK           0x04
 int                    pfsync_state_import(struct pfsync_state *, int);
-void                   pfsync_state_export(struct pfsync_state *,
+int                    pfsync_state_export(struct pfsync_state *,
                            struct pf_state *);
 
 void                   pfsync_insert_state(struct pf_state *);
diff --git a/sys/net/pf.c b/sys/net/pf.c
index 93fe5702625..c76f4c71263 100644
--- a/sys/net/pf.c
+++ b/sys/net/pf.c
@@ -185,7 +185,8 @@ int                  pf_translate_icmp_af(struct pf_pdesc*, 
int, void *);
 void                    pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, int,
                            sa_family_t, struct pf_rule *, u_int);
 void                    pf_detach_state(struct pf_state *);
-void                    pf_state_key_detach(struct pf_state *, int);
+void                    pf_state_key_detach(struct pf_state *,
+                           struct pf_state_key *);
 u_int32_t               pf_tcp_iss(struct pf_pdesc *);
 void                    pf_rule_to_actions(struct pf_rule *,
                            struct pf_rule_actions *);
@@ -260,6 +261,9 @@ void                         
pf_state_key_unlink_inpcb(struct pf_state_key *);
 void                    pf_inpcb_unlink_state_key(struct inpcb *);
 void                    pf_pktenqueue_delayed(void *);
 int32_t                         pf_state_expires(const struct pf_state *, 
uint8_t);
+void                    pf_state_keys_take(struct pf_state *,
+                           struct pf_state_key **);
+void                    pf_state_keys_rele(struct pf_state_key **);
 
 #if NPFLOG > 0
 void                    pf_log_matches(struct pf_pdesc *, struct pf_rule *,
@@ -778,7 +782,8 @@ pf_state_key_attach(struct pf_state_key *sk, struct 
pf_state *s, int idx)
                s->key[idx] = sk;
 
        if ((si = pool_get(&pf_state_item_pl, PR_NOWAIT)) == NULL) {
-               pf_state_key_detach(s, idx);
+               pf_state_key_detach(s, s->key[idx]);
+               s->key[idx] = NULL;
                return (-1);
        }
        si->s = s;
@@ -798,42 +803,50 @@ pf_state_key_attach(struct pf_state_key *sk, struct 
pf_state *s, int idx)
 void
 pf_detach_state(struct pf_state *s)
 {
-       if (s->key[PF_SK_WIRE] == s->key[PF_SK_STACK])
-               s->key[PF_SK_WIRE] = NULL;
+       struct pf_state_key *key[2];
+
+       mtx_enter(&s->mtx);
+       key[PF_SK_WIRE] = s->key[PF_SK_WIRE];
+       key[PF_SK_STACK] = s->key[PF_SK_STACK];
+       s->key[PF_SK_WIRE] = NULL;
+       s->key[PF_SK_STACK] = NULL;
+       mtx_leave(&s->mtx);
+
+       if (key[PF_SK_WIRE] == key[PF_SK_STACK])
+               key[PF_SK_WIRE] = NULL;
 
-       if (s->key[PF_SK_STACK] != NULL)
-               pf_state_key_detach(s, PF_SK_STACK);
+       if (key[PF_SK_STACK] != NULL)
+               pf_state_key_detach(s, key[PF_SK_STACK]);
 
-       if (s->key[PF_SK_WIRE] != NULL)
-               pf_state_key_detach(s, PF_SK_WIRE);
+       if (key[PF_SK_WIRE] != NULL)
+               pf_state_key_detach(s, key[PF_SK_WIRE]);
 }
 
 void
-pf_state_key_detach(struct pf_state *s, int idx)
+pf_state_key_detach(struct pf_state *s, struct pf_state_key *key)
 {
        struct pf_state_item    *si;
-       struct pf_state_key     *sk;
 
-       if (s->key[idx] == NULL)
+       PF_STATE_ASSERT_LOCKED();
+
+       if (key == NULL)
                return;
 
-       si = TAILQ_FIRST(&s->key[idx]->states);
+       si = TAILQ_FIRST(&key->states);
        while (si && si->s != s)
            si = TAILQ_NEXT(si, entry);
 
        if (si) {
-               TAILQ_REMOVE(&s->key[idx]->states, si, entry);
+               TAILQ_REMOVE(&key->states, si, entry);
                pool_put(&pf_state_item_pl, si);
        }
 
-       sk = s->key[idx];
-       s->key[idx] = NULL;
-       if (TAILQ_EMPTY(&sk->states)) {
-               RB_REMOVE(pf_state_tree, &pf_statetbl, sk);
-               sk->removed = 1;
-               pf_state_key_unlink_reverse(sk);
-               pf_state_key_unlink_inpcb(sk);
-               pf_state_key_unref(sk);
+       if (TAILQ_EMPTY(&key->states)) {
+               RB_REMOVE(pf_state_tree, &pf_statetbl, key);
+               key->removed = 1;
+               pf_state_key_unlink_reverse(key);
+               pf_state_key_unlink_inpcb(key);
+               pf_state_key_unref(key);
        }
 }
 
@@ -996,7 +1009,9 @@ pf_state_insert(struct pfi_kif *kif, struct pf_state_key 
**skw,
                }
                *skw = s->key[PF_SK_WIRE];
                if (pf_state_key_attach(*sks, s, PF_SK_STACK)) {
-                       pf_state_key_detach(s, PF_SK_WIRE);
+                       pf_state_key_detach(s, s->key[PF_SK_WIRE]);
+                       s->key[PF_SK_WIRE] = NULL;
+                       *skw = NULL;
                        PF_STATE_EXIT_WRITE();
                        return (-1);
                }
@@ -1185,30 +1200,35 @@ pf_find_state_all(struct pf_state_key_cmp *key, u_int 
dir, int *more)
        return (ret ? ret->s : NULL);
 }
 
-void
+int
 pf_state_export(struct pfsync_state *sp, struct pf_state *st)
 {
        int32_t expire;
+       struct pf_state_key *key[2] = { NULL, NULL };
 
        memset(sp, 0, sizeof(struct pfsync_state));
 
        /* copy from state key */
-       sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0];
-       sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1];
-       sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0];
-       sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1];
-       sp->key[PF_SK_WIRE].rdomain = htons(st->key[PF_SK_WIRE]->rdomain);
-       sp->key[PF_SK_WIRE].af = st->key[PF_SK_WIRE]->af;
-       sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0];
-       sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1];
-       sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0];
-       sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1];
-       sp->key[PF_SK_STACK].rdomain = htons(st->key[PF_SK_STACK]->rdomain);
-       sp->key[PF_SK_STACK].af = st->key[PF_SK_STACK]->af;
+       pf_state_keys_take(st, key);
+       if ((key[PF_SK_WIRE] == NULL) || (key[PF_SK_STACK] == NULL))
+               return (-1);
+
+       sp->key[PF_SK_WIRE].addr[0] = key[PF_SK_WIRE]->addr[0];
+       sp->key[PF_SK_WIRE].addr[1] = key[PF_SK_WIRE]->addr[1];
+       sp->key[PF_SK_WIRE].port[0] = key[PF_SK_WIRE]->port[0];
+       sp->key[PF_SK_WIRE].port[1] = key[PF_SK_WIRE]->port[1];
+       sp->key[PF_SK_WIRE].rdomain = htons(key[PF_SK_WIRE]->rdomain);
+       sp->key[PF_SK_WIRE].af = key[PF_SK_WIRE]->af;
+       sp->key[PF_SK_STACK].addr[0] = key[PF_SK_STACK]->addr[0];
+       sp->key[PF_SK_STACK].addr[1] = key[PF_SK_STACK]->addr[1];
+       sp->key[PF_SK_STACK].port[0] = key[PF_SK_STACK]->port[0];
+       sp->key[PF_SK_STACK].port[1] = key[PF_SK_STACK]->port[1];
+       sp->key[PF_SK_STACK].rdomain = htons(key[PF_SK_STACK]->rdomain);
+       sp->key[PF_SK_STACK].af = key[PF_SK_STACK]->af;
        sp->rtableid[PF_SK_WIRE] = htonl(st->rtableid[PF_SK_WIRE]);
        sp->rtableid[PF_SK_STACK] = htonl(st->rtableid[PF_SK_STACK]);
-       sp->proto = st->key[PF_SK_WIRE]->proto;
-       sp->af = st->key[PF_SK_WIRE]->af;
+       sp->proto = key[PF_SK_WIRE]->proto;
+       sp->af = key[PF_SK_WIRE]->af;
 
        /* copy from state */
        strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname));
@@ -1255,6 +1275,10 @@ pf_state_export(struct pfsync_state *sp, struct pf_state 
*st)
        sp->set_tos = st->set_tos;
        sp->set_prio[0] = st->set_prio[0];
        sp->set_prio[1] = st->set_prio[1];
+
+       pf_state_keys_rele(key);
+
+       return (0);
 }
 
 /* END state table stuff */
@@ -4108,6 +4132,7 @@ pf_create_state(struct pf_pdesc *pd, struct pf_rule *r, 
struct pf_rule *a,
         * pf_state_inserts() grabs reference for pfsync!
         */
        refcnt_init(&s->refcnt);
+       mtx_init(&s->mtx, IPL_NET);
 
        switch (pd->proto) {
        case IPPROTO_TCP:
@@ -7789,3 +7814,19 @@ pf_pktenqueue_delayed(void *arg)
 
        pool_put(&pf_pktdelay_pl, pdy);
 }
+
+void
+pf_state_keys_take(struct pf_state *st, struct pf_state_key *keys[])
+{
+       mtx_enter(&st->mtx);
+       keys[PF_SK_WIRE] = pf_state_key_ref(st->key[PF_SK_WIRE]);
+       keys[PF_SK_STACK] = pf_state_key_ref(st->key[PF_SK_STACK]);
+       mtx_leave(&st->mtx);
+}
+
+void
+pf_state_keys_rele(struct pf_state_key *keys[])
+{
+       pf_state_key_unref(keys[PF_SK_WIRE]);
+       pf_state_key_unref(keys[PF_SK_STACK]);
+}
diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h
index 93394e6da5c..4604869ede3 100644
--- a/sys/net/pfvar.h
+++ b/sys/net/pfvar.h
@@ -763,6 +763,7 @@ struct pf_state {
        struct pf_sn_head        src_nodes;
        struct pf_state_key     *key[2];        /* addresses stack and wire  */
        struct pfi_kif          *kif;
+       struct mutex             mtx;
        u_int64_t                packets[2];
        u_int64_t                bytes[2];
        int32_t                  creation;
@@ -1737,7 +1738,7 @@ void                               
pf_state_rm_src_node(struct pf_state *,
 extern struct pf_state         *pf_find_state_byid(struct pf_state_cmp *);
 extern struct pf_state         *pf_find_state_all(struct pf_state_key_cmp *,
                                    u_int, int *);
-extern void                     pf_state_export(struct pfsync_state *,
+extern int                      pf_state_export(struct pfsync_state *,
                                    struct pf_state *);
 extern void                     pf_print_state(struct pf_state *);
 extern void                     pf_print_flags(u_int8_t);

Reply via email to