Hello,

I've managed to get pf_test() running in parallel on forwarding path in my
experimental tree. And there was some fall out. PF died on ASSERT() in
pf_state_key_link_reverse() at line 7371:

    7368 pf_state_key_link_reverse(struct pf_state_key *sk, ...)
    7369 {
    7370         /* Note that sk and skrev may be equal, then we ... */
    7371         KASSERT(sk->reverse == NULL);
    7372         KASSERT(skrev->reverse == NULL);
    7373         sk->reverse = pf_state_key_ref(skrev);
    7374         skrev->reverse = pf_state_key_ref(sk);
    7375 }


pf_state_key_link_reverse() function is being called from pf_find_state() here:

    1074         if (sk == NULL) {
    1075                 if ((sk = RB_FIND(pf_state_tree, &pf_statetbl,
    1076                     (struct pf_state_key *)key)) == NULL)
    1077                         return (PF_DROP);
    1078                 if (pd->dir == PF_OUT && pkt_sk &&
    1079                     pf_compare_state_keys(pkt_sk, sk, ...) == 0)
    1080                         pf_state_key_link_reverse(sk, pkt_sk);
    1081                 else if (pd->dir == PF_OUT && pd->m->m_pkthdr.pf.inp &&
    1082                     !pd->m->m_pkthdr.pf.inp->inp_pf_sk && !sk->inp)
    1083                         pf_state_key_link_inpcb(sk, pd->m->m_pkt...);
    1084         }

the pf_find_state() performs a look up to PF state table and as such it runs
under PF_STATE_ENTER_READ() lock. So there might be actually more threads
running at pf_state_key_link_reverse() function. The thread, which currently
loses the race trips the assert in 7371. Patch below uses atomic ops
to handle the race properly.

OK?

thanks and
regards
sashan

--------8<---------------8<---------------8<------------------8<--------
diff --git a/sys/net/pf.c b/sys/net/pf.c
index 9e454e5c941..6e2ec19faaa 100644
--- a/sys/net/pf.c
+++ b/sys/net/pf.c
@@ -7367,11 +7367,20 @@ pf_inp_unlink(struct inpcb *inp)
 void
 pf_state_key_link_reverse(struct pf_state_key *sk, struct pf_state_key *skrev)
 {
-       /* Note that sk and skrev may be equal, then we refcount twice. */
-       KASSERT(sk->reverse == NULL);
-       KASSERT(skrev->reverse == NULL);
-       sk->reverse = pf_state_key_ref(skrev);
-       skrev->reverse = pf_state_key_ref(sk);
+       struct pf_state_key *old_reverse;
+
+       old_reverse = atomic_cas_ptr(&sk->reverse, NULL, skrev);
+       if (old_reverse != NULL)
+               KASSERT(old_reverse == skrev);
+       else
+               pf_state_key_ref(skrev);
+
+
+       old_reverse = atomic_cas_ptr(&skrev->reverse, NULL, sk);
+       if (old_reverse != NULL)
+               KASSERT(old_reverse == sk);
+       else
+               pf_state_key_ref(sk);
 }
 
 #if NPFLOG > 0

Reply via email to