Hello Benjamin,

On Mon, Jul 05, 2021 at 04:44:55PM -0500, Warriner, Benjamin wrote:
> Synopsis: Kernel Panic not sure as to cause
> > Category: kernel panic
> > Environment:
> System : OpenBSD 6.9
> Details : OpenBSD 6.9 (GENERIC.MP) #3: Mon Jun 7 08:21:26 MDT 2021
> r...@syspatch-69-amd64.openbsd.org:/usr/src/sys/arch/amd64/compile/GENERIC.MP
> 
> Architecture: OpenBSD.amd64
> Machine : amd64
> > Description:
> Kernel Panic'd on the 2nd of July and again today, appears to be similar
> issue?
> Starting stack trace...
> panic(ffffffff81e27b16) at panic+0x11d
> rw_enter(ffffffff820f40d0,1) at rw_enter+0x26f
> pf_test(18,2,ffff8000019d0800,ffff800023c5d940) at pf_test+0x1108
> ip6_output(fffffd8062ea1300,0,0,0,ffff800023c5d9f0,0) at ip6_output+0xcad
> nd6_ns_output(ffff8000019d0800,0,ffff800023c5dc90,fffffd833ddcd540,0) at
> nd6_ns_output+0x3db
> nd6_resolve(ffff8000019d0800,fffffd83d82527e0,fffffd806743fa00,ffff800023c5dc88,ffff800023c5dc08)
> at nd6_resolve+0x2a7
> ether_resolve(ffff8000019d0800,fffffd806743fa00,ffff800023c5dc88,fffffd83d82527e0,ffff800023c5dc08)
> at ether_resolve+0x23b
> ether_output(ffff8000019d0800,fffffd806743fa00,ffff800023c5dc88,fffffd83d82527e0)
> at ether_output+0x2c
> ip6_output(fffffd806743fa00,0,0,0,0,0) at ip6_output+0x10bc
> pfsync_undefer_notify(fffffd82f7be24e8) at pfsync_undefer_notify+0xae
> pfsync_undefer(fffffd82f7be24e8,0) at pfsync_undefer+0xa8
> pfsync_defer(fffffd8325b24090,fffffd8060597000) at pfsync_defer+0x10e
> pf_test_rule(ffff800023c5e0f0,ffff800023c5e1d8,ffff800023c5e1e0,ffff800023c5e1d0,ffff800023c5e1c0,ffff800023c5e1ee)
> at pf_test_rule+0x680
> pf_test(2,3,ffff8000019d7800,ffff800023c5e2e0) at pf_test+0x1073
> ip_output(fffffd8060597000,0,ffff800023c5e470,1,0,0) at ip_output+0x7b6
> ip_forward(fffffd8060597000,ffff8000013b4048,fffffd83d022fe20,0) at
> ip_forward+0x261
> ip_input_if(ffff800023c5e5a8,ffff800023c5e5b4,4,0,ffff8000013b4048) at
> ip_input_if+0x600
> ipv4_input(ffff8000013b4048,fffffd8060597000) at ipv4_input+0x39
> if_input_process(ffff8000013b4048,ffff800023c5e628) at if_input_process+0x6f
> ifiq_process(ffff8000013b4458) at ifiq_process+0x69
> taskq_thread(ffff800000030080) at taskq_thread+0x81
> end trace frame: 0x0, count: 236
> End of stack trace.

    It looks like you are hitting the same bug, which has been reported
    by David Gwynne few weeks back. I believe patch [1], which I sent to
    tech@ should help. It will be great if you would kindly test it.
    I'm attaching the same diff here.

    the panic stack above tells us the system dies, when CPU attempts
    to acquire PF_LOCK() it holds already. Note there are two calls
    to pf_test() in the stack above.

    diff below moves call to pfsync_undefer() to pf_test(). The
    pfsync_undefer() will get called _after_ we drop PF_LOCK().
    diff below still needs OKs to get committed.

thanks and
regards
sashan

[1] https://marc.info/?l=openbsd-tech&m=162388474129236&w=2

[2] https://marc.info/?t=162127422600001&r=1&w=2

--------8<---------------8<---------------8<------------------8<--------
diff --git a/sys/net/if_pfsync.c b/sys/net/if_pfsync.c
index 744a7ca9dbc..11b4e3f1f2f 100644
--- a/sys/net/if_pfsync.c
+++ b/sys/net/if_pfsync.c
@@ -1931,7 +1931,7 @@ pfsync_insert_state(struct pf_state *st)
 }
 
 int
-pfsync_defer(struct pf_state *st, struct mbuf *m)
+pfsync_defer(struct pf_state *st, struct mbuf *m, struct pfsync_deferral **ppd)
 {
        struct pfsync_softc *sc = pfsyncif;
        struct pfsync_deferral *pd;
@@ -1944,21 +1944,30 @@ pfsync_defer(struct pf_state *st, struct mbuf *m)
            m->m_flags & (M_BCAST|M_MCAST))
                return (0);
 
+       pd = pool_get(&sc->sc_pool, M_NOWAIT);
+       if (pd == NULL)
+               return (0);
+
+       /*
+        * deferral queue grows faster, than timeout can consume,
+        * we have to ask packet (caller) to help timer and dispatch
+        * one deferral for us.
+        *
+        * We wish to call pfsync_undefer() here. Unfortunately we can't,
+        * because pfsync_undefer() will be calling to ip_output(),
+        * which in turn will call to pf_test(), which would then attempt
+        * to grab PF_LOCK() we currently hold.
+        */
        if (sc->sc_deferred >= 128) {
                mtx_enter(&sc->sc_deferrals_mtx);
-               pd = TAILQ_FIRST(&sc->sc_deferrals);
-               if (pd != NULL) {
-                       TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
+               *ppd = TAILQ_FIRST(&sc->sc_deferrals);
+               if (*ppd != NULL) {
+                       TAILQ_REMOVE(&sc->sc_deferrals, *ppd, pd_entry);
                        sc->sc_deferred--;
                }
                mtx_leave(&sc->sc_deferrals_mtx);
-               if (pd != NULL)
-                       pfsync_undefer(pd, 0);
-       }
-
-       pd = pool_get(&sc->sc_pool, M_NOWAIT);
-       if (pd == NULL)
-               return (0);
+       } else
+               *ppd = NULL;
 
        m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
        SET(st->state_flags, PFSTATE_ACK);
diff --git a/sys/net/if_pfsync.h b/sys/net/if_pfsync.h
index 2520c87a36c..037219537e8 100644
--- a/sys/net/if_pfsync.h
+++ b/sys/net/if_pfsync.h
@@ -297,6 +297,8 @@ enum pfsync_counters {
 
 extern struct cpumem *pfsynccounters;
 
+struct pfsync_deferral;
+
 static inline void
 pfsyncstat_inc(enum pfsync_counters c)
 {
@@ -335,7 +337,9 @@ void                        pfsync_clear_states(u_int32_t, 
const char *);
 void                   pfsync_update_tdb(struct tdb *, int);
 void                   pfsync_delete_tdb(struct tdb *);
 
-int                    pfsync_defer(struct pf_state *, struct mbuf *);
+int                    pfsync_defer(struct pf_state *, struct mbuf *,
+                           struct pfsync_deferral **);
+void                   pfsync_undefer(struct pfsync_deferral *, int);
 
 int                    pfsync_up(void);
 int                    pfsync_state_in_use(struct pf_state *);
diff --git a/sys/net/pf.c b/sys/net/pf.c
index 672f4159719..0672f5f6992 100644
--- a/sys/net/pf.c
+++ b/sys/net/pf.c
@@ -194,7 +194,8 @@ void                         pf_rule_to_actions(struct 
pf_rule *,
                            struct pf_rule_actions *);
 int                     pf_test_rule(struct pf_pdesc *, struct pf_rule **,
                            struct pf_state **, struct pf_rule **,
-                           struct pf_ruleset **, u_short *);
+                           struct pf_ruleset **, u_short *,
+                           void **);
 static __inline int     pf_create_state(struct pf_pdesc *, struct pf_rule *,
                            struct pf_rule *, struct pf_rule *,
                            struct pf_state_key **, struct pf_state_key **,
@@ -3808,7 +3809,8 @@ pf_match_rule(struct pf_test_ctx *ctx, struct pf_ruleset 
*ruleset)
 
 int
 pf_test_rule(struct pf_pdesc *pd, struct pf_rule **rm, struct pf_state **sm,
-    struct pf_rule **am, struct pf_ruleset **rsm, u_short *reason)
+    struct pf_rule **am, struct pf_ruleset **rsm, u_short *reason,
+    void **pdeferral)
 {
        struct pf_rule          *r = NULL;
        struct pf_rule          *a = NULL;
@@ -4041,7 +4043,8 @@ pf_test_rule(struct pf_pdesc *pd, struct pf_rule **rm, 
struct pf_state **sm,
                 * firewall has to know about it to allow
                 * replies through it.
                 */
-               if (pfsync_defer(*sm, pd->m))
+               if (pfsync_defer(*sm, pd->m,
+                   (struct pfsync_deferral **)pdeferral))
                        return (PF_DEFER);
        }
 #endif /* NPFSYNC > 0 */
@@ -6890,6 +6893,7 @@ pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, 
struct mbuf **m0)
        int                      dir = (fwdir == PF_FWD) ? PF_OUT : fwdir;
        u_int32_t                qid, pqid = 0;
        int                      have_pf_lock = 0;
+       void                    *deferral = NULL;
 
        if (!pf_status.running)
                return (PF_PASS);
@@ -6992,7 +6996,8 @@ pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, 
struct mbuf **m0)
                 */
                PF_LOCK();
                have_pf_lock = 1;
-               action = pf_test_rule(&pd, &r, &s, &a, &ruleset, &reason);
+               action = pf_test_rule(&pd, &r, &s, &a, &ruleset, &reason,
+                   &deferral);
                s = pf_state_ref(s);
                if (action != PF_PASS)
                        REASON_SET(&reason, PFRES_FRAG);
@@ -7024,7 +7029,7 @@ pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, 
struct mbuf **m0)
                        PF_LOCK();
                        have_pf_lock = 1;
                        action = pf_test_rule(&pd, &r, &s, &a, &ruleset,
-                           &reason);
+                           &reason, &deferral);
                        s = pf_state_ref(s);
                }
                break;
@@ -7056,7 +7061,7 @@ pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, 
struct mbuf **m0)
                        PF_LOCK();
                        have_pf_lock = 1;
                        action = pf_test_rule(&pd, &r, &s, &a, &ruleset,
-                           &reason);
+                           &reason, &deferral);
                        s = pf_state_ref(s);
                }
                break;
@@ -7132,7 +7137,7 @@ pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, 
struct mbuf **m0)
                        PF_LOCK();
                        have_pf_lock = 1;
                        action = pf_test_rule(&pd, &r, &s, &a, &ruleset,
-                           &reason);
+                           &reason, &deferral);
                        s = pf_state_ref(s);
                }
 
@@ -7268,6 +7273,14 @@ done:
                m_freem(pd.m);
                /* FALLTHROUGH */
        case PF_DEFER:
+#if NPFSYNC > 0
+               /*
+                * We no longer hold PF_LOCK() here, so we can dispatch
+                * deferral if we are asked to do so.
+                */
+               if (deferral != NULL)
+                       pfsync_undefer(deferral, 0);
+#endif /* NPFSYNC > 0 */
                pd.m = NULL;
                action = PF_PASS;
                break;

Reply via email to