pfsync deferrals are used so if you have firewalls that could both
process packets, you defer sending the initial packet in state so the
peer can learn about the state before potentially handling packets for
it.

there are three ways that a deferral can end. the preferred one is if a
peer firewall acks the insert of the state, and then this firewall can
send the packet. the second is if the number of deferrals gets too high,
it tries to pop the earliest deferral and push the original packet on
without waiting for the firewall. the third is when a timeout expires
because the peer firewall didn't ack in time, so we push the packet on.

every deferral has its own timeout at the moment. unfortunately there
isn't good coordination between the three different paths that could
clean up the deferral, so you can get a fault like this one:

kernel page fault
uvm_fault(0xffffffff82131f60, 0x7, 0, 2) -> e
pfsync_defer_tmo(fffffd816d66d0d0) at pfsync_defer_tmo+0x41
end trace frame: 0xffff800024cf2130, count: 0
ddb{0}> tr
pfsync_defer_tmo(fffffd816d66d0d0) at pfsync_defer_tmo+0x41
softclock_thread(ffff8000ffffefc0) at softclock_thread+0x169
end trace frame: 0x0, count: -2

basically two paths are cleaning up a deferral and one of them ends up
doing a use after free. it's the timeout handler in this one.

this diff moves things around so there's a single timeout that pfsync
processes a list of deferrals from. this allows us to say that a
deferral is pending if it is on the list, and whoever takes it off the
list is responsible for handling it to completion. the reasoning about
memory lifetimes is a lot easier.

we've been running this for a week in production, and it's been good so
far.

i hate the timersub/timercmp macros, so i was going to implement this
with a uint64_t and getnanosecuptime, but i can do that later.

ok?

Index: if_pfsync.c
===================================================================
RCS file: /cvs/src/sys/net/if_pfsync.c,v
retrieving revision 1.288
diff -u -p -r1.288 if_pfsync.c
--- if_pfsync.c 10 Mar 2021 10:21:48 -0000      1.288
+++ if_pfsync.c 4 Jun 2021 00:31:56 -0000
@@ -187,7 +187,7 @@ struct pfsync_deferral {
        TAILQ_ENTRY(pfsync_deferral)             pd_entry;
        struct pf_state                         *pd_st;
        struct mbuf                             *pd_m;
-       struct timeout                           pd_tmo;
+       struct timeval                           pd_deadline;
 };
 TAILQ_HEAD(pfsync_deferrals, pfsync_deferral);
 
@@ -223,6 +223,7 @@ struct pfsync_softc {
        struct pfsync_deferrals  sc_deferrals;
        u_int                    sc_deferred;
        struct mutex             sc_deferrals_mtx;
+       struct timeout           sc_deferrals_tmo;
 
        void                    *sc_plus;
        size_t                   sc_pluslen;
@@ -273,7 +274,7 @@ void        pfsync_ifdetach(void *);
 
 void   pfsync_deferred(struct pf_state *, int);
 void   pfsync_undefer(struct pfsync_deferral *, int);
-void   pfsync_defer_tmo(void *);
+void   pfsync_deferrals_tmo(void *);
 
 void   pfsync_cancel_full_update(struct pfsync_softc *);
 void   pfsync_request_full_update(struct pfsync_softc *);
@@ -346,6 +347,7 @@ pfsync_clone_create(struct if_clone *ifc
        mtx_init(&sc->sc_upd_req_mtx, IPL_SOFTNET);
        TAILQ_INIT(&sc->sc_deferrals);
        mtx_init(&sc->sc_deferrals_mtx, IPL_SOFTNET);
+       timeout_set_proc(&sc->sc_deferrals_tmo, pfsync_deferrals_tmo, sc);
        task_set(&sc->sc_ltask, pfsync_syncdev_state, sc);
        task_set(&sc->sc_dtask, pfsync_ifdetach, sc);
        sc->sc_deferred = 0;
@@ -1931,6 +1933,9 @@ pfsync_defer(struct pf_state *st, struct
 {
        struct pfsync_softc *sc = pfsyncif;
        struct pfsync_deferral *pd;
+       struct timeval now;
+       unsigned int sched;
+       static const struct timeval defer = { 0, 20000 };
 
        NET_ASSERT_LOCKED();
 
@@ -1942,10 +1947,12 @@ pfsync_defer(struct pf_state *st, struct
        if (sc->sc_deferred >= 128) {
                mtx_enter(&sc->sc_deferrals_mtx);
                pd = TAILQ_FIRST(&sc->sc_deferrals);
-               TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
-               sc->sc_deferred--;
+               if (pd != NULL) {
+                       TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
+                       sc->sc_deferred--;
+               }
                mtx_leave(&sc->sc_deferrals_mtx);
-               if (timeout_del(&pd->pd_tmo))
+               if (pd != NULL)
                        pfsync_undefer(pd, 0);
        }
 
@@ -1959,13 +1966,18 @@ pfsync_defer(struct pf_state *st, struct
        pd->pd_st = pf_state_ref(st);
        pd->pd_m = m;
 
+       getmicrouptime(&now);
+       timeradd(&now, &defer, &pd->pd_deadline);
+
        mtx_enter(&sc->sc_deferrals_mtx);
-       sc->sc_deferred++;
+       sched = TAILQ_EMPTY(&sc->sc_deferrals);
+
        TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry);
+       sc->sc_deferred++;
        mtx_leave(&sc->sc_deferrals_mtx);
 
-       timeout_set_proc(&pd->pd_tmo, pfsync_defer_tmo, pd);
-       timeout_add_msec(&pd->pd_tmo, 20);
+       if (sched)
+               timeout_add_tv(&sc->sc_deferrals_tmo, &defer);
 
        schednetisr(NETISR_PFSYNC);
 
@@ -2047,17 +2059,45 @@ pfsync_undefer(struct pfsync_deferral *p
 }
 
 void
-pfsync_defer_tmo(void *arg)
+pfsync_deferrals_tmo(void *arg)
 {
-       struct pfsync_softc *sc = pfsyncif;
-       struct pfsync_deferral *pd = arg;
+       struct pfsync_softc *sc = arg;
+       struct pfsync_deferral *pd;
+       struct timeval now, tv;
+       struct pfsync_deferrals pds = TAILQ_HEAD_INITIALIZER(pds);
 
+       getmicrouptime(&now);
        mtx_enter(&sc->sc_deferrals_mtx);
-       TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
-       sc->sc_deferred--;
+       for (;;) {
+               pd = TAILQ_FIRST(&sc->sc_deferrals);
+               if (pd == NULL)
+                       break;
+
+               if (timercmp(&now, &pd->pd_deadline, <)) {
+                       /* time remaining until pd's deadline */
+                       timersub(&pd->pd_deadline, &now, &tv);
+                       break;
+               }
+
+               TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
+               sc->sc_deferred--;
+               TAILQ_INSERT_TAIL(&pds, pd, pd_entry);
+       }
        mtx_leave(&sc->sc_deferrals_mtx);
 
+       if (pd != NULL) {
+               /* we were looking at a pd, but it wasn't old enough */
+               timeout_add_tv(&sc->sc_deferrals_tmo, &tv);
+       }
+
+       if (TAILQ_EMPTY(&pds))
+               return;
+
        NET_LOCK();
-       pfsync_undefer(pd, 0);
+       while ((pd = TAILQ_FIRST(&pds)) != NULL) {
+               TAILQ_REMOVE(&pds, pd, pd_entry);
+
+               pfsync_undefer(pd, 0);
+       }
        NET_UNLOCK();
 }
 
@@ -2072,17 +2111,15 @@ pfsync_deferred(struct pf_state *st, int
        mtx_enter(&sc->sc_deferrals_mtx);
        TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) {
                 if (pd->pd_st == st) {
-                       if (timeout_del(&pd->pd_tmo)) {
-                               TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
-                               sc->sc_deferred--;
-                               mtx_leave(&sc->sc_deferrals_mtx);
-                               pfsync_undefer(pd, drop);
-                       } else
-                               mtx_leave(&sc->sc_deferrals_mtx);
-                       return;
+                       TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
+                       sc->sc_deferred--;
+                       break;
                }
        }
        mtx_leave(&sc->sc_deferrals_mtx);
+
+       if (pd != NULL)
+               pfsync_undefer(pd, drop);
 }
 
 void

Reply via email to