Hello,

</snip>
> 
> Could you make 2 definitions for the lock?  It doesn't make sense
> to enable them by default for now.  I'd like to see your diff committed
> now with empty defines and an easy way to enable it.
> 
> That means ok mpi@ if the defines do not take/release locks by default.
> 

    I introduce a WITH_PF_LOCK compile-time option. To enable the PF_LOCK
    one has to add 'option WITH_PF_LOCK' to the kernel build configuration
    (see config(8) for details). The PF_LOCK is disabled by default. The
    same goes for the mutex that protects the fragments: if WITH_PF_LOCK is
    not defined, the code builds without pf_frag_mtx.

    This is the diff against the pf-lock.diff I sent earlier [1]; the
    updated diff I'm going to commit is attached to this email.

--------8<---------------8<---------------8<------------------8<--------
diff -r cf7bdb0a6054 src/sys/net/pf_ioctl.c
--- a/src/sys/net/pf_ioctl.c    Tue May 30 20:27:39 2017 +0200
+++ b/src/sys/net/pf_ioctl.c    Wed May 31 09:40:33 2017 +0200
@@ -129,7 +129,9 @@ struct {
 TAILQ_HEAD(pf_tags, pf_tagname)        pf_tags = TAILQ_HEAD_INITIALIZER(pf_tags),
                                pf_qids = TAILQ_HEAD_INITIALIZER(pf_qids);
 
+#ifdef WITH_PF_LOCK
 struct rwlock           pf_lock = RWLOCK_INITIALIZER("pf_lock");
+#endif /* WITH_PF_LOCK */
 
 #if (PF_QNAME_SIZE != PF_TAG_NAME_SIZE)
 #error PF_QNAME_SIZE must be equal to PF_TAG_NAME_SIZE
diff -r cf7bdb0a6054 src/sys/net/pf_norm.c
--- a/src/sys/net/pf_norm.c     Tue May 30 20:27:39 2017 +0200
+++ b/src/sys/net/pf_norm.c     Wed May 31 09:40:33 2017 +0200
@@ -136,11 +136,17 @@ struct pool                pf_frent_pl, pf_frag_pl;
 struct pool             pf_state_scrub_pl;
 int                     pf_nfrents;
 
+#ifdef WITH_PF_LOCK
 struct mutex            pf_frag_mtx;
 
 #define PF_FRAG_LOCK_INIT()    mtx_init(&pf_frag_mtx, IPL_SOFTNET)
 #define PF_FRAG_LOCK()         mtx_enter(&pf_frag_mtx)
 #define PF_FRAG_UNLOCK()       mtx_leave(&pf_frag_mtx)
+#else /* !WITH_PF_LOCK */
+#define PF_FRAG_LOCK_INIT()    (void)(0)
+#define PF_FRAG_LOCK()         (void)(0)
+#define PF_FRAG_UNLOCK()       (void)(0)
+#endif /* WITH_PF_LOCK */
 
 void
 pf_normalize_init(void)
diff -r cf7bdb0a6054 src/sys/net/pfvar_priv.h
--- a/src/sys/net/pfvar_priv.h  Tue May 30 20:27:39 2017 +0200
+++ b/src/sys/net/pfvar_priv.h  Wed May 31 09:40:33 2017 +0200
@@ -98,6 +98,7 @@ struct pf_pdesc {
        } hdr;
 };
 
+#ifdef WITH_PF_LOCK
 extern struct rwlock   pf_lock;
 
 #define PF_LOCK()              do {            \
@@ -120,6 +121,12 @@ extern struct rwlock       pf_lock;
                if (rw_status(&pf_lock) == RW_WRITE)    \
                        splassert_fail(0, rw_status(&pf_lock), __func__);\
        } while (0)
+#else /* !WITH_PF_LOCK */
+#define PF_LOCK()              (void)(0)
+#define PF_UNLOCK()            (void)(0)
+#define PF_ASSERT_LOCKED()     (void)(0)
+#define PF_ASSERT_UNLOCKED()   (void)(0)
+#endif /* WITH_PF_LOCK */
 
 #endif /* _KERNEL */
 
--------8<---------------8<---------------8<------------------8<--------
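
As a side note, the empty defines are what keep the call sites free of
#ifdefs.  Here is a minimal standalone sketch of the same pattern
(WITH_DEMO_LOCK, DEMO_LOCK() and the printf bodies are illustrative
only, they are not part of the diff):

    #include <stdio.h>

    #ifdef WITH_DEMO_LOCK
    #define DEMO_LOCK()     printf("lock\n")
    #define DEMO_UNLOCK()   printf("unlock\n")
    #else /* !WITH_DEMO_LOCK */
    #define DEMO_LOCK()     (void)(0)
    #define DEMO_UNLOCK()   (void)(0)
    #endif /* WITH_DEMO_LOCK */

    int
    main(void)
    {
            DEMO_LOCK();
            printf("critical section\n");
            DEMO_UNLOCK();
            return (0);
    }

Built with 'cc -DWITH_DEMO_LOCK demo.c' the macros expand to real
statements; built without the define they expand to (void)(0), so the
callers compile unchanged either way.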

thanks and
regards
sasha

[1] 
http://openbsd-archive.7691.n7.nabble.com/let-s-add-PF-LOCK-td319624.html#a319661
diff -r 85b6b6ce74cd .hgtags
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/.hgtags   Wed May 31 09:42:36 2017 +0200
@@ -0,0 +1,1 @@
+d545881e2652dbc0c057691a39a095bce92f441f pf-lock.baseline
diff -r 85b6b6ce74cd src/sys/net/pf.c
--- a/src/sys/net/pf.c  Tue May 30 20:11:44 2017 +0200
+++ b/src/sys/net/pf.c  Wed May 31 09:42:36 2017 +0200
@@ -923,7 +923,7 @@ int
 pf_state_insert(struct pfi_kif *kif, struct pf_state_key **skw,
     struct pf_state_key **sks, struct pf_state *s)
 {
-       NET_ASSERT_LOCKED();
+       PF_ASSERT_LOCKED();
 
        s->kif = kif;
        if (*skw == *sks) {
@@ -1186,7 +1186,7 @@ pf_purge_expired_rules(void)
 {
        struct pf_rule  *r;
 
-       NET_ASSERT_LOCKED();
+       PF_ASSERT_LOCKED();
 
        if (SLIST_EMPTY(&pf_rule_gcl))
                return;
@@ -1207,6 +1207,7 @@ pf_purge_thread(void *v)
                tsleep(pf_purge_thread, PWAIT, "pftm", 1 * hz);
 
                NET_LOCK(s);
+               PF_LOCK();
 
                /* process a fraction of the state table every second */
                pf_purge_expired_states(1 + (pf_status.states
@@ -1214,13 +1215,20 @@ pf_purge_thread(void *v)
 
                /* purge other expired types every PFTM_INTERVAL seconds */
                if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
-                       pf_purge_expired_fragments();
                        pf_purge_expired_src_nodes(0);
                        pf_purge_expired_rules();
+               }
+
+               PF_UNLOCK();
+               NET_UNLOCK(s);
+
+               /*
+                * Fragments don't require PF_LOCK(), they use their own mutex.
+                */
+               if (nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
+                       pf_purge_expired_fragments();
                        nloops = 0;
                }
-
-               NET_UNLOCK(s);
        }
 }
 
@@ -1267,7 +1275,7 @@ pf_purge_expired_src_nodes(void)
 {
        struct pf_src_node              *cur, *next;
 
-       NET_ASSERT_LOCKED();
+       PF_ASSERT_LOCKED();
 
        for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
                next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);
@@ -1303,7 +1311,7 @@ pf_src_tree_remove_state(struct pf_state
 void
 pf_remove_state(struct pf_state *cur)
 {
-       NET_ASSERT_LOCKED();
+       PF_ASSERT_LOCKED();
 
        /* handle load balancing related tasks */
        pf_postprocess_addr(cur);
@@ -1320,7 +1328,6 @@ pf_remove_state(struct pf_state *cur)
        }
        RB_REMOVE(pf_state_tree_id, &tree_id, cur);
 #if NPFLOW > 0
-       if (cur->state_flags & PFSTATE_PFLOW)
                export_pflow(cur);
 #endif /* NPFLOW > 0 */
 #if NPFSYNC > 0
@@ -1350,7 +1357,7 @@ pf_free_state(struct pf_state *cur)
 {
        struct pf_rule_item *ri;
 
-       NET_ASSERT_LOCKED();
+       PF_ASSERT_LOCKED();
 
 #if NPFSYNC > 0
        if (pfsync_state_in_use(cur))
@@ -1386,7 +1393,7 @@ pf_purge_expired_states(u_int32_t maxche
        static struct pf_state  *cur = NULL;
        struct pf_state         *next;
 
-       NET_ASSERT_LOCKED();
+       PF_ASSERT_LOCKED();
 
        while (maxcheck--) {
                /* wrap to start of list when we hit the end */
@@ -3146,13 +3153,13 @@ pf_socket_lookup(struct pf_pdesc *pd)
        case IPPROTO_TCP:
                sport = pd->hdr.tcp.th_sport;
                dport = pd->hdr.tcp.th_dport;
-               NET_ASSERT_LOCKED();
+               PF_ASSERT_LOCKED();
                tb = &tcbtable;
                break;
        case IPPROTO_UDP:
                sport = pd->hdr.udp.uh_sport;
                dport = pd->hdr.udp.uh_dport;
-               NET_ASSERT_LOCKED();
+               PF_ASSERT_LOCKED();
                tb = &udbtable;
                break;
        default:
@@ -6673,6 +6680,7 @@ pf_test(sa_family_t af, int fwdir, struc
        /* if packet sits in reassembly queue, return without error */
        if (pd.m == NULL)
                return PF_PASS;
+
        if (action != PF_PASS) {
 #if NPFLOG > 0
                pd.pflog |= PF_LOG_FORCE;
@@ -6692,6 +6700,9 @@ pf_test(sa_family_t af, int fwdir, struc
        }
        pd.m->m_pkthdr.pf.flags |= PF_TAG_PROCESSED;
 
+       /* lock the look-up/write section of pf_test() */
+       PF_LOCK();
+
        switch (pd.virtual_proto) {
 
        case PF_VPROTO_FRAGMENT: {
@@ -6711,7 +6722,7 @@ pf_test(sa_family_t af, int fwdir, struc
                        REASON_SET(&reason, PFRES_NORM);
                        DPFPRINTF(LOG_NOTICE,
                            "dropping IPv6 packet with ICMPv4 payload");
-                       goto done;
+                       goto unlock;
                }
                action = pf_test_state_icmp(&pd, &s, &reason);
                if (action == PF_PASS || action == PF_AFRT) {
@@ -6736,7 +6747,7 @@ pf_test(sa_family_t af, int fwdir, struc
                        REASON_SET(&reason, PFRES_NORM);
                        DPFPRINTF(LOG_NOTICE,
                            "dropping IPv4 packet with ICMPv6 payload");
-                       goto done;
+                       goto unlock;
                }
                action = pf_test_state_icmp(&pd, &s, &reason);
                if (action == PF_PASS || action == PF_AFRT) {
@@ -6761,7 +6772,7 @@ pf_test(sa_family_t af, int fwdir, struc
                                pqid = 1;
                        action = pf_normalize_tcp(&pd);
                        if (action == PF_DROP)
-                               goto done;
+                               goto unlock;
                }
                action = pf_test_state(&pd, &s, &reason);
                if (action == PF_PASS || action == PF_AFRT) {
@@ -6788,6 +6799,15 @@ pf_test(sa_family_t af, int fwdir, struc
                break;
        }
 
+unlock:
+       PF_UNLOCK();
+
+       /*
+        * At the moment, we rely on NET_LOCK() to prevent removal of items
+        * we've collected above ('s', 'r', 'anchor' and 'ruleset').  They'll
+        * have to be refcounted when NET_LOCK() is gone.
+        */
+
 done:
        if (action != PF_DROP) {
                if (s) {
@@ -6991,6 +7011,7 @@ done:
        }
 
        *m0 = pd.m;
+
        return (action);
 }
 
diff -r 85b6b6ce74cd src/sys/net/pf_ioctl.c
--- a/src/sys/net/pf_ioctl.c    Tue May 30 20:11:44 2017 +0200
+++ b/src/sys/net/pf_ioctl.c    Wed May 31 09:42:36 2017 +0200
@@ -129,6 +129,10 @@ struct {
 TAILQ_HEAD(pf_tags, pf_tagname)        pf_tags = TAILQ_HEAD_INITIALIZER(pf_tags),
                                pf_qids = TAILQ_HEAD_INITIALIZER(pf_qids);
 
+#ifdef WITH_PF_LOCK
+struct rwlock           pf_lock = RWLOCK_INITIALIZER("pf_lock");
+#endif /* WITH_PF_LOCK */
+
 #if (PF_QNAME_SIZE != PF_TAG_NAME_SIZE)
 #error PF_QNAME_SIZE must be equal to PF_TAG_NAME_SIZE
 #endif
@@ -999,6 +1003,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t a
                }
 
        NET_LOCK(s);
+       PF_LOCK();
        switch (cmd) {
 
        case DIOCSTART:
@@ -2452,6 +2457,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t a
                break;
        }
 fail:
+       PF_UNLOCK();
        NET_UNLOCK(s);
        return (error);
 }
diff -r 85b6b6ce74cd src/sys/net/pf_norm.c
--- a/src/sys/net/pf_norm.c     Tue May 30 20:11:44 2017 +0200
+++ b/src/sys/net/pf_norm.c     Wed May 31 09:42:36 2017 +0200
@@ -39,6 +39,7 @@
 #include <sys/time.h>
 #include <sys/pool.h>
 #include <sys/syslog.h>
+#include <sys/mutex.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
@@ -135,6 +136,18 @@ struct pool                 pf_frent_pl, pf_frag_pl;
 struct pool             pf_state_scrub_pl;
 int                     pf_nfrents;
 
+#ifdef WITH_PF_LOCK
+struct mutex            pf_frag_mtx;
+
+#define PF_FRAG_LOCK_INIT()    mtx_init(&pf_frag_mtx, IPL_SOFTNET)
+#define PF_FRAG_LOCK()         mtx_enter(&pf_frag_mtx)
+#define PF_FRAG_UNLOCK()       mtx_leave(&pf_frag_mtx)
+#else /* !WITH_PF_LOCK */
+#define PF_FRAG_LOCK_INIT()    (void)(0)
+#define PF_FRAG_LOCK()         (void)(0)
+#define PF_FRAG_UNLOCK()       (void)(0)
+#endif /* WITH_PF_LOCK */
+
 void
 pf_normalize_init(void)
 {
@@ -149,6 +162,8 @@ pf_normalize_init(void)
        pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0);
 
        TAILQ_INIT(&pf_fragqueue);
+
+       PF_FRAG_LOCK_INIT();
 }
 
 static __inline int
@@ -176,15 +191,18 @@ pf_purge_expired_fragments(void)
        struct pf_fragment      *frag;
        int32_t                  expire;
 
-       NET_ASSERT_LOCKED();
+       PF_ASSERT_UNLOCKED();
 
        expire = time_uptime - pf_default_rule.timeout[PFTM_FRAG];
+
+       PF_FRAG_LOCK();
        while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) {
                if (frag->fr_timeout > expire)
                        break;
                DPFPRINTF(LOG_NOTICE, "expiring %d(%p)", frag->fr_id, frag);
                pf_free_fragment(frag);
        }
+       PF_FRAG_UNLOCK();
 }
 
 /*
@@ -533,14 +551,19 @@ pf_reassemble(struct mbuf **m0, int dir,
        key.fr_id = ip->ip_id;
        key.fr_direction = dir;
 
-       if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL)
+       PF_FRAG_LOCK();
+       if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL) {
+               PF_FRAG_UNLOCK();
                return (PF_DROP);
+       }
 
        /* The mbuf is part of the fragment entry, no direct free or access */
        m = *m0 = NULL;
 
-       if (!pf_isfull_fragment(frag))
+       if (!pf_isfull_fragment(frag)) {
+               PF_FRAG_UNLOCK();
                return (PF_PASS);  /* drop because *m0 is NULL, no error */
+       }
 
        /* We have all the data */
        frent = TAILQ_FIRST(&frag->fr_queue);
@@ -564,6 +587,7 @@ pf_reassemble(struct mbuf **m0, int dir,
        ip->ip_off &= ~(IP_MF|IP_OFFMASK);
 
        if (hdrlen + total > IP_MAXPACKET) {
+               PF_FRAG_UNLOCK();
                DPFPRINTF(LOG_NOTICE, "drop: too big: %d", total);
                ip->ip_len = 0;
                REASON_SET(reason, PFRES_SHORT);
@@ -571,6 +595,7 @@ pf_reassemble(struct mbuf **m0, int dir,
                return (PF_DROP);
        }
 
+       PF_FRAG_UNLOCK();
        DPFPRINTF(LOG_INFO, "complete: %p(%d)", m, ntohs(ip->ip_len));
        return (PF_PASS);
 }
@@ -610,14 +635,19 @@ pf_reassemble6(struct mbuf **m0, struct 
        key.fr_id = fraghdr->ip6f_ident;
        key.fr_direction = dir;
 
-       if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL)
+       PF_FRAG_LOCK();
+       if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL) {
+               PF_FRAG_UNLOCK();
                return (PF_DROP);
+       }
 
        /* The mbuf is part of the fragment entry, no direct free or access */
        m = *m0 = NULL;
 
-       if (!pf_isfull_fragment(frag))
+       if (!pf_isfull_fragment(frag)) {
+               PF_FRAG_UNLOCK();
                return (PF_PASS);  /* drop because *m0 is NULL, no error */
+       }
 
        /* We have all the data */
        extoff = frent->fe_extoff;
@@ -671,17 +701,20 @@ pf_reassemble6(struct mbuf **m0, struct 
                ip6->ip6_nxt = proto;
 
        if (hdrlen - sizeof(struct ip6_hdr) + total > IPV6_MAXPACKET) {
+               PF_FRAG_UNLOCK();
                DPFPRINTF(LOG_NOTICE, "drop: too big: %d", total);
                ip6->ip6_plen = 0;
                REASON_SET(reason, PFRES_SHORT);
                /* PF_DROP requires a valid mbuf *m0 in pf_test6() */
                return (PF_DROP);
        }
+       PF_FRAG_UNLOCK();
 
        DPFPRINTF(LOG_INFO, "complete: %p(%d)", m, ntohs(ip6->ip6_plen));
        return (PF_PASS);
 
 fail:
+       PF_FRAG_UNLOCK();
        REASON_SET(reason, PFRES_MEMORY);
        /* PF_DROP requires a valid mbuf *m0 in pf_test6(), will free later */
        return (PF_DROP);
diff -r 85b6b6ce74cd src/sys/net/pfvar_priv.h
--- a/src/sys/net/pfvar_priv.h  Tue May 30 20:11:44 2017 +0200
+++ b/src/sys/net/pfvar_priv.h  Wed May 31 09:42:36 2017 +0200
@@ -37,6 +37,10 @@
 
 #ifdef _KERNEL
 
+#include <sys/rwlock.h>
+
+extern struct rwlock pf_lock;
+
 struct pf_pdesc {
        struct {
                int      done;
@@ -94,6 +98,36 @@ struct pf_pdesc {
        } hdr;
 };
 
+#ifdef WITH_PF_LOCK
+extern struct rwlock   pf_lock;
+
+#define PF_LOCK()              do {            \
+               NET_ASSERT_LOCKED();            \
+               rw_enter_write(&pf_lock);       \
+       } while (0)
+
+#define PF_UNLOCK()            do {            \
+               PF_ASSERT_LOCKED();             \
+               rw_exit_write(&pf_lock);        \
+       } while (0)
+
+#define PF_ASSERT_LOCKED()     do {                    \
+               if (rw_status(&pf_lock) != RW_WRITE)    \
+                       splassert_fail(RW_WRITE,        \
+                           rw_status(&pf_lock),__func__);\
+       } while (0)
+
+#define PF_ASSERT_UNLOCKED()   do {                    \
+               if (rw_status(&pf_lock) == RW_WRITE)    \
+                       splassert_fail(0, rw_status(&pf_lock), __func__);\
+       } while (0)
+#else /* !WITH_PF_LOCK */
+#define PF_LOCK()              (void)(0)
+#define PF_UNLOCK()            (void)(0)
+#define PF_ASSERT_LOCKED()     (void)(0)
+#define PF_ASSERT_UNLOCKED()   (void)(0)
+#endif /* WITH_PF_LOCK */
+
 #endif /* _KERNEL */
 
 #endif /* _NET_PFVAR_PRIV_H_ */
