Hello,

after reading emails from Philip Guenther and Mark Kettenis, doing some RTFM on
locking in OpenBSD kernel I have a new patch. I call it as a smp-step-0.

Patch introduces a KERNEL_LOCK() to PF. Many dances with KERNEL_LOCK() happens
in pf_test(). From future work point of view there are distinct parts as
follows in pf_test():

        - packet sanitization with reassembly. We grab and drop KERNEL_LOCK()
          pf_noralize_ip*() functions around  pf_reassemble*(). Fragment cache
          will get its own rw-lock later.

        - as soon as packet is sanitized (or should say normalized), we do
          state check. State check will get its own rw-lock (I call it
          smp-step-1)

        - if no state is found PF proceeds to rule check, which will get
          its rw-lock too (smp-step-2).

Basically KERNEL_LOCK() is placed everywhere, where individual rw-locks will be
introduced later.

As Philip Guenther has pointed out earlier in email, the user-threads must
grab KERNEL_LOCK() before they will jump to rw-locks. The game is easy for PF
in ioctl() subsystem, since everything there will be user thread.

For packets (pf_test()) function the game is not that clear, since not all
packets will be running in user-thread context. My assumption is the packets
bound to loopback and local outbound packets are running in user-threads.  All
other packets (inbound and forwarded) are running in interrupt-threads. For
this reason I'd like to introduce the extra code to pf_test(), which decides
whether packet requires KERNEL_LOCK(), because it is running as a user thread.

I'd like to get the patch in, before proceeding to next SMP step.

thanks and
regards
sasha

--------8<---------------8<---------------8<------------------8<--------

Index: net/pf.c
===================================================================
RCS file: /cvs/src/sys/net/pf.c,v
retrieving revision 1.937
diff -u -p -r1.937 pf.c
--- net/pf.c    1 Sep 2015 19:12:25 -0000       1.937
+++ net/pf.c    4 Sep 2015 12:52:55 -0000
@@ -6316,6 +6316,7 @@ pf_test(sa_family_t af, int fwdir, struc
        union pf_headers         pdhdrs;
        int                      dir = (fwdir == PF_FWD) ? PF_OUT : fwdir;
        u_int32_t                qid, pqid = 0;
+       int                      have_lock = 0;
 
        if (!pf_status.running)
                return (PF_PASS);
@@ -6349,6 +6350,26 @@ pf_test(sa_family_t af, int fwdir, struc
                return (PF_PASS);
        }
 
+       /*
+        * Unlike kernel-threads, the user-threads require to have
+        * KENREL_LOCK() before they will touch rw-locks.
+        *
+        * The assumption here is fairly simple:
+        *      - all inbound packets run as interrupt (kernel-thread),
+        *        unless they are bound to loopback
+        *
+        *      - outbound packets run as kernel-thread, if they
+        *        are marked by PF_TAG_KERNEL flag.
+        *
+        * in all other cases we must assume packet runs in user-thread
+        * context and we should get KERNEL_LOCK().
+        */
+       if ((dir == PF_IN) && (ifp->if_type != IFT_LOOP)) {
+               (*m0)->m_pkthdr.pf.flags |= PF_TAG_KERNEL;
+       } else if (((*m0)->m_pkthdr.pf.flags & PF_TAG_KERNEL) == 0) {
+               PF_LOCK(have_lock);
+       }
+
        action = pf_setup_pdesc(&pd, &pdhdrs, af, dir, kif, *m0, &reason);
        if (action != PF_PASS) {
 #if NPFLOG > 0
@@ -6399,6 +6420,7 @@ pf_test(sa_family_t af, int fwdir, struc
                 * handle fragments that aren't reassembled by
                 * normalization
                 */
+               PF_LOCK(have_lock);
                action = pf_test_rule(&pd, &r, &s, &a, &ruleset);
                if (action != PF_PASS)
                        REASON_SET(&reason, PFRES_FRAG);
@@ -6413,6 +6435,7 @@ pf_test(sa_family_t af, int fwdir, struc
                            "dropping IPv6 packet with ICMPv4 payload");
                        goto done;
                }
+               PF_LOCK(have_lock);
                action = pf_test_state_icmp(&pd, &s, &reason);
                if (action == PF_PASS || action == PF_AFRT) {
 #if NPFSYNC > 0
@@ -6437,6 +6460,7 @@ pf_test(sa_family_t af, int fwdir, struc
                            "dropping IPv4 packet with ICMPv6 payload");
                        goto done;
                }
+               PF_LOCK(have_lock);
                action = pf_test_state_icmp(&pd, &s, &reason);
                if (action == PF_PASS || action == PF_AFRT) {
 #if NPFSYNC > 0
@@ -6461,6 +6485,7 @@ pf_test(sa_family_t af, int fwdir, struc
                        if (action == PF_DROP)
                                goto done;
                }
+               PF_LOCK(have_lock);
                action = pf_test_state(&pd, &s, &reason);
                if (action == PF_PASS || action == PF_AFRT) {
 #if NPFSYNC > 0
@@ -6658,6 +6683,8 @@ done:
                else if (!s->if_index_out && dir == PF_OUT)
                        s->if_index_out = ifp->if_index;
        }
+
+       PF_UNLOCK(have_lock);
 
        return (action);
 }
Index: net/pf_ioctl.c
===================================================================
RCS file: /cvs/src/sys/net/pf_ioctl.c,v
retrieving revision 1.289
diff -u -p -r1.289 pf_ioctl.c
--- net/pf_ioctl.c      21 Jul 2015 02:32:04 -0000      1.289
+++ net/pf_ioctl.c      4 Sep 2015 12:52:57 -0000
@@ -906,6 +906,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t a
                        return (EACCES);
                }
 
+       /*
+        * Unlike kernel-thread, the user thread must obtain KERNEL_LOCK()
+        * before it touches rw-locks.
+        */
+       KERNEL_LOCK();
        if (flags & FWRITE)
                rw_enter_write(&pf_consistency_lock);
        else
@@ -2310,6 +2315,8 @@ fail:
                rw_exit_write(&pf_consistency_lock);
        else
                rw_exit_read(&pf_consistency_lock);
+       KERNEL_UNLOCK();
+
        return (error);
 }
 
Index: net/pf_norm.c
===================================================================
RCS file: /cvs/src/sys/net/pf_norm.c,v
retrieving revision 1.181
diff -u -p -r1.181 pf_norm.c
--- net/pf_norm.c       19 Aug 2015 21:22:41 -0000      1.181
+++ net/pf_norm.c       4 Sep 2015 12:53:00 -0000
@@ -772,6 +772,7 @@ pf_normalize_ip(struct pf_pdesc *pd, u_s
        struct ip       *h = mtod(pd->m, struct ip *);
        u_int16_t        fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
        u_int16_t        mff = (ntohs(h->ip_off) & IP_MF);
+       int              rv;
 
        if (!fragoff && !mff)
                goto no_fragment;
@@ -794,7 +795,10 @@ pf_normalize_ip(struct pf_pdesc *pd, u_s
                return (PF_PASS);       /* no reassembly */
 
        /* Returns PF_DROP or m is NULL or completely reassembled mbuf */
-       if (pf_reassemble(&pd->m, pd->dir, reason) != PF_PASS)
+       KERNEL_LOCK();
+       rv = pf_reassemble(&pd->m, pd->dir, reason);
+       KERNEL_UNLOCK();
+       if (rv != PF_PASS)
                return (PF_DROP);
        if (pd->m == NULL)
                return (PF_PASS);  /* packet has been reassembled, no error */
@@ -814,6 +818,7 @@ int
 pf_normalize_ip6(struct pf_pdesc *pd, u_short *reason)
 {
        struct ip6_frag          frag;
+       int                      rv;
 
        if (pd->fragoff == 0)
                goto no_fragment;
@@ -826,8 +831,11 @@ pf_normalize_ip6(struct pf_pdesc *pd, u_
                return (PF_PASS);       /* no reassembly */
 
        /* Returns PF_DROP or m is NULL or completely reassembled mbuf */
-       if (pf_reassemble6(&pd->m, &frag, pd->fragoff + sizeof(frag),
-           pd->extoff, pd->dir, reason) != PF_PASS)
+       KERNEL_LOCK();
+       rv = pf_reassemble6(&pd->m, &frag, pd->fragoff + sizeof(frag),
+           pd->extoff, pd->dir, reason);
+       KERNEL_UNLOCK();
+       if (rv != PF_PASS);
                return (PF_DROP);
        if (pd->m == NULL)
                return (PF_PASS);  /* packet has been reassembled, no error */
Index: net/pfvar.h
===================================================================
RCS file: /cvs/src/sys/net/pfvar.h,v
retrieving revision 1.420
diff -u -p -r1.420 pfvar.h
--- net/pfvar.h 19 Aug 2015 21:22:41 -0000      1.420
+++ net/pfvar.h 4 Sep 2015 12:53:04 -0000
@@ -1909,5 +1909,18 @@ void                      pf_cksum(struct pf_pdesc *, 
stru
 
 #endif /* _KERNEL */
 
+#define        PF_LOCK(_have_lock_)    do {            \
+               if ((_have_lock_) == 0) {       \
+                       KERNEL_LOCK();          \
+                       (_have_lock_) = 1;      \
+               }                               \
+       } while (0)
+
+#define        PF_UNLOCK(_have_lock_)  do {            \
+               if ((_have_lock_) != 0) {       \
+                       KERNEL_UNLOCK();        \
+                       (_have_lock_) = 0;      \
+               }                               \
+       } while (0)
 
 #endif /* _NET_PFVAR_H_ */
Index: sys/mbuf.h
===================================================================
RCS file: /cvs/src/sys/sys/mbuf.h,v
retrieving revision 1.196
diff -u -p -r1.196 mbuf.h
--- sys/mbuf.h  14 Aug 2015 05:25:29 -0000      1.196
+++ sys/mbuf.h  4 Sep 2015 12:53:05 -0000
@@ -106,6 +106,7 @@ struct pkthdr_pf {
 
 /* pkthdr_pf.flags */
 #define        PF_TAG_GENERATED                0x01
+#define        PF_TAG_KERNEL                   0x02    /* packet runs as 
kernel-thr */
 #define        PF_TAG_TRANSLATE_LOCALHOST      0x04
 #define        PF_TAG_DIVERTED                 0x08
 #define        PF_TAG_DIVERTED_PACKET          0x10
@@ -115,7 +116,7 @@ struct pkthdr_pf {
 
 #ifdef _KERNEL
 #define MPF_BITS \
-    ("\20\1GENERATED\3TRANSLATE_LOCALHOST\4DIVERTED\5DIVERTED_PACKET" \
+    ("\20\1GENERATED\2KERNEL\3TRANSLATE_LOCALHOST\4DIVERTED\5DIVERTED_PACKET" \
     "\6REROUTE\7REFRAGMENTED\10PROCESSED")
 #endif

Reply via email to