Hello,
after reading emails from Philip Guenther and Mark Kettenis, doing some RTFM on
locking in OpenBSD kernel I have a new patch. I call it as a smp-step-0.
Patch introduces a KERNEL_LOCK() to PF. Many dances with KERNEL_LOCK() happens
in pf_test(). From future work point of view there are distinct parts as
follows in pf_test():
- packet sanitization with reassembly. We grab and drop KERNEL_LOCK()
pf_noralize_ip*() functions around pf_reassemble*(). Fragment cache
will get its own rw-lock later.
- as soon as packet is sanitized (or should say normalized), we do
state check. State check will get its own rw-lock (I call it
smp-step-1)
- if no state is found PF proceeds to rule check, which will get
its rw-lock too (smp-step-2).
Basically KERNEL_LOCK() is placed everywhere, where individual rw-locks will be
introduced later.
As Philip Guenther has pointed out earlier in email, the user-threads must
grab KERNEL_LOCK() before they will jump to rw-locks. The game is easy for PF
in ioctl() subsystem, since everything there will be user thread.
For packets (pf_test()) function the game is not that clear, since not all
packets will be running in user-thread context. My assumption is the packets
bound to loopback and local outbound packets are running in user-threads. All
other packets (inbound and forwarded) are running in interrupt-threads. For
this reason I'd like to introduce the extra code to pf_test(), which decides
whether packet requires KERNEL_LOCK(), because it is running as a user thread.
I'd like to get the patch in, before proceeding to next SMP step.
thanks and
regards
sasha
--------8<---------------8<---------------8<------------------8<--------
Index: net/pf.c
===================================================================
RCS file: /cvs/src/sys/net/pf.c,v
retrieving revision 1.937
diff -u -p -r1.937 pf.c
--- net/pf.c 1 Sep 2015 19:12:25 -0000 1.937
+++ net/pf.c 4 Sep 2015 12:52:55 -0000
@@ -6316,6 +6316,7 @@ pf_test(sa_family_t af, int fwdir, struc
union pf_headers pdhdrs;
int dir = (fwdir == PF_FWD) ? PF_OUT : fwdir;
u_int32_t qid, pqid = 0;
+ int have_lock = 0;
if (!pf_status.running)
return (PF_PASS);
@@ -6349,6 +6350,26 @@ pf_test(sa_family_t af, int fwdir, struc
return (PF_PASS);
}
+ /*
+ * Unlike kernel-threads, the user-threads require to have
+ * KENREL_LOCK() before they will touch rw-locks.
+ *
+ * The assumption here is fairly simple:
+ * - all inbound packets run as interrupt (kernel-thread),
+ * unless they are bound to loopback
+ *
+ * - outbound packets run as kernel-thread, if they
+ * are marked by PF_TAG_KERNEL flag.
+ *
+ * in all other cases we must assume packet runs in user-thread
+ * context and we should get KERNEL_LOCK().
+ */
+ if ((dir == PF_IN) && (ifp->if_type != IFT_LOOP)) {
+ (*m0)->m_pkthdr.pf.flags |= PF_TAG_KERNEL;
+ } else if (((*m0)->m_pkthdr.pf.flags & PF_TAG_KERNEL) == 0) {
+ PF_LOCK(have_lock);
+ }
+
action = pf_setup_pdesc(&pd, &pdhdrs, af, dir, kif, *m0, &reason);
if (action != PF_PASS) {
#if NPFLOG > 0
@@ -6399,6 +6420,7 @@ pf_test(sa_family_t af, int fwdir, struc
* handle fragments that aren't reassembled by
* normalization
*/
+ PF_LOCK(have_lock);
action = pf_test_rule(&pd, &r, &s, &a, &ruleset);
if (action != PF_PASS)
REASON_SET(&reason, PFRES_FRAG);
@@ -6413,6 +6435,7 @@ pf_test(sa_family_t af, int fwdir, struc
"dropping IPv6 packet with ICMPv4 payload");
goto done;
}
+ PF_LOCK(have_lock);
action = pf_test_state_icmp(&pd, &s, &reason);
if (action == PF_PASS || action == PF_AFRT) {
#if NPFSYNC > 0
@@ -6437,6 +6460,7 @@ pf_test(sa_family_t af, int fwdir, struc
"dropping IPv4 packet with ICMPv6 payload");
goto done;
}
+ PF_LOCK(have_lock);
action = pf_test_state_icmp(&pd, &s, &reason);
if (action == PF_PASS || action == PF_AFRT) {
#if NPFSYNC > 0
@@ -6461,6 +6485,7 @@ pf_test(sa_family_t af, int fwdir, struc
if (action == PF_DROP)
goto done;
}
+ PF_LOCK(have_lock);
action = pf_test_state(&pd, &s, &reason);
if (action == PF_PASS || action == PF_AFRT) {
#if NPFSYNC > 0
@@ -6658,6 +6683,8 @@ done:
else if (!s->if_index_out && dir == PF_OUT)
s->if_index_out = ifp->if_index;
}
+
+ PF_UNLOCK(have_lock);
return (action);
}
Index: net/pf_ioctl.c
===================================================================
RCS file: /cvs/src/sys/net/pf_ioctl.c,v
retrieving revision 1.289
diff -u -p -r1.289 pf_ioctl.c
--- net/pf_ioctl.c 21 Jul 2015 02:32:04 -0000 1.289
+++ net/pf_ioctl.c 4 Sep 2015 12:52:57 -0000
@@ -906,6 +906,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t a
return (EACCES);
}
+ /*
+ * Unlike kernel-thread, the user thread must obtain KERNEL_LOCK()
+ * before it touches rw-locks.
+ */
+ KERNEL_LOCK();
if (flags & FWRITE)
rw_enter_write(&pf_consistency_lock);
else
@@ -2310,6 +2315,8 @@ fail:
rw_exit_write(&pf_consistency_lock);
else
rw_exit_read(&pf_consistency_lock);
+ KERNEL_UNLOCK();
+
return (error);
}
Index: net/pf_norm.c
===================================================================
RCS file: /cvs/src/sys/net/pf_norm.c,v
retrieving revision 1.181
diff -u -p -r1.181 pf_norm.c
--- net/pf_norm.c 19 Aug 2015 21:22:41 -0000 1.181
+++ net/pf_norm.c 4 Sep 2015 12:53:00 -0000
@@ -772,6 +772,7 @@ pf_normalize_ip(struct pf_pdesc *pd, u_s
struct ip *h = mtod(pd->m, struct ip *);
u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
u_int16_t mff = (ntohs(h->ip_off) & IP_MF);
+ int rv;
if (!fragoff && !mff)
goto no_fragment;
@@ -794,7 +795,10 @@ pf_normalize_ip(struct pf_pdesc *pd, u_s
return (PF_PASS); /* no reassembly */
/* Returns PF_DROP or m is NULL or completely reassembled mbuf */
- if (pf_reassemble(&pd->m, pd->dir, reason) != PF_PASS)
+ KERNEL_LOCK();
+ rv = pf_reassemble(&pd->m, pd->dir, reason);
+ KERNEL_UNLOCK();
+ if (rv != PF_PASS)
return (PF_DROP);
if (pd->m == NULL)
return (PF_PASS); /* packet has been reassembled, no error */
@@ -814,6 +818,7 @@ int
pf_normalize_ip6(struct pf_pdesc *pd, u_short *reason)
{
struct ip6_frag frag;
+ int rv;
if (pd->fragoff == 0)
goto no_fragment;
@@ -826,8 +831,11 @@ pf_normalize_ip6(struct pf_pdesc *pd, u_
return (PF_PASS); /* no reassembly */
/* Returns PF_DROP or m is NULL or completely reassembled mbuf */
- if (pf_reassemble6(&pd->m, &frag, pd->fragoff + sizeof(frag),
- pd->extoff, pd->dir, reason) != PF_PASS)
+ KERNEL_LOCK();
+ rv = pf_reassemble6(&pd->m, &frag, pd->fragoff + sizeof(frag),
+ pd->extoff, pd->dir, reason);
+ KERNEL_UNLOCK();
+ if (rv != PF_PASS);
return (PF_DROP);
if (pd->m == NULL)
return (PF_PASS); /* packet has been reassembled, no error */
Index: net/pfvar.h
===================================================================
RCS file: /cvs/src/sys/net/pfvar.h,v
retrieving revision 1.420
diff -u -p -r1.420 pfvar.h
--- net/pfvar.h 19 Aug 2015 21:22:41 -0000 1.420
+++ net/pfvar.h 4 Sep 2015 12:53:04 -0000
@@ -1909,5 +1909,18 @@ void pf_cksum(struct pf_pdesc *,
stru
#endif /* _KERNEL */
+#define PF_LOCK(_have_lock_) do { \
+ if ((_have_lock_) == 0) { \
+ KERNEL_LOCK(); \
+ (_have_lock_) = 1; \
+ } \
+ } while (0)
+
+#define PF_UNLOCK(_have_lock_) do { \
+ if ((_have_lock_) != 0) { \
+ KERNEL_UNLOCK(); \
+ (_have_lock_) = 0; \
+ } \
+ } while (0)
#endif /* _NET_PFVAR_H_ */
Index: sys/mbuf.h
===================================================================
RCS file: /cvs/src/sys/sys/mbuf.h,v
retrieving revision 1.196
diff -u -p -r1.196 mbuf.h
--- sys/mbuf.h 14 Aug 2015 05:25:29 -0000 1.196
+++ sys/mbuf.h 4 Sep 2015 12:53:05 -0000
@@ -106,6 +106,7 @@ struct pkthdr_pf {
/* pkthdr_pf.flags */
#define PF_TAG_GENERATED 0x01
+#define PF_TAG_KERNEL 0x02 /* packet runs as
kernel-thr */
#define PF_TAG_TRANSLATE_LOCALHOST 0x04
#define PF_TAG_DIVERTED 0x08
#define PF_TAG_DIVERTED_PACKET 0x10
@@ -115,7 +116,7 @@ struct pkthdr_pf {
#ifdef _KERNEL
#define MPF_BITS \
- ("\20\1GENERATED\3TRANSLATE_LOCALHOST\4DIVERTED\5DIVERTED_PACKET" \
+ ("\20\1GENERATED\2KERNEL\3TRANSLATE_LOCALHOST\4DIVERTED\5DIVERTED_PACKET" \
"\6REROUTE\7REFRAGMENTED\10PROCESSED")
#endif