Re: let's add PF_LOCK()
Hello,

> I don't think it matters.  We still need more diffs to be able to test
> that.
>
> Now I'd really like it if you could commit this so we can continue
> improving it in-tree.

I've committed the patch _without_ the diff to sys/conf/GENERIC; we can
add it later, once more diffs are in.

regards
sasha
Re: let's add PF_LOCK()
On 01/06/17(Thu) 09:32, Alexandr Nedvedicky wrote:
> [...]
> >
> > Where do you want to define this WITH_PF_LOCK?
> >
>
> I currently add
>
> 	option WITH_PF_LOCK
>
> to sys/arch/amd64/compile/GENERIC.MP to build PF with PF_LOCK.
> But there should be a better place in the tree.  Perhaps
> sys/conf/GENERIC?

I don't think it matters.  We still need more diffs to be able to test
that.

Now I'd really like it if you could commit this so we can continue
improving it in-tree.

ok mpi@

> diff -r ccb9f01e56a7 .hgtags
> --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
> +++ .hgtags	Thu Jun 01 09:29:15 2017 +0200
> @@ -0,0 +1,1 @@
> +d545881e2652dbc0c057691a39a095bce92f441f pf-lock.baseline
> diff -r ccb9f01e56a7 src/sys/conf/GENERIC
> --- src/sys/conf/GENERIC	Thu Jun 01 09:19:56 2017 +0200
> +++ src/sys/conf/GENERIC	Thu Jun 01 09:29:15 2017 +0200
> @@ -16,7 +16,7 @@ option		ACCOUNTING	# acct(2) process acc
>  option		KMEMSTATS	# collect malloc(9) statistics
>  option		PTRACE		# ptrace(2) system call
>  #option		WITNESS		# witness(4) lock checker
> -#option		PF_LOCK		# build with pf lock support
> +#option		WITH_PF_LOCK	# PF lock support
> 
>  #option		KVA_GUARDPAGES	# slow virtual address recycling (+ guarding)
>  option		POOL_DEBUG	# pool corruption detection
> diff -r ccb9f01e56a7 src/sys/net/pf.c
> --- src/sys/net/pf.c	Thu Jun 01 09:19:56 2017 +0200
> +++ src/sys/net/pf.c	Thu Jun 01 09:29:15 2017 +0200
> @@ -923,7 +923,7 @@ int
>  pf_state_insert(struct pfi_kif *kif, struct pf_state_key **skw,
>      struct pf_state_key **sks, struct pf_state *s)
>  {
> -	NET_ASSERT_LOCKED();
> +	PF_ASSERT_LOCKED();
> 
>  	s->kif = kif;
>  	if (*skw == *sks) {
> @@ -1186,7 +1186,7 @@ pf_purge_expired_rules(void)
>  {
>  	struct pf_rule	*r;
> 
> -	NET_ASSERT_LOCKED();
> +	PF_ASSERT_LOCKED();
> 
>  	if (SLIST_EMPTY(&pf_rule_gcl))
>  		return;
> @@ -1208,15 +1208,22 @@ pf_purge_thread(void *v)
> 
>  		NET_LOCK(s);
> 
> +		PF_LOCK();
>  		/* process a fraction of the state table every second */
>  		pf_purge_expired_states(1 + (pf_status.states
>  		    / pf_default_rule.timeout[PFTM_INTERVAL]));
> 
>  		/* purge other expired types every PFTM_INTERVAL seconds */
>  		if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
> -			pf_purge_expired_fragments();
>  			pf_purge_expired_src_nodes(0);
>  			pf_purge_expired_rules();
> +		}
> +		PF_UNLOCK();
> +		/*
> +		 * Fragments don't require PF_LOCK(), they use their own lock.
> +		 */
> +		if (nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
> +			pf_purge_expired_fragments();
>  			nloops = 0;
>  		}
> 
> @@ -1267,7 +1274,7 @@ pf_purge_expired_src_nodes(void)
>  {
>  	struct pf_src_node	*cur, *next;
> 
> -	NET_ASSERT_LOCKED();
> +	PF_ASSERT_LOCKED();
> 
>  	for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
>  		next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);
> @@ -1303,7 +1310,7 @@ pf_src_tree_remove_state(struct pf_state
>  void
>  pf_remove_state(struct pf_state *cur)
>  {
> -	NET_ASSERT_LOCKED();
> +	PF_ASSERT_LOCKED();
> 
>  	/* handle load balancing related tasks */
>  	pf_postprocess_addr(cur);
> @@ -1350,7 +1357,7 @@ pf_free_state(struct pf_state *cur)
>  {
>  	struct pf_rule_item	*ri;
> 
> -	NET_ASSERT_LOCKED();
> +	PF_ASSERT_LOCKED();
> 
>  #if NPFSYNC > 0
>  	if (pfsync_state_in_use(cur))
> @@ -1386,7 +1393,7 @@ pf_purge_expired_states(u_int32_t maxche
>  	static struct pf_state	*cur = NULL;
>  	struct pf_state		*next;
> 
> -	NET_ASSERT_LOCKED();
> +	PF_ASSERT_LOCKED();
> 
>  	while (maxcheck--) {
>  		/* wrap to start of list when we hit the end */
> @@ -3146,13 +3153,13 @@ pf_socket_lookup(struct pf_pdesc *pd)
>  	case IPPROTO_TCP:
>  		sport = pd->hdr.tcp.th_sport;
>  		dport = pd->hdr.tcp.th_dport;
> -		NET_ASSERT_LOCKED();
> +		PF_ASSERT_LOCKED();
>  		tb = &tcbtable;
>  		break;
>  	case IPPROTO_UDP:
>  		sport = pd->hdr.udp.uh_sport;
>  		dport = pd->hdr.udp.uh_dport;
> -		NET_ASSERT_LOCKED();
> +		PF_ASSERT_LOCKED();
>  		tb = &udbtable;
>  		break;
>  	default:
> @@ -6678,6 +6685,7 @@ pf_test(sa_family_t af, int fwdir, struc
>  	/* if packet sits in reassembly queue, return without error */
>  	if (pd.m == NULL)
>  		return PF_PASS;
> +
>  	if (action != PF_PASS) {
>  #if NPFLOG > 0
>  		pd.pflog |=
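[A note for readers going through the hunks top-down: the PF_ASSERT_LOCKED()
that replaces NET_ASSERT_LOCKED() everywhere above is not the netlock
assertion under a new name; it checks the new pf_lock rwlock.  Condensed
from the pfvar_priv.h diff quoted further down this thread, the
WITH_PF_LOCK variants reduce to the following sketch; without
WITH_PF_LOCK, all four macros compile to no-ops:

	extern struct rwlock	pf_lock;

	#define PF_LOCK()	do {				\
			NET_ASSERT_LOCKED();			\
			rw_enter_write(&pf_lock);		\
		} while (0)

	#define PF_UNLOCK()	do {				\
			PF_ASSERT_LOCKED();			\
			rw_exit_write(&pf_lock);		\
		} while (0)

	#define PF_ASSERT_LOCKED()	do {			\
			if (rw_status(&pf_lock) != RW_WRITE)	\
				splassert_fail(RW_WRITE,	\
				    rw_status(&pf_lock), __func__);\
		} while (0)

	#define PF_ASSERT_UNLOCKED()	do {			\
			if (rw_status(&pf_lock) == RW_WRITE)	\
				splassert_fail(0,		\
				    rw_status(&pf_lock), __func__);\
		} while (0)
]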
Re: let's add PF_LOCK()
Hello,

> > diff -r 6abbb123112a .hgtags
> > --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
> > +++ b/.hgtags	Wed May 31 10:42:50 2017 +0200
> > @@ -0,0 +1,1 @@
> > +d545881e2652dbc0c057691a39a095bce92f441f pf-lock.baseline
>
> Please be careful and don't include VCS's goo in diffs.
> It'd be nice if you could manage to instruct mercurial
> to remove a/ and b/ parts.

I see.  Adding 'noprefix=true' to the [diff] section in .hgrc removes
the prefix.

> > +	}
> > +	PF_UNLOCK();
> > +	/*
> > +	 * Fragments don't require PF_LOCK(), they use their own mutex.
> > +	 */
> > +	if (nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
> > +		pf_purge_expired_fragments();
> >  		nloops = 0;
> >  	}
> 
> Minor nit: change the comment above to say "they use their own lock."
> to remove the reference to a specific lock implementation.

fixed.

> > @@ -1320,7 +1327,6 @@ pf_remove_state(struct pf_state *cur)
> >  	}
> >  	RB_REMOVE(pf_state_tree_id, &tree_id, cur);
> >  #if NPFLOW > 0
> > -	if (cur->state_flags & PFSTATE_PFLOW)
> >  		export_pflow(cur);
> 
> If you're removing this "if" statement, please remove the tab
> from the next line.

the "if" statement must stay.  I mismerged the code.  Thank you for
catching this!!!

> > @@ -6692,6 +6699,9 @@ pf_test(sa_family_t af, int fwdir, struc
> >  	}
> >  	pd.m->m_pkthdr.pf.flags |= PF_TAG_PROCESSED;
> > 
> > +	/* lock the look-up/write section of pf_test() */
> 
> Minor nit: spell "lookup" without a dash.

fixed

> > diff -r 6abbb123112a src/sys/net/pf_ioctl.c
> > --- a/src/sys/net/pf_ioctl.c	Wed May 31 10:21:18 2017 +0200
> > +++ b/src/sys/net/pf_ioctl.c	Wed May 31 10:42:50 2017 +0200
> > @@ -129,6 +129,10 @@ struct {
> >  TAILQ_HEAD(pf_tags, pf_tagname)	pf_tags =
> >  	TAILQ_HEAD_INITIALIZER(pf_tags),
> >  	pf_qids = TAILQ_HEAD_INITIALIZER(pf_qids);
> > 
> > +#ifdef WITH_PF_LOCK
> > +struct rwlock	pf_lock = RWLOCK_INITIALIZER("pf_lock");
> > +#endif /* WITH_PF_LOCK */
> > +
> >  #if (PF_QNAME_SIZE != PF_TAG_NAME_SIZE)
> >  #error PF_QNAME_SIZE must be equal to PF_TAG_NAME_SIZE
> >  #endif
> 
> Where do you want to define this WITH_PF_LOCK?

I currently add

	option WITH_PF_LOCK

to sys/arch/amd64/compile/GENERIC.MP to build PF with PF_LOCK.  But
there should be a better place in the tree.  Perhaps sys/conf/GENERIC?

--- src/sys/conf/GENERIC	Thu Jun 01 09:21:03 2017 +0200
+++ src/sys/conf/GENERIC	Thu Jun 01 09:21:52 2017 +0200
@@ -16,6 +16,7 @@ option		ACCOUNTING	# acct(2) process acc
 option		KMEMSTATS	# collect malloc(9) statistics
 option		PTRACE		# ptrace(2) system call
 #option		WITNESS		# witness(4) lock checker
+#option		WITH_PF_LOCK	# PF lock support
 
 #option		KVA_GUARDPAGES	# slow virtual address recycling (+ guarding)
 option		POOL_DEBUG	# pool corruption detection

> I suggest to make this look a tiny bit prettier:
> 
> #define PF_LOCK()	do {			\
> 		NET_ASSERT_LOCKED();		\
> 		rw_enter_write(&pf_lock);	\
> 	} while (0)
> 
> #define PF_UNLOCK()	do {			\
> 		PF_ASSERT_LOCKED();		\
> 		rw_exit_write(&pf_lock);	\
> 	} while (0)

indeed, it should look nicer now.

Note: the updated diff adds '#option WITH_PF_LOCK' to src/sys/conf/GENERIC

thanks a lot
regards
sasha

8<---8<---8<--8<
diff -r ccb9f01e56a7 .hgtags
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ .hgtags	Thu Jun 01 09:29:15 2017 +0200
@@ -0,0 +1,1 @@
+d545881e2652dbc0c057691a39a095bce92f441f pf-lock.baseline
diff -r ccb9f01e56a7 src/sys/conf/GENERIC
--- src/sys/conf/GENERIC	Thu Jun 01 09:19:56 2017 +0200
+++ src/sys/conf/GENERIC	Thu Jun 01 09:29:15 2017 +0200
@@ -16,7 +16,7 @@ option		ACCOUNTING	# acct(2) process acc
 option		KMEMSTATS	# collect malloc(9) statistics
 option		PTRACE		# ptrace(2) system call
 #option		WITNESS		# witness(4) lock checker
-#option		PF_LOCK		# build with pf lock support
+#option		WITH_PF_LOCK	# PF lock support
 
 #option		KVA_GUARDPAGES	# slow virtual address recycling (+ guarding)
 option		POOL_DEBUG	# pool corruption detection
diff -r ccb9f01e56a7 src/sys/net/pf.c
--- src/sys/net/pf.c	Thu Jun 01 09:19:56 2017 +0200
+++ src/sys/net/pf.c	Thu Jun 01 09:29:15 2017 +0200
@@ -923,7 +923,7 @@ int
 pf_state_insert(struct pfi_kif *kif, struct pf_state_key **skw,
     struct pf_state_key **sks, struct pf_state *s)
 {
-	NE
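[An aside for other mercurial users following along: both tweaks mentioned
in this thread -- the a/ b/ prefix removal above and the '-p' behaviour
Mike asked for -- are plain settings in ~/.hgrc.  A minimal example,
assuming a reasonably recent mercurial:

	[diff]
	# drop the a/ and b/ path prefixes from diffs
	noprefix = true
	# like 'hg diff -p': show the enclosing function in hunk headers
	showfunc = true
]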
Re: let's add PF_LOCK()
On Wed, May 31, 2017 at 10:50 +0200, Alexandr Nedvedicky wrote:
> Hello Mike,
> 
> I'd like to ask you to take one more look at the change I'm going
> to commit.

Looks much better w/o SPLs.  I didn't find any issues with the diff
and I'm OK with you going forward with this.

> I did one more check of the changes with respect to WITH_PF_LOCK
> and found one more bit to fix.  We need to keep
> pf_purge_expired_fragments() under the protection of NET_LOCK().

Fair enough.

> diff -r 6abbb123112a .hgtags
> --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
> +++ b/.hgtags	Wed May 31 10:42:50 2017 +0200
> @@ -0,0 +1,1 @@
> +d545881e2652dbc0c057691a39a095bce92f441f pf-lock.baseline

Please be careful and don't include VCS's goo in diffs.
It'd be nice if you could manage to instruct mercurial
to remove the a/ and b/ parts.

> @@ -1208,15 +1208,22 @@ pf_purge_thread(void *v)
> 
>  		NET_LOCK(s);
> 
> +		PF_LOCK();
>  		/* process a fraction of the state table every second */
>  		pf_purge_expired_states(1 + (pf_status.states
>  		    / pf_default_rule.timeout[PFTM_INTERVAL]));
> 
>  		/* purge other expired types every PFTM_INTERVAL seconds */
>  		if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
> -			pf_purge_expired_fragments();
>  			pf_purge_expired_src_nodes(0);
>  			pf_purge_expired_rules();
> +		}
> +		PF_UNLOCK();
> +		/*
> +		 * Fragments don't require PF_LOCK(), they use their own mutex.
> +		 */
> +		if (nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
> +			pf_purge_expired_fragments();
>  			nloops = 0;
>  		}
> 

Minor nit: change the comment above to say "they use their own lock."
to remove the reference to a specific lock implementation.

> @@ -1320,7 +1327,6 @@ pf_remove_state(struct pf_state *cur)
>  	}
>  	RB_REMOVE(pf_state_tree_id, &tree_id, cur);
>  #if NPFLOW > 0
> -	if (cur->state_flags & PFSTATE_PFLOW)
>  		export_pflow(cur);

If you're removing this "if" statement, please remove the tab
from the next line.

> @@ -6692,6 +6699,9 @@ pf_test(sa_family_t af, int fwdir, struc
>  	}
>  	pd.m->m_pkthdr.pf.flags |= PF_TAG_PROCESSED;
> 
> +	/* lock the look-up/write section of pf_test() */
> +	PF_LOCK();
> +
>  	switch (pd.virtual_proto) {
> 
>  	case PF_VPROTO_FRAGMENT: {

Minor nit: spell "lookup" without a dash.

> @@ -6711,7 +6721,7 @@ pf_test(sa_family_t af, int fwdir, struc
>  			REASON_SET(&reason, PFRES_NORM);
>  			DPFPRINTF(LOG_NOTICE,
>  			    "dropping IPv6 packet with ICMPv4 payload");
> -			goto done;
> +			goto unlock;
>  		}
>  		action = pf_test_state_icmp(&pd, &s, &reason);
>  		if (action == PF_PASS || action == PF_AFRT) {
> @@ -6736,7 +6746,7 @@ pf_test(sa_family_t af, int fwdir, struc
>  			REASON_SET(&reason, PFRES_NORM);
>  			DPFPRINTF(LOG_NOTICE,
>  			    "dropping IPv4 packet with ICMPv6 payload");
> -			goto done;
> +			goto unlock;
>  		}
>  		action = pf_test_state_icmp(&pd, &s, &reason);
>  		if (action == PF_PASS || action == PF_AFRT) {
> @@ -6761,7 +6771,7 @@ pf_test(sa_family_t af, int fwdir, struc
>  				pqid = 1;
>  			action = pf_normalize_tcp(&pd);
>  			if (action == PF_DROP)
> -				goto done;
> +				goto unlock;
>  		}
>  		action = pf_test_state(&pd, &s, &reason);
>  		if (action == PF_PASS || action == PF_AFRT) {
> @@ -6788,6 +6798,15 @@ pf_test(sa_family_t af, int fwdir, struc
>  		break;
>  	}
> 
> +unlock:
> +	PF_UNLOCK();
> +
> +	/*
> +	 * At the moment, we rely on NET_LOCK() to prevent removal of items
> +	 * we've collected above ('s', 'r', 'anchor' and 'ruleset').  They'll
> +	 * have to be refcounted when NET_LOCK() is gone.
> +	 */
> +
>  done:
>  	if (action != PF_DROP) {
>  		if (s) {
> @@ -6991,6 +7010,7 @@ done:
>  	}
> 
>  	*m0 = pd.m;
> +
>  	return (action);
>  }
> 
> diff -r 6abbb123112a src/sys/net/pf_ioctl.c
> --- a/src/sys/net/pf_ioctl.c	Wed May 31 10:21:18 2017 +0200
> +++ b/src/sys/net/pf_ioctl.c	Wed May 31 10:42:50 2017 +0200
> @@ -129,6 +129,10 @@ struct {
>  TAILQ_HEAD(pf_tags, pf_tagname)	pf_tags =
>  	TAILQ_HEAD_INITIALIZER(pf_tags),
>  	pf_qids = TAILQ_HEAD_INITIALIZER(pf_qids);
> 
> +#ifdef WITH_PF_LOCK
> +struct rwlock	pf_lock = RWLOCK_INITIALIZER("pf_lock");
> +#endif /* WITH_PF_LOCK */
> +
>  #if (PF_QNAME_SIZE != PF_TAG_NAME_SIZE)
>  #error
Re: let's add PF_LOCK()
Hello Mike,

I'd like to ask you to take one more look at the change I'm going to
commit.  I did one more check of the changes with respect to
WITH_PF_LOCK and found one more bit to fix: we need to keep
pf_purge_expired_fragments() under the protection of NET_LOCK().

8<---8<---8<--8<
diff -r 3f9d12f8bc14 src/sys/net/pf.c
--- a/src/sys/net/pf.c	Wed May 31 10:26:43 2017 +0200
+++ b/src/sys/net/pf.c	Wed May 31 10:42:22 2017 +0200
@@ -1207,8 +1207,8 @@ pf_purge_thread(void *v)
 		tsleep(pf_purge_thread, PWAIT, "pftm", 1 * hz);
 
 		NET_LOCK(s);
+		PF_LOCK();
 
-
 		/* process a fraction of the state table every second */
 		pf_purge_expired_states(1 + (pf_status.states
 		    / pf_default_rule.timeout[PFTM_INTERVAL]));
@@ -1218,10 +1218,7 @@ pf_purge_thread(void *v)
 			pf_purge_expired_src_nodes(0);
 			pf_purge_expired_rules();
 		}
-
 		PF_UNLOCK();
-		NET_UNLOCK(s);
-
 		/*
 		 * Fragments don't require PF_LOCK(), they use their own mutex.
 		 */
@@ -1229,6 +1226,8 @@ pf_purge_thread(void *v)
 			pf_purge_expired_fragments();
 			nloops = 0;
 		}
+
+		NET_UNLOCK(s);
 	}
 }
8<---8<---8<--8<

complete patch is attached.

thanks a lot
regards
sasha

diff -r 6abbb123112a .hgtags
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.hgtags	Wed May 31 10:42:50 2017 +0200
@@ -0,0 +1,1 @@
+d545881e2652dbc0c057691a39a095bce92f441f pf-lock.baseline
diff -r 6abbb123112a src/sys/net/pf.c
--- a/src/sys/net/pf.c	Wed May 31 10:21:18 2017 +0200
+++ b/src/sys/net/pf.c	Wed May 31 10:42:50 2017 +0200
@@ -923,7 +923,7 @@ int
 pf_state_insert(struct pfi_kif *kif, struct pf_state_key **skw,
     struct pf_state_key **sks, struct pf_state *s)
 {
-	NET_ASSERT_LOCKED();
+	PF_ASSERT_LOCKED();
 
 	s->kif = kif;
 	if (*skw == *sks) {
@@ -1186,7 +1186,7 @@ pf_purge_expired_rules(void)
 {
 	struct pf_rule	*r;
 
-	NET_ASSERT_LOCKED();
+	PF_ASSERT_LOCKED();
 
 	if (SLIST_EMPTY(&pf_rule_gcl))
 		return;
@@ -1208,15 +1208,22 @@ pf_purge_thread(void *v)
 
 		NET_LOCK(s);
 
+		PF_LOCK();
 		/* process a fraction of the state table every second */
 		pf_purge_expired_states(1 + (pf_status.states
 		    / pf_default_rule.timeout[PFTM_INTERVAL]));
 
 		/* purge other expired types every PFTM_INTERVAL seconds */
 		if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
-			pf_purge_expired_fragments();
 			pf_purge_expired_src_nodes(0);
 			pf_purge_expired_rules();
+		}
+		PF_UNLOCK();
+		/*
+		 * Fragments don't require PF_LOCK(), they use their own mutex.
+		 */
+		if (nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
+			pf_purge_expired_fragments();
 			nloops = 0;
 		}
 
@@ -1267,7 +1274,7 @@ pf_purge_expired_src_nodes(void)
 {
 	struct pf_src_node	*cur, *next;
 
-	NET_ASSERT_LOCKED();
+	PF_ASSERT_LOCKED();
 
 	for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
 		next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);
@@ -1303,7 +1310,7 @@ pf_src_tree_remove_state(struct pf_state
 void
 pf_remove_state(struct pf_state *cur)
 {
-	NET_ASSERT_LOCKED();
+	PF_ASSERT_LOCKED();
 
 	/* handle load balancing related tasks */
 	pf_postprocess_addr(cur);
@@ -1320,7 +1327,6 @@ pf_remove_state(struct pf_state *cur)
 	}
 	RB_REMOVE(pf_state_tree_id, &tree_id, cur);
 #if NPFLOW > 0
-	if (cur->state_flags & PFSTATE_PFLOW)
 		export_pflow(cur);
 #endif	/* NPFLOW > 0 */
 #if NPFSYNC > 0
@@ -1350,7 +1356,7 @@ pf_free_state(struct pf_state *cur)
 {
 	struct pf_rule_item	*ri;
 
-	NET_ASSERT_LOCKED();
+	PF_ASSERT_LOCKED();
 
 #if NPFSYNC > 0
 	if (pfsync_state_in_use(cur))
@@ -1386,7 +1392,7 @@ pf_purge_expired_states(u_int32_t maxche
 	static struct pf_state	*cur = NULL;
 	struct pf_state		*next;
 
-	NET_ASSERT_LOCKED();
+	PF_ASSERT_LOCKED();
 
 	while (maxcheck--) {
 		/* wrap to start of list when we hit the end */
@@ -3146,13 +3152,13 @@ pf_socket_lookup(struct pf_pdesc *pd)
 	case IPPROTO_TCP:
 		sport = pd->hdr.tcp.th_sport;
 		dport = pd->hdr.tcp.th_dport;
-		NET_ASSERT_LOCKED();
+		PF_ASSERT_LOCKED();
 		tb = &tcbtable;
 		break;
 	case IPPROTO_UDP:
 		sp
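[Condensed, the purge loop this thread converges on looks like the sketch
below (unrelated details elided).  The point is the ordering: PF_LOCK()
nests inside NET_LOCK(), and the fragment purge runs with the netlock
held but outside the pf_lock, since the fragment cache has its own lock:

	void
	pf_purge_thread(void *v)
	{
		int nloops = 0, s;

		for (;;) {
			tsleep(pf_purge_thread, PWAIT, "pftm", 1 * hz);

			NET_LOCK(s);
			PF_LOCK();

			/* a fraction of the state table every second */
			pf_purge_expired_states(1 + (pf_status.states
			    / pf_default_rule.timeout[PFTM_INTERVAL]));

			/* other expired types every PFTM_INTERVAL seconds */
			if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
				pf_purge_expired_src_nodes(0);
				pf_purge_expired_rules();
			}
			PF_UNLOCK();

			/* fragments: own lock, but still under the netlock */
			if (nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
				pf_purge_expired_fragments();
				nloops = 0;
			}

			NET_UNLOCK(s);
		}
	}
]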
Re: let's add PF_LOCK()
Hello,

> Could you make 2 definitions for the lock?  It doesn't make sense
> to enable them by default for now.  I'd like to see your diff committed
> now with empty defines and an easy way to enable it.
>
> That means ok mpi@ if the defines do not take/release locks by default.

I introduce a WITH_PF_LOCK compile-time option.  To enable PF_LOCK,
one has to add 'option WITH_PF_LOCK' to the kernel build configuration
(see config(8) for details).  PF_LOCK is disabled by default.

The same goes for the mutex that protects the fragments: if
WITH_PF_LOCK is not defined, the code builds without pf_frag_mtx.

This is the diff relative to the pf-lock.diff I sent earlier [1]; the
updated diff I'm going to commit is attached to this email.

8<---8<---8<--8<
diff -r cf7bdb0a6054 src/sys/net/pf_ioctl.c
--- a/src/sys/net/pf_ioctl.c	Tue May 30 20:27:39 2017 +0200
+++ b/src/sys/net/pf_ioctl.c	Wed May 31 09:40:33 2017 +0200
@@ -129,7 +129,9 @@ struct {
 TAILQ_HEAD(pf_tags, pf_tagname)	pf_tags =
 	TAILQ_HEAD_INITIALIZER(pf_tags),
 	pf_qids = TAILQ_HEAD_INITIALIZER(pf_qids);
 
+#ifdef WITH_PF_LOCK
 struct rwlock	pf_lock = RWLOCK_INITIALIZER("pf_lock");
+#endif /* WITH_PF_LOCK */
 
 #if (PF_QNAME_SIZE != PF_TAG_NAME_SIZE)
 #error PF_QNAME_SIZE must be equal to PF_TAG_NAME_SIZE
diff -r cf7bdb0a6054 src/sys/net/pf_norm.c
--- a/src/sys/net/pf_norm.c	Tue May 30 20:27:39 2017 +0200
+++ b/src/sys/net/pf_norm.c	Wed May 31 09:40:33 2017 +0200
@@ -136,11 +136,17 @@ struct pool	pf_frent_pl, pf_frag_pl;
 struct pool	pf_state_scrub_pl;
 int		pf_nfrents;
 
+#ifdef WITH_PF_LOCK
 struct mutex	pf_frag_mtx;
 
 #define PF_FRAG_LOCK_INIT()	mtx_init(&pf_frag_mtx, IPL_SOFTNET)
 #define PF_FRAG_LOCK()		mtx_enter(&pf_frag_mtx)
 #define PF_FRAG_UNLOCK()	mtx_leave(&pf_frag_mtx)
+#else /* !WITH_PF_LOCK */
+#define PF_FRAG_LOCK_INIT()	(void)(0)
+#define PF_FRAG_LOCK()		(void)(0)
+#define PF_FRAG_UNLOCK()	(void)(0)
+#endif /* WITH_PF_LOCK */
 
 void
 pf_normalize_init(void)
diff -r cf7bdb0a6054 src/sys/net/pfvar_priv.h
--- a/src/sys/net/pfvar_priv.h	Tue May 30 20:27:39 2017 +0200
+++ b/src/sys/net/pfvar_priv.h	Wed May 31 09:40:33 2017 +0200
@@ -98,6 +98,7 @@ struct pf_pdesc {
 	} hdr;
 };
 
+#ifdef WITH_PF_LOCK
 extern struct rwlock	pf_lock;
 
 #define PF_LOCK()	do {			\
@@ -120,6 +121,12 @@ extern struct rwlock	pf_lock;
 	if (rw_status(&pf_lock) == RW_WRITE)	\
 		splassert_fail(0, rw_status(&pf_lock), __func__);\
 } while (0)
+#else /* !WITH_PF_LOCK */
+#define PF_LOCK()		(void)(0)
+#define PF_UNLOCK()		(void)(0)
+#define PF_ASSERT_LOCKED()	(void)(0)
+#define PF_ASSERT_UNLOCKED()	(void)(0)
+#endif /* WITH_PF_LOCK */
 
 #endif /* _KERNEL */
8<---8<---8<--8<

thanks and
regards
sasha

[1] http://openbsd-archive.7691.n7.nabble.com/let-s-add-PF-LOCK-td319624.html#a319661

diff -r 85b6b6ce74cd .hgtags
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.hgtags	Wed May 31 09:42:36 2017 +0200
@@ -0,0 +1,1 @@
+d545881e2652dbc0c057691a39a095bce92f441f pf-lock.baseline
diff -r 85b6b6ce74cd src/sys/net/pf.c
--- a/src/sys/net/pf.c	Tue May 30 20:11:44 2017 +0200
+++ b/src/sys/net/pf.c	Wed May 31 09:42:36 2017 +0200
@@ -923,7 +923,7 @@ int
 pf_state_insert(struct pfi_kif *kif, struct pf_state_key **skw,
     struct pf_state_key **sks, struct pf_state *s)
 {
-	NET_ASSERT_LOCKED();
+	PF_ASSERT_LOCKED();
 
 	s->kif = kif;
 	if (*skw == *sks) {
@@ -1186,7 +1186,7 @@ pf_purge_expired_rules(void)
 {
 	struct pf_rule	*r;
 
-	NET_ASSERT_LOCKED();
+	PF_ASSERT_LOCKED();
 
 	if (SLIST_EMPTY(&pf_rule_gcl))
 		return;
@@ -1207,6 +1207,7 @@ pf_purge_thread(void *v)
 		tsleep(pf_purge_thread, PWAIT, "pftm", 1 * hz);
 
 		NET_LOCK(s);
+		PF_LOCK();
 
 		/* process a fraction of the state table every second */
 		pf_purge_expired_states(1 + (pf_status.states
@@ -1214,13 +1215,20 @@ pf_purge_thread(void *v)
 
 		/* purge other expired types every PFTM_INTERVAL seconds */
 		if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
-			pf_purge_expired_fragments();
 			pf_purge_expired_src_nodes(0);
 			pf_purge_expired_rules();
+		}
+
+		PF_UNLOCK();
+		NET_UNLOCK(s);
+
+		/*
+		 * Fragments don't require PF_LOCK(), they use their own mutex.
+		 */
+		if (nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
+			pf_purge_expired_fragments();
 			nloops = 0;
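[For anyone who wants to test this: enabling the option means building a
custom kernel.  A rough sketch of the usual config(8) workflow -- the
CUSTOM.MP file name is hypothetical, any copy of the GENERIC.MP
configuration works:

	# sys/arch/amd64/conf/CUSTOM.MP -- a copy of GENERIC.MP plus:
	option	WITH_PF_LOCK	# build PF with PF_LOCK

	# then, roughly:
	#   $ cd /usr/src/sys/arch/amd64/conf
	#   $ config CUSTOM.MP
	#   $ cd ../compile/CUSTOM.MP
	#   $ make && make install
]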
Re: let's add PF_LOCK()
On 30/05/17(Tue) 20:31, Alexandr Nedvedicky wrote:
> Hello Martin,
> 
> > >  		rw_exit_write(&netlock);
> > >  		export_pflow(cur);
> > >  		rw_enter_write(&netlock);
> > > +		rw_enter_write(&pf_lock);
> > >  	}
> > 
> > This is not needed, you're not diffing against the latest version of
> > net/pf.c.
> 
> indeed, my tree is a couple of hours old.
> 
> > > +extern struct rwlock	pf_lock;
> > > +
> > > +#define PF_LOCK(s)	do {			\
> > > +		NET_ASSERT_LOCKED();		\
> > > +		rw_enter_write(&pf_lock);	\
> > > +		s = splsoftnet();		\
> > > +	} while (0)
> > 
> > There's no need for splsoftnet()/splx() nor splsoftassert().
> 
> O.K. removed; the 'int spl;' in pf.c is also gone now.
> 
> thank you for looking at my changes.  updated diff is further below

Could you make 2 definitions for the lock?  It doesn't make sense
to enable them by default for now.  I'd like to see your diff committed
now with empty defines and an easy way to enable it.

That means ok mpi@ if the defines do not take/release locks by default.

> 
> regards
> sasha
> 8<---8<---8<--8<
> diff -r 85b6b6ce74cd .hgtags
> --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
> +++ b/.hgtags	Tue May 30 20:27:43 2017 +0200
> @@ -0,0 +1,1 @@
> +d545881e2652dbc0c057691a39a095bce92f441f pf-lock.baseline
> diff -r 85b6b6ce74cd src/sys/net/pf.c
> --- a/src/sys/net/pf.c	Tue May 30 20:11:44 2017 +0200
> +++ b/src/sys/net/pf.c	Tue May 30 20:27:43 2017 +0200
> @@ -923,7 +923,7 @@ int
>  pf_state_insert(struct pfi_kif *kif, struct pf_state_key **skw,
>      struct pf_state_key **sks, struct pf_state *s)
>  {
> -	NET_ASSERT_LOCKED();
> +	PF_ASSERT_LOCKED();
> 
>  	s->kif = kif;
>  	if (*skw == *sks) {
> @@ -1186,7 +1186,7 @@ pf_purge_expired_rules(void)
>  {
>  	struct pf_rule	*r;
> 
> -	NET_ASSERT_LOCKED();
> +	PF_ASSERT_LOCKED();
> 
>  	if (SLIST_EMPTY(&pf_rule_gcl))
>  		return;
> @@ -1207,6 +1207,7 @@ pf_purge_thread(void *v)
>  		tsleep(pf_purge_thread, PWAIT, "pftm", 1 * hz);
> 
>  		NET_LOCK(s);
> +		PF_LOCK();
> 
>  		/* process a fraction of the state table every second */
>  		pf_purge_expired_states(1 + (pf_status.states
> @@ -1214,13 +1215,20 @@ pf_purge_thread(void *v)
> 
>  		/* purge other expired types every PFTM_INTERVAL seconds */
>  		if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
> -			pf_purge_expired_fragments();
>  			pf_purge_expired_src_nodes(0);
>  			pf_purge_expired_rules();
> +		}
> +
> +		PF_UNLOCK();
> +		NET_UNLOCK(s);
> +
> +		/*
> +		 * Fragments don't require PF_LOCK(), they use their own mutex.
> +		 */
> +		if (nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
> +			pf_purge_expired_fragments();
>  			nloops = 0;
>  		}
> -
> -		NET_UNLOCK(s);
>  	}
>  }
> 
> @@ -1267,7 +1275,7 @@ pf_purge_expired_src_nodes(void)
>  {
>  	struct pf_src_node	*cur, *next;
> 
> -	NET_ASSERT_LOCKED();
> +	PF_ASSERT_LOCKED();
> 
>  	for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
>  		next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);
> @@ -1303,7 +1311,7 @@ pf_src_tree_remove_state(struct pf_state
>  void
>  pf_remove_state(struct pf_state *cur)
>  {
> -	NET_ASSERT_LOCKED();
> +	PF_ASSERT_LOCKED();
> 
>  	/* handle load balancing related tasks */
>  	pf_postprocess_addr(cur);
> @@ -1320,7 +1328,6 @@ pf_remove_state(struct pf_state *cur)
>  	}
>  	RB_REMOVE(pf_state_tree_id, &tree_id, cur);
>  #if NPFLOW > 0
> -	if (cur->state_flags & PFSTATE_PFLOW)
>  		export_pflow(cur);
>  #endif	/* NPFLOW > 0 */
>  #if NPFSYNC > 0
> @@ -1350,7 +1357,7 @@ pf_free_state(struct pf_state *cur)
>  {
>  	struct pf_rule_item	*ri;
> 
> -	NET_ASSERT_LOCKED();
> +	PF_ASSERT_LOCKED();
> 
>  #if NPFSYNC > 0
>  	if (pfsync_state_in_use(cur))
> @@ -1386,7 +1393,7 @@ pf_purge_expired_states(u_int32_t maxche
>  	static struct pf_state	*cur = NULL;
>  	struct pf_state		*next;
> 
> -	NET_ASSERT_LOCKED();
> +	PF_ASSERT_LOCKED();
> 
>  	while (maxcheck--) {
>  		/* wrap to start of list when we hit the end */
> @@ -3146,13 +3153,13 @@ pf_socket_lookup(struct pf_pdesc *pd)
>  	case IPPROTO_TCP:
>  		sport = pd->hdr.tcp.th_sport;
>  		dport = pd->hdr.tcp.th_dport;
> -		NET_ASSERT_LOCKED();
> +		PF_ASSERT_LOCKED();
>  		tb = &tcbtable;
>  		break;
>  	case IPPROTO_UDP:
>  		sport = pd->hdr.udp.uh_sport;
>  		dport = p
Re: let's add PF_LOCK()
Hello Martin,

> >  		rw_exit_write(&netlock);
> >  		export_pflow(cur);
> >  		rw_enter_write(&netlock);
> > +		rw_enter_write(&pf_lock);
> >  	}
> 
> This is not needed, you're not diffing against the latest version of
> net/pf.c.

indeed, my tree is a couple of hours old.

> > +extern struct rwlock	pf_lock;
> > +
> > +#define PF_LOCK(s)	do {			\
> > +		NET_ASSERT_LOCKED();		\
> > +		rw_enter_write(&pf_lock);	\
> > +		s = splsoftnet();		\
> > +	} while (0)
> 
> There's no need for splsoftnet()/splx() nor splsoftassert().

O.K. removed; the 'int spl;' in pf.c is also gone now.

thank you for looking at my changes.  updated diff is further below

regards
sasha
8<---8<---8<--8<
diff -r 85b6b6ce74cd .hgtags
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.hgtags	Tue May 30 20:27:43 2017 +0200
@@ -0,0 +1,1 @@
+d545881e2652dbc0c057691a39a095bce92f441f pf-lock.baseline
diff -r 85b6b6ce74cd src/sys/net/pf.c
--- a/src/sys/net/pf.c	Tue May 30 20:11:44 2017 +0200
+++ b/src/sys/net/pf.c	Tue May 30 20:27:43 2017 +0200
@@ -923,7 +923,7 @@ int
 pf_state_insert(struct pfi_kif *kif, struct pf_state_key **skw,
     struct pf_state_key **sks, struct pf_state *s)
 {
-	NET_ASSERT_LOCKED();
+	PF_ASSERT_LOCKED();
 
 	s->kif = kif;
 	if (*skw == *sks) {
@@ -1186,7 +1186,7 @@ pf_purge_expired_rules(void)
 {
 	struct pf_rule	*r;
 
-	NET_ASSERT_LOCKED();
+	PF_ASSERT_LOCKED();
 
 	if (SLIST_EMPTY(&pf_rule_gcl))
 		return;
@@ -1207,6 +1207,7 @@ pf_purge_thread(void *v)
 		tsleep(pf_purge_thread, PWAIT, "pftm", 1 * hz);
 
 		NET_LOCK(s);
+		PF_LOCK();
 
 		/* process a fraction of the state table every second */
 		pf_purge_expired_states(1 + (pf_status.states
@@ -1214,13 +1215,20 @@ pf_purge_thread(void *v)
 
 		/* purge other expired types every PFTM_INTERVAL seconds */
 		if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
-			pf_purge_expired_fragments();
 			pf_purge_expired_src_nodes(0);
 			pf_purge_expired_rules();
+		}
+
+		PF_UNLOCK();
+		NET_UNLOCK(s);
+
+		/*
+		 * Fragments don't require PF_LOCK(), they use their own mutex.
+		 */
+		if (nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
+			pf_purge_expired_fragments();
 			nloops = 0;
 		}
-
-		NET_UNLOCK(s);
 	}
 }

@@ -1267,7 +1275,7 @@ pf_purge_expired_src_nodes(void)
 {
 	struct pf_src_node	*cur, *next;
 
-	NET_ASSERT_LOCKED();
+	PF_ASSERT_LOCKED();
 
 	for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
 		next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);
@@ -1303,7 +1311,7 @@ pf_src_tree_remove_state(struct pf_state
 void
 pf_remove_state(struct pf_state *cur)
 {
-	NET_ASSERT_LOCKED();
+	PF_ASSERT_LOCKED();
 
 	/* handle load balancing related tasks */
 	pf_postprocess_addr(cur);
@@ -1320,7 +1328,6 @@ pf_remove_state(struct pf_state *cur)
 	}
 	RB_REMOVE(pf_state_tree_id, &tree_id, cur);
 #if NPFLOW > 0
-	if (cur->state_flags & PFSTATE_PFLOW)
 		export_pflow(cur);
 #endif	/* NPFLOW > 0 */
 #if NPFSYNC > 0
@@ -1350,7 +1357,7 @@ pf_free_state(struct pf_state *cur)
 {
 	struct pf_rule_item	*ri;
 
-	NET_ASSERT_LOCKED();
+	PF_ASSERT_LOCKED();
 
 #if NPFSYNC > 0
 	if (pfsync_state_in_use(cur))
@@ -1386,7 +1393,7 @@ pf_purge_expired_states(u_int32_t maxche
 	static struct pf_state	*cur = NULL;
 	struct pf_state		*next;
 
-	NET_ASSERT_LOCKED();
+	PF_ASSERT_LOCKED();
 
 	while (maxcheck--) {
 		/* wrap to start of list when we hit the end */
@@ -3146,13 +3153,13 @@ pf_socket_lookup(struct pf_pdesc *pd)
 	case IPPROTO_TCP:
 		sport = pd->hdr.tcp.th_sport;
 		dport = pd->hdr.tcp.th_dport;
-		NET_ASSERT_LOCKED();
+		PF_ASSERT_LOCKED();
 		tb = &tcbtable;
 		break;
 	case IPPROTO_UDP:
 		sport = pd->hdr.udp.uh_sport;
 		dport = pd->hdr.udp.uh_dport;
-		NET_ASSERT_LOCKED();
+		PF_ASSERT_LOCKED();
 		tb = &udbtable;
 		break;
 	default:
@@ -6673,6 +6680,7 @@ pf_test(sa_family_t af, int fwdir, struc
 	/* if packet sits in reassembly queue, return without error */
 	if (pd.m == NULL)
 		return PF_PASS;
+
 	if (action != PF_PASS) {
 #if NPFLOG > 0
 		pd.pflog |= PF_LOG_FORCE;
@@ -6692,6 +6700,9 @@ pf_test
Re: let's add PF_LOCK()
On 30/05/17(Tue) 19:11, Alexandr Nedvedicky wrote:
> @@ -1322,9 +1330,18 @@
>  #if NPFLOW > 0
>  	if (cur->state_flags & PFSTATE_PFLOW) {
>  		/* XXXSMP breaks atomicity */
> +		/*
> +		 * The only way how state gets freed from memory is when it
> +		 * gets garbage collected by pf_purge_thread(). The
> +		 * pf_purge_thread() removes only states, which are marked as
> +		 * PFTM_UNLINKED -> state will stay around, once we re-acquire
> +		 * netlock.
> +		 */
> +		rw_exit_write(&pf_lock);
>  		rw_exit_write(&netlock);
>  		export_pflow(cur);
>  		rw_enter_write(&netlock);
> +		rw_enter_write(&pf_lock);
>  	}

This is not needed, you're not diffing against the latest version of
net/pf.c.

> +extern struct rwlock	pf_lock;
> +
> +#define PF_LOCK(s)	do {			\
> +		NET_ASSERT_LOCKED();		\
> +		rw_enter_write(&pf_lock);	\
> +		s = splsoftnet();		\
> +	} while (0)

There's no need for splsoftnet()/splx() nor splsoftassert().

> +#define PF_UNLOCK(s)	do {			\
> +		PF_ASSERT_LOCKED();		\
> +		splx(s);			\
> +		rw_exit_write(&pf_lock);	\
> +	} while (0)
> +
> +#define PF_ASSERT_LOCKED()	do {		\
> +		if (rw_status(&pf_lock) != RW_WRITE)\
> +			splassert_fail(RW_WRITE,	\
> +			    rw_status(&pf_lock),__func__);\
> +		splsoftassert(IPL_SOFTNET);	\
> +	} while (0)
> +
> +#define PF_ASSERT_UNLOCKED()	do {		\
> +		if (rw_status(&pf_lock) == RW_WRITE)\
> +			splassert_fail(0, rw_status(&pf_lock), __func__);\
> +	} while (0)
> +
>  #endif /* _KERNEL */
> 
>  #endif /* _NET_PFVAR_PRIV_H_ */
> 8<---8<---8<--8<
>
Re: let's add PF_LOCK()
Hello Mike,

thanks for looking at my stuff.

> It's great stuff, I think you should move forward with this.
> A couple of nits:
> 
>  - pfvar*.h doesn't have tabs after #define, just one space

fixed

>  - do you really want to put MTX in PF_FRAG_MTX_* defines?
>    why not PF_FRAG_LOCK?  it would be aligned to PF_LOCK
>    and then we can change the nature of the lock w/o changing
>    defines.

you are right, the lock name should not refer to the type of the lock
in this case.

> Some additional comments inline (especially the nloops).

indeed, this is a very good catch, thanks a lot.

> It would be really nice if you'd generate diffs with -p so

I'm sorry, I forgot to add the '-p' switch.  I'll make '-p' persistent
in my .hgrc

> The way nloops gets set to 0 in the previous "if" statement
> makes this look suspicious.  Perhaps setting nloops to zero
> has to happen in the last part?

this is the fix I have in the updated patch:

diff -r 36784633f1f3 src/sys/net/pf.c
--- a/src/sys/net/pf.c	Tue May 30 10:59:16 2017 +0200
+++ b/src/sys/net/pf.c	Tue May 30 18:57:21 2017 +0200
@@ -1217,7 +1217,6 @@ pf_purge_thread(void *v)
 		if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
 			pf_purge_expired_src_nodes(0);
 			pf_purge_expired_rules();
-			nloops = 0;
 		}
 
 		PF_UNLOCK(s);
@@ -1226,9 +1225,10 @@ pf_purge_thread(void *v)
 		/*
 		 * Fragments don't require PF_LOCK(), they use their own mutex.
 		 */
-		if (nloops >= pf_default_rule.timeout[PFTM_INTERVAL])
+		if (nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
 			pf_purge_expired_fragments();
-
+			nloops = 0;
+		}
 	}
 }

> "The only guy, who kills states (frees from memory) is"
> I think comments have to have a bit better style.  There are
> no guys who kill states :)

does this improved comment sound good to you?

@@ -1331,10 +1331,11 @@ pf_remove_state(struct pf_state *cur)
 	if (cur->state_flags & PFSTATE_PFLOW) {
 		/* XXXSMP breaks atomicity */
 		/*
-		 * The only guy, who kills states (frees from memory) is
-		 * pf_purge_thread(). The pf_purge_thread() kills only states,
-		 * which are marked as PFTM_UNLINKED -> state will stay around,
-		 * once we re-acquire netlock.
+		 * The only way how state gets freed from memory is when it
+		 * gets garbage collected by pf_purge_thread(). The
+		 * pf_purge_thread() removes only states, which are marked as
+		 * PFTM_UNLINKED -> state will stay around, once we re-acquire
+		 * netlock.
 		 */

> Perhaps rephrase the whole thing like this to stress that we rely
> on NET_LOCK right now and something will have to replace it when
> it'll be gone?
> 
> "At the moment, we rely on NET_LOCK() to prevent removal of
> items we've collected above.  They'll have to be refcounted
> when NET_LOCK() is gone."

I like your comment.  It's brief and clear:

@@ -6808,12 +6809,11 @@ pf_test(sa_family_t af, int fwdir, struc
 unlock:
 	PF_UNLOCK(spl);
+
 	/*
-	 * We've just left the look-up section of pf_test(). Code further down
-	 * assumes, all objects (state, rule, anchor,...) are going to stay
-	 * around. It's fair assumption, NET_LOCK() prevents the items
-	 * we've collected above from removing. Once NET_LOCK() will be gone,
-	 * we must have reference counting ready.
+	 * At the moment, we rely on NET_LOCK() to prevent removal of item
Re: let's add PF_LOCK()
On Tue, May 30, 2017 at 14:46 +0200, Alexandr Nedvedicky wrote:
> oh, not again...
> 
> I'm sorry for not attaching the patch to the first email
> 
> On Tue, May 30, 2017 at 02:34:32PM +0200, Alexandr Nedvedicky wrote:
> > Hello,
> > 
> > the patch delivers two changes to PF:
> > 
> > It adds PF_LOCK() et al.  At the moment the PF_LOCK() sort of
> > duplicates the current NET_LOCK().  It essentially synchronizes
> > packets with ioctl(2) and the timer thread, which purges states.
> > The future work is going to break PF_LOCK into smaller locks,
> > each of which will protect the relevant parts of PF.  Think of
> > pf_state_lock, pf_rule_lock, ...
> > 
> > The other change introduces a mutex for the IP reassembly done
> > by PF.  The mutex synchronizes fragmented packets with the timer
> > thread, which expires incomplete packets from the fragment cache.
> > 
> > O.K.?
> > 
> > thanks and
> > regards
> > sasha
> 

It's great stuff, I think you should move forward with this.
A couple of nits:

 - pfvar*.h doesn't have tabs after #define, just one space

 - do you really want to put MTX in PF_FRAG_MTX_* defines?
   why not PF_FRAG_LOCK?  it would be aligned to PF_LOCK
   and then we can change the nature of the lock w/o changing
   defines.

Some additional comments inline (especially the nloops).

It would be really nice if you'd generate diffs with -p so
that additional location information for each hunk would be
visible.

> 8<---8<---8<--8<
> diff -r 21414694ee7a .hgtags
> --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
> +++ b/.hgtags	Tue May 30 14:25:01 2017 +0200
> @@ -0,0 +1,1 @@
> +d545881e2652dbc0c057691a39a095bce92f441f pf-lock.baseline
> diff -r 21414694ee7a src/sys/net/pf.c
> --- a/src/sys/net/pf.c	Tue May 30 10:55:41 2017 +0200
> +++ b/src/sys/net/pf.c	Tue May 30 14:25:01 2017 +0200
> @@ -923,7 +923,7 @@
>  pf_state_insert(struct pfi_kif *kif, struct pf_state_key **skw,
>      struct pf_state_key **sks, struct pf_state *s)
>  {
> -	NET_ASSERT_LOCKED();
> +	PF_ASSERT_LOCKED();
> 
>  	s->kif = kif;
>  	if (*skw == *sks) {
> @@ -1186,7 +1186,7 @@
>  {
>  	struct pf_rule	*r;
> 
> -	NET_ASSERT_LOCKED();
> +	PF_ASSERT_LOCKED();
> 
>  	if (SLIST_EMPTY(&pf_rule_gcl))
>  		return;
> @@ -1207,6 +1207,7 @@
>  		tsleep(pf_purge_thread, PWAIT, "pftm", 1 * hz);
> 
>  		NET_LOCK(s);
> +		PF_LOCK(s);
> 
>  		/* process a fraction of the state table every second */
>  		pf_purge_expired_states(1 + (pf_status.states
> @@ -1214,13 +1215,20 @@
> 
>  		/* purge other expired types every PFTM_INTERVAL seconds */
>  		if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
> -			pf_purge_expired_fragments();
>  			pf_purge_expired_src_nodes(0);
>  			pf_purge_expired_rules();
>  			nloops = 0;
>  		}
> 
> +		PF_UNLOCK(s);
>  		NET_UNLOCK(s);
> +
> +		/*
> +		 * Fragments don't require PF_LOCK(), they use their own mutex.
> +		 */
> +		if (nloops >= pf_default_rule.timeout[PFTM_INTERVAL])
> +			pf_purge_expired_fragments();
> +
>  	}
>  }

The way nloops gets set to 0 in the previous "if" statement
makes this look suspicious.  Perhaps setting nloops to zero
has to happen in the last part?

> @@ -1267,7 +1275,7 @@
>  {
>  	struct pf_src_node	*cur, *next;
> 
> -	NET_ASSERT_LOCKED();
> +	PF_ASSERT_LOCKED();
> 
>  	for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
>  		next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);
> @@ -1303,7 +1311,7 @@
>  void
>  pf_remove_state(struct pf_state *cur)
>  {
> -	NET_ASSERT_LOCKED();
> +	PF_ASSERT_LOCKED();
> 
>  	/* handle load balancing related tasks */
>  	pf_postprocess_addr(cur);
> @@ -1322,9 +1330,17 @@
>  #if NPFLOW > 0
>  	if (cur->state_flags & PFSTATE_PFLOW) {
>  		/* XXXSMP breaks atomicity */
> +		/*
> +		 * The only guy, who kills states (frees from memory) is
> +		 * pf_purge_thread(). The pf_purge_thread() kills only states,
> +		 * which are marked as PFTM_UNLINKED -> state will stay around,
> +		 * once we re-acquire netlock.

"The only guy, who kills states (frees from memory) is"

I think comments have to have a bit better style.  There are
no guys who kill states :)

> +		 */
> +		rw_exit_write(&pf_lock);
>  		rw_exit_write(&netlock);
>  		export_pflow(cur);
>  		rw_enter_write(&netlock);
> +		rw_enter_write(&pf_lock);
>  	}
>  #endif /* NPFLOW > 0 */
>  #if NPFSYNC > 0
> @@ -1354,7 +1370,7 @@
>  {
>  	struct pf_rule_item	*ri;
> 
> -	NET_ASSERT_LOCKED();
> +
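[To spell out the nloops suspicion: nloops is reset inside the PF_LOCK'd
block, so the later check can never see a value that has reached the
interval, and pf_purge_expired_fragments() is unreachable.  A minimal
sketch of the broken flow next to the fix that later revisions of the
diff adopt:

	/* as diffed above (broken): */
	if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
		pf_purge_expired_src_nodes(0);
		pf_purge_expired_rules();
		nloops = 0;			/* reset happens here... */
	}
	PF_UNLOCK(s);
	NET_UNLOCK(s);
	if (nloops >= pf_default_rule.timeout[PFTM_INTERVAL])
		pf_purge_expired_fragments();	/* ...so this never runs */

	/* fixed: reset nloops only after the fragment purge */
	if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
		pf_purge_expired_src_nodes(0);
		pf_purge_expired_rules();
	}
	PF_UNLOCK(s);
	NET_UNLOCK(s);
	if (nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
		pf_purge_expired_fragments();
		nloops = 0;
	}
]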
Re: let's add PF_LOCK()
oh, not again...

I'm sorry for not attaching the patch to the first email

On Tue, May 30, 2017 at 02:34:32PM +0200, Alexandr Nedvedicky wrote:
> Hello,
> 
> the patch delivers two changes to PF:
> 
> It adds PF_LOCK() et al.  At the moment the PF_LOCK() sort of
> duplicates the current NET_LOCK().  It essentially synchronizes
> packets with ioctl(2) and the timer thread, which purges states.
> The future work is going to break PF_LOCK into smaller locks,
> each of which will protect the relevant parts of PF.  Think of
> pf_state_lock, pf_rule_lock, ...
> 
> The other change introduces a mutex for the IP reassembly done
> by PF.  The mutex synchronizes fragmented packets with the timer
> thread, which expires incomplete packets from the fragment cache.
> 
> O.K.?
> 
> thanks and
> regards
> sasha
> 
8<---8<---8<--8<
diff -r 21414694ee7a .hgtags
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.hgtags	Tue May 30 14:25:01 2017 +0200
@@ -0,0 +1,1 @@
+d545881e2652dbc0c057691a39a095bce92f441f pf-lock.baseline
diff -r 21414694ee7a src/sys/net/pf.c
--- a/src/sys/net/pf.c	Tue May 30 10:55:41 2017 +0200
+++ b/src/sys/net/pf.c	Tue May 30 14:25:01 2017 +0200
@@ -923,7 +923,7 @@
 pf_state_insert(struct pfi_kif *kif, struct pf_state_key **skw,
     struct pf_state_key **sks, struct pf_state *s)
 {
-	NET_ASSERT_LOCKED();
+	PF_ASSERT_LOCKED();
 
 	s->kif = kif;
 	if (*skw == *sks) {
@@ -1186,7 +1186,7 @@
 {
 	struct pf_rule	*r;
 
-	NET_ASSERT_LOCKED();
+	PF_ASSERT_LOCKED();
 
 	if (SLIST_EMPTY(&pf_rule_gcl))
 		return;
@@ -1207,6 +1207,7 @@
 		tsleep(pf_purge_thread, PWAIT, "pftm", 1 * hz);
 
 		NET_LOCK(s);
+		PF_LOCK(s);
 
 		/* process a fraction of the state table every second */
 		pf_purge_expired_states(1 + (pf_status.states
@@ -1214,13 +1215,20 @@
 
 		/* purge other expired types every PFTM_INTERVAL seconds */
 		if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
-			pf_purge_expired_fragments();
 			pf_purge_expired_src_nodes(0);
 			pf_purge_expired_rules();
 			nloops = 0;
 		}
 
+		PF_UNLOCK(s);
 		NET_UNLOCK(s);
+
+		/*
+		 * Fragments don't require PF_LOCK(), they use their own mutex.
+		 */
+		if (nloops >= pf_default_rule.timeout[PFTM_INTERVAL])
+			pf_purge_expired_fragments();
+
 	}
 }

@@ -1267,7 +1275,7 @@
 {
 	struct pf_src_node	*cur, *next;
 
-	NET_ASSERT_LOCKED();
+	PF_ASSERT_LOCKED();
 
 	for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
 		next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);
@@ -1303,7 +1311,7 @@
 void
 pf_remove_state(struct pf_state *cur)
 {
-	NET_ASSERT_LOCKED();
+	PF_ASSERT_LOCKED();
 
 	/* handle load balancing related tasks */
 	pf_postprocess_addr(cur);
@@ -1322,9 +1330,17 @@
 #if NPFLOW > 0
 	if (cur->state_flags & PFSTATE_PFLOW) {
 		/* XXXSMP breaks atomicity */
+		/*
+		 * The only guy, who kills states (frees from memory) is
+		 * pf_purge_thread(). The pf_purge_thread() kills only states,
+		 * which are marked as PFTM_UNLINKED -> state will stay around,
+		 * once we re-acquire netlock.
+		 */
+		rw_exit_write(&pf_lock);
 		rw_exit_write(&netlock);
 		export_pflow(cur);
 		rw_enter_write(&netlock);
+		rw_enter_write(&pf_lock);
 	}
 #endif /* NPFLOW > 0 */
 #if NPFSYNC > 0
@@ -1354,7 +1370,7 @@
 {
 	struct pf_rule_item	*ri;
 
-	NET_ASSERT_LOCKED();
+	PF_ASSERT_LOCKED();
 
 #if NPFSYNC > 0
 	if (pfsync_state_in_use(cur))
@@ -1390,7 +1406,7 @@
 	static struct pf_state	*cur = NULL;
 	struct pf_state		*next;
 
-	NET_ASSERT_LOCKED();
+	PF_ASSERT_LOCKED();
 
 	while (maxcheck--) {
 		/* wrap to start of list when we hit the end */
@@ -3142,13 +3158,13 @@
 	case IPPROTO_TCP:
 		sport = pd->hdr.tcp.th_sport;
 		dport = pd->hdr.tcp.th_dport;
-		NET_ASSERT_LOCKED();
+		PF_ASSERT_LOCKED();
 		tb = &tcbtable;
 		break;
 	case IPPROTO_UDP:
 		sport = pd->hdr.udp.uh_sport;
 		dport = pd->hdr.udp.uh_dport;
-		NET_ASSERT_LOCKED();
+		PF_ASSERT_LOCKED();
 		tb = &udbtable;
 		break;
 	default:
@@ -6611,6 +6627,7 @@
 	struct pf_pdesc pd;
 	int dir = (fwdir == PF_FWD) ? PF_OUT : fwdir;
 	u_int32_t
let's add PF_LOCK()
Hello,

the patch delivers two changes to PF:

It adds PF_LOCK() et al.  At the moment the PF_LOCK() sort of
duplicates the current NET_LOCK().  It essentially synchronizes
packets with ioctl(2) and the timer thread, which purges states.
The future work is going to break PF_LOCK into smaller locks,
each of which will protect the relevant parts of PF.  Think of
pf_state_lock, pf_rule_lock, ...

The other change introduces a mutex for the IP reassembly done by
PF.  The mutex synchronizes fragmented packets with the timer
thread, which expires incomplete packets from the fragment cache.

O.K.?

thanks and
regards
sasha
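[The reassembly mutex mentioned here ends up wrapped in PF_FRAG_LOCK()
macros; condensed from the pf_norm.c diff quoted earlier in this thread,
a sketch (the WITH_PF_LOCK conditionals added later are omitted, and the
caller below is only illustrative of where the lock is meant to be held):

	struct mutex	pf_frag_mtx;

	#define PF_FRAG_LOCK_INIT()	mtx_init(&pf_frag_mtx, IPL_SOFTNET)
	#define PF_FRAG_LOCK()		mtx_enter(&pf_frag_mtx)
	#define PF_FRAG_UNLOCK()	mtx_leave(&pf_frag_mtx)

	/*
	 * Hypothetical usage: fragment-cache expiry serialized against
	 * fragmented packets by the mutex alone, independent of pf_lock.
	 */
	void
	pf_purge_expired_fragments(void)
	{
		PF_FRAG_LOCK();
		/* ... walk and expire the fragment queue ... */
		PF_FRAG_UNLOCK();
	}
]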