Hello,

I'm not sure I got everything right in Calgary, so this patch should roughly illustrate how I think we should start moving forward to make PF MULTIPROCESSOR friendly. It's quite likely my proposal/way is completely off; I'll be happy if you bring me back down to earth.
The brief summary of what the patch is trying to achieve is as follows: the patch trades all splsoftnet() calls for KERNEL_LOCK() when PF gets compiled with the MULTIPROCESSOR option on. If the MULTIPROCESSOR option is off, the compiler produces a PF that keeps using splsoftnet. To achieve this the patch introduces the macros PF_LOCK()/PF_UNLOCK(), which expand to KERNEL_LOCK()/KERNEL_UNLOCK() when MULTIPROCESSOR is on. On the other hand, if MULTIPROCESSOR is off, the PF_*LOCK() macros become splsoftnet()/splx(). (See the sketches at the end of this mail.)

Skip to =breakage= if you don't care about details/future plans.

Currently PF must synchronize all of the following:

- packets running through pf_test(). The IP stack already serializes calls to pf_test() (there is at most one pf_test() instance running at any time).

- ioctl() operations on the PF driver, both against packets and against each other (it looks like there can be multiple processes reading the state table; those are allowed to run in parallel). To serialize ioctl() operations against each other, PF uses pf_consistency_lock (an RW-lock). If a particular ioctl() operation must also be synchronized with packets, it must raise splsoftnet.

- the purge thread, which expires states. The purge thread must grab both pf_consistency_lock and splsoftnet.

The desired state is to break the giant pf_consistency_lock into a few finer-grained RW-locks, which will protect the various data PF keeps. Those RW-locks will also synchronize packets. The list of locks I have in mind is as follows:

- pf_state_rw

- pf_anchors_rw (packets don't need to grab it, as they grab RW-locks bound to individual rulesets)

- pf_tables_rw (packets don't need to grab it, as they grab RW-locks bound to table instances)

The first major milestone in this effort is to introduce pf_state_rw. The patch I'm proposing here buys us enough freedom to relatively safely decompose pf_consistency_lock and make pf_test() run in parallel for packets.

=breakage=

The proposed patch breaks the 'return-*' actions when PF gets compiled with MULTIPROCESSOR on. I think it is unsafe to call the icmp_error*() functions while holding KERNEL_LOCK(), and it is risky to give up KERNEL_LOCK(), execute a send operation on the response packet, and re-grab KERNEL_LOCK() again, as we would arrive in a different world (different in the sense that the pointers we remembered might be invalid by now). To fix that we must introduce reference counting for objects, so it becomes safe to drop and re-grab KERNEL_LOCK() while holding a reference (again, see the sketches at the end of this mail). The problem has been solved for the pf_route*() functions, so PBR works in MULTIPROCESSOR-friendly PF.

My patch does not touch if_pfsync.c at all. pfsync support in MULTIPROCESSOR PF will have to come in some later phase; you should consider it broken in the MULTIPROCESSOR version.

There should be no breakage in PF for a GENERIC kernel.
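To make the mechanics concrete, here is roughly how a typical call site changes (this simply mirrors what the diff below does in pfi_kifaddr_update() and friends):

	/* before: non-MP kernel only */
	int s;

	s = splsoftnet();
	pfi_update++;
	pfi_kif_update(kif);
	splx(s);

	/* after: compiles both ways */
	PF_LOCK_INSTANCE(s);

	PF_LOCK(s);
	pfi_update++;
	pfi_kif_update(kif);
	PF_UNLOCK(s);

PF_LOCK_INSTANCE() only exists to declare the spl variable in the non-MP build; in the MULTIPROCESSOR build it expands to a no-op and the macros ignore their argument.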
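For the later decomposition I imagine the new locks will be ordinary rwlocks. A minimal sketch of the direction, assuming pf_state_rw lives in pf.c; the _locked wrapper and the lock name are made up, only pf_purge_expired_states() exists today:

	#include <sys/rwlock.h>

	struct rwlock	pf_state_rw = RWLOCK_INITIALIZER("pfstaterw");

	/*
	 * hypothetical wrapper: the purge thread would take the write
	 * side, while pf_test() readers take the read side
	 */
	void
	pf_purge_expired_states_locked(u_int32_t maxcheck)
	{
		rw_enter_write(&pf_state_rw);
		pf_purge_expired_states(maxcheck);
		rw_exit_write(&pf_state_rw);
	}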
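And a very rough sketch of the reference counting shape that should eventually fix the 'return-*' breakage; the refcnt member and the pf_state_ref()/pf_state_unref() helpers are hypothetical, nothing like this exists in the tree yet:

	/*
	 * hypothetical: pin the state with a reference so it cannot
	 * be freed while we drop the kernel lock around icmp_error()
	 */
	void
	pf_send_icmp_unlocked(struct mbuf *m0, u_int8_t type,
	    u_int8_t code, struct pf_state *s)
	{
		pf_state_ref(s);	/* e.g. atomic increment */
		KERNEL_UNLOCK();
		icmp_error(m0, type, code, 0, 0);
		KERNEL_LOCK();
		pf_state_unref(s);	/* may free s on last release */
	}

Holding the reference only guarantees that 's' is not freed under us; everything else we remembered about the world has to be re-validated after re-grabbing the lock.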
regards
sasha

--------8<----------------8<---------------8<-----------------8<--------
Index: pf.c
===================================================================
RCS file: /cvs/src/sys/net/pf.c,v
retrieving revision 1.936
diff -u -p -r1.936 pf.c
--- pf.c	19 Aug 2015 21:22:41 -0000	1.936
+++ pf.c	26 Aug 2015 14:11:17 -0000
@@ -906,7 +906,7 @@ int
 pf_state_insert(struct pfi_kif *kif, struct pf_state_key **skw,
     struct pf_state_key **sks, struct pf_state *s)
 {
-	splsoftassert(IPL_SOFTNET);
+	PF_ASSERT_LOCKED(nothing);
 
 	s->kif = kif;
 	if (*skw == *sks) {
@@ -1150,12 +1150,13 @@ pf_state_export(struct pfsync_state *sp,
 void
 pf_purge_thread(void *v)
 {
-	int nloops = 0, s;
+	int nloops = 0;
+	PF_LOCK_INSTANCE(s);
 
 	for (;;) {
 		tsleep(pf_purge_thread, PWAIT, "pftm", 1 * hz);
 
-		s = splsoftnet();
+		PF_LOCK(s);
 
 		/* process a fraction of the state table every second */
 		pf_purge_expired_states(1 + (pf_status.states
@@ -1168,7 +1169,7 @@ pf_purge_thread(void *v)
 			nloops = 0;
 		}
 
-		splx(s);
+		PF_UNLOCK(s);
 	}
 }
@@ -1259,7 +1260,7 @@ pf_src_tree_remove_state(struct pf_state
 void
 pf_unlink_state(struct pf_state *cur)
 {
-	splsoftassert(IPL_SOFTNET);
+	PF_ASSERT_LOCKED(nothing);
 
 	/* handle load balancing related tasks */
 	pf_postprocess_addr(cur);
@@ -1294,7 +1295,7 @@ pf_free_state(struct pf_state *cur)
 {
 	struct pf_rule_item *ri;
 
-	splsoftassert(IPL_SOFTNET);
+	PF_ASSERT_LOCKED(nothing);
 
 #if NPFSYNC > 0
 	if (pfsync_state_in_use(cur))
@@ -2414,6 +2415,13 @@ pf_send_tcp(const struct pf_rule *r, sa_
 		memcpy((opt + 2), &mss, 2);
 	}
 
+	/*
+	 * XXX: I think we should not call ip*_output() while holding a
+	 * KERNEL_LOCK(). We have to figure out a way around...
+	 */
+#ifdef MULTIPROCESSOR
+	m_freem(m);
+#else /* !MULTIPROCESSOR */
 	switch (af) {
 	case AF_INET:
 		ip_output(m, NULL, NULL, 0, NULL, NULL, 0);
@@ -2424,6 +2432,7 @@ pf_send_tcp(const struct pf_rule *r, sa_
 		break;
 #endif /* INET6 */
 	}
+#endif /* MULTIPROCESSOR */
 }
 
 void
@@ -2442,6 +2451,13 @@ pf_send_icmp(struct mbuf *m, u_int8_t ty
 	if (r && r->qid)
 		m0->m_pkthdr.pf.qid = r->qid;
 
+	/*
+	 * XXX: I think we should not call icmp_error*() while holding a
+	 * KERNEL_LOCK(). We have to figure out a way around...
+	 */
+#ifdef MULTIPROCESSOR
+	m_freem(m0);
+#else /* !MULTIPROCESSOR */
 	switch (af) {
 	case AF_INET:
 		icmp_error(m0, type, code, 0, 0);
@@ -2452,6 +2468,7 @@ pf_send_icmp(struct mbuf *m, u_int8_t ty
 		break;
 #endif /* INET6 */
 	}
+#endif /* MULTIPROCESSOR */
 }
 
 /*
@@ -5482,21 +5499,27 @@ pf_route(struct mbuf **m, struct pf_rule
 	if ((*m)->m_pkthdr.pf.routed++ > 3) {
 		m0 = *m;
 		*m = NULL;
+		KERNEL_UNLOCK();
 		goto bad;
 	}
 
 	if (r->rt == PF_DUPTO) {
-		if ((m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT)) == NULL)
+		if ((m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT)) == NULL) {
+			KERNEL_UNLOCK();
 			return;
+		}
 	} else {
-		if ((r->rt == PF_REPLYTO) == (r->direction == dir))
+		if ((r->rt == PF_REPLYTO) == (r->direction == dir)) {
+			KERNEL_UNLOCK();
 			return;
+		}
 		m0 = *m;
 	}
 
 	if (m0->m_len < sizeof(struct ip)) {
 		DPFPRINTF(LOG_ERR,
 		    "pf_route: m0->m_len < sizeof(struct ip)");
+		KERNEL_UNLOCK();
 		goto bad;
 	}
 
@@ -5513,6 +5536,7 @@ pf_route(struct mbuf **m, struct pf_rule
 	rt = rtalloc(sintosa(dst), RT_REPORT|RT_RESOLVE, rtableid);
 	if (rt == NULL) {
 		ipstat.ips_noroute++;
+		KERNEL_UNLOCK();
 		goto bad;
 	}
 
@@ -5531,6 +5555,7 @@ pf_route(struct mbuf **m, struct pf_rule
 		    &naddr, NULL, sns, &r->route, PF_SN_ROUTE)) {
 			DPFPRINTF(LOG_ERR,
 			    "pf_route: pf_map_addr() failed.");
+			KERNEL_UNLOCK();
 			goto bad;
 		}
 
@@ -5545,15 +5570,20 @@ pf_route(struct mbuf **m, struct pf_rule
 			ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
 		}
 	}
-	if (ifp == NULL)
+
+	KERNEL_UNLOCK();
+
+	if (ifp == NULL) {
 		goto bad;
+	}
 
 	if (oifp != ifp) {
 		if (pf_test(AF_INET, PF_OUT, ifp, &m0) != PF_PASS)
 			goto bad;
-		else if (m0 == NULL)
+		else if (m0 == NULL) {
 			goto done;
+		}
 
 		if (m0->m_len < sizeof(struct ip)) {
 			DPFPRINTF(LOG_ERR,
 			    "pf_route: m0->m_len < sizeof(struct ip)");
@@ -5642,21 +5672,27 @@ pf_route6(struct mbuf **m, struct pf_rul
 	if ((*m)->m_pkthdr.pf.routed++ > 3) {
 		m0 = *m;
 		*m = NULL;
+		KERNEL_UNLOCK();
 		goto bad;
 	}
 
 	if (r->rt == PF_DUPTO) {
-		if ((m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT)) == NULL)
+		if ((m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT)) == NULL) {
+			KERNEL_UNLOCK();
 			return;
+		}
 	} else {
-		if ((r->rt == PF_REPLYTO) == (r->direction == dir))
+		if ((r->rt == PF_REPLYTO) == (r->direction == dir)) {
+			KERNEL_UNLOCK();
 			return;
+		}
 		m0 = *m;
 	}
 
 	if (m0->m_len < sizeof(struct ip6_hdr)) {
 		DPFPRINTF(LOG_ERR,
 		    "pf_route6: m0->m_len < sizeof(struct ip6_hdr)");
+		KERNEL_UNLOCK();
 		goto bad;
 	}
 	ip6 = mtod(m0, struct ip6_hdr *);
@@ -5669,6 +5705,7 @@ pf_route6(struct mbuf **m, struct pf_rul
 
 	if (!r->rt) {
 		m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
+		KERNEL_UNLOCK();
 		ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL);
 		return;
 	}
@@ -5679,6 +5716,7 @@ pf_route6(struct mbuf **m, struct pf_rul
 		    &naddr, NULL, sns, &r->route, PF_SN_ROUTE)) {
 			DPFPRINTF(LOG_ERR,
 			    "pf_route6: pf_map_addr() failed.");
+			KERNEL_UNLOCK();
 			goto bad;
 		}
 		if (!PF_AZERO(&naddr, AF_INET6))
@@ -5691,6 +5729,9 @@ pf_route6(struct mbuf **m, struct pf_rul
 			    &s->rt_addr, AF_INET6);
 		ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
 	}
+
+	KERNEL_UNLOCK();
+
 	if (ifp == NULL)
 		goto bad;
 
@@ -6349,6 +6390,8 @@ pf_test(sa_family_t af, int fwdir, struc
 		return (PF_PASS);
 	}
 
+	KERNEL_LOCK();
+
 	action = pf_setup_pdesc(&pd, &pdhdrs, af, dir, kif, *m0, &reason);
 	if (action != PF_PASS) {
 #if NPFLOG > 0
@@ -6589,6 +6632,7 @@ done:
 	case PF_DEFER:
 		*m0 = NULL;
 		action = PF_PASS;
+		KERNEL_UNLOCK();
 		break;
 	case PF_DIVERT:
 		switch (pd.af) {
@@ -6604,6 +6648,7 @@ done:
 			break;
#endif /* INET6 */
 		}
+		KERNEL_UNLOCK();
 		action = PF_PASS;
 		break;
#ifdef INET6
@@ -6614,10 +6659,20 @@ done:
 			action = PF_DROP;
 			break;
 		}
-		if (pd.naf == AF_INET)
+		switch (pd.naf) {
+		case AF_INET:
 			pf_route(&pd.m, r, dir, kif->pfik_ifp, s);
-		if (pd.naf == AF_INET6)
+			break;
+		case AF_INET6:
 			pf_route6(&pd.m, r, dir, kif->pfik_ifp, s);
+			break;
+		default:
+			/*
+			 * pf_route*() above unlocks for us; do it here.
+			 */
+			KERNEL_UNLOCK();
+			m_freem(pd.m);
+		}
 		*m0 = NULL;
 		action = PF_PASS;
 		break;
@@ -6634,7 +6689,15 @@ done:
 			pf_route6(m0, r, pd.dir, pd.kif->pfik_ifp, s);
 			break;
#endif /* INET6 */
+		default:
+			/*
+			 * pf_route*() above unlocks the kernel lock
+			 * for us; do it here ourselves.
+			 */
+			KERNEL_UNLOCK();
 		}
+	} else {
+		KERNEL_UNLOCK();
 	}
 	break;
 }
Index: pf_if.c
===================================================================
RCS file: /cvs/src/sys/net/pf_if.c,v
retrieving revision 1.79
diff -u -p -r1.79 pf_if.c
--- pf_if.c	21 Jul 2015 02:32:04 -0000	1.79
+++ pf_if.c	26 Aug 2015 14:11:17 -0000
@@ -216,10 +216,10 @@ void
 pfi_attach_ifnet(struct ifnet *ifp)
 {
 	struct pfi_kif *kif;
-	int s;
+	PF_LOCK_INSTANCE(s);
 
 	pfi_initialize();
-	s = splsoftnet();
+	PF_LOCK(s);
 	pfi_update++;
 	if ((kif = pfi_kif_get(ifp->if_xname)) == NULL)
 		panic("pfi_kif_get failed");
@@ -234,19 +234,19 @@ pfi_attach_ifnet(struct ifnet *ifp)
 
 	pfi_kif_update(kif);
 
-	splx(s);
+	PF_UNLOCK(s);
 }
 
 void
 pfi_detach_ifnet(struct ifnet *ifp)
 {
-	int s;
 	struct pfi_kif *kif;
+	PF_LOCK_INSTANCE(s);
 
 	if ((kif = (struct pfi_kif *)ifp->if_pf_kif) == NULL)
 		return;
 
-	s = splsoftnet();
+	PF_LOCK(s);
 	pfi_update++;
 	hook_disestablish(ifp->if_addrhooks, kif->pfik_ah_cookie);
 	pfi_kif_update(kif);
@@ -260,17 +260,17 @@ pfi_detach_ifnet(struct ifnet *ifp)
 	ifp->if_pf_kif = NULL;
 	pfi_kif_unref(kif, PFI_KIF_REF_NONE);
 
-	splx(s);
+	PF_UNLOCK(s);
 }
 
 void
 pfi_attach_ifgroup(struct ifg_group *ifg)
 {
 	struct pfi_kif *kif;
-	int s;
+	PF_LOCK_INSTANCE(s);
 
 	pfi_initialize();
-	s = splsoftnet();
+	PF_LOCK(s);
 	pfi_update++;
 	if ((kif = pfi_kif_get(ifg->ifg_group)) == NULL)
 		panic("pfi_kif_get failed");
@@ -278,41 +278,41 @@ pfi_attach_ifgroup(struct ifg_group *ifg
 	kif->pfik_group = ifg;
 	ifg->ifg_pf_kif = (caddr_t)kif;
 
-	splx(s);
+	PF_UNLOCK(s);
 }
 
 void
 pfi_detach_ifgroup(struct ifg_group *ifg)
 {
-	int s;
 	struct pfi_kif *kif;
+	PF_LOCK_INSTANCE(s);
 
 	if ((kif = (struct pfi_kif *)ifg->ifg_pf_kif) == NULL)
 		return;
 
-	s = splsoftnet();
+	PF_LOCK(s);
 	pfi_update++;
 
 	kif->pfik_group = NULL;
 	ifg->ifg_pf_kif = NULL;
 	pfi_kif_unref(kif, PFI_KIF_REF_NONE);
 
-	splx(s);
+	PF_UNLOCK(s);
 }
 
 void
 pfi_group_change(const char *group)
 {
 	struct pfi_kif *kif;
-	int s;
+	PF_LOCK_INSTANCE(s);
 
-	s = splsoftnet();
+	PF_LOCK(s);
 	pfi_update++;
 	if ((kif = pfi_kif_get(group)) == NULL)
 		panic("pfi_kif_get failed");
 
 	pfi_kif_update(kif);
 
-	splx(s);
+	PF_UNLOCK(s);
 }
 
 int
@@ -354,7 +354,8 @@ pfi_dynaddr_setup(struct pf_addr_wrap *a
 	struct pfi_dynaddr	*dyn;
 	char			 tblname[PF_TABLE_NAME_SIZE];
 	struct pf_ruleset	*ruleset = NULL;
-	int			 s, rv = 0;
+	int			 rv = 0;
+	PF_LOCK_INSTANCE(s);
 
 	if (aw->type != PF_ADDR_DYNIFTL)
 		return (0);
@@ -362,7 +363,7 @@ pfi_dynaddr_setup(struct pf_addr_wrap *a
 	    == NULL)
 		return (1);
 
-	s = splsoftnet();
+	PF_LOCK(s);
 	if (!strcmp(aw->v.ifname, "self"))
 		dyn->pfid_kif = pfi_kif_get(IFG_ALL);
 	else
@@ -405,7 +406,7 @@ pfi_dynaddr_setup(struct pf_addr_wrap *a
 	TAILQ_INSERT_TAIL(&dyn->pfid_kif->pfik_dynaddrs, dyn, entry);
 	aw->p.dyn = dyn;
 	pfi_kif_update(dyn->pfid_kif);
-	splx(s);
+	PF_UNLOCK(s);
 	return (0);
 
 _bad:
@@ -416,7 +417,7 @@ _bad:
 	if (dyn->pfid_kif != NULL)
 		pfi_kif_unref(dyn->pfid_kif, PFI_KIF_REF_RULE);
 	pool_put(&pfi_addr_pl, dyn);
-	splx(s);
+	PF_UNLOCK(s);
 	return (rv);
 }
 
@@ -587,13 +588,13 @@ pfi_address_add(struct sockaddr *sa, sa_
 void
 pfi_dynaddr_remove(struct pf_addr_wrap *aw)
 {
-	int s;
+	PF_LOCK_INSTANCE(s);
 
 	if (aw->type != PF_ADDR_DYNIFTL || aw->p.dyn == NULL ||
 	    aw->p.dyn->pfid_kif == NULL || aw->p.dyn->pfid_kt == NULL)
 		return;
 
-	s = splsoftnet();
+	PF_LOCK(s);
 	TAILQ_REMOVE(&aw->p.dyn->pfid_kif->pfik_dynaddrs, aw->p.dyn, entry);
 	pfi_kif_unref(aw->p.dyn->pfid_kif, PFI_KIF_REF_RULE);
 	aw->p.dyn->pfid_kif = NULL;
@@ -601,7 +602,7 @@ pfi_dynaddr_remove(struct pf_addr_wrap *
 	aw->p.dyn->pfid_kt = NULL;
 	pool_put(&pfi_addr_pl, aw->p.dyn);
 	aw->p.dyn = NULL;
-	splx(s);
+	PF_UNLOCK(s);
 }
 
 void
@@ -616,13 +617,13 @@ pfi_dynaddr_copyout(struct pf_addr_wrap
 void
 pfi_kifaddr_update(void *v)
 {
-	int s;
 	struct pfi_kif *kif = (struct pfi_kif *)v;
+	PF_LOCK_INSTANCE(s);
 
-	s = splsoftnet();
+	PF_LOCK(s);
 	pfi_update++;
 	pfi_kif_update(kif);
-	splx(s);
+	PF_UNLOCK(s);
 }
 
 int
@@ -638,23 +639,24 @@ pfi_update_status(const char *name, stru
 	struct pfi_kif_cmp	 key;
 	struct ifg_member	 p_member, *ifgm;
 	TAILQ_HEAD(, ifg_member) ifg_members;
-	int			 i, j, k, s;
+	int			 i, j, k;
+	PF_LOCK_INSTANCE(s);
 
-	s = splsoftnet();
+	PF_LOCK(s);
 	if (*name == '\0' && pfs == NULL) {
 		RB_FOREACH(p, pfi_ifhead, &pfi_ifs) {
 			bzero(p->pfik_packets, sizeof(p->pfik_packets));
 			bzero(p->pfik_bytes, sizeof(p->pfik_bytes));
 			p->pfik_tzero = time_second;
 		}
-		splx(s);
+		PF_UNLOCK(s);
 		return;
 	}
 
 	strlcpy(key.pfik_name, name, sizeof(key.pfik_name));
 	p = RB_FIND(pfi_ifhead, &pfi_ifs, (struct pfi_kif *)&key);
 	if (p == NULL) {
-		splx(s);
+		PF_UNLOCK(s);
 		return;
 	}
 	if (p->pfik_group != NULL) {
@@ -692,16 +694,17 @@ pfi_update_status(const char *name, stru
 				    p->pfik_bytes[i][j][k];
 			}
 	}
-	splx(s);
+	PF_UNLOCK(s);
 }
 
 int
 pfi_get_ifaces(const char *name, struct pfi_kif *buf, int *size)
 {
 	struct pfi_kif	*p, *nextp;
-	int		 s, n = 0;
+	int		 n = 0;
+	PF_LOCK_INSTANCE(s);
 
-	s = splsoftnet();
+	PF_LOCK(s);
 	for (p = RB_MIN(pfi_ifhead, &pfi_ifs); p; p = nextp) {
 		nextp = RB_NEXT(pfi_ifhead, &pfi_ifs, p);
 		if (pfi_skip_if(name, p))
 			continue;
@@ -712,14 +715,14 @@ pfi_get_ifaces(const char *name, struct
 			pfi_kif_ref(p, PFI_KIF_REF_RULE);
 			if (copyout(p, buf++, sizeof(*buf))) {
 				pfi_kif_unref(p, PFI_KIF_REF_RULE);
-				splx(s);
+				PF_UNLOCK(s);
 				return (EFAULT);
 			}
 			nextp = RB_NEXT(pfi_ifhead, &pfi_ifs, p);
 			pfi_kif_unref(p, PFI_KIF_REF_RULE);
 		}
 	}
-	splx(s);
+	PF_UNLOCK(s);
 	*size = n;
 	return (0);
 }
@@ -750,15 +753,15 @@ int
 pfi_set_flags(const char *name, int flags)
 {
 	struct pfi_kif	*p;
-	int		 s;
+	PF_LOCK_INSTANCE(s);
 
-	s = splsoftnet();
+	PF_LOCK(s);
 	RB_FOREACH(p, pfi_ifhead, &pfi_ifs) {
 		if (pfi_skip_if(name, p))
 			continue;
 		p->pfik_flags_new = p->pfik_flags | flags;
 	}
-	splx(s);
+	PF_UNLOCK(s);
 	return (0);
 }
 
@@ -766,15 +769,15 @@ int
 pfi_clear_flags(const char *name, int flags)
 {
 	struct pfi_kif	*p;
-	int		 s;
+	PF_LOCK_INSTANCE(s);
 
-	s = splsoftnet();
+	PF_LOCK(s);
 	RB_FOREACH(p, pfi_ifhead, &pfi_ifs) {
 		if (pfi_skip_if(name, p))
 			continue;
 		p->pfik_flags_new = p->pfik_flags & ~flags;
 	}
-	splx(s);
+	PF_UNLOCK(s);
 	return (0);
 }
 
@@ -782,12 +785,12 @@ void
 pfi_xcommit(void)
 {
 	struct pfi_kif	*p;
-	int		 s;
+	PF_LOCK_INSTANCE(s);
 
-	s = splsoftnet();
+	PF_LOCK(s);
 	RB_FOREACH(p, pfi_ifhead, &pfi_ifs)
 		p->pfik_flags = p->pfik_flags_new;
-	splx(s);
+	PF_UNLOCK(s);
 }
 
 /* from pf_print_state.c */
Index: pf_ioctl.c
===================================================================
RCS file: /cvs/src/sys/net/pf_ioctl.c,v
retrieving revision 1.289
diff -u -p -r1.289 pf_ioctl.c
--- pf_ioctl.c	21 Jul 2015 02:32:04 -0000	1.289
+++ pf_ioctl.c	26 Aug 2015 14:11:18 -0000
@@ -689,8 +689,9 @@ pf_commit_rules(u_int32_t ticket, char *
 	struct pf_ruleset	*rs;
 	struct pf_rule		*rule, **old_array;
 	struct pf_rulequeue	*old_rules;
-	int			 s, error;
+	int			 error;
 	u_int32_t		 old_rcount;
+	PF_LOCK_INSTANCE(s);
 
 	rs = pf_find_ruleset(anchor);
 	if (rs == NULL || !rs->rules.inactive.open ||
@@ -705,7 +706,7 @@ pf_commit_rules(u_int32_t ticket, char *
 	}
 
 	/* Swap rules, keep the old. */
-	s = splsoftnet();
+	PF_LOCK(s);
 	old_rules = rs->rules.active.ptr;
 	old_rcount = rs->rules.active.rcount;
 	old_array = rs->rules.active.ptr_array;
@@ -730,7 +731,7 @@ pf_commit_rules(u_int32_t ticket, char *
 	rs->rules.inactive.rcount = 0;
 	rs->rules.inactive.open = 0;
 	pf_remove_if_empty_ruleset(rs);
-	splx(s);
+	PF_UNLOCK(s);
 
 	/* queue defs only in the main ruleset */
 	if (anchor[0])
@@ -804,11 +805,34 @@ pf_addr_copyout(struct pf_addr_wrap *add
 	pf_rtlabel_copyout(addr);
 }
 
+/*
+ * This macro defines the list of case statements for ioctl commands that
+ * must not be covered by the PF global locks: KERNEL_LOCK(), splsoftnet,
+ * pf_consistency_lock.
+ *
+ * For example, if we want to exclude the ioctl commands DIOCGETSTATE,
+ * DIOCGETSTATES, DIOCNATLOOK, DIOCGETSRCNODES, DIOCCLRSRCNODES, DIOCXBEGIN,
+ * DIOCXROLLBACK and DIOCXCOMMIT, then we define the macro as follows:
+ *	case DIOCGETSTATE:	\
+ *	case DIOCGETSTATES:	\
+ *	case DIOCNATLOOK:	\
+ *	case DIOCGETSRCNODES:	\
+ *	case DIOCCLRSRCNODES:	\
+ *	case DIOCXBEGIN:	\
+ *	case DIOCXROLLBACK:	\
+ *	case DIOCXCOMMIT:	\
+ *		break;
+ *
+ * The macro is a temporary hack; as soon as all of PF has been made MP
+ * friendly, the macro will be removed.
+ */
+#define PF_NO_GLOBAL_LOCK	/* is empty now */
+
 int
 pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
 {
-	int s;
 	int error = 0;
+	PF_LOCK_INSTANCE(s);
 
 	/* XXX keep in sync with switch() below */
 	if (securelevel > 1)
@@ -906,12 +930,22 @@ pfioctl(dev_t dev, u_long cmd, caddr_t a
 			return (EACCES);
 	}
 
-	if (flags & FWRITE)
-		rw_enter_write(&pf_consistency_lock);
-	else
-		rw_enter_read(&pf_consistency_lock);
+	/*
+	 * Select the ioctl commands that have to be guarded by the giant
+	 * lock. The giant lock here is either KERNEL_LOCK() (when PF gets
+	 * compiled with the MULTIPROCESSOR option) or splsoftnet (when PF
+	 * is part of a non-MP kernel).
+	 */
+	switch (cmd) {
+	PF_NO_GLOBAL_LOCK
+	default:
+		if (flags & FWRITE)
+			rw_enter_write(&pf_consistency_lock);
+		else
+			rw_enter_read(&pf_consistency_lock);
+		PF_LOCK(s);
+	}
 
-	s = splsoftnet();
 	switch (cmd) {
 
 	case DIOCSTART:
@@ -2305,11 +2339,16 @@ pfioctl(dev_t dev, u_long cmd, caddr_t a
 		break;
 	}
 fail:
-	splx(s);
-	if (flags & FWRITE)
-		rw_exit_write(&pf_consistency_lock);
-	else
-		rw_exit_read(&pf_consistency_lock);
+	switch (cmd) {
+	PF_NO_GLOBAL_LOCK
+	default:
+		if (flags & FWRITE)
+			rw_exit_write(&pf_consistency_lock);
+		else
+			rw_exit_read(&pf_consistency_lock);
+		PF_UNLOCK(s);
+	}
+
 	return (error);
 }
Index: pf_table.c
===================================================================
RCS file: /cvs/src/sys/net/pf_table.c,v
retrieving revision 1.113
diff -u -p -r1.113 pf_table.c
--- pf_table.c	20 Jul 2015 18:42:08 -0000	1.113
+++ pf_table.c	26 Aug 2015 14:11:18 -0000
@@ -791,7 +791,7 @@ pfr_lookup_addr(struct pfr_ktable *kt, s
 	union sockaddr_union	 sa, mask;
 	struct radix_node_head	*head;
 	struct pfr_kentry	*ke;
-	int			 s;
+	PF_LOCK_INSTANCE(s);
 
 	bzero(&sa, sizeof(sa));
 	switch (ad->pfra_af) {
@@ -810,9 +810,9 @@ pfr_lookup_addr(struct pfr_ktable *kt, s
 	}
 	if (ADDR_NETWORK(ad)) {
 		pfr_prepare_network(&mask, ad->pfra_af, ad->pfra_net);
-		s = splsoftnet(); /* rn_lookup makes use of globals */
+		PF_LOCK(s); /* rn_lookup makes use of globals */
 		ke = (struct pfr_kentry *)rn_lookup(&sa, &mask, head);
-		splx(s);
+		PF_UNLOCK(s);
 	} else {
 		ke = (struct pfr_kentry *)rn_match(&sa, head);
 		if (exact && ke && KENTRY_NETWORK(ke))
@@ -990,17 +990,17 @@ void
 pfr_clstats_kentries(struct pfr_kentryworkq *workq, time_t tzero,
    int negchange)
 {
 	struct pfr_kentry	*p;
-	int			 s;
+	PF_LOCK_INSTANCE(s);
 
 	SLIST_FOREACH(p, workq, pfrke_workq) {
-		s = splsoftnet();
+		PF_LOCK(s);
 		if (negchange)
 			p->pfrke_flags ^= PFRKE_FLAG_NOT;
 		if (p->pfrke_counters) {
 			pool_put(&pfr_kcounters_pl, p->pfrke_counters);
 			p->pfrke_counters = NULL;
 		}
-		splx(s);
+		PF_UNLOCK(s);
 		p->pfrke_tzero = tzero;
 	}
 }
@@ -1061,7 +1061,7 @@ pfr_route_kentry(struct pfr_ktable *kt,
 	union sockaddr_union	 mask;
 	struct radix_node	*rn;
 	struct radix_node_head	*head;
-	int			 s;
+	PF_LOCK_INSTANCE(s);
 
 	bzero(ke->pfrke_node, sizeof(ke->pfrke_node));
 	switch (ke->pfrke_af) {
@@ -1077,13 +1077,13 @@ pfr_route_kentry(struct pfr_ktable *kt,
 		unhandled_af(ke->pfrke_af);
 	}
 
-	s = splsoftnet();
+	PF_LOCK(s);
 	if (KENTRY_NETWORK(ke)) {
 		pfr_prepare_network(&mask, ke->pfrke_af, ke->pfrke_net);
 		rn = rn_addroute(&ke->pfrke_sa, &mask, head, ke->pfrke_node, 0);
 	} else
 		rn = rn_addroute(&ke->pfrke_sa, NULL, head, ke->pfrke_node, 0);
-	splx(s);
+	PF_UNLOCK(s);
 
 	return (rn == NULL ? -1 : 0);
 }
 
@@ -1094,7 +1094,7 @@ pfr_unroute_kentry(struct pfr_ktable *kt
 	union sockaddr_union	 mask;
 	struct radix_node	*rn;
 	struct radix_node_head	*head;
-	int			 s;
+	PF_LOCK_INSTANCE(s);
 
 	switch (ke->pfrke_af) {
 	case AF_INET:
@@ -1109,13 +1109,13 @@ pfr_unroute_kentry(struct pfr_ktable *kt
 		unhandled_af(ke->pfrke_af);
 	}
 
-	s = splsoftnet();
+	PF_LOCK(s);
 	if (KENTRY_NETWORK(ke)) {
 		pfr_prepare_network(&mask, ke->pfrke_af, ke->pfrke_net);
 		rn = rn_delete(&ke->pfrke_sa, &mask, head, NULL);
 	} else
 		rn = rn_delete(&ke->pfrke_sa, NULL, head, NULL);
-	splx(s);
+	PF_UNLOCK(s);
 
 	if (rn == NULL) {
 		DPFPRINTF(LOG_ERR, "pfr_unroute_kentry: delete failed.\n");
@@ -1170,7 +1170,8 @@ pfr_walktree(struct radix_node *rn, void
 {
 	struct pfr_kentry	*ke = (struct pfr_kentry *)rn;
 	struct pfr_walktree	*w = arg;
-	int			 s, flags = w->pfrw_flags;
+	int			 flags = w->pfrw_flags;
+	PF_LOCK_INSTANCE(s);
 
 	switch (w->pfrw_op) {
 	case PFRW_MARK:
@@ -1200,7 +1201,7 @@ pfr_walktree(struct radix_node *rn, void
 
 			pfr_copyout_addr(&as.pfras_a, ke);
 
-			s = splsoftnet();
+			PF_LOCK(s);
 			if (ke->pfrke_counters) {
 				bcopy(ke->pfrke_counters->pfrkc_packets,
 				    as.pfras_packets, sizeof(as.pfras_packets));
@@ -1212,7 +1213,7 @@ pfr_walktree(struct radix_node *rn, void
 				bzero(as.pfras_bytes, sizeof(as.pfras_bytes));
 				as.pfras_a.pfra_fback = PFR_FB_NOCOUNT;
 			}
-			splx(s);
+			PF_UNLOCK(s);
 			as.pfras_tzero = ke->pfrke_tzero;
 
 			if (COPYOUT(&as, w->pfrw_astats, sizeof(as), flags))
@@ -1447,8 +1448,9 @@ pfr_get_tstats(struct pfr_table *filter,
 {
 	struct pfr_ktable	*p;
 	struct pfr_ktableworkq	 workq;
-	int			 s, n, nn;
+	int			 n, nn;
 	time_t			 tzero = time_second;
+	PF_LOCK_INSTANCE(s);
 
 	/* XXX PFR_FLAG_CLSTATS disabled */
 	ACCEPT_FLAGS(flags, PFR_FLAG_ALLRSETS);
@@ -1467,12 +1469,12 @@ pfr_get_tstats(struct pfr_table *filter,
 			continue;
 		if (n-- <= 0)
 			continue;
-		s = splsoftnet();
+		PF_LOCK(s);
 		if (COPYOUT(&p->pfrkt_ts, tbl++, sizeof(*tbl), flags)) {
-			splx(s);
+			PF_UNLOCK(s);
 			return (EFAULT);
 		}
-		splx(s);
+		PF_UNLOCK(s);
 		SLIST_INSERT_HEAD(&workq, p, pfrkt_workq);
 	}
 	if (flags & PFR_FLAG_CLSTATS)
@@ -1992,17 +1994,17 @@ void
 pfr_clstats_ktable(struct pfr_ktable *kt, time_t tzero, int recurse)
 {
 	struct pfr_kentryworkq	 addrq;
-	int			 s;
+	PF_LOCK_INSTANCE(s);
 
 	if (recurse) {
 		pfr_enqueue_addrs(kt, &addrq, NULL, 0);
 		pfr_clstats_kentries(&addrq, tzero, 0);
 	}
-	s = splsoftnet();
+	PF_LOCK(s);
 	bzero(kt->pfrkt_packets, sizeof(kt->pfrkt_packets));
 	bzero(kt->pfrkt_bytes, sizeof(kt->pfrkt_bytes));
 	kt->pfrkt_match = kt->pfrkt_nomatch = 0;
-	splx(s);
+	PF_UNLOCK(s);
 	kt->pfrkt_tzero = tzero;
 }
@@ -2523,13 +2525,13 @@ void
 pfr_dynaddr_update(struct pfr_ktable *kt, struct pfi_dynaddr *dyn)
 {
 	struct pfr_walktree	 w;
-	int			 s;
+	PF_LOCK_INSTANCE(s);
 
 	bzero(&w, sizeof(w));
 	w.pfrw_op = PFRW_DYNADDR_UPDATE;
 	w.pfrw_dyn = dyn;
 
-	s = splsoftnet();
+	PF_LOCK(s);
 	dyn->pfid_acnt4 = 0;
 	dyn->pfid_acnt6 = 0;
 	switch (dyn->pfid_af) {
@@ -2548,7 +2550,7 @@ pfr_dynaddr_update(struct pfr_ktable *kt
 	default:
 		unhandled_af(dyn->pfid_af);
 	}
-	splx(s);
+	PF_UNLOCK(s);
 }
 
 void
Index: pfvar.h
===================================================================
RCS file: /cvs/src/sys/net/pfvar.h,v
retrieving revision 1.420
diff -u -p -r1.420 pfvar.h
--- pfvar.h	19 Aug 2015 21:22:41 -0000	1.420
+++ pfvar.h	26 Aug 2015 14:11:18 -0000
@@ -1909,5 +1909,46 @@ void	pf_cksum(struct pf_pdesc *, stru
 
 #endif /* _KERNEL */
 
+/*
+ * The PF_LOCK_* macros are a compatibility bridge between MP and non-MP PF.
+ * The idea is to use KERNEL_LOCK()/KERNEL_UNLOCK() when PF gets compiled
+ * with MULTIPROCESSOR support enabled.
+ * If the MULTIPROCESSOR compile time option is not present, the macros
+ * will use splsoftnet()/splx().
+ *
+ * This should buy us enough lock freedom to deploy a phased approach to
+ * dismantling the giant lock in the PF code.
+ */
+#ifdef MULTIPROCESSOR
+#define PF_LOCK_INSTANCE(_s_)	((void)0)
+
+#define PF_LOCK(_s_) do {			\
+		KERNEL_LOCK();			\
+	} while (0)
+
+#define PF_UNLOCK(_s_) do {			\
+		KERNEL_ASSERT_LOCKED();		\
+		KERNEL_UNLOCK();		\
+	} while (0)
+
+#define PF_ASSERT_LOCKED(_s_)	KERNEL_ASSERT_LOCKED()
+
+#define PF_ASSERT_UNLOCKED(_s_)	KERNEL_ASSERT_UNLOCKED()
+
+#else /* !MULTIPROCESSOR */
+
+#define PF_LOCK_INSTANCE(_s_)	int _s_
+
+#define PF_LOCK(_s_) do {			\
+		_s_ = splsoftnet();		\
+	} while (0)
+
+#define PF_UNLOCK(_s_) do {			\
+		splx(_s_);			\
+	} while (0)
+
+#define PF_ASSERT_LOCKED(_s_)	((void)0)
+#define PF_ASSERT_UNLOCKED(_s_)	((void)0)
+
+#endif /* MULTIPROCESSOR */
 #endif /* _NET_PFVAR_H_ */