Hello, time has come to backout stuff, which has been baked just before Christmas 2015. Although there was some progress over the last few days the things are not improving that fast to become good enough for 5.9 release.
I took patch (~sthen/pf-statekey-backout.diff) prepared by sthen last week and did some minor tweaks so it applies cleanly. Patch removes the offending KASSERT() and related changes. I'll try to post partial patches in next few days, so brave volunteers will be able to continue testing. So it will be possible re-commit the stuff, I'd like to back out now, in much better shape. thanks and regards sasha --------8<---------------8<---------------8<------------------8<-------- Index: kern/uipc_mbuf.c =================================================================== RCS file: /cvs/src/sys/kern/uipc_mbuf.c,v retrieving revision 1.217 diff -u -p -r1.217 uipc_mbuf.c --- kern/uipc_mbuf.c 7 Jan 2016 22:23:13 -0000 1.217 +++ kern/uipc_mbuf.c 30 Jan 2016 22:23:31 -0000 @@ -1,4 +1,4 @@ -/* $OpenBSD: uipc_mbuf.c,v 1.217 2016/01/07 22:23:13 sashan Exp $ */ +/* $OpenBSD: uipc_mbuf.c,v 1.216 2015/12/23 21:04:55 jasper Exp $ */ /* $NetBSD: uipc_mbuf.c,v 1.15.4.1 1996/06/13 17:11:44 cgd Exp $ */ /* @@ -72,8 +72,6 @@ * Research Laboratory (NRL). */ -#include "pf.h" - #include <sys/param.h> #include <sys/systm.h> #include <sys/malloc.h> @@ -87,9 +85,6 @@ #include <sys/socket.h> #include <sys/socketvar.h> #include <net/if.h> -#if NPF > 0 -#include <net/pfvar.h> -#endif /* NPF > 0 */ #include <uvm/uvm_extern.h> @@ -266,10 +261,6 @@ m_resethdr(struct mbuf *m) /* delete all mbuf tags to reset the state */ m_tag_delete_chain(m); -#if NPF > 0 - pf_pkt_unlink_state_key(m); -#endif /* NPF > 0 */ - /* like m_inithdr(), but keep any associated data and mbufs */ memset(&m->m_pkthdr, 0, sizeof(m->m_pkthdr)); m->m_pkthdr.pf.prio = IFQ_DEFPRIO; @@ -359,12 +350,8 @@ m_free(struct mbuf *m) if (n) n->m_flags |= M_ZEROIZE; } - if (m->m_flags & M_PKTHDR) { + if (m->m_flags & M_PKTHDR) m_tag_delete_chain(m); -#if NPF > 0 - pf_pkt_unlink_state_key(m); -#endif /* NPF > 0 */ - } if (m->m_flags & M_EXT) m_extfree(m); @@ -1214,10 +1201,6 @@ m_dup_pkthdr(struct mbuf *to, struct mbu to->m_flags = (to->m_flags & (M_EXT | M_EXTWR)); to->m_flags |= (from->m_flags & M_COPYFLAGS); to->m_pkthdr = from->m_pkthdr; - -#if NPF > 0 - pf_pkt_state_key_ref(to); -#endif /* NPF > 0 */ SLIST_INIT(&to->m_pkthdr.ph_tags); Index: net/if_pfsync.c =================================================================== RCS file: /cvs/src/sys/net/if_pfsync.c,v retrieving revision 1.226 diff -u -p -r1.226 if_pfsync.c --- net/if_pfsync.c 27 Jan 2016 04:35:56 -0000 1.226 +++ net/if_pfsync.c 30 Jan 2016 22:23:33 -0000 @@ -523,7 +523,6 @@ pfsync_state_import(struct pfsync_state skw->port[0] = sp->key[PF_SK_WIRE].port[0]; skw->port[1] = sp->key[PF_SK_WIRE].port[1]; skw->rdomain = ntohs(sp->key[PF_SK_WIRE].rdomain); - PF_REF_INIT(skw->refcnt); skw->proto = sp->proto; if (!(skw->af = sp->key[PF_SK_WIRE].af)) skw->af = sp->af; @@ -533,7 +532,6 @@ pfsync_state_import(struct pfsync_state sks->port[0] = sp->key[PF_SK_STACK].port[0]; sks->port[1] = sp->key[PF_SK_STACK].port[1]; sks->rdomain = ntohs(sp->key[PF_SK_STACK].rdomain); - PF_REF_INIT(sks->refcnt); if (!(sks->af = sp->key[PF_SK_STACK].af)) sks->af = sp->af; if (sks->af != skw->af) { Index: net/pf.c =================================================================== RCS file: /cvs/src/sys/net/pf.c,v retrieving revision 1.964 diff -u -p -r1.964 pf.c --- net/pf.c 25 Jan 2016 18:49:57 -0000 1.964 +++ net/pf.c 30 Jan 2016 22:23:41 -0000 @@ -1,4 +1,4 @@ -/* $OpenBSD: pf.c,v 1.964 2016/01/25 18:49:57 sashan Exp $ */ +/* $OpenBSD: pf.c,v 1.962 2015/12/23 21:04:55 jasper Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -231,11 +231,6 @@ int pf_step_out_of_anchor(int *, stru void pf_counters_inc(int, struct pf_pdesc *, struct pf_state *, struct pf_rule *, struct pf_rule *); -void pf_state_key_link(struct pf_state_key *, - struct pf_state_key *); -void pf_inpcb_unlink_state_key(struct inpcb *); -void pf_state_key_unlink_reverse(struct pf_state_key *); - #if NPFLOG > 0 void pf_log_matches(struct pf_pdesc *, struct pf_rule *, struct pf_rule *, struct pf_ruleset *, @@ -699,9 +694,8 @@ pf_state_key_attach(struct pf_state_key } pool_put(&pf_state_key_pl, sk); s->key[idx] = cur; - } else { + } else s->key[idx] = sk; - } if ((si = pool_get(&pf_state_item_pl, PR_NOWAIT)) == NULL) { pf_state_key_detach(s, idx); @@ -738,7 +732,6 @@ void pf_state_key_detach(struct pf_state *s, int idx) { struct pf_state_item *si; - struct pf_state_key *sk; if (s->key[idx] == NULL) return; @@ -752,15 +745,15 @@ pf_state_key_detach(struct pf_state *s, pool_put(&pf_state_item_pl, si); } - sk = s->key[idx]; - s->key[idx] = NULL; - if (TAILQ_EMPTY(&sk->states)) { - RB_REMOVE(pf_state_tree, &pf_statetbl, sk); - sk->removed = 1; - pf_state_key_unlink_reverse(sk); - pf_inpcb_unlink_state_key(sk->inp); - pf_state_key_unref(sk); + if (TAILQ_EMPTY(&s->key[idx]->states)) { + RB_REMOVE(pf_state_tree, &pf_statetbl, s->key[idx]); + if (s->key[idx]->reverse) + s->key[idx]->reverse->reverse = NULL; + if (s->key[idx]->inp) + s->key[idx]->inp->inp_pf_sk = NULL; + pool_put(&pf_state_key_pl, s->key[idx]); } + s->key[idx] = NULL; } struct pf_state_key * @@ -847,8 +840,6 @@ pf_state_key_setup(struct pf_pdesc *pd, sk1->proto = pd->proto; sk1->af = pd->af; sk1->rdomain = pd->rdomain; - PF_REF_INIT(sk1->refcnt); - sk1->removed = 0; if (rtableid >= 0) wrdom = rtable_l2(rtableid); @@ -880,8 +871,6 @@ pf_state_key_setup(struct pf_pdesc *pd, sk2->proto = pd->proto; sk2->af = pd->naf; sk2->rdomain = wrdom; - PF_REF_INIT(sk2->refcnt); - sk2->removed = 0; } else sk2 = sk1; @@ -997,7 +986,7 @@ struct pf_state * pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir, struct mbuf *m) { - struct pf_state_key *sk, *pkt_sk, *inp_sk; + struct pf_state_key *sk; struct pf_state_item *si; pf_status.fcounters[FCNT_STATE_SEARCH]++; @@ -1007,47 +996,31 @@ pf_find_state(struct pfi_kif *kif, struc addlog("\n"); } - inp_sk = NULL; - pkt_sk = NULL; - sk = NULL; - if (dir == PF_OUT) { - /* first if block deals with outbound forwarded packet */ - pkt_sk = m->m_pkthdr.pf.statekey; - if (pf_state_key_isvalid(pkt_sk) && - pf_state_key_isvalid(pkt_sk->reverse)) { - sk = pkt_sk->reverse; - } else { - pf_pkt_unlink_state_key(m); - pkt_sk = NULL; - } - - if (pkt_sk == NULL) { - /* here we deal with local outbound packet */ - if (m->m_pkthdr.pf.inp != NULL) { - inp_sk = m->m_pkthdr.pf.inp->inp_pf_sk; - if (pf_state_key_isvalid(inp_sk)) - sk = inp_sk; - else - pf_inpcb_unlink_state_key( - m->m_pkthdr.pf.inp); - } - } - } - - if (sk == NULL) { + if (dir == PF_OUT && m->m_pkthdr.pf.statekey && + m->m_pkthdr.pf.statekey->reverse) + sk = m->m_pkthdr.pf.statekey->reverse; + else if (dir == PF_OUT && m->m_pkthdr.pf.inp && + m->m_pkthdr.pf.inp->inp_pf_sk) + sk = m->m_pkthdr.pf.inp->inp_pf_sk; + else { if ((sk = RB_FIND(pf_state_tree, &pf_statetbl, (struct pf_state_key *)key)) == NULL) return (NULL); - if (dir == PF_OUT && pkt_sk && - pf_compare_state_keys(pkt_sk, sk, kif, dir) == 0) - pf_state_key_link(sk, pkt_sk); - else if (dir == PF_OUT) - pf_inp_link(m, m->m_pkthdr.pf.inp); + if (dir == PF_OUT && m->m_pkthdr.pf.statekey && + pf_compare_state_keys(m->m_pkthdr.pf.statekey, sk, + kif, dir) == 0) { + m->m_pkthdr.pf.statekey->reverse = sk; + sk->reverse = m->m_pkthdr.pf.statekey; + } else if (dir == PF_OUT && m->m_pkthdr.pf.inp && !sk->inp) { + m->m_pkthdr.pf.inp->inp_pf_sk = sk; + sk->inp = m->m_pkthdr.pf.inp; + } } - /* remove firewall data from outbound packet */ - if (dir == PF_OUT) - pf_pkt_addr_changed(m); + if (dir == PF_OUT) { + m->m_pkthdr.pf.statekey = NULL; + m->m_pkthdr.pf.inp = NULL; + } /* list is sorted, if-bound states before floating ones */ TAILQ_FOREACH(si, &sk->states, entry) @@ -6561,20 +6534,12 @@ done: if (action == PF_PASS && qid) pd.m->m_pkthdr.pf.qid = qid; if (pd.dir == PF_IN && s && s->key[PF_SK_STACK]) { - /* - * ASSERT() below fires whenever caller forgets to call - * pf_pkt_addr_changed(). This might happen when we deal with - * IP tunnels. - */ - KASSERT(pd.m->m_pkthdr.pf.statekey == NULL); - pd.m->m_pkthdr.pf.statekey = - pf_state_key_ref(s->key[PF_SK_STACK]); + pd.m->m_pkthdr.pf.statekey = s->key[PF_SK_STACK]; } if (pd.dir == PF_OUT && pd.m->m_pkthdr.pf.inp && !pd.m->m_pkthdr.pf.inp->inp_pf_sk && s && s->key[PF_SK_STACK] && !s->key[PF_SK_STACK]->inp) { - pd.m->m_pkthdr.pf.inp->inp_pf_sk = - pf_state_key_ref(s->key[PF_SK_STACK]); + pd.m->m_pkthdr.pf.inp->inp_pf_sk = s->key[PF_SK_STACK]; s->key[PF_SK_STACK]->inp = pd.m->m_pkthdr.pf.inp; } @@ -6745,7 +6710,7 @@ pf_cksum(struct pf_pdesc *pd, struct mbu void pf_pkt_addr_changed(struct mbuf *m) { - pf_pkt_unlink_state_key(m); + m->m_pkthdr.pf.statekey = NULL; m->m_pkthdr.pf.inp = NULL; } @@ -6753,40 +6718,25 @@ struct inpcb * pf_inp_lookup(struct mbuf *m) { struct inpcb *inp = NULL; - struct pf_state_key *sk = m->m_pkthdr.pf.statekey; - if (!pf_state_key_isvalid(sk)) - pf_pkt_unlink_state_key(m); - else + if (m->m_pkthdr.pf.statekey) { inp = m->m_pkthdr.pf.statekey->inp; - - if (inp && inp->inp_pf_sk) - KASSERT(m->m_pkthdr.pf.statekey == inp->inp_pf_sk); - + if (inp && inp->inp_pf_sk) + KASSERT(m->m_pkthdr.pf.statekey == inp->inp_pf_sk); + } return (inp); } void pf_inp_link(struct mbuf *m, struct inpcb *inp) { - struct pf_state_key *sk = m->m_pkthdr.pf.statekey; - - if (!pf_state_key_isvalid(sk)) { - pf_pkt_unlink_state_key(m); - return; - } - - /* - * we don't need to grab PF-lock here. At worst case we link inp to - * state, which might be just being marked as deleted by another - * thread. - */ - if (inp && !sk->inp && !inp->inp_pf_sk) { - sk->inp = inp; - inp->inp_pf_sk = pf_state_key_ref(sk); + if (m->m_pkthdr.pf.statekey && inp && + !m->m_pkthdr.pf.statekey->inp && !inp->inp_pf_sk) { + m->m_pkthdr.pf.statekey->inp = inp; + inp->inp_pf_sk = m->m_pkthdr.pf.statekey; } /* The statekey has finished finding the inp, it is no longer needed. */ - pf_pkt_unlink_state_key(m); + m->m_pkthdr.pf.statekey = NULL; } void @@ -6794,21 +6744,10 @@ pf_inp_unlink(struct inpcb *inp) { if (inp->inp_pf_sk) { inp->inp_pf_sk->inp = NULL; - pf_inpcb_unlink_state_key(inp); + inp->inp_pf_sk = NULL; } } -void -pf_state_key_link(struct pf_state_key *sk, struct pf_state_key *pkt_sk) -{ - /* - * Assert will not wire as long as we are called by pf_find_state() - */ - KASSERT((pkt_sk->reverse == NULL) && (sk->reverse == NULL)); - pkt_sk->reverse = pf_state_key_ref(sk); - sk->reverse = pf_state_key_ref(pkt_sk); -} - #if NPFLOG > 0 void pf_log_matches(struct pf_pdesc *pd, struct pf_rule *rm, struct pf_rule *am, @@ -6825,66 +6764,3 @@ pf_log_matches(struct pf_pdesc *pd, stru PFLOG_PACKET(pd, PFRES_MATCH, rm, am, ruleset, ri->r); } #endif /* NPFLOG > 0 */ - -struct pf_state_key * -pf_state_key_ref(struct pf_state_key *sk) -{ - if (sk != NULL) - PF_REF_TAKE(sk->refcnt); - - return (sk); -} - -void -pf_state_key_unref(struct pf_state_key *sk) -{ - if ((sk != NULL) && PF_REF_RELE(sk->refcnt)) { - /* state key must be removed from tree */ - KASSERT(!pf_state_key_isvalid(sk)); - /* state key must be unlinked from reverse key */ - KASSERT(sk->reverse == NULL); - /* state key must be unlinked from socket */ - KASSERT((sk->inp == NULL) || (sk->inp->inp_pf_sk == NULL)); - sk->inp = NULL; - pool_put(&pf_state_key_pl, sk); - } -} - -int -pf_state_key_isvalid(struct pf_state_key *sk) -{ - return ((sk != NULL) && (sk->removed == 0)); -} - -void -pf_pkt_unlink_state_key(struct mbuf *m) -{ - pf_state_key_unref(m->m_pkthdr.pf.statekey); - m->m_pkthdr.pf.statekey = NULL; -} - -void -pf_pkt_state_key_ref(struct mbuf *m) -{ - pf_state_key_ref(m->m_pkthdr.pf.statekey); -} - -void -pf_inpcb_unlink_state_key(struct inpcb *inp) -{ - if (inp != NULL) { - pf_state_key_unref(inp->inp_pf_sk); - inp->inp_pf_sk = NULL; - } -} - -void -pf_state_key_unlink_reverse(struct pf_state_key *sk) -{ - if ((sk != NULL) && (sk->reverse != NULL)) { - pf_state_key_unref(sk->reverse->reverse); - sk->reverse->reverse = NULL; - pf_state_key_unref(sk->reverse); - sk->reverse = NULL; - } -} Index: net/pfvar.h =================================================================== RCS file: /cvs/src/sys/net/pfvar.h,v retrieving revision 1.429 diff -u -p -r1.429 pfvar.h --- net/pfvar.h 7 Jan 2016 22:23:13 -0000 1.429 +++ net/pfvar.h 30 Jan 2016 22:23:43 -0000 @@ -1,4 +1,4 @@ -/* $OpenBSD: pfvar.h,v 1.429 2016/01/07 22:23:13 sashan Exp $ */ +/* $OpenBSD: pfvar.h,v 1.428 2015/12/23 21:04:55 jasper Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -38,9 +38,6 @@ #include <sys/tree.h> #include <sys/rwlock.h> #include <sys/syslimits.h> -#include <sys/refcnt.h> - -#include <netinet/in.h> #include <net/radix.h> #include <net/route.h> @@ -58,11 +55,6 @@ struct ip6_hdr; #endif #endif -typedef struct refcnt pf_refcnt_t; -#define PF_REF_INIT(_x) refcnt_init(&(_x)) -#define PF_REF_TAKE(_x) refcnt_take(&(_x)) -#define PF_REF_RELE(_x) refcnt_rele(&(_x)) - enum { PF_INOUT, PF_IN, PF_OUT, PF_FWD }; enum { PF_PASS, PF_DROP, PF_SCRUB, PF_NOSCRUB, PF_NAT, PF_NONAT, PF_BINAT, PF_NOBINAT, PF_RDR, PF_NORDR, PF_SYNPROXY_DROP, PF_DEFER, @@ -704,8 +696,6 @@ struct pf_state_key { struct pf_statelisthead states; struct pf_state_key *reverse; struct inpcb *inp; - pf_refcnt_t refcnt; - u_int8_t removed; }; #define PF_REVERSED_KEY(key, family) \ ((key[PF_SK_WIRE]->af != key[PF_SK_STACK]->af) && \ @@ -1919,12 +1909,7 @@ int pf_postprocess_addr(struct pf_sta void pf_cksum(struct pf_pdesc *, struct mbuf *); -struct pf_state_key *pf_state_key_ref(struct pf_state_key *); -void pf_state_key_unref(struct pf_state_key *); -int pf_state_key_isvalid(struct pf_state_key *); -void pf_pkt_unlink_state_key(struct mbuf *); -void pf_pkt_state_key_ref(struct mbuf *); - #endif /* _KERNEL */ + #endif /* _NET_PFVAR_H_ */ Index: netinet/ip_input.c =================================================================== RCS file: /cvs/src/sys/netinet/ip_input.c,v retrieving revision 1.267 diff -u -p -r1.267 ip_input.c --- netinet/ip_input.c 25 Jan 2016 18:49:57 -0000 1.267 +++ netinet/ip_input.c 30 Jan 2016 22:23:44 -0000 @@ -1458,9 +1458,6 @@ ip_forward(struct mbuf *m, struct ifnet len = min(ntohs(ip->ip_len), 68); m_copydata(m, 0, len, mfake.m_pktdat); mfake.m_pkthdr.len = mfake.m_len = len; -#if NPF > 0 - pf_pkt_unlink_state_key(&mfake); -#endif /* NPF > 0 */ fake = 1; } Index: sys/mbuf.h =================================================================== RCS file: /cvs/src/sys/sys/mbuf.h,v retrieving revision 1.206 diff -u -p -r1.206 mbuf.h --- sys/mbuf.h 7 Jan 2016 22:23:13 -0000 1.206 +++ sys/mbuf.h 30 Jan 2016 22:23:45 -0000 @@ -1,4 +1,4 @@ -/* $OpenBSD: mbuf.h,v 1.206 2016/01/07 22:23:13 sashan Exp $ */ +/* $OpenBSD: mbuf.h,v 1.205 2015/11/21 11:46:25 mpi Exp $ */ /* $NetBSD: mbuf.h,v 1.19 1996/02/09 18:25:14 christos Exp $ */ /* @@ -316,7 +316,6 @@ struct mbuf { (to)->m_pkthdr = (from)->m_pkthdr; \ (from)->m_flags &= ~M_PKTHDR; \ SLIST_INIT(&(from)->m_pkthdr.ph_tags); \ - (from)->m_pkthdr.pf.statekey = NULL; \ } while (/* CONSTCOND */ 0) /*