On Thu, May 28, 2015 at 23:46 +0200, Alexandr Nedvedicky wrote: > </snip> > > >> > > >> But we'll drop this "reference" in pf_src_tree_remove_state, > > >> then how will sns[PF_SN_NAT] and sns[PF_SN_ROUTE] be different? > > > > > > I think I should take PF class again ;-) I've just realized there > > > is a test in pf_remove_src_node(): > > > > > > 572 if (sn->states > 0 || sn->expire > time_uptime) > > > 573 return; > > > > > > so it will do the right thing. This is the piece I was missing. > > > > > >> > > >> sns[] array was prepared for this state so if we can't insert > > >> the state, sns entries must be cleaned up. pf_remove_src_node > > >> checks the number of associated states and if source node should > > >> expire some time later. > > > > > > yes, it seems more clear to me now. > > > > > > > Good! At least I wasn't blind this time! (: > > > > >> > > >> Speaking of PF_SN_ROUTE, pf_set_rt_ifp should be probably called > > >> before we insert the state for the very same reason, plus it > > >> should check the pf_map_addr return value and do the cleanup. > > > > > > I don't feel entirely qualified now, to discuss the matter ;-) > > > > Hey, don't worry about it. Half of the people reading this have > > zero clue as to what the hell are we talking about. > > > > > however > > > pf_set_rt_ifp() should indeed test return value of pf_map_addr(), In case > > > of > > > failure the error should be thrown further up, so pf_create_state() can > > > handle > > > it. Probably jumping to csfailed: should be sufficient. > > > > > > > You can't just jump to csfailed unless you do a pf_set_rt_ifp > > before the pf_state_insert, but then it needs an attached key > > to only get it's address family. > > true, because once pf_state_insert() succeeds, it's no longer job for > csfailed: > branch to clean up a state. I currently have no better suggestion than taking > the easiest move: > > add af argument to pf_set_rt_ifp() and fetch it from skw, > in pf_create_state(). >
I thought about it and I believe this is correct to assume that the skw->af is correct even if we won't use this particular state key object. > Actually there is an option: > remove KASSERT() at 655 in pf_state_key_attach(). > 659 pf_state_key_attach(struct pf_state_key *sk, struct pf_state *s, int > idx) > 660 { > 661 struct pf_state_item *si; > 662 struct pf_state_key *cur; > 663 struct pf_state *olds = NULL; > 664 > 665 KASSERT(s->key[idx] == NULL); > Then we can simply do: > s->key[PF_SK_WIRE] = skw; > in pf_create_state(), so pf_set_rt_ifp() will get what it expects. > This would be a hack and an overkill really. > adding af argument to pf_set_rt_if() seems more clean approach to me. Yep. > below is your patch with reshuffled pf_create_state(), so pf_set_rt_ifp() > gets called before, pf_state_insert(). I've introduced a new reason: > PFRES_NOROUTE Awesome. > however I'm not sure if it is descriptive enough... > Nothing significantly better springs to mind. I'd also mark pf_set_rt_ifp __inline and remove the marker from the pf_create_state, but that can be done later. OK? > regards > sasha > > > ? create_state.diff > ? pf.c.diff > ? pf.c.patch > Index: pf.c > =================================================================== > RCS file: /cvs/src/sys/net/pf.c,v > retrieving revision 1.916 > diff -u -r1.916 pf.c > --- pf.c 26 May 2015 16:17:51 -0000 1.916 > +++ pf.c 28 May 2015 21:38:09 -0000 > @@ -204,8 +204,8 @@ > u_int16_t pf_get_mss(struct pf_pdesc *); > u_int16_t pf_calc_mss(struct pf_addr *, sa_family_t, int, > u_int16_t); > -void pf_set_rt_ifp(struct pf_state *, > - struct pf_addr *); > +int pf_set_rt_ifp(struct pf_state *, > + struct pf_addr *, sa_family_t); > struct pf_divert *pf_get_divert(struct mbuf *); > int pf_walk_option6(struct pf_pdesc *, struct ip6_hdr *, > int, int, u_short *); > @@ -2958,32 +2958,39 @@ > return (mss); > } > > -void > -pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr) > +int > +pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr, sa_family_t af) > { > struct pf_rule *r = s->rule.ptr; > struct pf_src_node *sns[PF_SN_MAX]; > + int rv; > > s->rt_kif = NULL; > if (!r->rt) > - return; > + return (0); > + > bzero(sns, sizeof(sns)); > - switch (s->key[PF_SK_WIRE]->af) { > + switch (af) { > case AF_INET: > - pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL, sns, > + rv = pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL, sns, > &r->route, PF_SN_ROUTE); > - s->rt_kif = r->route.kif; > - s->natrule.ptr = r; > break; > #ifdef INET6 > case AF_INET6: > - pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL, sns, > + rv = pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL, sns, > &r->route, PF_SN_ROUTE); > - s->rt_kif = r->route.kif; > - s->natrule.ptr = r; > break; > #endif /* INET6 */ > + default: > + rv = 1; > } > + > + if (rv == 0) { > + s->rt_kif = r->route.kif; > + s->natrule.ptr = r; > + } > + > + return (rv); > } > > u_int32_t > @@ -3557,16 +3564,6 @@ > goto csfailed; > } > > - if (pf_state_insert(BOUND_IFACE(r, pd->kif), skw, sks, s)) { > - pf_state_key_detach(s, PF_SK_STACK); > - pf_state_key_detach(s, PF_SK_WIRE); > - *sks = *skw = NULL; > - REASON_SET(&reason, PFRES_STATEINS); > - goto csfailed; > - } else > - *sm = s; > - > - /* attach src nodes late, otherwise cleanup on error nontrivial */ > for (i = 0; i < PF_SN_MAX; i++) > if (sns[i] != NULL) { > struct pf_sn_item *sni; > @@ -3574,17 +3571,26 @@ > sni = pool_get(&pf_sn_item_pl, PR_NOWAIT); > if (sni == NULL) { > REASON_SET(&reason, PFRES_MEMORY); > - pf_src_tree_remove_state(s); > - STATE_DEC_COUNTERS(s); > - pool_put(&pf_state_pl, s); > - return (PF_DROP); > + goto csfailed; > } > sni->sn = sns[i]; > SLIST_INSERT_HEAD(&s->src_nodes, sni, next); > sni->sn->states++; > } > > - pf_set_rt_ifp(s, pd->src); /* needs s->state_key set */ > + if (pf_set_rt_ifp(s, pd->src, (*skw)->af) != 0) { > + REASON_SET(&reason, PFRES_NOROUTE); > + goto csfailed; > + } > + > + if (pf_state_insert(BOUND_IFACE(r, pd->kif), skw, sks, s)) { > + pf_detach_state(s); > + *sks = *skw = NULL; > + REASON_SET(&reason, PFRES_STATEINS); > + goto csfailed; > + } else > + *sm = s; > + > if (tag > 0) { > pf_tag_ref(tag); > s->tag = tag; > @@ -3612,12 +3618,16 @@ > return (PF_PASS); > > csfailed: > + if (s) { > + pf_normalize_tcp_cleanup(s); /* safe even w/o init */ > + pf_src_tree_remove_state(s); > + } > + > for (i = 0; i < PF_SN_MAX; i++) > if (sns[i] != NULL) > pf_remove_src_node(sns[i]); > + > if (s) { > - pf_normalize_tcp_cleanup(s); /* safe even w/o init */ > - pf_src_tree_remove_state(s); > STATE_DEC_COUNTERS(s); > pool_put(&pf_state_pl, s); > } > Index: pfvar.h > =================================================================== > RCS file: /cvs/src/sys/net/pfvar.h,v > retrieving revision 1.414 > diff -u -r1.414 pfvar.h > --- pfvar.h 11 Apr 2015 13:00:12 -0000 1.414 > +++ pfvar.h 28 May 2015 21:38:09 -0000 > @@ -1316,7 +1316,8 @@ > #define PFRES_SRCLIMIT 13 /* Source node/conn limit */ > #define PFRES_SYNPROXY 14 /* SYN proxy */ > #define PFRES_TRANSLATE 15 /* No translation address > available */ > -#define PFRES_MAX 16 /* total+1 */ > +#define PFRES_NOROUTE 16 /* No route found for PBR > action */ > +#define PFRES_MAX 17 /* total+1 */ > > #define PFRES_NAMES { \ > "match", \ > @@ -1335,6 +1336,7 @@ > "src-limit", \ > "synproxy", \ > "translate", \ > + "no-route", \ > NULL \ > } >