On Thu, May 28, 2015 at 23:46 +0200, Alexandr Nedvedicky wrote:
> </snip>
> > >>
> > >> But we'll drop this "reference" in pf_src_tree_remove_state,
> > >> then how will sns[PF_SN_NAT] and sns[PF_SN_ROUTE] be different?
> > >
> > > I think I should take PF class again ;-) I've just realized there
> > > is a test in pf_remove_src_node():
> > >
> > > 572 if (sn->states > 0 || sn->expire > time_uptime)
> > > 573 return;
> > >
> > > so it will do the right thing. This is the piece I was missing.
> > >
> > >>
> > >> sns[] array was prepared for this state so if we can't insert
> > >> the state, sns entries must be cleaned up. pf_remove_src_node
> > >> checks the number of associated states and if source node should
> > >> expire some time later.
> > >
> > > yes, it seems more clear to me now.
> > >
> >
> > Good! At least I wasn't blind this time! (:
> >
> > >>
> > >> Speaking of PF_SN_ROUTE, pf_set_rt_ifp should be probably called
> > >> before we insert the state for the very same reason, plus it
> > >> should check the pf_map_addr return value and do the cleanup.
> > >
> > > I don't feel entirely qualified now, to discuss the matter ;-)
> >
> > Hey, don't worry about it. Half of the people reading this have
> > zero clue as to what the hell are we talking about.
> >
> > > however
> > > pf_set_rt_ifp() should indeed test return value of pf_map_addr(), In case
> > > of
> > > failure the error should be thrown further up, so pf_create_state() can
> > > handle
> > > it. Probably jumping to csfailed: should be sufficient.
> > >
> >
> > You can't just jump to csfailed unless you do a pf_set_rt_ifp
> > before the pf_state_insert, but then it needs an attached key
> > to only get it's address family.
>
> true, because once pf_state_insert() succeeds, it's no longer job for
> csfailed:
> branch to clean up a state. I currently have no better suggestion than taking
> the easiest move:
>
> add af argument to pf_set_rt_ifp() and fetch it from skw,
> in pf_create_state().
>
I thought about it and I believe this is correct to assume that the
skw->af is correct even if we won't use this particular state key
object.
> Actually there is an option:
> remove KASSERT() at 655 in pf_state_key_attach().
> 659 pf_state_key_attach(struct pf_state_key *sk, struct pf_state *s, int
> idx)
> 660 {
> 661 struct pf_state_item *si;
> 662 struct pf_state_key *cur;
> 663 struct pf_state *olds = NULL;
> 664
> 665 KASSERT(s->key[idx] == NULL);
> Then we can simply do:
> s->key[PF_SK_WIRE] = skw;
> in pf_create_state(), so pf_set_rt_ifp() will get what it expects.
>
This would be a hack and an overkill really.
> adding af argument to pf_set_rt_if() seems more clean approach to me.
Yep.
> below is your patch with reshuffled pf_create_state(), so pf_set_rt_ifp()
> gets called before, pf_state_insert(). I've introduced a new reason:
> PFRES_NOROUTE
Awesome.
> however I'm not sure if it is descriptive enough...
>
Nothing significantly better springs to mind.
I'd also mark pf_set_rt_ifp __inline and remove the marker from
the pf_create_state, but that can be done later.
OK?
> regards
> sasha
>
>
> ? create_state.diff
> ? pf.c.diff
> ? pf.c.patch
> Index: pf.c
> ===================================================================
> RCS file: /cvs/src/sys/net/pf.c,v
> retrieving revision 1.916
> diff -u -r1.916 pf.c
> --- pf.c 26 May 2015 16:17:51 -0000 1.916
> +++ pf.c 28 May 2015 21:38:09 -0000
> @@ -204,8 +204,8 @@
> u_int16_t pf_get_mss(struct pf_pdesc *);
> u_int16_t pf_calc_mss(struct pf_addr *, sa_family_t, int,
> u_int16_t);
> -void pf_set_rt_ifp(struct pf_state *,
> - struct pf_addr *);
> +int pf_set_rt_ifp(struct pf_state *,
> + struct pf_addr *, sa_family_t);
> struct pf_divert *pf_get_divert(struct mbuf *);
> int pf_walk_option6(struct pf_pdesc *, struct ip6_hdr *,
> int, int, u_short *);
> @@ -2958,32 +2958,39 @@
> return (mss);
> }
>
> -void
> -pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr)
> +int
> +pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr, sa_family_t af)
> {
> struct pf_rule *r = s->rule.ptr;
> struct pf_src_node *sns[PF_SN_MAX];
> + int rv;
>
> s->rt_kif = NULL;
> if (!r->rt)
> - return;
> + return (0);
> +
> bzero(sns, sizeof(sns));
> - switch (s->key[PF_SK_WIRE]->af) {
> + switch (af) {
> case AF_INET:
> - pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL, sns,
> + rv = pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL, sns,
> &r->route, PF_SN_ROUTE);
> - s->rt_kif = r->route.kif;
> - s->natrule.ptr = r;
> break;
> #ifdef INET6
> case AF_INET6:
> - pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL, sns,
> + rv = pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL, sns,
> &r->route, PF_SN_ROUTE);
> - s->rt_kif = r->route.kif;
> - s->natrule.ptr = r;
> break;
> #endif /* INET6 */
> + default:
> + rv = 1;
> }
> +
> + if (rv == 0) {
> + s->rt_kif = r->route.kif;
> + s->natrule.ptr = r;
> + }
> +
> + return (rv);
> }
>
> u_int32_t
> @@ -3557,16 +3564,6 @@
> goto csfailed;
> }
>
> - if (pf_state_insert(BOUND_IFACE(r, pd->kif), skw, sks, s)) {
> - pf_state_key_detach(s, PF_SK_STACK);
> - pf_state_key_detach(s, PF_SK_WIRE);
> - *sks = *skw = NULL;
> - REASON_SET(&reason, PFRES_STATEINS);
> - goto csfailed;
> - } else
> - *sm = s;
> -
> - /* attach src nodes late, otherwise cleanup on error nontrivial */
> for (i = 0; i < PF_SN_MAX; i++)
> if (sns[i] != NULL) {
> struct pf_sn_item *sni;
> @@ -3574,17 +3571,26 @@
> sni = pool_get(&pf_sn_item_pl, PR_NOWAIT);
> if (sni == NULL) {
> REASON_SET(&reason, PFRES_MEMORY);
> - pf_src_tree_remove_state(s);
> - STATE_DEC_COUNTERS(s);
> - pool_put(&pf_state_pl, s);
> - return (PF_DROP);
> + goto csfailed;
> }
> sni->sn = sns[i];
> SLIST_INSERT_HEAD(&s->src_nodes, sni, next);
> sni->sn->states++;
> }
>
> - pf_set_rt_ifp(s, pd->src); /* needs s->state_key set */
> + if (pf_set_rt_ifp(s, pd->src, (*skw)->af) != 0) {
> + REASON_SET(&reason, PFRES_NOROUTE);
> + goto csfailed;
> + }
> +
> + if (pf_state_insert(BOUND_IFACE(r, pd->kif), skw, sks, s)) {
> + pf_detach_state(s);
> + *sks = *skw = NULL;
> + REASON_SET(&reason, PFRES_STATEINS);
> + goto csfailed;
> + } else
> + *sm = s;
> +
> if (tag > 0) {
> pf_tag_ref(tag);
> s->tag = tag;
> @@ -3612,12 +3618,16 @@
> return (PF_PASS);
>
> csfailed:
> + if (s) {
> + pf_normalize_tcp_cleanup(s); /* safe even w/o init */
> + pf_src_tree_remove_state(s);
> + }
> +
> for (i = 0; i < PF_SN_MAX; i++)
> if (sns[i] != NULL)
> pf_remove_src_node(sns[i]);
> +
> if (s) {
> - pf_normalize_tcp_cleanup(s); /* safe even w/o init */
> - pf_src_tree_remove_state(s);
> STATE_DEC_COUNTERS(s);
> pool_put(&pf_state_pl, s);
> }
> Index: pfvar.h
> ===================================================================
> RCS file: /cvs/src/sys/net/pfvar.h,v
> retrieving revision 1.414
> diff -u -r1.414 pfvar.h
> --- pfvar.h 11 Apr 2015 13:00:12 -0000 1.414
> +++ pfvar.h 28 May 2015 21:38:09 -0000
> @@ -1316,7 +1316,8 @@
> #define PFRES_SRCLIMIT 13 /* Source node/conn limit */
> #define PFRES_SYNPROXY 14 /* SYN proxy */
> #define PFRES_TRANSLATE 15 /* No translation address
> available */
> -#define PFRES_MAX 16 /* total+1 */
> +#define PFRES_NOROUTE 16 /* No route found for PBR
> action */
> +#define PFRES_MAX 17 /* total+1 */
>
> #define PFRES_NAMES { \
> "match", \
> @@ -1335,6 +1336,7 @@
> "src-limit", \
> "synproxy", \
> "translate", \
> + "no-route", \
> NULL \
> }
>