The branch main has been updated by ks: URL: https://cgit.FreeBSD.org/src/commit/?id=65c318630123fcf2b6f491bf4d02a5cad3031d20
commit 65c318630123fcf2b6f491bf4d02a5cad3031d20 Author: Kajetan Staszkiewicz <k...@freebsd.org> AuthorDate: 2025-08-01 19:01:37 +0000 Commit: Kajetan Staszkiewicz <k...@freebsd.org> CommitDate: 2025-08-29 07:58:40 +0000 pf: Add prefer-ipv6-nexthop option for route-to pools Now that pf is aware of address family of each pool address and source tracking uses distinct address family for source and redirection addresses it is possible to add a new pool option prefer-ipv6-nexthop which enables routing of IPv4 packets over IPv6 next hops for rules with the route-to option. Add a pool option flag PF_POOL_IPV6NH, apply it to pools with a keyword prefer-ipv6-nexthop. Modify pf_map_addr() to handle pools with addresses of different families. Use *naf as a hint about what address family the forwarded packet is, then pick from the pool addresses of family that can be used as a next hop for the forwarded packet, controlled by the PF_POOL_IPV6NH flag. For NAT pools this flag is never set and thus pf_map_addr() will return an IP address of the same family as the forwarded packet. For route-to pools when the flag is enabled IPv6 addresses can be returned for IPv4 packets. In pf_route() check rt_af, it is not guaranteed to be AF_INET anymore because pf_map_addr() could have changed it (as *naf). Add tests for behaviour of pf_map_addr() both with PF_POOL_IPV6NH and without, for single IP addresses, prefixes and subnets. 
Reviewed by: kp Sponsored by: InnoGames GmbH Differential Revision: https://reviews.freebsd.org/D50781 --- sbin/pfctl/parse.y | 50 ++- sbin/pfctl/pfctl_parser.c | 6 +- sbin/pfctl/tests/files/pf1073.in | 1 + sbin/pfctl/tests/files/pf1073.ok | 1 + sbin/pfctl/tests/files/pf1074.fail | 1 + sbin/pfctl/tests/files/pf1074.in | 1 + sbin/pfctl/tests/pfctl_test_list.inc | 2 + share/man/man5/pf.conf.5 | 18 +- sys/net/pfvar.h | 3 +- sys/netpfil/pf/if_pfsync.c | 8 +- sys/netpfil/pf/pf.c | 63 ++- sys/netpfil/pf/pf.h | 1 + sys/netpfil/pf/pf_ioctl.c | 1 + sys/netpfil/pf/pf_lb.c | 151 ++++++-- tests/sys/netpfil/pf/route_to.sh | 716 ++++++++++++++++++++++++++++++++++- tests/sys/netpfil/pf/src_track.sh | 36 +- tests/sys/netpfil/pf/utils.subr | 4 +- 17 files changed, 953 insertions(+), 110 deletions(-) diff --git a/sbin/pfctl/parse.y b/sbin/pfctl/parse.y index 00c36b218055..59c27d1f5d7c 100644 --- a/sbin/pfctl/parse.y +++ b/sbin/pfctl/parse.y @@ -238,6 +238,7 @@ static struct pool_opts { #define POM_TYPE 0x01 #define POM_STICKYADDRESS 0x02 #define POM_ENDPI 0x04 +#define POM_IPV6NH 0x08 u_int8_t opts; int type; int staticport; @@ -543,7 +544,7 @@ int parseport(char *, struct range *r, int); %token MAXSRCCONN MAXSRCCONNRATE OVERLOAD FLUSH SLOPPY PFLOW ALLOW_RELATED %token TAGGED TAG IFBOUND FLOATING STATEPOLICY STATEDEFAULTS ROUTE SETTOS %token DIVERTTO DIVERTREPLY BRIDGE_TO RECEIVEDON NE LE GE AFTO NATTO RDRTO -%token BINATTO MAXPKTRATE MAXPKTSIZE +%token BINATTO MAXPKTRATE MAXPKTSIZE IPV6NH %token <v.string> STRING %token <v.number> NUMBER %token <v.i> PORTBINARY @@ -2648,13 +2649,16 @@ pfrule : action dir logquick interface route af proto fromto YYERROR; } r.rt = $5.rt; - decide_address_family($5.redirspec->host, &r.af); - if (!(r.rule_flag & PFRULE_AFTO)) - remove_invalid_hosts(&($5.redirspec->host), &r.af); - if ($5.redirspec->host == NULL) { - yyerror("no routing address with " - "matching address family found."); - YYERROR; + + if (!($5.redirspec->pool_opts.opts & 
PF_POOL_IPV6NH)) { + decide_address_family($5.redirspec->host, &r.af); + if (!(r.rule_flag & PFRULE_AFTO)) + remove_invalid_hosts(&($5.redirspec->host), &r.af); + if ($5.redirspec->host == NULL) { + yyerror("no routing address with " + "matching address family found."); + YYERROR; + } } } #ifdef __FreeBSD__ @@ -2978,7 +2982,8 @@ filter_opt : USER uids { filter_opts.nat = $4; filter_opts.nat->af = $2; - if ($4->af && $4->af != $2) { + remove_invalid_hosts(&($4->host), &(filter_opts.nat->af)); + if ($4->host == NULL) { yyerror("af-to addresses must be in the " "target address family"); YYERROR; @@ -2998,8 +3003,9 @@ filter_opt : USER uids { filter_opts.nat->af = $2; filter_opts.rdr = $6; filter_opts.rdr->af = $2; - if (($4->af && $4->host->af != $2) || - ($6->af && $6->host->af != $2)) { + remove_invalid_hosts(&($4->host), &(filter_opts.nat->af)); + remove_invalid_hosts(&($6->host), &(filter_opts.rdr->af)); + if ($4->host == NULL || $6->host == NULL) { yyerror("af-to addresses must be in the " "target address family"); YYERROR; @@ -4674,6 +4680,14 @@ pool_opt : BITMASK { pool_opts.marker |= POM_ENDPI; pool_opts.opts |= PF_POOL_ENDPI; } + | IPV6NH { + if (pool_opts.marker & POM_IPV6NH) { + yyerror("prefer-ipv6-nexthop cannot be redefined"); + YYERROR; + } + pool_opts.marker |= POM_IPV6NH; + pool_opts.opts |= PF_POOL_IPV6NH; + } | MAPEPORTSET number '/' number '/' number { if (pool_opts.mape.offset) { yyerror("map-e-portset cannot be redefined"); @@ -4813,6 +4827,12 @@ natrule : nataction interface af proto fromto tag tagged rtable "address'"); YYERROR; } + if ($9->pool_opts.opts & PF_POOL_IPV6NH) { + yyerror("The prefer-ipv6-nexthop option " + "can't be used for nat/rdr/binat pools" + ); + YYERROR; + } if (!r.af && ! 
$9->host->ifindex) r.af = $9->host->af; @@ -5074,13 +5094,6 @@ route_host : STRING { route_host_list : route_host optnl { $$ = $1; } | route_host_list comma route_host optnl { - if ($1->af == 0) - $1->af = $3->af; - if ($1->af != $3->af) { - yyerror("all pool addresses must be in the " - "same address family"); - YYERROR; - } $1->tail->next = $3; $1->tail = $3->tail; $$ = $1; @@ -6678,6 +6691,7 @@ lookup(char *s) { "pass", PASS}, { "pflow", PFLOW}, { "port", PORT}, + { "prefer-ipv6-nexthop", IPV6NH}, { "prio", PRIO}, { "priority", PRIORITY}, { "priq", PRIQ}, diff --git a/sbin/pfctl/pfctl_parser.c b/sbin/pfctl/pfctl_parser.c index 18b78a150c28..3c4f9f6b4334 100644 --- a/sbin/pfctl/pfctl_parser.c +++ b/sbin/pfctl/pfctl_parser.c @@ -508,6 +508,8 @@ print_pool(struct pfctl_pool *pool, u_int16_t p1, u_int16_t p2, int id) if (pool->mape.offset > 0) printf(" map-e-portset %u/%u/%u", pool->mape.offset, pool->mape.psidlen, pool->mape.psid); + if (pool->opts & PF_POOL_IPV6NH) + printf(" prefer-ipv6-nexthop"); } void @@ -1438,7 +1440,7 @@ ifa_add_groups_to_map(char *ifa_name) ENTRY item; ENTRY *ret_item; int *answer; - + item.key = ifg->ifgrq_group; if (hsearch_r(item, FIND, &ret_item, &isgroup_map) == 0) { struct ifgroupreq ifgr2; @@ -1580,7 +1582,7 @@ is_a_group(char *name) { ENTRY item; ENTRY *ret_item; - + item.key = name; if (hsearch_r(item, FIND, &ret_item, &isgroup_map) == 0) return (0); diff --git a/sbin/pfctl/tests/files/pf1073.in b/sbin/pfctl/tests/files/pf1073.in new file mode 100644 index 000000000000..477995893ac3 --- /dev/null +++ b/sbin/pfctl/tests/files/pf1073.in @@ -0,0 +1 @@ +pass in on vtnet0 route-to ( vtnet1 2001:db8::1 ) prefer-ipv6-nexthop inet diff --git a/sbin/pfctl/tests/files/pf1073.ok b/sbin/pfctl/tests/files/pf1073.ok new file mode 100644 index 000000000000..f34867508c75 --- /dev/null +++ b/sbin/pfctl/tests/files/pf1073.ok @@ -0,0 +1 @@ +pass in on vtnet0 route-to (vtnet1 2001:db8::1) prefer-ipv6-nexthop inet all flags S/SA keep state diff --git 
a/sbin/pfctl/tests/files/pf1074.fail b/sbin/pfctl/tests/files/pf1074.fail new file mode 100644 index 000000000000..afe8ee3c458f --- /dev/null +++ b/sbin/pfctl/tests/files/pf1074.fail @@ -0,0 +1 @@ +no routing address with matching address family found. diff --git a/sbin/pfctl/tests/files/pf1074.in b/sbin/pfctl/tests/files/pf1074.in new file mode 100644 index 000000000000..5d285bc5d6e8 --- /dev/null +++ b/sbin/pfctl/tests/files/pf1074.in @@ -0,0 +1 @@ +pass in on vtnet0 route-to ( vtnet1 2001:db8::1 ) inet diff --git a/sbin/pfctl/tests/pfctl_test_list.inc b/sbin/pfctl/tests/pfctl_test_list.inc index 3a68cc06ec74..8bfccddf50e5 100644 --- a/sbin/pfctl/tests/pfctl_test_list.inc +++ b/sbin/pfctl/tests/pfctl_test_list.inc @@ -181,3 +181,5 @@ PFCTL_TEST(1069, "max-pkt-size") PFCTL_TEST_FAIL(1070, "include line number") PFCTL_TEST(1071, "mask length on (lo0)") PFCTL_TEST_FAIL(1072, "Invalid port range") +PFCTL_TEST(1073, "Filter AF different than route-to AF, with prefer-ipv6-nexthop") +PFCTL_TEST_FAIL(1074, "Filter AF different than route-to AF, without prefer-ipv6-nexthop") diff --git a/share/man/man5/pf.conf.5 b/share/man/man5/pf.conf.5 index a9ae823257a4..bdd8a843d72a 100644 --- a/share/man/man5/pf.conf.5 +++ b/share/man/man5/pf.conf.5 @@ -2470,7 +2470,13 @@ NAT address and port. This feature implements "full-cone" NAT behavior. .El .Pp -Additionally, the +Additionally, options +.Ar sticky-address +and +.Ar prefer-ipv6-nexthop +can be specified to influence how IP addresses selected from pools. +.Pp +The .Ar sticky-address option can be specified to help ensure that multiple connections from the same source are mapped to the same redirection address. @@ -2486,6 +2492,14 @@ beyond the lifetime of the states, increase the global options with See .Sx STATEFUL TRACKING OPTIONS for more ways to control the source tracking. 
+.Pp +The +.Ar prefer-ipv6-nexthop +option allows for IPv6 addresses to be used as the nexthop +for IPv4 packets routed with the +.Ar route-to +rule option. If a table is used with IPv4 and IPv6 addresses, first the IPv6 addresses +will be used in round-robin fashion, then IPv4 addresses. .Sh STATE MODULATION Much of the security derived from TCP is attributable to how well the initial sequence numbers (ISNs) are chosen. @@ -3580,7 +3594,7 @@ limit-item = ( "states" | "frags" | "src-nodes" ) number pooltype = ( "bitmask" | "random" | "source-hash" [ ( hex-key | string-key ) ] | - "round-robin" ) [ sticky-address ] + "round-robin" ) [ sticky-address | prefer-ipv6-nexthop ] subqueue = string | "{" queue-list "}" queue-list = string [ [ "," ] string ] diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h index d6c13470f2eb..cf6d2508cf65 100644 --- a/sys/net/pfvar.h +++ b/sys/net/pfvar.h @@ -645,6 +645,7 @@ struct pf_kpool { int tblidx; u_int16_t proxy_port[2]; u_int8_t opts; + sa_family_t ipv6_nexthop_af; }; struct pf_rule_actions { @@ -2680,7 +2681,7 @@ u_short pf_map_addr(sa_family_t, struct pf_krule *, struct pf_addr *, struct pf_kpool *); u_short pf_map_addr_sn(u_int8_t, struct pf_krule *, struct pf_addr *, struct pf_addr *, - sa_family_t *, struct pfi_kkif **nkif, + sa_family_t *, struct pfi_kkif **, struct pf_addr *, struct pf_kpool *, pf_sn_types_t); int pf_get_transaddr_af(struct pf_krule *, diff --git a/sys/netpfil/pf/if_pfsync.c b/sys/netpfil/pf/if_pfsync.c index 585c196391c0..cfc300d99396 100644 --- a/sys/netpfil/pf/if_pfsync.c +++ b/sys/netpfil/pf/if_pfsync.c @@ -605,7 +605,8 @@ pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version) rt_kif = rpool_first->kif; /* * Guess the AF of the route address, FreeBSD 13 does - * not support af-to so it should be safe. + * not support af-to nor prefer-ipv6-nexthop + * so it should be safe. 
*/ rt_af = r->af; } else if (!PF_AZERO(&sp->pfs_1301.rt_addr, sp->pfs_1301.af)) { @@ -634,8 +635,9 @@ pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version) } rt = sp->pfs_1400.rt; /* - * Guess the AF of the route address, FreeBSD 13 does - * not support af-to so it should be safe. + * Guess the AF of the route address, FreeBSD 14 does + * not support af-to nor prefer-ipv6-nexthop + * so it should be safe. */ rt_af = sp->pfs_1400.af; } diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c index 8cd4fff95b15..4325835c7671 100644 --- a/sys/netpfil/pf/pf.c +++ b/sys/netpfil/pf/pf.c @@ -5960,7 +5960,9 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm, if (r->rt) { /* * Set act.rt here instead of in pf_rule_to_actions() because - * it is applied only from the last pass rule. + * it is applied only from the last pass rule. For rules + * with the prefer-ipv6-nexthop option act.rt_af is a hint + * about AF of the forwarded packet and might be changed. */ pd->act.rt = r->rt; if (r->rt == PF_REPLYTO) @@ -8974,9 +8976,10 @@ pf_route(struct pf_krule *r, struct ifnet *oifp, struct pf_kstate *s, struct pf_pdesc *pd, struct inpcb *inp) { struct mbuf *m0, *m1, *md; - struct route ro; - const struct sockaddr *gw = &ro.ro_dst; - struct sockaddr_in *dst; + struct route_in6 ro; + union sockaddr_union rt_gw; + const union sockaddr_union *gw = (const union sockaddr_union *)&ro.ro_dst; + union sockaddr_union *dst; struct ip *ip; struct ifnet *ifp = NULL; int error = 0; @@ -9071,10 +9074,35 @@ pf_route(struct pf_krule *r, struct ifnet *oifp, ip = mtod(m0, struct ip *); bzero(&ro, sizeof(ro)); - dst = (struct sockaddr_in *)&ro.ro_dst; - dst->sin_family = AF_INET; - dst->sin_len = sizeof(struct sockaddr_in); - dst->sin_addr.s_addr = pd->act.rt_addr.v4.s_addr; + dst = (union sockaddr_union *)&ro.ro_dst; + dst->sin.sin_family = AF_INET; + dst->sin.sin_len = sizeof(struct sockaddr_in); + dst->sin.sin_addr = ip->ip_dst; + if (ifp) { /* Only needed in forward 
direction and route-to */ + bzero(&rt_gw, sizeof(rt_gw)); + ro.ro_flags |= RT_HAS_GW; + gw = &rt_gw; + switch (pd->act.rt_af) { +#ifdef INET + case AF_INET: + rt_gw.sin.sin_family = AF_INET; + rt_gw.sin.sin_len = sizeof(struct sockaddr_in); + rt_gw.sin.sin_addr.s_addr = pd->act.rt_addr.v4.s_addr; + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + rt_gw.sin6.sin6_family = AF_INET6; + rt_gw.sin6.sin6_len = sizeof(struct sockaddr_in6); + pf_addrcpy((struct pf_addr *)&rt_gw.sin6.sin6_addr, + &pd->act.rt_addr, AF_INET6); + break; +#endif /* INET6 */ + default: + /* Normal af-to without route-to */ + break; + } + } if (pd->dir == PF_IN) { if (ip->ip_ttl <= IPTTLDEC) { @@ -9098,10 +9126,10 @@ pf_route(struct pf_krule *r, struct ifnet *oifp, /* Use the gateway if needed. */ if (nh->nh_flags & NHF_GATEWAY) { - gw = &nh->gw_sa; + gw = (const union sockaddr_union *)&nh->gw_sa; ro.ro_flags |= RT_HAS_GW; } else { - dst->sin_addr = ip->ip_dst; + dst->sin.sin_addr = ip->ip_dst; } /* @@ -9126,6 +9154,9 @@ pf_route(struct pf_krule *r, struct ifnet *oifp, PF_STATE_UNLOCK(s); } + /* It must have been either set from rt_af or from fib4_lookup */ + KASSERT(gw->sin.sin_family != 0, ("%s: gw address family undetermined", __func__)); + if (ifp == NULL) { m0 = pd->m; pd->m = NULL; @@ -9210,9 +9241,11 @@ pf_route(struct pf_krule *r, struct ifnet *oifp, m_clrprotoflags(m0); /* Avoid confusing lower layers. 
*/ md = m0; - error = pf_dummynet_route(pd, s, r, ifp, gw, &md); + error = pf_dummynet_route(pd, s, r, ifp, + (const struct sockaddr *)gw, &md); if (md != NULL) { - error = (*ifp->if_output)(ifp, md, gw, &ro); + error = (*ifp->if_output)(ifp, md, + (const struct sockaddr *)gw, (struct route *)&ro); SDT_PROBE2(pf, ip, route_to, output, ifp, error); } goto done; @@ -9253,9 +9286,11 @@ pf_route(struct pf_krule *r, struct ifnet *oifp, md = m0; pd->pf_mtag = pf_find_mtag(md); error = pf_dummynet_route(pd, s, r, ifp, - gw, &md); + (const struct sockaddr *)gw, &md); if (md != NULL) { - error = (*ifp->if_output)(ifp, md, gw, &ro); + error = (*ifp->if_output)(ifp, md, + (const struct sockaddr *)gw, + (struct route *)&ro); SDT_PROBE2(pf, ip, route_to, output, ifp, error); } } else diff --git a/sys/netpfil/pf/pf.h b/sys/netpfil/pf/pf.h index 51b3fd6390e1..8edd5a5110a1 100644 --- a/sys/netpfil/pf/pf.h +++ b/sys/netpfil/pf/pf.h @@ -131,6 +131,7 @@ enum { PF_ADDR_ADDRMASK, PF_ADDR_NOROUTE, PF_ADDR_DYNIFTL, #define PF_POOL_TYPEMASK 0x0f #define PF_POOL_STICKYADDR 0x20 #define PF_POOL_ENDPI 0x40 +#define PF_POOL_IPV6NH 0x80 #define PF_WSCALE_FLAG 0x80 #define PF_WSCALE_MASK 0x0f diff --git a/sys/netpfil/pf/pf_ioctl.c b/sys/netpfil/pf/pf_ioctl.c index e5da05a958f6..d395730d6a54 100644 --- a/sys/netpfil/pf/pf_ioctl.c +++ b/sys/netpfil/pf/pf_ioctl.c @@ -2276,6 +2276,7 @@ pf_ioctl_addrule(struct pf_krule *rule, uint32_t ticket, rule->nat.cur = TAILQ_FIRST(&rule->nat.list); rule->rdr.cur = TAILQ_FIRST(&rule->rdr.list); rule->route.cur = TAILQ_FIRST(&rule->route.list); + rule->route.ipv6_nexthop_af = AF_INET6; TAILQ_INSERT_TAIL(ruleset->rules[rs_num].inactive.ptr, rule, entries); ruleset->rules[rs_num].inactive.rcount++; diff --git a/sys/netpfil/pf/pf_lb.c b/sys/netpfil/pf/pf_lb.c index bc9e1dc72902..b8b5157c9b15 100644 --- a/sys/netpfil/pf/pf_lb.c +++ b/sys/netpfil/pf/pf_lb.c @@ -545,11 +545,18 @@ pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr, uint64_t hashidx; 
int cnt; sa_family_t wanted_af; + u_int8_t pool_type; + bool prefer_ipv6_nexthop = rpool->opts & PF_POOL_IPV6NH; KASSERT(saf != 0, ("%s: saf == 0", __func__)); KASSERT(naf != NULL, ("%s: naf = NULL", __func__)); KASSERT((*naf) != 0, ("%s: *naf = 0", __func__)); + /* + * Given (*naf) is a hint about AF of the forwarded packet. + * It might be changed if prefer_ipv6_nexthop is enabled and + * the combination of nexthop AF and packet AF allows for it. + */ wanted_af = (*naf); mtx_lock(&rpool->mtx); @@ -594,19 +601,38 @@ pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr, } else { raddr = &rpool->cur->addr.v.a.addr; rmask = &rpool->cur->addr.v.a.mask; - /* - * For single addresses check their address family. Unless they - * have none, which happens when addresses are added with - * the old ioctl mechanism. In such case trust that the address - * has the proper AF. - */ - if (rpool->cur->af && rpool->cur->af != wanted_af) { - reason = PFRES_MAPFAILED; - goto done_pool_mtx; + } + + /* + * For pools with a single host with the prefer-ipv6-nexthop option + * we can return pool address of any AF, unless the forwarded packet + * is IPv6, then we can return only if pool address is IPv6. + * For non-prefer-ipv6-nexthop we can return pool address only + * of wanted AF, unless the pool address'es AF is unknown, which + * happens in case old ioctls have been used to set up the pool. + * + * Round-robin pools have their own logic for retrying next addresses. 
+ */ + pool_type = rpool->opts & PF_POOL_TYPEMASK; + if (pool_type == PF_POOL_NONE || pool_type == PF_POOL_BITMASK || + ((pool_type == PF_POOL_RANDOM || pool_type == PF_POOL_SRCHASH) && + rpool->cur->addr.type != PF_ADDR_TABLE && + rpool->cur->addr.type != PF_ADDR_DYNIFTL)) { + if (prefer_ipv6_nexthop) { + if (rpool->cur->af == AF_INET && (*naf) == AF_INET6) { + reason = PFRES_MAPFAILED; + goto done_pool_mtx; + } + wanted_af = rpool->cur->af; + } else { + if (rpool->cur->af != 0 && rpool->cur->af != (*naf)) { + reason = PFRES_MAPFAILED; + goto done_pool_mtx; + } } } - switch (rpool->opts & PF_POOL_TYPEMASK) { + switch (pool_type) { case PF_POOL_NONE: pf_addrcpy(naddr, raddr, wanted_af); break; @@ -631,10 +657,22 @@ pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr, else rpool->tblidx = (int)arc4random_uniform(cnt); memset(&rpool->counter, 0, sizeof(rpool->counter)); + if (prefer_ipv6_nexthop) + wanted_af = AF_INET6; + retry_other_af_random: if (pfr_pool_get(kt, &rpool->tblidx, &rpool->counter, wanted_af, pf_islinklocal, false)) { - reason = PFRES_MAPFAILED; - goto done_pool_mtx; /* unsupported */ + /* Retry with IPv4 nexthop for IPv4 traffic */ + if (prefer_ipv6_nexthop && + wanted_af == AF_INET6 && + (*naf) == AF_INET) { + wanted_af = AF_INET; + goto retry_other_af_random; + } else { + /* no hosts in wanted AF */ + reason = PFRES_MAPFAILED; + goto done_pool_mtx; + } } pf_addrcpy(naddr, &rpool->counter, wanted_af); } else if (init_addr != NULL && PF_AZERO(init_addr, @@ -702,10 +740,22 @@ pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr, else rpool->tblidx = (int)(hashidx % cnt); memset(&rpool->counter, 0, sizeof(rpool->counter)); + if (prefer_ipv6_nexthop) + wanted_af = AF_INET6; + retry_other_af_srchash: if (pfr_pool_get(kt, &rpool->tblidx, &rpool->counter, wanted_af, pf_islinklocal, false)) { - reason = PFRES_MAPFAILED; - goto done_pool_mtx; /* unsupported */ + /* Retry with IPv4 nexthop for IPv4 traffic */ + if 
(prefer_ipv6_nexthop && + wanted_af == AF_INET6 && + (*naf) == AF_INET) { + wanted_af = AF_INET; + goto retry_other_af_srchash; + } else { + /* no hosts in wanted AF */ + reason = PFRES_MAPFAILED; + goto done_pool_mtx; + } } pf_addrcpy(naddr, &rpool->counter, wanted_af); } else { @@ -718,6 +768,9 @@ pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr, { struct pf_kpooladdr *acur = rpool->cur; + retry_other_af_rr: + if (prefer_ipv6_nexthop) + wanted_af = rpool->ipv6_nexthop_af; if (rpool->cur->addr.type == PF_ADDR_TABLE) { if (!pfr_pool_get(rpool->cur->addr.p.tbl, &rpool->tblidx, &rpool->counter, wanted_af, @@ -728,46 +781,55 @@ pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr, &rpool->tblidx, &rpool->counter, wanted_af, pf_islinklocal, true)) goto get_addr; - } else if (pf_match_addr(0, raddr, rmask, &rpool->counter, - wanted_af)) + } else if (rpool->cur->af == wanted_af && + pf_match_addr(0, raddr, rmask, &rpool->counter, wanted_af)) goto get_addr; - + if (prefer_ipv6_nexthop && + (*naf) == AF_INET && wanted_af == AF_INET6) { + /* Reset table index when changing wanted AF. */ + rpool->tblidx = -1; + rpool->ipv6_nexthop_af = AF_INET; + goto retry_other_af_rr; + } try_next: + /* Reset prefer-ipv6-nexthop search to IPv6 when iterating pools. */ + rpool->ipv6_nexthop_af = AF_INET6; if (TAILQ_NEXT(rpool->cur, entries) == NULL) rpool->cur = TAILQ_FIRST(&rpool->list); else rpool->cur = TAILQ_NEXT(rpool->cur, entries); + try_next_ipv6_nexthop_rr: + /* Reset table index when iterating pools or changing wanted AF. 
*/ rpool->tblidx = -1; + if (prefer_ipv6_nexthop) + wanted_af = rpool->ipv6_nexthop_af; if (rpool->cur->addr.type == PF_ADDR_TABLE) { - if (pfr_pool_get(rpool->cur->addr.p.tbl, + if (!pfr_pool_get(rpool->cur->addr.p.tbl, &rpool->tblidx, &rpool->counter, wanted_af, NULL, - true)) { - /* table contains no address of type 'wanted_af' */ - if (rpool->cur != acur) - goto try_next; - reason = PFRES_MAPFAILED; - goto done_pool_mtx; - } + true)) + goto get_addr; } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { - if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, - &rpool->tblidx, &rpool->counter, wanted_af, - pf_islinklocal, true)) { - /* interface has no address of type 'wanted_af' */ - if (rpool->cur != acur) - goto try_next; - reason = PFRES_MAPFAILED; - goto done_pool_mtx; - } + if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, + &rpool->tblidx, &rpool->counter, wanted_af, pf_islinklocal, + true)) + goto get_addr; } else { - raddr = &rpool->cur->addr.v.a.addr; - rmask = &rpool->cur->addr.v.a.mask; - if (rpool->cur->af && rpool->cur->af != wanted_af) { - reason = PFRES_MAPFAILED; - goto done_pool_mtx; + if (rpool->cur->af == wanted_af) { + raddr = &rpool->cur->addr.v.a.addr; + rmask = &rpool->cur->addr.v.a.mask; + pf_addrcpy(&rpool->counter, raddr, wanted_af); + goto get_addr; } - pf_addrcpy(&rpool->counter, raddr, wanted_af); } - + if (prefer_ipv6_nexthop && + (*naf) == AF_INET && wanted_af == AF_INET6) { + rpool->ipv6_nexthop_af = AF_INET; + goto try_next_ipv6_nexthop_rr; + } + if (rpool->cur != acur) + goto try_next; + reason = PFRES_MAPFAILED; + goto done_pool_mtx; get_addr: pf_addrcpy(naddr, &rpool->counter, wanted_af); if (init_addr != NULL && PF_AZERO(init_addr, wanted_af)) @@ -777,9 +839,16 @@ pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr, } } + if (wanted_af == 0) { + reason = PFRES_MAPFAILED; + goto done_pool_mtx; + } + if (nkif) *nkif = rpool->cur->kif; + (*naf) = wanted_af; + done_pool_mtx: mtx_unlock(&rpool->mtx); diff --git 
a/tests/sys/netpfil/pf/route_to.sh b/tests/sys/netpfil/pf/route_to.sh index 765403dcb79c..872de0dcbb91 100644 --- a/tests/sys/netpfil/pf/route_to.sh +++ b/tests/sys/netpfil/pf/route_to.sh @@ -28,6 +28,75 @@ common_dir=$(atf_get_srcdir)/../common +# We need to somehow test if the random algorithm of pf_map_addr() is working. +# The table or prefix contains multiple IP next-hop addresses, for each one try +# to establish up to 10 connections. Fail the test if with this many attempts +# the "good" target has not been chosen. However this choice is random, +# the check might still ocasionally fail. +check_random() { + if [ "$1" = "IPv4" ]; then + ping_from="${net_clients_4}.1" + ping_to="${host_server_4}" + else + ping_from="${net_clients_6}::1" + ping_to="${host_server_6}" + fi + good_targets="$2" + bad_targets="$3" + + port=42000 + states=$(mktemp) || exit 1 + for good_target in $good_targets; do + found="no" + for attempt in $(seq 1 10); do + port=$(( port + 1 )) + jexec router pfctl -Fs + atf_check -s exit:0 ${common_dir}/pft_ping.py \ + --sendif ${epair_tester}a --replyif ${epair_tester}a \ + --fromaddr ${ping_from} --to ${ping_to} \ + --ping-type=tcp3way --send-sport=${port} + jexec router pfctl -qvvss | normalize_pfctl_s > $states + cat $states + if [ -n "${bad_targets}" ]; then + for bad_target in $bad_targets; do + if grep -qE "route-to: ${bad_target}@" $states; then + atf_fail "Bad target ${bad_target} selected!" + fi + done + fi; + if grep -qE "route-to: ${good_target}@" $states; then + found=yes + break + fi + done + if [ "${found}" = "no" ]; then + atf_fail "Target ${good_target} not selected after ${attempt} attempts!" + fi + done +} + +pf_map_addr_common() +{ + setup_router_server_nat64 + + # Clients will connect from another network behind the router. + # This allows for using multiple source addresses. 
+ jexec router route add -6 ${net_clients_6}::/${net_clients_6_mask} ${net_tester_6_host_tester} + jexec router route add ${net_clients_4}.0/${net_clients_4_mask} ${net_tester_4_host_tester} + + # The servers are reachable over additional IP addresses for + # testing of tables and subnets. The addresses are noncontinougnus + # for pf_map_addr() counter tests. + for i in 0 1 4 5; do + a1=$((24 + i)) + jexec server1 ifconfig ${epair_server1}b inet ${net_server1_4}.${a1}/32 alias + jexec server1 ifconfig ${epair_server1}b inet6 ${net_server1_6}::42:${i}/128 alias + a2=$((40 + i)) + jexec server2 ifconfig ${epair_server2}b inet ${net_server2_4}.${a2}/32 alias + jexec server2 ifconfig ${epair_server2}b inet6 ${net_server2_6}::42:${i}/128 alias + done +} + atf_test_case "v4" "cleanup" v4_head() { @@ -893,36 +962,17 @@ empty_pool_cleanup() pft_cleanup } - atf_test_case "table_loop" "cleanup" table_loop_head() { atf_set descr 'Check that iterating over tables poperly loops' atf_set require.user root - atf_set require.progs python3 scapy } table_loop_body() { - setup_router_server_nat64 - - # Clients will connect from another network behind the router. - # This allows for using multiple source addresses. - jexec router route add -6 ${net_clients_6}::/${net_clients_6_mask} ${net_tester_6_host_tester} - jexec router route add ${net_clients_4}.0/${net_clients_4_mask} ${net_tester_4_host_tester} - - # The servers are reachable over additional IP addresses for - # testing of tables and subnets. The addresses are noncontinougnus - # for pf_map_addr() counter tests. 
- for i in 0 1 4 5; do - a1=$((24 + i)) - jexec server1 ifconfig ${epair_server1}b inet ${net_server1_4}.${a1}/32 alias - jexec server1 ifconfig ${epair_server1}b inet6 ${net_server1_6}::42:${i}/128 alias - a2=$((40 + i)) - jexec server2 ifconfig ${epair_server2}b inet ${net_server2_4}.${a2}/32 alias - jexec server2 ifconfig ${epair_server2}b inet6 ${net_server2_6}::42:${i}/128 alias - done + pf_map_addr_common jexec router pfctl -e pft_set_rules router \ @@ -976,6 +1026,612 @@ table_loop_cleanup() } +atf_test_case "roundrobin" "cleanup" + +roundrobin_head() +{ + atf_set descr 'multiple gateways of mixed AF, including prefixes and tables, for IPv6 packets' + atf_set require.user root +} + +roundrobin_body() +{ + pf_map_addr_common + + # The rule is defined as "inet6 proto tcp" so directly given IPv4 hosts + # will be removed from the pool by pfctl. Tables will still be loaded + # and pf_map_addr() will only use IPv6 addresses from them. It will + # iterate over members of the pool and inside of tables and prefixes. 
+ + jexec router pfctl -e + pft_set_rules router \ + "set debug loud" \ + "set reassemble yes" \ + "set state-policy if-bound" \ + "table <rt_targets> { ${net_server2_4}.40/31 ${net_server2_4}.44 ${net_server2_6}::42:0/127 ${net_server2_6}::42:4 }" \ + "pass in on ${epair_tester}b \ + route-to { \ + (${epair_server1}a ${net_server1_4_host_server}) \ + (${epair_server2}a <rt_targets_empty>) \ + (${epair_server1}a ${net_server1_6}::42:0/127) \ + (${epair_server2}a <rt_targets_empty>) \ + (${epair_server2}a <rt_targets>) \ + } \ + inet6 proto tcp \ + keep state" + + for port in $(seq 1 6); do + port=$((4200 + port)) + atf_check -s exit:0 ${common_dir}/pft_ping.py \ + --sendif ${epair_tester}a --replyif ${epair_tester}a \ + --fromaddr ${net_clients_6}::1 --to ${host_server_6} \ + --ping-type=tcp3way --send-sport=${port} + done + + states=$(mktemp) || exit 1 + jexec router pfctl -qvvss | normalize_pfctl_s > $states + + for state_regexp in \ + "${epair_tester}b tcp ${host_server_6}\[9\] <- ${net_clients_6}::1\[4201\] .* route-to: ${net_server1_6}::42:0@${epair_server1}a" \ + "${epair_tester}b tcp ${host_server_6}\[9\] <- ${net_clients_6}::1\[4202\] .* route-to: ${net_server1_6}::42:1@${epair_server1}a" \ + "${epair_tester}b tcp ${host_server_6}\[9\] <- ${net_clients_6}::1\[4203\] .* route-to: ${net_server2_6}::42:0@${epair_server2}a" \ + "${epair_tester}b tcp ${host_server_6}\[9\] <- ${net_clients_6}::1\[4204\] .* route-to: ${net_server2_6}::42:1@${epair_server2}a" \ + "${epair_tester}b tcp ${host_server_6}\[9\] <- ${net_clients_6}::1\[4205\] .* route-to: ${net_server2_6}::42:4@${epair_server2}a" \ + "${epair_tester}b tcp ${host_server_6}\[9\] <- ${net_clients_6}::1\[4206\] .* route-to: ${net_server1_6}::42:0@${epair_server1}a" \ + ; do + grep -qE "${state_regexp}" $states || atf_fail "State not found for '${state_regexp}'" + done +} + +roundrobin_cleanup() +{ + pft_cleanup +} + +atf_test_case "random_table" "cleanup" + +random_table_head() +{ + atf_set descr 'Pool with 
random flag and a table for IPv6' + atf_set require.user root +} + +random_table_body() +{ + pf_map_addr_common + + # The "random" flag will pick random hosts from the table but will + # not dive into prefixes, always choosing the 0th address. + # Proper address family will be choosen. + + jexec router pfctl -e + pft_set_rules router \ + "set debug loud" \ + "set reassemble yes" \ + "set state-policy if-bound" \ + "table <rt_targets> { ${net_server2_4}.40/31 ${net_server2_4}.44 ${net_server2_6}::42:0/127 ${net_server2_6}::42:4 }" \ + "pass in on ${epair_tester}b \ + route-to { (${epair_server2}a <rt_targets>) } random \ + inet6 proto tcp \ + keep state" + + good_targets="${net_server2_6}::42:0 ${net_server2_6}::42:4" + bad_targets="${net_server2_6}::42:1" + check_random IPv6 "${good_targets}" "${bad_targets}" +} + +random_table_cleanup() +{ + pft_cleanup +} + +atf_test_case "random_prefix" "cleanup" + +random_prefix_head() +{ + atf_set descr 'Pool with random flag and a table for IPv4' + atf_set require.user root +} + +random_prefix_body() +{ + pf_map_addr_common + + # The "random" flag will pick random hosts from given prefix. + # The choice being random makes testing it non-trivial. We do 10 + # attempts to have each target chosen. Hopefully this is enough to have + # this test pass often enough. 
+ + jexec router pfctl -e + pft_set_rules router \ + "set debug loud" \ + "set reassemble yes" \ + "set state-policy if-bound" \ + "pass in on ${epair_tester}b \ + route-to { (${epair_server2}a ${net_server2_6}::42:0/127) } random \ + inet6 proto tcp \ + keep state" + + good_targets="${net_server2_6}::42:0 ${net_server2_6}::42:1" + check_random IPv6 "${good_targets}" +} + +random_prefix_cleanup() +{ + pft_cleanup +} + +atf_test_case "prefer_ipv6_nexthop_single_ipv4" "cleanup" + +prefer_ipv6_nexthop_single_ipv4_head() +{ + atf_set descr 'prefer-ipv6-nexthop option for a single IPv4 gateway' + atf_set require.user root +} + +prefer_ipv6_nexthop_single_ipv4_body() +{ + pf_map_addr_common + + # Basic forwarding test for prefer-ipv6-nexthop pool option. + # A single IPv4 gateway will work only for IPv4 traffic. + + jexec router pfctl -e + pft_set_rules router \ + "set reassemble yes" \ *** 565 LINES SKIPPED ***