Module Name: src Committed By: martin Date: Mon May 25 17:25:28 UTC 2020
Modified Files: src/sys/net/npf [netbsd-9]: npf_conf.c npf_conn.c npf_conn.h npf_conndb.c npf_inet.c npf_nat.c src/usr.sbin/npf/npfctl [netbsd-9]: npf_build.c npf_show.c npfctl.h Log Message: Pull up following revision(s) (requested by rmind in ticket #930): usr.sbin/npf/npfctl/npf_build.c: revision 1.54 sys/net/npf/npf_conn.h: revision 1.19 usr.sbin/npf/npfctl/npfctl.h: revision 1.52 usr.sbin/npf/npfctl/npf_show.c: revision 1.31 sys/net/npf/npf_conf.c: revision 1.16 sys/net/npf/npf_nat.c: revision 1.49 sys/net/npf/npf_inet.c: revision 1.56 sys/net/npf/npf_conndb.c: revision 1.8 sys/net/npf/npf_conn.c: revision 1.31 Backport selected NPF fixes from the upstream (to be pulled up): - npf_conndb_lookup: protect the connection lookup with pserialize(9), instead of incorrectly assuming that the handler always runs at IPL_SOFNET. Should fix crashes reported on high load (PR/55182). - npf_config_destroy: handle partially initialized config; fixes crashes with some invalid configurations. - NAT policy creation / destruction: set the initial reference and do not wait for reference draining on destruction; destroy the policy on the last reference drop instead. Fixes a lockup with the dynamic NAT rules. - npf_nat_{export,import}: fix a regression since dynamic NAT rules. - npfctl: fix a regression and restore the default group behaviour. - Add npf_cache_tcp() and validate the TCP data offset (from maxv@). To generate a diff of this commit: cvs rdiff -u -r1.13.2.2 -r1.13.2.3 src/sys/net/npf/npf_conf.c cvs rdiff -u -r1.27.2.2 -r1.27.2.3 src/sys/net/npf/npf_conn.c cvs rdiff -u -r1.16.2.2 -r1.16.2.3 src/sys/net/npf/npf_conn.h cvs rdiff -u -r1.6 -r1.6.2.1 src/sys/net/npf/npf_conndb.c cvs rdiff -u -r1.54.2.1 -r1.54.2.2 src/sys/net/npf/npf_inet.c cvs rdiff -u -r1.46.2.2 -r1.46.2.3 src/sys/net/npf/npf_nat.c cvs rdiff -u -r1.50.2.2 -r1.50.2.3 src/usr.sbin/npf/npfctl/npf_build.c cvs rdiff -u -r1.28.2.1 -r1.28.2.2 src/usr.sbin/npf/npfctl/npf_show.c cvs rdiff -u -r1.48.2.2 -r1.48.2.3 src/usr.sbin/npf/npfctl/npfctl.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/net/npf/npf_conf.c diff -u src/sys/net/npf/npf_conf.c:1.13.2.2 src/sys/net/npf/npf_conf.c:1.13.2.3 --- src/sys/net/npf/npf_conf.c:1.13.2.2 Sun Sep 1 13:21:39 2019 +++ src/sys/net/npf/npf_conf.c Mon May 25 17:25:28 2020 @@ -47,7 +47,7 @@ #ifdef _KERNEL #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: npf_conf.c,v 1.13.2.2 2019/09/01 13:21:39 martin Exp $"); +__KERNEL_RCSID(0, "$NetBSD: npf_conf.c,v 1.13.2.3 2020/05/25 17:25:28 martin Exp $"); #include <sys/param.h> #include <sys/types.h> @@ -94,10 +94,18 @@ npf_config_destroy(npf_config_t *nc) * Note: the rulesets must be destroyed first, in order to drop * any references to the tableset. */ - npf_ruleset_destroy(nc->ruleset); - npf_ruleset_destroy(nc->nat_ruleset); - npf_rprocset_destroy(nc->rule_procs); - npf_tableset_destroy(nc->tableset); + if (nc->ruleset) { + npf_ruleset_destroy(nc->ruleset); + } + if (nc->nat_ruleset) { + npf_ruleset_destroy(nc->nat_ruleset); + } + if (nc->rule_procs) { + npf_rprocset_destroy(nc->rule_procs); + } + if (nc->tableset) { + npf_tableset_destroy(nc->tableset); + } kmem_free(nc, sizeof(npf_config_t)); } Index: src/sys/net/npf/npf_conn.c diff -u src/sys/net/npf/npf_conn.c:1.27.2.2 src/sys/net/npf/npf_conn.c:1.27.2.3 --- src/sys/net/npf/npf_conn.c:1.27.2.2 Fri Oct 4 08:06:35 2019 +++ src/sys/net/npf/npf_conn.c Mon May 25 17:25:28 2020 @@ -107,7 +107,7 @@ #ifdef _KERNEL #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: npf_conn.c,v 1.27.2.2 2019/10/04 08:06:35 martin Exp $"); +__KERNEL_RCSID(0, "$NetBSD: npf_conn.c,v 1.27.2.3 2020/05/25 17:25:28 martin Exp $"); #include <sys/param.h> #include <sys/types.h> @@ -311,7 +311,7 @@ npf_conn_lookup(const npf_cache_t *npc, if (!npf_conn_conkey(npc, &key, true)) { return NULL; } - con = npf_conndb_lookup(npf->conn_db, &key, forw); + con = npf_conndb_lookup(npf, &key, forw); if (con == NULL) { return NULL; } @@ -908,7 +908,7 @@ npf_conn_find(npf_t *npf, const nvlist_t if (!kdict || !npf_connkey_import(kdict, &key)) { return EINVAL; } - con = npf_conndb_lookup(npf->conn_db, &key, &forw); + con = npf_conndb_lookup(npf, &key, &forw); if (con == NULL) { return ESRCH; } Index: src/sys/net/npf/npf_conn.h diff -u src/sys/net/npf/npf_conn.h:1.16.2.2 src/sys/net/npf/npf_conn.h:1.16.2.3 --- src/sys/net/npf/npf_conn.h:1.16.2.2 Tue Aug 13 14:35:55 2019 +++ src/sys/net/npf/npf_conn.h Mon May 25 17:25:28 2020 @@ -157,7 +157,7 @@ void npf_conndb_sysfini(npf_t *); npf_conndb_t * npf_conndb_create(void); void npf_conndb_destroy(npf_conndb_t *); -npf_conn_t * npf_conndb_lookup(npf_conndb_t *, const npf_connkey_t *, bool *); +npf_conn_t * npf_conndb_lookup(npf_t *, const npf_connkey_t *, bool *); bool npf_conndb_insert(npf_conndb_t *, const npf_connkey_t *, npf_conn_t *, bool); npf_conn_t * npf_conndb_remove(npf_conndb_t *, npf_connkey_t *); Index: src/sys/net/npf/npf_conndb.c diff -u src/sys/net/npf/npf_conndb.c:1.6 src/sys/net/npf/npf_conndb.c:1.6.2.1 --- src/sys/net/npf/npf_conndb.c:1.6 Tue Jul 23 00:52:01 2019 +++ src/sys/net/npf/npf_conndb.c Mon May 25 17:25:28 2020 @@ -46,7 +46,7 @@ #ifdef _KERNEL #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: npf_conndb.c,v 1.6 2019/07/23 00:52:01 rmind Exp $"); +__KERNEL_RCSID(0, "$NetBSD: npf_conndb.c,v 1.6.2.1 2020/05/25 17:25:28 martin Exp $"); #include <sys/param.h> #include <sys/types.h> @@ -143,8 +143,9 @@ npf_conndb_destroy(npf_conndb_t *cd) * npf_conndb_lookup: find a connection given the key. */ npf_conn_t * -npf_conndb_lookup(npf_conndb_t *cd, const npf_connkey_t *ck, bool *forw) +npf_conndb_lookup(npf_t *npf, const npf_connkey_t *ck, bool *forw) { + npf_conndb_t *cd = atomic_load_relaxed(&npf->conn_db); const unsigned keylen = NPF_CONNKEY_LEN(ck); npf_conn_t *con; void *val; @@ -152,8 +153,10 @@ npf_conndb_lookup(npf_conndb_t *cd, cons /* * Lookup the connection key in the key-value map. */ + int s = npf_config_read_enter(npf); val = thmap_get(cd->cd_map, ck->ck_key, keylen); if (!val) { + npf_config_read_exit(npf, s); return NULL; } @@ -169,6 +172,7 @@ npf_conndb_lookup(npf_conndb_t *cd, cons * Acquire a reference and return the connection. */ atomic_inc_uint(&con->c_refcnt); + npf_config_read_exit(npf, s); return con; } Index: src/sys/net/npf/npf_inet.c diff -u src/sys/net/npf/npf_inet.c:1.54.2.1 src/sys/net/npf/npf_inet.c:1.54.2.2 --- src/sys/net/npf/npf_inet.c:1.54.2.1 Tue Aug 13 14:35:55 2019 +++ src/sys/net/npf/npf_inet.c Mon May 25 17:25:28 2020 @@ -38,7 +38,7 @@ #ifdef _KERNEL #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: npf_inet.c,v 1.54.2.1 2019/08/13 14:35:55 martin Exp $"); +__KERNEL_RCSID(0, "$NetBSD: npf_inet.c,v 1.54.2.2 2020/05/25 17:25:28 martin Exp $"); #include <sys/param.h> #include <sys/types.h> @@ -562,6 +562,22 @@ npf_cache_ip(npf_cache_t *npc, nbuf_t *n return flags; } +static inline int +npf_cache_tcp(npf_cache_t *npc, nbuf_t *nbuf, unsigned hlen) +{ + struct tcphdr *th; + + th = nbuf_advance(nbuf, hlen, sizeof(struct tcphdr)); + if (__predict_false(th == NULL)) { + return NPC_FMTERR; + } + if (__predict_false(th->th_off < 5)) { + return NPC_FMTERR; + } + npc->npc_l4.tcp = th; + return NPC_LAYER4 | NPC_TCP; +} + /* * npf_cache_all: general routine to cache all relevant IP (v4 or v6) * and TCP, UDP or ICMP headers. @@ -601,9 +617,7 @@ again: switch (npc->npc_proto) { case IPPROTO_TCP: /* Cache: layer 4 - TCP. */ - npc->npc_l4.tcp = nbuf_advance(nbuf, hlen, - sizeof(struct tcphdr)); - l4flags = NPC_LAYER4 | NPC_TCP; + l4flags = npf_cache_tcp(npc, nbuf, hlen); break; case IPPROTO_UDP: /* Cache: layer 4 - UDP. */ Index: src/sys/net/npf/npf_nat.c diff -u src/sys/net/npf/npf_nat.c:1.46.2.2 src/sys/net/npf/npf_nat.c:1.46.2.3 --- src/sys/net/npf/npf_nat.c:1.46.2.2 Sun Sep 1 13:21:39 2019 +++ src/sys/net/npf/npf_nat.c Mon May 25 17:25:28 2020 @@ -67,7 +67,7 @@ #ifdef _KERNEL #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: npf_nat.c,v 1.46.2.2 2019/09/01 13:21:39 martin Exp $"); +__KERNEL_RCSID(0, "$NetBSD: npf_nat.c,v 1.46.2.3 2020/05/25 17:25:28 martin Exp $"); #include <sys/param.h> #include <sys/types.h> @@ -182,6 +182,7 @@ npf_nat_newpolicy(npf_t *npf, const nvli size_t len; np = kmem_zalloc(sizeof(npf_natpolicy_t), KM_SLEEP); + atomic_store_relaxed(&np->n_refcnt, 1); np->n_npfctx = npf; /* The translation type, flags and policy ID. */ @@ -271,37 +272,53 @@ npf_nat_policyexport(const npf_natpolicy return 0; } +static void +npf_natpolicy_release(npf_natpolicy_t *np) +{ + KASSERT(atomic_load_relaxed(&np->n_refcnt) > 0); + + if (atomic_dec_uint_nv(&np->n_refcnt) != 0) { + return; + } + KASSERT(LIST_EMPTY(&np->n_nat_list)); + mutex_destroy(&np->n_lock); + kmem_free(np, sizeof(npf_natpolicy_t)); +} + /* * npf_nat_freepolicy: free the NAT policy. * * => Called from npf_rule_free() during the reload via npf_ruleset_destroy(). + * => At this point, NAT policy cannot acquire new references. */ void npf_nat_freepolicy(npf_natpolicy_t *np) { - npf_conn_t *con; - npf_nat_t *nt; - /* - * Disassociate all entries from the policy. At this point, - * new entries can no longer be created for this policy. + * Drain the references. If there are active NAT connections, + * then expire them and kick the worker. */ - while (np->n_refcnt) { + if (atomic_load_relaxed(&np->n_refcnt) > 1) { + npf_nat_t *nt; + mutex_enter(&np->n_lock); LIST_FOREACH(nt, &np->n_nat_list, nt_entry) { - con = nt->nt_conn; + npf_conn_t *con = nt->nt_conn; KASSERT(con != NULL); npf_conn_expire(con); } mutex_exit(&np->n_lock); - - /* Kick the worker - all references should be going away. */ npf_worker_signal(np->n_npfctx); - kpause("npfgcnat", false, 1, NULL); } - KASSERT(LIST_EMPTY(&np->n_nat_list)); - mutex_destroy(&np->n_lock); - kmem_free(np, sizeof(npf_natpolicy_t)); + KASSERT(atomic_load_relaxed(&np->n_refcnt) >= 1); + + /* + * Drop the initial reference, but it might not be the last one. + * If so, the last reference will be triggered via: + * + * npf_conn_destroy() -> npf_nat_destroy() -> npf_natpolicy_release() + */ + npf_natpolicy_release(np); } void @@ -649,7 +666,7 @@ npf_do_nat(npf_cache_t *npc, npf_conn_t npf_recache(npc); } error = npf_nat_algo(npc, np, forw); - atomic_dec_uint(&np->n_refcnt); + npf_natpolicy_release(np); return error; } @@ -662,7 +679,7 @@ npf_do_nat(npf_cache_t *npc, npf_conn_t if (con == NULL) { ncon = npf_conn_establish(npc, di, true); if (ncon == NULL) { - atomic_dec_uint(&np->n_refcnt); + npf_natpolicy_release(np); return ENOMEM; } con = ncon; @@ -674,7 +691,7 @@ npf_do_nat(npf_cache_t *npc, npf_conn_t */ nt = npf_nat_create(npc, np, con); if (nt == NULL) { - atomic_dec_uint(&np->n_refcnt); + npf_natpolicy_release(np); error = ENOMEM; goto out; } @@ -757,11 +774,15 @@ npf_nat_destroy(npf_nat_t *nt) } npf_stats_inc(np->n_npfctx, NPF_STAT_NAT_DESTROY); + /* + * Remove the connection from the list and drop the reference on + * the NAT policy. Note: this might trigger its destruction. + */ mutex_enter(&np->n_lock); LIST_REMOVE(nt, nt_entry); - KASSERT(np->n_refcnt > 0); - atomic_dec_uint(&np->n_refcnt); mutex_exit(&np->n_lock); + npf_natpolicy_release(np); + pool_cache_put(nat_cache, nt); } @@ -772,10 +793,13 @@ void npf_nat_export(nvlist_t *condict, npf_nat_t *nt) { npf_natpolicy_t *np = nt->nt_natpolicy; + unsigned alen = nt->nt_alen; nvlist_t *nat; nat = nvlist_create(0); + nvlist_add_number(nat, "alen", alen); nvlist_add_binary(nat, "oaddr", &nt->nt_oaddr, sizeof(npf_addr_t)); + nvlist_add_binary(nat, "taddr", &nt->nt_taddr, alen); nvlist_add_number(nat, "oport", nt->nt_oport); nvlist_add_number(nat, "tport", nt->nt_tport); nvlist_add_number(nat, "nat-policy", np->n_id); @@ -791,9 +815,9 @@ npf_nat_import(npf_t *npf, const nvlist_ { npf_natpolicy_t *np; npf_nat_t *nt; - const void *oaddr; + const void *taddr, *oaddr; + size_t alen, len; uint64_t np_id; - size_t len; np_id = dnvlist_get_number(nat, "nat-policy", UINT64_MAX); if ((np = npf_ruleset_findnat(natlist, np_id)) == NULL) { @@ -802,12 +826,23 @@ npf_nat_import(npf_t *npf, const nvlist_ nt = pool_cache_get(nat_cache, PR_WAITOK); memset(nt, 0, sizeof(npf_nat_t)); + alen = dnvlist_get_number(nat, "alen", 0); + if (alen == 0 || alen > sizeof(npf_addr_t)) { + goto err; + } + + taddr = dnvlist_get_binary(nat, "taddr", &len, NULL, 0); + if (!taddr || len != alen) { + goto err; + } + memcpy(&nt->nt_taddr, taddr, sizeof(npf_addr_t)); + oaddr = dnvlist_get_binary(nat, "oaddr", &len, NULL, 0); - if (!oaddr || len != sizeof(npf_addr_t)) { - pool_cache_put(nat_cache, nt); - return NULL; + if (!oaddr || len != alen) { + goto err; } memcpy(&nt->nt_oaddr, oaddr, sizeof(npf_addr_t)); + nt->nt_oport = dnvlist_get_number(nat, "oport", 0); nt->nt_tport = dnvlist_get_number(nat, "tport", 0); @@ -817,8 +852,7 @@ npf_nat_import(npf_t *npf, const nvlist_ if (!npf_portmap_take(pm, nt->nt_alen, &nt->nt_taddr, nt->nt_tport)) { - pool_cache_put(nat_cache, nt); - return NULL; + goto err; } } npf_stats_inc(npf, NPF_STAT_NAT_CREATE); @@ -832,6 +866,9 @@ npf_nat_import(npf_t *npf, const nvlist_ np->n_refcnt++; LIST_INSERT_HEAD(&np->n_nat_list, nt, nt_entry); return nt; +err: + pool_cache_put(nat_cache, nt); + return NULL; } #if defined(DDB) || defined(_NPF_TESTING) Index: src/usr.sbin/npf/npfctl/npf_build.c diff -u src/usr.sbin/npf/npfctl/npf_build.c:1.50.2.2 src/usr.sbin/npf/npfctl/npf_build.c:1.50.2.3 --- src/usr.sbin/npf/npfctl/npf_build.c:1.50.2.2 Fri Oct 4 08:06:34 2019 +++ src/usr.sbin/npf/npfctl/npf_build.c Mon May 25 17:25:28 2020 @@ -32,7 +32,7 @@ */ #include <sys/cdefs.h> -__RCSID("$NetBSD: npf_build.c,v 1.50.2.2 2019/10/04 08:06:34 martin Exp $"); +__RCSID("$NetBSD: npf_build.c,v 1.50.2.3 2020/05/25 17:25:28 martin Exp $"); #include <sys/types.h> #define __FAVOR_BSD @@ -56,8 +56,9 @@ __RCSID("$NetBSD: npf_build.c,v 1.50.2.2 static nl_config_t * npf_conf = NULL; static bool npf_debug = false; static nl_rule_t * the_rule = NULL; +static bool npf_conf_built = false; -static bool defgroup = false; +static nl_rule_t * defgroup = NULL; static nl_rule_t * current_group[MAX_RULE_NESTING]; static unsigned rule_nesting_level = 0; static unsigned npfctl_tid_counter = 0; @@ -71,8 +72,31 @@ npfctl_config_init(bool debug) if (npf_conf == NULL) { errx(EXIT_FAILURE, "npf_config_create failed"); } - npf_debug = debug; memset(current_group, 0, sizeof(current_group)); + npf_debug = debug; + npf_conf_built = false; +} + +void +npfctl_config_build(void) +{ + /* Run-once. */ + if (npf_conf_built) { + return; + } + + /* + * The default group is mandatory. Note: npfctl_build_group_end() + * skipped the default rule, since it must be the last one. + */ + if (!defgroup) { + errx(EXIT_FAILURE, "default group was not defined"); + } + assert(rule_nesting_level == 0); + npf_rule_insert(npf_conf, NULL, defgroup); + + npf_config_build(npf_conf); + npf_conf_built = true; } int @@ -81,9 +105,7 @@ npfctl_config_send(int fd) npf_error_t errinfo; int error = 0; - if (!defgroup) { - errx(EXIT_FAILURE, "default group was not defined"); - } + npfctl_config_build(); error = npf_config_submit(npf_conf, fd, &errinfo); if (error == EEXIST) { /* XXX */ errx(EXIT_FAILURE, "(re)load failed: " @@ -118,6 +140,8 @@ npfctl_config_save(nl_config_t *ncf, con void npfctl_config_debug(const char *outfile) { + npfctl_config_build(); + printf("\nConfiguration:\n\n"); _npf_config_dump(npf_conf, STDOUT_FILENO); @@ -593,7 +617,7 @@ npfctl_build_group(const char *name, int if (rule_nesting_level) { yyerror("default group can only be at the top level"); } - defgroup = true; + defgroup = rl; } /* Set the current group and increase the nesting level. */ @@ -613,7 +637,15 @@ npfctl_build_group_end(void) group = current_group[rule_nesting_level]; current_group[rule_nesting_level--] = NULL; - /* Note: if the parent is NULL, then it is a global rule. */ + /* + * Note: + * - If the parent is NULL, then it is a global rule. + * - The default rule must be the last, so it is inserted later. + */ + if (group == defgroup) { + assert(parent == NULL); + return; + } npf_rule_insert(npf_conf, parent, group); } Index: src/usr.sbin/npf/npfctl/npf_show.c diff -u src/usr.sbin/npf/npfctl/npf_show.c:1.28.2.1 src/usr.sbin/npf/npfctl/npf_show.c:1.28.2.2 --- src/usr.sbin/npf/npfctl/npf_show.c:1.28.2.1 Sun Aug 11 10:12:18 2019 +++ src/usr.sbin/npf/npfctl/npf_show.c Mon May 25 17:25:28 2020 @@ -34,7 +34,7 @@ */ #include <sys/cdefs.h> -__RCSID("$NetBSD: npf_show.c,v 1.28.2.1 2019/08/11 10:12:18 martin Exp $"); +__RCSID("$NetBSD: npf_show.c,v 1.28.2.2 2020/05/25 17:25:28 martin Exp $"); #include <sys/socket.h> #define __FAVOR_BSD @@ -575,7 +575,7 @@ npfctl_config_show(int fd) print_linesep(ctx); } else { ncf = npfctl_config_ref(); - (void)npf_config_build(ncf); + npfctl_config_build(); loaded = true; } ctx->conf = ncf; Index: src/usr.sbin/npf/npfctl/npfctl.h diff -u src/usr.sbin/npf/npfctl/npfctl.h:1.48.2.2 src/usr.sbin/npf/npfctl/npfctl.h:1.48.2.3 --- src/usr.sbin/npf/npfctl/npfctl.h:1.48.2.2 Fri Oct 4 08:06:34 2019 +++ src/usr.sbin/npf/npfctl/npfctl.h Mon May 25 17:25:28 2020 @@ -192,6 +192,7 @@ void npfctl_bpf_table(npf_bpf_t *, u_in #define NPFCTL_NAT_STATIC 2 void npfctl_config_init(bool); +void npfctl_config_build(void); int npfctl_config_send(int); nl_config_t * npfctl_config_ref(void); int npfctl_config_show(int);