Module Name:    src
Committed By:   rmind
Date:           Fri Mar 20 23:36:28 UTC 2015

Modified Files:
        src/sys/net/npf: npf_ctl.c npf_ruleset.c

Log Message:
NPF: replace the TAILQ of the dynamic rules with a linked list and fix the
inheriting of the active dynamic rules during the reload; also, fix a bug
in the insert path by putting a memory barrier in the right place.


To generate a diff of this commit:
cvs rdiff -u -r1.40 -r1.41 src/sys/net/npf/npf_ctl.c
cvs rdiff -u -r1.41 -r1.42 src/sys/net/npf/npf_ruleset.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/net/npf/npf_ctl.c
diff -u src/sys/net/npf/npf_ctl.c:1.40 src/sys/net/npf/npf_ctl.c:1.41
--- src/sys/net/npf/npf_ctl.c:1.40	Sun Aug 24 20:36:30 2014
+++ src/sys/net/npf/npf_ctl.c	Fri Mar 20 23:36:28 2015
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_ctl.c,v 1.40 2014/08/24 20:36:30 rmind Exp $	*/
+/*	$NetBSD: npf_ctl.c,v 1.41 2015/03/20 23:36:28 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2009-2014 The NetBSD Foundation, Inc.
@@ -37,7 +37,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_ctl.c,v 1.40 2014/08/24 20:36:30 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_ctl.c,v 1.41 2015/03/20 23:36:28 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/conf.h>
@@ -778,6 +778,9 @@ npfctl_rule(u_long cmd, void *data)
 	}
 	case NPF_CMD_RULE_LIST: {
 		retdict = npf_ruleset_list(rlset, ruleset_name);
+		if (!retdict) {
+			error = ESRCH;
+		}
 		break;
 	}
 	case NPF_CMD_RULE_FLUSH: {
@@ -797,6 +800,7 @@ npfctl_rule(u_long cmd, void *data)
 	npf_config_exit();
 
 	if (rl) {
+		KASSERT(error);
 		npf_rule_free(rl);
 	}
 out:

Index: src/sys/net/npf/npf_ruleset.c
diff -u src/sys/net/npf/npf_ruleset.c:1.41 src/sys/net/npf/npf_ruleset.c:1.42
--- src/sys/net/npf/npf_ruleset.c:1.41	Mon Feb  2 00:31:39 2015
+++ src/sys/net/npf/npf_ruleset.c	Fri Mar 20 23:36:28 2015
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_ruleset.c,v 1.41 2015/02/02 00:31:39 rmind Exp $	*/
+/*	$NetBSD: npf_ruleset.c,v 1.42 2015/03/20 23:36:28 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2009-2015 The NetBSD Foundation, Inc.
@@ -34,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_ruleset.c,v 1.41 2015/02/02 00:31:39 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_ruleset.c,v 1.42 2015/03/20 23:36:28 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/types.h>
@@ -89,21 +89,24 @@ struct npf_rule {
 	npf_natpolicy_t *	r_natp;
 	npf_rproc_t *		r_rproc;
 
-	/* Rule priority: (highest) 1, 2 ... n (lowest). */
-	pri_t			r_priority;
-
-	/*
-	 * Dynamic group: subset queue and a dynamic group list entry.
-	 * Dynamic rule: entry and the parent rule (the group).
-	 */
 	union {
-		TAILQ_HEAD(npf_ruleq, npf_rule) r_subset;
-		TAILQ_ENTRY(npf_rule)	r_entry;
-	} /* C11 */;
-	union {
-		LIST_ENTRY(npf_rule)	r_dentry;
-		npf_rule_t *		r_parent;
-	} /* C11 */;
+		/*
+		 * Dynamic group: rule subset and a group list entry.
+		 */
+		struct {
+			npf_rule_t *		r_subset;
+			LIST_ENTRY(npf_rule)	r_dentry;
+		};
+
+		/*
+		 * Dynamic rule: priority, parent group and next rule.
+		 */
+		struct {
+			int			r_priority;
+			npf_rule_t *		r_parent;
+			npf_rule_t *		r_next;
+		};
+	};
 
 	/* Rule ID, name and the optional key. */
 	uint64_t		r_id;
@@ -147,19 +150,6 @@ npf_ruleset_create(size_t slots)
 	return rlset;
 }
 
-static void
-npf_ruleset_unlink(npf_ruleset_t *rlset, npf_rule_t *rl)
-{
-	if (NPF_DYNAMIC_GROUP_P(rl->r_attr)) {
-		LIST_REMOVE(rl, r_dentry);
-	}
-	if (NPF_DYNAMIC_RULE_P(rl->r_attr)) {
-		npf_rule_t *rg = rl->r_parent;
-		TAILQ_REMOVE(&rg->r_subset, rl, r_entry);
-	}
-	LIST_REMOVE(rl, r_aentry);
-}
-
 void
 npf_ruleset_destroy(npf_ruleset_t *rlset)
 {
@@ -167,7 +157,19 @@ npf_ruleset_destroy(npf_ruleset_t *rlset
 	npf_rule_t *rl;
 
 	while ((rl = LIST_FIRST(&rlset->rs_all)) != NULL) {
-		npf_ruleset_unlink(rlset, rl);
+		if (NPF_DYNAMIC_GROUP_P(rl->r_attr)) {
+			/*
+			 * Note: r_subset may point to the rules which
+			 * were inherited by a new ruleset.
+			 */
+			rl->r_subset = NULL;
+			LIST_REMOVE(rl, r_dentry);
+		}
+		if (NPF_DYNAMIC_RULE_P(rl->r_attr)) {
+			/* Not removing from r_subset, see above. */
+			KASSERT(rl->r_parent != NULL);
+		}
+		LIST_REMOVE(rl, r_aentry);
 		npf_rule_free(rl);
 	}
 	KASSERT(LIST_EMPTY(&rlset->rs_dynamic));
@@ -222,16 +224,16 @@ npf_ruleset_lookup(npf_ruleset_t *rlset,
 int
 npf_ruleset_add(npf_ruleset_t *rlset, const char *rname, npf_rule_t *rl)
 {
-	npf_rule_t *rg, *it;
-	pri_t priocmd;
+	npf_rule_t *rg, *it, *target;
+	int priocmd;
 
+	if (!NPF_DYNAMIC_RULE_P(rl->r_attr)) {
+		return EINVAL;
+	}
 	rg = npf_ruleset_lookup(rlset, rname);
 	if (rg == NULL) {
 		return ESRCH;
 	}
-	if (!NPF_DYNAMIC_RULE_P(rl->r_attr)) {
-		return EINVAL;
-	}
 
 	/* Dynamic rule - assign a unique ID and save the parent. */
 	rl->r_id = ++rlset->rs_idcnt;
@@ -245,29 +247,32 @@ npf_ruleset_add(npf_ruleset_t *rlset, co
 		rl->r_priority = 0;
 	}
 
+	/*
+	 * WARNING: once rg->subset or target->r_next of an *active*
+	 * rule is set, then our rule becomes globally visible and active.
+	 * Must issue a load fence to ensure rl->r_next visibility first.
+	 */
 	switch (priocmd) {
-	case NPF_PRI_FIRST:
-		TAILQ_FOREACH(it, &rg->r_subset, r_entry) {
-			if (rl->r_priority <= it->r_priority)
-				break;
-		}
-		if (it) {
-			TAILQ_INSERT_BEFORE(it, rl, r_entry);
-		} else {
-			TAILQ_INSERT_HEAD(&rg->r_subset, rl, r_entry);
-		}
-		break;
 	case NPF_PRI_LAST:
 	default:
-		TAILQ_FOREACH(it, &rg->r_subset, r_entry) {
-			if (rl->r_priority < it->r_priority)
-				break;
-		}
-		if (it) {
-			TAILQ_INSERT_BEFORE(it, rl, r_entry);
-		} else {
-			TAILQ_INSERT_TAIL(&rg->r_subset, rl, r_entry);
+		target = NULL;
+		it = rg->r_subset;
+		while (it && it->r_priority <= rl->r_priority) {
+			target = it;
+			it = it->r_next;
+		}
+		if (target) {
+			rl->r_next = target->r_next;
+			membar_producer();
+			target->r_next = rl;
+			break;
 		}
+		/* FALLTHROUGH */
+
+	case NPF_PRI_FIRST:
+		rl->r_next = rg->r_subset;
+		membar_producer();
+		rg->r_subset = rl;
 		break;
 	}
 
@@ -276,26 +281,41 @@ npf_ruleset_add(npf_ruleset_t *rlset, co
 	return 0;
 }
 
+static void
+npf_ruleset_unlink(npf_rule_t *rl, npf_rule_t *prev)
+{
+	KASSERT(NPF_DYNAMIC_RULE_P(rl->r_attr));
+	if (prev) {
+		prev->r_next = rl->r_next;
+	} else {
+		npf_rule_t *rg = rl->r_parent;
+		rg->r_subset = rl->r_next;
+	}
+	LIST_REMOVE(rl, r_aentry);
+}
+
 /*
  * npf_ruleset_remove: remove the dynamic rule given the rule ID.
  */
 int
 npf_ruleset_remove(npf_ruleset_t *rlset, const char *rname, uint64_t id)
 {
-	npf_rule_t *rg, *rl;
+	npf_rule_t *rg, *prev = NULL;
 
 	if ((rg = npf_ruleset_lookup(rlset, rname)) == NULL) {
 		return ESRCH;
 	}
-	TAILQ_FOREACH(rl, &rg->r_subset, r_entry) {
+	for (npf_rule_t *rl = rg->r_subset; rl; rl = rl->r_next) {
 		KASSERT(rl->r_parent == rg);
+		KASSERT(NPF_DYNAMIC_RULE_P(rl->r_attr));
 
 		/* Compare ID.  On match, remove and return. */
 		if (rl->r_id == id) {
-			npf_ruleset_unlink(rlset, rl);
+			npf_ruleset_unlink(rl, prev);
 			LIST_INSERT_HEAD(&rlset->rs_gc, rl, r_aentry);
 			return 0;
 		}
+		prev = rl;
 	}
 	return ENOENT;
 }
@@ -307,7 +327,7 @@ int
 npf_ruleset_remkey(npf_ruleset_t *rlset, const char *rname,
     const void *key, size_t len)
 {
-	npf_rule_t *rg, *rl;
+	npf_rule_t *rg, *rlast = NULL, *prev = NULL, *lastprev = NULL;
 
 	KASSERT(len && len <= NPF_RULE_MAXKEYLEN);
 
@@ -315,18 +335,22 @@ npf_ruleset_remkey(npf_ruleset_t *rlset,
 		return ESRCH;
 	}
 
-	/* Find the last in the list. */
-	TAILQ_FOREACH_REVERSE(rl, &rg->r_subset, npf_ruleq, r_entry) {
+	/* Compare the key and find the last in the list. */
+	for (npf_rule_t *rl = rg->r_subset; rl; rl = rl->r_next) {
 		KASSERT(rl->r_parent == rg);
-
-		/* Compare the key.  On match, remove and return. */
+		KASSERT(NPF_DYNAMIC_RULE_P(rl->r_attr));
 		if (memcmp(rl->r_key, key, len) == 0) {
-			npf_ruleset_unlink(rlset, rl);
-			LIST_INSERT_HEAD(&rlset->rs_gc, rl, r_aentry);
-			return 0;
+			lastprev = prev;
+			rlast = rl;
 		}
+		prev = rl;
 	}
-	return ENOENT;
+	if (!rlast) {
+		return ENOENT;
+	}
+	npf_ruleset_unlink(rlast, lastprev);
+	LIST_INSERT_HEAD(&rlset->rs_gc, rlast, r_aentry);
+	return 0;
 }
 
 /*
@@ -337,7 +361,7 @@ npf_ruleset_list(npf_ruleset_t *rlset, c
 {
 	prop_dictionary_t rgdict;
 	prop_array_t rules;
-	npf_rule_t *rg, *rl;
+	npf_rule_t *rg;
 
 	KASSERT(npf_config_locked_p());
 
@@ -352,12 +376,13 @@ npf_ruleset_list(npf_ruleset_t *rlset, c
 		return NULL;
 	}
 
-	TAILQ_FOREACH(rl, &rg->r_subset, r_entry) {
+	for (npf_rule_t *rl = rg->r_subset; rl; rl = rl->r_next) {
 		prop_dictionary_t rldict;
 
-		rldict = prop_dictionary_create();
 		KASSERT(rl->r_parent == rg);
+		KASSERT(NPF_DYNAMIC_RULE_P(rl->r_attr));
 
+		rldict = prop_dictionary_create();
 		if (npf_rule_export(rlset, rl, rldict)) {
 			prop_object_release(rldict);
 			prop_object_release(rules);
@@ -387,10 +412,17 @@ npf_ruleset_flush(npf_ruleset_t *rlset, 
 	if ((rg = npf_ruleset_lookup(rlset, rname)) == NULL) {
 		return ESRCH;
 	}
-	while ((rl = TAILQ_FIRST(&rg->r_subset)) != NULL) {
+
+	rl = atomic_swap_ptr(&rg->r_subset, NULL);
+	membar_producer();
+
+	while (rl) {
+		KASSERT(NPF_DYNAMIC_RULE_P(rl->r_attr));
 		KASSERT(rl->r_parent == rg);
-		npf_ruleset_unlink(rlset, rl);
+
+		LIST_REMOVE(rl, r_aentry);
 		LIST_INSERT_HEAD(&rlset->rs_gc, rl, r_aentry);
+		rl = rl->r_next;
 	}
 	return 0;
 }
@@ -460,27 +492,33 @@ npf_ruleset_reload(npf_ruleset_t *newset
 	 * Scan the dynamic rules and share (migrate) if needed.
 	 */
 	LIST_FOREACH(rg, &newset->rs_dynamic, r_dentry) {
-		npf_rule_t *actrg;
+		npf_rule_t *active_rgroup;
 
 		/* Look for a dynamic ruleset group with such name. */
-		actrg = npf_ruleset_lookup(oldset, rg->r_name);
-		if (actrg == NULL) {
+		active_rgroup = npf_ruleset_lookup(oldset, rg->r_name);
+		if (active_rgroup == NULL) {
 			continue;
 		}
 
 		/*
-		 * Copy the list-head structure.  This is necessary because
-		 * the rules are still active and therefore accessible for
-		 * inspection via the old ruleset.
+		 * ATOMICITY: Copy the head pointer of the linked-list,
+		 * but do not remove the rules from the active r_subset.
+		 * This is necessary because the rules are still active
+		 * and therefore are accessible for inspection via the
+		 * old ruleset.
 		 */
-		memcpy(&rg->r_subset, &actrg->r_subset, sizeof(rg->r_subset));
-		TAILQ_FOREACH(rl, &rg->r_subset, r_entry) {
-			/*
-			 * We can safely migrate to the new all-rule list
-			 * and re-set the parent rule, though.
-			 */
+		rg->r_subset = active_rgroup->r_subset;
+
+		/*
+		 * We can safely migrate to the new all-rule list and
+		 * reset the parent rule, though.
+		 */
+		for (rl = rg->r_subset; rl; rl = rl->r_next) {
+			KASSERT(NPF_DYNAMIC_RULE_P(rl->r_attr));
 			LIST_REMOVE(rl, r_aentry);
 			LIST_INSERT_HEAD(&newset->rs_all, rl, r_aentry);
+
+			KASSERT(rl->r_parent == active_rgroup);
 			rl->r_parent = rg;
 		}
 	}
@@ -614,7 +652,6 @@ npf_rule_alloc(prop_dictionary_t rldict)
 
 	/* Allocate a rule structure. */
 	rl = kmem_zalloc(sizeof(npf_rule_t), KM_SLEEP);
-	TAILQ_INIT(&rl->r_subset);
 	rl->r_natp = NULL;
 
 	/* Name (optional) */
@@ -626,9 +663,17 @@ npf_rule_alloc(prop_dictionary_t rldict)
 
 	/* Attributes, priority and interface ID (optional). */
 	prop_dictionary_get_uint32(rldict, "attr", &rl->r_attr);
-	prop_dictionary_get_int32(rldict, "prio", &rl->r_priority);
 	rl->r_attr &= ~NPF_RULE_PRIVMASK;
 
+	if (NPF_DYNAMIC_RULE_P(rl->r_attr)) {
+		/* Priority of the dynamic rule. */
+		prop_dictionary_get_int32(rldict, "prio", &rl->r_priority);
+	} else {
+		/* The skip-to index.  No need to validate it. */
+		prop_dictionary_get_uint32(rldict, "skip-to", &rl->r_skip_to);
+	}
+
+	/* Interface name; register and get the npf-if-id. */
 	if (prop_dictionary_get_cstring_nocopy(rldict, "ifname", &rname)) {
 		if ((rl->r_ifid = npf_ifmap_register(rname)) == 0) {
 			kmem_free(rl, sizeof(npf_rule_t));
@@ -638,9 +683,6 @@ npf_rule_alloc(prop_dictionary_t rldict)
 		rl->r_ifid = 0;
 	}
 
-	/* Get the skip-to index.  No need to validate it. */
-	prop_dictionary_get_uint32(rldict, "skip-to", &rl->r_skip_to);
-
 	/* Key (optional). */
 	prop_object_t obj = prop_dictionary_get(rldict, "key");
 	const void *key = prop_data_data_nocopy(obj);
@@ -829,15 +871,16 @@ npf_rule_inspect(const npf_rule_t *rl, b
  * npf_rule_reinspect: re-inspect the dynamic rule by iterating its list.
  * This is only for the dynamic rules.  Subrules cannot have nested rules.
  */
-static npf_rule_t *
-npf_rule_reinspect(const npf_rule_t *drl, bpf_args_t *bc_args,
+static inline npf_rule_t *
+npf_rule_reinspect(const npf_rule_t *rg, bpf_args_t *bc_args,
     const int di_mask, const u_int ifid)
 {
 	npf_rule_t *final_rl = NULL, *rl;
 
-	KASSERT(NPF_DYNAMIC_GROUP_P(drl->r_attr));
+	KASSERT(NPF_DYNAMIC_GROUP_P(rg->r_attr));
 
-	TAILQ_FOREACH(rl, &drl->r_subset, r_entry) {
+	for (rl = rg->r_subset; rl; rl = rl->r_next) {
+		KASSERT(!final_rl || rl->r_priority >= final_rl->r_priority);
 		if (!npf_rule_inspect(rl, bc_args, di_mask, ifid)) {
 			continue;
 		}
@@ -882,7 +925,6 @@ npf_ruleset_inspect(npf_cache_t *npc, co
 		const uint32_t attr = rl->r_attr;
 
 		KASSERT(!nbuf_flag_p(nbuf, NBUF_DATAREF_RESET));
-		KASSERT(!final_rl || rl->r_priority >= final_rl->r_priority);
 		KASSERT(n < skip_to);
 
 		/* Group is a barrier: return a matching if found any. */
@@ -948,7 +990,7 @@ npf_ruleset_dump(const char *name)
 
 	LIST_FOREACH(rg, &rlset->rs_dynamic, r_dentry) {
 		printf("ruleset '%s':\n", rg->r_name);
-		TAILQ_FOREACH(rl, &rg->r_subset, r_entry) {
+		for (rl = rg->r_subset; rl; rl = rl->r_next) {
 			printf("\tid %"PRIu64", key: ", rl->r_id);
 			for (u_int i = 0; i < NPF_RULE_MAXKEYLEN; i++)
 				printf("%x", rl->r_key[i]);

Reply via email to