Hello Stefan, On Wed, Aug 10, 2022 at 02:38:16PM +0000, Stefan Butz wrote: > Hi everyone, > > this mail includes a patch to store pf rules in a red-black tree. > Currently they are stored in a linked list. > My system configured with 16000 rules takes about 10 minutes > to print them out using `pfctl -sr`. > This patch decreases the time to 4 seconds. > I was not able to measure a time increase for rule insertion. > Inserting a lot of labels is still very slow. > This has to be attacked separatly. > >
below is a diff which should make DIOCGETRULE fast enough. It introduces a transaction (struct pf_trans). it comes from my 'work-in-progress' tree. I'd like to introduce transactions to move all memory allocations done on behalf of ioctl(2) in pf(4) outside of PF_LOCK() scope. Seeing your diff I've realized we can use pf_trans also to speed up DIOCGETRULE. diff below is based on my work-in-progress tree/branch. There might be some rough edges, but it compiles and seems to work. if people will like the idea I'll polish the change and commit it. It will also help me to make upcoming diffs smaller. At this point I'd like to know if idea presented in diff below makes kind of sense. there is `struct pf_trans` which keeps context for operations which involve multiple ioctl(2) calls (such as ADDRULE, GETRULE,...) ruleset.ticket is renamed to ruleset.version because ticket is know kept in newly introduced pf_trans. the workflow for process which wants to read/modify ruleset. In case of DIOCGETRULES/DIOCGETRULE operations the workflow looks as follows: transaction (pf_trans) is created by DIOCGETRULES transaction gets reference to anchor/ruleset we also remember current version of ruleset in transaction we also rmemeber a pointer to the first rule in ruleset/anchor we return transaction ticket to pfctl(8) process then pfctl(8) issues DIOCGETRULE, it passes transaction ticket to ioctl(2) we use ticket to look up a transaction created by DIOCGETRULES we check if version kept in transaction matches the version at ruleset. If there is a mismatch ruleset got modified and we bail out. if rule stored in transaction is NULL we return ENOENT to indicate ruleset is either empty or there are no more rules if version matches then we may safely proceed, the rule pointer must be valid because matching version number warrants nothing was modified. we copy rule to buffer and we store a next rule in transaction. all transactions created by process are currently destroyed in pfclose() thanks for thoughts/feedback. regards sashan --------8<---------------8<---------------8<------------------8<-------- diff --git a/sbin/pfctl/pfctl.c b/sbin/pfctl/pfctl.c index 9f14b955ca7..cf9def71c02 100644 --- a/sbin/pfctl/pfctl.c +++ b/sbin/pfctl/pfctl.c @@ -833,7 +833,7 @@ pfctl_show_rules(int dev, char *path, int opts, enum pfctl_show format, char *anchorname, int depth, int wildcard, long shownr) { struct pfioc_rule pr; - u_int32_t nr, mnr, header = 0; + u_int32_t header = 0; int len = strlen(path), ret = 0; char *npath, *p; @@ -884,24 +884,9 @@ pfctl_show_rules(int dev, char *path, int opts, enum pfctl_show format, goto error; } - if (shownr < 0) { - mnr = pr.nr; - nr = 0; - } else if (shownr < pr.nr) { - nr = shownr; - mnr = shownr + 1; - } else { - warnx("rule %ld not found", shownr); - ret = -1; - goto error; - } - for (; nr < mnr; ++nr) { - pr.nr = nr; - if (ioctl(dev, DIOCGETRULE, &pr) == -1) { - warn("DIOCGETRULE"); - ret = -1; - goto error; - } + while (ioctl(dev, DIOCGETRULE, &pr) != -1) { + if ((shownr != -1) && (shownr != pr.nr)) + continue; /* anchor is the same for all rules in it */ if (pr.rule.anchor_wildcard == 0) @@ -956,6 +941,13 @@ pfctl_show_rules(int dev, char *path, int opts, enum pfctl_show format, case PFCTL_SHOW_NOTHING: break; } + errno = 0; + } + + if ((errno != 0) && (errno != ENOENT)) { + warn("DIOCGETRULE"); + ret = -1; + goto error; } /* diff --git a/sys/net/pf_ioctl.c b/sys/net/pf_ioctl.c index b540afc6fe2..6ef89d162b3 100644 --- a/sys/net/pf_ioctl.c +++ b/sys/net/pf_ioctl.c @@ -116,6 +116,11 @@ void pf_qid_unref(u_int16_t); int pf_states_clr(struct pfioc_state_kill *); int pf_states_get(struct pfioc_states *); +struct pf_trans *pf_open_trans(pid_t); +struct pf_trans *pf_find_trans(u_int32_t); +void pf_free_trans(struct pf_trans *); +void pf_rollback_trans(struct pf_trans *); + struct pf_rule pf_default_rule, pf_default_rule_new; struct { @@ -165,6 +170,8 @@ int pf_rtlabel_add(struct pf_addr_wrap *); void pf_rtlabel_remove(struct pf_addr_wrap *); void pf_rtlabel_copyout(struct pf_addr_wrap *); +u_int32_t trans_ticket = 1; +LIST_HEAD(trans, pf_trans) pf_ioctl_trans; void pfattach(int num) @@ -273,8 +280,27 @@ pfopen(dev_t dev, int flags, int fmt, struct proc *p) int pfclose(dev_t dev, int flags, int fmt, struct proc *p) { + struct pf_trans *w, *s; + LIST_HEAD(trans, pf_trans) tmp_list; + if (minor(dev) >= 1) return (ENXIO); + + LIST_INIT(&tmp_list); + rw_enter_write(&pfioctl_rw); + LIST_FOREACH_SAFE(w, &pf_ioctl_trans, entry, s) { + if (w->pid == p->p_p->ps_pid) { + LIST_REMOVE(w, entry); + LIST_INSERT_HEAD(&tmp_list, w, entry); + } + } + rw_exit_write(&pfioctl_rw); + + while ((w = LIST_FIRST(&tmp_list)) != NULL) { + LIST_REMOVE(w, entry); + pf_free_trans(w); + } + return (0); } @@ -359,7 +385,7 @@ pf_purge_rule(struct pf_rule *rule) ruleset->rules.active.rcount--; TAILQ_FOREACH(rule, ruleset->rules.active.ptr, entries) rule->nr = nr++; - ruleset->rules.active.ticket++; + ruleset->rules.active.version++; pf_calc_skip_steps(ruleset->rules.active.ptr); pf_remove_if_empty_ruleset(ruleset); @@ -525,31 +551,32 @@ pf_qid_unref(u_int16_t qid) } int -pf_begin_rules(u_int32_t *ticket, const char *anchor) +pf_begin_rules(u_int32_t *version, const char *anchor) { struct pf_ruleset *rs; struct pf_rule *rule; + if ((rs = pf_find_or_create_ruleset(anchor)) == NULL) return (EINVAL); while ((rule = TAILQ_FIRST(rs->rules.inactive.ptr)) != NULL) { pf_rm_rule(rs->rules.inactive.ptr, rule); rs->rules.inactive.rcount--; } - *ticket = ++rs->rules.inactive.ticket; + *version = ++rs->rules.inactive.version; rs->rules.inactive.open = 1; return (0); } void -pf_rollback_rules(u_int32_t ticket, char *anchor) +pf_rollback_rules(u_int32_t version, char *anchor) { struct pf_ruleset *rs; struct pf_rule *rule; rs = pf_find_ruleset(anchor); if (rs == NULL || !rs->rules.inactive.open || - rs->rules.inactive.ticket != ticket) + rs->rules.inactive.version != version) return; while ((rule = TAILQ_FIRST(rs->rules.inactive.ptr)) != NULL) { pf_rm_rule(rs->rules.inactive.ptr, rule); @@ -828,7 +855,7 @@ pf_hash_rule(MD5_CTX *ctx, struct pf_rule *rule) } int -pf_commit_rules(u_int32_t ticket, char *anchor) +pf_commit_rules(u_int32_t version, char *anchor) { struct pf_ruleset *rs; struct pf_rule *rule; @@ -840,7 +867,7 @@ pf_commit_rules(u_int32_t ticket, char *anchor) rs = pf_find_ruleset(anchor); if (rs == NULL || !rs->rules.inactive.open || - ticket != rs->rules.inactive.ticket) + version != rs->rules.inactive.version) return (EBUSY); if (rs == &pf_main_ruleset) @@ -855,7 +882,7 @@ pf_commit_rules(u_int32_t ticket, char *anchor) rs->rules.inactive.ptr = old_rules; rs->rules.inactive.rcount = old_rcount; - rs->rules.active.ticket = rs->rules.inactive.ticket; + rs->rules.active.version = rs->rules.inactive.version; pf_calc_skip_steps(rs->rules.active.ptr); @@ -1148,10 +1175,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) return (EACCES); } - if (flags & FWRITE) - rw_enter_write(&pfioctl_rw); - else - rw_enter_read(&pfioctl_rw); + rw_enter_write(&pfioctl_rw); switch (cmd) { @@ -1197,7 +1221,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) NET_LOCK(); PF_LOCK(); - pq->ticket = pf_main_ruleset.rules.active.ticket; + pq->ticket = pf_main_ruleset.rules.active.version; /* save state to not run over them all each time? */ qs = TAILQ_FIRST(pf_queues_active); @@ -1218,7 +1242,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) NET_LOCK(); PF_LOCK(); - if (pq->ticket != pf_main_ruleset.rules.active.ticket) { + if (pq->ticket != pf_main_ruleset.rules.active.version) { error = EBUSY; PF_UNLOCK(); NET_UNLOCK(); @@ -1249,7 +1273,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) NET_LOCK(); PF_LOCK(); - if (pq->ticket != pf_main_ruleset.rules.active.ticket) { + if (pq->ticket != pf_main_ruleset.rules.active.version) { error = EBUSY; PF_UNLOCK(); NET_UNLOCK(); @@ -1296,7 +1320,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) NET_LOCK(); PF_LOCK(); - if (q->ticket != pf_main_ruleset.rules.inactive.ticket) { + if (q->ticket != pf_main_ruleset.rules.inactive.version) { error = EBUSY; PF_UNLOCK(); NET_UNLOCK(); @@ -1392,7 +1416,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) pf_rule_free(rule); goto fail; } - if (pr->ticket != ruleset->rules.inactive.ticket) { + if (pr->ticket != ruleset->rules.inactive.version) { error = EBUSY; PF_UNLOCK(); NET_UNLOCK(); @@ -1454,7 +1478,9 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) case DIOCGETRULES: { struct pfioc_rule *pr = (struct pfioc_rule *)addr; struct pf_ruleset *ruleset; - struct pf_rule *tail; + struct pf_rule *rule; + struct pf_trans *t; + u_int32_t ruleset_version; NET_LOCK(); PF_LOCK(); @@ -1466,14 +1492,25 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) NET_UNLOCK(); goto fail; } - tail = TAILQ_LAST(ruleset->rules.active.ptr, pf_rulequeue); - if (tail) - pr->nr = tail->nr + 1; + rule = TAILQ_LAST(ruleset->rules.active.ptr, pf_rulequeue); + if (rule) + pr->nr = rule->nr + 1; else pr->nr = 0; - pr->ticket = ruleset->rules.active.ticket; + ruleset_version = ruleset->rules.active.version; + pf_anchor_take(ruleset->anchor); + rule = TAILQ_FIRST(ruleset->rules.active.ptr); PF_UNLOCK(); NET_UNLOCK(); + + t = pf_open_trans(p->p_p->ps_pid); + t->getrule.anchor = (ruleset == &pf_main_ruleset) ? + &pf_main_anchor : ruleset->anchor; + t->getrule.version = ruleset_version; + t->getrule.rule = rule; + pr->ticket = t->ticket; + DPFPRINTF(LOG_DEBUG, "DIOCGETRULES: tversion: %u vs. rversion: %u\n", t->getrule.version, ruleset_version); + break; } @@ -1481,29 +1518,27 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) struct pfioc_rule *pr = (struct pfioc_rule *)addr; struct pf_ruleset *ruleset; struct pf_rule *rule; + struct pf_trans *t; int i; + t = pf_find_trans(pr->ticket); + if (t == NULL) + return (EINVAL); + KASSERT(t->pid == p->p_p->ps_pid); + NET_LOCK(); PF_LOCK(); - pr->anchor[sizeof(pr->anchor) - 1] = '\0'; - ruleset = pf_find_ruleset(pr->anchor); - if (ruleset == NULL) { - error = EINVAL; - PF_UNLOCK(); - NET_UNLOCK(); - goto fail; - } - if (pr->ticket != ruleset->rules.active.ticket) { + KASSERT(t->getrule.anchor != NULL); + ruleset = &t->getrule.anchor->ruleset; + if (t->getrule.version != ruleset->rules.active.version) { error = EBUSY; PF_UNLOCK(); NET_UNLOCK(); goto fail; } - rule = TAILQ_FIRST(ruleset->rules.active.ptr); - while ((rule != NULL) && (rule->nr != pr->nr)) - rule = TAILQ_NEXT(rule, entries); + rule = t->getrule.rule; if (rule == NULL) { - error = EBUSY; + error = ENOENT; PF_UNLOCK(); NET_UNLOCK(); goto fail; @@ -1544,6 +1579,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) rule->bytes[0] = rule->bytes[1] = 0; rule->states_tot = 0; } + pr->nr = rule->nr; + t->getrule.rule = TAILQ_NEXT(rule, entries); PF_UNLOCK(); NET_UNLOCK(); break; @@ -1569,7 +1606,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) if (ruleset == NULL) error = EINVAL; else - pcr->ticket = ++ruleset->rules.active.ticket; + pcr->ticket = ++ruleset->rules.active.version; PF_UNLOCK(); NET_UNLOCK(); @@ -1619,7 +1656,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) goto fail; } - if (pcr->ticket != ruleset->rules.active.ticket) { + if (pcr->ticket != ruleset->rules.active.version) { error = EINVAL; PF_UNLOCK(); NET_UNLOCK(); @@ -1717,7 +1754,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) TAILQ_FOREACH(oldrule, ruleset->rules.active.ptr, entries) oldrule->nr = nr++; - ruleset->rules.active.ticket++; + ruleset->rules.active.version++; pf_calc_skip_steps(ruleset->rules.active.ptr); pf_remove_if_empty_ruleset(ruleset); @@ -2655,7 +2692,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) rs = pf_find_ruleset(ioe->anchor); if (rs == NULL || !rs->rules.inactive.open || - rs->rules.inactive.ticket != + rs->rules.inactive.version != ioe->ticket) { PF_UNLOCK(); NET_UNLOCK(); @@ -3031,10 +3068,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) break; } fail: - if (flags & FWRITE) - rw_exit_write(&pfioctl_rw); - else - rw_exit_read(&pfioctl_rw); + rw_exit_write(&pfioctl_rw); return (error); } @@ -3253,3 +3287,52 @@ pf_sysctl(void *oldp, size_t *oldlenp, void *newp, size_t newlen) return sysctl_rdstruct(oldp, oldlenp, newp, &pfs, sizeof(pfs)); } + +struct pf_trans * +pf_open_trans(pid_t pid) +{ + struct pf_trans *t; + + rw_assert_wrlock(&pfioctl_rw); + + t = malloc(sizeof(*t), M_TEMP, M_WAITOK|M_ZERO); + t->pid = pid; + t->ticket = trans_ticket++; + LIST_INSERT_HEAD(&pf_ioctl_trans, t, entry); + + return (t); +} + +struct pf_trans * +pf_find_trans(u_int32_t ticket) +{ + struct pf_trans *t; + + rw_assert_anylock(&pfioctl_rw); + + LIST_FOREACH(t, &pf_ioctl_trans, entry) { + if (t->ticket == ticket) + break; + } + + return (t); +} + +void +pf_free_trans(struct pf_trans *t) +{ + /* + * remove anchors, and main ruleset + */ + pf_anchor_rele(t->getrule.anchor); + free(t, M_TEMP, sizeof(*t)); +} + +void +pf_rollback_trans(struct pf_trans *t) +{ + if (t != NULL) { + LIST_REMOVE(t, entry); + pf_free_trans(t); + } +} diff --git a/sys/net/pf_ruleset.c b/sys/net/pf_ruleset.c index 6131895751e..9d17d71e124 100644 --- a/sys/net/pf_ruleset.c +++ b/sys/net/pf_ruleset.c @@ -233,6 +233,9 @@ pf_create_anchor(struct pf_anchor *parent, const char *aname) pf_init_ruleset(&anchor->ruleset); anchor->ruleset.anchor = anchor; +#ifdef _KERNEL + refcnt_init(&anchor->ref); +#endif return (anchor); } @@ -308,7 +311,7 @@ pf_remove_if_empty_ruleset(struct pf_ruleset *ruleset) if ((parent = ruleset->anchor->parent) != NULL) RB_REMOVE(pf_anchor_node, &parent->children, ruleset->anchor); - rs_pool_put_anchor(ruleset->anchor); + pf_anchor_rele(ruleset->anchor); if (parent == NULL) return; ruleset = &parent->ruleset; @@ -431,3 +434,27 @@ pf_remove_anchor(struct pf_rule *r) pf_remove_if_empty_ruleset(&r->anchor->ruleset); r->anchor = NULL; } + +void +pf_anchor_rele(struct pf_anchor *anchor) +{ + if ((anchor == NULL) || (anchor == &pf_main_anchor)) + return; + +#ifdef _KERNEL + if (refcnt_rele(&anchor->ref)) + rs_pool_put_anchor(anchor); +#else + rs_pool_put_anchor(anchor); +#endif +} + +struct pf_anchor * +pf_anchor_take(struct pf_anchor *anchor) +{ +#ifdef _KERNEL + if ((anchor != NULL) && (anchor != &pf_main_anchor)) + refcnt_take(&anchor->ref); +#endif + return (anchor); +} diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h index 6f88ff687ed..42d664e0133 100644 --- a/sys/net/pfvar.h +++ b/sys/net/pfvar.h @@ -928,7 +928,7 @@ struct pf_ruleset { struct { struct pf_rulequeue *ptr; u_int32_t rcount; - u_int32_t ticket; + u_int32_t version; int open; } active, inactive; } rules; @@ -950,6 +950,7 @@ struct pf_anchor { struct pf_ruleset ruleset; int refcnt; /* anchor rules */ int match; + struct refcnt ref; /* memory references */ }; RB_PROTOTYPE(pf_anchor_global, pf_anchor, entry_global, pf_anchor_compare) RB_PROTOTYPE(pf_anchor_node, pf_anchor, entry_node, pf_anchor_compare) @@ -1929,6 +1930,8 @@ struct pf_ruleset *pf_get_leaf_ruleset(char *, char **); struct pf_anchor *pf_create_anchor(struct pf_anchor *, const char *); struct pf_ruleset *pf_find_or_create_ruleset(const char *); void pf_rs_initialize(void); +void pf_anchor_rele(struct pf_anchor *); +struct pf_anchor *pf_anchor_take(struct pf_anchor *); /* The fingerprint functions can be linked into userland programs (tcpdump) */ int pf_osfp_add(struct pf_osfp_ioctl *); diff --git a/sys/net/pfvar_priv.h b/sys/net/pfvar_priv.h index ec8f61a800d..37ef8a28631 100644 --- a/sys/net/pfvar_priv.h +++ b/sys/net/pfvar_priv.h @@ -39,6 +39,8 @@ #include <sys/rwlock.h> #include <sys/mutex.h> +#include <sys/queue.h> +#include <net/pfvar.h> /* * @@ -209,6 +211,17 @@ struct pf_pdesc { } hdr; }; +struct pf_trans { + pid_t pid; /* process id */ + u_int32_t ticket; + LIST_ENTRY(pf_trans) entry; + struct { + u_int32_t version; + struct pf_anchor *anchor; + struct pf_rule *rule; + } getrule; +}; + extern struct task pf_purge_task; extern struct timeout pf_purge_to;