On Tue, May 03, 2022 at 09:31:51PM +0200, Alexander Bluhm wrote:
> On Tue, May 03, 2022 at 07:42:34PM +0200, Moritz Buhl wrote:
> > commit 4b3977248902c22d96aaebdb5784840debc2631c
> > Author: mikeb <[email protected]>
> > Date: Mon Nov 24 13:22:09 2008 +0000
> >
> > Fix splasserts seen in pr 5987 by propagating a flag that describes
> > whether we're called from the interrupt context to the functions
> > performing allocations.
>
> In those days pf was protected with the kernel lock and spl. Both are
> released when sleeping. Now we have the netlock and the pf lock. These
> are rwlocks and are not released during sleep. So this old race should
> not exist anymore.
>
> > And we are not in interrupt context.
>
> Yes, it is ioctl(2). I think we should always malloc with M_WAITOK
> when in a syscall. Otherwise userland would have to cope with randomly
> failing syscalls.
>
> > If this is sound, then the only reason why pfr_destroy_ktable was called
> > is that pool_get is called with PR_NOWAIT. And then the following diff
> > would help.
>
> The code is too complex to be sure what the reason for the syzkaller
> panic is. Sleeping in malloc is correct anyway and may improve the
> situation.
>
> Functions taking bare 0 or 1 argument values are hard to read. It would
> be much nicer to pass M_WAITOK or M_NOWAIT. And the variable name
> "intr" does not make sense anymore; pf does not run in interrupt
> context. Call it "mflags" like in pfi_kif_alloc(), or "wait" like
> in other functions.
>
> Could you cleanup that also?
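For reference, the cleanup you suggest would look roughly like this; a
hypothetical sketch only (passing the pool_get(9) wait flag through
explicitly instead of 0/1), not what the diff below does:

	struct pfr_ktable *
	pfr_create_ktable(struct pfr_table *tbl, time_t tzero,
	    int attachruleset, int wait)
	{
		struct pfr_ktable	*kt;

		/* callers pass PR_WAITOK in process context, PR_NOWAIT otherwise */
		kt = pool_get(&pfr_ktable_pl, wait | PR_ZERO);
		if (kt == NULL)
			return (NULL);
		kt->pfrkt_t = *tbl;
		/* ruleset attach etc. stays as it is today */
		return (kt);
	}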

I have a diff that touches the same area. It is a work-in-progress
change that moves all memory allocations outside of NET_LOCK()/PF_LOCK();
I'm sending it now just for your reference. I'm not sure flipping the
flag is the right change: in general we don't want to hold
NET_LOCK()/PF_LOCK() while waiting for memory.
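Roughly, the pattern the diff follows is (simplified sketch, not the
literal code):

	/* allocate everything up front, unlocked; pool_get(9) may sleep */
	SLIST_INIT(&auxq);
	for (i = 0; i < size; i++) {
		p = pfr_create_ktable(&key.pfrkt_t, tzero, 0,
		    !(flags & PFR_FLAG_USERIOCTL));
		if (p == NULL)
			senderr(ENOMEM);
		SLIST_INSERT_HEAD(&auxq, p, pfrkt_workq);
	}

	/* take the locks only for the RB-tree and workq manipulation */
	NET_LOCK();
	PF_LOCK();
	/* move the tables we actually need from auxq to addq/changeq */
	PF_UNLOCK();
	NET_UNLOCK();

	/* whatever is left on auxq was not needed; free it unlocked */
	pfr_destroy_ktables_aux(&auxq);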
regards
sashan
--------8<---------------8<---------------8<------------------8<--------
diff --git a/sys/net/pf_ioctl.c b/sys/net/pf_ioctl.c
index 8315b115474..1f036e1368f 100644
--- a/sys/net/pf_ioctl.c
+++ b/sys/net/pf_ioctl.c
@@ -2217,12 +2217,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
error = ENODEV;
goto fail;
}
- NET_LOCK();
- PF_LOCK();
error = pfr_add_tables(io->pfrio_buffer, io->pfrio_size,
&io->pfrio_nadd, io->pfrio_flags | PFR_FLAG_USERIOCTL);
- PF_UNLOCK();
- NET_UNLOCK();
break;
}
diff --git a/sys/net/pf_table.c b/sys/net/pf_table.c
index fb23bcabe04..cbaa728b105 100644
--- a/sys/net/pf_table.c
+++ b/sys/net/pf_table.c
@@ -189,6 +189,7 @@ void pfr_clstats_ktable(struct pfr_ktable *, time_t, int);
struct pfr_ktable *pfr_create_ktable(struct pfr_table *, time_t, int,
int);
void pfr_destroy_ktables(struct pfr_ktableworkq *, int);
+void pfr_destroy_ktables_aux(struct pfr_ktableworkq *);
void pfr_destroy_ktable(struct pfr_ktable *, int);
int pfr_ktable_compare(struct pfr_ktable *,
struct pfr_ktable *);
@@ -1493,14 +1494,16 @@ pfr_clr_tables(struct pfr_table *filter, int *ndel, int flags)
int
pfr_add_tables(struct pfr_table *tbl, int size, int *nadd, int flags)
{
- struct pfr_ktableworkq addq, changeq;
- struct pfr_ktable *p, *q, *r, key;
+ struct pfr_ktableworkq addq, changeq, auxq;
+ struct pfr_ktable *p, *q, *r, *n, *w, key;
int i, rv, xadd = 0;
time_t tzero = gettime();
ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY);
SLIST_INIT(&addq);
SLIST_INIT(&changeq);
+ SLIST_INIT(&auxq);
+ /* pre-allocate all memory outside of locks */
for (i = 0; i < size; i++) {
YIELD(flags & PFR_FLAG_USERIOCTL);
if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t), flags))
@@ -1509,65 +1512,140 @@ pfr_add_tables(struct pfr_table *tbl, int size, int *nadd, int flags)
flags & PFR_FLAG_USERIOCTL))
senderr(EINVAL);
key.pfrkt_flags |= PFR_TFLAG_ACTIVE;
- p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key);
- if (p == NULL) {
- p = pfr_create_ktable(&key.pfrkt_t, tzero, 1,
- !(flags & PFR_FLAG_USERIOCTL));
- if (p == NULL)
- senderr(ENOMEM);
- SLIST_FOREACH(q, &addq, pfrkt_workq) {
- if (!pfr_ktable_compare(p, q)) {
- pfr_destroy_ktable(p, 0);
- goto _skip;
- }
- }
- SLIST_INSERT_HEAD(&addq, p, pfrkt_workq);
- xadd++;
- if (!key.pfrkt_anchor[0])
- goto _skip;
+ p = pfr_create_ktable(&key.pfrkt_t, tzero, 0,
+ !(flags & PFR_FLAG_USERIOCTL));
+ if (p == NULL)
+ senderr(ENOMEM);
- /* find or create root table */
- bzero(key.pfrkt_anchor, sizeof(key.pfrkt_anchor));
- r = RB_FIND(pfr_ktablehead, &pfr_ktables, &key);
- if (r != NULL) {
- p->pfrkt_root = r;
- goto _skip;
- }
- SLIST_FOREACH(q, &addq, pfrkt_workq) {
- if (!pfr_ktable_compare(&key, q)) {
- p->pfrkt_root = q;
- goto _skip;
- }
+ /*
+ * Note: we also pre-allocate a root table here. We keep it
+ * at ->pfrkt_root, which we must not forget about.
+ */
+ key.pfrkt_flags = 0;
+ memset(key.pfrkt_anchor, 0, sizeof(key.pfrkt_anchor));
+ p->pfrkt_root = pfr_create_ktable(&key.pfrkt_t, 0, 0,
+ !(flags & PFR_FLAG_USERIOCTL));
+ if (p->pfrkt_root == NULL) {
+ pfr_destroy_ktable(p, 0);
+ senderr(ENOMEM);
+ }
+
+ SLIST_FOREACH(q, &auxq, pfrkt_workq) {
+ if (!pfr_ktable_compare(p, q)) {
+ /*
+ * We need no lock here, because `p` is empty,
+ * there are no rules or shadow tables
+ * attached.
+ */
+ pfr_destroy_ktable(p->pfrkt_root, 0);
+ p->pfrkt_root = NULL;
+ pfr_destroy_ktable(p, 0);
+ break;
}
- key.pfrkt_flags = 0;
- r = pfr_create_ktable(&key.pfrkt_t, 0, 1,
- !(flags & PFR_FLAG_USERIOCTL));
- if (r == NULL)
- senderr(ENOMEM);
- SLIST_INSERT_HEAD(&addq, r, pfrkt_workq);
- p->pfrkt_root = r;
- } else if (!(p->pfrkt_flags & PFR_TFLAG_ACTIVE)) {
- SLIST_FOREACH(q, &changeq, pfrkt_workq)
- if (!pfr_ktable_compare(&key, q))
- goto _skip;
+ }
+
+ /* if a duplicate was found above, `p` is gone already */
+ if (q != NULL)
+ continue;
+
+ SLIST_INSERT_HEAD(&auxq, p, pfrkt_workq);
+ }
+
+ /*
+ * auxq contains freshly allocated tables with no dups.
+ * also note there are no rulesets attached, because
+ * the attach operation requires PF_LOCK().
+ */
+ NET_LOCK();
+ PF_LOCK();
+ SLIST_FOREACH_SAFE(n, &auxq, pfrkt_workq, w) {
+ p = RB_FIND(pfr_ktablehead, &pfr_ktables, n);
+ if (p == NULL) {
+ SLIST_REMOVE(&auxq, n, pfr_ktable, pfrkt_workq);
+ SLIST_INSERT_HEAD(&addq, n, pfrkt_workq);
+ } else {
p->pfrkt_nflags = (p->pfrkt_flags &
~PFR_TFLAG_USRMASK) | key.pfrkt_flags;
SLIST_INSERT_HEAD(&changeq, p, pfrkt_workq);
- xadd++;
}
-_skip:
- ;
+ xadd++;
+ }
+
+ /*
+ * addq contains tables we have to insert and attach rules to,
+ * changeq contains tables we need to update,
+ * auxq contains pre-allocated tables that we won't use and
+ * must free.
+ */
+ SLIST_FOREACH_SAFE(p, &addq, pfrkt_workq, w) {
+ p->pfrkt_rs = pf_find_or_create_ruleset(p->pfrkt_anchor);
+ if (p->pfrkt_rs == NULL) {
+ xadd--;
+ SLIST_REMOVE(&addq, p, pfr_ktable, pfrkt_workq);
+ SLIST_INSERT_HEAD(&auxq, p, pfrkt_workq);
+ continue;
+ }
+ p->pfrkt_rs->tables++;
+
+ if (!p->pfrkt_anchor[0]) {
+ q = p->pfrkt_root;
+ p->pfrkt_root = NULL;
+ SLIST_INSERT_HEAD(&auxq, q, pfrkt_workq);
+ continue;
+ }
+
+ /* use pre-allocated root table as a key */
+ q = p->pfrkt_root;
+ p->pfrkt_root = NULL;
+ r = RB_FIND(pfr_ktablehead, &pfr_ktables, q);
+ if (r != NULL) {
+ p->pfrkt_root = r;
+ SLIST_INSERT_HEAD(&auxq, q, pfrkt_workq);
+ continue;
+ }
+ /*
+ * There is a chance we already created the root table in an
+ * earlier iteration; such a table can only exist on addq.
+ */
+ SLIST_FOREACH(r, &addq, pfrkt_workq) {
+ if (!pfr_ktable_compare(r, q)) {
+ /*
+ * `r` is our root table we've found earlier,
+ * `q` can get dropped.
+ */
+ p->pfrkt_root = r;
+ SLIST_INSERT_HEAD(&auxq, q, pfrkt_workq);
+ break;
+ }
+ }
+
+ /* root table found on addq, `q` was dropped */
+ if (p->pfrkt_root != NULL)
+ continue;
+ q->pfrkt_rs = pf_find_or_create_ruleset(q->pfrkt_anchor);
+ /*
+ * root tables are attached to main ruleset,
+ * because ->pfrkt_anchor[0] == '\0'
+ */
+ KASSERT(q->pfrkt_rs == &pf_main_ruleset);
+ q->pfrkt_rs->tables++;
+ p->pfrkt_root = q;
+ SLIST_INSERT_HEAD(&addq, q, pfrkt_workq);
}
if (!(flags & PFR_FLAG_DUMMY)) {
pfr_insert_ktables(&addq);
pfr_setflags_ktables(&changeq);
} else
- pfr_destroy_ktables(&addq, 0);
+ pfr_destroy_ktables(&addq, 0);
+
+
+ /*
+ * all pfr_destroy_ktables() must be kept under the lock,
+ * because of pf_remove_if_empty_ruleset()
+ */
+ PF_UNLOCK();
+ NET_UNLOCK();
+
+ pfr_destroy_ktables_aux(&auxq);
+
if (nadd != NULL)
*nadd = xadd;
return (0);
_bad:
- pfr_destroy_ktables(&addq, 0);
+ pfr_destroy_ktables_aux(&auxq);
return (rv);
}
@@ -2214,6 +2292,7 @@ pfr_create_ktable(struct pfr_table *tbl, time_t tzero, int attachruleset,
kt->pfrkt_t = *tbl;
if (attachruleset) {
+ PF_ASSERT_LOCKED();
rs = pf_find_or_create_ruleset(tbl->pfrt_anchor);
if (!rs) {
pfr_destroy_ktable(kt, 0);
@@ -2249,6 +2328,31 @@ pfr_destroy_ktables(struct pfr_ktableworkq *workq, int flushaddr)
}
}
+void
+pfr_destroy_ktables_aux(struct pfr_ktableworkq *auxq)
+{
+ struct pfr_ktable *p;
+
+ while ((p = SLIST_FIRST(auxq)) != NULL) {
+ SLIST_REMOVE_HEAD(auxq, pfrkt_workq);
+ /*
+ * There must be no extra data (rules, shadow tables, ...)
+ * attached, because auxq holds only freshly allocated, unused
+ * tables. Therefore we can also be called without any lock.
+ */
+ if (p->pfrkt_root != NULL) {
+ KASSERT(p->pfrkt_root->pfrkt_rs == NULL);
+ KASSERT(p->pfrkt_root->pfrkt_shadow == NULL);
+ KASSERT(p->pfrkt_root->pfrkt_root == NULL);
+ pfr_destroy_ktable(p->pfrkt_root, 0);
+ p->pfrkt_root = NULL;
+ }
+ KASSERT(p->pfrkt_rs == NULL);
+ KASSERT(p->pfrkt_shadow == NULL);
+ pfr_destroy_ktable(p, 0);
+ }
+}
+
void
pfr_destroy_ktable(struct pfr_ktable *kt, int flushaddr)
{