Hello,
</snip>
On Wed, Apr 26, 2023 at 11:37:58AM +1000, David Gwynne wrote:
> > fail:
> > - if (flags & FWRITE)
> > - rw_exit_write(&pfioctl_rw);
> > - else
> > - rw_exit_read(&pfioctl_rw);
> > + rw_exit_write(&pfioctl_rw);
>
> i dont think having the open mode flags affect whether you take a shared
> or exclusive lock makes sense anyway, so im happy with these bits.
while updating my diff I've noticed pfclose() needs
same change: dropping 'flags & FWRITE' test. This is
fixed i new diff below.
</snip>
>
> int unit = minor(dev);
>
> if (unit & ((1 << CLONE_SHIFT) - 1))
> return (ENXIO);
>
> this has some ties into the second issue, which is that we shouldn't
> use the pid as an identifier for state across syscalls like this
> in the kernel. the reason is that userland could be weird or buggy
> (or hostile) and fork in between creating a transaction and ending
> a transaction, but it will still have the fd it from from open(/dev/pf).
> instead, we should be using the whole dev_t or the minor number,
> as long as it includes the bits that the cloning infrastructure
> uses, as the identifier.
in new diff I'm using a minor(dev) to identify transaction owner.
>
> i would also check that the process^Wminor number that created a
> transaction is the same when looking up a transaction in pf_find_trans.
the pf_find_trans() is a dead code in diff below as it is
not being used there. Just to make sure I got things right
so I can update '3/3' diff in stack accordingly. Let's assume
snippet below comes from pfioctl() function
int
pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) {
...
t = pf_find_trans(ticket);
if (t == NULL)
return (ENXIO);
KASSERT(t->pft_unit == minor(dev));
is that kind of check in KASSET() something you have on your mind?
perhaps I can trade KASSERT() to regular code:
if (t->pft_unit != minor(dev))
return (EPERM);
thank you for pointers to bpf.c updated diff is below.
regards
sashan
--------8<---------------8<---------------8<------------------8<--------
diff --git a/sys/net/pf_ioctl.c b/sys/net/pf_ioctl.c
index 1141069dcf6..b9904c545c5 100644
--- a/sys/net/pf_ioctl.c
+++ b/sys/net/pf_ioctl.c
@@ -117,6 +117,11 @@ void pf_qid_unref(u_int16_t);
int pf_states_clr(struct pfioc_state_kill *);
int pf_states_get(struct pfioc_states *);
+struct pf_trans *pf_open_trans(uint32_t);
+struct pf_trans *pf_find_trans(uint64_t);
+void pf_free_trans(struct pf_trans *);
+void pf_rollback_trans(struct pf_trans *);
+
struct pf_rule pf_default_rule, pf_default_rule_new;
struct {
@@ -168,6 +173,8 @@ int pf_rtlabel_add(struct pf_addr_wrap *);
void pf_rtlabel_remove(struct pf_addr_wrap *);
void pf_rtlabel_copyout(struct pf_addr_wrap *);
+uint64_t trans_ticket = 1;
+LIST_HEAD(, pf_trans) pf_ioctl_trans = LIST_HEAD_INITIALIZER(pf_trans);
void
pfattach(int num)
@@ -293,6 +300,28 @@ pfopen(dev_t dev, int flags, int fmt, struct proc *p)
int
pfclose(dev_t dev, int flags, int fmt, struct proc *p)
{
+ struct pf_trans *w, *s;
+ LIST_HEAD(, pf_trans) tmp_list;
+ uint32_t unit = minor(dev);
+
+ if (minor(dev) >= 1)
+ return (ENXIO);
+
+ LIST_INIT(&tmp_list);
+ rw_enter_write(&pfioctl_rw);
+ LIST_FOREACH_SAFE(w, &pf_ioctl_trans, pft_entry, s) {
+ if (w->pft_unit == unit) {
+ LIST_REMOVE(w, pft_entry);
+ LIST_INSERT_HEAD(&tmp_list, w, pft_entry);
+ }
+ }
+ rw_exit_write(&pfioctl_rw);
+
+ while ((w = LIST_FIRST(&tmp_list)) != NULL) {
+ LIST_REMOVE(w, pft_entry);
+ pf_free_trans(w);
+ }
+
return (0);
}
@@ -1142,10 +1171,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags,
struct proc *p)
return (EACCES);
}
- if (flags & FWRITE)
- rw_enter_write(&pfioctl_rw);
- else
- rw_enter_read(&pfioctl_rw);
+ rw_enter_write(&pfioctl_rw);
switch (cmd) {
@@ -3022,10 +3048,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags,
struct proc *p)
break;
}
fail:
- if (flags & FWRITE)
- rw_exit_write(&pfioctl_rw);
- else
- rw_exit_read(&pfioctl_rw);
+ rw_exit_write(&pfioctl_rw);
return (error);
}
@@ -3244,3 +3267,52 @@ pf_sysctl(void *oldp, size_t *oldlenp, void *newp,
size_t newlen)
return sysctl_rdstruct(oldp, oldlenp, newp, &pfs, sizeof(pfs));
}
+
+struct pf_trans *
+pf_open_trans(uint32_t unit)
+{
+ static uint64_t ticket = 1;
+ struct pf_trans *t;
+
+ rw_assert_wrlock(&pfioctl_rw);
+
+ t = malloc(sizeof(*t), M_TEMP, M_WAITOK);
+ memset(t, 0, sizeof(struct pf_trans));
+ t->pft_unit = unit;
+ t->pft_ticket = ticket++;
+
+ LIST_INSERT_HEAD(&pf_ioctl_trans, t, pft_entry);
+
+ return (t);
+}
+
+struct pf_trans *
+pf_find_trans(uint64_t ticket)
+{
+ struct pf_trans *t;
+
+ rw_assert_anylock(&pfioctl_rw);
+
+ LIST_FOREACH(t, &pf_ioctl_trans, pft_entry) {
+ if (t->pft_ticket == ticket)
+ break;
+ }
+
+ return (t);
+}
+
+void
+pf_free_trans(struct pf_trans *t)
+{
+ free(t, M_TEMP, sizeof(*t));
+}
+
+void
+pf_rollback_trans(struct pf_trans *t)
+{
+ if (t != NULL) {
+ rw_assert_wrlock(&pfioctl_rw);
+ LIST_REMOVE(t, pft_entry);
+ pf_free_trans(t);
+ }
+}
diff --git a/sys/net/pfvar_priv.h b/sys/net/pfvar_priv.h
index 38fff6473aa..5af2027733a 100644
--- a/sys/net/pfvar_priv.h
+++ b/sys/net/pfvar_priv.h
@@ -322,6 +322,17 @@ enum {
extern struct cpumem *pf_anchor_stack;
+enum pf_trans_type {
+ PF_TRANS_NONE,
+ PF_TRANS_MAX
+};
+
+struct pf_trans {
+ LIST_ENTRY(pf_trans) pft_entry;
+ uint32_t pft_unit; /* process id */
+ uint64_t pft_ticket;
+ enum pf_trans_type pft_type;
+};
extern struct task pf_purge_task;
extern struct timeout pf_purge_to;