On Mon, Feb 13, 2023 at 08:30:03AM +0100, Alexandr Nedvedicky wrote:
> Hello,
>
> This diff looks good to me. Though I still have some
> doubts about accuracy of comment here:
>
> </snip>
> > return (kn->kn_data > 0);
> > @@ -1510,6 +1599,15 @@ bpf_catchpacket(struct bpf_d *d, u_char
> > ++d->bd_dcount;
> > return;
> > }
> > +
> > + /*
> > + * there's a small chance bpf_wait_cb is running
> > + * concurrently with this and two wakeups will be
> > + * generated.
> > + */
> > + if (timeout_del(&d->bd_wait_tmo))
> > + bpf_put(d);
> > +
> > ROTATE_BUFFERS(d);
> > do_wakeup = 1;
> > curlen = 0;
> > @@ -1530,12 +1628,27 @@ bpf_catchpacket(struct bpf_d *d, u_char
>
> I'm still failing to spot the race the comment is talking
> about. code in bpf_wait_cb() grabs `d->bd_mtx` mutex, the
> same mutex which is held by bpf_catchpacket() caller.
oh yeah. i forgot to remove the comment.
> However my doubts about comment should not prevent you
> committing code which solves the issue and looks good.
>
> OK sashan
thank you. i'll put this in sometime in the next few days unless
someone objects.
Index: sys/net/bpf.c
===================================================================
RCS file: /cvs/src/sys/net/bpf.c,v
retrieving revision 1.220
diff -u -p -r1.220 bpf.c
--- sys/net/bpf.c 10 Feb 2023 14:34:17 -0000 1.220
+++ sys/net/bpf.c 8 Mar 2023 03:58:08 -0000
@@ -77,6 +77,10 @@
#define BPF_BUFSIZE 32768
+#define BPF_S_IDLE 0
+#define BPF_S_WAIT 1
+#define BPF_S_DONE 2
+
#define PRINET 26 /* interruptible */
/*
@@ -101,6 +105,7 @@ int bpf_setif(struct bpf_d *, struct ifr
int bpfkqfilter(dev_t, struct knote *);
void bpf_wakeup(struct bpf_d *);
void bpf_wakeup_cb(void *);
+void bpf_wait_cb(void *);
int _bpf_mtap(caddr_t, const struct mbuf *, const struct mbuf *, u_int);
void bpf_catchpacket(struct bpf_d *, u_char *, size_t, size_t,
const struct bpf_hdr *);
@@ -392,11 +397,13 @@ bpfopen(dev_t dev, int flag, int mode, s
bd->bd_sig = SIGIO;
mtx_init(&bd->bd_mtx, IPL_NET);
task_set(&bd->bd_wake_task, bpf_wakeup_cb, bd);
+ timeout_set(&bd->bd_wait_tmo, bpf_wait_cb, bd);
smr_init(&bd->bd_smr);
sigio_init(&bd->bd_sigio);
klist_init_mutex(&bd->bd_klist, &bd->bd_mtx);
bd->bd_rtout = 0; /* no timeout by default */
+ bd->bd_wtout = INFSLP; /* wait for the buffer to fill by default */
refcnt_init(&bd->bd_refcnt);
LIST_INSERT_HEAD(&bpf_d_list, bd, bd_list);
@@ -435,6 +442,7 @@ bpfclose(dev_t dev, int flag, int mode,
(d)->bd_hbuf = (d)->bd_sbuf; \
(d)->bd_hlen = (d)->bd_slen; \
(d)->bd_sbuf = (d)->bd_fbuf; \
+ (d)->bd_state = BPF_S_IDLE; \
(d)->bd_slen = 0; \
(d)->bd_fbuf = NULL;
@@ -492,7 +500,7 @@ bpfread(dev_t dev, struct uio *uio, int
ROTATE_BUFFERS(d);
break;
}
- if (d->bd_immediate && d->bd_slen != 0) {
+ if (d->bd_state == BPF_S_DONE) {
/*
* A packet(s) either arrived since the previous
* read or arrived while we were asleep.
@@ -611,6 +619,21 @@ bpf_wakeup_cb(void *xd)
bpf_put(d);
}
+void
+bpf_wait_cb(void *xd)
+{
+ struct bpf_d *d = xd;
+
+ mtx_enter(&d->bd_mtx);
+ if (d->bd_state == BPF_S_WAIT) {
+ d->bd_state = BPF_S_DONE;
+ bpf_wakeup(d);
+ }
+ mtx_leave(&d->bd_mtx);
+
+ bpf_put(d);
+}
+
int
bpfwrite(dev_t dev, struct uio *uio, int ioflag)
{
@@ -674,17 +697,64 @@ bpf_resetd(struct bpf_d *d)
MUTEX_ASSERT_LOCKED(&d->bd_mtx);
KASSERT(d->bd_in_uiomove == 0);
+ if (timeout_del(&d->bd_wait_tmo))
+ bpf_put(d);
+
if (d->bd_hbuf != NULL) {
/* Free the hold buffer. */
d->bd_fbuf = d->bd_hbuf;
d->bd_hbuf = NULL;
}
+ d->bd_state = BPF_S_IDLE;
d->bd_slen = 0;
d->bd_hlen = 0;
d->bd_rcount = 0;
d->bd_dcount = 0;
}
+static int
+bpf_set_wtout(struct bpf_d *d, uint64_t wtout)
+{
+ mtx_enter(&d->bd_mtx);
+ d->bd_wtout = wtout;
+ mtx_leave(&d->bd_mtx);
+
+ return (0);
+}
+
+static int
+bpf_set_wtimeout(struct bpf_d *d, const struct timeval *tv)
+{
+ uint64_t nsec;
+
+ if (tv->tv_sec < 0 || !timerisvalid(tv))
+ return (EINVAL);
+
+ nsec = TIMEVAL_TO_NSEC(tv);
+ if (nsec > MAXTSLP)
+ return (EOVERFLOW);
+
+ return (bpf_set_wtout(d, nsec));
+}
+
+static int
+bpf_get_wtimeout(struct bpf_d *d, struct timeval *tv)
+{
+ uint64_t nsec;
+
+ mtx_enter(&d->bd_mtx);
+ nsec = d->bd_wtout;
+ mtx_leave(&d->bd_mtx);
+
+ if (nsec == INFSLP)
+ return (ENXIO);
+
+ memset(tv, 0, sizeof(*tv));
+ NSEC_TO_TIMEVAL(nsec, tv);
+
+ return (0);
+}
+
/*
* FIONREAD Check for read packet available.
* BIOCGBLEN Get buffer len [for read()].
@@ -698,6 +768,9 @@ bpf_resetd(struct bpf_d *d)
* BIOCSETIF Set interface.
* BIOCSRTIMEOUT Set read timeout.
* BIOCGRTIMEOUT Get read timeout.
+ * BIOCSWTIMEOUT Set wait timeout.
+ * BIOCGWTIMEOUT Get wait timeout.
+ * BIOCDWTIMEOUT Del wait timeout.
* BIOCGSTATS Get packet stats.
* BIOCIMMEDIATE Set immediate mode.
* BIOCVERSION Get filter language version.
@@ -720,6 +793,7 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t
case BIOCGDLTLIST:
case BIOCGETIF:
case BIOCGRTIMEOUT:
+ case BIOCGWTIMEOUT:
case BIOCGSTATS:
case BIOCVERSION:
case BIOCGRSIG:
@@ -727,6 +801,8 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t
case FIONREAD:
case BIOCLOCK:
case BIOCSRTIMEOUT:
+ case BIOCSWTIMEOUT:
+ case BIOCDWTIMEOUT:
case BIOCIMMEDIATE:
case TIOCGPGRP:
case BIOCGDIRFILT:
@@ -933,7 +1009,20 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t
* Set immediate mode.
*/
case BIOCIMMEDIATE:
- d->bd_immediate = *(u_int *)addr;
+ error = bpf_set_wtout(d, *(int *)addr ? 0 : INFSLP);
+ break;
+
+ /*
+ * Wait timeout.
+ */
+ case BIOCSWTIMEOUT:
+ error = bpf_set_wtimeout(d, (const struct timeval *)addr);
+ break;
+ case BIOCGWTIMEOUT:
+ error = bpf_get_wtimeout(d, (struct timeval *)addr);
+ break;
+ case BIOCDWTIMEOUT:
+ error = bpf_set_wtout(d, INFSLP);
break;
case BIOCVERSION:
@@ -1193,7 +1282,7 @@ filt_bpfread(struct knote *kn, long hint
MUTEX_ASSERT_LOCKED(&d->bd_mtx);
kn->kn_data = d->bd_hlen;
- if (d->bd_immediate)
+ if (d->bd_wtout == 0)
kn->kn_data += d->bd_slen;
return (kn->kn_data > 0);
@@ -1510,6 +1599,11 @@ bpf_catchpacket(struct bpf_d *d, u_char
++d->bd_dcount;
return;
}
+
+ /* cancel pending wtime */
+ if (timeout_del(&d->bd_wait_tmo))
+ bpf_put(d);
+
ROTATE_BUFFERS(d);
do_wakeup = 1;
curlen = 0;
@@ -1530,12 +1624,27 @@ bpf_catchpacket(struct bpf_d *d, u_char
bpf_mcopy(pkt, (u_char *)bh + hdrlen, bh->bh_caplen);
d->bd_slen = curlen + totlen;
- if (d->bd_immediate) {
+ switch (d->bd_wtout) {
+ case 0:
/*
* Immediate mode is set. A packet arrived so any
* reads should be woken up.
*/
+ if (d->bd_state == BPF_S_IDLE)
+ d->bd_state = BPF_S_DONE;
do_wakeup = 1;
+ break;
+ case INFSLP:
+ break;
+ default:
+ if (d->bd_state == BPF_S_IDLE) {
+ d->bd_state = BPF_S_WAIT;
+
+ bpf_get(d);
+ if (!timeout_add_nsec(&d->bd_wait_tmo, d->bd_wtout))
+ bpf_put(d);
+ }
+ break;
}
if (do_wakeup)
Index: sys/net/bpf.h
===================================================================
RCS file: /cvs/src/sys/net/bpf.h,v
retrieving revision 1.70
diff -u -p -r1.70 bpf.h
--- sys/net/bpf.h 3 Aug 2020 03:21:24 -0000 1.70
+++ sys/net/bpf.h 8 Mar 2023 03:58:08 -0000
@@ -119,6 +119,9 @@ struct bpf_version {
#define BIOCGDLTLIST _IOWR('B',123, struct bpf_dltlist)
#define BIOCGDIRFILT _IOR('B',124, u_int)
#define BIOCSDIRFILT _IOW('B',125, u_int)
+#define BIOCSWTIMEOUT _IOW('B',126, struct timeval)
+#define BIOCGWTIMEOUT _IOR('B',126, struct timeval)
+#define BIOCDWTIMEOUT _IO('B',126)
/*
* Direction filters for BIOCSDIRFILT/BIOCGDIRFILT
Index: sys/net/bpfdesc.h
===================================================================
RCS file: /cvs/src/sys/net/bpfdesc.h,v
retrieving revision 1.47
diff -u -p -r1.47 bpfdesc.h
--- sys/net/bpfdesc.h 9 Jul 2022 12:48:21 -0000 1.47
+++ sys/net/bpfdesc.h 8 Mar 2023 03:58:08 -0000
@@ -79,6 +79,7 @@ struct bpf_d {
struct bpf_if *bd_bif; /* interface descriptor */
uint64_t bd_rtout; /* [m] Read timeout in nanoseconds */
+ uint64_t bd_wtout; /* [m] Wait time in nanoseconds */
u_long bd_nreaders; /* [m] # threads asleep in bpfread() */
struct bpf_program_smr
*bd_rfilter; /* read filter code */
@@ -88,8 +89,7 @@ struct bpf_d {
u_long bd_dcount; /* number of packets dropped */
u_char bd_promisc; /* true if listening promiscuously */
- u_char bd_state; /* idle, waiting, or timed out */
- u_char bd_immediate; /* true to return on packet arrival */
+ u_char bd_state; /* [m] idle, waiting, or timed out */
u_char bd_locked; /* true if descriptor is locked */
u_char bd_fildrop; /* true if filtered packets will be
dropped */
u_char bd_dirfilt; /* direction filter */
@@ -103,7 +103,8 @@ struct bpf_d {
int bd_unit; /* logical unit number */
LIST_ENTRY(bpf_d) bd_list; /* descriptor list */
- struct task bd_wake_task; /* delay pgsigio() and selwakeup() */
+ struct task bd_wake_task; /* defer pgsigio() and selwakeup() */
+ struct timeout bd_wait_tmo; /* delay wakeup after catching pkt */
struct smr_entry
bd_smr;
Index: sbin/pflogd/pflogd.c
===================================================================
RCS file: /cvs/src/sbin/pflogd/pflogd.c,v
retrieving revision 1.62
diff -u -p -r1.62 pflogd.c
--- sbin/pflogd/pflogd.c 25 Jul 2019 17:32:33 -0000 1.62
+++ sbin/pflogd/pflogd.c 8 Mar 2023 03:58:09 -0000
@@ -251,8 +251,8 @@ pflog_read_live(const char *source, int
struct timeval to;
to.tv_sec = to_ms / 1000;
to.tv_usec = (to_ms * 1000) % 1000000;
- if (ioctl(p->fd, BIOCSRTIMEOUT, &to) == -1) {
- snprintf(ebuf, PCAP_ERRBUF_SIZE, "BIOCSRTIMEOUT: %s",
+ if (ioctl(p->fd, BIOCSWTIMEOUT, &to) == -1) {
+ snprintf(ebuf, PCAP_ERRBUF_SIZE, "BIOCSWTIMEOUT: %s",
pcap_strerror(errno));
goto bad;
}
Index: share/man/man4/bpf.4
===================================================================
RCS file: /cvs/src/share/man/man4/bpf.4,v
retrieving revision 1.44
diff -u -p -r1.44 bpf.4
--- share/man/man4/bpf.4 11 Sep 2022 06:38:11 -0000 1.44
+++ share/man/man4/bpf.4 8 Mar 2023 03:58:09 -0000
@@ -220,6 +220,8 @@ The allowable ioctls are
.Dv BIOCIMMEDIATE ,
.Dv BIOCLOCK ,
.Dv BIOCSRTIMEOUT ,
+.Dv BIOCSWTIMEOUT ,
+.Dv BIOCDWTIMEOUT ,
.Dv BIOCVERSION ,
.Dv TIOCGPGRP ,
and
@@ -301,6 +303,18 @@ This is useful for programs like
.Xr rarpd 8 ,
which must respond to messages in real time.
The default for a new file is off.
+.Pp
+.It Dv BIOCSWTIMEOUT Fa "struct timeval *"
+.It Dv BIOCGWTIMEOUT Fa "struct timeval *"
+.It Dv BIOCDWTIMEOUT
+Sets, gets, or deletes (resets) the wait timeout parameter.
+The
+.Ar timeval
+specifies the length of time to wait between receiving a packet and
+the kernel buffer becoming readable.
+By default, or when reset, the wait timeout is infinite, meaning
+the age of packets in the kernel buffer does not make the buffer
+readable.
.Pp
.It Dv BIOCSETF Fa "struct bpf_program *"
Sets the filter program used by the kernel to discard uninteresting packets.