On Mon, Feb 13, 2023 at 08:30:03AM +0100, Alexandr Nedvedicky wrote:
> Hello,
> 
> This diff looks good to me. Though I still have some
> doubts about accuracy of comment here:
> 
> </snip>
> >     return (kn->kn_data > 0);
> > @@ -1510,6 +1599,15 @@ bpf_catchpacket(struct bpf_d *d, u_char 
> >                     ++d->bd_dcount;
> >                     return;
> >             }
> > +
> > +           /*
> > +            * there's a small chance bpf_wait_cb is running
> > +            * concurrently with this and two wakeups will be
> > +            * generated.
> > +            */
> > +           if (timeout_del(&d->bd_wait_tmo))
> > +                   bpf_put(d);
> > +
> >             ROTATE_BUFFERS(d);
> >             do_wakeup = 1;
> >             curlen = 0;
> > @@ -1530,12 +1628,27 @@ bpf_catchpacket(struct bpf_d *d, u_char 
> 
>     I'm still failing to spot the race the comment is talking
>     about. code in bpf_wait_cb() grabs `d->bd_mtx` mutex, the
>     same mutex which is held by bpf_catchpacket() caller.

oh yeah. i forgot to remove the comment.

>     However my doubts about comment should not prevent you 
>     committing code which solves the issue and looks good.
> 
> OK sashan

thank you. i'll put this in sometime in the next few days unless
someone objects.

Index: sys/net/bpf.c
===================================================================
RCS file: /cvs/src/sys/net/bpf.c,v
retrieving revision 1.220
diff -u -p -r1.220 bpf.c
--- sys/net/bpf.c       10 Feb 2023 14:34:17 -0000      1.220
+++ sys/net/bpf.c       8 Mar 2023 03:58:08 -0000
@@ -77,6 +77,10 @@
 
 #define BPF_BUFSIZE 32768
 
+#define BPF_S_IDLE     0
+#define BPF_S_WAIT     1
+#define BPF_S_DONE     2
+
 #define PRINET  26                     /* interruptible */
 
 /*
@@ -101,6 +105,7 @@ int bpf_setif(struct bpf_d *, struct ifr
 int    bpfkqfilter(dev_t, struct knote *);
 void   bpf_wakeup(struct bpf_d *);
 void   bpf_wakeup_cb(void *);
+void   bpf_wait_cb(void *);
 int    _bpf_mtap(caddr_t, const struct mbuf *, const struct mbuf *, u_int);
 void   bpf_catchpacket(struct bpf_d *, u_char *, size_t, size_t,
            const struct bpf_hdr *);
@@ -392,11 +397,13 @@ bpfopen(dev_t dev, int flag, int mode, s
        bd->bd_sig = SIGIO;
        mtx_init(&bd->bd_mtx, IPL_NET);
        task_set(&bd->bd_wake_task, bpf_wakeup_cb, bd);
+       timeout_set(&bd->bd_wait_tmo, bpf_wait_cb, bd);
        smr_init(&bd->bd_smr);
        sigio_init(&bd->bd_sigio);
        klist_init_mutex(&bd->bd_klist, &bd->bd_mtx);
 
        bd->bd_rtout = 0;       /* no timeout by default */
+       bd->bd_wtout = INFSLP;  /* wait for the buffer to fill by default */
 
        refcnt_init(&bd->bd_refcnt);
        LIST_INSERT_HEAD(&bpf_d_list, bd, bd_list);
@@ -435,6 +442,7 @@ bpfclose(dev_t dev, int flag, int mode, 
        (d)->bd_hbuf = (d)->bd_sbuf; \
        (d)->bd_hlen = (d)->bd_slen; \
        (d)->bd_sbuf = (d)->bd_fbuf; \
+       (d)->bd_state = BPF_S_IDLE; \
        (d)->bd_slen = 0; \
        (d)->bd_fbuf = NULL;
 
@@ -492,7 +500,7 @@ bpfread(dev_t dev, struct uio *uio, int 
                        ROTATE_BUFFERS(d);
                        break;
                }
-               if (d->bd_immediate && d->bd_slen != 0) {
+               if (d->bd_state == BPF_S_DONE) {
                        /*
                         * A packet(s) either arrived since the previous
                         * read or arrived while we were asleep.
@@ -611,6 +619,21 @@ bpf_wakeup_cb(void *xd)
        bpf_put(d);
 }
 
+void
+bpf_wait_cb(void *xd)
+{
+       struct bpf_d *d = xd;
+
+       mtx_enter(&d->bd_mtx);
+       if (d->bd_state == BPF_S_WAIT) {
+               d->bd_state = BPF_S_DONE;
+               bpf_wakeup(d);
+       }
+       mtx_leave(&d->bd_mtx);
+
+       bpf_put(d);
+}
+
 int
 bpfwrite(dev_t dev, struct uio *uio, int ioflag)
 {
@@ -674,17 +697,64 @@ bpf_resetd(struct bpf_d *d)
        MUTEX_ASSERT_LOCKED(&d->bd_mtx);
        KASSERT(d->bd_in_uiomove == 0);
 
+       if (timeout_del(&d->bd_wait_tmo))
+               bpf_put(d);
+
        if (d->bd_hbuf != NULL) {
                /* Free the hold buffer. */
                d->bd_fbuf = d->bd_hbuf;
                d->bd_hbuf = NULL;
        }
+       d->bd_state = BPF_S_IDLE;
        d->bd_slen = 0;
        d->bd_hlen = 0;
        d->bd_rcount = 0;
        d->bd_dcount = 0;
 }
 
+static int
+bpf_set_wtout(struct bpf_d *d, uint64_t wtout)
+{
+       mtx_enter(&d->bd_mtx);
+       d->bd_wtout = wtout;
+       mtx_leave(&d->bd_mtx);
+
+       return (0);
+}
+
+static int
+bpf_set_wtimeout(struct bpf_d *d, const struct timeval *tv)
+{
+       uint64_t nsec;
+
+       if (tv->tv_sec < 0 || !timerisvalid(tv))
+               return (EINVAL);
+
+       nsec = TIMEVAL_TO_NSEC(tv);
+       if (nsec > MAXTSLP)
+               return (EOVERFLOW);
+
+       return (bpf_set_wtout(d, nsec));
+}
+
+static int
+bpf_get_wtimeout(struct bpf_d *d, struct timeval *tv)
+{
+       uint64_t nsec;
+
+       mtx_enter(&d->bd_mtx);
+       nsec = d->bd_wtout;
+       mtx_leave(&d->bd_mtx);
+
+       if (nsec == INFSLP)
+               return (ENXIO);
+
+       memset(tv, 0, sizeof(*tv));
+       NSEC_TO_TIMEVAL(nsec, tv);
+
+       return (0);
+}
+
 /*
  *  FIONREAD           Check for read packet available.
  *  BIOCGBLEN          Get buffer len [for read()].
@@ -698,6 +768,9 @@ bpf_resetd(struct bpf_d *d)
  *  BIOCSETIF          Set interface.
  *  BIOCSRTIMEOUT      Set read timeout.
  *  BIOCGRTIMEOUT      Get read timeout.
+ *  BIOCSWTIMEOUT      Set wait timeout.
+ *  BIOCGWTIMEOUT      Get wait timeout.
+ *  BIOCDWTIMEOUT      Del wait timeout.
  *  BIOCGSTATS         Get packet stats.
  *  BIOCIMMEDIATE      Set immediate mode.
  *  BIOCVERSION                Get filter language version.
@@ -720,6 +793,7 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t 
                case BIOCGDLTLIST:
                case BIOCGETIF:
                case BIOCGRTIMEOUT:
+               case BIOCGWTIMEOUT:
                case BIOCGSTATS:
                case BIOCVERSION:
                case BIOCGRSIG:
@@ -727,6 +801,8 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t 
                case FIONREAD:
                case BIOCLOCK:
                case BIOCSRTIMEOUT:
+               case BIOCSWTIMEOUT:
+               case BIOCDWTIMEOUT:
                case BIOCIMMEDIATE:
                case TIOCGPGRP:
                case BIOCGDIRFILT:
@@ -933,7 +1009,20 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t 
         * Set immediate mode.
         */
        case BIOCIMMEDIATE:
-               d->bd_immediate = *(u_int *)addr;
+               error = bpf_set_wtout(d, *(int *)addr ? 0 : INFSLP);
+               break;
+
+       /*
+        * Wait timeout.
+        */
+       case BIOCSWTIMEOUT:
+               error = bpf_set_wtimeout(d, (const struct timeval *)addr);
+               break;
+       case BIOCGWTIMEOUT:
+               error = bpf_get_wtimeout(d, (struct timeval *)addr);
+               break;
+       case BIOCDWTIMEOUT:
+               error = bpf_set_wtout(d, INFSLP);
                break;
 
        case BIOCVERSION:
@@ -1193,7 +1282,7 @@ filt_bpfread(struct knote *kn, long hint
        MUTEX_ASSERT_LOCKED(&d->bd_mtx);
 
        kn->kn_data = d->bd_hlen;
-       if (d->bd_immediate)
+       if (d->bd_wtout == 0)
                kn->kn_data += d->bd_slen;
 
        return (kn->kn_data > 0);
@@ -1510,6 +1599,11 @@ bpf_catchpacket(struct bpf_d *d, u_char 
                        ++d->bd_dcount;
                        return;
                }
+
+               /* cancel pending wtime */
+               if (timeout_del(&d->bd_wait_tmo))
+                       bpf_put(d);
+
                ROTATE_BUFFERS(d);
                do_wakeup = 1;
                curlen = 0;
@@ -1530,12 +1624,27 @@ bpf_catchpacket(struct bpf_d *d, u_char 
        bpf_mcopy(pkt, (u_char *)bh + hdrlen, bh->bh_caplen);
        d->bd_slen = curlen + totlen;
 
-       if (d->bd_immediate) {
+       switch (d->bd_wtout) {
+       case 0:
                /*
                 * Immediate mode is set.  A packet arrived so any
                 * reads should be woken up.
                 */
+               if (d->bd_state == BPF_S_IDLE)
+                       d->bd_state = BPF_S_DONE;
                do_wakeup = 1;
+               break;
+       case INFSLP:
+               break;
+       default:
+               if (d->bd_state == BPF_S_IDLE) {
+                       d->bd_state = BPF_S_WAIT;
+
+                       bpf_get(d);
+                       if (!timeout_add_nsec(&d->bd_wait_tmo, d->bd_wtout))
+                               bpf_put(d);
+               }
+               break;
        }
 
        if (do_wakeup)
Index: sys/net/bpf.h
===================================================================
RCS file: /cvs/src/sys/net/bpf.h,v
retrieving revision 1.70
diff -u -p -r1.70 bpf.h
--- sys/net/bpf.h       3 Aug 2020 03:21:24 -0000       1.70
+++ sys/net/bpf.h       8 Mar 2023 03:58:08 -0000
@@ -119,6 +119,9 @@ struct bpf_version {
 #define BIOCGDLTLIST   _IOWR('B',123, struct bpf_dltlist)
 #define BIOCGDIRFILT   _IOR('B',124, u_int)
 #define BIOCSDIRFILT   _IOW('B',125, u_int)
+#define BIOCSWTIMEOUT  _IOW('B',126, struct timeval)
+#define BIOCGWTIMEOUT  _IOR('B',126, struct timeval)
+#define BIOCDWTIMEOUT  _IO('B',126)
 
 /*
  * Direction filters for BIOCSDIRFILT/BIOCGDIRFILT
Index: sys/net/bpfdesc.h
===================================================================
RCS file: /cvs/src/sys/net/bpfdesc.h,v
retrieving revision 1.47
diff -u -p -r1.47 bpfdesc.h
--- sys/net/bpfdesc.h   9 Jul 2022 12:48:21 -0000       1.47
+++ sys/net/bpfdesc.h   8 Mar 2023 03:58:08 -0000
@@ -79,6 +79,7 @@ struct bpf_d {
 
        struct bpf_if  *bd_bif;         /* interface descriptor */
        uint64_t        bd_rtout;       /* [m] Read timeout in nanoseconds */
+       uint64_t        bd_wtout;       /* [m] Wait time in nanoseconds */
        u_long          bd_nreaders;    /* [m] # threads asleep in bpfread() */
        struct bpf_program_smr
                       *bd_rfilter;     /* read filter code */
@@ -88,8 +89,7 @@ struct bpf_d {
        u_long          bd_dcount;      /* number of packets dropped */
 
        u_char          bd_promisc;     /* true if listening promiscuously */
-       u_char          bd_state;       /* idle, waiting, or timed out */
-       u_char          bd_immediate;   /* true to return on packet arrival */
+       u_char          bd_state;       /* [m] idle, waiting, or timed out */
        u_char          bd_locked;      /* true if descriptor is locked */
        u_char          bd_fildrop;     /* true if filtered packets will be 
dropped */
        u_char          bd_dirfilt;     /* direction filter */
@@ -103,7 +103,8 @@ struct bpf_d {
        int             bd_unit;        /* logical unit number */
        LIST_ENTRY(bpf_d) bd_list;      /* descriptor list */
 
-       struct task     bd_wake_task;   /* delay pgsigio() and selwakeup() */
+       struct task     bd_wake_task;   /* defer pgsigio() and selwakeup() */
+       struct timeout  bd_wait_tmo;    /* delay wakeup after catching pkt */
 
        struct smr_entry
                        bd_smr;
Index: sbin/pflogd/pflogd.c
===================================================================
RCS file: /cvs/src/sbin/pflogd/pflogd.c,v
retrieving revision 1.62
diff -u -p -r1.62 pflogd.c
--- sbin/pflogd/pflogd.c        25 Jul 2019 17:32:33 -0000      1.62
+++ sbin/pflogd/pflogd.c        8 Mar 2023 03:58:09 -0000
@@ -251,8 +251,8 @@ pflog_read_live(const char *source, int 
                struct timeval to;
                to.tv_sec = to_ms / 1000;
                to.tv_usec = (to_ms * 1000) % 1000000;
-               if (ioctl(p->fd, BIOCSRTIMEOUT, &to) == -1) {
-                       snprintf(ebuf, PCAP_ERRBUF_SIZE, "BIOCSRTIMEOUT: %s",
+               if (ioctl(p->fd, BIOCSWTIMEOUT, &to) == -1) {
+                       snprintf(ebuf, PCAP_ERRBUF_SIZE, "BIOCSWTIMEOUT: %s",
                            pcap_strerror(errno));
                        goto bad;
                }
Index: share/man/man4/bpf.4
===================================================================
RCS file: /cvs/src/share/man/man4/bpf.4,v
retrieving revision 1.44
diff -u -p -r1.44 bpf.4
--- share/man/man4/bpf.4        11 Sep 2022 06:38:11 -0000      1.44
+++ share/man/man4/bpf.4        8 Mar 2023 03:58:09 -0000
@@ -220,6 +220,8 @@ The allowable ioctls are
 .Dv BIOCIMMEDIATE ,
 .Dv BIOCLOCK ,
 .Dv BIOCSRTIMEOUT ,
+.Dv BIOCSWTIMEOUT ,
+.Dv BIOCDWTIMEOUT ,
 .Dv BIOCVERSION ,
 .Dv TIOCGPGRP ,
 and
@@ -301,6 +303,18 @@ This is useful for programs like
 .Xr rarpd 8 ,
 which must respond to messages in real time.
 The default for a new file is off.
+.Pp
+.It Dv BIOCSWTIMEOUT Fa "struct timeval *"
+.It Dv BIOCGWTIMEOUT Fa "struct timeval *"
+.It Dv BIOCDWTIMEOUT
+Sets, gets, or deletes (resets) the wait timeout parameter.
+The
+.Ar timeval
+specifies the length of time to wait between receiving a packet and
+the kernel buffer becoming readable.
+By default, or when reset, the wait timeout is infinite, meaning
+the age of packets in the kernel buffer does not make the buffer
+readable.
 .Pp
 .It Dv BIOCSETF Fa "struct bpf_program *"
 Sets the filter program used by the kernel to discard uninteresting packets.

Reply via email to