Thanks for the patch; I just got a few minutes today and I applied it,
rebuilt and installed the kernel and rebooted. Sadly, I get a similar
panic. Attached is a screenshot of console output. Note that 'boot sync'
from ddb hangs forever.

        - Dan C.


On Fri, May 19, 2017 at 3:58 PM, Mike Belopuhov <m...@belopuhov.com> wrote:

> On Thu, May 18, 2017 at 21:15 -0400, Dan Cross wrote:
> > Okay, here is the output. I apologize for the screen shot; there's no
> other
> > particularly great way to capture the console output from the VPS and I
> > don't trust myself to type it all in without making a mistake of some
> kind.
> >
>
> That's OK, I can see that there's quite some swapping going on.
> I haven't finished investigating yet, but the first thing I've
> noticed is that FFS read-ahead issues 64k read requests.  xbf(4)
> cannot handle more than 45056 bytes at a time, so it fails the request.
> This might be causing some serious problems.
>
> Unfortunately, it turned out that our SCSI and VFS layers don't
> implement proper handling of short reads (b_resid is ignored
> on clustered reads by the buffercache and SCSI doesn't do
> anything about it either), so I took a stab at getting it
> working.
>
> For now the most appropriate way to solve this that I've found is
> to invalidate the read-ahead portion of a cluster read: when FFS asks
> for a block, e.g. 16k, bread_cluster creates an array of bufs for
> a MAXPHYS worth of I/O sliced in chunks of the block size (e.g.
> 16k).  Then (after the I/O is done) we can walk from the end
> back up and invalidate all chunks that correspond to failed I/O.
> For example if b_resid is 20480 and we were using 16k chunks,
> then we have to invalidate the last two bufs (32k).
>
> Unfortunately, there's a major problem that this diff doesn't
> solve: if we've read even less than what we were initially asked
> for (excluding all of read-ahead blocks).  This is because the
> biodone for the xbpp[0] aka "the bp" is done from sd_buf_done
> directly *before* we can do buf_fix_mapping and restore its
> intended bp->b_bcount.  In other words, when sd_buf_done calls
> biodone you cannot correlate b_bcount and b_resid and mark the
> buffer B_INVAL because you don't know its intended length.
>
> This is not a final version, but as I won't get back to it
> before Monday, I wanted to post it for a wider audience.
>
>
> diff --git sys/kern/vfs_bio.c sys/kern/vfs_bio.c
> index 95bc80bc0e6..1cc1943d752 100644
> --- sys/kern/vfs_bio.c
> +++ sys/kern/vfs_bio.c
> @@ -534,11 +534,29 @@ bread_cluster_callback(struct buf *bp)
>                  */
>                 buf_fix_mapping(bp, newsize);
>                 bp->b_bcount = newsize;
>         }
>
> -       for (i = 1; xbpp[i] != 0; i++) {
> +       /* Invalidate read-ahead buffers if read short */
> +       if (bp->b_resid > 0) {
> +               for (i = 0; xbpp[i] != NULL; i++)
> +                       continue;
> +               for (i = i - 1; i != 0; i--) {
> +                       if (xbpp[i]->b_bufsize <= bp->b_resid) {
> +                               bp->b_resid -= xbpp[i]->b_bufsize;
> +                               SET(xbpp[i]->b_flags, B_INVAL);
> +                       } else if (bp->b_resid > 0) {
> +                               bp->b_resid = 0;
> +                               SET(xbpp[i]->b_flags, B_INVAL);
> +                       } else
> +                               break;
> +               }
> +               if (bp->b_resid > 0)
> +                       printf("short read %ld\n", bp->b_resid);
> +       }
> +
> +       for (i = 1; xbpp[i] != NULL; i++) {
>                 if (ISSET(bp->b_flags, B_ERROR))
>                         SET(xbpp[i]->b_flags, B_INVAL | B_ERROR);
>                 biodone(xbpp[i]);
>         }
>
> @@ -605,11 +623,11 @@ bread_cluster(struct vnode *vp, daddr_t blkno, int
> size, struct buf **rbpp)
>                 }
>         }
>
>         bp = xbpp[0];
>
> -       xbpp[howmany] = 0;
> +       xbpp[howmany] = NULL;
>
>         inc = btodb(size);
>
>         for (i = 1; i < howmany; i++) {
>                 bcstats.pendingreads++;
> diff --git sys/dev/pv/xbf.c sys/dev/pv/xbf.c
> index d5c44770acb..9a94e3dc48f 100644
> --- sys/dev/pv/xbf.c
> +++ sys/dev/pv/xbf.c
> @@ -448,29 +448,32 @@ xbf_load_xs(struct scsi_xfer *xs, int desc)
>         struct xbf_softc *sc = xs->sc_link->adapter_softc;
>         struct xbf_sge *sge;
>         union xbf_ring_desc *xrd;
>         bus_dmamap_t map;
>         int i, error, mapflags;
> +       bus_size_t datalen;
>
>         xrd = &sc->sc_xr->xr_desc[desc];
>         map = sc->sc_xs_map[desc];
>
> +       datalen = MIN(xs->datalen, sc->sc_maxphys);
> +
>         mapflags = (sc->sc_domid << 16);
>         if (ISSET(xs->flags, SCSI_NOSLEEP))
>                 mapflags |= BUS_DMA_NOWAIT;
>         else
>                 mapflags |= BUS_DMA_WAITOK;
>         if (ISSET(xs->flags, SCSI_DATA_IN))
>                 mapflags |= BUS_DMA_READ;
>         else
>                 mapflags |= BUS_DMA_WRITE;
>
> -       error = bus_dmamap_load(sc->sc_dmat, map, xs->data, xs->datalen,
> +       error = bus_dmamap_load(sc->sc_dmat, map, xs->data, datalen,
>             NULL, mapflags);
>         if (error) {
> -               DPRINTF("%s: failed to load %d bytes of data\n",
> -                   sc->sc_dev.dv_xname, xs->datalen);
> +               DPRINTF("%s: failed to load %ld bytes of data\n",
> +                   sc->sc_dev.dv_xname, datalen);
>                 return (error);
>         }
>
>         for (i = 0; i < map->dm_nsegs; i++) {
>                 sge = &xrd->xrd_req.req_sgl[i];
> @@ -726,11 +729,11 @@ xbf_complete_cmd(struct scsi_xfer *xs, int desc)
>
>         id = xrd->xrd_rsp.rsp_id;
>         memset(xrd, 0, sizeof(*xrd));
>         xrd->xrd_req.req_id = id;
>
> -       xs->resid = 0;
> +       xs->resid = xs->datalen - MIN(xs->datalen, sc->sc_maxphys);
>
>         xbf_reclaim_xs(xs, desc);
>         xbf_scsi_done(xs, error);
>  }
>
>

Reply via email to