Implement VFS read clustering for MSDOSFS, third try

Stefan Fritsch Tue, 15 Aug 2017 08:21:04 -0700

Hi,

this is another try at making read clustering work for msdosfs.


Last year, mpi@ implemented VFS read clustering for MSDOSFS in

sys/msdosfs/denode.h 1.28
sys/msdosfs/msdosfs_vnops.c 1.105

This caused regressions when doing seeks past the end of the file and had.
to be reverted. Then I tried to fix that in

denode.h,v 1.31
msdosfs_vnops.c,v 1.114 

But that caused different problems and had to be reverted, too. I have 
another test for that that I will commit soon.

It seems that while netbsd uses DEV_BSIZE as base for the logical block 
numbers even in files, openbsd is more like freebsd and uses 
mnt_stat.f_iosize in that case. However, if the file system does accesses 
for metadata on the disk, it has to use DEV_BSIZE for accesses. So the 
fixes in 1.31/1.114 were wrong and the following diff is again based on 
1.28/1.105 with some fixes in extendfile() to make seeks past the end of 
the file work.


Testers/reviews/OKs welcome. Especially people who need msdosfs for 
booting via uefi. In any case, I won't commit until I get home again in a 
week.

If my analysis with the block sizes is correct, I will add some 
clarifications to the bread/bwrite man page later.

Cheers,
Stefan


diff --git a/sys/msdosfs/denode.h b/sys/msdosfs/denode.h
index 38ae2027a63..4e521c6d5b8 100644
--- a/sys/msdosfs/denode.h
+++ b/sys/msdosfs/denode.h
@@ -142,7 +142,6 @@ struct denode {
        struct vnode *de_devvp; /* vnode of blk dev we live on */
        uint32_t de_flag;               /* flag bits */
        dev_t de_dev;           /* device where direntry lives */
-       daddr_t de_lastr;
        uint32_t de_dirclust;   /* cluster of the directory file containing 
this entry */
        uint32_t de_diroffset;  /* offset of this entry in the directory 
cluster */
        uint32_t de_fndoffset;  /* offset of found dir entry */
diff --git a/sys/msdosfs/msdosfs_fat.c b/sys/msdosfs/msdosfs_fat.c
index 9e4675fa7e5..bee7c07f4d7 100644
--- a/sys/msdosfs/msdosfs_fat.c
+++ b/sys/msdosfs/msdosfs_fat.c
@@ -1021,14 +1021,12 @@ extendfile(struct denode *dep, uint32_t count, struct 
buf **bpp, uint32_t *ncp,
                                        bp = getblk(pmp->pm_devvp, cntobn(pmp, 
cn++),
                                                    pmp->pm_bpcluster, 0, 0);
                                else {
-                                       bp = getblk(DETOV(dep), de_cn2bn(pmp, 
frcn++),
+                                       bp = getblk(DETOV(dep), frcn++,
                                            pmp->pm_bpcluster, 0, 0);
                                        /*
                                         * Do the bmap now, as in msdosfs_write
                                         */
-                                       if (pcbmap(dep,
-                                           de_bn2cn(pmp, bp->b_lblkno),
-                                           &bp->b_blkno, 0, 0))
+                                       if (pcbmap(dep, bp->b_lblkno, 
&bp->b_blkno, 0, 0))
                                                bp->b_blkno = -1;
                                        if (bp->b_blkno == -1)
                                                panic("extendfile: pcbmap");
diff --git a/sys/msdosfs/msdosfs_vnops.c b/sys/msdosfs/msdosfs_vnops.c
index 8452e9be80c..cadaf78920d 100644
--- a/sys/msdosfs/msdosfs_vnops.c
+++ b/sys/msdosfs/msdosfs_vnops.c
@@ -77,13 +77,13 @@
 
 static uint32_t fileidhash(uint64_t);
 
+int msdosfs_bmaparray(struct vnode *, daddr_t, daddr_t *, int *);
 int msdosfs_kqfilter(void *);
 int filt_msdosfsread(struct knote *, long);
 int filt_msdosfswrite(struct knote *, long);
 int filt_msdosfsvnode(struct knote *, long);
 void filt_msdosfsdetach(struct knote *);
 
-
 /*
  * Some general notes:
  *
@@ -509,18 +509,14 @@ int
 msdosfs_read(void *v)
 {
        struct vop_read_args *ap = v;
-       int error = 0;
-       uint32_t diff;
-       int blsize;
-       int isadir;
-       uint32_t n;
-       long on;
-       daddr_t lbn, rablock, rablkno;
-       struct buf *bp;
        struct vnode *vp = ap->a_vp;
        struct denode *dep = VTODE(vp);
        struct msdosfsmount *pmp = dep->de_pmp;
        struct uio *uio = ap->a_uio;
+       int isadir, error = 0;
+       uint32_t n, diff, size, on;
+       struct buf *bp;
+       daddr_t cn, bn;
 
        /*
         * If they didn't ask for any data, then we are done.
@@ -535,7 +531,8 @@ msdosfs_read(void *v)
                if (uio->uio_offset >= dep->de_FileSize)
                        return (0);
 
-               lbn = de_cluster(pmp, uio->uio_offset);
+               cn = de_cluster(pmp, uio->uio_offset);
+               size = pmp->pm_bpcluster;
                on = uio->uio_offset & pmp->pm_crbomask;
                n = ulmin(pmp->pm_bpcluster - on, uio->uio_resid);
 
@@ -548,31 +545,23 @@ msdosfs_read(void *v)
                if (diff < n)
                        n = diff;
 
-               /* convert cluster # to block # if a directory */
-               if (isadir) {
-                       error = pcbmap(dep, lbn, &lbn, 0, &blsize);
-                       if (error)
-                               return (error);
-               }
                /*
                 * If we are operating on a directory file then be sure to
                 * do i/o with the vnode for the filesystem instead of the
-                * vnode for the directory.
+                * vnode for the directory. In that case, the block number
+                * is physical blocks and not clusters.
                 */
                if (isadir) {
-                       error = bread(pmp->pm_devvp, lbn, blsize, &bp);
+                       /* dirfile-relative cn -> fs-relative bn */
+                       error = pcbmap(dep, cn, &bn, 0, &size);
+                       if (error)
+                               return (error);
+                       error = bread(pmp->pm_devvp, bn, size, &bp);
                } else {
-                       rablock = lbn + 1;
-                       rablkno = de_cn2bn(pmp, rablock);
-                       if (dep->de_lastr + 1 == lbn &&
-                           de_cn2off(pmp, rablock) < dep->de_FileSize)
-                               error = breadn(vp, de_cn2bn(pmp, lbn),
-                                   pmp->pm_bpcluster, &rablkno,
-                                   &pmp->pm_bpcluster, 1, &bp);
+                       if (de_cn2off(pmp, cn + 1) >= dep->de_FileSize)
+                               error = bread(vp, cn, size, &bp);
                        else
-                               error = bread(vp, de_cn2bn(pmp, lbn),
-                                   pmp->pm_bpcluster, &bp);
-                       dep->de_lastr = lbn;
+                               error = bread_cluster(vp, cn, size, &bp);
                }
                n = min(n, pmp->pm_bpcluster - bp->b_resid);
                if (error) {
@@ -601,7 +590,7 @@ msdosfs_write(void *v)
        uint32_t osize;
        int error = 0;
        uint32_t count, lastcn;
-       daddr_t bn;
+       daddr_t cn;
        struct buf *bp;
        int ioflag = ap->a_ioflag;
        struct uio *uio = ap->a_uio;
@@ -678,30 +667,30 @@ msdosfs_write(void *v)
                lastcn = de_clcount(pmp, osize) - 1;
 
        do {
-               if (de_cluster(pmp, uio->uio_offset) > lastcn) {
+               croffset = uio->uio_offset & pmp->pm_crbomask;
+               cn = de_cluster(pmp, uio->uio_offset);
+
+               if (cn > lastcn) {
                        error = ENOSPC;
                        break;
                }
 
-               bn = de_blk(pmp, uio->uio_offset);
-               if ((uio->uio_offset & pmp->pm_crbomask) == 0
-                   && (de_blk(pmp, uio->uio_offset + uio->uio_resid) > 
de_blk(pmp, uio->uio_offset)
-                       || uio->uio_offset + uio->uio_resid >= 
dep->de_FileSize)) {
+               if (croffset == 0 &&
+                   (de_cluster(pmp, uio->uio_offset + uio->uio_resid) > cn ||
+                    (uio->uio_offset + uio->uio_resid) >= dep->de_FileSize)) {
                        /*
                         * If either the whole cluster gets written,
                         * or we write the cluster from its start beyond EOF,
                         * then no need to read data from disk.
                         */
-                       bp = getblk(thisvp, bn, pmp->pm_bpcluster, 0, 0);
+                       bp = getblk(thisvp, cn, pmp->pm_bpcluster, 0, 0);
                        clrbuf(bp);
                        /*
                         * Do the bmap now, since pcbmap needs buffers
                         * for the fat table. (see msdosfs_strategy)
                         */
                        if (bp->b_blkno == bp->b_lblkno) {
-                               error = pcbmap(dep,
-                                              de_bn2cn(pmp, bp->b_lblkno),
-                                              &bp->b_blkno, 0, 0);
+                               error = pcbmap(dep, bp->b_lblkno, &bp->b_blkno, 
0, 0);
                                if (error)
                                        bp->b_blkno = -1;
                        }
@@ -713,16 +702,16 @@ msdosfs_write(void *v)
                        }
                } else {
                        /*
-                        * The block we need to write into exists, so read it 
in.
+                        * The block we need to write into exists, so
+                        * read it in.
                         */
-                       error = bread(thisvp, bn, pmp->pm_bpcluster, &bp);
+                       error = bread(thisvp, cn, pmp->pm_bpcluster, &bp);
                        if (error) {
                                brelse(bp);
                                break;
                        }
                }
 
-               croffset = uio->uio_offset & pmp->pm_crbomask;
                n = ulmin(uio->uio_resid, pmp->pm_bpcluster - croffset);
                if (uio->uio_offset + n > dep->de_FileSize) {
                        dep->de_FileSize = uio->uio_offset + n;
@@ -1755,7 +1744,7 @@ msdosfs_islocked(void *v)
 
 /*
  * vp  - address of vnode file the file
- * bn  - which cluster we are interested in mapping to a filesystem block 
number.
+ * bn  - which cluster we are interested in mapping to a filesystem block 
number
  * vpp - returns the vnode for the block special file holding the filesystem
  *      containing the file of interest
  * bnp - address of where to return the filesystem relative block number
@@ -1765,19 +1754,51 @@ msdosfs_bmap(void *v)
 {
        struct vop_bmap_args *ap = v;
        struct denode *dep = VTODE(ap->a_vp);
-       struct msdosfsmount *pmp = dep->de_pmp;
 
        if (ap->a_vpp != NULL)
                *ap->a_vpp = dep->de_devvp;
        if (ap->a_bnp == NULL)
                return (0);
-       if (ap->a_runp) {
+
+       return (msdosfs_bmaparray(ap->a_vp, ap->a_bn, ap->a_bnp, ap->a_runp));
+}
+
+int
+msdosfs_bmaparray(struct vnode *vp, daddr_t cn, daddr_t *bnp, int *runp)
+{
+       struct denode *dep = VTODE(vp);
+       struct msdosfsmount *pmp = dep->de_pmp;
+       struct mount *mp;
+       int error, maxrun = 0, run;
+       daddr_t daddr;
+
+       mp = vp->v_mount;
+
+       if (runp) {
                /*
-                * Sequential clusters should be counted here.
+                * XXX
+                * If MAXBSIZE is the largest transfer the disks can handle,
+                * we probably want maxrun to be 1 block less so that we
+                * don't create a block larger than the device can handle.
                 */
-               *ap->a_runp = 0;
+               *runp = 0;
+               maxrun = min(MAXBSIZE / mp->mnt_stat.f_iosize - 1,
+                   pmp->pm_maxcluster - cn);
        }
-       return (pcbmap(dep, de_bn2cn(pmp, ap->a_bn), ap->a_bnp, 0, 0));
+
+       if ((error = pcbmap(dep, cn, bnp, 0, 0)) != 0)
+               return (error);
+
+       for (run = 1; run <= maxrun; run++) {
+               error = pcbmap(dep, cn + run, &daddr, 0, 0);
+               if (error != 0 || (daddr != *bnp + de_cn2bn(pmp, run)))
+                       break;
+       }
+
+       if (runp)
+               *runp = run - 1;
+
+       return (0);
 }
 
 int
@@ -1799,8 +1820,7 @@ msdosfs_strategy(void *v)
         * don't allow files with holes, so we shouldn't ever see this.
         */
        if (bp->b_blkno == bp->b_lblkno) {
-               error = pcbmap(dep, de_bn2cn(dep->de_pmp, bp->b_lblkno),
-                              &bp->b_blkno, 0, 0);
+               error = pcbmap(dep, bp->b_lblkno, &bp->b_blkno, 0, 0);
                if (error)
                        bp->b_blkno = -1;
                if (bp->b_blkno == -1)

Implement VFS read clustering for MSDOSFS, third try

Reply via email to