Re: Fix for "Implement VFS read clustering for MSDOSFS"
On 29/05/17(Mon) 14:30, Stefan Fritsch wrote: > On Mon, 29 May 2017, Martin Pieuchot wrote: > > > A complete diff would be easier to review. > > ok, here it comes. > > The original commit message was this: > > Implement VFS read clustering for MSDOSFS. > > The logic used in msdosfs_bmap() to loop calling pcbmap() comes from > FreeBSD and is not really efficient but it is good enough since it is > only called when generating I/O. > > With this diff I get a 100% improvement when reading big files from a > crappy USB stick. > > With this and bread_cluster(9) modified to not re-fetch B_CACHED > buffers, > reading large contiguous files with chunk sizes of MAXPHYS is almost as > fast as physio(9) on the same device. > > For a 'real world' example, when copying music files from a USB stick I > see a speed jump from 15MB/s on -current to 24Mb/s with this diff. > > While here rename some 'lbn' variables into 'cn' to better reflect what > we're dealing with. > > Tested by Mathieu, with support from deraadt@ ok mpi@ Now I understood why I got confused between block & cluster, the current msdosfs_bmap() function has a bug. ``ap->a_bn'' is representing a cluster, not a block. Diff below fixes that, for the archives. Index: msdosfs/msdosfs_vnops.c === RCS file: /cvs/src/sys/msdosfs/msdosfs_vnops.c,v retrieving revision 1.113 diff -u -p -r1.113 msdosfs_vnops.c --- msdosfs/msdosfs_vnops.c 30 Aug 2016 19:47:23 - 1.113 +++ msdosfs/msdosfs_vnops.c 29 May 2017 13:24:11 - @@ -1765,7 +1765,6 @@ msdosfs_bmap(void *v) { struct vop_bmap_args *ap = v; struct denode *dep = VTODE(ap->a_vp); - struct msdosfsmount *pmp = dep->de_pmp; if (ap->a_vpp != NULL) *ap->a_vpp = dep->de_devvp; @@ -1777,7 +1776,7 @@ msdosfs_bmap(void *v) */ *ap->a_runp = 0; } - return (pcbmap(dep, de_bn2cn(pmp, ap->a_bn), ap->a_bnp, 0, 0)); + return (pcbmap(dep, ap->a_bn, ap->a_bnp, 0, 0)); } int
Re: Fix for "Implement VFS read clustering for MSDOSFS"
On Mon, 29 May 2017, Martin Pieuchot wrote: > A complete diff would be easier to review. ok, here it comes. The original commit message was this: Implement VFS read clustering for MSDOSFS. The logic used in msdosfs_bmap() to loop calling pcbmap() comes from FreeBSD and is not really efficient but it is good enough since it is only called when generating I/O. With this diff I get a 100% improvement when reading big files from a crappy USB stick. With this and bread_cluster(9) modified to not re-fetch B_CACHED buffers, reading large contiguous files with chunk sizes of MAXPHYS is almost as fast as physio(9) on the same device. For a 'real world' example, when copying music files from a USB stick I see a speed jump from 15MB/s on -current to 24Mb/s with this diff. While here rename some 'lbn' variables into 'cn' to better reflect what we're dealing with. Tested by Mathieu, with support from deraadt@ diff --git sys/msdosfs/denode.h sys/msdosfs/denode.h index cdca90500ab..c3a413313c1 100644 --- sys/msdosfs/denode.h +++ sys/msdosfs/denode.h @@ -142,7 +142,6 @@ struct denode { struct vnode *de_devvp; /* vnode of blk dev we live on */ uint32_t de_flag; /* flag bits */ dev_t de_dev; /* device where direntry lives */ - daddr_t de_lastr; uint32_t de_dirclust; /* cluster of the directory file containing this entry */ uint32_t de_diroffset; /* offset of this entry in the directory cluster */ uint32_t de_fndoffset; /* offset of found dir entry */ diff --git sys/msdosfs/msdosfs_vnops.c sys/msdosfs/msdosfs_vnops.c index 39c60044df5..783bd0d1d31 100644 --- sys/msdosfs/msdosfs_vnops.c +++ sys/msdosfs/msdosfs_vnops.c @@ -77,13 +77,13 @@ static uint32_t fileidhash(uint64_t); +int msdosfs_bmaparray(struct vnode *, daddr_t, daddr_t *, int *); int msdosfs_kqfilter(void *); int filt_msdosfsread(struct knote *, long); int filt_msdosfswrite(struct knote *, long); int filt_msdosfsvnode(struct knote *, long); void filt_msdosfsdetach(struct knote *); - /* * Some general notes: * @@ -509,18 +509,14 @@ int msdosfs_read(void *v) { struct vop_read_args *ap = v; - int error = 0; - uint32_t diff; - int blsize; - int isadir; - uint32_t n; - long on; - daddr_t lbn, rablock, rablkno; - struct buf *bp; struct vnode *vp = ap->a_vp; struct denode *dep = VTODE(vp); struct msdosfsmount *pmp = dep->de_pmp; struct uio *uio = ap->a_uio; + int isadir, error = 0; + uint32_t n, diff, size, on; + struct buf *bp; + daddr_t cn, bn; /* * If they didn't ask for any data, then we are done. @@ -535,7 +531,8 @@ msdosfs_read(void *v) if (uio->uio_offset >= dep->de_FileSize) return (0); - lbn = de_cluster(pmp, uio->uio_offset); + cn = de_cluster(pmp, uio->uio_offset); + size = pmp->pm_bpcluster; on = uio->uio_offset & pmp->pm_crbomask; n = ulmin(pmp->pm_bpcluster - on, uio->uio_resid); @@ -548,31 +545,22 @@ msdosfs_read(void *v) if (diff < n) n = diff; - /* convert cluster # to block # if a directory */ - if (isadir) { - error = pcbmap(dep, lbn, &lbn, 0, &blsize); - if (error) - return (error); - } /* * If we are operating on a directory file then be sure to * do i/o with the vnode for the filesystem instead of the * vnode for the directory. */ if (isadir) { - error = bread(pmp->pm_devvp, lbn, blsize, &bp); + error = pcbmap(dep, cn, &cn, 0, &size); + if (error) + return (error); + error = bread(pmp->pm_devvp, cn, size, &bp); } else { - rablock = lbn + 1; - rablkno = de_cn2bn(pmp, rablock); - if (dep->de_lastr + 1 == lbn && - de_cn2off(pmp, rablock) < dep->de_FileSize) - error = breadn(vp, de_cn2bn(pmp, lbn), - pmp->pm_bpcluster, &rablkno, - &pmp->pm_bpcluster, 1, &bp); + bn = de_cn2bn(pmp, cn); + if (de_cn2off(pmp, cn + 1) >= dep->de_FileSize) + error = bread(vp, bn, size, &bp); else - error = bread(vp, de_cn2bn(pmp, lbn), - pmp->pm_bpcluster, &bp); -
Re: Fix for "Implement VFS read clustering for MSDOSFS"
On 29/05/17(Mon) 14:15, Stefan Fritsch wrote: > Last year, mpi@ implemented VFS read clustering for MSDOSFS in > > sys/msdosfs/denode.h 1.28 > sys/msdosfs/msdosfs_vnops.c 1.105 > > This caused regressions when doing seeks past the end of the file and had > to be reverted. > > I have now written a test that catches the bug (committed in > regress/sys/fileops) and got a fix that does not break the test: > > There was some confusion as to what block sizes are used in file > operations. Openbsd seems to be more like netbsd than like freebsd. > > See msdosfs_vnops.c 1.62 in freebsd: > > Author: msmith > Date: Sun Mar 1 21:26:09 1998 + > > Fix mmap() on msdosfs. In the words of the submitter: > > |In the process of evaluating the getpages/putpages issues I discovered > |that mmap on MSDOSFS does not work. This is because I blindly merged > |NetBSD changes in msdosfs_bmap and msdosfs_strategy. Apparently, their > |blocksize is always DEV_BSIZE (even in files), while in FreeBSD > |blocksize in files is v_mount->mnt_stat.f_iosize (i.e. clustersize in > |MSDOSFS case). The patch is below. > > Submitted by: Dmitrij Tejblum > > The diff is meant to be on top of mpi's diff (or on top of a revert of the > revert 1.113). I can send the complete diff if that's preferred. A complete diff would be easier to review. > > diff --git a/sys/msdosfs/msdosfs_vnops.c b/sys/msdosfs/msdosfs_vnops.c > index 45a479fb646..46ff2736ca4 100644 > --- sys/msdosfs/msdosfs_vnops.c > +++ sys/msdosfs/msdosfs_vnops.c > @@ -516,7 +516,7 @@ msdosfs_read(void *v) > int isadir, error = 0; > uint32_t n, diff, size, on; > struct buf *bp; > - daddr_t cn; > + daddr_t cn, bn; > > /* >* If they didn't ask for any data, then we are done. > @@ -556,10 +556,11 @@ msdosfs_read(void *v) > return (error); > error = bread(pmp->pm_devvp, cn, size, &bp); > } else { > + bn = de_cn2bn(pmp, cn); > if (de_cn2off(pmp, cn + 1) >= dep->de_FileSize) > - error = bread(vp, cn, size, &bp); > + error = bread(vp, bn, size, &bp); > else > - error = bread_cluster(vp, cn, size, &bp); > + error = bread_cluster(vp, bn, size, &bp); > } > n = min(n, pmp->pm_bpcluster - bp->b_resid); > if (error) { > @@ -588,7 +589,7 @@ msdosfs_write(void *v) > uint32_t osize; > int error = 0; > uint32_t count, lastcn; > - daddr_t cn; > + daddr_t cn, bn; > struct buf *bp; > int ioflag = ap->a_ioflag; > struct uio *uio = ap->a_uio; > @@ -681,18 +682,19 @@ msdosfs_write(void *v) >* or we write the cluster from its start beyond EOF, >* then no need to read data from disk. >*/ > - bp = getblk(thisvp, cn, pmp->pm_bpcluster, 0, 0); > + bn = de_cn2bn(pmp, cn); > + bp = getblk(thisvp, bn, pmp->pm_bpcluster, 0, 0); > clrbuf(bp); > /* >* Do the bmap now, since pcbmap needs buffers >* for the fat table. (see msdosfs_strategy) >*/ > if (bp->b_blkno == bp->b_lblkno) { > - error = pcbmap(dep, bp->b_lblkno, &cn, 0, 0); > + error = pcbmap(dep, > +de_bn2cn(pmp, bp->b_lblkno), > +&bp->b_blkno, 0, 0); > if (error) > bp->b_blkno = -1; > - else > - bp->b_blkno = cn; > } > if (bp->b_blkno == -1) { > brelse(bp); > @@ -705,7 +707,8 @@ msdosfs_write(void *v) >* The block we need to write into exists, so >* read it in. >*/ > - error = bread(thisvp, cn, pmp->pm_bpcluster, &bp); > + bn = de_cn2bn(pmp, cn); > + error = bread(thisvp, bn, pmp->pm_bpcluster, &bp); > if (error) { > brelse(bp); > break; > @@ -1820,7 +1823,8 @@ msdosfs_strategy(void *v) >* don't allow files with holes, so we shouldn't ever see this. >*/ > if (bp->b_blkno == bp->b_lblkno) { > - error = pcbmap(dep, bp->b_lblkno, &bp->b_blkno, 0, 0); > + error = pcbmap(dep, de_bn2cn(dep->de_pmp, bp->b_lblkno), > +&
Fix for "Implement VFS read clustering for MSDOSFS"
Last year, mpi@ implemented VFS read clustering for MSDOSFS in sys/msdosfs/denode.h 1.28 sys/msdosfs/msdosfs_vnops.c 1.105 This caused regressions when doing seeks past the end of the file and had to be reverted. I have now written a test that catches the bug (committed in regress/sys/fileops) and got a fix that does not break the test: There was some confusion as to what block sizes are used in file operations. Openbsd seems to be more like netbsd than like freebsd. See msdosfs_vnops.c 1.62 in freebsd: Author: msmith Date: Sun Mar 1 21:26:09 1998 + Fix mmap() on msdosfs. In the words of the submitter: |In the process of evaluating the getpages/putpages issues I discovered |that mmap on MSDOSFS does not work. This is because I blindly merged |NetBSD changes in msdosfs_bmap and msdosfs_strategy. Apparently, their |blocksize is always DEV_BSIZE (even in files), while in FreeBSD |blocksize in files is v_mount->mnt_stat.f_iosize (i.e. clustersize in |MSDOSFS case). The patch is below. Submitted by: Dmitrij Tejblum The diff is meant to be on top of mpi's diff (or on top of a revert of the revert 1.113). I can send the complete diff if that's preferred. diff --git a/sys/msdosfs/msdosfs_vnops.c b/sys/msdosfs/msdosfs_vnops.c index 45a479fb646..46ff2736ca4 100644 --- sys/msdosfs/msdosfs_vnops.c +++ sys/msdosfs/msdosfs_vnops.c @@ -516,7 +516,7 @@ msdosfs_read(void *v) int isadir, error = 0; uint32_t n, diff, size, on; struct buf *bp; - daddr_t cn; + daddr_t cn, bn; /* * If they didn't ask for any data, then we are done. @@ -556,10 +556,11 @@ msdosfs_read(void *v) return (error); error = bread(pmp->pm_devvp, cn, size, &bp); } else { + bn = de_cn2bn(pmp, cn); if (de_cn2off(pmp, cn + 1) >= dep->de_FileSize) - error = bread(vp, cn, size, &bp); + error = bread(vp, bn, size, &bp); else - error = bread_cluster(vp, cn, size, &bp); + error = bread_cluster(vp, bn, size, &bp); } n = min(n, pmp->pm_bpcluster - bp->b_resid); if (error) { @@ -588,7 +589,7 @@ msdosfs_write(void *v) uint32_t osize; int error = 0; uint32_t count, lastcn; - daddr_t cn; + daddr_t cn, bn; struct buf *bp; int ioflag = ap->a_ioflag; struct uio *uio = ap->a_uio; @@ -681,18 +682,19 @@ msdosfs_write(void *v) * or we write the cluster from its start beyond EOF, * then no need to read data from disk. */ - bp = getblk(thisvp, cn, pmp->pm_bpcluster, 0, 0); + bn = de_cn2bn(pmp, cn); + bp = getblk(thisvp, bn, pmp->pm_bpcluster, 0, 0); clrbuf(bp); /* * Do the bmap now, since pcbmap needs buffers * for the fat table. (see msdosfs_strategy) */ if (bp->b_blkno == bp->b_lblkno) { - error = pcbmap(dep, bp->b_lblkno, &cn, 0, 0); + error = pcbmap(dep, + de_bn2cn(pmp, bp->b_lblkno), + &bp->b_blkno, 0, 0); if (error) bp->b_blkno = -1; - else - bp->b_blkno = cn; } if (bp->b_blkno == -1) { brelse(bp); @@ -705,7 +707,8 @@ msdosfs_write(void *v) * The block we need to write into exists, so * read it in. */ - error = bread(thisvp, cn, pmp->pm_bpcluster, &bp); + bn = de_cn2bn(pmp, cn); + error = bread(thisvp, bn, pmp->pm_bpcluster, &bp); if (error) { brelse(bp); break; @@ -1820,7 +1823,8 @@ msdosfs_strategy(void *v) * don't allow files with holes, so we shouldn't ever see this. */ if (bp->b_blkno == bp->b_lblkno) { - error = pcbmap(dep, bp->b_lblkno, &bp->b_blkno, 0, 0); + error = pcbmap(dep, de_bn2cn(dep->de_pmp, bp->b_lblkno), + &bp->b_blkno, 0, 0); if (error) bp->b_blkno = -1; if (bp->b_blkno == -1)