Re: Fix for "Implement VFS read clustering for MSDOSFS"

2017-05-29 Thread Martin Pieuchot
On 29/05/17(Mon) 14:30, Stefan Fritsch wrote:
> On Mon, 29 May 2017, Martin Pieuchot wrote:
> 
> > A complete diff would be easier to review.
> 
> ok, here it comes.
> 
> The original commit message was this:
> 
>   Implement VFS read clustering for MSDOSFS.
> 
>   The logic used in msdosfs_bmap() to loop calling pcbmap() comes from
>   FreeBSD and is not really efficient but it is good enough since it is
>   only called when generating I/O.
> 
>   With this diff I get a 100% improvement when reading big files from a
>   crappy USB stick.
> 
>   With this and bread_cluster(9) modified to not re-fetch B_CACHED 
> buffers,
>   reading large contiguous files with chunk sizes of MAXPHYS is almost as
>   fast as physio(9) on the same device.
> 
>   For a 'real world' example, when copying music files from a USB stick I
>   see a speed jump from 15MB/s on -current to 24Mb/s with this diff.
> 
>   While here rename some 'lbn' variables into 'cn' to better reflect what
>   we're dealing with.
> 
>   Tested by Mathieu, with support from deraadt@

ok mpi@

Now I understood why I got confused between block & cluster, the current
msdosfs_bmap() function has a bug.  ``ap->a_bn'' is representing a
cluster, not a block.  Diff below fixes that, for the archives.

Index: msdosfs/msdosfs_vnops.c
===
RCS file: /cvs/src/sys/msdosfs/msdosfs_vnops.c,v
retrieving revision 1.113
diff -u -p -r1.113 msdosfs_vnops.c
--- msdosfs/msdosfs_vnops.c 30 Aug 2016 19:47:23 -  1.113
+++ msdosfs/msdosfs_vnops.c 29 May 2017 13:24:11 -
@@ -1765,7 +1765,6 @@ msdosfs_bmap(void *v)
 {
struct vop_bmap_args *ap = v;
struct denode *dep = VTODE(ap->a_vp);
-   struct msdosfsmount *pmp = dep->de_pmp;
 
if (ap->a_vpp != NULL)
*ap->a_vpp = dep->de_devvp;
@@ -1777,7 +1776,7 @@ msdosfs_bmap(void *v)
 */
*ap->a_runp = 0;
}
-   return (pcbmap(dep, de_bn2cn(pmp, ap->a_bn), ap->a_bnp, 0, 0));
+   return (pcbmap(dep, ap->a_bn, ap->a_bnp, 0, 0));
 }
 
 int



Re: Fix for "Implement VFS read clustering for MSDOSFS"

2017-05-29 Thread Stefan Fritsch
On Mon, 29 May 2017, Martin Pieuchot wrote:

> A complete diff would be easier to review.

ok, here it comes.

The original commit message was this:

  Implement VFS read clustering for MSDOSFS.

  The logic used in msdosfs_bmap() to loop calling pcbmap() comes from
  FreeBSD and is not really efficient but it is good enough since it is
  only called when generating I/O.

  With this diff I get a 100% improvement when reading big files from a
  crappy USB stick.

  With this and bread_cluster(9) modified to not re-fetch B_CACHED buffers,
  reading large contiguous files with chunk sizes of MAXPHYS is almost as
  fast as physio(9) on the same device.

  For a 'real world' example, when copying music files from a USB stick I
  see a speed jump from 15MB/s on -current to 24Mb/s with this diff.

  While here rename some 'lbn' variables into 'cn' to better reflect what
  we're dealing with.

  Tested by Mathieu, with support from deraadt@

diff --git sys/msdosfs/denode.h sys/msdosfs/denode.h
index cdca90500ab..c3a413313c1 100644
--- sys/msdosfs/denode.h
+++ sys/msdosfs/denode.h
@@ -142,7 +142,6 @@ struct denode {
struct vnode *de_devvp; /* vnode of blk dev we live on */
uint32_t de_flag;   /* flag bits */
dev_t de_dev;   /* device where direntry lives */
-   daddr_t de_lastr;
uint32_t de_dirclust;   /* cluster of the directory file containing 
this entry */
uint32_t de_diroffset;  /* offset of this entry in the directory 
cluster */
uint32_t de_fndoffset;  /* offset of found dir entry */
diff --git sys/msdosfs/msdosfs_vnops.c sys/msdosfs/msdosfs_vnops.c
index 39c60044df5..783bd0d1d31 100644
--- sys/msdosfs/msdosfs_vnops.c
+++ sys/msdosfs/msdosfs_vnops.c
@@ -77,13 +77,13 @@
 
 static uint32_t fileidhash(uint64_t);
 
+int msdosfs_bmaparray(struct vnode *, daddr_t, daddr_t *, int *);
 int msdosfs_kqfilter(void *);
 int filt_msdosfsread(struct knote *, long);
 int filt_msdosfswrite(struct knote *, long);
 int filt_msdosfsvnode(struct knote *, long);
 void filt_msdosfsdetach(struct knote *);
 
-
 /*
  * Some general notes:
  *
@@ -509,18 +509,14 @@ int
 msdosfs_read(void *v)
 {
struct vop_read_args *ap = v;
-   int error = 0;
-   uint32_t diff;
-   int blsize;
-   int isadir;
-   uint32_t n;
-   long on;
-   daddr_t lbn, rablock, rablkno;
-   struct buf *bp;
struct vnode *vp = ap->a_vp;
struct denode *dep = VTODE(vp);
struct msdosfsmount *pmp = dep->de_pmp;
struct uio *uio = ap->a_uio;
+   int isadir, error = 0;
+   uint32_t n, diff, size, on;
+   struct buf *bp;
+   daddr_t cn, bn;
 
/*
 * If they didn't ask for any data, then we are done.
@@ -535,7 +531,8 @@ msdosfs_read(void *v)
if (uio->uio_offset >= dep->de_FileSize)
return (0);
 
-   lbn = de_cluster(pmp, uio->uio_offset);
+   cn = de_cluster(pmp, uio->uio_offset);
+   size = pmp->pm_bpcluster;
on = uio->uio_offset & pmp->pm_crbomask;
n = ulmin(pmp->pm_bpcluster - on, uio->uio_resid);
 
@@ -548,31 +545,22 @@ msdosfs_read(void *v)
if (diff < n)
n = diff;
 
-   /* convert cluster # to block # if a directory */
-   if (isadir) {
-   error = pcbmap(dep, lbn, &lbn, 0, &blsize);
-   if (error)
-   return (error);
-   }
/*
 * If we are operating on a directory file then be sure to
 * do i/o with the vnode for the filesystem instead of the
 * vnode for the directory.
 */
if (isadir) {
-   error = bread(pmp->pm_devvp, lbn, blsize, &bp);
+   error = pcbmap(dep, cn, &cn, 0, &size);
+   if (error)
+   return (error);
+   error = bread(pmp->pm_devvp, cn, size, &bp);
} else {
-   rablock = lbn + 1;
-   rablkno = de_cn2bn(pmp, rablock);
-   if (dep->de_lastr + 1 == lbn &&
-   de_cn2off(pmp, rablock) < dep->de_FileSize)
-   error = breadn(vp, de_cn2bn(pmp, lbn),
-   pmp->pm_bpcluster, &rablkno,
-   &pmp->pm_bpcluster, 1, &bp);
+   bn = de_cn2bn(pmp, cn);
+   if (de_cn2off(pmp, cn + 1) >= dep->de_FileSize)
+   error = bread(vp, bn, size, &bp);
else
-   error = bread(vp, de_cn2bn(pmp, lbn),
-   pmp->pm_bpcluster, &bp);
-  

Re: Fix for "Implement VFS read clustering for MSDOSFS"

2017-05-29 Thread Martin Pieuchot
On 29/05/17(Mon) 14:15, Stefan Fritsch wrote:
> Last year, mpi@ implemented VFS read clustering for MSDOSFS in
> 
> sys/msdosfs/denode.h 1.28
> sys/msdosfs/msdosfs_vnops.c 1.105
> 
> This caused regressions when doing seeks past the end of the file and had 
> to be reverted.
> 
> I have now written a test that catches the bug (committed in 
> regress/sys/fileops) and got a fix that does not break the test:
> 
> There was some confusion as to what block sizes are used in file
> operations. Openbsd seems to be more like netbsd than like freebsd.
> 
> See msdosfs_vnops.c 1.62 in freebsd:
> 
> Author: msmith 
> Date:   Sun Mar 1 21:26:09 1998 +
> 
> Fix mmap() on msdosfs.  In the words of the submitter:
> 
> |In the process of evaluating the getpages/putpages issues I discovered
> |that mmap on MSDOSFS does not work. This is because I blindly merged
> |NetBSD changes in msdosfs_bmap and msdosfs_strategy. Apparently, their
> |blocksize is always DEV_BSIZE (even in files), while in FreeBSD
> |blocksize in files is v_mount->mnt_stat.f_iosize (i.e. clustersize in
> |MSDOSFS case). The patch is below.
> 
> Submitted by:   Dmitrij Tejblum 
> 
> The diff is meant to be on top of mpi's diff (or on top of a revert of the 
> revert 1.113). I can send the complete diff if that's preferred.

A complete diff would be easier to review.

> 
> diff --git a/sys/msdosfs/msdosfs_vnops.c b/sys/msdosfs/msdosfs_vnops.c
> index 45a479fb646..46ff2736ca4 100644
> --- sys/msdosfs/msdosfs_vnops.c
> +++ sys/msdosfs/msdosfs_vnops.c
> @@ -516,7 +516,7 @@ msdosfs_read(void *v)
>   int isadir, error = 0;
>   uint32_t n, diff, size, on;
>   struct buf *bp;
> - daddr_t cn;
> + daddr_t cn, bn;
>  
>   /*
>* If they didn't ask for any data, then we are done.
> @@ -556,10 +556,11 @@ msdosfs_read(void *v)
>   return (error);
>   error = bread(pmp->pm_devvp, cn, size, &bp);
>   } else {
> + bn = de_cn2bn(pmp, cn);
>   if (de_cn2off(pmp, cn + 1) >= dep->de_FileSize)
> - error = bread(vp, cn, size, &bp);
> + error = bread(vp, bn, size, &bp);
>   else
> - error = bread_cluster(vp, cn, size, &bp);
> + error = bread_cluster(vp, bn, size, &bp);
>   }
>   n = min(n, pmp->pm_bpcluster - bp->b_resid);
>   if (error) {
> @@ -588,7 +589,7 @@ msdosfs_write(void *v)
>   uint32_t osize;
>   int error = 0;
>   uint32_t count, lastcn;
> - daddr_t cn;
> + daddr_t cn, bn;
>   struct buf *bp;
>   int ioflag = ap->a_ioflag;
>   struct uio *uio = ap->a_uio;
> @@ -681,18 +682,19 @@ msdosfs_write(void *v)
>* or we write the cluster from its start beyond EOF,
>* then no need to read data from disk.
>*/
> - bp = getblk(thisvp, cn, pmp->pm_bpcluster, 0, 0);
> + bn = de_cn2bn(pmp, cn);
> + bp = getblk(thisvp, bn, pmp->pm_bpcluster, 0, 0);
>   clrbuf(bp);
>   /*
>* Do the bmap now, since pcbmap needs buffers
>* for the fat table. (see msdosfs_strategy)
>*/
>   if (bp->b_blkno == bp->b_lblkno) {
> - error = pcbmap(dep, bp->b_lblkno, &cn, 0, 0);
> + error = pcbmap(dep,
> +de_bn2cn(pmp, bp->b_lblkno),
> +&bp->b_blkno, 0, 0);
>   if (error)
>   bp->b_blkno = -1;
> - else
> - bp->b_blkno = cn;
>   }
>   if (bp->b_blkno == -1) {
>   brelse(bp);
> @@ -705,7 +707,8 @@ msdosfs_write(void *v)
>* The block we need to write into exists, so
>* read it in.
>*/
> - error = bread(thisvp, cn, pmp->pm_bpcluster, &bp);
> + bn = de_cn2bn(pmp, cn);
> + error = bread(thisvp, bn, pmp->pm_bpcluster, &bp);
>   if (error) {
>   brelse(bp);
>   break;
> @@ -1820,7 +1823,8 @@ msdosfs_strategy(void *v)
>* don't allow files with holes, so we shouldn't ever see this.
>*/
>   if (bp->b_blkno == bp->b_lblkno) {
> - error = pcbmap(dep, bp->b_lblkno, &bp->b_blkno, 0, 0);
> + error = pcbmap(dep, de_bn2cn(dep->de_pmp, bp->b_lblkno),
> +&

Fix for "Implement VFS read clustering for MSDOSFS"

2017-05-29 Thread Stefan Fritsch
Last year, mpi@ implemented VFS read clustering for MSDOSFS in

sys/msdosfs/denode.h 1.28
sys/msdosfs/msdosfs_vnops.c 1.105

This caused regressions when doing seeks past the end of the file and had 
to be reverted.

I have now written a test that catches the bug (committed in 
regress/sys/fileops) and got a fix that does not break the test:

There was some confusion as to what block sizes are used in file
operations. Openbsd seems to be more like netbsd than like freebsd.

See msdosfs_vnops.c 1.62 in freebsd:

Author: msmith 
Date:   Sun Mar 1 21:26:09 1998 +

Fix mmap() on msdosfs.  In the words of the submitter:

|In the process of evaluating the getpages/putpages issues I discovered
|that mmap on MSDOSFS does not work. This is because I blindly merged
|NetBSD changes in msdosfs_bmap and msdosfs_strategy. Apparently, their
|blocksize is always DEV_BSIZE (even in files), while in FreeBSD
|blocksize in files is v_mount->mnt_stat.f_iosize (i.e. clustersize in
|MSDOSFS case). The patch is below.

Submitted by:   Dmitrij Tejblum 

The diff is meant to be on top of mpi's diff (or on top of a revert of the 
revert 1.113). I can send the complete diff if that's preferred.

diff --git a/sys/msdosfs/msdosfs_vnops.c b/sys/msdosfs/msdosfs_vnops.c
index 45a479fb646..46ff2736ca4 100644
--- sys/msdosfs/msdosfs_vnops.c
+++ sys/msdosfs/msdosfs_vnops.c
@@ -516,7 +516,7 @@ msdosfs_read(void *v)
int isadir, error = 0;
uint32_t n, diff, size, on;
struct buf *bp;
-   daddr_t cn;
+   daddr_t cn, bn;
 
/*
 * If they didn't ask for any data, then we are done.
@@ -556,10 +556,11 @@ msdosfs_read(void *v)
return (error);
error = bread(pmp->pm_devvp, cn, size, &bp);
} else {
+   bn = de_cn2bn(pmp, cn);
if (de_cn2off(pmp, cn + 1) >= dep->de_FileSize)
-   error = bread(vp, cn, size, &bp);
+   error = bread(vp, bn, size, &bp);
else
-   error = bread_cluster(vp, cn, size, &bp);
+   error = bread_cluster(vp, bn, size, &bp);
}
n = min(n, pmp->pm_bpcluster - bp->b_resid);
if (error) {
@@ -588,7 +589,7 @@ msdosfs_write(void *v)
uint32_t osize;
int error = 0;
uint32_t count, lastcn;
-   daddr_t cn;
+   daddr_t cn, bn;
struct buf *bp;
int ioflag = ap->a_ioflag;
struct uio *uio = ap->a_uio;
@@ -681,18 +682,19 @@ msdosfs_write(void *v)
 * or we write the cluster from its start beyond EOF,
 * then no need to read data from disk.
 */
-   bp = getblk(thisvp, cn, pmp->pm_bpcluster, 0, 0);
+   bn = de_cn2bn(pmp, cn);
+   bp = getblk(thisvp, bn, pmp->pm_bpcluster, 0, 0);
clrbuf(bp);
/*
 * Do the bmap now, since pcbmap needs buffers
 * for the fat table. (see msdosfs_strategy)
 */
if (bp->b_blkno == bp->b_lblkno) {
-   error = pcbmap(dep, bp->b_lblkno, &cn, 0, 0);
+   error = pcbmap(dep,
+  de_bn2cn(pmp, bp->b_lblkno),
+  &bp->b_blkno, 0, 0);
if (error)
bp->b_blkno = -1;
-   else
-   bp->b_blkno = cn;
}
if (bp->b_blkno == -1) {
brelse(bp);
@@ -705,7 +707,8 @@ msdosfs_write(void *v)
 * The block we need to write into exists, so
 * read it in.
 */
-   error = bread(thisvp, cn, pmp->pm_bpcluster, &bp);
+   bn = de_cn2bn(pmp, cn);
+   error = bread(thisvp, bn, pmp->pm_bpcluster, &bp);
if (error) {
brelse(bp);
break;
@@ -1820,7 +1823,8 @@ msdosfs_strategy(void *v)
 * don't allow files with holes, so we shouldn't ever see this.
 */
if (bp->b_blkno == bp->b_lblkno) {
-   error = pcbmap(dep, bp->b_lblkno, &bp->b_blkno, 0, 0);
+   error = pcbmap(dep, de_bn2cn(dep->de_pmp, bp->b_lblkno),
+  &bp->b_blkno, 0, 0);
if (error)
bp->b_blkno = -1;
if (bp->b_blkno == -1)