On Mon, Feb 25, 2019 at 06:07:23AM +0000, David Holland wrote: > that one doesn't set dropend correctly for small buffers, outsmarted > myself while writing it.
Better change (still against 1.242) that makes the logic much simpler. Will test this overnight... Index: ufs_vnops.c =================================================================== RCS file: /cvsroot/src/sys/ufs/ufs/ufs_vnops.c,v retrieving revision 1.239 diff -u -p -r1.239 ufs_vnops.c --- ufs_vnops.c 28 Oct 2017 00:37:13 -0000 1.239 +++ ufs_vnops.c 25 Feb 2019 06:58:52 -0000 @@ -1233,7 +1233,7 @@ ufs_readdir(void *v) #endif /* caller's buffer */ struct uio *calleruio = ap->a_uio; - off_t startoffset, endoffset; + off_t startoffset; size_t callerbytes; off_t curoffset; /* dirent production buffer */ @@ -1244,8 +1244,8 @@ ufs_readdir(void *v) off_t *cookies; size_t numcookies, maxcookies; /* disk buffer */ - off_t physstart, physend; - size_t skipstart, dropend; + off_t physstart; + size_t skipstart; char *rawbuf; size_t rawbufmax, rawbytes; struct uio rawuio; @@ -1256,29 +1256,60 @@ ufs_readdir(void *v) KASSERT(VOP_ISLOCKED(vp)); - /* figure out where we want to read */ +#define UFS_READDIR_ARBITRARY_MAX (1024*1024) callerbytes = calleruio->uio_resid; - startoffset = calleruio->uio_offset; - endoffset = startoffset + callerbytes; - if (callerbytes < _DIRENT_MINSIZE(dirent)) { /* no room for even one struct dirent */ return EINVAL; } + if (callerbytes > UFS_READDIR_ARBITRARY_MAX) { + callerbytes = UFS_READDIR_ARBITRARY_MAX; + } - /* round start and end down to block boundaries */ + /* + * Figure out where to start reading. Round the start down to + * a block boundary: we need to start at the beginning of a + * block in order to read the directory correctly. + * + * skipstart is the number of bytes we need to read in + * (because we need to start at the beginning of a block) but + * not transfer to the user. + */ + startoffset = calleruio->uio_offset; physstart = startoffset & ~(off_t)(ump->um_dirblksiz - 1); - physend = endoffset & ~(off_t)(ump->um_dirblksiz - 1); skipstart = startoffset - physstart; - dropend = endoffset - physend; - if (callerbytes - dropend < _DIRENT_MINSIZE(rawdp)) { - /* no room for even one struct direct */ - return EINVAL; - } + /* + * Now figure out how much to read. + * + * callerbytes tells us how much data we can send back to the + * user. Assume as a starting point that we want to read + * roughly the same volume of struct directs from disk as the + * volume of struct dirents we want to send to the user; this + * is not super accurate for large numbers of small names but + * will serve well enough. It's ok to underpopulate the user's + * buffer, and most directories are only a couple blocks + * anyway. + * + * However much we decide we want, stop on a block boundary. + * That way the copying code below doesn't need to worry about + * finding partial entries in the transfer buffer. + * + * Do this by rounding down (not up) by default as that will + * with luck avoid needing to scan the next block twice; but + * always read in at least one block. + */ - /* how much to actually read */ - rawbufmax = callerbytes + skipstart - dropend; + /* Base buffer space for CALLERBYTES of new data */ + rawbufmax = callerbytes + skipstart; + + /* Round down to an integral number of blocks */ + rawbufmax &= ~(off_t)(ump->um_dirblksiz - 1); + + /* but always at least one block */ + if (rawbufmax == 0) { + rawbufmax = ump->um_dirblksiz; + } /* read it */ rawbuf = kmem_alloc(rawbufmax, KM_SLEEP); -- David A. Holland dholl...@netbsd.org