On 08/09/16(Thu) 14:49, Ted Unangst wrote: > Currently, the bufcache doesn't know that mfs is backed by memory. All i/o to > mfs ends up being double cached, once in the userland process and again in the > kernel bufcache. This is wasteful. In particular, it means one can't use mfs > to increase the effective size of the buffer cache. Reading or writing to mfs > will push out buffers used for disk caching. (I think you can even end up with > triple buffering when mfs starts swapping...)
Isn't the solution to this problem a working dynamic buffer cache? I'm not sure adding a hack for mfs, and the complexity that comes with it, is the way to go. Did somebody analyze what broke when the buffer cache was cranked to 90%? > This is mostly inherent to the design of mfs. But with a small tweak to the > buffer cache, we can improve the situation. This introduces the concept of > 'cheap' buffers, a hint to the buffer cache that they are easily replaced. > (There's a 'nocache' flag already, but it's not suitable here.) When mfs > finishes with a buf, it marks it cheap. Then it goes onto a special queue that > gets chewed up before we start looking at the regular caches. We still cache > some number of cheap buffers to prevent constant memory copying. > > With this diff, I've confirmed that reading/writing large files to mfs doesn't > flush the cache, but performance appears about the same. (Of particular note, > my bufcache is big enough to cache all of src/, but not src/ and obj/. With > obj/ on mfs, src never gets flushed.) 
> > Index: kern/vfs_bio.c > =================================================================== > RCS file: /cvs/src/sys/kern/vfs_bio.c,v > retrieving revision 1.176 > diff -u -p -r1.176 vfs_bio.c > --- kern/vfs_bio.c 4 Sep 2016 10:51:24 -0000 1.176 > +++ kern/vfs_bio.c 8 Sep 2016 18:31:52 -0000 > @@ -93,7 +93,10 @@ int bd_req; /* Sleep point for > cleaner > > #define NUM_CACHES 2 > #define DMA_CACHE 0 > +#define CHEAP_LIMIT 256 > struct bufcache cleancache[NUM_CACHES]; > +struct bufqueue cheapqueue; > +u_int cheapqueuelen; > struct bufqueue dirtyqueue; > > void > @@ -1297,6 +1300,7 @@ bufcache_init(void) > TAILQ_INIT(&cleancache[i].coldqueue); > TAILQ_INIT(&cleancache[i].warmqueue); > } > + TAILQ_INIT(&cheapqueue); > TAILQ_INIT(&dirtyqueue); > } > > @@ -1329,6 +1333,12 @@ bufcache_getcleanbuf(int cachenum, int d > > splassert(IPL_BIO); > > + /* try cheap queue if over limit */ > + if (discard || cheapqueuelen > CHEAP_LIMIT) { > + if ((bp = TAILQ_FIRST(&cheapqueue))) > + return bp; > + } > + > /* try cold queue */ > while ((bp = TAILQ_FIRST(&cache->coldqueue))) { > if ((!discard) && > @@ -1356,6 +1366,8 @@ bufcache_getcleanbuf(int cachenum, int d > /* buffer is cold - give it up */ > return bp; > } > + if ((bp = TAILQ_FIRST(&cheapqueue))) > + return bp; > if ((bp = TAILQ_FIRST(&cache->warmqueue))) > return bp; > if ((bp = TAILQ_FIRST(&cache->hotqueue))) > @@ -1410,6 +1422,13 @@ bufcache_take(struct buf *bp) > pages = atop(bp->b_bufsize); > struct bufcache *cache = &cleancache[bp->cache]; > if (!ISSET(bp->b_flags, B_DELWRI)) { > + if (ISSET(bp->b_flags, B_CHEAP)) { > + TAILQ_REMOVE(&cheapqueue, bp, b_freelist); > + cheapqueuelen--; > + CLR(bp->b_flags, B_CHEAP); > + return; > + } > + > if (ISSET(bp->b_flags, B_COLD)) { > queue = &cache->coldqueue; > } else if (ISSET(bp->b_flags, B_WARM)) { > @@ -1462,11 +1481,17 @@ bufcache_release(struct buf *bp) > struct bufqueue *queue; > int64_t pages; > struct bufcache *cache = &cleancache[bp->cache]; > + > pages = 
atop(bp->b_bufsize); > KASSERT(ISSET(bp->b_flags, B_BC)); > KASSERT((ISSET(bp->b_flags, B_DMA) && bp->cache == 0) > || ((!ISSET(bp->b_flags, B_DMA)) && bp->cache > 0)); > if (!ISSET(bp->b_flags, B_DELWRI)) { > + if (ISSET(bp->b_flags, B_CHEAP)) { > + TAILQ_INSERT_TAIL(&cheapqueue, bp, b_freelist); > + cheapqueuelen++; > + return; > + } > int64_t *queuepages; > if (ISSET(bp->b_flags, B_WARM | B_COLD)) { > SET(bp->b_flags, B_WARM); > Index: sys/buf.h > =================================================================== > RCS file: /cvs/src/sys/sys/buf.h,v > retrieving revision 1.103 > diff -u -p -r1.103 buf.h > --- sys/buf.h 23 May 2016 09:31:28 -0000 1.103 > +++ sys/buf.h 8 Sep 2016 17:20:12 -0000 > @@ -221,12 +221,14 @@ struct bufcache { > #define B_COLD 0x01000000 /* buffer is on the cold queue > */ > #define B_BC 0x02000000 /* buffer is managed by the > cache */ > #define B_DMA 0x04000000 /* buffer is DMA reachable */ > +#define B_CHEAP 0x08000000 /* buffer is cheap to refetch */ > > #define B_BITS "\20\001AGE\002NEEDCOMMIT\003ASYNC\004BAD\005BUSY" \ > "\006CACHE\007CALL\010DELWRI\011DONE\012EINTR\013ERROR" \ > "\014INVAL\015NOCACHE\016PHYS\017RAW\020READ" \ > "\021WANTED\022WRITEINPROG\023XXX(FORMAT)\024DEFERRED" \ > - "\025SCANNED\026DAEMON\027RELEASED\030WARM\031COLD\032BC\033DMA" > + "\025SCANNED\026DAEMON\027RELEASED\030WARM\031COLD\032BC\033DMA" \ > + "\034CHEAP" > > /* > * Zero out the buffer's data area. > Index: ufs/mfs/mfs_vnops.c > =================================================================== > RCS file: /cvs/src/sys/ufs/mfs/mfs_vnops.c,v > retrieving revision 1.48 > diff -u -p -r1.48 mfs_vnops.c > --- ufs/mfs/mfs_vnops.c 8 Sep 2016 16:44:46 -0000 1.48 > +++ ufs/mfs/mfs_vnops.c 8 Sep 2016 18:45:53 -0000 > @@ -161,6 +161,7 @@ mfs_doio(struct mfsnode *mfsp, struct bu > bp->b_flags |= B_ERROR; > else > bp->b_resid = 0; > + bp->b_flags |= B_CHEAP; > s = splbio(); > biodone(bp); > splx(s); >