Currently, the bufcache doesn't know that mfs is backed by memory. All I/O to
mfs ends up being double-cached: once in the userland process and again in the
kernel bufcache. This is wasteful. In particular, it means one can't use mfs
to increase the effective size of the buffer cache, because reading or writing
to mfs will push out buffers used for disk caching. (I think you can even end
up with triple buffering when mfs starts swapping...)
This is mostly inherent to the design of mfs. But with a small tweak to the
buffer cache, we can improve the situation. This diff introduces the concept of
'cheap' buffers, a hint to the buffer cache that they are easily replaced.
(There's a 'nocache' flag already, but it's not suitable here.) When mfs
finishes with a buf, it marks it cheap. The buf then goes onto a special queue
that gets chewed up before we start looking at the regular caches. We still
cache some number of cheap buffers (up to CHEAP_LIMIT) to prevent constant
memory copying.
With this diff, I've confirmed that reading/writing large files to mfs doesn't
flush the cache, but performance appears about the same. (Of particular note,
my bufcache is big enough to cache all of src/, but not both src/ and obj/.
With obj/ on mfs, src/ never gets flushed.)
Index: kern/vfs_bio.c
===================================================================
RCS file: /cvs/src/sys/kern/vfs_bio.c,v
retrieving revision 1.176
diff -u -p -r1.176 vfs_bio.c
--- kern/vfs_bio.c 4 Sep 2016 10:51:24 -0000 1.176
+++ kern/vfs_bio.c 8 Sep 2016 18:31:52 -0000
@@ -93,7 +93,10 @@ int bd_req; /* Sleep point for cleaner
#define NUM_CACHES 2
#define DMA_CACHE 0
+#define CHEAP_LIMIT 256
struct bufcache cleancache[NUM_CACHES];
+struct bufqueue cheapqueue;
+u_int cheapqueuelen;
struct bufqueue dirtyqueue;
void
@@ -1297,6 +1300,7 @@ bufcache_init(void)
TAILQ_INIT(&cleancache[i].coldqueue);
TAILQ_INIT(&cleancache[i].warmqueue);
}
+ TAILQ_INIT(&cheapqueue);
TAILQ_INIT(&dirtyqueue);
}
@@ -1329,6 +1333,12 @@ bufcache_getcleanbuf(int cachenum, int d
splassert(IPL_BIO);
+ /* try cheap queue if over limit */
+ if (discard || cheapqueuelen > CHEAP_LIMIT) {
+ if ((bp = TAILQ_FIRST(&cheapqueue)))
+ return bp;
+ }
+
/* try cold queue */
while ((bp = TAILQ_FIRST(&cache->coldqueue))) {
if ((!discard) &&
@@ -1356,6 +1366,8 @@ bufcache_getcleanbuf(int cachenum, int d
/* buffer is cold - give it up */
return bp;
}
+ if ((bp = TAILQ_FIRST(&cheapqueue)))
+ return bp;
if ((bp = TAILQ_FIRST(&cache->warmqueue)))
return bp;
if ((bp = TAILQ_FIRST(&cache->hotqueue)))
@@ -1410,6 +1422,13 @@ bufcache_take(struct buf *bp)
pages = atop(bp->b_bufsize);
struct bufcache *cache = &cleancache[bp->cache];
if (!ISSET(bp->b_flags, B_DELWRI)) {
+ if (ISSET(bp->b_flags, B_CHEAP)) {
+ TAILQ_REMOVE(&cheapqueue, bp, b_freelist);
+ cheapqueuelen--;
+ CLR(bp->b_flags, B_CHEAP);
+ return;
+ }
+
if (ISSET(bp->b_flags, B_COLD)) {
queue = &cache->coldqueue;
} else if (ISSET(bp->b_flags, B_WARM)) {
@@ -1462,11 +1481,17 @@ bufcache_release(struct buf *bp)
struct bufqueue *queue;
int64_t pages;
struct bufcache *cache = &cleancache[bp->cache];
+
pages = atop(bp->b_bufsize);
KASSERT(ISSET(bp->b_flags, B_BC));
KASSERT((ISSET(bp->b_flags, B_DMA) && bp->cache == 0)
|| ((!ISSET(bp->b_flags, B_DMA)) && bp->cache > 0));
if (!ISSET(bp->b_flags, B_DELWRI)) {
+ if (ISSET(bp->b_flags, B_CHEAP)) {
+ TAILQ_INSERT_TAIL(&cheapqueue, bp, b_freelist);
+ cheapqueuelen++;
+ return;
+ }
int64_t *queuepages;
if (ISSET(bp->b_flags, B_WARM | B_COLD)) {
SET(bp->b_flags, B_WARM);
Index: sys/buf.h
===================================================================
RCS file: /cvs/src/sys/sys/buf.h,v
retrieving revision 1.103
diff -u -p -r1.103 buf.h
--- sys/buf.h 23 May 2016 09:31:28 -0000 1.103
+++ sys/buf.h 8 Sep 2016 17:20:12 -0000
@@ -221,12 +221,14 @@ struct bufcache {
#define B_COLD 0x01000000 /* buffer is on the cold queue
*/
#define B_BC 0x02000000 /* buffer is managed by the
cache */
#define B_DMA 0x04000000 /* buffer is DMA reachable */
+#define B_CHEAP 0x08000000 /* buffer is cheap to refetch */
#define B_BITS "\20\001AGE\002NEEDCOMMIT\003ASYNC\004BAD\005BUSY" \
"\006CACHE\007CALL\010DELWRI\011DONE\012EINTR\013ERROR" \
"\014INVAL\015NOCACHE\016PHYS\017RAW\020READ" \
"\021WANTED\022WRITEINPROG\023XXX(FORMAT)\024DEFERRED" \
- "\025SCANNED\026DAEMON\027RELEASED\030WARM\031COLD\032BC\033DMA"
+ "\025SCANNED\026DAEMON\027RELEASED\030WARM\031COLD\032BC\033DMA" \
+ "\034CHEAP"
/*
* Zero out the buffer's data area.
Index: ufs/mfs/mfs_vnops.c
===================================================================
RCS file: /cvs/src/sys/ufs/mfs/mfs_vnops.c,v
retrieving revision 1.48
diff -u -p -r1.48 mfs_vnops.c
--- ufs/mfs/mfs_vnops.c 8 Sep 2016 16:44:46 -0000 1.48
+++ ufs/mfs/mfs_vnops.c 8 Sep 2016 18:45:53 -0000
@@ -161,6 +161,7 @@ mfs_doio(struct mfsnode *mfsp, struct bu
bp->b_flags |= B_ERROR;
else
bp->b_resid = 0;
+ bp->b_flags |= B_CHEAP;
s = splbio();
biodone(bp);
splx(s);