Currently, the bufcache doesn't know that mfs is backed by memory. All i/o to
mfs ends up being double cached, once in the userland process and again in the
kernel bufcache. This is wasteful. In particular, it means one can't use mfs
to increase the effective size of the buffer cache. Reading or writing to mfs
will push out buffers used for disk caching. (I think you can even end up with
triple buffering when mfs starts swapping...)

This is mostly inherent to the design of mfs. But with a small tweak to the
buffer cache, we can improve the situation. This introduces the concept of
'cheap' buffers, a hint to the buffer cache that they are easily replaced.
(There's a 'nocache' flag already, but it's not suitable here.) When mfs
finishes with a buf, it marks it cheap. Then it goes onto a special queue that
gets chewed up before we start looking at the regular caches. We still cache
some number of cheap buffers to prevent constant memory copying.

With this diff, I've confirmed that reading/writing large files to mfs doesn't
flush the cache, but performance appears about the same. (Of particular note,
my bufcache is big enough to cache all of src/, but not src/ and obj/. With
obj/ on mfs, src never gets flushed.)


Index: kern/vfs_bio.c
===================================================================
RCS file: /cvs/src/sys/kern/vfs_bio.c,v
retrieving revision 1.176
diff -u -p -r1.176 vfs_bio.c
--- kern/vfs_bio.c      4 Sep 2016 10:51:24 -0000       1.176
+++ kern/vfs_bio.c      8 Sep 2016 18:31:52 -0000
@@ -93,7 +93,10 @@ int bd_req;                  /* Sleep point for cleaner
 
 #define NUM_CACHES 2
 #define DMA_CACHE 0
+#define CHEAP_LIMIT 256
 struct bufcache cleancache[NUM_CACHES];
+struct bufqueue cheapqueue;
+u_int cheapqueuelen;
 struct bufqueue dirtyqueue;
 
 void
@@ -1297,6 +1300,7 @@ bufcache_init(void)
                TAILQ_INIT(&cleancache[i].coldqueue);
                TAILQ_INIT(&cleancache[i].warmqueue);
        }
+       TAILQ_INIT(&cheapqueue);
        TAILQ_INIT(&dirtyqueue);
 }
 
@@ -1329,6 +1333,12 @@ bufcache_getcleanbuf(int cachenum, int d
 
        splassert(IPL_BIO);
 
+       /* try cheap queue if over limit */
+       if (discard || cheapqueuelen > CHEAP_LIMIT) {
+               if ((bp = TAILQ_FIRST(&cheapqueue)))
+                       return bp;
+       }
+
        /* try  cold queue */
        while ((bp = TAILQ_FIRST(&cache->coldqueue))) {
                if ((!discard) &&
@@ -1356,6 +1366,8 @@ bufcache_getcleanbuf(int cachenum, int d
                        /* buffer is cold - give it up */
                        return bp;
        }
+       if ((bp = TAILQ_FIRST(&cheapqueue)))
+               return bp;
        if ((bp = TAILQ_FIRST(&cache->warmqueue)))
                return bp;
        if ((bp = TAILQ_FIRST(&cache->hotqueue)))
@@ -1410,6 +1422,13 @@ bufcache_take(struct buf *bp)
        pages = atop(bp->b_bufsize);
        struct bufcache *cache = &cleancache[bp->cache];
        if (!ISSET(bp->b_flags, B_DELWRI)) {
+               if (ISSET(bp->b_flags, B_CHEAP)) {
+                       TAILQ_REMOVE(&cheapqueue, bp, b_freelist);
+                       cheapqueuelen--;
+                       CLR(bp->b_flags, B_CHEAP);
+                       return;
+               }
+
                 if (ISSET(bp->b_flags, B_COLD)) {
                        queue = &cache->coldqueue;
                } else if (ISSET(bp->b_flags, B_WARM)) {
@@ -1462,11 +1481,17 @@ bufcache_release(struct buf *bp)
        struct bufqueue *queue;
        int64_t pages;
        struct bufcache *cache = &cleancache[bp->cache];
+
        pages = atop(bp->b_bufsize);
        KASSERT(ISSET(bp->b_flags, B_BC));
        KASSERT((ISSET(bp->b_flags, B_DMA) && bp->cache == 0)
            || ((!ISSET(bp->b_flags, B_DMA)) && bp->cache > 0));
        if (!ISSET(bp->b_flags, B_DELWRI)) {
+               if (ISSET(bp->b_flags, B_CHEAP)) {
+                       TAILQ_INSERT_TAIL(&cheapqueue, bp, b_freelist);
+                       cheapqueuelen++;
+                       return;
+               }
                int64_t *queuepages;
                if (ISSET(bp->b_flags, B_WARM | B_COLD)) {
                        SET(bp->b_flags, B_WARM);
Index: sys/buf.h
===================================================================
RCS file: /cvs/src/sys/sys/buf.h,v
retrieving revision 1.103
diff -u -p -r1.103 buf.h
--- sys/buf.h   23 May 2016 09:31:28 -0000      1.103
+++ sys/buf.h   8 Sep 2016 17:20:12 -0000
@@ -221,12 +221,14 @@ struct bufcache {
 #define        B_COLD          0x01000000      /* buffer is on the cold queue */
 #define        B_BC            0x02000000      /* buffer is managed by the cache */
 #define        B_DMA           0x04000000      /* buffer is DMA reachable */
+#define        B_CHEAP         0x08000000      /* buffer is cheap to refetch */
 
 #define        B_BITS  "\20\001AGE\002NEEDCOMMIT\003ASYNC\004BAD\005BUSY" \
     "\006CACHE\007CALL\010DELWRI\011DONE\012EINTR\013ERROR" \
     "\014INVAL\015NOCACHE\016PHYS\017RAW\020READ" \
     "\021WANTED\022WRITEINPROG\023XXX(FORMAT)\024DEFERRED" \
-    "\025SCANNED\026DAEMON\027RELEASED\030WARM\031COLD\032BC\033DMA"
+    "\025SCANNED\026DAEMON\027RELEASED\030WARM\031COLD\032BC\033DMA" \
+    "\034CHEAP"
 
 /*
  * Zero out the buffer's data area.
Index: ufs/mfs/mfs_vnops.c
===================================================================
RCS file: /cvs/src/sys/ufs/mfs/mfs_vnops.c,v
retrieving revision 1.48
diff -u -p -r1.48 mfs_vnops.c
--- ufs/mfs/mfs_vnops.c 8 Sep 2016 16:44:46 -0000       1.48
+++ ufs/mfs/mfs_vnops.c 8 Sep 2016 18:45:53 -0000
@@ -161,6 +161,7 @@ mfs_doio(struct mfsnode *mfsp, struct bu
                bp->b_flags |= B_ERROR;
        else
                bp->b_resid = 0;
+       bp->b_flags |= B_CHEAP;
        s = splbio();
        biodone(bp);
        splx(s);

Reply via email to