On 08/09/16(Thu) 14:49, Ted Unangst wrote:
> Currently, the bufcache doesn't know that mfs is backed by memory. All i/o to
> mfs ends up being double cached, once in the userland process and again in the
> kernel bufcache. This is wasteful. In particular, it means one can't use mfs
> to increase the effective size of the buffer cache. Reading or writing to mfs
> will push out buffers used for disk caching. (I think you can even end up with
> triple buffering when mfs starts swapping...)

Isn't the solution to this problem a working dynamic buffer cache?  I'm
not sure adding a hack for mfs, and the complexity that comes with it,
is the way to go.  Did somebody analyze what broke when the buffer
cache was cranked to 90%?

> This is mostly inherent to the design of mfs. But with a small tweak to the
> buffer cache, we can improve the situation. This introduces the concept of
> 'cheap' buffers, a hint to the buffer cache that they are easily replaced.
> (There's a 'nocache' flag already, but it's not suitable here.) When mfs
> finishes with a buf, it marks it cheap. Then it goes onto a special queue that
> gets chewed up before we start looking at the regular caches. We still cache
> some number of cheap buffers to prevent constant memory copying.
> 
> With this diff, I've confirmed that reading/writing large files to mfs doesn't
> flush the cache, but performance appears about the same. (Of particular note,
> my bufcache is big enough to cache all of src/, but not src/ and obj/. With
> obj/ on mfs, src never gets flushed.)
>
> Index: kern/vfs_bio.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/vfs_bio.c,v
> retrieving revision 1.176
> diff -u -p -r1.176 vfs_bio.c
> --- kern/vfs_bio.c    4 Sep 2016 10:51:24 -0000       1.176
> +++ kern/vfs_bio.c    8 Sep 2016 18:31:52 -0000
> @@ -93,7 +93,10 @@ int bd_req;                        /* Sleep point for cleaner
>  
>  #define NUM_CACHES 2
>  #define DMA_CACHE 0
> +#define CHEAP_LIMIT 256
>  struct bufcache cleancache[NUM_CACHES];
> +struct bufqueue cheapqueue;
> +u_int cheapqueuelen;
>  struct bufqueue dirtyqueue;
>  
>  void
> @@ -1297,6 +1300,7 @@ bufcache_init(void)
>               TAILQ_INIT(&cleancache[i].coldqueue);
>               TAILQ_INIT(&cleancache[i].warmqueue);
>       }
> +     TAILQ_INIT(&cheapqueue);
>       TAILQ_INIT(&dirtyqueue);
>  }
>  
> @@ -1329,6 +1333,12 @@ bufcache_getcleanbuf(int cachenum, int d
>  
>       splassert(IPL_BIO);
>  
> +     /* try cheap queue if over limit */
> +     if (discard || cheapqueuelen > CHEAP_LIMIT) {
> +             if ((bp = TAILQ_FIRST(&cheapqueue)))
> +                     return bp;
> +     }
> +
>       /* try  cold queue */
>       while ((bp = TAILQ_FIRST(&cache->coldqueue))) {
>               if ((!discard) &&
> @@ -1356,6 +1366,8 @@ bufcache_getcleanbuf(int cachenum, int d
>                       /* buffer is cold - give it up */
>                       return bp;
>       }
> +     if ((bp = TAILQ_FIRST(&cheapqueue)))
> +             return bp;
>       if ((bp = TAILQ_FIRST(&cache->warmqueue)))
>               return bp;
>       if ((bp = TAILQ_FIRST(&cache->hotqueue)))
> @@ -1410,6 +1422,13 @@ bufcache_take(struct buf *bp)
>       pages = atop(bp->b_bufsize);
>       struct bufcache *cache = &cleancache[bp->cache];
>       if (!ISSET(bp->b_flags, B_DELWRI)) {
> +             if (ISSET(bp->b_flags, B_CHEAP)) {
> +                     TAILQ_REMOVE(&cheapqueue, bp, b_freelist);
> +                     cheapqueuelen--;
> +                     CLR(bp->b_flags, B_CHEAP);
> +                     return;
> +             }
> +
>                  if (ISSET(bp->b_flags, B_COLD)) {
>                       queue = &cache->coldqueue;
>               } else if (ISSET(bp->b_flags, B_WARM)) {
> @@ -1462,11 +1481,17 @@ bufcache_release(struct buf *bp)
>       struct bufqueue *queue;
>       int64_t pages;
>       struct bufcache *cache = &cleancache[bp->cache];
> +
>       pages = atop(bp->b_bufsize);
>       KASSERT(ISSET(bp->b_flags, B_BC));
>       KASSERT((ISSET(bp->b_flags, B_DMA) && bp->cache == 0)
>           || ((!ISSET(bp->b_flags, B_DMA)) && bp->cache > 0));
>       if (!ISSET(bp->b_flags, B_DELWRI)) {
> +             if (ISSET(bp->b_flags, B_CHEAP)) {
> +                     TAILQ_INSERT_TAIL(&cheapqueue, bp, b_freelist);
> +                     cheapqueuelen++;
> +                     return;
> +             }
>               int64_t *queuepages;
>               if (ISSET(bp->b_flags, B_WARM | B_COLD)) {
>                       SET(bp->b_flags, B_WARM);
> Index: sys/buf.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/buf.h,v
> retrieving revision 1.103
> diff -u -p -r1.103 buf.h
> --- sys/buf.h 23 May 2016 09:31:28 -0000      1.103
> +++ sys/buf.h 8 Sep 2016 17:20:12 -0000
> @@ -221,12 +221,14 @@ struct bufcache {
>  #define      B_COLD          0x01000000      /* buffer is on the cold queue */
>  #define      B_BC            0x02000000      /* buffer is managed by the cache */
>  #define      B_DMA           0x04000000      /* buffer is DMA reachable */
> +#define      B_CHEAP         0x08000000      /* buffer is cheap to refetch */
>  
>  #define      B_BITS  "\20\001AGE\002NEEDCOMMIT\003ASYNC\004BAD\005BUSY" \
>      "\006CACHE\007CALL\010DELWRI\011DONE\012EINTR\013ERROR" \
>      "\014INVAL\015NOCACHE\016PHYS\017RAW\020READ" \
>      "\021WANTED\022WRITEINPROG\023XXX(FORMAT)\024DEFERRED" \
> -    "\025SCANNED\026DAEMON\027RELEASED\030WARM\031COLD\032BC\033DMA"
> +    "\025SCANNED\026DAEMON\027RELEASED\030WARM\031COLD\032BC\033DMA" \
> +    "\034CHEAP"
>  
>  /*
>   * Zero out the buffer's data area.
> Index: ufs/mfs/mfs_vnops.c
> ===================================================================
> RCS file: /cvs/src/sys/ufs/mfs/mfs_vnops.c,v
> retrieving revision 1.48
> diff -u -p -r1.48 mfs_vnops.c
> --- ufs/mfs/mfs_vnops.c       8 Sep 2016 16:44:46 -0000       1.48
> +++ ufs/mfs/mfs_vnops.c       8 Sep 2016 18:45:53 -0000
> @@ -161,6 +161,7 @@ mfs_doio(struct mfsnode *mfsp, struct bu
>               bp->b_flags |= B_ERROR;
>       else
>               bp->b_resid = 0;
> +     bp->b_flags |= B_CHEAP;
>       s = splbio();
>       biodone(bp);
>       splx(s);
> 

Reply via email to