Module Name:    src
Committed By:   jdolecek
Date:           Sat Dec  2 17:29:55 UTC 2017

Modified Files:
        src/sys/kern: vfs_wapbl.c

Log Message:
according to benchmark extracting pkgsrc.tar, using FUA and hence waiting
for each transfer to write through to the medium is way slower than just
letting the drive use a cached write and doing DIOCCACHESYNC on the end

Results were (fs block 32KB / frag 4KB, partition aligned on 32KB boundary):
HDD at siisata(4):  no-FUA: 108 sec w/FUA: 294 sec
SSD at ahcisata(4): no-FUA:  73 sec w/FUA: 502 sec

change the flag so that FUA is only used for the commit block write;
for journal data write, only pass DPO, rely on the cache flush to get them
to media


To generate a diff of this commit:
cvs rdiff -u -r1.100 -r1.101 src/sys/kern/vfs_wapbl.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/kern/vfs_wapbl.c
diff -u src/sys/kern/vfs_wapbl.c:1.100 src/sys/kern/vfs_wapbl.c:1.101
--- src/sys/kern/vfs_wapbl.c:1.100	Fri Oct 27 12:25:15 2017
+++ src/sys/kern/vfs_wapbl.c	Sat Dec  2 17:29:55 2017
@@ -1,4 +1,4 @@
-/*	$NetBSD: vfs_wapbl.c,v 1.100 2017/10/27 12:25:15 joerg Exp $	*/
+/*	$NetBSD: vfs_wapbl.c,v 1.101 2017/12/02 17:29:55 jdolecek Exp $	*/
 
 /*-
  * Copyright (c) 2003, 2008, 2009 The NetBSD Foundation, Inc.
@@ -36,7 +36,7 @@
 #define WAPBL_INTERNAL
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfs_wapbl.c,v 1.100 2017/10/27 12:25:15 joerg Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_wapbl.c,v 1.101 2017/12/02 17:29:55 jdolecek Exp $");
 
 #include <sys/param.h>
 #include <sys/bitops.h>
@@ -239,10 +239,9 @@ struct wapbl {
 		(wapbl_allow_dpofua && ISSET((wl)->wl_dkcache, DKCACHE_FUA))
 #define WAPBL_JFLAGS(wl)	\
 		(WAPBL_USE_FUA(wl) ? (wl)->wl_jwrite_flags : 0)
-#define WAPBL_MFLAGS(wl)	\
-		(WAPBL_USE_FUA(wl) ? (wl)->wl_mwrite_flags : 0)
+#define WAPBL_JDATA_FLAGS(wl)	\
+		(WAPBL_JFLAGS(wl) & B_MEDIA_DPO)	/* only DPO */
 	int wl_jwrite_flags;	/* r: 	journal write flags */
-	int wl_mwrite_flags;	/* r:	metadata write flags */
 };
 
 #ifdef WAPBL_DEBUG_PRINT
@@ -444,10 +443,8 @@ wapbl_dkcache_init(struct wapbl *wl)
 	}
 
 	/* Use FUA instead of cache flush if available */
-	if (ISSET(wl->wl_dkcache, DKCACHE_FUA)) {
+	if (ISSET(wl->wl_dkcache, DKCACHE_FUA))
 		wl->wl_jwrite_flags |= B_MEDIA_FUA;
-		wl->wl_mwrite_flags |= B_MEDIA_FUA;
-	}
 
 	/* Use DPO for journal writes if available */
 	if (ISSET(wl->wl_dkcache, DKCACHE_DPO))
@@ -998,7 +995,7 @@ wapbl_buffered_write_async(struct wapbl 
 	KASSERT(TAILQ_FIRST(&wl->wl_iobufs) == bp);
 	TAILQ_REMOVE(&wl->wl_iobufs, bp, b_wapbllist);
 
-	bp->b_flags = B_WRITE | WAPBL_JFLAGS(wl);
+	bp->b_flags |= B_WRITE;
 	bp->b_cflags = BC_BUSY;	/* mandatory, asserted by biowait() */
 	bp->b_oflags = 0;
 	bp->b_bcount = bp->b_resid;
@@ -1043,7 +1040,7 @@ again:
 		error = biowait(bp);
 
 		/* reset for reuse */
-		bp->b_blkno = bp->b_resid = 0;
+		bp->b_blkno = bp->b_resid = bp->b_flags = 0;
 		TAILQ_INSERT_TAIL(&wl->wl_iobufs, bp, b_wapbllist);
 		found = true;
 
@@ -1067,7 +1064,8 @@ again:
  *	wapbl_buffered_flush.
  */
 static int
-wapbl_buffered_write(void *data, size_t len, struct wapbl *wl, daddr_t pbn)
+wapbl_buffered_write(void *data, size_t len, struct wapbl *wl, daddr_t pbn,
+    int bflags)
 {
 	size_t resid;
 	struct buf *bp;
@@ -1096,8 +1094,10 @@ again:
 	 * If this write goes to an empty buffer we have to
 	 * save the disk block address first.
 	 */
-	if (bp->b_blkno == 0)
+	if (bp->b_blkno == 0) {
 		bp->b_blkno = pbn;
+		bp->b_flags |= bflags;
+	}
 
 	/*
 	 * Remaining space so this buffer ends on a buffer size boundary.
@@ -1164,7 +1164,8 @@ wapbl_circ_write(struct wapbl *wl, void 
 #ifdef _KERNEL
 		pbn = btodb(pbn << wl->wl_log_dev_bshift);
 #endif
-		error = wapbl_buffered_write(data, slen, wl, pbn);
+		error = wapbl_buffered_write(data, slen, wl, pbn,
+		    WAPBL_JDATA_FLAGS(wl));
 		if (error)
 			return error;
 		data = (uint8_t *)data + slen;
@@ -1175,7 +1176,8 @@ wapbl_circ_write(struct wapbl *wl, void 
 #ifdef _KERNEL
 	pbn = btodb(pbn << wl->wl_log_dev_bshift);
 #endif
-	error = wapbl_buffered_write(data, len, wl, pbn);
+	error = wapbl_buffered_write(data, len, wl, pbn,
+	    WAPBL_JDATA_FLAGS(wl));
 	if (error)
 		return error;
 	off += len;
@@ -1925,9 +1927,6 @@ wapbl_flush(struct wapbl *wl, int waitfo
 		bp->b_iodone = wapbl_biodone;
 		bp->b_private = we;
 
-		/* make sure the block is saved sync when FUA in use */
-		bp->b_flags |= WAPBL_MFLAGS(wl);
-
 		bremfree(bp);
 		wapbl_remove_buf_locked(wl, bp);
 		mutex_exit(&wl->wl_mtx);
@@ -2399,8 +2398,8 @@ wapbl_cache_sync(struct wapbl *wl, const
 	int force = 1;
 	int error;
 
-	/* Skip full cache sync if disabled, or when using FUA */
-	if (!wapbl_flush_disk_cache || WAPBL_USE_FUA(wl)) {
+	/* Skip full cache sync if disabled */
+	if (!wapbl_flush_disk_cache) {
 		return 0;
 	}
 	if (verbose) {
@@ -2459,8 +2458,10 @@ wapbl_write_commit(struct wapbl *wl, off
 	if (error)
 		return error;
 	/*
-	 * flush disk cache to ensure that blocks we've written are actually
+	 * Flush disk cache to ensure that blocks we've written are actually
 	 * written to the stable storage before the commit header.
+	 * This flushes to disk not only journal blocks, but also all
+	 * metadata blocks, written asynchronously since previous commit.
 	 *
 	 * XXX Calc checksum here, instead we do this for now
 	 */
@@ -2489,7 +2490,7 @@ wapbl_write_commit(struct wapbl *wl, off
 #ifdef _KERNEL
 	pbn = btodb(pbn << wc->wc_log_dev_bshift);
 #endif
-	error = wapbl_buffered_write(wc, wc->wc_len, wl, pbn);
+	error = wapbl_buffered_write(wc, wc->wc_len, wl, pbn, WAPBL_JFLAGS(wl));
 	if (error)
 		return error;
 	error = wapbl_buffered_flush(wl, true);
@@ -2497,10 +2498,12 @@ wapbl_write_commit(struct wapbl *wl, off
 		return error;
 
 	/*
-	 * flush disk cache to ensure that the commit header is actually
-	 * written before meta data blocks.
+	 * Flush disk cache to ensure that the commit header is actually
+	 * written before meta data blocks. Commit block is written using
+	 * FUA when enabled, in that case this flush is not needed.
 	 */
-	wapbl_cache_sync(wl, "2");
+	if (!WAPBL_USE_FUA(wl))
+		wapbl_cache_sync(wl, "2");
 
 	/*
 	 * If the generation number was zero, write it out a second time.

Reply via email to