Module Name:    src
Committed By:   hannken
Date:           Sat Dec  8 07:24:42 UTC 2012

Modified Files:
        src/sys/kern: vfs_wapbl.c

Log Message:
Try to coalesce writes to the journal in MAXPHYS sized and aligned blocks.
Speeds up wapbl_flush() on raid5 by a factor of 3-4.

Discussed on tech-kern.

Needs pullup to NetBSD-6.


To generate a diff of this commit:
cvs rdiff -u -r1.53 -r1.54 src/sys/kern/vfs_wapbl.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/kern/vfs_wapbl.c
diff -u src/sys/kern/vfs_wapbl.c:1.53 src/sys/kern/vfs_wapbl.c:1.54
--- src/sys/kern/vfs_wapbl.c:1.53	Sat Nov 17 10:10:17 2012
+++ src/sys/kern/vfs_wapbl.c	Sat Dec  8 07:24:42 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: vfs_wapbl.c,v 1.53 2012/11/17 10:10:17 hannken Exp $	*/
+/*	$NetBSD: vfs_wapbl.c,v 1.54 2012/12/08 07:24:42 hannken Exp $	*/
 
 /*-
  * Copyright (c) 2003, 2008, 2009 The NetBSD Foundation, Inc.
@@ -36,7 +36,7 @@
 #define WAPBL_INTERNAL
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfs_wapbl.c,v 1.53 2012/11/17 10:10:17 hannken Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_wapbl.c,v 1.54 2012/12/08 07:24:42 hannken Exp $");
 
 #include <sys/param.h>
 #include <sys/bitops.h>
@@ -184,6 +184,10 @@ struct wapbl {
 
 	SIMPLEQ_HEAD(, wapbl_entry) wl_entries; /* On disk transaction
 						   accounting */
+
+	u_char *wl_buffer;	/* l:   buffer for wapbl_buffered_write() */
+	daddr_t wl_buffer_dblk;	/* l:   buffer disk block address */
+	size_t wl_buffer_used;	/* l:   buffer current use */
 };
 
 #ifdef WAPBL_DEBUG_PRINT
@@ -489,6 +493,9 @@ wapbl_start(struct wapbl ** wlp, struct 
 	wl->wl_dealloclens = wapbl_alloc(sizeof(*wl->wl_dealloclens) *
 	    wl->wl_dealloclim);
 
+	wl->wl_buffer = wapbl_alloc(MAXPHYS);
+	wl->wl_buffer_used = 0;
+
 	wapbl_inodetrk_init(wl, WAPBL_INODETRK_SIZE);
 
 	/* Initialize the commit header */
@@ -537,6 +544,7 @@ wapbl_start(struct wapbl ** wlp, struct 
 	    sizeof(*wl->wl_deallocblks) * wl->wl_dealloclim);
 	wapbl_free(wl->wl_dealloclens,
 	    sizeof(*wl->wl_dealloclens) * wl->wl_dealloclim);
+	wapbl_free(wl->wl_buffer, MAXPHYS);
 	wapbl_inodetrk_free(wl);
 	wapbl_free(wl, sizeof(*wl));
 
@@ -716,6 +724,7 @@ wapbl_stop(struct wapbl *wl, int force)
 	    sizeof(*wl->wl_deallocblks) * wl->wl_dealloclim);
 	wapbl_free(wl->wl_dealloclens,
 	    sizeof(*wl->wl_dealloclens) * wl->wl_dealloclim);
+	wapbl_free(wl->wl_buffer, MAXPHYS);
 	wapbl_inodetrk_free(wl);
 
 	cv_destroy(&wl->wl_reclaimable_cv);
@@ -791,6 +800,81 @@ wapbl_read(void *data, size_t len, struc
 }
 
 /*
+ * Write out buffered data, if any, with wapbl_doio() and empty the buffer.
+ */
+static int
+wapbl_buffered_flush(struct wapbl *wl)
+{
+	int error;
+
+	if (wl->wl_buffer_used == 0)
+		return 0;
+
+	error = wapbl_doio(wl->wl_buffer, wl->wl_buffer_used,
+	    wl->wl_devvp, wl->wl_buffer_dblk, B_WRITE);
+	wl->wl_buffer_used = 0;	/* emptied even if the write failed */
+
+	return error;
+}
+
+/*
+ * Write len bytes of data to the log at disk block pbn, returns 0 or error.
+ * Try to coalesce writes and emit MAXPHYS aligned blocks.
+ */
+static int
+wapbl_buffered_write(void *data, size_t len, struct wapbl *wl, daddr_t pbn)
+{
+	int error;
+	size_t resid;
+
+	/*
+	 * If not adjacent to buffered data flush first.  Disk block
+	 * address is always valid for non-empty buffer.
+	 */
+	if (wl->wl_buffer_used > 0 &&
+	    pbn != wl->wl_buffer_dblk + btodb(wl->wl_buffer_used)) {
+		error = wapbl_buffered_flush(wl);
+		if (error)
+			return error;
+	}
+	/*
+	 * If this write goes to an empty buffer we have to
+	 * save the disk block address first.
+	 */
+	if (wl->wl_buffer_used == 0)
+		wl->wl_buffer_dblk = pbn;
+	/*
+	 * Remaining space so this buffer ends on a MAXPHYS boundary.
+	 *
+	 * Cannot become less than or equal to zero as the buffer would
+	 * have been flushed on the last call then.
+	 */
+	resid = MAXPHYS - dbtob(wl->wl_buffer_dblk % btodb(MAXPHYS)) -
+	    wl->wl_buffer_used;
+	KASSERT(resid > 0);
+	KASSERT(dbtob(btodb(resid)) == resid);
+	if (len >= resid) {	/* buffer fills up, write it out now */
+		memcpy(wl->wl_buffer + wl->wl_buffer_used, data, resid);
+		wl->wl_buffer_used += resid;
+		error = wapbl_doio(wl->wl_buffer, wl->wl_buffer_used,
+		    wl->wl_devvp, wl->wl_buffer_dblk, B_WRITE);
+		data = (uint8_t *)data + resid;
+		len -= resid;
+		wl->wl_buffer_dblk = pbn + btodb(resid);
+		wl->wl_buffer_used = 0;
+		if (error)
+			return error;
+	}
+	KASSERT(len < MAXPHYS);	/* what is left gets buffered below */
+	if (len > 0) {
+		memcpy(wl->wl_buffer + wl->wl_buffer_used, data, len);
+		wl->wl_buffer_used += len;
+	}
+
+	return 0;
+}
+
+/*
  * Off is byte offset returns new offset for next write
  * handles log wraparound
  */
@@ -813,7 +897,7 @@ wapbl_circ_write(struct wapbl *wl, void 
 #ifdef _KERNEL
 		pbn = btodb(pbn << wl->wl_log_dev_bshift);
 #endif
-		error = wapbl_write(data, slen, wl->wl_devvp, pbn);
+		error = wapbl_buffered_write(data, slen, wl, pbn);
 		if (error)
 			return error;
 		data = (uint8_t *)data + slen;
@@ -824,7 +908,7 @@ wapbl_circ_write(struct wapbl *wl, void 
 #ifdef _KERNEL
 	pbn = btodb(pbn << wl->wl_log_dev_bshift);
 #endif
-	error = wapbl_write(data, len, wl->wl_devvp, pbn);
+	error = wapbl_buffered_write(data, len, wl, pbn);
 	if (error)
 		return error;
 	off += len;
@@ -1967,6 +2051,9 @@ wapbl_write_commit(struct wapbl *wl, off
 	int error;
 	daddr_t pbn;
 
+	error = wapbl_buffered_flush(wl);
+	if (error)
+		return error;
 	/*
 	 * flush disk cache to ensure that blocks we've written are actually
 	 * written to the stable storage before the commit header.
@@ -1998,7 +2085,10 @@ wapbl_write_commit(struct wapbl *wl, off
 #ifdef _KERNEL
 	pbn = btodb(pbn << wc->wc_log_dev_bshift);
 #endif
-	error = wapbl_write(wc, wc->wc_len, wl->wl_devvp, pbn);
+	error = wapbl_buffered_write(wc, wc->wc_len, wl, pbn);
+	if (error)
+		return error;
+	error = wapbl_buffered_flush(wl);
 	if (error)
 		return error;
 

Reply via email to