Module Name: src Committed By: hannken Date: Sat Dec 8 07:24:42 UTC 2012
Modified Files: src/sys/kern: vfs_wapbl.c Log Message: Try to coalesce writes to the journal in MAXPHYS-sized and MAXPHYS-aligned blocks. Speeds up wapbl_flush() on RAID5 by a factor of 3-4. Discussed on tech-kern. Needs pullup to NetBSD-6. To generate a diff of this commit: cvs rdiff -u -r1.53 -r1.54 src/sys/kern/vfs_wapbl.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/kern/vfs_wapbl.c diff -u src/sys/kern/vfs_wapbl.c:1.53 src/sys/kern/vfs_wapbl.c:1.54 --- src/sys/kern/vfs_wapbl.c:1.53 Sat Nov 17 10:10:17 2012 +++ src/sys/kern/vfs_wapbl.c Sat Dec 8 07:24:42 2012 @@ -1,4 +1,4 @@ -/* $NetBSD: vfs_wapbl.c,v 1.53 2012/11/17 10:10:17 hannken Exp $ */ +/* $NetBSD: vfs_wapbl.c,v 1.54 2012/12/08 07:24:42 hannken Exp $ */ /*- * Copyright (c) 2003, 2008, 2009 The NetBSD Foundation, Inc. @@ -36,7 +36,7 @@ #define WAPBL_INTERNAL #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: vfs_wapbl.c,v 1.53 2012/11/17 10:10:17 hannken Exp $"); +__KERNEL_RCSID(0, "$NetBSD: vfs_wapbl.c,v 1.54 2012/12/08 07:24:42 hannken Exp $"); #include <sys/param.h> #include <sys/bitops.h> @@ -184,6 +184,10 @@ struct wapbl { SIMPLEQ_HEAD(, wapbl_entry) wl_entries; /* On disk transaction accounting */ + + u_char *wl_buffer; /* l: buffer for wapbl_buffered_write() */ + daddr_t wl_buffer_dblk; /* l: buffer disk block address */ + size_t wl_buffer_used; /* l: buffer current use */ }; #ifdef WAPBL_DEBUG_PRINT @@ -489,6 +493,9 @@ wapbl_start(struct wapbl ** wlp, struct wl->wl_dealloclens = wapbl_alloc(sizeof(*wl->wl_dealloclens) * wl->wl_dealloclim); + wl->wl_buffer = wapbl_alloc(MAXPHYS); + wl->wl_buffer_used = 0; + wapbl_inodetrk_init(wl, WAPBL_INODETRK_SIZE); /* Initialize the commit header */ @@ -537,6 +544,7 @@ wapbl_start(struct wapbl ** wlp, struct sizeof(*wl->wl_deallocblks) * wl->wl_dealloclim); wapbl_free(wl->wl_dealloclens, sizeof(*wl->wl_dealloclens) * wl->wl_dealloclim); + wapbl_free(wl->wl_buffer, MAXPHYS); wapbl_inodetrk_free(wl); wapbl_free(wl, sizeof(*wl)); @@ -716,6 +724,7 @@ wapbl_stop(struct wapbl *wl, int force) sizeof(*wl->wl_deallocblks) * wl->wl_dealloclim); wapbl_free(wl->wl_dealloclens, sizeof(*wl->wl_dealloclens) * wl->wl_dealloclim); + wapbl_free(wl->wl_buffer, MAXPHYS); wapbl_inodetrk_free(wl); cv_destroy(&wl->wl_reclaimable_cv); @@ -791,6 +800,81 @@ wapbl_read(void *data, size_t len, struc } /* + * Flush buffered 
data if any. + */ +static int +wapbl_buffered_flush(struct wapbl *wl) +{ + int error; + + if (wl->wl_buffer_used == 0) + return 0; + + error = wapbl_doio(wl->wl_buffer, wl->wl_buffer_used, + wl->wl_devvp, wl->wl_buffer_dblk, B_WRITE); + wl->wl_buffer_used = 0; + + return error; +} + +/* + * Write data to the log. + * Try to coalesce writes and emit MAXPHYS aligned blocks. + */ +static int +wapbl_buffered_write(void *data, size_t len, struct wapbl *wl, daddr_t pbn) +{ + int error; + size_t resid; + + /* + * If not adjacent to buffered data flush first. Disk block + * address is always valid for non-empty buffer. + */ + if (wl->wl_buffer_used > 0 && + pbn != wl->wl_buffer_dblk + btodb(wl->wl_buffer_used)) { + error = wapbl_buffered_flush(wl); + if (error) + return error; + } + /* + * If this write goes to an empty buffer we have to + * save the disk block address first. + */ + if (wl->wl_buffer_used == 0) + wl->wl_buffer_dblk = pbn; + /* + * Remaining space so this buffer ends on a MAXPHYS boundary. + * + * Cannot become less or equal zero as the buffer would have been + * flushed on the last call then. 
+ */ + resid = MAXPHYS - dbtob(wl->wl_buffer_dblk % btodb(MAXPHYS)) - + wl->wl_buffer_used; + KASSERT(resid > 0); + KASSERT(dbtob(btodb(resid)) == resid); + if (len >= resid) { + memcpy(wl->wl_buffer + wl->wl_buffer_used, data, resid); + wl->wl_buffer_used += resid; + error = wapbl_doio(wl->wl_buffer, wl->wl_buffer_used, + wl->wl_devvp, wl->wl_buffer_dblk, B_WRITE); + data = (uint8_t *)data + resid; + len -= resid; + wl->wl_buffer_dblk = pbn + btodb(resid); + wl->wl_buffer_used = 0; + if (error) + return error; + } + KASSERT(len < MAXPHYS); + if (len > 0) { + memcpy(wl->wl_buffer + wl->wl_buffer_used, data, len); + wl->wl_buffer_used += len; + } + + return 0; +} + +/* * Off is byte offset returns new offset for next write * handles log wraparound */ @@ -813,7 +897,7 @@ wapbl_circ_write(struct wapbl *wl, void #ifdef _KERNEL pbn = btodb(pbn << wl->wl_log_dev_bshift); #endif - error = wapbl_write(data, slen, wl->wl_devvp, pbn); + error = wapbl_buffered_write(data, slen, wl, pbn); if (error) return error; data = (uint8_t *)data + slen; @@ -824,7 +908,7 @@ wapbl_circ_write(struct wapbl *wl, void #ifdef _KERNEL pbn = btodb(pbn << wl->wl_log_dev_bshift); #endif - error = wapbl_write(data, len, wl->wl_devvp, pbn); + error = wapbl_buffered_write(data, len, wl, pbn); if (error) return error; off += len; @@ -1967,6 +2051,9 @@ wapbl_write_commit(struct wapbl *wl, off int error; daddr_t pbn; + error = wapbl_buffered_flush(wl); + if (error) + return error; /* * flush disk cache to ensure that blocks we've written are actually * written to the stable storage before the commit header. @@ -1998,7 +2085,10 @@ wapbl_write_commit(struct wapbl *wl, off #ifdef _KERNEL pbn = btodb(pbn << wc->wc_log_dev_bshift); #endif - error = wapbl_write(wc, wc->wc_len, wl->wl_devvp, pbn); + error = wapbl_buffered_write(wc, wc->wc_len, wl, pbn); + if (error) + return error; + error = wapbl_buffered_flush(wl); if (error) return error;