Module Name:	src
Committed By:	yamt
Date:		Fri Feb 17 08:45:12 UTC 2012
Modified Files:
	src/sys/kern: bufq_priocscan.c
	src/sys/sys: buf.h param.h

Log Message:
BUFQ_PRIOCSCAN:
- to reduce CPU consumption for a long queue, maintain the sorted lists
  of buffers with rbtree instead of TAILQ.  I vaguely remember the
  problem being pointed out by someone on a public mailing list a while
  ago; sorry, I can't remember who or where.
- add some #ifdef'ed-out experimental code.
- bump the kernel version for the struct buf change.

To generate a diff of this commit:
cvs rdiff -u -r1.16 -r1.17 src/sys/kern/bufq_priocscan.c
cvs rdiff -u -r1.118 -r1.119 src/sys/sys/buf.h
cvs rdiff -u -r1.409 -r1.410 src/sys/sys/param.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
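[Editor's note] For readers unfamiliar with the rbtree(3) interface this
commit switches to, here is a user-space sketch of the same pattern in
miniature: an rb_tree_ops_t comparator pair, rb_tree_insert_node() for
O(log n) sorted insertion (replacing the old linear TAILQ walk in
cscan_put), and rb_tree_find_node_geq() with a wrap-around to the
leftmost node for the cyclical scan in cscan_get.  This is an
illustration only, assuming a NetBSD system where rbtree(3) lives in
libc; "struct req" and the *_example names are invented for the sketch
and are not part of the commit.

/* editor's sketch, not part of the commit */
#include <sys/rbtree.h>
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct req {
	rb_node_t r_node;	/* embedded linkage, like b_u.u_rbnode */
	int64_t r_blkno;	/* sort key, like b_rawblkno */
};

/* positive if n1 > n2; keys may collide, so tie-break on identity */
static signed int
req_compare_nodes(void *ctx, const void *n1, const void *n2)
{
	const struct req *r1 = n1, *r2 = n2;

	if (r1->r_blkno != r2->r_blkno)
		return (r1->r_blkno > r2->r_blkno) ? 1 : -1;
	return (r1 > r2) ? 1 : (r1 < r2) ? -1 : 0;
}

/* positive if the node lies beyond the bare key */
static signed int
req_compare_key(void *ctx, const void *n, const void *k)
{
	const struct req *r = n;
	const int64_t *key = k;

	return (r->r_blkno > *key) ? 1 : (r->r_blkno < *key) ? -1 : 0;
}

static const rb_tree_ops_t req_tree_ops = {
	.rbto_compare_nodes = req_compare_nodes,
	.rbto_compare_key = req_compare_key,
	.rbto_node_offset = offsetof(struct req, r_node),
	.rbto_context = NULL,
};

/* O(log n) sorted insertion; the old TAILQ code walked the list */
static void
cscan_put_example(rb_tree_t *t, struct req *r)
{
	struct req *o = rb_tree_insert_node(t, r);

	assert(o == r);	/* comparator never reports two nodes equal */
}

/*
 * Cyclical scan: take the first request at or beyond the last
 * serviced position; if there is none, wrap to the lowest one.
 */
static struct req *
cscan_get_example(rb_tree_t *t, int64_t *lastkey)
{
	struct req *r = rb_tree_find_node_geq(t, lastkey);

	if (r == NULL)
		r = rb_tree_iterate(t, NULL, RB_DIR_LEFT);
	if (r != NULL) {
		rb_tree_remove_node(t, r);
		*lastkey = r->r_blkno + 1;	/* resume past this one */
	}
	return r;
}

int
main(void)
{
	struct req reqs[] = {{.r_blkno = 70}, {.r_blkno = 10}, {.r_blkno = 40}};
	rb_tree_t tree;
	int64_t last = 30;	/* pretend the head sits at block 30 */
	struct req *r;

	rb_tree_init(&tree, &req_tree_ops);
	for (size_t i = 0; i < 3; i++)
		cscan_put_example(&tree, &reqs[i]);
	/* services 40, 70, then wraps around to 10 */
	while ((r = cscan_get_example(&tree, &last)) != NULL)
		printf("%jd\n", (intmax_t)r->r_blkno);
	return 0;
}

The pointer tie-break in req_compare_nodes mirrors how the commit copes
with non-unique rawblkno/cylinder keys: the comparator never declares
two distinct buffers equal, which is what the KASSERT(obp == bp) in the
new cscan_put relies on.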
Modified files:

Index: src/sys/kern/bufq_priocscan.c
diff -u src/sys/kern/bufq_priocscan.c:1.16 src/sys/kern/bufq_priocscan.c:1.17
--- src/sys/kern/bufq_priocscan.c:1.16	Mon Nov  7 08:44:16 2011
+++ src/sys/kern/bufq_priocscan.c	Fri Feb 17 08:45:11 2012
@@ -1,7 +1,7 @@
-/*	$NetBSD: bufq_priocscan.c,v 1.16 2011/11/07 08:44:16 hannken Exp $	*/
+/*	$NetBSD: bufq_priocscan.c,v 1.17 2012/02/17 08:45:11 yamt Exp $	*/
 
 /*-
- * Copyright (c)2004,2005,2006,2008,2009 YAMAMOTO Takashi,
+ * Copyright (c)2004,2005,2006,2008,2009,2011,2012 YAMAMOTO Takashi,
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: bufq_priocscan.c,v 1.16 2011/11/07 08:44:16 hannken Exp $");
+__KERNEL_RCSID(0, "$NetBSD: bufq_priocscan.c,v 1.17 2012/02/17 08:45:11 yamt Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -35,99 +35,171 @@ __KERNEL_RCSID(0, "$NetBSD: bufq_priocsc
 #include <sys/bufq.h>
 #include <sys/bufq_impl.h>
 #include <sys/kmem.h>
+#include <sys/rbtree.h>
+
+#undef PRIOCSCAN_USE_GLOBAL_POSITION
 
 /*
  * Cyclical scan (CSCAN)
  */
-TAILQ_HEAD(bqhead, buf);
+
+struct cscan_key {
+	daddr_t k_rawblkno;
+	int k_cylinder;
+};
+
 struct cscan_queue {
-	struct bqhead cq_head[2];	/* actual lists of buffers */
-	unsigned int cq_idx;		/* current list index */
-	int cq_lastcylinder;		/* b_cylinder of the last request */
-	daddr_t cq_lastrawblkno;	/* b_rawblkno of the last request */
+	rb_tree_t cq_buffers;		/* ordered list of buffers */
+#if !defined(PRIOCSCAN_USE_GLOBAL_POSITION)
+	struct cscan_key cq_lastkey;	/* key of last request */
+#endif /* !defined(PRIOCSCAN_USE_GLOBAL_POSITION) */
+	int cq_sortby;			/* BUFQ_SORT_MASK */
+	rb_tree_ops_t cq_ops;
 };
 
-static inline int cscan_empty(const struct cscan_queue *);
-static void cscan_put(struct cscan_queue *, struct buf *, int);
-static struct buf *cscan_get(struct cscan_queue *, int);
-static void cscan_init(struct cscan_queue *);
+static signed int
+buf_cmp(const struct buf *b1, const struct buf *b2, int sortby)
+{
 
-static inline int
-cscan_empty(const struct cscan_queue *q)
+	if (buf_inorder(b2, b1, sortby)) {
+		return 1;	/* b1 > b2 */
+	}
+	if (buf_inorder(b1, b2, sortby)) {
+		return -1;	/* b1 < b2 */
+	}
+	return 0;
+}
+
+/* return positive if n1 > n2 */
+static signed int
+cscan_tree_compare_nodes(void *context, const void *n1, const void *n2)
 {
+	const struct cscan_queue * const q = context;
+	const struct buf * const b1 = n1;
+	const struct buf * const b2 = n2;
+	const int sortby = q->cq_sortby;
+	const int diff = buf_cmp(b1, b2, sortby);
+
+	/*
+	 * XXX rawblkno/cylinder might not be unique.  eg. unbuffered i/o
+	 */
 
-	return TAILQ_EMPTY(&q->cq_head[0]) && TAILQ_EMPTY(&q->cq_head[1]);
+	if (diff != 0) {
+		return diff;
+	}
+
+	/*
+	 * XXX rawblkno/cylinder might not be unique.  eg. unbuffered i/o
+	 */
+	if (b1 > b2) {
+		return 1;
+	}
+	if (b1 < b2) {
+		return -1;
+	}
+	return 0;
 }
 
-static void
-cscan_put(struct cscan_queue *q, struct buf *bp, int sortby)
+/* return positive if n1 > k2 */
+static signed int
+cscan_tree_compare_key(void *context, const void *n1, const void *k2)
 {
-	struct buf tmp;
-	struct buf *it;
-	struct bqhead *bqh;
-	unsigned int idx;
+	const struct cscan_queue * const q = context;
+	const struct buf * const b1 = n1;
+	const struct cscan_key * const key = k2;
+	const struct buf tmp = {
+		.b_rawblkno = key->k_rawblkno,
+		.b_cylinder = key->k_cylinder,
+	};
+	const struct buf *b2 = &tmp;
+	const int sortby = q->cq_sortby;
+
+	return buf_cmp(b1, b2, sortby);
+}
+
+static void __unused
+cscan_dump(struct cscan_queue *cq)
+{
+	const int sortby = cq->cq_sortby;
+	struct buf *bp;
 
-	tmp.b_cylinder = q->cq_lastcylinder;
-	tmp.b_rawblkno = q->cq_lastrawblkno;
+	RB_TREE_FOREACH(bp, &cq->cq_buffers) {
+		if (sortby == BUFQ_SORT_RAWBLOCK) {
+			printf(" %jd", (intmax_t)bp->b_rawblkno);
+		} else {
+			printf(" %jd/%jd",
+			    (intmax_t)bp->b_cylinder, (intmax_t)bp->b_rawblkno);
+		}
+	}
+}
 
-	if (buf_inorder(bp, &tmp, sortby))
-		idx = 1 - q->cq_idx;
-	else
-		idx = q->cq_idx;
+static inline bool
+cscan_empty(struct cscan_queue *q)
+{
 
-	bqh = &q->cq_head[idx];
+	/* XXX this might do more work than necessary */
+	return rb_tree_iterate(&q->cq_buffers, NULL, RB_DIR_LEFT) == NULL;
+}
 
-	TAILQ_FOREACH(it, bqh, b_actq)
-		if (buf_inorder(bp, it, sortby))
-			break;
+static void
+cscan_put(struct cscan_queue *q, struct buf *bp)
+{
+	struct buf *obp;
 
-	if (it != NULL)
-		TAILQ_INSERT_BEFORE(it, bp, b_actq);
-	else
-		TAILQ_INSERT_TAIL(bqh, bp, b_actq);
+	obp = rb_tree_insert_node(&q->cq_buffers, bp);
+	KASSERT(obp == bp); /* see cscan_tree_compare_nodes */
 }
 
 static struct buf *
-cscan_get(struct cscan_queue *q, int remove)
+cscan_get(struct cscan_queue *q, int remove, struct cscan_key *key)
 {
-	unsigned int idx = q->cq_idx;
-	struct bqhead *bqh;
 	struct buf *bp;
 
-	bqh = &q->cq_head[idx];
-	bp = TAILQ_FIRST(bqh);
-
+	bp = rb_tree_find_node_geq(&q->cq_buffers, key);
+	KDASSERT(bp == NULL || cscan_tree_compare_key(q, bp, key) >= 0);
 	if (bp == NULL) {
-		/* switch queue */
-		idx = 1 - idx;
-		bqh = &q->cq_head[idx];
-		bp = TAILQ_FIRST(bqh);
+		bp = rb_tree_iterate(&q->cq_buffers, NULL, RB_DIR_LEFT);
+		KDASSERT(cscan_tree_compare_key(q, bp, key) < 0);
 	}
-
-	KDASSERT((bp != NULL && !cscan_empty(q)) ||
-	    (bp == NULL && cscan_empty(q)));
-
 	if (bp != NULL && remove) {
-		q->cq_idx = idx;
-		TAILQ_REMOVE(bqh, bp, b_actq);
+#if defined(DEBUG)
+		struct buf *nbp;
+#endif /* defined(DEBUG) */
 
-		q->cq_lastcylinder = bp->b_cylinder;
-		q->cq_lastrawblkno =
-		    bp->b_rawblkno + (bp->b_bcount >> DEV_BSHIFT);
+		rb_tree_remove_node(&q->cq_buffers, bp);
+		/*
+		 * remember the head position.
+		 */
+		key->k_cylinder = bp->b_cylinder;
+		key->k_rawblkno = bp->b_rawblkno + (bp->b_bcount >> DEV_BSHIFT);
+#if defined(DEBUG)
+		nbp = rb_tree_find_node_geq(&q->cq_buffers, key);
+		if (nbp != NULL && cscan_tree_compare_nodes(q, nbp, bp) < 0) {
+			panic("%s: wrong order %p < %p\n", __func__,
+			    nbp, bp);
+		}
+#endif /* defined(DEBUG) */
 	}
-
-	return (bp);
+	return bp;
 }
 
 static void
-cscan_init(struct cscan_queue *q)
+cscan_init(struct cscan_queue *q, int sortby)
 {
+	static const rb_tree_ops_t cscan_tree_ops = {
+		.rbto_compare_nodes = cscan_tree_compare_nodes,
+		.rbto_compare_key = cscan_tree_compare_key,
+		.rbto_node_offset = offsetof(struct buf, b_u.u_rbnode),
+		.rbto_context = NULL,
+	};
 
-	TAILQ_INIT(&q->cq_head[0]);
-	TAILQ_INIT(&q->cq_head[1]);
+	q->cq_sortby = sortby;
+	/* XXX copy ops to workaround rbtree.h API limitation */
+	q->cq_ops = cscan_tree_ops;
+	q->cq_ops.rbto_context = q;
+	rb_tree_init(&q->cq_buffers, &q->cq_ops);
 }
 
-
 /*
  * Per-prioritiy CSCAN.
  *
@@ -144,14 +216,13 @@ struct priocscan_queue {
 
 struct bufq_priocscan {
 	struct priocscan_queue bq_queue[PRIOCSCAN_NQUEUE];
 
-#if 0
+#if defined(PRIOCSCAN_USE_GLOBAL_POSITION)
 	/*
 	 * XXX using "global" head position can reduce positioning time
 	 * when switching between queues.
 	 * although it might affect against fairness.
 	 */
-	daddr_t bq_lastrawblkno;
-	int bq_lastcylinder;
+	struct cscan_key bq_lastkey;
 #endif
 };
 
@@ -193,10 +264,9 @@ bufq_priocscan_put(struct bufq_state *bu
 {
 	struct bufq_priocscan *q = bufq->bq_private;
 	struct cscan_queue *cq;
-	const int sortby = bufq->bq_flags & BUFQ_SORT_MASK;
 
 	cq = bufq_priocscan_selectqueue(q, bp);
-	cscan_put(cq, bp, sortby);
+	cscan_put(cq, bp);
 }
 
 static struct buf *
@@ -307,7 +377,13 @@ bufq_priocscan_get(struct bufq_state *bu
 	 * finally, get a request from the selected queue.
 	 */
 	KDASSERT(!cscan_empty(&pq->q_queue));
-	bp = cscan_get(&pq->q_queue, remove);
+	bp = cscan_get(&pq->q_queue, remove,
+#if defined(PRIOCSCAN_USE_GLOBAL_POSITION)
+	    &q->bq_lastkey
+#else /* defined(PRIOCSCAN_USE_GLOBAL_POSITION) */
+	    &pq->q_queue.cq_lastkey
+#endif /* defined(PRIOCSCAN_USE_GLOBAL_POSITION) */
+	    );
 	KDASSERT(bp != NULL);
 	KDASSERT(&pq->q_queue == bufq_priocscan_selectqueue(q, bp));
 
@@ -318,19 +394,20 @@ static struct buf *
 bufq_priocscan_cancel(struct bufq_state *bufq, struct buf *bp)
 {
 	struct bufq_priocscan * const q = bufq->bq_private;
-	unsigned int i, j;
+	unsigned int i;
 
 	for (i = 0; i < PRIOCSCAN_NQUEUE; i++) {
 		struct cscan_queue * const cq = &q->bq_queue[i].q_queue;
-		for (j = 0; j < 2; j++) {
-			struct bqhead * const bqh = &cq->cq_head[j];
-			struct buf *bq;
-
-			TAILQ_FOREACH(bq, bqh, b_actq) {
-				if (bq == bp) {
-					TAILQ_REMOVE(bqh, bp, b_actq);
-					return bp;
-				}
+		struct buf *it;
+
+		/*
+		 * XXX probably could be faster but the cancel functionality
+		 * is not widely used anyway.
+		 */
+		RB_TREE_FOREACH(it, &cq->cq_buffers) {
+			if (it == bp) {
+				rb_tree_remove_node(&cq->cq_buffers, bp);
+				return bp;
 			}
 		}
 	}
@@ -349,6 +426,7 @@ static void
 bufq_priocscan_init(struct bufq_state *bufq)
 {
 	struct bufq_priocscan *q;
+	const int sortby = bufq->bq_flags & BUFQ_SORT_MASK;
 	unsigned int i;
 
 	bufq->bq_get = bufq_priocscan_get;
@@ -361,6 +439,6 @@ bufq_priocscan_init(struct bufq_state *b
 	for (i = 0; i < PRIOCSCAN_NQUEUE; i++) {
 		struct cscan_queue *cq = &q->bq_queue[i].q_queue;
 
-		cscan_init(cq);
+		cscan_init(cq, sortby);
 	}
 }
 
Index: src/sys/sys/buf.h
diff -u src/sys/sys/buf.h:1.118 src/sys/sys/buf.h:1.119
--- src/sys/sys/buf.h:1.118	Mon Nov 21 04:36:05 2011
+++ src/sys/sys/buf.h	Fri Feb 17 08:45:11 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: buf.h,v 1.118 2011/11/21 04:36:05 christos Exp $	*/
+/*	$NetBSD: buf.h,v 1.119 2012/02/17 08:45:11 yamt Exp $	*/
 
 /*-
  * Copyright (c) 1999, 2000, 2007, 2008 The NetBSD Foundation, Inc.
@@ -73,6 +73,7 @@
 #include <sys/queue.h>
 #include <sys/mutex.h>
 #include <sys/condvar.h>
+#include <sys/rbtree.h>
 #if defined(_KERNEL)
 #include <sys/workqueue.h>
 #endif /* defined(_KERNEL) */
@@ -103,6 +104,7 @@ extern kmutex_t buffer_lock;
 struct buf {
 	union {
 		TAILQ_ENTRY(buf) u_actq;
+		rb_node_t u_rbnode;
 #if defined(_KERNEL) /* u_work is smaller than u_actq. XXX */
 		struct work u_work;
 #endif /* defined(_KERNEL) */
 
Index: src/sys/sys/param.h
diff -u src/sys/sys/param.h:1.409 src/sys/sys/param.h:1.410
--- src/sys/sys/param.h:1.409	Wed Feb 15 23:05:02 2012
+++ src/sys/sys/param.h	Fri Feb 17 08:45:12 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: param.h,v 1.409 2012/02/15 23:05:02 riz Exp $	*/
+/*	$NetBSD: param.h,v 1.410 2012/02/17 08:45:12 yamt Exp $	*/
 
 /*-
  * Copyright (c) 1982, 1986, 1989, 1993
@@ -63,7 +63,7 @@
  *	2.99.9		(299000900)
  */
 
-#define	__NetBSD_Version__	699000100	/* NetBSD 6.99.1 */
+#define	__NetBSD_Version__	699000200	/* NetBSD 6.99.2 */
 
 #define __NetBSD_Prereq__(M,m,p) (((((M) * 100000000) + \
     (m) * 1000000) + (p) * 100) <= __NetBSD_Version__)
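[Editor's note] The version bump matters to out-of-tree code because the
union in struct buf gained u_rbnode.  A typical feature test against the
bumped version uses the __NetBSD_Prereq__ macro shown above; the
fragment below is illustrative, not part of the commit:

#include <sys/param.h>

/* 6.99.2 encodes as 6*100000000 + 99*1000000 + 2*100 = 699000200 */
#if __NetBSD_Prereq__(6,99,2)
/* struct buf carries b_u.u_rbnode; rbtree-based BUFQ_PRIOCSCAN present */
#else
/* older kernel: the union has only u_actq (and u_work under _KERNEL) */
#endif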