Module Name:    src
Committed By:   yamt
Date:           Fri Feb 17 08:45:12 UTC 2012

Modified Files:
        src/sys/kern: bufq_priocscan.c
        src/sys/sys: buf.h param.h

Log Message:
BUFQ_PRIOCSCAN:

- To reduce CPU consumption for a long queue, maintain the sorted lists of
  buffers with an rbtree instead of a TAILQ.  I vaguely remember this
  problem being pointed out by someone on a public mailing list a while
  ago; sorry, I can't remember who or where.  (A sketch of the lookup
  pattern follows below.)

- Add some #ifdef'ed-out experimental code.

- Bump the kernel version for the struct buf change.
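
As a rough illustration of the new scheme (a minimal sketch against the
rbtree(3) API; "struct req", "req_cmp_nodes" and friends are invented for
the example and are not code from this commit), one CSCAN step becomes two
O(log n) tree operations instead of an O(n) walk along a sorted TAILQ:

    #include <sys/types.h>      /* daddr_t */
    #include <stddef.h>         /* offsetof; <sys/param.h> in the kernel */
    #include <sys/rbtree.h>     /* rb_tree_*; <rbtree.h> in userland */

    struct req {
            rb_node_t r_node;
            daddr_t r_blkno;
    };

    static signed int
    req_cmp_nodes(void *ctx, const void *n1, const void *n2)
    {
            const struct req * const r1 = n1;
            const struct req * const r2 = n2;

            if (r1->r_blkno > r2->r_blkno)
                    return 1;
            if (r1->r_blkno < r2->r_blkno)
                    return -1;
            /* tiebreak by address so distinct nodes never compare equal */
            return (r1 > r2) ? 1 : (r1 < r2) ? -1 : 0;
    }

    static signed int
    req_cmp_key(void *ctx, const void *n, const void *k)
    {
            const struct req * const r = n;
            const daddr_t * const blkno = k;

            return (r->r_blkno > *blkno) ? 1 :
                   (r->r_blkno < *blkno) ? -1 : 0;
    }

    static const rb_tree_ops_t req_tree_ops = {
            .rbto_compare_nodes = req_cmp_nodes,
            .rbto_compare_key = req_cmp_key,
            .rbto_node_offset = offsetof(struct req, r_node),
            .rbto_context = NULL,
    };

    /*
     * one CSCAN step: take the first request at or beyond the current
     * head position; if there is none, wrap around to the smallest.
     */
    static struct req *
    cscan_next(rb_tree_t *t, daddr_t pos)
    {
            struct req *r;

            r = rb_tree_find_node_geq(t, &pos);
            if (r == NULL)
                    r = rb_tree_iterate(t, NULL, RB_DIR_LEFT); /* minimum */
            return r;
    }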


To generate a diff of this commit:
cvs rdiff -u -r1.16 -r1.17 src/sys/kern/bufq_priocscan.c
cvs rdiff -u -r1.118 -r1.119 src/sys/sys/buf.h
cvs rdiff -u -r1.409 -r1.410 src/sys/sys/param.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/kern/bufq_priocscan.c
diff -u src/sys/kern/bufq_priocscan.c:1.16 src/sys/kern/bufq_priocscan.c:1.17
--- src/sys/kern/bufq_priocscan.c:1.16	Mon Nov  7 08:44:16 2011
+++ src/sys/kern/bufq_priocscan.c	Fri Feb 17 08:45:11 2012
@@ -1,7 +1,7 @@
-/*	$NetBSD: bufq_priocscan.c,v 1.16 2011/11/07 08:44:16 hannken Exp $	*/
+/*	$NetBSD: bufq_priocscan.c,v 1.17 2012/02/17 08:45:11 yamt Exp $	*/
 
 /*-
- * Copyright (c)2004,2005,2006,2008,2009 YAMAMOTO Takashi,
+ * Copyright (c)2004,2005,2006,2008,2009,2011,2012 YAMAMOTO Takashi,
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: bufq_priocscan.c,v 1.16 2011/11/07 08:44:16 hannken Exp $");
+__KERNEL_RCSID(0, "$NetBSD: bufq_priocscan.c,v 1.17 2012/02/17 08:45:11 yamt Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -35,99 +35,171 @@ __KERNEL_RCSID(0, "$NetBSD: bufq_priocsc
 #include <sys/bufq.h>
 #include <sys/bufq_impl.h>
 #include <sys/kmem.h>
+#include <sys/rbtree.h>
+
+#undef	PRIOCSCAN_USE_GLOBAL_POSITION
 
 /*
  * Cyclical scan (CSCAN)
  */
-TAILQ_HEAD(bqhead, buf);
+
+struct cscan_key {
+	daddr_t	k_rawblkno;
+	int k_cylinder;
+};
+
 struct cscan_queue {
-	struct bqhead cq_head[2];	/* actual lists of buffers */
-	unsigned int cq_idx;		/* current list index */
-	int cq_lastcylinder;		/* b_cylinder of the last request */
-	daddr_t cq_lastrawblkno;	/* b_rawblkno of the last request */
+	rb_tree_t cq_buffers;		/* ordered list of buffers */
+#if !defined(PRIOCSCAN_USE_GLOBAL_POSITION)
+	struct cscan_key cq_lastkey;	/* key of last request */
+#endif /* !defined(PRIOCSCAN_USE_GLOBAL_POSITION) */
+	int cq_sortby;			/* BUFQ_SORT_MASK */
+	rb_tree_ops_t cq_ops;
 };
 
-static inline int cscan_empty(const struct cscan_queue *);
-static void cscan_put(struct cscan_queue *, struct buf *, int);
-static struct buf *cscan_get(struct cscan_queue *, int);
-static void cscan_init(struct cscan_queue *);
+static signed int
+buf_cmp(const struct buf *b1, const struct buf *b2, int sortby)
+{
 
-static inline int
-cscan_empty(const struct cscan_queue *q)
+	if (buf_inorder(b2, b1, sortby)) {
+		return 1;	/* b1 > b2 */
+	}
+	if (buf_inorder(b1, b2, sortby)) {
+		return -1;	/* b1 < b2 */
+	}
+	return 0;
+}
+
+/* return positive if n1 > n2 */
+static signed int
+cscan_tree_compare_nodes(void *context, const void *n1, const void *n2)
 {
+	const struct cscan_queue * const q = context;
+	const struct buf * const b1 = n1;
+	const struct buf * const b2 = n2;
+	const int sortby = q->cq_sortby;
+	const int diff = buf_cmp(b1, b2, sortby);
 
-	return TAILQ_EMPTY(&q->cq_head[0]) && TAILQ_EMPTY(&q->cq_head[1]);
+	if (diff != 0) {
+		return diff;
+	}
+
+	/*
+	 * XXX rawblkno/cylinder might not be unique.  eg. unbuffered i/o
+	 */
+	if (b1 > b2) {
+		return 1;
+	}
+	if (b1 < b2) {
+		return -1;
+	}
+	return 0;
 }
 
-static void
-cscan_put(struct cscan_queue *q, struct buf *bp, int sortby)
+/* return positive if n1 > k2 */
+static signed int
+cscan_tree_compare_key(void *context, const void *n1, const void *k2)
 {
-	struct buf tmp;
-	struct buf *it;
-	struct bqhead *bqh;
-	unsigned int idx;
+	const struct cscan_queue * const q = context;
+	const struct buf * const b1 = n1;
+	const struct cscan_key * const key = k2;
+	const struct buf tmp = {
+		.b_rawblkno = key->k_rawblkno,
+		.b_cylinder = key->k_cylinder,
+	};
+	const struct buf *b2 = &tmp;
+	const int sortby = q->cq_sortby;
+
+	return buf_cmp(b1, b2, sortby);
+}
+
+static void __unused
+cscan_dump(struct cscan_queue *cq)
+{
+	const int sortby = cq->cq_sortby;
+	struct buf *bp;
 
-	tmp.b_cylinder = q->cq_lastcylinder;
-	tmp.b_rawblkno = q->cq_lastrawblkno;
+	RB_TREE_FOREACH(bp, &cq->cq_buffers) {
+		if (sortby == BUFQ_SORT_RAWBLOCK) {
+			printf(" %jd", (intmax_t)bp->b_rawblkno);
+		} else {
+			printf(" %jd/%jd",
+			    (intmax_t)bp->b_cylinder, (intmax_t)bp->b_rawblkno);
+		}
+	}
+}
 
-	if (buf_inorder(bp, &tmp, sortby))
-		idx = 1 - q->cq_idx;
-	else
-		idx = q->cq_idx;
+static inline bool
+cscan_empty(struct cscan_queue *q)
+{
 
-	bqh = &q->cq_head[idx];
+	/* XXX this might do more work than necessary */
+	return rb_tree_iterate(&q->cq_buffers, NULL, RB_DIR_LEFT) == NULL;
+}
 
-	TAILQ_FOREACH(it, bqh, b_actq)
-		if (buf_inorder(bp, it, sortby))
-			break;
+static void
+cscan_put(struct cscan_queue *q, struct buf *bp)
+{
+	struct buf *obp;
 
-	if (it != NULL)
-		TAILQ_INSERT_BEFORE(it, bp, b_actq);
-	else
-		TAILQ_INSERT_TAIL(bqh, bp, b_actq);
+	obp = rb_tree_insert_node(&q->cq_buffers, bp);
+	KASSERT(obp == bp); /* see cscan_tree_compare_nodes */
 }
 
 static struct buf *
-cscan_get(struct cscan_queue *q, int remove)
+cscan_get(struct cscan_queue *q, int remove, struct cscan_key *key)
 {
-	unsigned int idx = q->cq_idx;
-	struct bqhead *bqh;
 	struct buf *bp;
 
-	bqh = &q->cq_head[idx];
-	bp = TAILQ_FIRST(bqh);
-
+	bp = rb_tree_find_node_geq(&q->cq_buffers, key);
+	KDASSERT(bp == NULL || cscan_tree_compare_key(q, bp, key) >= 0);
 	if (bp == NULL) {
-		/* switch queue */
-		idx = 1 - idx;
-		bqh = &q->cq_head[idx];
-		bp = TAILQ_FIRST(bqh);
+		bp = rb_tree_iterate(&q->cq_buffers, NULL, RB_DIR_LEFT);
+		KDASSERT(cscan_tree_compare_key(q, bp, key) < 0);
 	}
-
-	KDASSERT((bp != NULL && !cscan_empty(q)) ||
-	         (bp == NULL && cscan_empty(q)));
-
 	if (bp != NULL && remove) {
-		q->cq_idx = idx;
-		TAILQ_REMOVE(bqh, bp, b_actq);
+#if defined(DEBUG)
+		struct buf *nbp;
+#endif /* defined(DEBUG) */
 
-		q->cq_lastcylinder = bp->b_cylinder;
-		q->cq_lastrawblkno =
-		    bp->b_rawblkno + (bp->b_bcount >> DEV_BSHIFT);
+		rb_tree_remove_node(&q->cq_buffers, bp);
+		/*
+		 * remember the head position.
+		 */
+		key->k_cylinder = bp->b_cylinder;
+		key->k_rawblkno = bp->b_rawblkno + (bp->b_bcount >> DEV_BSHIFT);
+#if defined(DEBUG)
+		nbp = rb_tree_find_node_geq(&q->cq_buffers, key);
+		if (nbp != NULL && cscan_tree_compare_nodes(q, nbp, bp) < 0) {
+			panic("%s: wrong order %p < %p\n", __func__,
+			    nbp, bp);
+		}
+#endif /* defined(DEBUG) */
 	}
-
-	return (bp);
+	return bp;
 }
 
 static void
-cscan_init(struct cscan_queue *q)
+cscan_init(struct cscan_queue *q, int sortby)
 {
+	static const rb_tree_ops_t cscan_tree_ops = {
+		.rbto_compare_nodes = cscan_tree_compare_nodes,
+		.rbto_compare_key = cscan_tree_compare_key,
+		.rbto_node_offset = offsetof(struct buf, b_u.u_rbnode),
+		.rbto_context = NULL,
+	};
 
-	TAILQ_INIT(&q->cq_head[0]);
-	TAILQ_INIT(&q->cq_head[1]);
+	q->cq_sortby = sortby;
+	/* XXX copy ops to workaround rbtree.h API limitation */
+	q->cq_ops = cscan_tree_ops;
+	q->cq_ops.rbto_context = q;
+	rb_tree_init(&q->cq_buffers, &q->cq_ops);
 }
 
-
 /*
  * Per-priority CSCAN.
  *
@@ -144,14 +216,13 @@ struct priocscan_queue {
 struct bufq_priocscan {
 	struct priocscan_queue bq_queue[PRIOCSCAN_NQUEUE];
 
-#if 0
+#if defined(PRIOCSCAN_USE_GLOBAL_POSITION)
 	/*
 	 * XXX using "global" head position can reduce positioning time
 	 * when switching between queues, although it might
 	 * adversely affect fairness.
 	 */
-	daddr_t bq_lastrawblkno;
-	int bq_lastcylinder;
+	struct cscan_key bq_lastkey;
 #endif
 };
 
@@ -193,10 +264,9 @@ bufq_priocscan_put(struct bufq_state *bu
 {
 	struct bufq_priocscan *q = bufq->bq_private;
 	struct cscan_queue *cq;
-	const int sortby = bufq->bq_flags & BUFQ_SORT_MASK;
 
 	cq = bufq_priocscan_selectqueue(q, bp);
-	cscan_put(cq, bp, sortby);
+	cscan_put(cq, bp);
 }
 
 static struct buf *
@@ -307,7 +377,13 @@ bufq_priocscan_get(struct bufq_state *bu
 	 * finally, get a request from the selected queue.
 	 */
 	KDASSERT(!cscan_empty(&pq->q_queue));
-	bp = cscan_get(&pq->q_queue, remove);
+	bp = cscan_get(&pq->q_queue, remove,
+#if defined(PRIOCSCAN_USE_GLOBAL_POSITION)
+	    &q->bq_lastkey
+#else /* defined(PRIOCSCAN_USE_GLOBAL_POSITION) */
+	    &pq->q_queue.cq_lastkey
+#endif /* defined(PRIOCSCAN_USE_GLOBAL_POSITION) */
+	    );
 	KDASSERT(bp != NULL);
 	KDASSERT(&pq->q_queue == bufq_priocscan_selectqueue(q, bp));
 
@@ -318,19 +394,20 @@ static struct buf *
 bufq_priocscan_cancel(struct bufq_state *bufq, struct buf *bp)
 {
 	struct bufq_priocscan * const q = bufq->bq_private;
-	unsigned int i, j;
+	unsigned int i;
 
 	for (i = 0; i < PRIOCSCAN_NQUEUE; i++) {
 		struct cscan_queue * const cq = &q->bq_queue[i].q_queue;
-		for (j = 0; j < 2; j++) {
-			struct bqhead * const bqh = &cq->cq_head[j];
-			struct buf *bq;
-
-			TAILQ_FOREACH(bq, bqh, b_actq) {
-				if (bq == bp) {
-					TAILQ_REMOVE(bqh, bp, b_actq);
-					return bp;
-				}
+		struct buf *it;
+
+		/*
+		 * XXX probably could be faster but the cancel functionality
+		 * is not widely used anyway.
+		 */
+		RB_TREE_FOREACH(it, &cq->cq_buffers) {
+			if (it == bp) {
+				rb_tree_remove_node(&cq->cq_buffers, bp);
+				return bp;
 			}
 		}
 	}
@@ -349,6 +426,7 @@ static void
 bufq_priocscan_init(struct bufq_state *bufq)
 {
 	struct bufq_priocscan *q;
+	const int sortby = bufq->bq_flags & BUFQ_SORT_MASK;
 	unsigned int i;
 
 	bufq->bq_get = bufq_priocscan_get;
@@ -361,6 +439,6 @@ bufq_priocscan_init(struct bufq_state *b
 	for (i = 0; i < PRIOCSCAN_NQUEUE; i++) {
 		struct cscan_queue *cq = &q->bq_queue[i].q_queue;
 
-		cscan_init(cq);
+		cscan_init(cq, sortby);
 	}
 }
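
Two notes on the new cscan_put()/cscan_get() above.  First, the KASSERT in
cscan_put() relies on the documented rb_tree_insert_node(3) contract: it
returns its argument when the node was inserted, and the already-present
node when an "equal" one exists.  Since cscan_tree_compare_nodes() falls
back to comparing buffer addresses, two distinct buffers never compare
equal, so the insert cannot fail.  A sketch of the contract for a tree
whose comparator does tolerate duplicates (hypothetical here, since this
one does not):

    struct buf *obp;

    obp = rb_tree_insert_node(&q->cq_buffers, bp);
    if (obp != bp) {
            /* an equal node was already present; bp was NOT inserted */
    }

Second, the head position written back into the key is the block just past
the completed transfer: with DEV_BSIZE 512 (DEV_BSHIFT 9), an 8 KiB request
starting at raw block 1000 advances k_rawblkno to 1000 + (8192 >> 9) == 1016,
and the next rb_tree_find_node_geq() continues the sweep from there.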

Index: src/sys/sys/buf.h
diff -u src/sys/sys/buf.h:1.118 src/sys/sys/buf.h:1.119
--- src/sys/sys/buf.h:1.118	Mon Nov 21 04:36:05 2011
+++ src/sys/sys/buf.h	Fri Feb 17 08:45:11 2012
@@ -1,4 +1,4 @@
-/*     $NetBSD: buf.h,v 1.118 2011/11/21 04:36:05 christos Exp $ */
+/*     $NetBSD: buf.h,v 1.119 2012/02/17 08:45:11 yamt Exp $ */
 
 /*-
  * Copyright (c) 1999, 2000, 2007, 2008 The NetBSD Foundation, Inc.
@@ -73,6 +73,7 @@
 #include <sys/queue.h>
 #include <sys/mutex.h>
 #include <sys/condvar.h>
+#include <sys/rbtree.h>
 #if defined(_KERNEL)
 #include <sys/workqueue.h>
 #endif /* defined(_KERNEL) */
@@ -103,6 +104,7 @@ extern kmutex_t buffer_lock;
 struct buf {
 	union {
 		TAILQ_ENTRY(buf) u_actq;
+		rb_node_t u_rbnode;
 #if defined(_KERNEL) /* u_work is smaller than u_actq. XXX */
 		struct work u_work;
 #endif /* defined(_KERNEL) */
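
(The rb_node_t added above shares storage with the existing queue linkage:
a buffer sits on at most one of the TAILQ, the rbtree, or the workqueue at
a time.  On NetBSD an rb_node_t holds three pointer-sized words versus two
for the TAILQ_ENTRY, so the union, and with it struct buf, grows; that
layout change is what the kernel version bump below is for.)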

Index: src/sys/sys/param.h
diff -u src/sys/sys/param.h:1.409 src/sys/sys/param.h:1.410
--- src/sys/sys/param.h:1.409	Wed Feb 15 23:05:02 2012
+++ src/sys/sys/param.h	Fri Feb 17 08:45:12 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: param.h,v 1.409 2012/02/15 23:05:02 riz Exp $	*/
+/*	$NetBSD: param.h,v 1.410 2012/02/17 08:45:12 yamt Exp $	*/
 
 /*-
  * Copyright (c) 1982, 1986, 1989, 1993
@@ -63,7 +63,7 @@
  *	2.99.9		(299000900)
  */
 
-#define	__NetBSD_Version__	699000100	/* NetBSD 6.99.1 */
+#define	__NetBSD_Version__	699000200	/* NetBSD 6.99.2 */
 
 #define __NetBSD_Prereq__(M,m,p) (((((M) * 100000000) + \
     (m) * 1000000) + (p) * 100) <= __NetBSD_Version__)
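
For third-party code, the bump is testable through the prerequisite macro
above: 6.99.2 encodes as 6*100000000 + 99*1000000 + 2*100 == 699000200.
A usage sketch (not part of this commit):

    #include <sys/param.h>

    #if __NetBSD_Prereq__(6,99,2)
    /* struct buf carries b_u.u_rbnode; bufq_priocscan is rbtree-backed */
    #endif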
