An emphatic ok from me for this one.
On Wed, Nov 20, 2013 at 3:21 AM, David Gwynne <da...@gwynne.id.au> wrote:
> the subject says it all really. this is sort of inspired by 5d2ecd5224
> in bitrig except this brings all the architectures and device drivers
> forward (i didn't get to delete any to prepare for this), and maintains
> an algorithm for trying to order io on spinning rust (nscan).
>
> ok?
>
> Index: kern/kern_bufq.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_bufq.c,v
> retrieving revision 1.25
> diff -u -p -r1.25 kern_bufq.c
> --- kern/kern_bufq.c 10 Apr 2013 01:35:55 -0000 1.25
> +++ kern/kern_bufq.c 20 Nov 2013 10:02:47 -0000
> @@ -42,13 +42,6 @@ struct bufq_impl {
> int (*impl_peek)(void *);
> };
>
> -void *bufq_disksort_create(void);
> -void bufq_disksort_destroy(void *);
> -void bufq_disksort_queue(void *, struct buf *);
> -struct buf *bufq_disksort_dequeue(void *);
> -void bufq_disksort_requeue(void *, struct buf *);
> -int bufq_disksort_peek(void *);
> -
> void *bufq_fifo_create(void);
> void bufq_fifo_destroy(void *);
> void bufq_fifo_queue(void *, struct buf *);
> @@ -65,14 +58,6 @@ int bufq_nscan_peek(void *);
>
> const struct bufq_impl bufq_impls[BUFQ_HOWMANY] = {
> {
> - bufq_disksort_create,
> - bufq_disksort_destroy,
> - bufq_disksort_queue,
> - bufq_disksort_dequeue,
> - bufq_disksort_requeue,
> - bufq_disksort_peek
> - },
> - {
> bufq_fifo_create,
> bufq_fifo_destroy,
> bufq_fifo_queue,
> @@ -323,61 +308,6 @@ bufq_restart(void)
> mtx_leave(&bufqs_mtx);
> }
>
> -/*
> - * disksort implementation.
> - */
> -
> -void *
> -bufq_disksort_create(void)
> -{
> - return (malloc(sizeof(struct buf), M_DEVBUF, M_NOWAIT | M_ZERO));
> -}
> -
> -void
> -bufq_disksort_destroy(void *data)
> -{
> - free(data, M_DEVBUF);
> -}
> -
> -void
> -bufq_disksort_queue(void *data, struct buf *bp)
> -{
> - disksort((struct buf *)data, bp);
> -}
> -
> -struct buf *
> -bufq_disksort_dequeue(void *data)
> -{
> - struct buf *bufq = data;
> - struct buf *bp;
> -
> - bp = bufq->b_actf;
> - if (bp != NULL)
> - bufq->b_actf = bp->b_actf;
> - if (bufq->b_actf == NULL)
> - bufq->b_actb = &bufq->b_actf;
> -
> - return (bp);
> -}
> -
> -void
> -bufq_disksort_requeue(void *data, struct buf *bp)
> -{
> - struct buf *bufq = data;
> -
> - bp->b_actf = bufq->b_actf;
> - bufq->b_actf = bp;
> - if (bp->b_actf == NULL)
> - bufq->b_actb = &bp->b_actf;
> -}
> -
> -int
> -bufq_disksort_peek(void *data)
> -{
> - struct buf *bufq = data;
> -
> - return (bufq->b_actf != NULL);
> -}
>
> /*
> * fifo implementation
> @@ -444,9 +374,7 @@ bufq_fifo_peek(void *data)
> * nscan implementation
> */
>
> -#define BUF_INORDER(ba, bb) \
> - (((ba)->b_cylinder < (bb)->b_cylinder) || \
> - ((ba)->b_cylinder == (bb)->b_cylinder && (ba)->b_blkno < (bb)->b_blkno))
> +#define BUF_INORDER(ba, bb) ((ba)->b_blkno < (bb)->b_blkno)
>
> #define dsentries b_bufq.bufq_data_nscan.bqf_entries
>
> Index: kern/subr_disk.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/subr_disk.c,v
> retrieving revision 1.158
> diff -u -p -r1.158 subr_disk.c
> --- kern/subr_disk.c 18 Nov 2013 17:45:01 -0000 1.158
> +++ kern/subr_disk.c 20 Nov 2013 10:02:47 -0000
> @@ -91,99 +91,6 @@ void sr_map_root(void);
> void disk_attach_callback(void *, void *);
>
> /*
> - * Seek sort for disks. We depend on the driver which calls us using b_resid
> - * as the current cylinder number.
> - *
> - * The argument ap structure holds a b_actf activity chain pointer on which we
> - * keep two queues, sorted in ascending cylinder order. The first queue holds
> - * those requests which are positioned after the current cylinder (in the first
> - * request); the second holds requests which came in after their cylinder number
> - * was passed. Thus we implement a one way scan, retracting after reaching the
> - * end of the drive to the first request on the second queue, at which time it
> - * becomes the first queue.
> - *
> - * A one-way scan is natural because of the way UNIX read-ahead blocks are
> - * allocated.
> - */
> -
> -void
> -disksort(struct buf *ap, struct buf *bp)
> -{
> - struct buf *bq;
> -
> - /* If the queue is empty, then it's easy. */
> - if (ap->b_actf == NULL) {
> - bp->b_actf = NULL;
> - ap->b_actf = bp;
> - return;
> - }
> -
> - /*
> - * If we lie after the first (currently active) request, then we
> - * must locate the second request list and add ourselves to it.
> - */
> - bq = ap->b_actf;
> - if (bp->b_cylinder < bq->b_cylinder) {
> - while (bq->b_actf) {
> - /*
> - * Check for an ``inversion'' in the normally ascending
> - * cylinder numbers, indicating the start of the second
> - * request list.
> - */
> - if (bq->b_actf->b_cylinder < bq->b_cylinder) {
> - /*
> - * Search the second request list for the first
> - * request at a larger cylinder number. We go
> - * before that; if there is no such request, we
> - * go at end.
> - */
> - do {
> - if (bp->b_cylinder <
> - bq->b_actf->b_cylinder)
> - goto insert;
> - if (bp->b_cylinder ==
> - bq->b_actf->b_cylinder &&
> - bp->b_blkno < bq->b_actf->b_blkno)
> - goto insert;
> - bq = bq->b_actf;
> - } while (bq->b_actf);
> - goto insert; /* after last */
> - }
> - bq = bq->b_actf;
> - }
> - /*
> - * No inversions... we will go after the last, and
> - * be the first request in the second request list.
> - */
> - goto insert;
> - }
> - /*
> - * Request is at/after the current request...
> - * sort in the first request list.
> - */
> - while (bq->b_actf) {
> - /*
> - * We want to go after the current request if there is an
> - * inversion after it (i.e. it is the end of the first
> - * request list), or if the next request is a larger cylinder
> - * than our request.
> - */
> - if (bq->b_actf->b_cylinder < bq->b_cylinder ||
> - bp->b_cylinder < bq->b_actf->b_cylinder ||
> - (bp->b_cylinder == bq->b_actf->b_cylinder &&
> - bp->b_blkno < bq->b_actf->b_blkno))
> - goto insert;
> - bq = bq->b_actf;
> - }
> - /*
> - * Neither a second list nor a larger request... we go at the end of
> - * the first list, which is the same as the end of the whole schebang.
> - */
> -insert: bp->b_actf = bq->b_actf;
> - bq->b_actf = bp;
> -}
> -
> -/*
> * Compute checksum for disk label.
> */
> u_int
> @@ -720,9 +627,6 @@ bounds_check_with_label(struct buf *bp,
> bp->b_bcount = sz << DEV_BSHIFT;
> }
>
> - /* calculate cylinder for disksort to order transfers with */
> - bp->b_cylinder = (bp->b_blkno + DL_SECTOBLK(lp, DL_GETPOFFSET(p))) /
> - DL_SECTOBLK(lp, lp->d_secpercyl);
> return (0);
>
> bad:
> Index: sys/buf.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/buf.h,v
> retrieving revision 1.90
> diff -u -p -r1.90 buf.h
> --- sys/buf.h 6 Nov 2013 07:46:31 -0000 1.90
> +++ sys/buf.h 20 Nov 2013 10:02:47 -0000
> @@ -63,11 +63,10 @@ LIST_HEAD(workhead, worklist);
> * Buffer queues
> */
> #define BUFQ_NSCAN_N 128
> -#define BUFQ_DISKSORT 0
> -#define BUFQ_FIFO 1
> -#define BUFQ_NSCAN 2
> +#define BUFQ_FIFO 0
> +#define BUFQ_NSCAN 1
> #define BUFQ_DEFAULT BUFQ_NSCAN
> -#define BUFQ_HOWMANY 3
> +#define BUFQ_HOWMANY 2
>
> /*
> * Write limits for bufq - defines high and low water marks for how
>