This patch is an example of block device stacking at the request level,
showing why blk_end_request() is needed and how the new
rq->end_io() hook is used.
Request-based dm itself is still under development and is not ready
for inclusion.

This patch adds the request-based dm feature to the dm core.
Request-based dm hooks the clone's ->end_io() to check the errors that
device drivers return for the clone.  (See clone_end_request())

# Currently, request-based dm can be turned on by an ioctl at dm device
# creation time, so the userspace patches are needed.
# The ioctl from userspace is ignored if the kernel doesn't support it,
# so please update the userspace tools first when you try this.
# (If the kernel were updated first, you would hit a kernel panic.)

Signed-off-by: Kiyoshi Ueda <[EMAIL PROTECTED]>
Signed-off-by: Jun'ichi Nomura <[EMAIL PROTECTED]>
---
 block/ll_rw_blk.c             |    9
 drivers/md/dm-hw-handler.h    |    1
 drivers/md/dm-ioctl.c         |    5
 drivers/md/dm-table.c         |   23 +
 drivers/md/dm.c               |  514 +++++++++++++++++++++++++++++++++++++++++-
 drivers/md/dm.h               |   13 +
 drivers/scsi/scsi_lib.c       |   38 +++
 include/linux/blkdev.h        |    6
 include/linux/device-mapper.h |   35 ++
 include/linux/dm-ioctl.h      |    9
 10 files changed, 638 insertions(+), 15 deletions(-)

diff -rupN 07-change-end-io/block/ll_rw_blk.c a1-rqdm-core/block/ll_rw_blk.c
--- 07-change-end-io/block/ll_rw_blk.c  2007-08-24 12:31:41.000000000 -0400
+++ a1-rqdm-core/block/ll_rw_blk.c      2007-08-29 13:53:12.000000000 -0400
@@ -177,6 +177,13 @@ void blk_queue_softirq_done(struct reque
 
 EXPORT_SYMBOL(blk_queue_softirq_done);
 
+/* Install @fn as the device_congested_fn callback of queue @q. */
+void blk_queue_device_congested(struct request_queue *q, device_congested_fn *fn)
+{
+       q->device_congested_fn = fn;
+}
+EXPORT_SYMBOL_GPL(blk_queue_device_congested);
+
 /**
  * blk_queue_make_request - define an alternate make_request function for a device
  * @q:  the request queue for the device to be affected
@@ -3692,7 +3699,7 @@ int blk_end_io(struct request *rq, int u
        struct request_queue *q = rq->q;
        unsigned long flags = 0UL;
 
-       if (blk_fs_request(rq) || blk_pc_request(rq)) {
+       if ((blk_fs_request(rq) || blk_pc_request(rq)) && !blk_cloned_rq(rq)) {
                if (__end_that_request_first(rq, uptodate, nr_bytes))
                        return 1;
        }
diff -rupN 07-change-end-io/drivers/md/dm.c a1-rqdm-core/drivers/md/dm.c
--- 07-change-end-io/drivers/md/dm.c    2007-08-13 00:25:24.000000000 -0400
+++ a1-rqdm-core/drivers/md/dm.c        2007-08-30 11:19:30.000000000 -0400
@@ -51,6 +51,22 @@ struct dm_target_io {
        union map_info info;
 };
 
+/*
+ * Per-request context for request-based dm.
+ * One of these is allocated for each original request.
+ *
+ * Each original request has exactly one clone, so no clone counter
+ * (like struct dm_io.io_count) is needed, and the roles of struct dm_io
+ * and struct dm_target_io are merged into this single structure.
+ */
+struct dm_rq_target_io {
+       struct mapped_device *md;       /* device the request was queued on */
+       int error;
+       struct request *rq;             /* the original request */
+       struct dm_target *ti;           /* target the request is mapped to */
+       union map_info info;            /* passed to the target's hooks */
+};
+
 union map_info *dm_get_mapinfo(struct bio *bio)
 {
        if (bio && bio->bi_private)
@@ -58,6 +74,14 @@ union map_info *dm_get_mapinfo(struct bi
        return NULL;
 }
 
+union map_info *dm_get_rq_mapinfo(struct request *rq)
+{
+       struct dm_rq_target_io *tio = rq ? rq->end_io_data : NULL;
+
+       return tio ? &tio->info : NULL; /* NULL: no rq or no tio attached */
+}
+EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo);
+
 #define MINOR_ALLOCED ((void *)-1)
 
 /*
@@ -70,6 +94,13 @@ union map_info *dm_get_mapinfo(struct bi
 #define DMF_DELETING 4
 #define DMF_NOFLUSH_SUSPENDING 5
 
+/*
+ * Bits for the md->features field.
+ */
+#define DM_FEAT_REQUEST_BASE (1 << 0)
+
+#define dm_feat_rq_base(md) ((md)->features & DM_FEAT_REQUEST_BASE)
+
 struct mapped_device {
        struct rw_semaphore io_lock;
        struct semaphore suspend_lock;
@@ -79,6 +110,7 @@ struct mapped_device {
        atomic_t open_count;
 
        unsigned long flags;
+       unsigned long features;
 
        struct request_queue *queue;
        struct gendisk *disk;
@@ -121,11 +153,16 @@ struct mapped_device {
 
        /* forced geometry settings */
        struct hd_geometry geometry;
+
+       /* For saving the address of __make_request for request based dm */
+       make_request_fn *saved_make_request_fn;
 };
 
 #define MIN_IOS 256
 static struct kmem_cache *_io_cache;
 static struct kmem_cache *_tio_cache;
+static struct kmem_cache *_rq_cache;     /* clone pool for request-based dm */
+static struct kmem_cache *_rq_tio_cache; /* target_io pool for request-based dm */
 
 static int __init local_init(void)
 {
@@ -143,9 +180,27 @@ static int __init local_init(void)
                return -ENOMEM;
        }
 
+       _rq_cache = kmem_cache_create("dm_rq", sizeof(struct request),
+                                     0, 0, NULL);
+       if (!_rq_cache) {
+               kmem_cache_destroy(_tio_cache);
+               kmem_cache_destroy(_io_cache);
+               return -ENOMEM;
+       }
+
+       _rq_tio_cache = KMEM_CACHE(dm_rq_target_io, 0);
+       if (!_rq_tio_cache) {
+               kmem_cache_destroy(_rq_cache);
+               kmem_cache_destroy(_tio_cache);
+               kmem_cache_destroy(_io_cache);
+               return -ENOMEM;
+       }
+
        _major = major;
        r = register_blkdev(_major, _name);
        if (r < 0) {
+               kmem_cache_destroy(_rq_tio_cache);
+               kmem_cache_destroy(_rq_cache);
                kmem_cache_destroy(_tio_cache);
                kmem_cache_destroy(_io_cache);
                return r;
@@ -159,6 +214,8 @@ static int __init local_init(void)
 
 static void local_exit(void)
 {
+       kmem_cache_destroy(_rq_tio_cache);
+       kmem_cache_destroy(_rq_cache);
        kmem_cache_destroy(_tio_cache);
        kmem_cache_destroy(_io_cache);
        unregister_blkdev(_major, _name);
@@ -341,6 +398,27 @@ static void free_tio(struct mapped_devic
        mempool_free(tio, md->tio_pool);
 }
 
+static inline struct request *alloc_rq(struct mapped_device *md)
+{
+       return mempool_alloc(md->io_pool, GFP_ATOMIC);  /* clone, or NULL */
+}
+
+static inline void free_rq(struct mapped_device *md, struct request *rq)
+{
+       mempool_free(rq, md->io_pool);  /* give the clone back to its pool */
+}
+
+static inline struct dm_rq_target_io *alloc_rq_tio(struct mapped_device *md)
+{
+       return mempool_alloc(md->tio_pool, GFP_ATOMIC); /* tio, or NULL */
+}
+
+static inline void free_rq_tio(struct mapped_device *md,
+                              struct dm_rq_target_io *tio)
+{
+       mempool_free(tio, md->tio_pool);        /* give the tio back to its pool */
+}
+
 static void start_io_acct(struct dm_io *io)
 {
        struct mapped_device *md = io->md;
@@ -434,6 +512,93 @@ int dm_set_geometry(struct mapped_device
        return 0;
 }
 
+/*
+ * Take a reference on md's request queue and return the queue, or NULL
+ * when blk_get_queue() reports failure (non-zero).
+ * The queue is only valid as long as you have a reference count on 'md'.
+ */
+struct request_queue *dm_get_queue(struct mapped_device *md)
+{
+       struct request_queue *q = md->queue;
+
+       return blk_get_queue(q) ? NULL : q;
+}
+EXPORT_SYMBOL_GPL(dm_get_queue);
+
+void dm_put_queue(struct request_queue *q)
+{
+       blk_put_queue(q);       /* counterpart of dm_get_queue() */
+}
+EXPORT_SYMBOL_GPL(dm_put_queue);
+
+void dm_stop_queue(struct request_queue *q)
+{
+       unsigned long irqflags;
+
+       spin_lock_irqsave(q->queue_lock, irqflags);
+       blk_stop_queue(q);      /* done under q->queue_lock */
+       spin_unlock_irqrestore(q->queue_lock, irqflags);
+}
+EXPORT_SYMBOL_GPL(dm_stop_queue);
+
+void dm_start_queue(struct request_queue *q)
+{
+       unsigned long irqflags;
+
+       spin_lock_irqsave(q->queue_lock, irqflags);
+       if (blk_queue_stopped(q))       /* nothing to do unless stopped */
+               blk_start_queue(q);
+       spin_unlock_irqrestore(q->queue_lock, irqflags);
+}
+EXPORT_SYMBOL_GPL(dm_start_queue);
+
+void __dm_requeue_request(struct request_queue *q, struct request *rq)
+{
+       if (elv_queue_empty(q))         /* plug an empty queue first */
+               blk_plug_device(q);
+       blk_requeue_request(q, rq);     /* then put rq back on q */
+}
+EXPORT_SYMBOL_GPL(__dm_requeue_request);
+
+void dm_requeue_request(struct request_queue *q, struct request *rq)
+{
+       unsigned long irqflags;
+
+       spin_lock_irqsave(q->queue_lock, irqflags);
+       __dm_requeue_request(q, rq);    /* the variant that takes no lock */
+       spin_unlock_irqrestore(q->queue_lock, irqflags);
+}
+EXPORT_SYMBOL_GPL(dm_requeue_request);
+
+/*
+ * Add @rq to the back of @q and start its I/O accounting.
+ * NOTE(review): rq->rq_disk is dereferenced, so it must be set by now.
+ */
+void dm_dispatch_request(struct request_queue *q, struct request *rq)
+{
+       unsigned long irqflags;
+
+       spin_lock_irqsave(q->queue_lock, irqflags);
+       disk_round_stats(rq->rq_disk);
+       rq->start_time = jiffies;
+       rq->rq_disk->in_flight++;
+
+       __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0);
+       spin_unlock_irqrestore(q->queue_lock, irqflags);
+}
+EXPORT_SYMBOL_GPL(dm_dispatch_request);
+
+/*
+ * Fail @rq quietly: set REQ_QUIET and end it with uptodate == 0.
+ */
+static void kill_request(struct request *rq)
+{
+       int nr_bytes = rq->hard_nr_sectors << 9;
+
+       rq->cmd_flags |= REQ_QUIET;
+       blk_end_request(rq, 0, nr_bytes ? nr_bytes : (int)rq->data_len);
+}
+
 /*-----------------------------------------------------------------
  * CRUD START:
  *   A more elegant soln is in the works that uses the queue
@@ -533,6 +698,152 @@ static int clone_endio(struct bio *bio, 
        return r;
 }
 
+static void blk_update_cloned_rq(struct request *rq, struct request *clone)
+{
+       clone->bio = rq->bio;                   /* bio chain */
+       clone->biotail = rq->biotail;
+       clone->buffer = rq->buffer;             /* data pointers and length */
+       clone->data = rq->data;
+       clone->data_len = rq->data_len;
+       clone->sector = rq->sector;             /* position */
+       clone->hard_sector = rq->hard_sector;
+       clone->nr_sectors = rq->nr_sectors;     /* sizes */
+       clone->hard_nr_sectors = rq->hard_nr_sectors;
+       clone->current_nr_sectors = rq->current_nr_sectors;
+       clone->hard_cur_sectors = rq->hard_cur_sectors;
+       clone->nr_phys_segments = rq->nr_phys_segments;
+       clone->nr_hw_segments = rq->nr_hw_segments;
+}
+
+static void dec_rq_pending(struct dm_rq_target_io *tio)
+{
+       /* When the pending count reaches zero, nudge suspend waiters. */
+       if (atomic_dec_return(&tio->md->pending) == 0)
+               wake_up(&tio->md->wait);
+}
+
+static void finish_clone(struct request *clone, int needlock,
+                        int (drv_callback)(struct request *))
+{
+       if (!clone->q)
+               /*
+                * The clone was never dispatched to an underlying device
+                * (clone->q is still NULL), so the caller is dm itself
+                * rather than a device driver (e.g. dispatch_queued_ios()
+                * of dm-multipath).  Nothing to finish for this clone.
+                */
+               return;
+
+       /*
+        * The clone is *NOT* actually freed here because it is allocated
+        * from dm's own mempool and REQ_ALLOCED isn't set in
+        * clone->cmd_flags.
+        *
+        * The uptodate and nr_bytes arguments of blk_end_io() don't matter
+        * because they aren't used for dm's clones.
+        */
+       if (blk_end_io(clone, 1, 0, needlock, NULL, drv_callback))
+               DMWARN("dm ignores the immediate return request of callback.");
+}
+
+static void clean_clone(struct request *clone)
+{
+       clone->errors = 0;      /* same values setup_clone() gives them */
+       clone->special = NULL;
+}
+
+/*
+ * ->end_io() hook of every clone (installed by setup_clone()).
+ * Returns 1 while the original request has leftover, 0 otherwise.
+ */
+static int clone_end_request(struct request *clone, int uptodate, int nr_bytes,
+                            int needlock, int (drv_callback)(struct request *))
+{
+       int r = 0, error = 0, rw = rq_data_dir(clone);
+       struct dm_rq_target_io *tio = clone->end_io_data;
+       dm_request_endio_first_fn endio_first = tio->ti->type->rq_end_io_first;
+       dm_request_endio_fn endio = tio->ti->type->rq_end_io;
+       dm_request_queue_in_tgt_fn queue_in_tgt = tio->ti->type->queue_in_tgt;
+       struct request *orig = tio->rq;
+       struct request_queue *q_orig = orig->q;
+
+       if (blk_fs_request(clone) && clone->rq_disk)
+               disk_stat_add(clone->rq_disk, sectors[rw], nr_bytes >> 9);
+
+       if (end_io_error(uptodate))
+               error = !uptodate ? -EIO : uptodate;
+
+       if (endio_first) {
+               r = endio_first(tio->ti, clone, error, &tio->info);
+               switch (r) {
+               case 0:
+                       /* succeeded */
+                       break;
+               case 1:
+                       /* the target wants to handle the io without unmap */
+                       finish_clone(clone, needlock, drv_callback);
+                       clean_clone(clone);
+
+                       if (!queue_in_tgt) {
+                               DMERR("queue_in_tgt isn't implemented.");
+                               BUG();
+                       }
+                       queue_in_tgt(tio->ti, clone, &tio->info);
+                       blk_run_queue(q_orig);
+
+                       return 0;
+               case 2:
+                       /* The target wants to requeue the original request */
+                       if (needlock)
+                               dm_requeue_request(q_orig, orig);
+                       else
+                               __dm_requeue_request(q_orig, orig);
+
+                       goto free_clone;
+               default:
+                       if (r >= 0) {
+                               DMWARN("unimplemented target endio return value: %d", r);
+                               BUG();
+                       }
+
+                       /* the target hit an error it couldn't retry: pass it up */
+                       uptodate = r;
+                       break;
+               }
+       }
+
+       /* Complete the original request's chunk */
+       r = blk_end_request(orig, uptodate, nr_bytes);
+       if (r) {
+               /*
+                * The original request has leftover.  Recopy the fields
+                * that blk_end_request() updated to the clone.  This must
+                * not be done after a full completion, because the original
+                * request is already freed by then.
+                */
+               blk_update_cloned_rq(orig, clone);
+               return 1;
+       }
+
+free_clone:
+       /*
+        * Now the original request is completed and freed, or requeued.
+        * So it is not touched below; only the clone and its tio are torn down.
+        */
+
+       if (endio)
+               endio(tio->ti, clone, error, &tio->info);
+
+       finish_clone(clone, needlock, drv_callback);
+       blk_run_queue(q_orig);
+
+       dec_rq_pending(tio);
+       free_rq(tio->md, clone);
+       free_rq_tio(tio->md, tio);
+
+       return 0;
+}
+
 static sector_t max_io_len(struct mapped_device *md,
                           sector_t sector, struct dm_target *ti)
 {
@@ -844,6 +1155,166 @@ static int dm_request(struct request_que
        return 0;
 }
 
+/*
+ * FIXME: currently a plain pass-through to the make_request_fn that
+ * alloc_dev() saved (i.e. __make_request()).
+ */
+static int dm_make_request(struct request_queue *q, struct bio *bio)
+{
+       struct mapped_device *md = q->queuedata;
+
+       return md->saved_make_request_fn(q, bio);
+}
+
+static void setup_clone(struct request *clone, struct request *rq)
+{
+       INIT_LIST_HEAD(&clone->queuelist);
+       INIT_LIST_HEAD(&clone->donelist);
+       clone->q = NULL;        /* NULL means "not dispatched yet" */
+       clone->cmd_flags = (rq_data_dir(rq) | REQ_NOMERGE | REQ_CLONED);
+       clone->cmd_type = rq->cmd_type;
+       clone->sector = rq->sector;
+       clone->hard_sector = rq->hard_sector;
+       clone->nr_sectors = rq->nr_sectors;
+       clone->hard_nr_sectors = rq->hard_nr_sectors;
+       clone->current_nr_sectors = rq->current_nr_sectors;
+       clone->hard_cur_sectors = rq->hard_cur_sectors;
+       clone->bio = rq->bio;   /* the clone shares rq's bio chain */
+       clone->biotail = rq->biotail;
+       INIT_HLIST_NODE(&clone->hash);
+       /* NOTE(review): RB_CLEAR_NODE(&clone->rb_node) was left disabled here */
+       clone->completion_data = NULL;
+       clone->elevator_private = NULL;
+       clone->elevator_private2 = NULL;
+       clone->rq_disk = NULL;  /* not bound to a disk yet */
+       clone->start_time = jiffies;
+       clone->nr_phys_segments = rq->nr_phys_segments;
+       clone->nr_hw_segments = rq->nr_hw_segments;
+       clone->ioprio = rq->ioprio;
+       clone->special = NULL;
+       clone->buffer = rq->buffer;
+       clone->tag = -1;
+       clone->errors = 0;
+       clone->ref_count = 1;
+       clone->cmd_len = rq->cmd_len;
+       memcpy(clone->cmd, rq->cmd, sizeof(rq->cmd));
+       clone->data_len = rq->data_len;
+       clone->sense_len = rq->sense_len;
+       clone->data = rq->data;
+       clone->sense = rq->sense;
+       clone->timeout = 0;
+       clone->retries = 0;
+       clone->end_io = clone_end_request;      /* completion hook */
+       clone->end_io_data = NULL;      /* the caller attaches the tio */
+}
+
+/* Ask @q's device_congested_fn, if it has one; report 0 when it has none. */
+int dm_underlying_device_congested(struct request_queue *q)
+{
+       device_congested_fn *fn = q->device_congested_fn;
+
+       return fn ? fn(q) : 0;
+}
+EXPORT_SYMBOL_GPL(dm_underlying_device_congested);
+
+static int clone_and_map_request(struct dm_target *ti, struct request *rq,
+                                struct mapped_device *md)
+{
+       int r;
+       struct request *clone;
+       struct dm_rq_target_io *tio;
+
+       tio = alloc_rq_tio(md); /* only one for each original request */
+       if (!tio)
+               /* out of memory: let the caller requeue rq */
+               goto requeue;
+       tio->md = md;
+       tio->error = 0;
+       tio->rq = rq;
+       tio->ti = ti;
+       memset(&tio->info, 0, sizeof(tio->info));
+
+       clone = alloc_rq(md);
+       if (!clone) {
+               /* out of memory: undo the tio and let the caller requeue rq */
+               free_rq_tio(md, tio);
+               goto requeue;
+       }
+       setup_clone(clone, rq);
+       clone->end_io_data = tio;
+
+       atomic_inc(&md->pending);       /* dropped again by dec_rq_pending() */
+       r = ti->type->map_rq(ti, clone, &tio->info);
+       switch (r) {
+       case 0:
+               /* the target has taken the request to submit by itself */
+               break;
+       case 1:
+               /* the clone has been remapped so dispatch it */
+               dm_dispatch_request(clone->q, clone);
+               break;
+       case 2:
+               /* the target has requested to requeue the original request */
+               dec_rq_pending(tio);
+               free_rq_tio(md, tio);
+               free_rq(md, clone);
+               goto requeue;
+       default:
+               if (r >= 0) {
+                       DMWARN("unimplemented target map return value: %d", r);
+                       BUG();
+               }
+               /* negative: the target reported an error, so fail rq */
+               dec_rq_pending(tio);
+               free_rq_tio(md, tio);
+               free_rq(md, clone);
+               kill_request(rq);
+               break;
+       }
+
+       return 0;       /* rq was handed on, or failed */
+
+requeue:
+       return 1;       /* the caller must requeue rq */
+}
+
+/*
+ * q->request_fn for request based dm.
+ * Called with q->queue_lock held; the lock is dropped around the mapping.
+ */
+static void dm_request_fn(struct request_queue *q)
+{
+       int r;
+       struct mapped_device *md = (struct mapped_device *)q->queuedata;
+       struct dm_table *map = dm_get_table(md);
+       struct dm_target *ti;
+       dm_congested_fn congested;
+       struct request *rq;
+
+       if (!map)       /* no table bound (cf. dm_unplug_all()): keep I/O queued */
+               return;
+
+       while (!blk_queue_plugged(q)) {
+               rq = elv_next_request(q);
+               if (!rq)
+                       break;
+
+               ti = dm_table_find_target(map, rq->sector);
+               congested = ti->type->congested;
+               if (congested && congested(ti))
+                       break;
+
+               blkdev_dequeue_request(rq);
+               spin_unlock(q->queue_lock);
+               r = clone_and_map_request(ti, rq, md);
+               spin_lock_irq(q->queue_lock);
+               if (r)
+                       __dm_requeue_request(q, rq);
+       }
+
+       dm_table_put(map);
+}
+
 static int dm_flush_all(struct request_queue *q, struct gendisk *disk,
                        sector_t *error_sector)
 {
@@ -865,6 +1336,9 @@ static void dm_unplug_all(struct request
        struct dm_table *map = dm_get_table(md);
 
        if (map) {
+               if (dm_feat_rq_base(md))
+                       generic_unplug_device(q);
+
                dm_table_unplug_all(map);
                dm_table_put(map);
        }
@@ -966,7 +1440,7 @@ static struct block_device_operations dm
 /*
  * Allocate and initialise a blank device with a given minor.
  */
-static struct mapped_device *alloc_dev(int minor)
+static struct mapped_device *alloc_dev(int minor, int request_base)
 {
        int r;
        struct mapped_device *md = kmalloc(sizeof(*md), GFP_KERNEL);
@@ -997,23 +1471,35 @@ static struct mapped_device *alloc_dev(i
        atomic_set(&md->open_count, 0);
        atomic_set(&md->event_nr, 0);
 
-       md->queue = blk_alloc_queue(GFP_KERNEL);
+       if (request_base) {
+               md->features |= DM_FEAT_REQUEST_BASE;
+               md->queue = blk_init_queue(dm_request_fn, NULL);
+       } else {
+               md->queue = blk_alloc_queue(GFP_KERNEL);
+       }
        if (!md->queue)
                goto bad1_free_minor;
 
        md->queue->queuedata = md;
-       md->queue->backing_dev_info.congested_fn = dm_any_congested;
-       md->queue->backing_dev_info.congested_data = md;
-       blk_queue_make_request(md->queue, dm_request);
+       if (request_base) {
+               md->saved_make_request_fn = md->queue->make_request_fn;
+               blk_queue_make_request(md->queue, dm_make_request);
+       } else {
+               md->queue->backing_dev_info.congested_fn = dm_any_congested;
+               md->queue->backing_dev_info.congested_data = md;
+               blk_queue_make_request(md->queue, dm_request);
+       }
        blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
        md->queue->unplug_fn = dm_unplug_all;
        md->queue->issue_flush_fn = dm_flush_all;
 
-       md->io_pool = mempool_create_slab_pool(MIN_IOS, _io_cache);
+       md->io_pool = mempool_create_slab_pool(MIN_IOS,
+                               request_base ? _rq_cache : _io_cache);
        if (!md->io_pool)
                goto bad2;
 
-       md->tio_pool = mempool_create_slab_pool(MIN_IOS, _tio_cache);
+       md->tio_pool = mempool_create_slab_pool(MIN_IOS,
+                               request_base ? _rq_tio_cache : _tio_cache);
        if (!md->tio_pool)
                goto bad3;
 
@@ -1154,11 +1640,12 @@ static void __unbind(struct mapped_devic
 /*
  * Constructor for a new device.
  */
-int dm_create(int minor, struct mapped_device **result)
+int dm_create(int minor, struct mapped_device **result, unsigned create_flags)
 {
        struct mapped_device *md;
+       int request_base = create_flags & DM_CREATE_REQUEST_BASE_FLAG ? 1 : 0;
 
-       md = alloc_dev(minor);
+       md = alloc_dev(minor, request_base);
        if (!md)
                return -ENXIO;
 
@@ -1383,9 +1870,13 @@ int dm_suspend(struct mapped_device *md,
        up_write(&md->io_lock);
 
        /* unplug */
-       if (map)
+       if (map) {
                dm_table_unplug_all(map);
 
+               if (dm_feat_rq_base(md))
+                       dm_stop_queue(md->queue);
+       }
+
        /*
         * Then we wait for the already mapped ios to
         * complete.
@@ -1475,6 +1966,9 @@ int dm_resume(struct mapped_device *md)
        if (!map || !dm_table_get_size(map))
                goto out;
 
+       if (dm_feat_rq_base(md))
+               dm_start_queue(md->queue);
+
        r = dm_table_resume_targets(map);
        if (r)
                goto out;
diff -rupN 07-change-end-io/drivers/md/dm.h a1-rqdm-core/drivers/md/dm.h
--- 07-change-end-io/drivers/md/dm.h    2007-08-13 00:25:24.000000000 -0400
+++ a1-rqdm-core/drivers/md/dm.h        2007-08-28 15:21:48.000000000 -0400
@@ -84,6 +84,11 @@
 #define DM_SUSPEND_NOFLUSH_FLAG                (1 << 1)
 
 /*
+ * Create feature flags
+ */
+#define DM_CREATE_REQUEST_BASE_FLAG    (1 << 0)
+
+/*
  * List of devices that a metadevice uses and should open/close.
  */
 struct dm_dev {
@@ -112,6 +117,7 @@ int dm_table_resume_targets(struct dm_ta
 int dm_table_any_congested(struct dm_table *t, int bdi_bits);
 void dm_table_unplug_all(struct dm_table *t);
 int dm_table_flush_all(struct dm_table *t);
+int dm_table_underlying_device_congested(struct dm_table *t);
 
 /*-----------------------------------------------------------------
  * A registry of target types.
@@ -124,6 +130,12 @@ int dm_target_iterate(void (*iter_func)(
                                        void *param), void *param);
 
 /*-----------------------------------------------------------------
+ * Helper for block layer operations
+ *---------------------------------------------------------------*/
+void dm_dispatch_request(struct request_queue *q, struct request *rq);
+int dm_underlying_device_congested(struct request_queue *q);
+
+/*-----------------------------------------------------------------
  * Useful inlines.
  *---------------------------------------------------------------*/
 static inline int array_too_big(unsigned long fixed, unsigned long obj,
@@ -180,6 +192,7 @@ void dm_stripe_exit(void);
 
 void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size);
 union map_info *dm_get_mapinfo(struct bio *bio);
+union map_info *dm_get_rq_mapinfo(struct request *rq);
 int dm_open_count(struct mapped_device *md);
 int dm_lock_for_deletion(struct mapped_device *md);
 
diff -rupN 07-change-end-io/drivers/md/dm-hw-handler.h a1-rqdm-core/drivers/md/dm-hw-handler.h
--- 07-change-end-io/drivers/md/dm-hw-handler.h 2007-08-13 00:25:24.000000000 -0400
+++ a1-rqdm-core/drivers/md/dm-hw-handler.h     2007-08-28 15:21:48.000000000 -0400
@@ -35,6 +35,7 @@ struct hw_handler_type {
        void (*pg_init) (struct hw_handler *hwh, unsigned bypassed,
                         struct dm_path *path);
        unsigned (*error) (struct hw_handler *hwh, struct bio *bio);
+       unsigned (*error_rq) (struct hw_handler *hwh, struct request *rq);
        int (*status) (struct hw_handler *hwh, status_type_t type,
                       char *result, unsigned int maxlen);
 };
diff -rupN 07-change-end-io/drivers/md/dm-ioctl.c a1-rqdm-core/drivers/md/dm-ioctl.c
--- 07-change-end-io/drivers/md/dm-ioctl.c      2007-08-13 00:25:24.000000000 -0400
+++ a1-rqdm-core/drivers/md/dm-ioctl.c  2007-08-28 15:21:48.000000000 -0400
@@ -565,6 +565,7 @@ static int dev_create(struct dm_ioctl *p
 {
        int r, m = DM_ANY_MINOR;
        struct mapped_device *md;
+       unsigned create_flags = 0;
 
        r = check_name(param->name);
        if (r)
@@ -572,8 +573,10 @@ static int dev_create(struct dm_ioctl *p
 
        if (param->flags & DM_PERSISTENT_DEV_FLAG)
                m = MINOR(huge_decode_dev(param->dev));
+       if (param->flags & DM_REQUEST_BASE_FLAG)
+               create_flags |= DM_CREATE_REQUEST_BASE_FLAG;
 
-       r = dm_create(m, &md);
+       r = dm_create(m, &md, create_flags);
        if (r)
                return r;
 
diff -rupN 07-change-end-io/drivers/md/dm-table.c a1-rqdm-core/drivers/md/dm-table.c
--- 07-change-end-io/drivers/md/dm-table.c      2007-08-22 18:54:04.000000000 -0400
+++ a1-rqdm-core/drivers/md/dm-table.c  2007-08-28 15:21:48.000000000 -0400
@@ -1025,6 +1025,29 @@ int dm_table_flush_all(struct dm_table *
        return ret;
 }
 
+int dm_table_underlying_device_congested(struct dm_table *t)
+{
+       struct list_head *d, *devices = dm_table_get_devices(t);
+       struct dm_dev *dd;
+       struct request_queue *q;
+       int r = !list_empty(devices);
+
+       for (d = devices->next; d != devices; d = d->next) {
+               dd = list_entry(d, struct dm_dev, list);
+               q = bdev_get_queue(dd->bdev);
+
+               /*
+                * Congested only if every underlying device is ("&="), so r
+                * starts at 1 (0 for an empty list); starting at 0 made this
+                * always return 0.  The target driver should choose a free
+                * device via a function like dm_table_uncongested_device().
+                */
+               r &= !!dm_underlying_device_congested(q);
+       }
+
+       return r;
+}
+
 struct mapped_device *dm_table_get_md(struct dm_table *t)
 {
        dm_get(t->md);
diff -rupN 07-change-end-io/drivers/scsi/scsi_lib.c a1-rqdm-core/drivers/scsi/scsi_lib.c
--- 07-change-end-io/drivers/scsi/scsi_lib.c    2007-08-24 12:26:06.000000000 -0400
+++ a1-rqdm-core/drivers/scsi/scsi_lib.c        2007-08-28 15:21:48.000000000 -0400
@@ -1401,6 +1401,43 @@ static void scsi_softirq_done(struct req
        }
 }
 
+/*
+ * device_congested_fn for SCSI queues (installed in scsi_alloc_queue()).
+ * Returns 1 when the device's queue depth is used up, the device is
+ * blocked, or another device currently owns a single_lun target;
+ * returns 0 otherwise.
+ */
+static int scsi_device_congested(struct request_queue *q)
+{
+       struct scsi_device *sdev = q->queuedata;
+       int congested;
+
+       /*
+        * Something may have happened to the device.  That is handled in
+        * scsi_request_fn(), so ask the caller to dispatch the requests
+        * in the queue.
+        */
+       if (!sdev || !get_device(&sdev->sdev_gendev))
+               return 0;
+
+       /*
+        * NOTE(review): host-level state is not consulted.  A check of
+        * scsi_host_in_recovery(), host_busy against can_queue,
+        * host_blocked and host_self_blocked under shost->host_lock
+        * was left disabled here by the author.
+        */
+       congested = (sdev->device_busy >= sdev->queue_depth) ||
+                   sdev->device_blocked ||
+                   (sdev->single_lun &&
+                    scsi_target(sdev)->starget_sdev_user &&
+                    scsi_target(sdev)->starget_sdev_user != sdev);
+
+       /* drop the reference taken by get_device() above */
+       put_device(&sdev->sdev_gendev);
+
+       return congested;
+}
+
 /*
  * Function:    scsi_request_fn()
  *
@@ -1590,6 +1627,7 @@ struct request_queue *scsi_alloc_queue(s
 
        blk_queue_prep_rq(q, scsi_prep_fn);
        blk_queue_softirq_done(q, scsi_softirq_done);
+       blk_queue_device_congested(q, scsi_device_congested);
        return q;
 }
 
diff -rupN 07-change-end-io/include/linux/blkdev.h a1-rqdm-core/include/linux/blkdev.h
--- 07-change-end-io/include/linux/blkdev.h     2007-08-24 12:32:44.000000000 -0400
+++ a1-rqdm-core/include/linux/blkdev.h 2007-08-29 13:53:12.000000000 -0400
@@ -203,6 +203,7 @@ enum rq_flag_bits {
        __REQ_RW_SYNC,          /* request is sync (O_DIRECT) */
        __REQ_ALLOCED,          /* request came from our alloc pool */
        __REQ_RW_META,          /* metadata io request */
+       __REQ_CLONED,           /* request is a clone of another request */
        __REQ_NR_BITS,          /* stops here */
 };
 
@@ -224,6 +225,7 @@ enum rq_flag_bits {
 #define REQ_RW_SYNC    (1 << __REQ_RW_SYNC)
 #define REQ_ALLOCED    (1 << __REQ_ALLOCED)
 #define REQ_RW_META    (1 << __REQ_RW_META)
+#define REQ_CLONED     (1 << __REQ_CLONED)
 
 #define BLK_MAX_CDB    16
 
@@ -348,6 +350,7 @@ typedef int (merge_bvec_fn) (struct requ
 typedef int (issue_flush_fn) (struct request_queue *, struct gendisk *, sector_t *);
 typedef void (prepare_flush_fn) (struct request_queue *, struct request *);
 typedef void (softirq_done_fn)(struct request *);
+typedef int (device_congested_fn) (struct request_queue *q);
 
 enum blk_queue_state {
        Queue_down,
@@ -386,6 +389,7 @@ struct request_queue
        issue_flush_fn          *issue_flush_fn;
        prepare_flush_fn        *prepare_flush_fn;
        softirq_done_fn         *softirq_done_fn;
+       device_congested_fn     *device_congested_fn;
 
        /*
         * Dispatch queue sorting
@@ -555,6 +559,7 @@ enum {
 #define blk_sorted_rq(rq)      ((rq)->cmd_flags & REQ_SORTED)
 #define blk_barrier_rq(rq)     ((rq)->cmd_flags & REQ_HARDBARRIER)
 #define blk_fua_rq(rq)         ((rq)->cmd_flags & REQ_FUA)
+#define blk_cloned_rq(rq)      ((rq)->cmd_flags & REQ_CLONED)
 #define blk_bidi_rq(rq)                ((rq)->next_rq != NULL)
 
 #define list_entry_rq(ptr)     list_entry((ptr), struct request, queuelist)
@@ -786,6 +791,7 @@ extern void blk_queue_prep_rq(struct req
 extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *);
 extern void blk_queue_dma_alignment(struct request_queue *, int);
 extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *);
+extern void blk_queue_device_congested(struct request_queue *, device_congested_fn *);
 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
 extern int blk_queue_ordered(struct request_queue *, unsigned, prepare_flush_fn *);
 extern void blk_queue_issue_flush_fn(struct request_queue *, issue_flush_fn *);
diff -rupN 07-change-end-io/include/linux/device-mapper.h a1-rqdm-core/include/linux/device-mapper.h
--- 07-change-end-io/include/linux/device-mapper.h      2007-08-13 00:25:24.000000000 -0400
+++ a1-rqdm-core/include/linux/device-mapper.h  2007-08-28 15:21:48.000000000 -0400
@@ -10,6 +10,8 @@
 
 #ifdef __KERNEL__
 
+struct request;
+struct request_queue;
 struct dm_target;
 struct dm_table;
 struct dm_dev;
@@ -45,6 +47,9 @@ typedef void (*dm_dtr_fn) (struct dm_tar
 typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio,
                          union map_info *map_context);
 
+typedef int (*dm_map_request_fn) (struct dm_target *ti, struct request *clone,
+                                 union map_info *map_context);
+
 /*
  * Returns:
  * < 0 : error (currently ignored)
@@ -57,6 +62,18 @@ typedef int (*dm_endio_fn) (struct dm_ta
                            struct bio *bio, int error,
                            union map_info *map_context);
 
+typedef int (*dm_request_endio_first_fn) (struct dm_target *ti,
+                                         struct request *clone, int error,
+                                         union map_info *map_context);
+
+typedef int (*dm_request_endio_fn) (struct dm_target *ti,
+                                   struct request *clone, int error,
+                                   union map_info *map_context);
+
+typedef void (*dm_request_queue_in_tgt_fn) (struct dm_target *ti,
+                                           struct request *clone,
+                                           union map_info *map_context);
+
 typedef void (*dm_flush_fn) (struct dm_target *ti);
 typedef void (*dm_presuspend_fn) (struct dm_target *ti);
 typedef void (*dm_postsuspend_fn) (struct dm_target *ti);
@@ -71,6 +88,7 @@ typedef int (*dm_message_fn) (struct dm_
 typedef int (*dm_ioctl_fn) (struct dm_target *ti, struct inode *inode,
                            struct file *filp, unsigned int cmd,
                            unsigned long arg);
+typedef int (*dm_congested_fn) (struct dm_target *ti);
 
 void dm_error(const char *message);
 
@@ -98,7 +116,11 @@ struct target_type {
        dm_ctr_fn ctr;
        dm_dtr_fn dtr;
        dm_map_fn map;
+       dm_map_request_fn map_rq;
        dm_endio_fn end_io;
+       dm_request_endio_first_fn rq_end_io_first;
+       dm_request_endio_fn rq_end_io;
+       dm_request_queue_in_tgt_fn queue_in_tgt;
        dm_flush_fn flush;
        dm_presuspend_fn presuspend;
        dm_postsuspend_fn postsuspend;
@@ -107,6 +129,7 @@ struct target_type {
        dm_status_fn status;
        dm_message_fn message;
        dm_ioctl_fn ioctl;
+       dm_congested_fn congested;
 };
 
 struct io_restrictions {
@@ -142,6 +165,8 @@ struct dm_target {
 
        /* Used to provide an error string from the ctr */
        char *error;
+
+       struct request_queue *queue;
 };
 
 int dm_register_target(struct target_type *t);
@@ -157,7 +182,7 @@ int dm_unregister_target(struct target_t
  * DM_ANY_MINOR chooses the next available minor number.
  */
 #define DM_ANY_MINOR (-1)
-int dm_create(int minor, struct mapped_device **md);
+int dm_create(int minor, struct mapped_device **md, unsigned create_flags);
 
 /*
  * Reference counting for md.
@@ -167,6 +192,14 @@ void dm_get(struct mapped_device *md);
 void dm_put(struct mapped_device *md);
 
 /*
+ * Queue operations
+ */
+struct request_queue *dm_get_queue(struct mapped_device *md);
+void dm_put_queue(struct request_queue *q);
+void dm_stop_queue(struct request_queue *q);
+void dm_start_queue(struct request_queue *q);
+
+/*
  * An arbitrary pointer may be stored alongside a mapped device.
  */
 void dm_set_mdptr(struct mapped_device *md, void *ptr);
diff -rupN 07-change-end-io/include/linux/dm-ioctl.h a1-rqdm-core/include/linux/dm-ioctl.h
--- 07-change-end-io/include/linux/dm-ioctl.h   2007-08-13 00:25:24.000000000 -0400
+++ a1-rqdm-core/include/linux/dm-ioctl.h       2007-08-28 15:21:48.000000000 -0400
@@ -285,9 +285,9 @@ typedef char ioctl_struct[308];
 #define DM_DEV_SET_GEOMETRY    _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
 
 #define DM_VERSION_MAJOR       4
-#define DM_VERSION_MINOR       11
+#define DM_VERSION_MINOR       12
 #define DM_VERSION_PATCHLEVEL  0
-#define DM_VERSION_EXTRA       "-ioctl (2006-10-12)"
+#define DM_VERSION_EXTRA       "-ioctl (2006-12-14)"
 
 /* Status bits */
 #define DM_READONLY_FLAG       (1 << 0) /* In/Out */
@@ -328,4 +328,9 @@ typedef char ioctl_struct[308];
  */
 #define DM_NOFLUSH_FLAG                (1 << 11) /* In */
 
+/*
+ * Set this to create a request-based device-mapper device.
+ */
+#define DM_REQUEST_BASE_FLAG   (1 << 12) /* In */
+
 #endif                         /* _LINUX_DM_IOCTL_H */
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to