Now that bi_io_vec and bio can be shared, we can handle arbitrarily
large bios in __make_request by splitting them over multiple
requests.
If we do split a request, we mark every resulting piece as "REQ_NOMERGE".
It is only really necessary to mark the first part as
 NO_BACK_MERGE
and the second part as
 NO_FRONT_MERGE
but that distinction isn't currently supported.

Note that we do not try to merge part of a large bio into
a neighbouring request.  That is a possible future enhancement.


Signed-off-by: Neil Brown <[EMAIL PROTECTED]>

### Diffstat output
 ./block/ll_rw_blk.c      |  122 +++++++++++++++++++++++++++++++++++++++--------
 ./include/linux/blkdev.h |    5 +
 2 files changed, 107 insertions(+), 20 deletions(-)

diff .prev/block/ll_rw_blk.c ./block/ll_rw_blk.c
--- .prev/block/ll_rw_blk.c     2007-07-31 11:21:15.000000000 +1000
+++ ./block/ll_rw_blk.c 2007-07-31 11:21:20.000000000 +1000
@@ -1221,13 +1221,21 @@ static void blk_recalc_rq_segments(struc
        struct req_iterator i;
        int high, highprv = 1;
        struct request_queue *q = rq->q;
+       int curr_size = 0;
+       unsigned short max_sectors;
 
        if (!rq->bio)
                return;
 
+       if (unlikely(blk_pc_request(rq)))
+               max_sectors = q->max_hw_sectors;
+       else
+               max_sectors = q->max_sectors;
+
        cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER);
        hw_seg_size = seg_size = 0;
        phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0;
+       rq->max_allowed_size = 0;
        rq_for_each_segment(rq, i, bv) {
                /*
                 * the trick here is making sure that a high page is never
@@ -1249,9 +1257,7 @@ static void blk_recalc_rq_segments(struc
 
                        seg_size += bv.bv_len;
                        hw_seg_size += bv.bv_len;
-                       bvprv = bv;
-                       prvidx = i.i.i;
-                       continue;
+                       goto same_seg;
                }
 new_segment:
                if (BIOVEC_VIRT_MERGEABLE(&bvprv, &bv) &&
@@ -1267,11 +1273,19 @@ new_hw_segment:
                }
 
                nr_phys_segs++;
+               seg_size = bv.bv_len;
+same_seg:
+               curr_size += bv.bv_len;
                bvprv = bv;
                prvidx = i.i.i;
-               seg_size = bv.bv_len;
                highprv = high;
+
+               if (curr_size <= (max_sectors << 9) &&
+                   nr_phys_segs <= q->max_phys_segments &&
+                   nr_hw_segs <= q->max_hw_segments)
+                       rq->max_allowed_size = curr_size;
        }
+
        rq->last_len = bvprv.bv_offset + bvprv.bv_len;
        rq->last_idx = prvidx;
 
@@ -2924,6 +2938,70 @@ static void init_request_from_bio(struct
        blk_rq_bio_prep(req->q, req, bio);
 }
 
+static void rq_split(struct request *orig, struct request *new)
+{
+
+       /* 'orig' contains exactly one bio, and may refer to
+        * some section in the middle of that bio.
+        * Make 'new' refer to the beginning of that section, up
+        * to orig->max_allowed_size bytes.
+        * Remove from 'orig' everything that went into 'new'.
+        * If 'orig' becomes empty, release its reference to the bio.
+        */
+
+       new->cmd_type = orig->cmd_type;
+       new->cmd_flags |= orig->cmd_flags;      /* keeps flags already on 'new' */
+       new->errors = 0;
+       new->hard_sector = new->sector = orig->hard_sector;
+       new->ioprio = orig->ioprio;
+       new->start_time = jiffies;
+       new->data_len = orig->data_len;
+       new->bio = orig->bio;
+       atomic_inc(&orig->bio->bi_iocnt);       /* 'new' shares the bio with 'orig' */
+       new->biotail = orig->biotail;
+       new->current_nr_sectors = orig->current_nr_sectors;
+
+       new->buffer = orig->buffer;
+       new->rq_disk = orig->rq_disk;
+
+       if (orig->max_allowed_size == orig->hard_nr_sectors << 9) {
+               /* all of orig goes into new */
+               new->nr_sectors = new->hard_nr_sectors
+                       = orig->hard_nr_sectors;
+               new->nr_phys_segments = orig->nr_phys_segments;
+               new->nr_hw_segments = orig->nr_hw_segments;
+               new->hw_front_size = orig->hw_front_size;
+               new->hw_back_size = orig->hw_back_size;
+               new->last_len = orig->last_len;
+               new->last_idx = orig->last_idx;
+
+               orig->nr_sectors = orig->hard_nr_sectors  = 0;
+               atomic_dec(&orig->bio->bi_iocnt);       /* undo the increment above */
+               orig->bio = NULL;
+       } else {
+               /* start of orig goes into new, rest stays in orig */
+               int offset;
+               new->nr_sectors = new->hard_nr_sectors
+                       = (orig->max_allowed_size >> 9);
+               new->data_len = new->nr_sectors << 9;
+               new->biotail = NULL;
+               new->cmd_flags |= REQ_NOMERGE;
+
+               orig->nr_sectors = orig->hard_nr_sectors
+                       -= orig->max_allowed_size >> 9;
+               orig->data_len = orig->nr_sectors << 9;
+               orig->sector = orig->hard_sector += orig->max_allowed_size >> 9;
+               offset = orig->first_offset + orig->max_allowed_size;
+               orig->first_offset = offset;
+               if (offset)
+                       orig->cmd_flags |= REQ_NOMERGE;
+
+               blk_recalc_rq_segments(new);
+               BUG_ON(new->hard_nr_sectors != (new->max_allowed_size >> 9));
+               blk_recalc_rq_segments(orig);
+       }
+}
+
 static int __make_request(struct request_queue *q, struct bio *bio)
 {
        struct request *req;
@@ -3029,24 +3107,28 @@ get_rq:
        if (sync)
                rw_flags |= REQ_RW_SYNC;
 
-       /*
-        * Grab a free request. This is might sleep but can not fail.
-        * Returns with the queue unlocked.
-        */
-       req = get_request_wait(q, rw_flags, bio);
+       while (nreq.hard_nr_sectors) {
+               /*
+                * Grab a free request. This might sleep but can
+                * not fail.  Returns with the queue unlocked.
+                */
+               req = get_request_wait(q, rw_flags, bio);
+               rq_split(&nreq, req);
 
-       /*
-        * After dropping the lock and possibly sleeping here, our request
-        * may now be mergeable after it had proven unmergeable (above).
-        * We don't worry about that case for efficiency. It won't happen
-        * often, and the elevators are able to handle it.
-        */
-       init_request_from_bio(req, bio);
+               /*
+                * After dropping the lock and possibly sleeping here,
+                * our request may now be mergeable after it had
+                * proven unmergeable (above).  We don't worry about
+                * that case for efficiency. It won't happen often,
+                * and the elevators are able to handle it.
+                */
+
+               spin_lock_irq(q->queue_lock);
+               if (elv_queue_empty(q))
+                       blk_plug_device(q);
+               add_request(q, req);
+       }
 
-       spin_lock_irq(q->queue_lock);
-       if (elv_queue_empty(q))
-               blk_plug_device(q);
-       add_request(q, req);
 out:
        if (sync)
                __generic_unplug_device(q);

diff .prev/include/linux/blkdev.h ./include/linux/blkdev.h
--- .prev/include/linux/blkdev.h        2007-07-31 11:21:15.000000000 +1000
+++ ./include/linux/blkdev.h    2007-07-31 11:21:20.000000000 +1000
@@ -262,6 +262,11 @@ struct request {
                                 * so it matches bv_offset+bv_len in
                                 * the simple case.
                                 */
+       int max_allowed_size;   /* If this number (in bytes) is less than
+                                * hard_nr_sectors << 9 (the request size in
+                                * bytes), the request is too big for the
+                                * queue and must be split.
+                                */
 
        struct hlist_node hash; /* merge hash */
        /*
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to