When dispatching write requests to a zoned block device, only allow
requests targeting an unlocked zone. Requests targeting a locked zone
are left in the scheduler queue to preserve the initial write order.
If no write request can be dispatched, allow reads to be dispatched
even if the write batch is not done.

To ensure that the search for an appropriate write request is atomic
in deadline_fifo_request() and deadline_next_request() with regard to
write requests zone lock state, introduce the spinlock zone_lock.
Holding this lock while doing the search in these functions as well as
when unlocking the target zone of a completed write request in
dd_completed_request() ensures that the search does not pick up a write
request in the middle of a zone queued write sequence.

Signed-off-by: Damien Le Moal <damien.lem...@wdc.com>
---
 block/mq-deadline.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 68 insertions(+), 3 deletions(-)

diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 93a1aede5dd0..cbca24e1f96e 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -61,6 +61,7 @@ struct deadline_data {
        spinlock_t lock;
        struct list_head dispatch;
 
+       spinlock_t zone_lock;
        unsigned long *zones_wlock;
 };
 
@@ -244,12 +245,24 @@ static void deadline_wlock_zone(struct deadline_data *dd,
 
 /*
  * Write unlock the target zone of a write request.
+ * Clearing the target zone write lock bit is done with the scheduler zone_lock
+ * spinlock held so that deadline_next_request() and deadline_fifo_request()
+ * cannot see the lock state of a zone change due to a request completion during
+ * their eventual search for an appropriate write request. Otherwise, for a zone
+ * with multiple write requests queued, a non-sequential write request
+ * can be chosen.
  */
 static void deadline_wunlock_zone(struct deadline_data *dd,
                                  struct request *rq)
 {
+       unsigned long flags;
+
+       spin_lock_irqsave(&dd->zone_lock, flags);
+
        WARN_ON_ONCE(!test_and_clear_bit(blk_rq_zone_no(rq), dd->zones_wlock));
        deadline_clear_request_zone_wlock(rq);
+
+       spin_unlock_irqrestore(&dd->zone_lock, flags);
 }
 
 /*
@@ -279,19 +292,47 @@ static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
 }
 
 /*
+ * Test if a request can be dispatched.
+ */
+static inline bool deadline_can_dispatch_request(struct deadline_data *dd,
+                                                struct request *rq)
+{
+       if (!deadline_request_needs_zone_wlock(dd, rq))
+               return true;
+       return !deadline_zone_is_wlocked(dd, rq);
+}
+
+/*
  * For the specified data direction, return the next request to
  * dispatch using arrival ordered lists.
  */
 static struct request *
 deadline_fifo_request(struct deadline_data *dd, int data_dir)
 {
+       struct request *rq;
+       unsigned long flags;
+
        if (WARN_ON_ONCE(data_dir != READ && data_dir != WRITE))
                return NULL;
 
        if (list_empty(&dd->fifo_list[data_dir]))
                return NULL;
 
-       return rq_entry_fifo(dd->fifo_list[data_dir].next);
+       if (!dd->zones_wlock || data_dir == READ)
+               return rq_entry_fifo(dd->fifo_list[data_dir].next);
+
+       spin_lock_irqsave(&dd->zone_lock, flags);
+
+       list_for_each_entry(rq, &dd->fifo_list[WRITE], queuelist) {
+               if (deadline_can_dispatch_request(dd, rq))
+                       goto out;
+       }
+       rq = NULL;
+
+out:
+       spin_unlock_irqrestore(&dd->zone_lock, flags);
+
+       return rq;
 }
 
 /*
@@ -301,10 +342,25 @@ deadline_fifo_request(struct deadline_data *dd, int data_dir)
 static struct request *
 deadline_next_request(struct deadline_data *dd, int data_dir)
 {
+       struct request *rq;
+       unsigned long flags;
+
        if (WARN_ON_ONCE(data_dir != READ && data_dir != WRITE))
                return NULL;
 
-       return dd->next_rq[data_dir];
+       rq = dd->next_rq[data_dir];
+       if (!dd->zones_wlock || data_dir == READ)
+               return rq;
+
+       spin_lock_irqsave(&dd->zone_lock, flags);
+       while (rq) {
+               if (deadline_can_dispatch_request(dd, rq))
+                       break;
+               rq = deadline_latter_request(rq);
+       }
+       spin_unlock_irqrestore(&dd->zone_lock, flags);
+
+       return rq;
 }
 
 /*
@@ -346,7 +402,8 @@ static struct request *__dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
        if (reads) {
                BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[READ]));
 
-               if (writes && (dd->starved++ >= dd->writes_starved))
+               if (deadline_fifo_request(dd, WRITE) &&
+                   (dd->starved++ >= dd->writes_starved))
                        goto dispatch_writes;
 
                data_dir = READ;
@@ -391,6 +448,13 @@ static struct request *__dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
                rq = next_rq;
        }
 
+       /*
+        * If we only have writes queued and none of them can be dispatched,
+        * rq will be NULL.
+        */
+       if (!rq)
+               return NULL;
+
        dd->batching = 0;
 
 dispatch_request:
@@ -490,6 +554,7 @@ static int dd_init_queue(struct request_queue *q, struct elevator_type *e)
        spin_lock_init(&dd->lock);
        INIT_LIST_HEAD(&dd->dispatch);
 
+       spin_lock_init(&dd->zone_lock);
        ret = deadline_init_zones_wlock(q, dd);
        if (ret)
                goto out_free_dd;
-- 
2.13.6

Reply via email to