Create a per-task io poll context for both the IO submission and poll
tasks if the queue is bio based and supports polling.

The io poll context includes two queues: a submission queue (sq) for
storing the HIPRI bio submission result (cookie) together with the bio
itself, written by the submission task and read by the poll task; and a
polling queue (pq) for holding data moved over from the sq, used only in
the poll context for running bio polling.
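
For orientation, here is a minimal userspace sketch (not part of this
patch; poll_ctx, grp_list_size, SQ_SZ and PQ_SZ are simplified stand-ins
for the kernel structures and helpers) of how a single allocation is
carved into the context header followed by the sq and pq tables, as
bio_poll_ctx_alloc()/bio_poll_ctx_init() below do:

  #include <stdio.h>
  #include <stdlib.h>

  struct bio_grp_list_data {
          void *grp_data;         /* data shared by all bios in the group */
          void *list;             /* stand-in for struct bio_list */
  };

  struct bio_grp_list {
          unsigned int max_nr_grps, nr_grps;
          struct bio_grp_list_data head[];
  };

  struct poll_ctx {               /* stand-in for struct blk_bio_poll_ctx */
          struct bio_grp_list *sq;
          struct bio_grp_list *pq;
  };

  #define SQ_SZ   32U
  #define PQ_SZ   (SQ_SZ * 2)

  static size_t grp_list_size(unsigned int nr_grps)
  {
          return sizeof(struct bio_grp_list) +
                 nr_grps * sizeof(struct bio_grp_list_data);
  }

  int main(void)
  {
          /* one allocation: ctx header, then the sq table, then the pq table */
          struct poll_ctx *pc = calloc(1, sizeof(*pc) +
                          grp_list_size(SQ_SZ) + grp_list_size(PQ_SZ));

          if (!pc)
                  return 1;

          pc->sq = (struct bio_grp_list *)((char *)pc + sizeof(*pc));
          pc->sq->max_nr_grps = SQ_SZ;

          pc->pq = (struct bio_grp_list *)((char *)pc->sq +
                          grp_list_size(SQ_SZ));
          pc->pq->max_nr_grps = PQ_SZ;

          printf("sq at offset %zu, pq at offset %zu\n",
                 (size_t)((char *)pc->sq - (char *)pc),
                 (size_t)((char *)pc->pq - (char *)pc));
          free(pc);
          return 0;
  }

Keeping the context and both queues in one block means teardown is a
single kfree() of ioc->data, as done in free_io_context() below.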

Following patches will add support for bio polling.

Signed-off-by: Ming Lei <ming....@redhat.com>
---
 block/blk-core.c          | 71 ++++++++++++++++++++++++++++++++-------
 block/blk-ioc.c           |  1 +
 block/blk-mq.c            | 14 ++++++++
 block/blk.h               | 46 +++++++++++++++++++++++++
 include/linux/iocontext.h |  2 ++
 5 files changed, 122 insertions(+), 12 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index d58f8a0c80de..0b00c21cbefb 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -792,16 +792,59 @@ static inline blk_status_t blk_check_zone_append(struct request_queue *q,
        return BLK_STS_OK;
 }
 
-static inline void blk_create_io_context(struct request_queue *q)
+static inline struct blk_bio_poll_ctx *blk_get_bio_poll_ctx(void)
 {
-       /*
-        * Various block parts want %current->io_context, so allocate it up
-        * front rather than dealing with lots of pain to allocate it only
-        * where needed. This may fail and the block layer knows how to live
-        * with it.
-        */
-       if (unlikely(!current->io_context))
-               create_task_io_context(current, GFP_ATOMIC, q->node);
+       struct io_context *ioc = current->io_context;
+
+       return ioc ? ioc->data : NULL;
+}
+
+static inline unsigned int bio_grp_list_size(unsigned int nr_grps)
+{
+       return sizeof(struct bio_grp_list) + nr_grps *
+               sizeof(struct bio_grp_list_data);
+}
+
+static void bio_poll_ctx_init(struct blk_bio_poll_ctx *pc)
+{
+       pc->sq = (void *)pc + sizeof(*pc);
+       pc->sq->max_nr_grps = BLK_BIO_POLL_SQ_SZ;
+
+       pc->pq = (void *)pc->sq + bio_grp_list_size(BLK_BIO_POLL_SQ_SZ);
+       pc->pq->max_nr_grps = BLK_BIO_POLL_PQ_SZ;
+
+       spin_lock_init(&pc->sq_lock);
+       mutex_init(&pc->pq_lock);
+}
+
+void bio_poll_ctx_alloc(struct io_context *ioc)
+{
+       struct blk_bio_poll_ctx *pc;
+       unsigned int size = sizeof(*pc) +
+               bio_grp_list_size(BLK_BIO_POLL_SQ_SZ) +
+               bio_grp_list_size(BLK_BIO_POLL_PQ_SZ);
+
+       pc = kzalloc(size, GFP_ATOMIC);
+       if (pc) {
+               bio_poll_ctx_init(pc);
+               if (cmpxchg(&ioc->data, NULL, (void *)pc))
+                       kfree(pc);
+       }
+}
+
+static inline bool blk_queue_support_bio_poll(struct request_queue *q)
+{
+       return !queue_is_mq(q) && blk_queue_poll(q);
+}
+
+static inline void blk_bio_poll_preprocess(struct request_queue *q,
+               struct bio *bio)
+{
+       if (!(bio->bi_opf & REQ_HIPRI))
+               return;
+
+       if (!blk_queue_poll(q) || (!queue_is_mq(q) && !blk_get_bio_poll_ctx()))
+               bio->bi_opf &= ~REQ_HIPRI;
 }
 
 static noinline_for_stack bool submit_bio_checks(struct bio *bio)
@@ -848,10 +891,14 @@ static noinline_for_stack bool submit_bio_checks(struct bio *bio)
                }
        }
 
-       blk_create_io_context(q);
+       /*
+        * Create the per-task io poll queue if bio polling is supported
+        * and this is a HIPRI bio.
+        */
+       blk_create_io_context(q, blk_queue_support_bio_poll(q) &&
+                       (bio->bi_opf & REQ_HIPRI));
 
-       if (!blk_queue_poll(q))
-               bio->bi_opf &= ~REQ_HIPRI;
+       blk_bio_poll_preprocess(q, bio);
 
        switch (bio_op(bio)) {
        case REQ_OP_DISCARD:
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index b0cde18c4b8c..5574c398eff6 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -19,6 +19,7 @@ static struct kmem_cache *iocontext_cachep;
 
 static inline void free_io_context(struct io_context *ioc)
 {
+       kfree(ioc->data);
        kmem_cache_free(iocontext_cachep, ioc);
 }
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 63c81df3b8b5..c832faa52ca0 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3852,6 +3852,17 @@ static bool blk_mq_poll_hybrid(struct request_queue *q,
        return blk_mq_poll_hybrid_sleep(q, rq);
 }
 
+static int blk_bio_poll(struct request_queue *q, blk_qc_t cookie, bool spin)
+{
+       /*
+        * Create the poll queue for holding poll bios and their cookies
+        * moved over from the submission queue.
+        */
+       blk_create_io_context(q, true);
+
+       return 0;
+}
+
 /**
  * blk_poll - poll for IO completions
  * @q:  the queue
@@ -3875,6 +3886,9 @@ int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin)
        if (current->plug)
                blk_flush_plug_list(current->plug, false);
 
+       if (!queue_is_mq(q))
+               return blk_bio_poll(q, cookie, spin);
+
        hctx = q->queue_hw_ctx[blk_qc_t_to_queue_num(cookie)];
 
        /*
diff --git a/block/blk.h b/block/blk.h
index 3b53e44b967e..ae58a706327e 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -357,4 +357,50 @@ int bio_add_hw_page(struct request_queue *q, struct bio *bio,
                struct page *page, unsigned int len, unsigned int offset,
                unsigned int max_sectors, bool *same_page);
 
+/* gather bios that belong to the same group into one list */
+struct bio_grp_list_data {
+       /* group data */
+       void *grp_data;
+
+       /* all bios in this list share same 'grp_data' */
+       struct bio_list list;
+};
+
+struct bio_grp_list {
+       unsigned int max_nr_grps, nr_grps;
+       struct bio_grp_list_data head[];
+};
+
+struct blk_bio_poll_ctx {
+       spinlock_t sq_lock;
+       struct bio_grp_list *sq;
+
+       struct mutex pq_lock;
+       struct bio_grp_list *pq;
+};
+
+#define BLK_BIO_POLL_SQ_SZ             32U
+#define BLK_BIO_POLL_PQ_SZ             (BLK_BIO_POLL_SQ_SZ * 2)
+
+void bio_poll_ctx_alloc(struct io_context *ioc);
+
+static inline void blk_create_io_context(struct request_queue *q,
+               bool need_poll_ctx)
+{
+       struct io_context *ioc;
+
+       /*
+        * Various block parts want %current->io_context, so allocate it up
+        * front rather than dealing with lots of pain to allocate it only
+        * where needed. This may fail and the block layer knows how to live
+        * with it.
+        */
+       if (unlikely(!current->io_context))
+               create_task_io_context(current, GFP_ATOMIC, q->node);
+
+       ioc = current->io_context;
+       if (need_poll_ctx && unlikely(ioc && !ioc->data))
+               bio_poll_ctx_alloc(ioc);
+}
+
 #endif /* BLK_INTERNAL_H */
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index 0a9dc40b7be8..f9a467571356 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -110,6 +110,8 @@ struct io_context {
        struct io_cq __rcu      *icq_hint;
        struct hlist_head       icq_list;
 
+       void                    *data;
+
        struct work_struct release_work;
 };
 
-- 
2.29.2
