Signed-off-by: Weiping Zhang <zhangweip...@didiglobal.com>
---
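
Example invocation (illustrative values only; with this patch irqmode=3
selects the new NULL_IRQ_WRR mode):

    modprobe null_blk queue_mode=2 irqmode=3 submit_queues=8 \
        submit_queues_read=1 submit_queues_wrr_low=1 \
        submit_queues_wrr_medium=2 submit_queues_wrr_high=3

One submit queue is always reserved for the default type, so the
per-type counts above plus the default queue account for all 8 queues.
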
 drivers/block/null_blk.h      |   7 +
 drivers/block/null_blk_main.c | 294 ++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 288 insertions(+), 13 deletions(-)

diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h
index 34b22d6523ba..aa53c4b6de49 100644
--- a/drivers/block/null_blk.h
+++ b/drivers/block/null_blk.h
@@ -12,6 +12,7 @@
 
 struct nullb_cmd {
        struct list_head list;
+       struct list_head wrr_node;
        struct llist_node ll_list;
        struct __call_single_data csd;
        struct request *rq;
@@ -23,6 +24,8 @@ struct nullb_cmd {
 };
 
 struct nullb_queue {
+       spinlock_t wrr_lock;
+       struct list_head wrr_head;
        unsigned long *tag_map;
        wait_queue_head_t wait;
        unsigned int queue_depth;
@@ -83,6 +86,10 @@ struct nullb {
        struct nullb_queue *queues;
        unsigned int nr_queues;
        char disk_name[DISK_NAME_LEN];
+
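+       /* completion kthread and bookkeeping for NULL_IRQ_WRR mode */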
+       struct task_struct *wrr_thread;
+       atomic_long_t wrrd_inflight;
+       wait_queue_head_t wrrd_wait;
 };
 
 #ifdef CONFIG_BLK_DEV_ZONED
diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c
index 447d635c79a2..100fc0e13036 100644
--- a/drivers/block/null_blk_main.c
+++ b/drivers/block/null_blk_main.c
@@ -4,6 +4,8 @@
  * Shaohua Li <s...@fb.com>
  */
 #include <linux/module.h>
+#include <linux/kthread.h>
+#include <linux/blk-cgroup.h>
 
 #include <linux/moduleparam.h>
 #include <linux/sched.h>
@@ -75,6 +77,7 @@ enum {
        NULL_IRQ_NONE           = 0,
        NULL_IRQ_SOFTIRQ        = 1,
        NULL_IRQ_TIMER          = 2,
+       NULL_IRQ_WRR            = 3,
 };
 
 enum {
@@ -87,10 +90,23 @@ static int g_no_sched;
 module_param_named(no_sched, g_no_sched, int, 0444);
 MODULE_PARM_DESC(no_sched, "No io scheduler");
 
+static int g_tagset_nr_maps = 1;
 static int g_submit_queues = 1;
 module_param_named(submit_queues, g_submit_queues, int, 0444);
 MODULE_PARM_DESC(submit_queues, "Number of submission queues");
 
+#define NULLB_SUBMIT_QUEUE(attr, count) \
+static int g_submit_queues_##attr = count; \
+module_param_named(submit_queues_##attr, g_submit_queues_##attr, int, 0444); \
+MODULE_PARM_DESC(submit_queues_##attr, "Number of " #attr " submission queues");
+
+NULLB_SUBMIT_QUEUE(default, 1)
+NULLB_SUBMIT_QUEUE(read, 0)
+NULLB_SUBMIT_QUEUE(poll, 0)
+NULLB_SUBMIT_QUEUE(wrr_low, 0)
+NULLB_SUBMIT_QUEUE(wrr_medium, 0)
+NULLB_SUBMIT_QUEUE(wrr_high, 0)
+
 static int g_home_node = NUMA_NO_NODE;
 module_param_named(home_node, g_home_node, int, 0444);
 MODULE_PARM_DESC(home_node, "Home node for the device");
@@ -158,7 +174,7 @@ static int g_irqmode = NULL_IRQ_SOFTIRQ;
 static int null_set_irqmode(const char *str, const struct kernel_param *kp)
 {
        return null_param_store_val(str, &g_irqmode, NULL_IRQ_NONE,
-                                       NULL_IRQ_TIMER);
+                                       NULL_IRQ_WRR);
 }
 
 static const struct kernel_param_ops null_irqmode_param_ops = {
@@ -643,6 +659,22 @@ static void null_cmd_end_timer(struct nullb_cmd *cmd)
        hrtimer_start(&cmd->timer, kt, HRTIMER_MODE_REL);
 }
 
+static void null_cmd_wrr_insert(struct nullb_cmd *cmd)
+{
+       struct nullb_queue *nq = cmd->nq;
+       struct nullb *nullb = nq->dev->nullb;
+       unsigned long flags;
+
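+       /* park the command; completion is deferred to the wrr thread */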
+       INIT_LIST_HEAD(&cmd->wrr_node);
+       spin_lock_irqsave(&nq->wrr_lock, flags);
+       list_add_tail(&cmd->wrr_node, &nq->wrr_head);
+       spin_unlock_irqrestore(&nq->wrr_lock, flags);
+
+       /* wake up wrr_thread if needed */
+       if (atomic_long_inc_return(&nullb->wrrd_inflight) == 1)
+               wake_up_interruptible(&nullb->wrrd_wait);
+}
+
 static void null_complete_rq(struct request *rq)
 {
        end_cmd(blk_mq_rq_to_pdu(rq));
@@ -1236,6 +1268,9 @@ static blk_status_t null_handle_cmd(struct nullb_cmd *cmd)
        case NULL_IRQ_TIMER:
                null_cmd_end_timer(cmd);
                break;
+       case NULL_IRQ_WRR:
+               null_cmd_wrr_insert(cmd);
+               break;
        }
        return BLK_STS_OK;
 }
@@ -1351,10 +1386,64 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
        return null_handle_cmd(cmd);
 }
 
+static inline int null_hctx_nr_queue(int type)
+{
+       int ret;
+
+       switch (type) {
+       case HCTX_TYPE_DEFAULT:
+               ret = g_submit_queues_default;
+               break;
+       case HCTX_TYPE_READ:
+               ret = g_submit_queues_read;
+               break;
+       case HCTX_TYPE_POLL:
+               ret = g_submit_queues_poll;
+               break;
+       case HCTX_TYPE_WRR_LOW:
+               ret = g_submit_queues_wrr_low;
+               break;
+       case HCTX_TYPE_WRR_MEDIUM:
+               ret = g_submit_queues_wrr_medium;
+               break;
+       case HCTX_TYPE_WRR_HIGH:
+               ret = g_submit_queues_wrr_high;
+               break;
+       default:
+               ret = 0;
+               break;
+       }
+
+       return ret;
+}
+
+static int null_map_queues(struct blk_mq_tag_set *set)
+{
+       int i, offset;
+
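+       /* hand each populated map type a contiguous range of hw queues */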
+       for (i = 0, offset = 0; i < set->nr_maps; i++) {
+               struct blk_mq_queue_map *map = &set->map[i];
+
+               map->nr_queues = null_hctx_nr_queue(i);
+
+               if (!map->nr_queues) {
+                       BUG_ON(i == HCTX_TYPE_DEFAULT);
+                       continue;
+               }
+
+               map->queue_offset = offset;
+               blk_mq_map_queues(map);
+               offset += map->nr_queues;
+       }
+
+       return 0;
+}
+
 static const struct blk_mq_ops null_mq_ops = {
        .queue_rq       = null_queue_rq,
        .complete       = null_complete_rq,
        .timeout        = null_timeout_rq,
+       .map_queues     = null_map_queues,
 };
 
 static void cleanup_queue(struct nullb_queue *nq)
@@ -1397,6 +1486,9 @@ static void null_del_dev(struct nullb *nullb)
+       /* stop the wrr thread before the queues it drains are torn down */
+       if (dev->irqmode == NULL_IRQ_WRR)
+               kthread_stop(nullb->wrr_thread);
+
        cleanup_queues(nullb);
        if (null_cache_active(nullb))
                null_free_device_storage(nullb->dev, true);
        kfree(nullb);
        dev->nullb = NULL;
 }
@@ -1435,6 +1527,8 @@ static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
        init_waitqueue_head(&nq->wait);
        nq->queue_depth = nullb->queue_depth;
        nq->dev = nullb->dev;
+       INIT_LIST_HEAD(&nq->wrr_head);
+       spin_lock_init(&nq->wrr_lock);
 }
 
 static void null_init_queues(struct nullb *nullb)
@@ -1549,6 +1643,7 @@ static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set)
                                                g_submit_queues;
        set->queue_depth = nullb ? nullb->dev->hw_queue_depth :
                                                g_hw_queue_depth;
+       set->nr_maps = g_tagset_nr_maps;
        set->numa_node = nullb ? nullb->dev->home_node : g_home_node;
        set->cmd_size   = sizeof(struct nullb_cmd);
        set->flags = BLK_MQ_F_SHOULD_MERGE;
@@ -1576,7 +1671,7 @@ static void null_validate_conf(struct nullb_device *dev)
                dev->submit_queues = 1;
 
        dev->queue_mode = min_t(unsigned int, dev->queue_mode, NULL_Q_MQ);
-       dev->irqmode = min_t(unsigned int, dev->irqmode, NULL_IRQ_TIMER);
+       dev->irqmode = min_t(unsigned int, dev->irqmode, NULL_IRQ_WRR);
 
        /* Do memory allocation, so set blocking */
        if (dev->memory_backed)
@@ -1616,6 +1711,72 @@ static bool null_setup_fault(void)
        return true;
 }
 
+static inline void null_wrr_handle_map(struct nullb *nullb,
+                               struct blk_mq_queue_map *map, int batch)
+{
+       int i, nr;
+       struct nullb_queue *nq;
+       struct nullb_cmd *cmd, *tmp;
+       unsigned long flags;
+
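+       /* complete up to 'batch' parked commands on each hw queue in this map */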
+       for (i = 0; i < map->nr_queues; i++) {
+               nq = &nullb->queues[i + map->queue_offset];
+               nr = batch;
+               spin_lock_irqsave(&nq->wrr_lock, flags);
+               list_for_each_entry_safe(cmd, tmp, &nq->wrr_head, wrr_node) {
+                       list_del(&cmd->wrr_node);
+                       blk_mq_end_request(cmd->rq, cmd->error);
+                       atomic_long_dec(&nullb->wrrd_inflight);
+                       if (--nr == 0)
+                               break;
+               }
+               spin_unlock_irqrestore(&nq->wrr_lock, flags);
+       }
+}
+
+static int null_wrr_thread(void *data)
+{
+       struct nullb *nullb = (struct nullb *)data;
+       struct blk_mq_tag_set *set = nullb->tag_set;
+       struct blk_mq_queue_map *map;
+       DEFINE_WAIT(wait);
+
+       while (1) {
+               if (kthread_should_stop())
+                       goto out;
+
+               cond_resched();
+
+               /*
+                * Drain each hardware queue class in a weighted round-robin
+                * fashion: high gets a batch of 8 per pass, medium 4, and
+                * the remaining classes 1 each.
+                */
+
+               map = &set->map[HCTX_TYPE_WRR_HIGH];
+               null_wrr_handle_map(nullb, map, 8);
+
+               map = &set->map[HCTX_TYPE_WRR_MEDIUM];
+               null_wrr_handle_map(nullb, map, 4);
+
+               map = &set->map[HCTX_TYPE_WRR_LOW];
+               null_wrr_handle_map(nullb, map, 1);
+
+               map = &set->map[HCTX_TYPE_POLL];
+               null_wrr_handle_map(nullb, map, 1);
+
+               map = &set->map[HCTX_TYPE_READ];
+               null_wrr_handle_map(nullb, map, 1);
+
+               map = &set->map[HCTX_TYPE_DEFAULT];
+               null_wrr_handle_map(nullb, map, 1);
+
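+               /* prepare first so a wakeup between check and schedule() is not lost */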
+               prepare_to_wait(&nullb->wrrd_wait, &wait, TASK_INTERRUPTIBLE);
+               if (atomic_long_read(&nullb->wrrd_inflight) == 0)
+                       schedule();
+               finish_wait(&nullb->wrrd_wait, &wait);
+       }
+
+out:
+       return 0;
+}
+
 static int null_add_dev(struct nullb_device *dev)
 {
        struct nullb *nullb;
@@ -1706,15 +1867,27 @@ static int null_add_dev(struct nullb_device *dev)
 
        sprintf(nullb->disk_name, "nullb%d", nullb->index);
 
+       if (dev->irqmode == NULL_IRQ_WRR) {
+               init_waitqueue_head(&nullb->wrrd_wait);
+               atomic_long_set(&nullb->wrrd_inflight, 0);
+               nullb->wrr_thread = kthread_run(null_wrr_thread, nullb,
+                       "wrrd_%s", nullb->disk_name);
+               if (IS_ERR(nullb->wrr_thread)) {
+                       rv = PTR_ERR(nullb->wrr_thread);
+                       goto out_cleanup_zone;
+               }
+       }
+
        rv = null_gendisk_register(nullb);
        if (rv)
-               goto out_cleanup_zone;
+               goto out_cleanup_wrrd;
 
        mutex_lock(&lock);
        list_add_tail(&nullb->list, &nullb_list);
        mutex_unlock(&lock);
 
        return 0;
+out_cleanup_wrrd:
+       if (dev->irqmode == NULL_IRQ_WRR)
+               kthread_stop(nullb->wrr_thread);
 out_cleanup_zone:
        if (dev->zoned)
                null_zone_exit(dev);
@@ -1731,6 +1904,106 @@ static int null_add_dev(struct nullb_device *dev)
        return rv;
 }
 
+static int null_verify_queues(void)
+{
+       int queues, nr;
+
+       if (g_queue_mode == NULL_Q_MQ && g_use_per_node_hctx) {
+               if (g_submit_queues != nr_online_nodes) {
+                       pr_warn("null_blk: submit_queues param is set to %u.\n",
+                                                       nr_online_nodes);
+                       g_submit_queues = nr_online_nodes;
+               }
+       } else if (g_submit_queues > nr_cpu_ids)
+               g_submit_queues = nr_cpu_ids;
+       else if (g_submit_queues <= 0)
+               g_submit_queues = 1;
+
+       /* at least leave one queue for default */
+       g_submit_queues_default = 1;
+       queues = g_submit_queues - 1;
+       if (queues == 0)
+               goto def;
+
+       /* read queues */
+       nr = g_submit_queues_read;
+       if (nr < 0 || nr > queues) {
+               pr_warn("null_blk: invalid read queue count\n");
+               return -EINVAL;
+       }
+       g_tagset_nr_maps++;
+       queues -= nr;
+       if (queues == 0)
+               goto read;
+
+       /* poll queues */
+       nr = g_submit_queues_poll;
+       if (nr < 0 || nr > queues) {
+               pr_warn("null_blk: invalid poll queue count\n");
+               return -EINVAL;
+       }
+       g_tagset_nr_maps++;
+       queues -= nr;
+       if (queues == 0)
+               goto poll;
+
+       /* wrr_low queues */
+       nr = g_submit_queues_wrr_low;
+       if (nr < 0 || nr > queues) {
+               pr_warn("null_blk: invalid wrr_low queue count\n");
+               return -EINVAL;
+       }
+       g_tagset_nr_maps++;
+       queues -= nr;
+       if (queues == 0)
+               goto wrr_low;
+
+       /* wrr_medium queues */
+       nr = g_submit_queues_wrr_medium;
+       if (nr < 0 || nr > queues) {
+               pr_warn("null_blk: invalid wrr_medium queue count\n");
+               return -EINVAL;
+       }
+       g_tagset_nr_maps++;
+       queues -= nr;
+       if (queues == 0)
+               goto wrr_medium;
+
+       /* wrr_high queues */
+       nr = g_submit_queues_wrr_high;
+       if (nr < 0 || nr > queues) {
+               pr_warn("null_blk: invalid wrr_medium queue count\n");
+               return -EINVAL;
+       }
+       g_tagset_nr_maps++;
+       queues -= nr;
+
+       /* fold all remaining queues into the default group */
+       g_submit_queues_default += queues;
+
+       goto out;
+
+def:
+       g_submit_queues_read = 0;
+read:
+       g_submit_queues_poll = 0;
+poll:
+       g_submit_queues_wrr_low = 0;
+wrr_low:
+       g_submit_queues_wrr_medium = 0;
+wrr_medium:
+       g_submit_queues_wrr_high = 0;
+out:
+       pr_info("null_blk: total submit queues:%d, nr_map:%d, default:%d, "
+               "read:%d, poll:%d, wrr_low:%d, wrr_medium:%d wrr_high:%d\n",
+               g_submit_queues, g_tagset_nr_maps, g_submit_queues_default,
+               g_submit_queues_read, g_submit_queues_poll,
+               g_submit_queues_wrr_low, g_submit_queues_wrr_medium,
+               g_submit_queues_wrr_high);
+
+       return 0;
+}
+
 static int __init null_init(void)
 {
        int ret = 0;
@@ -1758,16 +2031,11 @@ static int __init null_init(void)
                pr_err("null_blk: legacy IO path no longer available\n");
                return -EINVAL;
        }
-       if (g_queue_mode == NULL_Q_MQ && g_use_per_node_hctx) {
-               if (g_submit_queues != nr_online_nodes) {
-                       pr_warn("null_blk: submit_queues param is set to %u.\n",
-                                                       nr_online_nodes);
-                       g_submit_queues = nr_online_nodes;
-               }
-       } else if (g_submit_queues > nr_cpu_ids)
-               g_submit_queues = nr_cpu_ids;
-       else if (g_submit_queues <= 0)
-               g_submit_queues = 1;
+
+       if (null_verify_queues()) {
+               pr_err("null_blk: invalid submit queue parameter\n");
+               return -EINVAL;
+       }
 
        if (g_queue_mode == NULL_Q_MQ && shared_tags) {
                ret = null_init_tag_set(NULL, &tag_set);
-- 
2.14.1
