On 04/05/2018 06:05 PM, Ming Lei wrote:
[...]
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index 90838e998f66..996f8a963026 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -1324,9 +1324,18 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx
> *hctx)
> */
> if (!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask) &&
> cpu_online(hctx->next_cpu)) {
> - printk(KERN_WARNING "run queue from wrong CPU %d, hctx %s\n",
> - raw_smp_processor_id(),
> + int cpu;
> + printk(KERN_WARNING "run queue from wrong CPU %d/%d, hctx-%d
> %s\n",
> + raw_smp_processor_id(), hctx->next_cpu,
> + hctx->queue_num,
> cpumask_empty(hctx->cpumask) ? "inactive": "active");
> + printk("dump CPUs mapped to this hctx:\n");
> + for_each_cpu(cpu, hctx->cpumask)
> + printk("%d ", cpu);
> + printk("\n");
> + printk("nr_cpu_ids is %d, and dump online cpus:\n", nr_cpu_ids);
> + for_each_cpu(cpu, cpu_online_mask)
> + printk("%d ", cpu);
> dump_stack();
> }
>
FWIW, with things like
[ 4.049828] dump CPUs mapped to this hctx:
[ 4.049829] 18
[ 4.049829] 82
[ 4.049830] 146
[ 4.049830] 210
[ 4.049831] 274
[ 4.049832] nr_cpu_ids is 282, and dump online cpus:
[ 4.049833] 0
[ 4.049833] 1
[ 4.049834] 2
[ 4.049834] 3
[ 4.049835] 4
[ 4.049835] 5
[ 4.049836] 6
[ 4.049836] 7
[ 4.049837] 8
[ 4.049837] 9
[ 4.049838] 10
[ 4.049839] 11
[ 4.049839] 12
[ 4.049840] 13
[ 4.049840] 14
[ 4.049841] 15
So the hctx has only "possible CPUs", but all are offline.
Doesn't that always make this run unbound? See blk_mq_hctx_next_cpu() below.
/*
 * It'd be great if the workqueue API had a way to pass
 * in a mask and had some smarts for more clever placement.
 * For now we just round-robin here, switching for every
 * BLK_MQ_CPU_WORK_BATCH queued items.
 */
static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx)
{
	bool retried = false;

	/* Single hw queue: placement doesn't matter, let the wq pick. */
	if (hctx->queue->nr_hw_queues == 1)
		return WORK_CPU_UNBOUND;

	if (--hctx->next_cpu_batch <= 0) {
		int cpu;
select_cpu:
		/* Round-robin to the next online CPU mapped to this hctx. */
		cpu = cpumask_next_and(hctx->next_cpu, hctx->cpumask,
				       cpu_online_mask);
		if (cpu >= nr_cpu_ids)
			cpu = cpumask_first_and(hctx->cpumask,
						cpu_online_mask);

		/*
		 * If no mapped CPU is online, fall back to the first CPU in
		 * the mask so hctx->next_cpu stays valid for the workqueue.
		 */
		hctx->next_cpu = (cpu >= nr_cpu_ids) ?
				cpumask_first(hctx->cpumask) : cpu;
		hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
	}

	/*
	 * Schedule unbound if we couldn't land on an online CPU for this
	 * hctx; this should only happen while a CPU-dead event is handled.
	 * Retry the selection once before giving up.
	 */
	if (!cpu_online(hctx->next_cpu)) {
		if (!retried) {
			retried = true;
			goto select_cpu;
		}

		/*
		 * Keep the batch at 1 so we re-select immediately once CPUs
		 * in hctx->cpumask come back online.
		 */
		hctx->next_cpu_batch = 1;
		return WORK_CPU_UNBOUND;
	}

	return hctx->next_cpu;
}