On 4/24/25 20:19, Daniel Wagner wrote:
When isolcpus=io_queue is enabled, and the last housekeeping CPU for a
given hctx would go offline, there would be no CPU left which handles
the IOs. To prevent IO stalls, prevent offlining housekeeping CPUs which
are still severing isolated CPUs.
            ^^^^^^^^ s/severing/serving/


Signed-off-by: Daniel Wagner <w...@kernel.org>
---
  block/blk-mq.c | 46 ++++++++++++++++++++++++++++++++++++++++++++--
  1 file changed, 44 insertions(+), 2 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index c2697db591091200cdb9f6e082e472b829701e4c..aff17673b773583dfb2b01cb2f5f010c456bd834 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3627,6 +3627,48 @@ static bool blk_mq_hctx_has_requests(struct blk_mq_hw_ctx *hctx)
        return data.has_rq;
  }
+static bool blk_mq_hctx_check_isolcpus_online(struct blk_mq_hw_ctx *hctx, unsigned int cpu)
+{
+       const struct cpumask *hk_mask;
+       int i;
+
+       if (!housekeeping_enabled(HK_TYPE_IO_QUEUE))
+               return true;
+
+       hk_mask = housekeeping_cpumask(HK_TYPE_IO_QUEUE);
+
+       for (i = 0; i < hctx->nr_ctx; i++) {
+               struct blk_mq_ctx *ctx = hctx->ctxs[i];
+
+               if (ctx->cpu == cpu)
+                       continue;
+
+               /*
+                * Check if this context has at least one online
+                * housekeeping CPU; in that case the hardware context is
+                * usable.
+                */
+               if (cpumask_test_cpu(ctx->cpu, hk_mask) &&
+                   cpu_online(ctx->cpu))
+                       break;
+
+               /*
+                * The context doesn't have any online housekeeping CPUs
+                * but there might be an online isolated CPU mapped to
+                * it.
+                */
+               if (cpu_is_offline(ctx->cpu))
+                       continue;
+
+               pr_warn("%s: trying to offline hctx%d but there is still an online isolcpu CPU %d mapped to it\n",
+                       hctx->queue->disk->disk_name,
+                       hctx->queue_num, ctx->cpu);
+               return true;
+       }
+
+       return false;
+}
+
  static bool blk_mq_hctx_has_online_cpu(struct blk_mq_hw_ctx *hctx,
                unsigned int this_cpu)
  {
@@ -3647,7 +3689,7 @@ static bool blk_mq_hctx_has_online_cpu(struct blk_mq_hw_ctx *hctx,
/* this hctx has at least one online CPU */
                if (this_cpu != cpu)
-                       return true;
+                       return blk_mq_hctx_check_isolcpus_online(hctx, this_cpu);
        }
return false;
@@ -3659,7 +3701,7 @@ static int blk_mq_hctx_notify_offline(unsigned int cpu, struct hlist_node *node)
                        struct blk_mq_hw_ctx, cpuhp_online);
if (blk_mq_hctx_has_online_cpu(hctx, cpu))
-               return 0;
+               return -EINVAL;
/*
         * Prevent new request from being allocated on the current hctx.

Otherwise:

Reviewed-by: Hannes Reinecke <h...@suse.de>

Cheers,

Hannes
--
Dr. Hannes Reinecke                  Kernel Storage Architect
h...@suse.de                                +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich

Reply via email to