On Fri, Feb 05, 2021 at 01:13:10AM -0800, Yang Yang wrote:
> A hang occurs when the user changes the scheduler queue depth by writing to
> the 'nr_requests' sysfs file of that device.
> 
> The details of the environment in which we found the problem are as follows:
>   an eMMC block device
>   total driver tags: 16
>   default queue_depth: 32
>   kqd->async_depth initialized in kyber_init_sched() with queue_depth=32
> 
> Then we change queue_depth to 256 by writing to the 'nr_requests' sysfs
> file, but kqd->async_depth is not updated after queue_depth changes.
> Now the value of async depth is too small for queue_depth=256, which may
> cause a hang.
> 
> This patch introduces kyber_depth_updated(), so that kyber can update
> async depth when queue depth changes.
> 
> Signed-off-by: Yang Yang <yang.y...@vivo.com>

I wasn't able to reproduce the hang, but this looks correct, and it
passed my tests.

Reviewed-by: Omar Sandoval <osan...@fb.com>

> ---
> v2:
> - Change the commit message
> - Change from sbitmap::depth to 2^sbitmap::shift
> ---
>  block/kyber-iosched.c | 29 +++++++++++++----------------
>  1 file changed, 13 insertions(+), 16 deletions(-)
> 
> diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
> index dc89199bc8c6..17215b6bf482 100644
> --- a/block/kyber-iosched.c
> +++ b/block/kyber-iosched.c
> @@ -353,19 +353,9 @@ static void kyber_timer_fn(struct timer_list *t)
>       }
>  }
>  
> -static unsigned int kyber_sched_tags_shift(struct request_queue *q)
> -{
> -     /*
> -      * All of the hardware queues have the same depth, so we can just grab
> -      * the shift of the first one.
> -      */
> -     return q->queue_hw_ctx[0]->sched_tags->bitmap_tags->sb.shift;
> -}
> -
>  static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q)
>  {
>       struct kyber_queue_data *kqd;
> -     unsigned int shift;
>       int ret = -ENOMEM;
>       int i;
>  
> @@ -400,9 +390,6 @@ static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q)
>               kqd->latency_targets[i] = kyber_latency_targets[i];
>       }
>  
> -     shift = kyber_sched_tags_shift(q);
> -     kqd->async_depth = (1U << shift) * KYBER_ASYNC_PERCENT / 100U;
> -
>       return kqd;
>  
>  err_buckets:
> @@ -458,9 +445,19 @@ static void kyber_ctx_queue_init(struct kyber_ctx_queue *kcq)
>               INIT_LIST_HEAD(&kcq->rq_list[i]);
>  }
>  
> -static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
> +static void kyber_depth_updated(struct blk_mq_hw_ctx *hctx)
>  {
>       struct kyber_queue_data *kqd = hctx->queue->elevator->elevator_data;
> +     struct blk_mq_tags *tags = hctx->sched_tags;
> +     unsigned int shift = tags->bitmap_tags->sb.shift;
> +
> +     kqd->async_depth = (1U << shift) * KYBER_ASYNC_PERCENT / 100U;
> +
> +     sbitmap_queue_min_shallow_depth(tags->bitmap_tags, kqd->async_depth);
> +}
> +
> +static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
> +{
>       struct kyber_hctx_data *khd;
>       int i;
>  
> @@ -502,8 +499,7 @@ static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
>       khd->batching = 0;
>  
>       hctx->sched_data = khd;
> -     sbitmap_queue_min_shallow_depth(hctx->sched_tags->bitmap_tags,
> -                                     kqd->async_depth);
> +     kyber_depth_updated(hctx);
>  
>       return 0;
>  
> @@ -1022,6 +1018,7 @@ static struct elevator_type kyber_sched = {
>               .completed_request = kyber_completed_request,
>               .dispatch_request = kyber_dispatch_request,
>               .has_work = kyber_has_work,
> +             .depth_updated = kyber_depth_updated,
>       },
>  #ifdef CONFIG_BLK_DEBUG_FS
>       .queue_debugfs_attrs = kyber_queue_debugfs_attrs,
> -- 
> 2.17.1
> 

Reply via email to