Re: [PATCH 1/8] blk-mq: add blk_mq_alloc_request_hctx

2016-06-08 Thread Christoph Hellwig
What we really nee to do is to always set the NOWAIT flag (we have a
reserved tag for connect anyway), and thus never trigger the code
deep down in bt_get that might switch to a different hctx.

Below is the version that I've tested together with the NVMe change
to use the NOWAIT flag:

---
>From 2346a4fdcc57c70a671e1329f7550d91d4e9d8a8 Mon Sep 17 00:00:00 2001
From: Ming Lin 
Date: Mon, 30 Nov 2015 19:45:48 +0100
Subject: blk-mq: add blk_mq_alloc_request_hctx

For some protocols like NVMe over Fabrics we need to be able to send
initialization commands to a specific queue.

Based on an earlier patch from Christoph Hellwig .

Signed-off-by: Ming Lin 
[hch: disallow sleeping allocation, req_op fixes]
Signed-off-by: Christoph Hellwig 
---
 block/blk-mq.c | 39 +++
 include/linux/blk-mq.h |  2 ++
 2 files changed, 41 insertions(+)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 13f4603..7aa60c4 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -267,6 +267,45 @@ struct request *blk_mq_alloc_request(struct request_queue 
*q, int rw,
 }
 EXPORT_SYMBOL(blk_mq_alloc_request);
 
+struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw,
+   unsigned int flags, unsigned int hctx_idx)
+{
+   struct blk_mq_hw_ctx *hctx;
+   struct blk_mq_ctx *ctx;
+   struct request *rq;
+   struct blk_mq_alloc_data alloc_data;
+   int ret;
+
+   /*
+* If the tag allocator sleeps we could get an allocation for a
+* different hardware context.  No need to complicate the low level
+* allocator for this for the rare use case of a command tied to
+* a specific queue.
+*/
+   if (WARN_ON_ONCE(!(flags & BLK_MQ_REQ_NOWAIT)))
+   return ERR_PTR(-EINVAL);
+
+   if (hctx_idx >= q->nr_hw_queues)
+   return ERR_PTR(-EIO);
+
+   ret = blk_queue_enter(q, true);
+   if (ret)
+   return ERR_PTR(ret);
+
+   hctx = q->queue_hw_ctx[hctx_idx];
+   ctx = __blk_mq_get_ctx(q, cpumask_first(hctx->cpumask));
+
+   blk_mq_set_alloc_data(_data, q, flags, ctx, hctx);
+   rq = __blk_mq_alloc_request(_data, rw, 0);
+   if (!rq) {
+   blk_queue_exit(q);
+   return ERR_PTR(-EWOULDBLOCK);
+   }
+
+   return rq;
+}
+EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx);
+
 static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx,
  struct blk_mq_ctx *ctx, struct request *rq)
 {
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index faa7d5c2..e43bbff 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -198,6 +198,8 @@ enum {
 
 struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
unsigned int flags);
+struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int op,
+   unsigned int flags, unsigned int hctx_idx);
 struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag);
 struct cpumask *blk_mq_tags_cpumask(struct blk_mq_tags *tags);
 
-- 
2.1.4



Re: [PATCH 1/8] blk-mq: add blk_mq_alloc_request_hctx

2016-06-08 Thread Christoph Hellwig
What we really nee to do is to always set the NOWAIT flag (we have a
reserved tag for connect anyway), and thus never trigger the code
deep down in bt_get that might switch to a different hctx.

Below is the version that I've tested together with the NVMe change
to use the NOWAIT flag:

---
>From 2346a4fdcc57c70a671e1329f7550d91d4e9d8a8 Mon Sep 17 00:00:00 2001
From: Ming Lin 
Date: Mon, 30 Nov 2015 19:45:48 +0100
Subject: blk-mq: add blk_mq_alloc_request_hctx

For some protocols like NVMe over Fabrics we need to be able to send
initialization commands to a specific queue.

Based on an earlier patch from Christoph Hellwig .

Signed-off-by: Ming Lin 
[hch: disallow sleeping allocation, req_op fixes]
Signed-off-by: Christoph Hellwig 
---
 block/blk-mq.c | 39 +++
 include/linux/blk-mq.h |  2 ++
 2 files changed, 41 insertions(+)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 13f4603..7aa60c4 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -267,6 +267,45 @@ struct request *blk_mq_alloc_request(struct request_queue 
*q, int rw,
 }
 EXPORT_SYMBOL(blk_mq_alloc_request);
 
+struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw,
+   unsigned int flags, unsigned int hctx_idx)
+{
+   struct blk_mq_hw_ctx *hctx;
+   struct blk_mq_ctx *ctx;
+   struct request *rq;
+   struct blk_mq_alloc_data alloc_data;
+   int ret;
+
+   /*
+* If the tag allocator sleeps we could get an allocation for a
+* different hardware context.  No need to complicate the low level
+* allocator for this for the rare use case of a command tied to
+* a specific queue.
+*/
+   if (WARN_ON_ONCE(!(flags & BLK_MQ_REQ_NOWAIT)))
+   return ERR_PTR(-EINVAL);
+
+   if (hctx_idx >= q->nr_hw_queues)
+   return ERR_PTR(-EIO);
+
+   ret = blk_queue_enter(q, true);
+   if (ret)
+   return ERR_PTR(ret);
+
+   hctx = q->queue_hw_ctx[hctx_idx];
+   ctx = __blk_mq_get_ctx(q, cpumask_first(hctx->cpumask));
+
+   blk_mq_set_alloc_data(_data, q, flags, ctx, hctx);
+   rq = __blk_mq_alloc_request(_data, rw, 0);
+   if (!rq) {
+   blk_queue_exit(q);
+   return ERR_PTR(-EWOULDBLOCK);
+   }
+
+   return rq;
+}
+EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx);
+
 static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx,
  struct blk_mq_ctx *ctx, struct request *rq)
 {
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index faa7d5c2..e43bbff 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -198,6 +198,8 @@ enum {
 
 struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
unsigned int flags);
+struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int op,
+   unsigned int flags, unsigned int hctx_idx);
 struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag);
 struct cpumask *blk_mq_tags_cpumask(struct blk_mq_tags *tags);
 
-- 
2.1.4



Re: [PATCH 1/8] blk-mq: add blk_mq_alloc_request_hctx

2016-06-08 Thread Christoph Hellwig
On Tue, Jun 07, 2016 at 10:49:22PM -0600, Jens Axboe wrote:
> Why are we duplicating this code here? If NOWAIT isn't set, then we'll
> always return a request. bt_get() will run the queue for us, if it needs
> to. blk_mq_alloc_request() does this too, and I'm guessing that code was
> just copied. I'll fix that up. Looks like this should just be:

The generic code is a bit of a mess as it does the nowait optimization
in two different cases, I'll send you a separate cleanup patch for that.


Re: [PATCH 1/8] blk-mq: add blk_mq_alloc_request_hctx

2016-06-08 Thread Christoph Hellwig
On Tue, Jun 07, 2016 at 10:49:22PM -0600, Jens Axboe wrote:
> Why are we duplicating this code here? If NOWAIT isn't set, then we'll
> always return a request. bt_get() will run the queue for us, if it needs
> to. blk_mq_alloc_request() does this too, and I'm guessing that code was
> just copied. I'll fix that up. Looks like this should just be:

The generic code is a bit of a mess as it does the nowait optimization
in two different cases, I'll send you a separate cleanup patch for that.


Re: [PATCH 1/8] blk-mq: add blk_mq_alloc_request_hctx

2016-06-07 Thread Ming Lin
On Tue, 2016-06-07 at 22:49 -0600, Jens Axboe wrote:
> On 06/06/2016 03:21 PM, Christoph Hellwig wrote:
> > From: Ming Lin 
> > 
> > For some protocols like NVMe over Fabrics we need to be able to
> > send
> > initialization commands to a specific queue.
> > 
> > Based on an earlier patch from Christoph Hellwig .
> > 
> > Signed-off-by: Ming Lin 
> > Signed-off-by: Christoph Hellwig 
> > ---
> >   block/blk-mq.c | 33 +
> >   include/linux/blk-mq.h |  2 ++
> >   2 files changed, 35 insertions(+)
> > 
> > diff --git a/block/blk-mq.c b/block/blk-mq.c
> > index 29cbc1b..7bb45ed 100644
> > --- a/block/blk-mq.c
> > +++ b/block/blk-mq.c
> > @@ -266,6 +266,39 @@ struct request *blk_mq_alloc_request(struct
> > request_queue *q, int rw,
> >   }
> >   EXPORT_SYMBOL(blk_mq_alloc_request);
> > 
> > +struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
> > int rw,
> > +   unsigned int flags, unsigned int hctx_idx)
> > +{
> > +   struct blk_mq_hw_ctx *hctx;
> > +   struct blk_mq_ctx *ctx;
> > +   struct request *rq;
> > +   struct blk_mq_alloc_data alloc_data;
> > +   int ret;
> > +
> > +   ret = blk_queue_enter(q, flags & BLK_MQ_REQ_NOWAIT);
> > +   if (ret)
> > +   return ERR_PTR(ret);
> > +
> > +   hctx = q->queue_hw_ctx[hctx_idx];
> > +   ctx = __blk_mq_get_ctx(q, cpumask_first(hctx->cpumask));
> > +
> > +   blk_mq_set_alloc_data(_data, q, flags, ctx, hctx);
> > +
> > +   rq = __blk_mq_alloc_request(_data, rw);
> > +   if (!rq && !(flags & BLK_MQ_REQ_NOWAIT)) {
> > +   __blk_mq_run_hw_queue(hctx);
> > +
> > +   rq =  __blk_mq_alloc_request(_data, rw);
> > +   }
> 
> Why are we duplicating this code here? If NOWAIT isn't set, then
> we'll
> always return a request. bt_get() will run the queue for us, if it
> needs
> to. blk_mq_alloc_request() does this too, and I'm guessing that code
> was
> just copied. I'll fix that up. Looks like this should just be:
> 
>   rq = __blk_mq_alloc_request(_data, rw);
>   if (rq)
>   return rq;
> 
>   blk_queue_exit(q);
>   return ERR_PTR(-EWOULDBLOCK);
> 
> for this case.

Yes,

But the bt_get() reminds me that this patch actually has a problem.

blk_mq_alloc_request_hctx() ->
  __blk_mq_alloc_request() ->
    blk_mq_get_tag() -> 
      __blk_mq_get_tag() ->
        bt_get() ->
          blk_mq_put_ctx(data->ctx);

Here are blk_mq_get_ctx() and blk_mq_put_ctx().

static inline struct blk_mq_ctx *blk_mq_get_ctx(struct request_queue *q)
{   
return __blk_mq_get_ctx(q, get_cpu());
} 

static inline void blk_mq_put_ctx(struct blk_mq_ctx *ctx)
{
put_cpu();
}

blk_mq_alloc_request_hctx() calls __blk_mq_get_ctx() instead
of blk_mq_get_ctx(). Then reason is the "hctx" could belong to other
cpu. So blk_mq_get_ctx() doesn't work.

But then above put_cpu() in blk_mq_put_ctx() will trigger a WARNING
because we didn't do get_cpu() in blk_mq_alloc_request_hctx()


Re: [PATCH 1/8] blk-mq: add blk_mq_alloc_request_hctx

2016-06-07 Thread Ming Lin
On Tue, 2016-06-07 at 22:49 -0600, Jens Axboe wrote:
> On 06/06/2016 03:21 PM, Christoph Hellwig wrote:
> > From: Ming Lin 
> > 
> > For some protocols like NVMe over Fabrics we need to be able to
> > send
> > initialization commands to a specific queue.
> > 
> > Based on an earlier patch from Christoph Hellwig .
> > 
> > Signed-off-by: Ming Lin 
> > Signed-off-by: Christoph Hellwig 
> > ---
> >   block/blk-mq.c | 33 +
> >   include/linux/blk-mq.h |  2 ++
> >   2 files changed, 35 insertions(+)
> > 
> > diff --git a/block/blk-mq.c b/block/blk-mq.c
> > index 29cbc1b..7bb45ed 100644
> > --- a/block/blk-mq.c
> > +++ b/block/blk-mq.c
> > @@ -266,6 +266,39 @@ struct request *blk_mq_alloc_request(struct
> > request_queue *q, int rw,
> >   }
> >   EXPORT_SYMBOL(blk_mq_alloc_request);
> > 
> > +struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
> > int rw,
> > +   unsigned int flags, unsigned int hctx_idx)
> > +{
> > +   struct blk_mq_hw_ctx *hctx;
> > +   struct blk_mq_ctx *ctx;
> > +   struct request *rq;
> > +   struct blk_mq_alloc_data alloc_data;
> > +   int ret;
> > +
> > +   ret = blk_queue_enter(q, flags & BLK_MQ_REQ_NOWAIT);
> > +   if (ret)
> > +   return ERR_PTR(ret);
> > +
> > +   hctx = q->queue_hw_ctx[hctx_idx];
> > +   ctx = __blk_mq_get_ctx(q, cpumask_first(hctx->cpumask));
> > +
> > +   blk_mq_set_alloc_data(_data, q, flags, ctx, hctx);
> > +
> > +   rq = __blk_mq_alloc_request(_data, rw);
> > +   if (!rq && !(flags & BLK_MQ_REQ_NOWAIT)) {
> > +   __blk_mq_run_hw_queue(hctx);
> > +
> > +   rq =  __blk_mq_alloc_request(_data, rw);
> > +   }
> 
> Why are we duplicating this code here? If NOWAIT isn't set, then
> we'll
> always return a request. bt_get() will run the queue for us, if it
> needs
> to. blk_mq_alloc_request() does this too, and I'm guessing that code
> was
> just copied. I'll fix that up. Looks like this should just be:
> 
>   rq = __blk_mq_alloc_request(_data, rw);
>   if (rq)
>   return rq;
> 
>   blk_queue_exit(q);
>   return ERR_PTR(-EWOULDBLOCK);
> 
> for this case.

Yes,

But the bt_get() reminds me that this patch actually has a problem.

blk_mq_alloc_request_hctx() ->
  __blk_mq_alloc_request() ->
    blk_mq_get_tag() -> 
      __blk_mq_get_tag() ->
        bt_get() ->
          blk_mq_put_ctx(data->ctx);

Here are blk_mq_get_ctx() and blk_mq_put_ctx().

static inline struct blk_mq_ctx *blk_mq_get_ctx(struct request_queue *q)
{   
return __blk_mq_get_ctx(q, get_cpu());
} 

static inline void blk_mq_put_ctx(struct blk_mq_ctx *ctx)
{
put_cpu();
}

blk_mq_alloc_request_hctx() calls __blk_mq_get_ctx() instead
of blk_mq_get_ctx(). Then reason is the "hctx" could belong to other
cpu. So blk_mq_get_ctx() doesn't work.

But then above put_cpu() in blk_mq_put_ctx() will trigger a WARNING
because we didn't do get_cpu() in blk_mq_alloc_request_hctx()


Re: [PATCH 1/8] blk-mq: add blk_mq_alloc_request_hctx

2016-06-07 Thread Jens Axboe

On 06/06/2016 03:21 PM, Christoph Hellwig wrote:

From: Ming Lin 

For some protocols like NVMe over Fabrics we need to be able to send
initialization commands to a specific queue.

Based on an earlier patch from Christoph Hellwig .

Signed-off-by: Ming Lin 
Signed-off-by: Christoph Hellwig 
---
  block/blk-mq.c | 33 +
  include/linux/blk-mq.h |  2 ++
  2 files changed, 35 insertions(+)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 29cbc1b..7bb45ed 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -266,6 +266,39 @@ struct request *blk_mq_alloc_request(struct request_queue 
*q, int rw,
  }
  EXPORT_SYMBOL(blk_mq_alloc_request);

+struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw,
+   unsigned int flags, unsigned int hctx_idx)
+{
+   struct blk_mq_hw_ctx *hctx;
+   struct blk_mq_ctx *ctx;
+   struct request *rq;
+   struct blk_mq_alloc_data alloc_data;
+   int ret;
+
+   ret = blk_queue_enter(q, flags & BLK_MQ_REQ_NOWAIT);
+   if (ret)
+   return ERR_PTR(ret);
+
+   hctx = q->queue_hw_ctx[hctx_idx];
+   ctx = __blk_mq_get_ctx(q, cpumask_first(hctx->cpumask));
+
+   blk_mq_set_alloc_data(_data, q, flags, ctx, hctx);
+
+   rq = __blk_mq_alloc_request(_data, rw);
+   if (!rq && !(flags & BLK_MQ_REQ_NOWAIT)) {
+   __blk_mq_run_hw_queue(hctx);
+
+   rq =  __blk_mq_alloc_request(_data, rw);
+   }


Why are we duplicating this code here? If NOWAIT isn't set, then we'll
always return a request. bt_get() will run the queue for us, if it needs
to. blk_mq_alloc_request() does this too, and I'm guessing that code was
just copied. I'll fix that up. Looks like this should just be:

rq = __blk_mq_alloc_request(_data, rw);
if (rq)
return rq;

blk_queue_exit(q);
return ERR_PTR(-EWOULDBLOCK);

for this case.

--
Jens Axboe



Re: [PATCH 1/8] blk-mq: add blk_mq_alloc_request_hctx

2016-06-07 Thread Jens Axboe

On 06/06/2016 03:21 PM, Christoph Hellwig wrote:

From: Ming Lin 

For some protocols like NVMe over Fabrics we need to be able to send
initialization commands to a specific queue.

Based on an earlier patch from Christoph Hellwig .

Signed-off-by: Ming Lin 
Signed-off-by: Christoph Hellwig 
---
  block/blk-mq.c | 33 +
  include/linux/blk-mq.h |  2 ++
  2 files changed, 35 insertions(+)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 29cbc1b..7bb45ed 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -266,6 +266,39 @@ struct request *blk_mq_alloc_request(struct request_queue 
*q, int rw,
  }
  EXPORT_SYMBOL(blk_mq_alloc_request);

+struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw,
+   unsigned int flags, unsigned int hctx_idx)
+{
+   struct blk_mq_hw_ctx *hctx;
+   struct blk_mq_ctx *ctx;
+   struct request *rq;
+   struct blk_mq_alloc_data alloc_data;
+   int ret;
+
+   ret = blk_queue_enter(q, flags & BLK_MQ_REQ_NOWAIT);
+   if (ret)
+   return ERR_PTR(ret);
+
+   hctx = q->queue_hw_ctx[hctx_idx];
+   ctx = __blk_mq_get_ctx(q, cpumask_first(hctx->cpumask));
+
+   blk_mq_set_alloc_data(_data, q, flags, ctx, hctx);
+
+   rq = __blk_mq_alloc_request(_data, rw);
+   if (!rq && !(flags & BLK_MQ_REQ_NOWAIT)) {
+   __blk_mq_run_hw_queue(hctx);
+
+   rq =  __blk_mq_alloc_request(_data, rw);
+   }


Why are we duplicating this code here? If NOWAIT isn't set, then we'll
always return a request. bt_get() will run the queue for us, if it needs
to. blk_mq_alloc_request() does this too, and I'm guessing that code was
just copied. I'll fix that up. Looks like this should just be:

rq = __blk_mq_alloc_request(_data, rw);
if (rq)
return rq;

blk_queue_exit(q);
return ERR_PTR(-EWOULDBLOCK);

for this case.

--
Jens Axboe



Re: [PATCH 1/8] blk-mq: add blk_mq_alloc_request_hctx

2016-06-07 Thread Ming Lin
On Tue, Jun 7, 2016 at 8:27 AM, Ming Lin  wrote:
> On Tue, Jun 7, 2016 at 7:57 AM, Keith Busch  wrote:
>> On Mon, Jun 06, 2016 at 11:21:52PM +0200, Christoph Hellwig wrote:
>>> +struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw,
>>> + unsigned int flags, unsigned int hctx_idx)
>>> +{
>>> + struct blk_mq_hw_ctx *hctx;
>>> + struct blk_mq_ctx *ctx;
>>> + struct request *rq;
>>> + struct blk_mq_alloc_data alloc_data;
>>> + int ret;
>>> +
>>> + ret = blk_queue_enter(q, flags & BLK_MQ_REQ_NOWAIT);
>>> + if (ret)
>>> + return ERR_PTR(ret);
>>> +
>>> + hctx = q->queue_hw_ctx[hctx_idx];
>>
>> We probably want to check 'if (hctx_idx < q->nr_hw_queues)' before
>> getting the hctx. Even if hctx_idx was origially valid, it's possible
>> (though unlikely) blk_queue_enter waits on reallocating h/w contexts,
>> which can make hctx_idx invalid.
>
> Yes, I'll update it.

How about this?

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 7bb45ed..b59d2ef 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -279,6 +279,11 @@ struct request *blk_mq_alloc_request_hctx(struct
request_queue *q, int rw,
if (ret)
return ERR_PTR(ret);

+   if (hctx_idx >= q->nr_hw_queues) {
+   blk_queue_exit(q);
+   return ERR_PTR(-EINVAL);
+   }
+
hctx = q->queue_hw_ctx[hctx_idx];
ctx = __blk_mq_get_ctx(q, cpumask_first(hctx->cpumask));


Re: [PATCH 1/8] blk-mq: add blk_mq_alloc_request_hctx

2016-06-07 Thread Ming Lin
On Tue, Jun 7, 2016 at 8:27 AM, Ming Lin  wrote:
> On Tue, Jun 7, 2016 at 7:57 AM, Keith Busch  wrote:
>> On Mon, Jun 06, 2016 at 11:21:52PM +0200, Christoph Hellwig wrote:
>>> +struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw,
>>> + unsigned int flags, unsigned int hctx_idx)
>>> +{
>>> + struct blk_mq_hw_ctx *hctx;
>>> + struct blk_mq_ctx *ctx;
>>> + struct request *rq;
>>> + struct blk_mq_alloc_data alloc_data;
>>> + int ret;
>>> +
>>> + ret = blk_queue_enter(q, flags & BLK_MQ_REQ_NOWAIT);
>>> + if (ret)
>>> + return ERR_PTR(ret);
>>> +
>>> + hctx = q->queue_hw_ctx[hctx_idx];
>>
>> We probably want to check 'if (hctx_idx < q->nr_hw_queues)' before
>> getting the hctx. Even if hctx_idx was origially valid, it's possible
>> (though unlikely) blk_queue_enter waits on reallocating h/w contexts,
>> which can make hctx_idx invalid.
>
> Yes, I'll update it.

How about this?

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 7bb45ed..b59d2ef 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -279,6 +279,11 @@ struct request *blk_mq_alloc_request_hctx(struct
request_queue *q, int rw,
if (ret)
return ERR_PTR(ret);

+   if (hctx_idx >= q->nr_hw_queues) {
+   blk_queue_exit(q);
+   return ERR_PTR(-EINVAL);
+   }
+
hctx = q->queue_hw_ctx[hctx_idx];
ctx = __blk_mq_get_ctx(q, cpumask_first(hctx->cpumask));


Re: [PATCH 1/8] blk-mq: add blk_mq_alloc_request_hctx

2016-06-07 Thread Ming Lin
On Tue, Jun 7, 2016 at 7:57 AM, Keith Busch  wrote:
> On Mon, Jun 06, 2016 at 11:21:52PM +0200, Christoph Hellwig wrote:
>> +struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw,
>> + unsigned int flags, unsigned int hctx_idx)
>> +{
>> + struct blk_mq_hw_ctx *hctx;
>> + struct blk_mq_ctx *ctx;
>> + struct request *rq;
>> + struct blk_mq_alloc_data alloc_data;
>> + int ret;
>> +
>> + ret = blk_queue_enter(q, flags & BLK_MQ_REQ_NOWAIT);
>> + if (ret)
>> + return ERR_PTR(ret);
>> +
>> + hctx = q->queue_hw_ctx[hctx_idx];
>
> We probably want to check 'if (hctx_idx < q->nr_hw_queues)' before
> getting the hctx. Even if hctx_idx was origially valid, it's possible
> (though unlikely) blk_queue_enter waits on reallocating h/w contexts,
> which can make hctx_idx invalid.

Yes, I'll update it.


Re: [PATCH 1/8] blk-mq: add blk_mq_alloc_request_hctx

2016-06-07 Thread Ming Lin
On Tue, Jun 7, 2016 at 7:57 AM, Keith Busch  wrote:
> On Mon, Jun 06, 2016 at 11:21:52PM +0200, Christoph Hellwig wrote:
>> +struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw,
>> + unsigned int flags, unsigned int hctx_idx)
>> +{
>> + struct blk_mq_hw_ctx *hctx;
>> + struct blk_mq_ctx *ctx;
>> + struct request *rq;
>> + struct blk_mq_alloc_data alloc_data;
>> + int ret;
>> +
>> + ret = blk_queue_enter(q, flags & BLK_MQ_REQ_NOWAIT);
>> + if (ret)
>> + return ERR_PTR(ret);
>> +
>> + hctx = q->queue_hw_ctx[hctx_idx];
>
> We probably want to check 'if (hctx_idx < q->nr_hw_queues)' before
> getting the hctx. Even if hctx_idx was origially valid, it's possible
> (though unlikely) blk_queue_enter waits on reallocating h/w contexts,
> which can make hctx_idx invalid.

Yes, I'll update it.


Re: [PATCH 1/8] blk-mq: add blk_mq_alloc_request_hctx

2016-06-07 Thread Keith Busch
On Mon, Jun 06, 2016 at 11:21:52PM +0200, Christoph Hellwig wrote:
> +struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw,
> + unsigned int flags, unsigned int hctx_idx)
> +{
> + struct blk_mq_hw_ctx *hctx;
> + struct blk_mq_ctx *ctx;
> + struct request *rq;
> + struct blk_mq_alloc_data alloc_data;
> + int ret;
> +
> + ret = blk_queue_enter(q, flags & BLK_MQ_REQ_NOWAIT);
> + if (ret)
> + return ERR_PTR(ret);
> +
> + hctx = q->queue_hw_ctx[hctx_idx];

We probably want to check 'if (hctx_idx < q->nr_hw_queues)' before
getting the hctx. Even if hctx_idx was origially valid, it's possible
(though unlikely) blk_queue_enter waits on reallocating h/w contexts,
which can make hctx_idx invalid.


Re: [PATCH 1/8] blk-mq: add blk_mq_alloc_request_hctx

2016-06-07 Thread Keith Busch
On Mon, Jun 06, 2016 at 11:21:52PM +0200, Christoph Hellwig wrote:
> +struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw,
> + unsigned int flags, unsigned int hctx_idx)
> +{
> + struct blk_mq_hw_ctx *hctx;
> + struct blk_mq_ctx *ctx;
> + struct request *rq;
> + struct blk_mq_alloc_data alloc_data;
> + int ret;
> +
> + ret = blk_queue_enter(q, flags & BLK_MQ_REQ_NOWAIT);
> + if (ret)
> + return ERR_PTR(ret);
> +
> + hctx = q->queue_hw_ctx[hctx_idx];

We probably want to check 'if (hctx_idx < q->nr_hw_queues)' before
getting the hctx. Even if hctx_idx was origially valid, it's possible
(though unlikely) blk_queue_enter waits on reallocating h/w contexts,
which can make hctx_idx invalid.


[PATCH 1/8] blk-mq: add blk_mq_alloc_request_hctx

2016-06-06 Thread Christoph Hellwig
From: Ming Lin 

For some protocols like NVMe over Fabrics we need to be able to send
initialization commands to a specific queue.

Based on an earlier patch from Christoph Hellwig .

Signed-off-by: Ming Lin 
Signed-off-by: Christoph Hellwig 
---
 block/blk-mq.c | 33 +
 include/linux/blk-mq.h |  2 ++
 2 files changed, 35 insertions(+)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 29cbc1b..7bb45ed 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -266,6 +266,39 @@ struct request *blk_mq_alloc_request(struct request_queue 
*q, int rw,
 }
 EXPORT_SYMBOL(blk_mq_alloc_request);
 
+struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw,
+   unsigned int flags, unsigned int hctx_idx)
+{
+   struct blk_mq_hw_ctx *hctx;
+   struct blk_mq_ctx *ctx;
+   struct request *rq;
+   struct blk_mq_alloc_data alloc_data;
+   int ret;
+
+   ret = blk_queue_enter(q, flags & BLK_MQ_REQ_NOWAIT);
+   if (ret)
+   return ERR_PTR(ret);
+
+   hctx = q->queue_hw_ctx[hctx_idx];
+   ctx = __blk_mq_get_ctx(q, cpumask_first(hctx->cpumask));
+
+   blk_mq_set_alloc_data(_data, q, flags, ctx, hctx);
+
+   rq = __blk_mq_alloc_request(_data, rw);
+   if (!rq && !(flags & BLK_MQ_REQ_NOWAIT)) {
+   __blk_mq_run_hw_queue(hctx);
+
+   rq =  __blk_mq_alloc_request(_data, rw);
+   }
+   if (!rq) {
+   blk_queue_exit(q);
+   return ERR_PTR(-EWOULDBLOCK);
+   }
+
+   return rq;
+}
+EXPORT_SYMBOL(blk_mq_alloc_request_hctx);
+
 static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx,
  struct blk_mq_ctx *ctx, struct request *rq)
 {
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 2498fdf..6bf8735 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -196,6 +196,8 @@ enum {
 
 struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
unsigned int flags);
+struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw,
+   unsigned int flags, unsigned int hctx_idx);
 struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag);
 struct cpumask *blk_mq_tags_cpumask(struct blk_mq_tags *tags);
 
-- 
2.1.4



[PATCH 1/8] blk-mq: add blk_mq_alloc_request_hctx

2016-06-06 Thread Christoph Hellwig
From: Ming Lin 

For some protocols like NVMe over Fabrics we need to be able to send
initialization commands to a specific queue.

Based on an earlier patch from Christoph Hellwig .

Signed-off-by: Ming Lin 
Signed-off-by: Christoph Hellwig 
---
 block/blk-mq.c | 33 +
 include/linux/blk-mq.h |  2 ++
 2 files changed, 35 insertions(+)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 29cbc1b..7bb45ed 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -266,6 +266,39 @@ struct request *blk_mq_alloc_request(struct request_queue 
*q, int rw,
 }
 EXPORT_SYMBOL(blk_mq_alloc_request);
 
+struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw,
+   unsigned int flags, unsigned int hctx_idx)
+{
+   struct blk_mq_hw_ctx *hctx;
+   struct blk_mq_ctx *ctx;
+   struct request *rq;
+   struct blk_mq_alloc_data alloc_data;
+   int ret;
+
+   ret = blk_queue_enter(q, flags & BLK_MQ_REQ_NOWAIT);
+   if (ret)
+   return ERR_PTR(ret);
+
+   hctx = q->queue_hw_ctx[hctx_idx];
+   ctx = __blk_mq_get_ctx(q, cpumask_first(hctx->cpumask));
+
+   blk_mq_set_alloc_data(_data, q, flags, ctx, hctx);
+
+   rq = __blk_mq_alloc_request(_data, rw);
+   if (!rq && !(flags & BLK_MQ_REQ_NOWAIT)) {
+   __blk_mq_run_hw_queue(hctx);
+
+   rq =  __blk_mq_alloc_request(_data, rw);
+   }
+   if (!rq) {
+   blk_queue_exit(q);
+   return ERR_PTR(-EWOULDBLOCK);
+   }
+
+   return rq;
+}
+EXPORT_SYMBOL(blk_mq_alloc_request_hctx);
+
 static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx,
  struct blk_mq_ctx *ctx, struct request *rq)
 {
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 2498fdf..6bf8735 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -196,6 +196,8 @@ enum {
 
 struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
unsigned int flags);
+struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw,
+   unsigned int flags, unsigned int hctx_idx);
 struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag);
 struct cpumask *blk_mq_tags_cpumask(struct blk_mq_tags *tags);
 
-- 
2.1.4