Re: [PATCH v2 for-next 1/7] IB/core: Extend ib_uverbs_create_qp

2015-10-21 Thread Sagi Grimberg

On 10/21/2015 1:04 PM, Or Gerlitz wrote:

On 10/21/2015 12:53 PM, Sagi Grimberg wrote:

On 10/15/2015 2:44 PM, Eran Ben Elisha wrote:



+struct ib_uverbs_ex_create_qp {
+__u64 user_handle;
+__u32 pd_handle;
+__u32 send_cq_handle;
+__u32 recv_cq_handle;
+__u32 srq_handle;
+__u32 max_send_wr;
+__u32 max_recv_wr;
+__u32 max_send_sge;
+__u32 max_recv_sge;
+__u32 max_inline_data;
+__u8  sq_sig_all;
+__u8  qp_type;
+__u8  is_srq;
+__u8 reserved;
+__u32 comp_mask;
+__u32 create_flags;


Can we please make create_flags u64 to begin with?



In the kernel we used 4-5 flags over 10 years, why worry?


I don't see why we should assume that we won't occupy enough bits in a
long time just for saving extra 4 bytes. But if you are strongly against
it then I won't persist here.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 for-next 1/7] IB/core: Extend ib_uverbs_create_qp

2015-10-21 Thread Eran Ben Elisha
ib_uverbs_ex_create_qp follows the extension verbs
mechanism. New features (for example, QP creation flags
field which is added in a downstream patch) could used
via user-space libraries without breaking the ABI.

Signed-off-by: Eran Ben Elisha 
---
Hi Doug,
This is a fix for the first patch of "Add support for multicast loopback
prevention to mlx4"
Changes from v2:
remove comp_mask check in create_qp based on Haggai's comment

 drivers/infiniband/core/uverbs.h  |   1 +
 drivers/infiniband/core/uverbs_cmd.c  | 254 +-
 drivers/infiniband/core/uverbs_main.c |   1 +
 include/uapi/rdma/ib_user_verbs.h |  26 
 4 files changed, 217 insertions(+), 65 deletions(-)

diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 3863d33..94bbd8c 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -272,5 +272,6 @@ IB_UVERBS_DECLARE_EX_CMD(create_flow);
 IB_UVERBS_DECLARE_EX_CMD(destroy_flow);
 IB_UVERBS_DECLARE_EX_CMD(query_device);
 IB_UVERBS_DECLARE_EX_CMD(create_cq);
+IB_UVERBS_DECLARE_EX_CMD(create_qp);
 
 #endif /* UVERBS_H */
diff --git a/drivers/infiniband/core/uverbs_cmd.c 
b/drivers/infiniband/core/uverbs_cmd.c
index be4cb9f..049ae91 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1741,66 +1741,65 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file 
*file,
return in_len;
 }
 
-ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
-   struct ib_device *ib_dev,
-   const char __user *buf, int in_len,
-   int out_len)
-{
-   struct ib_uverbs_create_qp  cmd;
-   struct ib_uverbs_create_qp_resp resp;
-   struct ib_udata udata;
-   struct ib_uqp_object   *obj;
-   struct ib_device   *device;
-   struct ib_pd   *pd = NULL;
-   struct ib_xrcd *xrcd = NULL;
-   struct ib_uobject  *uninitialized_var(xrcd_uobj);
-   struct ib_cq   *scq = NULL, *rcq = NULL;
-   struct ib_srq  *srq = NULL;
-   struct ib_qp   *qp;
-   struct ib_qp_init_attr  attr;
-   int ret;
-
-   if (out_len < sizeof resp)
-   return -ENOSPC;
-
-   if (copy_from_user(, buf, sizeof cmd))
-   return -EFAULT;
+static int create_qp(struct ib_uverbs_file *file,
+struct ib_udata *ucore,
+struct ib_udata *uhw,
+struct ib_uverbs_ex_create_qp *cmd,
+size_t cmd_sz,
+int (*cb)(struct ib_uverbs_file *file,
+  struct ib_uverbs_ex_create_qp_resp *resp,
+  struct ib_udata *udata),
+void *context)
+{
+   struct ib_uqp_object*obj;
+   struct ib_device*device;
+   struct ib_pd*pd = NULL;
+   struct ib_xrcd  *xrcd = NULL;
+   struct ib_uobject   *uninitialized_var(xrcd_uobj);
+   struct ib_cq*scq = NULL, *rcq = NULL;
+   struct ib_srq   *srq = NULL;
+   struct ib_qp*qp;
+   char*buf;
+   struct ib_qp_init_attr  attr;
+   struct ib_uverbs_ex_create_qp_resp resp;
+   int ret;
 
-   if (cmd.qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
+   if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
return -EPERM;
 
-   INIT_UDATA(, buf + sizeof cmd,
-  (unsigned long) cmd.response + sizeof resp,
-  in_len - sizeof cmd, out_len - sizeof resp);
-
obj = kzalloc(sizeof *obj, GFP_KERNEL);
if (!obj)
return -ENOMEM;
 
-   init_uobj(>uevent.uobject, cmd.user_handle, file->ucontext, 
_lock_class);
+   init_uobj(>uevent.uobject, cmd->user_handle, file->ucontext,
+ _lock_class);
down_write(>uevent.uobject.mutex);
 
-   if (cmd.qp_type == IB_QPT_XRC_TGT) {
-   xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, _uobj);
+   if (cmd->qp_type == IB_QPT_XRC_TGT) {
+   xrcd = idr_read_xrcd(cmd->pd_handle, file->ucontext,
+_uobj);
if (!xrcd) {
ret = -EINVAL;
goto err_put;
}
device = xrcd->device;
} else {
-   if (cmd.qp_type == IB_QPT_XRC_INI) {
-   cmd.max_recv_wr = cmd.max_recv_sge = 0;
+   if (cmd->qp_type == IB_QPT_XRC_INI) {
+   cmd->max_recv_wr = 0;
+   cmd->max_recv_sge = 0;
} else {
- 

Re: [PATCH v2 07/22] staging/rdma/hfi1: Fix sparse error in sdma.h file

2015-10-21 Thread Dan Carpenter
On Mon, Oct 19, 2015 at 10:11:22PM -0400, ira.we...@intel.com wrote:
> From: Niranjana Vishwanathapura 
> 
> Use NULL instead of 0 for pointer argument to fix the sparse error.
> 
> Reviewed-by: Mike Marciniszyn 
> Reviewed-by: Mitko Haralanov 
> Reviewed-by: Dennis Dalessandro 
> Signed-off-by: Niranjana Vishwanathapura 
> Signed-off-by: Ira Weiny 

This should have just been folded in with the previous patch.  Don't
introduce problems and fix them in the patchset.

regards,
dan carpenter

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 for-next 1/7] IB/core: Extend ib_uverbs_create_qp

2015-10-21 Thread Haggai Eran
On 21/10/2015 17:00, Eran Ben Elisha wrote:
> ib_uverbs_ex_create_qp follows the extension verbs
> mechanism. New features (for example, QP creation flags
> field which is added in a downstream patch) could used
> via user-space libraries without breaking the ABI.
> 
> Signed-off-by: Eran Ben Elisha 
> ---
> Hi Doug,
> This is a fix for the first patch of "Add support for multicast loopback
> prevention to mlx4"
> Changes from v2:
>   remove comp_mask check in create_qp based on Haggai's comment
> 
>  drivers/infiniband/core/uverbs.h  |   1 +
>  drivers/infiniband/core/uverbs_cmd.c  | 254 
> +-
>  drivers/infiniband/core/uverbs_main.c |   1 +
>  include/uapi/rdma/ib_user_verbs.h |  26 
>  4 files changed, 217 insertions(+), 65 deletions(-)
> 

Reviewed-by: Haggai Eran 
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 for-next 1/7] IB/core: Extend ib_uverbs_create_qp

2015-10-21 Thread eran ben elisha
On Wed, Oct 21, 2015 at 11:34 AM, Haggai Eran  wrote:
> On 15/10/2015 14:44, Eran Ben Elisha wrote:
>> ib_uverbs_ex_create_qp follows the extension verbs
>> mechanism. New features (for example, QP creation flags
>> field which is added in a downstream patch) could used
>> via user-space libraries without breaking the ABI.
>>
>> Signed-off-by: Eran Ben Elisha 
>> ---
>
>> diff --git a/drivers/infiniband/core/uverbs_cmd.c 
>> b/drivers/infiniband/core/uverbs_cmd.c
>> index be4cb9f..e795d59 100644
>> --- a/drivers/infiniband/core/uverbs_cmd.c
>> +++ b/drivers/infiniband/core/uverbs_cmd.c
>> @@ -1741,66 +1741,65 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file 
>> *file,
>>   return in_len;
>>  }
>>
>> -ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
>> - struct ib_device *ib_dev,
>> - const char __user *buf, int in_len,
>> - int out_len)
>> -{
>> - struct ib_uverbs_create_qp  cmd;
>> - struct ib_uverbs_create_qp_resp resp;
>> - struct ib_udata udata;
>> - struct ib_uqp_object   *obj;
>> - struct ib_device   *device;
>> - struct ib_pd   *pd = NULL;
>> - struct ib_xrcd *xrcd = NULL;
>> - struct ib_uobject  *uninitialized_var(xrcd_uobj);
>> - struct ib_cq   *scq = NULL, *rcq = NULL;
>> - struct ib_srq  *srq = NULL;
>> - struct ib_qp   *qp;
>> - struct ib_qp_init_attr  attr;
>> - int ret;
>> -
>> - if (out_len < sizeof resp)
>> - return -ENOSPC;
>> -
>> - if (copy_from_user(, buf, sizeof cmd))
>> - return -EFAULT;
>> +static int create_qp(struct ib_uverbs_file *file,
>> +  struct ib_udata *ucore,
>> +  struct ib_udata *uhw,
>> +  struct ib_uverbs_ex_create_qp *cmd,
>> +  size_t cmd_sz,
>> +  int (*cb)(struct ib_uverbs_file *file,
>> +struct ib_uverbs_ex_create_qp_resp *resp,
>> +struct ib_udata *udata),
>> +  void *context)
>> +{
>> + struct ib_uqp_object*obj;
>> + struct ib_device*device;
>> + struct ib_pd*pd = NULL;
>> + struct ib_xrcd  *xrcd = NULL;
>> + struct ib_uobject   *uninitialized_var(xrcd_uobj);
>> + struct ib_cq*scq = NULL, *rcq = NULL;
>> + struct ib_srq   *srq = NULL;
>> + struct ib_qp*qp;
>> + char*buf;
>> + struct ib_qp_init_attr  attr;
>> + struct ib_uverbs_ex_create_qp_resp resp;
>> + int ret;
>>
>> - if (cmd.qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
>> + if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
>>   return -EPERM;
>>
>> - INIT_UDATA(, buf + sizeof cmd,
>> -(unsigned long) cmd.response + sizeof resp,
>> -in_len - sizeof cmd, out_len - sizeof resp);
>> -
>>   obj = kzalloc(sizeof *obj, GFP_KERNEL);
>>   if (!obj)
>>   return -ENOMEM;
>>
>> - init_uobj(>uevent.uobject, cmd.user_handle, file->ucontext, 
>> _lock_class);
>> + init_uobj(>uevent.uobject, cmd->user_handle, file->ucontext,
>> +   _lock_class);
>>   down_write(>uevent.uobject.mutex);
>>
>> - if (cmd.qp_type == IB_QPT_XRC_TGT) {
>> - xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, 
>> _uobj);
>> + if (cmd->qp_type == IB_QPT_XRC_TGT) {
>> + xrcd = idr_read_xrcd(cmd->pd_handle, file->ucontext,
>> +  _uobj);
>>   if (!xrcd) {
>>   ret = -EINVAL;
>>   goto err_put;
>>   }
>>   device = xrcd->device;
>>   } else {
>> - if (cmd.qp_type == IB_QPT_XRC_INI) {
>> - cmd.max_recv_wr = cmd.max_recv_sge = 0;
>> + if (cmd->qp_type == IB_QPT_XRC_INI) {
>> + cmd->max_recv_wr = 0;
>> + cmd->max_recv_sge = 0;
>>   } else {
>> - if (cmd.is_srq) {
>> - srq = idr_read_srq(cmd.srq_handle, 
>> file->ucontext);
>> + if (cmd->is_srq) {
>> + srq = idr_read_srq(cmd->srq_handle,
>> +file->ucontext);
>>   if (!srq || srq->srq_type != IB_SRQT_BASIC) {
>>   ret = -EINVAL;
>>   goto err_put;
>>   }
>>   }
>>
>> - if (cmd.recv_cq_handle != 

Re: [PATCH v2 for-next 1/7] IB/core: Extend ib_uverbs_create_qp

2015-10-21 Thread Haggai Eran
On 21/10/2015 16:46, eran ben elisha wrote:
>>> +ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
>>> >> + struct ib_device *ib_dev,
>>> >> + const char __user *buf, int in_len,
>>> >> + int out_len)
>>> >> +{
>>> >> + struct ib_uverbs_create_qp  cmd;
>>> >> + struct ib_uverbs_ex_create_qp   cmd_ex;
>>> >> + struct ib_udata ucore;
>>> >> + struct ib_udata uhw;
>>> >> + ssize_t resp_size = sizeof(struct ib_uverbs_create_qp_resp);
>>> >> + int err;
>> >
>> > I would expect a check here that in_len >= sizeof(cmd). But I see the
>> > previous code didn't have it either. Any reason adding the check would
>> > break user-space?
> This patch just refactor in ib_uverbs_create_qp and doesn't change any
> of it's logic or fix any bug. we can consider such a fix for the
> future.

Fair enough.

Haggai
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: merge struct ib_device_attr into struct ib_device V2

2015-10-21 Thread Bart Van Assche

On 10/21/2015 12:11 AM, Or Gerlitz wrote:

haven't found any review or ack to your giant patch that touches the

> whole subsystem (drivers, core and ULPs) expect from Sagi's -- lets
> hear more opinions.

Although I have not yet had the time to review the entire patch, 
removing ib_query_device() seems a great idea to me and an idea that I 
welcome very much. The ib_device_attr structure is too large to be 
allocated on the stack. This means that with Christoph's patch it is no 
longer needed to call kmalloc() + ib_query_device() + kfree() when a 
device attribute is needed from kernel code.


Bart.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH v2 07/22] staging/rdma/hfi1: Fix sparse error in sdma.h file

2015-10-21 Thread Weiny, Ira
> 
> On Mon, Oct 19, 2015 at 10:11:22PM -0400, ira.we...@intel.com wrote:
> > From: Niranjana Vishwanathapura 
> >
> > Use NULL instead of 0 for pointer argument to fix the sparse error.
> >
> > Reviewed-by: Mike Marciniszyn 
> > Reviewed-by: Mitko Haralanov 
> > Reviewed-by: Dennis Dalessandro 
> > Signed-off-by: Niranjana Vishwanathapura
> > 
> > Signed-off-by: Ira Weiny 
> 
> This should have just been folded in with the previous patch.  Don't introduce
> problems and fix them in the patchset.

My bad, I will fix in v3.

Ira

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: merge struct ib_device_attr into struct ib_device V2

2015-10-21 Thread Jason Gunthorpe
On Wed, Oct 21, 2015 at 08:48:10AM -0700, Bart Van Assche wrote:
> On 10/21/2015 12:11 AM, Or Gerlitz wrote:
> >haven't found any review or ack to your giant patch that touches the
> > whole subsystem (drivers, core and ULPs) expect from Sagi's -- lets
> > hear more opinions.
> 
> Although I have not yet had the time to review the entire patch, removing
> ib_query_device() seems a great idea to me and an idea that I welcome very
> much. The ib_device_attr structure is too large to be allocated on the
> stack. This means that with Christoph's patch it is no longer needed to call
> kmalloc() + ib_query_device() + kfree() when a device attribute is needed
> from kernel code.

I agree, this is absolutely the right way to go.

The bikeshedding is not important, nobody has come up with a reason
why we need to maintain the attr structure as-is and Christoph already
has a patch - I say go with it.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: merge struct ib_device_attr into struct ib_device V2

2015-10-21 Thread Steve Wise

On 10/21/2015 11:43 AM, Jason Gunthorpe wrote:

On Wed, Oct 21, 2015 at 08:48:10AM -0700, Bart Van Assche wrote:

On 10/21/2015 12:11 AM, Or Gerlitz wrote:

haven't found any review or ack to your giant patch that touches the
whole subsystem (drivers, core and ULPs) expect from Sagi's -- lets
hear more opinions.

Although I have not yet had the time to review the entire patch, removing
ib_query_device() seems a great idea to me and an idea that I welcome very
much. The ib_device_attr structure is too large to be allocated on the
stack. This means that with Christoph's patch it is no longer needed to call
kmalloc() + ib_query_device() + kfree() when a device attribute is needed
from kernel code.

I agree, this is absolutely the right way to go.

The bikeshedding is not important, nobody has come up with a reason
why we need to maintain the attr structure as-is and Christoph already
has a patch - I say go with it.

Jason



While I don't really like the uber patch review-wise, I'm all for nuking 
ib_query_device() and the attr struct.


Steve.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: merge struct ib_device_attr into struct ib_device V2

2015-10-21 Thread Or Gerlitz
On Wed, Oct 21, 2015 at 6:48 PM, Bart Van Assche
 wrote:
> On 10/21/2015 12:11 AM, Or Gerlitz wrote:
>>
>> haven't found any review or ack to your giant patch that touches the
>
>> whole subsystem (drivers, core and ULPs) expect from Sagi's -- lets
>> hear more opinions.
>
> Although I have not yet had the time to review the entire patch, removing
> ib_query_device() seems a great idea to me and an idea that I welcome very
> much. The ib_device_attr structure is too large to be allocated on the
> stack. This means that with Christoph's patch it is no longer needed to call
> kmalloc() + ib_query_device() + kfree() when a device attribute is needed
> from kernel code.


Bart, Jason, Steve,

The alternative approach to address this which was also running here
in the form of a patch from Ira following a reviewer comment from me,
is the have struct ib_device to contain a struct ib_device_attr member
(potentially a pointer) which is filled by the device driver before
they register themselves with the IB core.

This way there's no need for kmalloc() + ib_query_device() + kfree()
when a device attribute is needed and we don't introduce tens/hundreds
new fields for struct ib_device.

Thoughts?


Or.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 for-next 1/7] IB/core: Extend ib_uverbs_create_qp

2015-10-21 Thread Or Gerlitz
On Wed, Oct 21, 2015 at 9:21 PM, Christoph Lameter  wrote:

> Hmmm... We are running out of dev_cap flags on mlx4 already. They are 32
> bits.

Christoph,

These  are QP creation flags (IB_QP_CREATE_XXX) not device capability
flags (IB_DEVICE_YYY)

Or.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 for-next 1/7] IB/core: Extend ib_uverbs_create_qp

2015-10-21 Thread Christoph Lameter
On Wed, 21 Oct 2015, Or Gerlitz wrote:

> Again, the kernel stack consuming rate here was < 1 bit a year when
> averaging over time since this was introduced. So we should be doing
> well for the coming ~10-20 years with this 32 bit field, and we can
> easily extend it later, I verified that with Haggai, so yes, don't
> want 64 bits now.

Hmmm... We are running out of dev_cap flags on mlx4 already. They are 32
bits.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: merge struct ib_device_attr into struct ib_device V2

2015-10-21 Thread Or Gerlitz
On Wed, Oct 21, 2015 at 9:20 PM, Jason Gunthorpe
 wrote:

> No to the pointer idea.

can you spare few words why?

And if you have a valid argument, as Christoph said they teach in
cache 101 course which he's pretty sure I skipped as twenty other
courses taken by kernel developers, put the no pointer member @ the
end

> I have a slight preference toward ibdev->foo to match other subsystems.

I have a slight preference to keep the struct ib_device_attr notion to
match other portions of the RDMA stack such as the UAPI and the user
space API to applications.

> I have no idea why you care so much about something so trivial.

I don't care so much, I have the small right to speak without getting
so much mud on my head.

Or.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: merge struct ib_device_attr into struct ib_device V2

2015-10-21 Thread Jason Gunthorpe
On Wed, Oct 21, 2015 at 09:08:31PM +0300, Or Gerlitz wrote:

> The alternative approach to address this which was also running here
> in the form of a patch from Ira following a reviewer comment from me,
> is the have struct ib_device to contain a struct ib_device_attr member
> (potentially a pointer) which is filled by the device driver before
> they register themselves with the IB core.

No to the pointer idea.

> This way there's no need for kmalloc() + ib_query_device() + kfree()
> when a device attribute is needed and we don't introduce tens/hundreds
> new fields for struct ib_device.

I care very little if the name is ibdev->attr.foo or ibdev->foo, that is
just bikeshedding.

I have a slight preference toward ibdev->foo to match other subsystems.

I have no idea why you care so much about something so trivial.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH rdma-RC] IB/cm: Fix rb-tree duplicate free and use-after-free

2015-10-21 Thread Doug Ledford
On 10/20/2015 04:27 PM, Doug Ledford wrote:
> On 10/15/2015 11:15 AM, Matan Barak wrote:
>>
>>
>> On 10/12/2015 7:37 PM, Hefty, Sean wrote:
 ib_send_cm_sidr_rep could sometimes erase the node from the sidr
 (depending on errors in the process). Since ib_send_cm_sidr_rep is
 called both from cm_sidr_req_handler and cm_destroy_id, cm_id_priv
>>>
>>> This should clarify that it is the app calling from the callback, and
>>> not a direct call from the cm_sidr_req_handler.
>>>
>>
>> Consider the following error flows:
>>
>> Double free:
>> cm_sidr_req_handler:3156->cm_reject_sidr_req:663->ib_send_cm_sidr_rep:3233->erase_rb
>>
>> cm_sidr_req_handler:3173->ib_destroy_cm_id->cm_destroy_id:846->ib_send_cm_sidr_rep:3233->erase_rb
>>
>>
>> RB contains free node:
>> cm_sidr_req_handler:3156->cm_reject_sidr_req:663->ib_send_cm_sidr_rep->returns
>> error(for example cm_alloc_msg,3219)
>>
>> cm_sidr_req_handler:3173->ib_destroy_cm_id->cm_destroy_id:846->cm_reject_sidr_req->cm_reject_sidr_req:663->returns
>>
>> error(for example cm_alloc_msg,3219)->RB wasn't erased but memory is
>> freed :910 kfree(cm_id_priv)
>>
>>
 could be either erased from the rb_tree twice or not erased at all.
>>>
>>> In an error case, I can see why it would be left in the rbtree, but I
>>> don't see how it can be removed twice.
>>>
>>>
 Fixing that by making sure it's erased only once before freeing
 cm_id_priv.

 Fixes: a977049dacde ('[PATCH] IB: Add the kernel CM implementation')
 Signed-off-by: Doron Tsur 
 Signed-off-by: Matan Barak 
 ---

 Hi Doug,
 This patch fixes a bug in the CM. In some flow, rb-tree could be
 freed twice or used after it was freed. This bug was picked by
 our regression tests and this fix was verified.

 Thanks,
 Doron and Matan

   drivers/infiniband/core/cm.c | 10 +-
   1 file changed, 9 insertions(+), 1 deletion(-)

 diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
 index f5cf1c4..56ff0f3 100644
 --- a/drivers/infiniband/core/cm.c
 +++ b/drivers/infiniband/core/cm.c
 @@ -844,6 +844,11 @@ retest:
   case IB_CM_SIDR_REQ_RCVD:
   spin_unlock_irq(_id_priv->lock);
   cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT);
 +spin_lock_irq();
 +if (!RB_EMPTY_NODE(_id_priv->sidr_id_node))
 +rb_erase(_id_priv->sidr_id_node,
 + _sidr_table);
 +spin_unlock_irq();
>>
>> This change seeks to remove the about to be freed node from the rb tree,
>> while verifying it has not been freed already
>>
>>>
>>> We should be able to use a return value from cm_reject_sidr_req() --
>>> passed through from ib_send_cm_sidr_rep() to determine if the id was
>>> removed from the tree.
>>>
>>
>> But this won't protect from double free in ib_send_cm_sidr_rep, unless
>> we pass this parameter to the cm destroy function, but this alternative
>> is cumbersome.
>>
   break;
   case IB_CM_REQ_SENT:
   case IB_CM_MRA_REQ_RCVD:
 @@ -3210,7 +3215,10 @@ int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
   spin_unlock_irqrestore(_id_priv->lock, flags);

   spin_lock_irqsave(, flags);
 -rb_erase(_id_priv->sidr_id_node, _sidr_table);
 +if (!RB_EMPTY_NODE(_id_priv->sidr_id_node)) {
 +rb_erase(_id_priv->sidr_id_node, _sidr_table);
 +RB_CLEAR_NODE(_id_priv->sidr_id_node);
 +}
>>
>> This change protects against double free
>>
   spin_unlock_irqrestore(, flags);
>>>
>>> Something is very wrong in this function if the id is not in the tree
>>> at this point.
>>>
>>
>> We agree, but there's an error flow that triggers this behavior.
> 
> Sean, I need to close on this patch.  What is your position after
> Matan's explanation?
> 

Absent an objection from Sean, I've pulled this in.  A use after free
bug is a pretty serious issue, and you've listed an error flow that
triggers it.  The only thing bugging me is that this code is 10+ years
old and this didn't show up until now, which makes me think that some
recent change is the cause of this.  I've made note of that fact in my
tag commit and I think this warrants further examination in the next
kernel cycle.  But since we are so close to out of time on 4.3, I deemed
it better to fix the use after free issue, even if it isn't necessarily
the perfect fix, than leave that hanging about.

-- 
Doug Ledford 
  GPG KeyID: 0E572FDD




signature.asc
Description: OpenPGP digital signature


Re: [PATCH] usnic: correctly check failed allocation

2015-10-21 Thread Doug Ledford
On 10/15/2015 05:15 PM, Insu Yun wrote:
> Since ib_alloc_device returns allocated memory address, not error,
> it should be checked as IS_NULL, not IS_ERR_OR_NULL.
> 
> Signed-off-by: Insu Yun 

Thanks, applied.

> ---
>  drivers/infiniband/hw/usnic/usnic_ib_main.c | 9 -
>  1 file changed, 4 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c 
> b/drivers/infiniband/hw/usnic/usnic_ib_main.c
> index 34c49b8..cbc0514 100644
> --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c
> +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c
> @@ -328,16 +328,15 @@ static void *usnic_ib_device_add(struct pci_dev *dev)
>   netdev = pci_get_drvdata(dev);
>  
>   us_ibdev = (struct usnic_ib_dev *)ib_alloc_device(sizeof(*us_ibdev));
> - if (IS_ERR_OR_NULL(us_ibdev)) {
> + if (!us_ibdev) {
>   usnic_err("Device %s context alloc failed\n",
>   netdev_name(pci_get_drvdata(dev)));
> - return ERR_PTR(us_ibdev ? PTR_ERR(us_ibdev) : -EFAULT);
> + return ERR_PTR(-EFAULT);
>   }
>  
>   us_ibdev->ufdev = usnic_fwd_dev_alloc(dev);
> - if (IS_ERR_OR_NULL(us_ibdev->ufdev)) {
> - usnic_err("Failed to alloc ufdev for %s with err %ld\n",
> - pci_name(dev), PTR_ERR(us_ibdev->ufdev));
> + if (!us_ibdev->ufdev) {
> + usnic_err("Failed to alloc ufdev for %s\n", pci_name(dev));
>   goto err_dealloc;
>   }
>  
> 


-- 
Doug Ledford 
  GPG KeyID: 0E572FDD




signature.asc
Description: OpenPGP digital signature


Re: [PATCH] usnix: correctly handle kzalloc return value

2015-10-21 Thread Doug Ledford
On 10/17/2015 03:06 PM, Insu Yun wrote:
> Since kzalloc returns memory address, not error code,
> it should be checked whether it is null or not. 
> 
> Signed-off-by: Insu Yun 

Thanks, applied.

> ---
>  drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c 
> b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
> index db3588d..a678a66 100644
> --- a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
> +++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
> @@ -221,8 +221,8 @@ create_roce_custom_flow(struct usnic_ib_qp_grp *qp_grp,
>  
>   /* Create Flow Handle */
>   qp_flow = kzalloc(sizeof(*qp_flow), GFP_ATOMIC);
> - if (IS_ERR_OR_NULL(qp_flow)) {
> - err = qp_flow ? PTR_ERR(qp_flow) : -ENOMEM;
> + if (!qp_flow) {
> + err = -ENOMEM;
>   goto out_dealloc_flow;
>   }
>   qp_flow->flow = flow;
> @@ -296,8 +296,8 @@ create_udp_flow(struct usnic_ib_qp_grp *qp_grp,
>  
>   /* Create qp_flow */
>   qp_flow = kzalloc(sizeof(*qp_flow), GFP_ATOMIC);
> - if (IS_ERR_OR_NULL(qp_flow)) {
> - err = qp_flow ? PTR_ERR(qp_flow) : -ENOMEM;
> + if (!qp_flow) {
> + err = -ENOMEM;
>   goto out_dealloc_flow;
>   }
>   qp_flow->flow = flow;
> 


-- 
Doug Ledford 
  GPG KeyID: 0E572FDD




signature.asc
Description: OpenPGP digital signature


Re: [PATCH] MAINTAINERS: update usnic maintainer contacts

2015-10-21 Thread Doug Ledford
On 10/15/2015 11:01 PM, Dave Goodell wrote:
> Upinder hasn't worked for Cisco for a little while now, updating to more
> active maintainers.
> 
> Signed-off-by: Dave Goodell 

Thanks, applied.

> ---
>  MAINTAINERS | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 5f46784..20cb877 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -2730,9 +2730,10 @@ S: Supported
>  F:   drivers/net/ethernet/cisco/enic/
>  
>  CISCO VIC LOW LATENCY NIC DRIVER
> -M:   Upinder Malhi 
> +M:   Christian Benvenuti 
> +M:   Dave Goodell 
>  S:   Supported
> -F:   drivers/infiniband/hw/usnic
> +F:   drivers/infiniband/hw/usnic/
>  
>  CIRRUS LOGIC EP93XX ETHERNET DRIVER
>  M:   Hartley Sweeten 
> 


-- 
Doug Ledford 
  GPG KeyID: 0E572FDD




signature.asc
Description: OpenPGP digital signature


Re: [PATCH for-next 0/5] RDMA/ocrdma: bug fixes

2015-10-21 Thread Doug Ledford
On 10/20/2015 04:47 AM, Selvin Xavier wrote:
> This patch series includs fix in CQ door bell logic, check to prevent
> driver crash and code cleanup.
> Please apply.
> 
> Devesh Sharma (1):
>   RDMA/ocrdma: Prevent CQ-Doorbell floods
> 
> Naga Irrinki (1):
>   RDMA/ocrdma: Check resource ids received in Async CQE
> 
> Selvin Xavier (3):
>   RDMA/ocrdma: Cleanup unused device list and rcu variables
>   RDMA/ocrdma: Avoid a possible crash in ocrdma_rem_port_stats
>   RDMA/ocrdma: Bump up ocrdma version number to 11.0.0.0
> 
>  drivers/infiniband/hw/ocrdma/ocrdma.h   |  3 +--
>  drivers/infiniband/hw/ocrdma/ocrdma_hw.c| 30 
> +
>  drivers/infiniband/hw/ocrdma/ocrdma_main.c  | 16 ++-
>  drivers/infiniband/hw/ocrdma/ocrdma_stats.c |  2 +-
>  drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 11 +++
>  5 files changed, 33 insertions(+), 29 deletions(-)
> 

Thanks, series applied.

-- 
Doug Ledford 
  GPG KeyID: 0E572FDD




signature.asc
Description: OpenPGP digital signature


Re: [PATCH] RDMA/cxgb4: re-fix 32-bit build warning

2015-10-21 Thread Doug Ledford
On 10/07/2015 08:10 AM, Arnd Bergmann wrote:
> Casting a pointer to __be64 produces a warning on 32-bit architectures:
> 
> drivers/infiniband/hw/cxgb4/mem.c:147:20: warning: cast from pointer to 
> integer of different size [-Wpointer-to-int-cast]
> req->wr.wr_lo = (__force __be64)_wait;
> 
> This was fixed at least twice for this driver in different places,
> and accidentally reverted once more. This puts the correct version
> back in place.
> 
> Signed-off-by: Arnd Bergmann 
> Fixes: 6198dd8d7a6a7 ("iw_cxgb4: 32b platform fixes")

Thanks, applied.

> 
> diff --git a/drivers/infiniband/hw/cxgb4/mem.c 
> b/drivers/infiniband/hw/cxgb4/mem.c
> index 026b91ebd5e2..140415d31bcc 100644
> --- a/drivers/infiniband/hw/cxgb4/mem.c
> +++ b/drivers/infiniband/hw/cxgb4/mem.c
> @@ -144,7 +144,7 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, 
> u32 addr, u32 len,
>   if (i == (num_wqe-1)) {
>   req->wr.wr_hi = cpu_to_be32(FW_WR_OP_V(FW_ULPTX_WR) |
>   FW_WR_COMPL_F);
> - req->wr.wr_lo = (__force __be64)_wait;
> + req->wr.wr_lo = (__force __be64)(unsigned long)_wait;
>   } else
>   req->wr.wr_hi = cpu_to_be32(FW_WR_OP_V(FW_ULPTX_WR));
>   req->wr.wr_mid = cpu_to_be32(
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 


-- 
Doug Ledford 
  GPG KeyID: 0E572FDD




signature.asc
Description: OpenPGP digital signature


Re: [PATCH] IB/core: avoid 32-bit warning

2015-10-21 Thread Doug Ledford
On 10/07/2015 09:46 AM, Yann Droneaud wrote:
> Hi,
> 
> Le mercredi 07 octobre 2015 à 16:19 +0300, Sagi Grimberg a écrit :
>> On 10/7/2015 3:29 PM, Arnd Bergmann wrote:
>>> The INIT_UDATA() macro requires a pointer or unsigned long argument
>>> for
>>> both input and output buffer, and all callers had a cast from when
>>> the code was merged until a recent restructuring, so now we get
>>>
>>> core/uverbs_cmd.c: In function 'ib_uverbs_create_cq':
>>> core/uverbs_cmd.c:1481:66: warning: cast to pointer from integer of
>>> different size [-Wint-to-pointer-cast]
>>>
>>> This makes the code behave as before by adding back the cast to
>>> unsigned long.
>>>
>>> Signed-off-by: Arnd Bergmann 
>>> Fixes: 565197dd8fb1 ("IB/core: Extend ib_uverbs_create_cq")
>>>
>>> diff --git a/drivers/infiniband/core/uverbs_cmd.c
>>> b/drivers/infiniband/core/uverbs_cmd.c
>>> index be4cb9f04be3..88b3b78340f2 100644
>>> --- a/drivers/infiniband/core/uverbs_cmd.c
>>> +++ b/drivers/infiniband/core/uverbs_cmd.c
>>> @@ -1478,7 +1478,7 @@ ssize_t ib_uverbs_create_cq(struct
>>> ib_uverbs_file *file,
>>> if (copy_from_user(, buf, sizeof(cmd)))
>>> return -EFAULT;
>>>
>>> -   INIT_UDATA(, buf, cmd.response, sizeof(cmd),
>>> sizeof(resp));
>>> +   INIT_UDATA(, buf, (unsigned long)cmd.response,
>>> sizeof(cmd), sizeof(resp));
>>
>> Would it make sense to cast inside INIT_UDATA() and not have callers
>> worry about it?
> 
> It's ... complicated. See INIT_UDATA_BUF_OR_NULL().
> 
> Awayway, I have patch to do the opposite, eg. explicitly cast u64 value
> to (void __user *)(unsigned long) in the caller function instead, as it
> allows some safer / new operations on the response buffer.
> 
> Regards.
> 

I haven't seen this oether patch that Yann is talking about, so I've
taken this one.

-- 
Doug Ledford 
  GPG KeyID: 0E572FDD




signature.asc
Description: OpenPGP digital signature


Re: [PATCH for-next 0/2] RDMA/cxgb4: Add iWARP support for T6 adapter

2015-10-21 Thread Doug Ledford
On 10/12/2015 01:01 PM, Steve Wise wrote:
> 
> 
>> -Original Message-
>> From: netdev-ow...@vger.kernel.org [mailto:netdev-ow...@vger.kernel.org] On 
>> Behalf Of Hariprasad Shenai
>> Sent: Wednesday, September 23, 2015 6:49 AM
>> To: linux-rdma@vger.kernel.org; net...@vger.kernel.org
>> Cc: dledf...@redhat.com; da...@davemloft.net; sw...@opengridcomputing.com; 
>> lee...@chelsio.com; nirran...@chelsio.com;
>> Hariprasad Shenai
>> Subject: [PATCH for-next 0/2] RDMA/cxgb4: Add iWARP support for T6 adapter
>>
>> Hi,
>>
>> PATCH 1/2 adds changes like new register, structure and functions in cxgb4
>> driver for iw_cxgb4 driver, and PATCH 2/2 adds iw_cxgb4 specific code to
>> support T6 adapter.
>>
>> This patch series has been created against Doug's linux tree and includes
>> patches on cxgb4 and iw_cxgb4 driver.
>>
>> We have included all the maintainers of respective drivers. Kindly review
>> the change and let us know in case of any review comments.
>>
>> Thanks
>>
> 
> These look ok to me. 
> 
> Series Reviewed-by: Steve Wise 
> 
> Doug, should these get staged through your tree?
> 

Probably.  Unless David objects, I'll queue them up.


-- 
Doug Ledford 
  GPG KeyID: 0E572FDD




signature.asc
Description: OpenPGP digital signature


Re: [PATCH for-next 0/5] set MPA revision to 2 and misc. fixes for iw_cxgb4

2015-10-21 Thread Doug Ledford
On 09/08/2015 12:26 AM, Hariprasad Shenai wrote:
> Hi,
> 
> This patch series adds the following.
> Detect errors while creating listening servers, pass ird/ord info in
> connect reply events, fix misuse of ord for ird calculation and for the
> ESTABLISHED event we should have the peer's ord/ird so swap the values in
> the event before the upcall. Set default MPA version to 2.
> 
> This patch series has been created against Doug's linux tree and includes
> patches on iw_cxgb4 driver.
> 
> We have included all the maintainers of respective drivers. Kindly review
> the change and let us know in case of any review comments.
> 
> Thanks
> 
> 
> Hariprasad Shenai (5):
>   iw_cxgb4: detect fatal errors while creating listening filters
>   iw_cxgb4: pass the ord/ird in connect reply events
>   iw_cxgb4: fix misuse of ep->ord for minimum ird calculation
>   iw_cxgb4: reverse the ord/ird in the ESTABLISHED upcall
>   iw_cxgb4: set the default MPA version to 2
> 
>  drivers/infiniband/hw/cxgb4/cm.c | 18 +-
>  1 file changed, 13 insertions(+), 5 deletions(-)
> 

The series, minus the last one which I managed to get into a previous
release, has been picked up.  Thanks!

-- 
Doug Ledford 
  GPG KeyID: 0E572FDD




signature.asc
Description: OpenPGP digital signature


VMware Users List

2015-10-21 Thread Michael Thompson


Hi,

Hope this email finds you well!

Would you be interested in reaching out to "VMware Users List” for across USA 
with opt-in verified contact information?

We also have the list of  VMware Clusters, VMware Distributed Resource 
Scheduler (DRS), VMware ESX Server, VMware Fusion, VMware GSX Server, VMware 
Server, VMware Site Recovery Manager (SRM), VMware ThinApp, VMware vCenter, 
VMware vCenter Converter, VMware vCenter Lab Manager, VMware vCloud, VMware 
vFabric GemFire, VMware View, VMware Virtual Infrastructure, VMware vMotion, 
VMware vShield, VMware vSphere, VMware Workstation, Avaya List, Adobe List, 
Barracuda List, BMC List, Cisco List, Citrix List, EMC List, HP List, HP List, 
Infor List, Java List, Juniper List, Microsoft List, Netapp List, Netsuite 
List, Nortel List, Novell List, Oracle List, Sage List, Salesforce List, SAP 
List, Siemens List, Symantec List, and many more...

Our Data fields on each record contains: Company, Name, First Name, Last Name, 
Title, HQ Phone, Direct No, Email, Address1, Address2, City, State, Zip, 
Country, Industry, Revenue, Employees, IT Budget, IT Employees, Website, 
Technology, Company HQ Address1, Company HQ Address2, Company HQ City, Company 
HQ State, Company HQ Zip, Company HQ Country and Linked-in link.

All the contacts are opt-in verified, 100% permission based and can be used for 
unlimited  multi-channel marketing.

If you can let me know target criteria, I can get back to you with proper 
response.

Look forward to your prompt response.

Best Regards,
Michael Thompson

Reply “Leave-out” if you do not wish to receive emails from us.


---
This email has been checked for viruses by Avast antivirus software.
https://www.avast.com/antivirus

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 for-next 1/7] IB/core: Extend ib_uverbs_create_qp

2015-10-21 Thread Or Gerlitz
On Wed, Oct 21, 2015 at 5:09 PM, Sagi Grimberg  wrote:
> On 10/21/2015 1:04 PM, Or Gerlitz wrote:
>> On 10/21/2015 12:53 PM, Sagi Grimberg wrote:
>>> On 10/15/2015 2:44 PM, Eran Ben Elisha wrote:

 +struct ib_uverbs_ex_create_qp {
 +__u64 user_handle;
 +__u32 pd_handle;
 +__u32 send_cq_handle;
 +__u32 recv_cq_handle;
 +__u32 srq_handle;
 +__u32 max_send_wr;
 +__u32 max_recv_wr;
 +__u32 max_send_sge;
 +__u32 max_recv_sge;
 +__u32 max_inline_data;
 +__u8  sq_sig_all;
 +__u8  qp_type;
 +__u8  is_srq;
 +__u8 reserved;
 +__u32 comp_mask;
 +__u32 create_flags;
>>>
>>>
>>> Can we please make create_flags u64 to begin with?

>> In the kernel we used 4-5 flags over 10 years, why worry?

> I don't see why we should assume that we won't occupy enough bits in a
> long time just for saving extra 4 bytes. But if you are strongly against
> it then I won't persist here.

Again, the kernel stack consuming rate here was < 1 bit a year when
averaging over time since this was introduced. So we should be doing
well for the coming ~10-20 years with this 32 bit field, and we can
easily extend it later, I verified that with Haggai, so yes, don't
want 64 bits now.

Or.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH for-next 0/2] RDMA/cxgb4: Add iWARP support for T6 adapter

2015-10-21 Thread David Miller
From: Doug Ledford 
Date: Wed, 21 Oct 2015 16:58:50 -0400

> Probably.  Unless David objects, I'll queue them up.

No objection from me.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 01/22] staging/rdma/hfi1: Fix regression in send performance

2015-10-21 Thread Dan Carpenter
On Mon, Oct 19, 2015 at 10:11:16PM -0400, ira.we...@intel.com wrote:
> From: Mike Marciniszyn 
> 
> This additional call is a regression from qib.  For small messages the 
> progress
> routine always builds one and clears out the ahg state when the queue has gone
> to empty which is the predominant case for small messages.
> 
> Inline the routine to mitigate the call and move the routine to qp.h for scope
> reasons.
> 
> Reviewed-by: Dennis Dalessandro 
> Signed-off-by: Mike Marciniszyn 
> Signed-off-by: Ira Weiny 
> ---

The whole point of this patch is to do:

> - if (qp->s_sde)
> + if (qp->s_sde && qp->s_ahgidx >= 0)

It was not at all clear from the changelog and it was difficult to tell
because we moved code around as well.

regards,
dan carpenter
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 for-next 1/7] IB/core: Extend ib_uverbs_create_qp

2015-10-21 Thread Doug Ledford
On 10/21/2015 10:00 AM, Eran Ben Elisha wrote:
> ib_uverbs_ex_create_qp follows the extension verbs
> mechanism. New features (for example, QP creation flags
> field which is added in a downstream patch) could used
> via user-space libraries without breaking the ABI.
> 
> Signed-off-by: Eran Ben Elisha 
> ---
> Hi Doug,
> This is a fix for the first patch of "Add support for multicast loopback
> prevention to mlx4"
> Changes from v2:
>   remove comp_mask check in create_qp based on Haggai's comment

I updated the v2 series with this patch and then pulled in the entire
series.  Thanks.


-- 
Doug Ledford 
  GPG KeyID: 0E572FDD




signature.asc
Description: OpenPGP digital signature


Re: [PATCH v6 0/4] Add network namespace support in the RDMA-CM

2015-10-21 Thread Doug Ledford
On 08/27/2015 10:29 AM, Haggai Eran wrote:
> Hi,
> 
> Now that the code for demuxing requests is inside rdma_cm, here are the 
> patches
> to add InfiniBand network namespace again.
> 
> Changes from v5:
> - removed patches that got in as part of the cleanup series.

Hi Haggai,

This series no longer applies cleanly after all of the cm changes that
made it into 4.3.  Can you please rebase and resend?


-- 
Doug Ledford 
  GPG KeyID: 0E572FDD




signature.asc
Description: OpenPGP digital signature


Re: [PATCH for-next v2 00/10] Add RoCE GID cache usage in verbs/cma

2015-10-21 Thread Doug Ledford
On 10/15/2015 11:38 AM, Matan Barak wrote:
> Hi Doug,
> 
> This purpose of this series is to add usage of the GID cache to
> the CMA and IB stack. Instead of passing Ethernet L2 attributes
> via QP attributes, we could just use the GID cache that's already
> points to a ndev and thus to all required L2 attributes.
> 
> The first five patches query the GID table to find only GIDs which
> are related to the bounded net-device of the specific used port. This
> extra information is carried via ib_sa_path_rec (extending it to
> include the namespace and ifindex). Querying the ndev and port
> is achieved by adding a gid_attr argument to ib_query_gid, add a
> ndev argument to ib_find_cached_gid and add a new
> ib_find_cached_gid_by_port. This usage of the GID table replaces
> the usage of QP attributes.
> 
> The sixth patch adds an ib_cache_gid_find_by_filter function.
> This allows the user to query the cache by a specific filter.
> ib_cache_gid_find_by_filter is used by the seventh patch. Instead
> of storing the smac and vid on the AH, we could just resolve
> them from the net-device which the sgid index is assigned to.
> This change means that instead of resolving all L2 attributes,
> the server, we get the dgid and vlan and searches the GID cache
> in order to find a matching GID index.
> 
> The last three patches removed unused fields and attributes.
> 
> Matan
> 
> Changes from V1:
>  - Rebased patches against Doug's latest k.o/for-4.4 tree.

Most of this code seemed like pretty straight forward prepatory word for
the follow on patch series.  However, I did note that this patch set
extends the per-element locking of the GID tables.  I've taken this
patch set in spite of that fact.  Please prioritize changing that
locking to something more reasonable.  I would prefer if those changes
landed prior to the 4.4 merge window closing.


-- 
Doug Ledford 
  GPG KeyID: 0E572FDD




signature.asc
Description: OpenPGP digital signature


Re: [PATCH 00/23] Update driver to 0.9-294

2015-10-21 Thread Moni Shoua
> Understood, however, unlike SoftRoCE, qib and hfi currently share a lot of 
> code
> to drive the hardware.
>
> The underlying reason for the TODO item "Remove software processing of IB
> protocol..." is because we have a large amount of duplicated code between 
> these
> drivers.  _Some_ of which, at a high level, is sharable with SoftRoCE.
>
After studying the qib driver (briefly I must admit) I found that it
has a lot in common with SoftRoCE driver and both can be redesigned to
share a fair amount of code.
> These patches (and more to follow), further differentiate the 2 drivers along
> hardware lines.  Accepting these patches will help us make sure that we don't
> create some common code between qib and hfi which, because of our testing we
> now know, needs to be separated out.
>
I'm sorry but I'm not sure that I understand the plan. What we had in
mind is that we start following the idea which was presented by Deny,
build a detailed design plan and implement it. All that can be done
without fixing bugs, adding functionality and improving performance
but I understand that you also need to move forward.
Us, in Mellanox are willing to contribute to the effort and take
responsibility over the Software Verbs Transport.
> This is a separate issue from the higher level code sharing which will be done
> with SoftRoCE.
I'm not sure I agree. Can you explain the plan to remove code sharing
between qib and hfi that is not part of the Software Verbs Transport
module?
>
> Ira
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: merge struct ib_device_attr into struct ib_device V2

2015-10-21 Thread Christoph Hellwig
On Tue, Oct 20, 2015 at 06:07:31PM +0300, Or Gerlitz wrote:
> But this makes struct ib_device much much bigger, and this structure
> is used in **fast** path,
> e.g the ULP traverses through a pointer from struct ib_device to
> post_send/recv, poll_cq and friends.

But it doesn't get near the end of the structure.  Maybe it's time for a
little cache line 101 primer?
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: merge struct ib_device_attr into struct ib_device V2

2015-10-21 Thread Christoph Hellwig
On Tue, Oct 20, 2015 at 06:13:35PM +0300, Sagi Grimberg wrote:
>> The networking community will let you work for 10y before they add a
>> field to struct net_device exactly b/c
>> of the reason I brought. Why here we can come out of the blue and add
>> tens if not hundreds of fields to our device structure?
>
> Keeping struct ib_device_attr embedded at the end of struct ib_device is
> exactly the same isn't it?

In terms of layout it is, in terms of usability it's worse.  Just to take
the networking example Or seems to like so much there is no struct netdev_attr
to which fields of struct netdev are moved out to, which you then need to
query and cache anyway in your private device structure.   This design simply
was a idiotic idea to start with and I don't understand why anyone would
han onto it.  I do understand Chuck's concern about churn, but I'd rather
fix the infiniband subsystem up before we're growing even more users.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: merge struct ib_device_attr into struct ib_device V2

2015-10-21 Thread Or Gerlitz

On 10/21/2015 9:38 AM, Christoph Hellwig wrote:

On Tue, Oct 20, 2015 at 06:07:31PM +0300, Or Gerlitz wrote:

>But this makes struct ib_device much much bigger, and this structure
>is used in **fast** path,
>e.g the ULP traverses through a pointer from struct ib_device to
>post_send/recv, poll_cq and friends.

But it doesn't get near the end of the structure.  Maybe it's time for a
little cache line 101 primer?


Oh, not that I am fully qualified on that front, but I understand  the 
end-of-structure argument.


Fact is that for struct net_device you will not add 333 new fields over 
night in the coming 33 years, for sure.


This makes much sense to me, as a guideline. I don't think we should 
have a device structure with the current
100 fields and another few hundreds without any notable benefit and 
against the common practice in the networking

subsystem.

We have a UAPI that requires us to keep the device query verb for ever, 
and hence the returned device attr struct,
it would be a much smaller and noisy patch to cache an device attr 
pointer on the device structure.


Or.


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: merge struct ib_device_attr into struct ib_device V2

2015-10-21 Thread Christoph Hellwig
On Wed, Oct 21, 2015 at 09:44:41AM +0300, Or Gerlitz wrote:
> Fact is that for struct net_device you will not add 333 new fields over 
> night in the coming 33 years, for sure.

That's because they never had this split and added fields to struct netdev
as required.  One interesting difference in netdev is that it move all
the function pointers out to a different structure so it could be kept
const.  This introduces one more level of pointer chasing but has other
advantages.

> This makes much sense to me, as a guideline. I don't think we should have a 
> device structure with the current
> 100 fields and another few hundreds without any notable benefit and against 
> the common practice in the networking
> subsystem.

Please understand the networking subsystem (or SCSI, or NVMe, or ...) before
making such incorrect comments.  We're moving towards how all other
subsystems work.

> We have a UAPI that requires us to keep the device query verb for ever, and 
> hence the returned device attr struct,
> it would be a much smaller and noisy patch to cache an device attr pointer 
> on the device structure.

Take a look at the code.  The only time we ever call into ->query_device is
for the userspace only timestamping extensions only implemented for mlx4.

With all the stuff we have on the plate the kernel API will look substatially
different from the arkane 'Verbs' implementation in userspace, and they will
use less and less common code.  Libuvers and the ABIs it uses are something
we can't change unfortunately, but we can make the kernel API much much
better by learining lessons from other kernel subsystems.  That's work
that Jason Sagi and me have been doing for a while.

I'd also suggest you update your Linux knowledge before trying to
micromanage patch submissions.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: merge struct ib_device_attr into struct ib_device V2

2015-10-21 Thread Or Gerlitz

On 10/21/2015 9:51 AM, Christoph Hellwig wrote:

We have a UAPI that requires us to keep the device query verb for ever, and
hence the returned device attr struct, it would be a much smaller and noisy 
patch pointer
to cache an device attr  on the device structure.

Take a look at the code.  The only time we ever call into ->query_device is
for the userspace only timestamping extensions only implemented for mlx4.


We will have many more device query extensions, but the point I tried to 
make here is a bit different --
we do need to keep the user/kernel device attr struct as part of the 
UAPI, and don't see any reason to
avoid it in the kernel, just because some other subsystems do that, 
according to your view. As I said, you
would have to work real (real) hard to convince the networking ppl to 
add fields to struct net_device sk_buff
and friends... the nature of rdma/offloading is such that new attr come 
often and I don't think we need

to touch our device struct every release.



With all the stuff we have on the plate the kernel API will look substatially
different from the arkane 'Verbs' implementation in userspace, and they will
use less and less common code.  Libuvers and the ABIs it uses are something
we can't change unfortunately, but we can make the kernel API much much
better by learining lessons from other kernel subsystems.  That's work
that Jason Sagi and me have been doing for a while.

I'd also suggest you update your Linux knowledge before trying to
micromanage patch submissions.


not trying to micro-manage @ all, I went over the archives and haven't 
found any review or ack
to your giant patch that touches the whole subsystem (drivers, core and 
ULPs) expect from Sagi's
-- lets hear more opinions. Re my education -- can you make the argument 
more precise: what are we

making much much better in the kernel API by thins house moving exercise?

Or.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 for-next 1/7] IB/core: Extend ib_uverbs_create_qp

2015-10-21 Thread Sagi Grimberg

On 10/15/2015 2:44 PM, Eran Ben Elisha wrote:

ib_uverbs_ex_create_qp follows the extension verbs
mechanism. New features (for example, QP creation flags
field which is added in a downstream patch) could used
via user-space libraries without breaking the ABI.


This is an important addition, I'll need it soon for stuff
I'm working on...


+struct ib_uverbs_ex_create_qp {
+   __u64 user_handle;
+   __u32 pd_handle;
+   __u32 send_cq_handle;
+   __u32 recv_cq_handle;
+   __u32 srq_handle;
+   __u32 max_send_wr;
+   __u32 max_recv_wr;
+   __u32 max_send_sge;
+   __u32 max_recv_sge;
+   __u32 max_inline_data;
+   __u8  sq_sig_all;
+   __u8  qp_type;
+   __u8  is_srq;
+   __u8 reserved;
+   __u32 comp_mask;
+   __u32 create_flags;


Can we please make create_flags u64 to begin with?
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] IB/cma: Use inner P_Key to determine netdev

2015-10-21 Thread Haggai Eran
On 20/10/2015 19:44, Jason Gunthorpe wrote:
> On Tue, Oct 20, 2015 at 09:45:27AM +0300, Haggai Eran wrote:
>> On 19/10/2015 21:19, Jason Gunthorpe wrote:
>>> On Mon, Oct 19, 2015 at 09:09:25PM +0300, Haggai Eran wrote:
 When discussing the patches to demux ids in rdma_cm instead of ib_cm, it
 was decided that it is best to use the P_Key value in the packet headers
 [1]. However, some drivers are currently unable to send correct P_Key in
 GMP headers.
>>>
>>> You should explicitly describe the broken drivers in the commit text.
>> These are mlx5 and ipath. I'll add them to the commit message.
> 
> And ipath is actually ipath, the obsolete driver being removed, not
> qib? (ie we don't care about it?)
Right. qib is fine.

>> The remaining issue is that it doesn't respect the
>> ib_send_wr.ud.pkey_index field when sending.
> 
> But this is a very serious bug, to the point the mis-labeled packets
> may not even be delivered in some cases - you really care about the
> sub case where mis-labeled packets happen to be deliverable but don't
> parse right?
I understand the issue in mlx5 is serious but we've lived with it a long
time. In this patch I only wanted to work around it for the purpose of
fixing the regression introduced in 4.3.

> Well, don't forget to undo this patch when the mlx5 driver is fixed..
Of course.

Thanks,
Haggai
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 for-next 1/7] IB/core: Extend ib_uverbs_create_qp

2015-10-21 Thread Or Gerlitz

On 10/21/2015 12:53 PM, Sagi Grimberg wrote:

On 10/15/2015 2:44 PM, Eran Ben Elisha wrote:



+struct ib_uverbs_ex_create_qp {
+__u64 user_handle;
+__u32 pd_handle;
+__u32 send_cq_handle;
+__u32 recv_cq_handle;
+__u32 srq_handle;
+__u32 max_send_wr;
+__u32 max_recv_wr;
+__u32 max_send_sge;
+__u32 max_recv_sge;
+__u32 max_inline_data;
+__u8  sq_sig_all;
+__u8  qp_type;
+__u8  is_srq;
+__u8 reserved;
+__u32 comp_mask;
+__u32 create_flags;


Can we please make create_flags u64 to begin with?



In the kernel we used 4-5 flags over 10 years, why worry?

Or.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 for-next 1/7] IB/core: Extend ib_uverbs_create_qp

2015-10-21 Thread Haggai Eran
On 21/10/2015 12:53, Sagi Grimberg wrote:
> On 10/15/2015 2:44 PM, Eran Ben Elisha wrote:
>> ib_uverbs_ex_create_qp follows the extension verbs
>> mechanism. New features (for example, QP creation flags
>> field which is added in a downstream patch) could used
>> via user-space libraries without breaking the ABI.
> 
> This is an important addition, I'll need it soon for stuff
> I'm working on...
> 
>> +struct ib_uverbs_ex_create_qp {
>> +__u64 user_handle;
>> +__u32 pd_handle;
>> +__u32 send_cq_handle;
>> +__u32 recv_cq_handle;
>> +__u32 srq_handle;
>> +__u32 max_send_wr;
>> +__u32 max_recv_wr;
>> +__u32 max_send_sge;
>> +__u32 max_recv_sge;
>> +__u32 max_inline_data;
>> +__u8  sq_sig_all;
>> +__u8  qp_type;
>> +__u8  is_srq;
>> +__u8 reserved;
>> +__u32 comp_mask;
>> +__u32 create_flags;
> 
> Can we please make create_flags u64 to begin with?
Note that the size of the struct must be a round number of 64-bit words
to avoid different sizeof values on 32-bit systems. If you change the
size of create_flags you'll need another 32-bit padding field in the struct.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] IB/mlx5: Postpone remove_keys under knowledge of coming preemption

2015-10-21 Thread Leon Romanovsky
From: Leon Romanovsky 

The remove_keys() logic is performed as garbage collection task. Such
task is intended to be run when no other active processes are running.

The need_resched() will return TRUE if there are user tasks to be
activated in near future.

In such case, we don't execute remove_keys() and postpone
the garbage collection work to try to run in next cycle,
in order to free CPU resources to other tasks.

The possible pseudo-code to trigger such scenario:
1. Allocate a lot of MR to fill the cache above the limit.
2. Wait a small amount of time "to calm" the system.
3. Start CPU extensive operations on multi-node cluster.
4. Expect performance degradation during MR cache shrink operation.

Signed-off-by: Leon Romanovsky 
Signed-off-by: Eli Cohen 
---

diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 54a15b5..45ce00e 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -381,7 +381,19 @@
}
}
} else if (ent->cur > 2 * ent->limit) {
-   if (!someone_adding(cache) &&
+   /*
+* The remove_keys() logic is performed as garbage collection
+* task. Such task is intended to be run when no other active
+* processes are running.
+*
+* The need_resched() will return TRUE if there are user tasks
+* to be activated in near future.
+*
+* In such case, we don't execute remove_keys() and postpone
+* the garbage collection work to try to run in next cycle,
+* in order to free CPU resources to other tasks.
+*/
+   if (!need_resched() && !someone_adding(cache) &&
time_after(jiffies, cache->last_add + 300 * HZ)) {
remove_keys(dev, i, 1);
if (ent->cur > ent->limit)
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 for-next 1/7] IB/core: Extend ib_uverbs_create_qp

2015-10-21 Thread Haggai Eran
On 15/10/2015 14:44, Eran Ben Elisha wrote:
> ib_uverbs_ex_create_qp follows the extension verbs
> mechanism. New features (for example, QP creation flags
> field which is added in a downstream patch) could used
> via user-space libraries without breaking the ABI.
> 
> Signed-off-by: Eran Ben Elisha 
> ---

> diff --git a/drivers/infiniband/core/uverbs_cmd.c 
> b/drivers/infiniband/core/uverbs_cmd.c
> index be4cb9f..e795d59 100644
> --- a/drivers/infiniband/core/uverbs_cmd.c
> +++ b/drivers/infiniband/core/uverbs_cmd.c
> @@ -1741,66 +1741,65 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file 
> *file,
>   return in_len;
>  }
>  
> -ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
> - struct ib_device *ib_dev,
> - const char __user *buf, int in_len,
> - int out_len)
> -{
> - struct ib_uverbs_create_qp  cmd;
> - struct ib_uverbs_create_qp_resp resp;
> - struct ib_udata udata;
> - struct ib_uqp_object   *obj;
> - struct ib_device   *device;
> - struct ib_pd   *pd = NULL;
> - struct ib_xrcd *xrcd = NULL;
> - struct ib_uobject  *uninitialized_var(xrcd_uobj);
> - struct ib_cq   *scq = NULL, *rcq = NULL;
> - struct ib_srq  *srq = NULL;
> - struct ib_qp   *qp;
> - struct ib_qp_init_attr  attr;
> - int ret;
> -
> - if (out_len < sizeof resp)
> - return -ENOSPC;
> -
> - if (copy_from_user(, buf, sizeof cmd))
> - return -EFAULT;
> +static int create_qp(struct ib_uverbs_file *file,
> +  struct ib_udata *ucore,
> +  struct ib_udata *uhw,
> +  struct ib_uverbs_ex_create_qp *cmd,
> +  size_t cmd_sz,
> +  int (*cb)(struct ib_uverbs_file *file,
> +struct ib_uverbs_ex_create_qp_resp *resp,
> +struct ib_udata *udata),
> +  void *context)
> +{
> + struct ib_uqp_object*obj;
> + struct ib_device*device;
> + struct ib_pd*pd = NULL;
> + struct ib_xrcd  *xrcd = NULL;
> + struct ib_uobject   *uninitialized_var(xrcd_uobj);
> + struct ib_cq*scq = NULL, *rcq = NULL;
> + struct ib_srq   *srq = NULL;
> + struct ib_qp*qp;
> + char*buf;
> + struct ib_qp_init_attr  attr;
> + struct ib_uverbs_ex_create_qp_resp resp;
> + int ret;
>  
> - if (cmd.qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
> + if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
>   return -EPERM;
>  
> - INIT_UDATA(, buf + sizeof cmd,
> -(unsigned long) cmd.response + sizeof resp,
> -in_len - sizeof cmd, out_len - sizeof resp);
> -
>   obj = kzalloc(sizeof *obj, GFP_KERNEL);
>   if (!obj)
>   return -ENOMEM;
>  
> - init_uobj(>uevent.uobject, cmd.user_handle, file->ucontext, 
> _lock_class);
> + init_uobj(>uevent.uobject, cmd->user_handle, file->ucontext,
> +   _lock_class);
>   down_write(>uevent.uobject.mutex);
>  
> - if (cmd.qp_type == IB_QPT_XRC_TGT) {
> - xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, _uobj);
> + if (cmd->qp_type == IB_QPT_XRC_TGT) {
> + xrcd = idr_read_xrcd(cmd->pd_handle, file->ucontext,
> +  _uobj);
>   if (!xrcd) {
>   ret = -EINVAL;
>   goto err_put;
>   }
>   device = xrcd->device;
>   } else {
> - if (cmd.qp_type == IB_QPT_XRC_INI) {
> - cmd.max_recv_wr = cmd.max_recv_sge = 0;
> + if (cmd->qp_type == IB_QPT_XRC_INI) {
> + cmd->max_recv_wr = 0;
> + cmd->max_recv_sge = 0;
>   } else {
> - if (cmd.is_srq) {
> - srq = idr_read_srq(cmd.srq_handle, 
> file->ucontext);
> + if (cmd->is_srq) {
> + srq = idr_read_srq(cmd->srq_handle,
> +file->ucontext);
>   if (!srq || srq->srq_type != IB_SRQT_BASIC) {
>   ret = -EINVAL;
>   goto err_put;
>   }
>   }
>  
> - if (cmd.recv_cq_handle != cmd.send_cq_handle) {
> - rcq = idr_read_cq(cmd.recv_cq_handle, 
> file->ucontext, 0);
> + if (cmd->recv_cq_handle != cmd->send_cq_handle) {
> +   

Re: merge struct ib_device_attr into struct ib_device V2

2015-10-21 Thread Christoph Hellwig
On Wed, Oct 21, 2015 at 10:11:29AM +0300, Or Gerlitz wrote:
>
> We will have many more device query extensions,

None of which use struct ib_device_attr I hope!

> but the point I tried to 
> make here is a bit different --
> we do need to keep the user/kernel device attr struct as part of the UAPI, 

It's an entirely separate ib_uverbs_query_device_resp structure. 

> and don't see any reason to
> avoid it in the kernel, just because some other subsystems do that, 
> according to your view. As I said, you
> would have to work real (real) hard to convince the networking ppl to add 
> fields to struct net_device sk_buff

Or, please stop these bullshit strawman arguments.  Yes, adding fields to
struct sk_buff will be hard, but that's not the right comparism.  struct
sk_buff is a structured allocated for the data buffers in great quantities and
not the net_device structure allocated once per device.  I'm going to
finish this email, but if you don't even try to get your facts right I'll stop
responding ro you because it's futile.

> and friends... the nature of rdma/offloading is such that new attr come 
> often and I don't think we need
> to touch our device struct every release.

For anything you want to add you need to touch _a_ struct.  It's not any
difference in efforts if it's ib_device_attr or ib_device.

You're not even saving much memory if at all as every driver caches at
least some fields of it in their own device structure, and iser, isert, 
srpt and nfs cache the full structure, so if you use one of those you're
already having one of them per device.  If you use two of them you
have multiple copies plus individual fields caches by other ULDs and core
code.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: merge struct ib_device_attr into struct ib_device V2

2015-10-21 Thread Or Gerlitz

On 10/21/2015 10:33 AM, Christoph Hellwig wrote:

For anything you want to add you need to touch_a_  struct.  It's not any
difference in efforts if it's ib_device_attr or ib_device.

You're not even saving much memory if at all as every driver caches at
least some fields of it in their own device structure, and iser, isert,
srpt and nfs cache the full structure,


I know, but a patch that adds caching an attr pointer on the device will 
remove these local caches,

we actually had that/similar patch posted here and it was dropped/forgotten.


so if you use one of those you're
already having one of them per device.  If you use two of them you
have multiple copies plus individual fields caches by other ULDs and core
code.


again, the method I propose does remove these duplications all together.

I am still waiting to hear what is the precise argument for having this 
change.


Or.


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: merge struct ib_device_attr into struct ib_device V2

2015-10-21 Thread Christoph Hellwig
On Wed, Oct 21, 2015 at 10:41:13AM +0300, Or Gerlitz wrote:
> I know, but a patch that adds caching an attr pointer on the device will 
> remove these local caches,
> we actually had that/similar patch posted here and it was dropped/forgotten.
>
>> so if you use one of those you're
>> already having one of them per device.  If you use two of them you
>> have multiple copies plus individual fields caches by other ULDs and core
>> code.
>
> again, the method I propose does remove these duplications all together.
>
> I am still waiting to hear what is the precise argument for having this 
> change.

It does everything the pointer does, but in addition saves memory (no
rounding up of the slab size), reduces pointer chasing and makes and API
that's easier to use and more similar to how all other major Linux subsystems
work.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 for-next 2/7] IB/core: Allow setting create flags in QP init attribute

2015-10-21 Thread Haggai Eran
On 15/10/2015 14:44, Eran Ben Elisha wrote:
> Allow setting IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK at create_flags in
> ib_uverbs_create_qp_ex.
> 
> Signed-off-by: Eran Ben Elisha 
> ---
>  drivers/infiniband/core/uverbs_cmd.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/infiniband/core/uverbs_cmd.c 
> b/drivers/infiniband/core/uverbs_cmd.c
> index e795d59..e9bafa3 100644
> --- a/drivers/infiniband/core/uverbs_cmd.c
> +++ b/drivers/infiniband/core/uverbs_cmd.c
> @@ -1843,7 +1843,7 @@ static int create_qp(struct ib_uverbs_file *file,
> sizeof(cmd->create_flags))
>   attr.create_flags = cmd->create_flags;
>  
> - if (attr.create_flags) {
> + if (attr.create_flags & ~IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
>   ret = -EINVAL;
>   goto err_put;
>   }
> 

FWIW

Reviewed-by: Haggai Eran 
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html