[PATCH 4/8] iw_cxgb4: Max fastreg depth depends on DSGL support

2014-03-26 Thread Hariprasad Shenai
From: Steve Wise sw...@opengridcomputing.com

The max depth of a fastreg mr depends on whether the device supports
DSGL or not.  So compute it dynamically based on the device support and
the module use_dsgl option.

Signed-off-by: Steve Wise sw...@opengridcomputing.com
---
 drivers/infiniband/hw/cxgb4/provider.c | 2 +-
 drivers/infiniband/hw/cxgb4/qp.c   | 3 ++-
 drivers/infiniband/hw/cxgb4/t4.h   | 9 -
 3 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/hw/cxgb4/provider.c 
b/drivers/infiniband/hw/cxgb4/provider.c
index 7e94c9a..deaaa21 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -287,7 +287,7 @@ static int c4iw_query_device(struct ib_device *ibdev,
props-max_mr = c4iw_num_stags(dev-rdev);
props-max_pd = T4_MAX_NUM_PD;
props-local_ca_ack_delay = 0;
-   props-max_fast_reg_page_list_len = T4_MAX_FR_DEPTH;
+   props-max_fast_reg_page_list_len = t4_max_fr_depth(use_dsgl);
 
return 0;
 }
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 8ad7d2e..0af82af 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -555,7 +555,8 @@ static int build_fastreg(struct t4_sq *sq, union t4_wr *wqe,
int pbllen = roundup(wr-wr.fast_reg.page_list_len * sizeof(u64), 32);
int rem;
 
-   if (wr-wr.fast_reg.page_list_len  T4_MAX_FR_DEPTH)
+   if (wr-wr.fast_reg.page_list_len 
+   t4_max_fr_depth(use_dsgl))
return -EINVAL;
 
wqe-fr.qpbinde_to_dcacpu = 0;
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index 62017a7..1ba7a87 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -84,7 +84,14 @@ struct t4_status_page {
sizeof(struct fw_ri_isgl)) / sizeof(struct fw_ri_sge))
 #define T4_MAX_FR_IMMD ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_fr_nsmr_wr) - \
sizeof(struct fw_ri_immd))  ~31UL)
-#define T4_MAX_FR_DEPTH (1024 / sizeof(u64))
+#define T4_MAX_FR_IMMD_DEPTH (T4_MAX_FR_IMMD / sizeof(u64))
+#define T4_MAX_FR_DSGL 1024
+#define T4_MAX_FR_DSGL_DEPTH (T4_MAX_FR_DSGL / sizeof(u64))
+
+static inline int t4_max_fr_depth(int use_dsgl)
+{
+   return use_dsgl ? T4_MAX_FR_DSGL_DEPTH : T4_MAX_FR_IMMD_DEPTH;
+}
 
 #define T4_RQ_NUM_SLOTS 2
 #define T4_RQ_NUM_BYTES (T4_EQ_ENTRY_SIZE * T4_RQ_NUM_SLOTS)
-- 
1.8.4

--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/8] iw_cxgb4: rmb() after reading valid gen bit

2014-03-26 Thread Hariprasad Shenai
From: Steve Wise sw...@opengridcomputing.com

Some HW platforms can reorder read operations, so we must rmb() after
we see a valid gen bit in a CQE but before we read any other fields from
the CQE.

Signed-off-by: Steve Wise sw...@opengridcomputing.com
---
 drivers/infiniband/hw/cxgb4/t4.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index e73ace7..62017a7 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -566,6 +566,8 @@ static inline int t4_next_hw_cqe(struct t4_cq *cq, struct 
t4_cqe **cqe)
printk(KERN_ERR MOD cq overflow cqid %u\n, cq-cqid);
BUG_ON(1);
} else if (t4_valid_cqe(cq, cq-queue[cq-cidx])) {
+   /* Ensure CQE is flushed to memory */
+   rmb();
*cqe = cq-queue[cq-cidx];
ret = 0;
} else
-- 
1.8.4

--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/8] iw_cxgb4: endpoint timeout fixes

2014-03-26 Thread Hariprasad Shenai
From: Steve Wise sw...@opengridcomputing.com

1) Timed-out endpoint processing can be starved. If there are continual
CPL messages flowing into the driver, the endpoint timeout processing
can be starved.  This condition exposed the other bugs below.

Solution: In process_work(), call process_timedout_eps() after each CPL
is processed.

2) Connection events can be processed even though the endpoint is on
the timeout list.  If the endpoint is scheduled for timeout processing,
then we must ignore MPA Start Requests and Replies.

Solution: Change stop_ep_timer() to return 1 if the ep has already been
queued for timeout processing.  All the callers of stop_ep_timer() need
to check this and act accordingly.  There are just a few cases where
the caller needs to do something different if stop_ep_timer() returns 1:

1) in process_mpa_reply(), ignore the reply and  process_timeout()
will abort the connection.

2) in process_mpa_request, ignore the request and process_timeout()
will abort the connection.

It is ok for callers of stop_ep_timer() to abort the connection since
that will leave the state in ABORTING or DEAD, and process_timeout()
now ignores timeouts when the ep is in these states.

3) Double insertion on the timeout list.  Since the endpoint timers are
used for connection setup and teardown, we need to guard against the
possibility that an endpoint is already on the timeout list.  This is
a rare condition and only seen under heavy load and in the presence of
the above 2 bugs.

Solution: In ep_timeout(), don't queue the endpoint if it is already on
the queue.

Signed-off-by: Steve Wise sw...@opengridcomputing.com
---
 drivers/infiniband/hw/cxgb4/cm.c | 89 +---
 1 file changed, 56 insertions(+), 33 deletions(-)

diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index fe5db3c..471614e 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -173,12 +173,15 @@ static void start_ep_timer(struct c4iw_ep *ep)
add_timer(ep-timer);
 }
 
-static void stop_ep_timer(struct c4iw_ep *ep)
+static int stop_ep_timer(struct c4iw_ep *ep)
 {
PDBG(%s ep %p stopping\n, __func__, ep);
del_timer_sync(ep-timer);
-   if (!test_and_set_bit(TIMEOUT, ep-com.flags))
+   if (!test_and_set_bit(TIMEOUT, ep-com.flags)) {
c4iw_put_ep(ep-com);
+   return 0;
+   }
+   return 1;
 }
 
 static int c4iw_l2t_send(struct c4iw_rdev *rdev, struct sk_buff *skb,
@@ -1165,12 +1168,11 @@ static void process_mpa_reply(struct c4iw_ep *ep, 
struct sk_buff *skb)
PDBG(%s ep %p tid %u\n, __func__, ep, ep-hwtid);
 
/*
-* Stop mpa timer.  If it expired, then the state has
-* changed and we bail since ep_timeout already aborted
-* the connection.
+* Stop mpa timer.  If it expired, then
+* we ignore the MPA reply.  process_timeout()
+* will abort the connection.
 */
-   stop_ep_timer(ep);
-   if (ep-com.state != MPA_REQ_SENT)
+   if (stop_ep_timer(ep))
return;
 
/*
@@ -1375,15 +1377,12 @@ static void process_mpa_request(struct c4iw_ep *ep, 
struct sk_buff *skb)
 
PDBG(%s ep %p tid %u\n, __func__, ep, ep-hwtid);
 
-   if (ep-com.state != MPA_REQ_WAIT)
-   return;
-
/*
 * If we get more than the supported amount of private data
 * then we must fail this connection.
 */
if (ep-mpa_pkt_len + skb-len  sizeof(ep-mpa_pkt)) {
-   stop_ep_timer(ep);
+   (void)stop_ep_timer(ep);
abort_connection(ep, skb, GFP_KERNEL);
return;
}
@@ -1413,13 +1412,13 @@ static void process_mpa_request(struct c4iw_ep *ep, 
struct sk_buff *skb)
if (mpa-revision  mpa_rev) {
printk(KERN_ERR MOD %s MPA version mismatch. Local = %d,
Received = %d\n, __func__, mpa_rev, mpa-revision);
-   stop_ep_timer(ep);
+   (void)stop_ep_timer(ep);
abort_connection(ep, skb, GFP_KERNEL);
return;
}
 
if (memcmp(mpa-key, MPA_KEY_REQ, sizeof(mpa-key))) {
-   stop_ep_timer(ep);
+   (void)stop_ep_timer(ep);
abort_connection(ep, skb, GFP_KERNEL);
return;
}
@@ -1430,7 +1429,7 @@ static void process_mpa_request(struct c4iw_ep *ep, 
struct sk_buff *skb)
 * Fail if there's too much private data.
 */
if (plen  MPA_MAX_PRIVATE_DATA) {
-   stop_ep_timer(ep);
+   (void)stop_ep_timer(ep);
abort_connection(ep, skb, GFP_KERNEL);
return;
}
@@ -1439,7 +1438,7 @@ static void process_mpa_request(struct c4iw_ep *ep, 
struct sk_buff *skb)
 * If plen does not account for pkt size
 */
if (ep-mpa_pkt_len  (sizeof(*mpa) + plen)) {
-   

[PATCH 6/8] iw_cxgb4: Initialize reserved fields in a FW work request

2014-03-26 Thread Hariprasad Shenai
From: Steve Wise sw...@opengridcomputing.com

Signed-off-by: Steve Wise sw...@opengridcomputing.com
---
 drivers/infiniband/hw/cxgb4/qp.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 0af82af..e865fa4 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -425,6 +425,8 @@ static int build_rdma_send(struct t4_sq *sq, union t4_wr 
*wqe,
default:
return -EINVAL;
}
+   wqe-send.r3 = 0;
+   wqe-send.r4 = 0;
 
plen = 0;
if (wr-num_sge) {
-- 
1.8.4

--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/8] Endpoint timeout fix, SQ flush and other misc. fixes for iw_cxgb4

2014-03-26 Thread Hariprasad Shenai
Hi All,

This patch series provides fixes related to endpoint timeout, one fix related to
SQ flushing and other miscellaneous fixes for Chelsio T4/T5 adapters on 
iw_cxgb4.

The patch series is created against 'infiniband' tree, 'for-next' branch,
and includes patches for the iw_cxgb4 driver.

These patches were part of a bigger patch-series posted initially on netdev:
https://www.mail-archive.com/linux-rdma@vger.kernel.org/msg19052.html
Based on suggestion from David Miller, we have split above patch-series into
several smaller patch-series.
This is the last of the smaller patch-series on iw-cxgb4.

Since this patch-series contains purely iw_cxgb4 patches, we would like to
request this patch series to get merged via Roland's 'infiniband' tree.

We have included all the maintainers of respective drivers. Kindly review the
change and let us know in case of any review comments.

Thanks

Hariprasad Shenai (1):
  iw_cxgb4: Use pr_warn_ratelimited

Steve Wise (7):
  iw_cxgb4: endpoint timeout fixes
  iw_cxgb4: rmb() after reading valid gen bit
  iw_cxgb4: SQ flush fix
  iw_cxgb4: Max fastreg depth depends on DSGL support
  iw_cxgb4: Initialize reserved fields in a FW work request
  iw_cxgb4: Add missing debug stats
  iw_cxgb4: Use uninitialized_var()

 drivers/infiniband/hw/cxgb4/cm.c   | 89 +-
 drivers/infiniband/hw/cxgb4/cq.c   | 24 -
 drivers/infiniband/hw/cxgb4/mem.c  |  6 ++-
 drivers/infiniband/hw/cxgb4/provider.c |  2 +-
 drivers/infiniband/hw/cxgb4/qp.c   | 11 +++--
 drivers/infiniband/hw/cxgb4/resource.c | 10 ++--
 drivers/infiniband/hw/cxgb4/t4.h   | 11 -
 7 files changed, 95 insertions(+), 58 deletions(-)

-- 
1.8.4

--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 5/8] iw_cxgb4: Use pr_warn_ratelimited

2014-03-26 Thread Hariprasad Shenai
Signed-off-by: Hariprasad Shenai haripra...@chelsio.com
---
 drivers/infiniband/hw/cxgb4/resource.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/hw/cxgb4/resource.c 
b/drivers/infiniband/hw/cxgb4/resource.c
index cdef4d7..94b5fd9 100644
--- a/drivers/infiniband/hw/cxgb4/resource.c
+++ b/drivers/infiniband/hw/cxgb4/resource.c
@@ -322,8 +322,8 @@ u32 c4iw_rqtpool_alloc(struct c4iw_rdev *rdev, int size)
unsigned long addr = gen_pool_alloc(rdev-rqt_pool, size  6);
PDBG(%s addr 0x%x size %d\n, __func__, (u32)addr, size  6);
if (!addr)
-   printk_ratelimited(KERN_WARNING MOD %s: Out of RQT memory\n,
-  pci_name(rdev-lldi.pdev));
+   pr_warn_ratelimited(MOD %s: Out of RQT memory\n,
+   pci_name(rdev-lldi.pdev));
mutex_lock(rdev-stats.lock);
if (addr) {
rdev-stats.rqt.cur += roundup(size  6, 1  MIN_RQT_SHIFT);
-- 
1.8.4

--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 8/8] iw_cxgb4: Use uninitialized_var()

2014-03-26 Thread Hariprasad Shenai
From: Steve Wise sw...@opengridcomputing.com

Signed-off-by: Steve Wise sw...@opengridcomputing.com
---
 drivers/infiniband/hw/cxgb4/cq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index e17b155..cfaa56a 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -672,7 +672,7 @@ skip_cqe:
 static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
 {
struct c4iw_qp *qhp = NULL;
-   struct t4_cqe cqe = {0, 0}, *rd_cqe;
+   struct t4_cqe uninitialized_var(cqe), *rd_cqe;
struct t4_wq *wq;
u32 credit = 0;
u8 cqe_flushed;
-- 
1.8.4

--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 8/8] iw_cxgb4: Use uninitialized_var()

2014-03-26 Thread Yann Droneaud
Le mercredi 26 mars 2014 à 18:53 +0530, Hariprasad Shenai a écrit :
 From: Steve Wise sw...@opengridcomputing.com
 

What for ? Please describe the reason to use uninitialized_var()


 Signed-off-by: Steve Wise sw...@opengridcomputing.com
 ---
  drivers/infiniband/hw/cxgb4/cq.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)
 
 diff --git a/drivers/infiniband/hw/cxgb4/cq.c 
 b/drivers/infiniband/hw/cxgb4/cq.c
 index e17b155..cfaa56a 100644
 --- a/drivers/infiniband/hw/cxgb4/cq.c
 +++ b/drivers/infiniband/hw/cxgb4/cq.c
 @@ -672,7 +672,7 @@ skip_cqe:
  static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
  {
   struct c4iw_qp *qhp = NULL;
 - struct t4_cqe cqe = {0, 0}, *rd_cqe;
 + struct t4_cqe uninitialized_var(cqe), *rd_cqe;
   struct t4_wq *wq;
   u32 credit = 0;
   u8 cqe_flushed;


--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH 8/8] iw_cxgb4: Use uninitialized_var()

2014-03-26 Thread Steve Wise


 -Original Message-
 From: linux-rdma-ow...@vger.kernel.org 
 [mailto:linux-rdma-ow...@vger.kernel.org] On
 Behalf Of Yann Droneaud
 Sent: Wednesday, March 26, 2014 9:34 AM
 To: Hariprasad Shenai
 Cc: linux-rdma@vger.kernel.org; rol...@purestorage.com; 
 sw...@opengridcomputing.com;
 kuma...@chelsio.com; nirran...@chelsio.com
 Subject: Re: [PATCH 8/8] iw_cxgb4: Use uninitialized_var()
 
 Le mercredi 26 mars 2014 à 18:53 +0530, Hariprasad Shenai a écrit :
  From: Steve Wise sw...@opengridcomputing.com
 
 
 What for ? Please describe the reason to use uninitialized_var()
 

Function poll_cq() fills out var cqe.  The compiler doesn't know that so either 
you initialize cqe unnecessarily, or use uninitialized_var().  The poll path is 
performance-critical, and I hate to initialize variables without reason.


 
  Signed-off-by: Steve Wise sw...@opengridcomputing.com
  ---
   drivers/infiniband/hw/cxgb4/cq.c | 2 +-
   1 file changed, 1 insertion(+), 1 deletion(-)
 
  diff --git a/drivers/infiniband/hw/cxgb4/cq.c 
  b/drivers/infiniband/hw/cxgb4/cq.c
  index e17b155..cfaa56a 100644
  --- a/drivers/infiniband/hw/cxgb4/cq.c
  +++ b/drivers/infiniband/hw/cxgb4/cq.c
  @@ -672,7 +672,7 @@ skip_cqe:
   static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
   {
  struct c4iw_qp *qhp = NULL;
  -   struct t4_cqe cqe = {0, 0}, *rd_cqe;
  +   struct t4_cqe uninitialized_var(cqe), *rd_cqe;
  struct t4_wq *wq;
  u32 credit = 0;
  u8 cqe_flushed;
 
 
 --
 To unsubscribe from this list: send the line unsubscribe linux-rdma in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] RDMA/cxgb4: set error code on kmalloc() failure

2014-03-26 Thread Yann Droneaud
If kmalloc() fails in c4iw_alloc_ucontext(), the function
leaves but does not set an error code in ret variable:
it will return 0 to the caller.

This patch sets ret to -ENOMEM in such a case.

Cc: Steve Wise sw...@opengridcomputing.com
Cc: Steve Wise sw...@chelsio.com
Signed-off-by: Yann Droneaud ydrone...@opteya.com
---
 drivers/infiniband/hw/cxgb4/provider.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/cxgb4/provider.c 
b/drivers/infiniband/hw/cxgb4/provider.c
index e36d2a27c431..79429256023a 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -128,8 +128,10 @@ static struct ib_ucontext *c4iw_alloc_ucontext(struct 
ib_device *ibdev,
rhp-rdev.flags |= T4_STATUS_PAGE_DISABLED;
} else {
mm = kmalloc(sizeof(*mm), GFP_KERNEL);
-   if (!mm)
+   if (!mm) {
+   ret = -ENOMEM;
goto err_free;
+   }
 
uresp.status_page_size = PAGE_SIZE;
 
-- 
1.9.0

--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


FW: [PATCH] RDMA/cxgb4: set error code on kmalloc() failure

2014-03-26 Thread Steve Wise
Acked-by: Steve Wise sw...@opengridcomputing.com

Note: This fix applies only to net-next because the commit that introduced this 
is still
pending in net-next:

commit 05eb23893c2cf9502a9cec0c32e7f1d1ed2895c8
Author: Steve Wise sw...@opengridcomputing.com
Date:   Fri Mar 14 21:52:08 2014 +0530

cxgb4/iw_cxgb4: Doorbell Drop Avoidance Bug Fixes


Dave, can you please merge this?

Thanks,

Steve.

-Original Message-
From: Yann Droneaud [mailto:ydrone...@opteya.com] 
Sent: Wednesday, March 26, 2014 10:05 AM
To: Roland Dreier; Roland Dreier
Cc: Yann Droneaud; linux-rdma@vger.kernel.org; Steve Wise; Steve Wise
Subject: [PATCH] RDMA/cxgb4: set error code on kmalloc() failure

If kmalloc() fails in c4iw_alloc_ucontext(), the function
leaves but does not set an error code in ret variable:
it will return 0 to the caller.

This patch sets ret to -ENOMEM in such a case.

Cc: Steve Wise sw...@opengridcomputing.com
Cc: Steve Wise sw...@chelsio.com
Signed-off-by: Yann Droneaud ydrone...@opteya.com
---
 drivers/infiniband/hw/cxgb4/provider.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/cxgb4/provider.c
b/drivers/infiniband/hw/cxgb4/provider.c
index e36d2a27c431..79429256023a 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -128,8 +128,10 @@ static struct ib_ucontext *c4iw_alloc_ucontext(struct 
ib_device
*ibdev,
rhp-rdev.flags |= T4_STATUS_PAGE_DISABLED;
} else {
mm = kmalloc(sizeof(*mm), GFP_KERNEL);
-   if (!mm)
+   if (!mm) {
+   ret = -ENOMEM;
goto err_free;
+   }
 
uresp.status_page_size = PAGE_SIZE;
 
-- 
1.9.0

--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH net-next 2/2] cxgb4/iw_cxgb4: Doorbell Drop Avoidance Bug Fixes

2014-03-26 Thread Yann Droneaud
Le vendredi 14 mars 2014 à 21:52 +0530, Hariprasad Shenai a écrit :
 From: Steve Wise sw...@opengridcomputing.com

[...]

 Signed-off-by: Steve Wise sw...@opengridcomputing.com
 ---
  drivers/infiniband/hw/cxgb4/device.c| 177 
 ++--
  drivers/infiniband/hw/cxgb4/iw_cxgb4.h  |   9 +-
  drivers/infiniband/hw/cxgb4/provider.c  |  43 +-
  drivers/infiniband/hw/cxgb4/qp.c| 140 +--
  drivers/infiniband/hw/cxgb4/t4.h|   6 +
  drivers/infiniband/hw/cxgb4/user.h  |   5 +
  drivers/net/ethernet/chelsio/cxgb4/cxgb4.h  |   1 +
  drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c |  87 +++-
  drivers/net/ethernet/chelsio/cxgb4/sge.c|   8 +-
  9 files changed, 286 insertions(+), 190 deletions(-)
 

[...]

 diff --git a/drivers/infiniband/hw/cxgb4/provider.c 
 b/drivers/infiniband/hw/cxgb4/provider.c
 index 7e94c9a..e36d2a2 100644
 --- a/drivers/infiniband/hw/cxgb4/provider.c
 +++ b/drivers/infiniband/hw/cxgb4/provider.c
 @@ -106,15 +106,54 @@ static struct ib_ucontext *c4iw_alloc_ucontext(struct 
 ib_device *ibdev,
  {
   struct c4iw_ucontext *context;
   struct c4iw_dev *rhp = to_c4iw_dev(ibdev);
 + static int warned;
 + struct c4iw_alloc_ucontext_resp uresp;
 + int ret = 0;
 + struct c4iw_mm_entry *mm = NULL;
  
   PDBG(%s ibdev %p\n, __func__, ibdev);
   context = kzalloc(sizeof(*context), GFP_KERNEL);
 - if (!context)
 - return ERR_PTR(-ENOMEM);
 + if (!context) {
 + ret = -ENOMEM;
 + goto err;
 + }
 +
   c4iw_init_dev_ucontext(rhp-rdev, context-uctx);
   INIT_LIST_HEAD(context-mmaps);
   spin_lock_init(context-mmap_lock);
 +
 + if (udata-outlen  sizeof(uresp)) {
 + if (!warned++)
 + pr_err(MOD Warning - downlevel libcxgb4 (non-fatal), 
 device status page disabled.);
 + rhp-rdev.flags |= T4_STATUS_PAGE_DISABLED;
 + } else {
 + mm = kmalloc(sizeof(*mm), GFP_KERNEL);
 + if (!mm)
 + goto err_free;
 +

OK, that's the origin of the missing error I've noticed in my latest
review on linux-next.

See
http://marc.info/?i=1395846311-29288-1-git-send-email-ydrone...@opteya.com
http://marc.info/?i=005b01cf4907$9adfa320$d09ee960
$@opengridcomputing.com

Sorry, I've missed the opportunity to report it.

 + uresp.status_page_size = PAGE_SIZE;
 +
 + spin_lock(context-mmap_lock);
 + uresp.status_page_key = context-key;
 + context-key += PAGE_SIZE;
 + spin_unlock(context-mmap_lock);
 +

Is it really necessary to spinlock here since context is local to the
function ?

 + ret = ib_copy_to_udata(udata, uresp, sizeof(uresp));
 + if (ret)
 + goto err_mm;
 +
 + mm-key = uresp.status_page_key;
 + mm-addr = virt_to_phys(rhp-rdev.status_page);
 + mm-len = PAGE_SIZE;
 + insert_mmap(context, mm);
 + }
   return context-ibucontext;
 +err_mm:
 + kfree(mm);
 +err_free:
 + kfree(context);
 +err:
 + return ERR_PTR(ret);
  }
  

[...]

 diff --git a/drivers/infiniband/hw/cxgb4/user.h 
 b/drivers/infiniband/hw/cxgb4/user.h
 index 32b754c..11ccd27 100644
 --- a/drivers/infiniband/hw/cxgb4/user.h
 +++ b/drivers/infiniband/hw/cxgb4/user.h
 @@ -70,4 +70,9 @@ struct c4iw_create_qp_resp {
   __u32 qid_mask;
   __u32 flags;
  };
 +
 +struct c4iw_alloc_ucontext_resp {
 + __u64 status_page_key;
 + __u32 status_page_size;
 +};

If this is going to be part of the ABI, mind adding explicit padding to
align the structure on 64 bits.

Regards

-- 
Yann Droneaud
OPTEYA


--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH net-next 2/2] cxgb4/iw_cxgb4: Doorbell Drop Avoidance Bug Fixes

2014-03-26 Thread Steve Wise


  +   uresp.status_page_size = PAGE_SIZE;
  +
  +   spin_lock(context-mmap_lock);
  +   uresp.status_page_key = context-key;
  +   context-key += PAGE_SIZE;
  +   spin_unlock(context-mmap_lock);
  +
 
 Is it really necessary to spinlock here since context is local to the
 function ?
 

You're correct.

 
 [...]
 
  diff --git a/drivers/infiniband/hw/cxgb4/user.h 
  b/drivers/infiniband/hw/cxgb4/user.h
  index 32b754c..11ccd27 100644
  --- a/drivers/infiniband/hw/cxgb4/user.h
  +++ b/drivers/infiniband/hw/cxgb4/user.h
  @@ -70,4 +70,9 @@ struct c4iw_create_qp_resp {
  __u32 qid_mask;
  __u32 flags;
   };
  +
  +struct c4iw_alloc_ucontext_resp {
  +   __u64 status_page_key;
  +   __u32 status_page_size;
  +};
 
 If this is going to be part of the ABI, mind add an explicit padding to
 align the structure on 64bits.
 

Sounds good.  Can you provide patches for these?


--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH for-next] IB/core: Don't resolve passive side RoCE L2 address in cma req handler

2014-03-26 Thread Or Gerlitz
From: Moni Shoua mo...@mellanox.com

The code that resolves the passive side source mac within the rdma_cm 
connection request handler was both redundant and buggy, remove it.

It was redundant since later, when an RC QP is modified to RTR state 
the resolution will take place in the ib_core module. It was buggy b/c
this callback also deals with UD SIDR exchange under which we wrongly
looked on the REQ member of the CM event and dereferenced a random value.

Signed-off-by: Moni Shoua mo...@mellanox.com
Signed-off-by: Or Gerlitz ogerl...@mellanox.com
---

Hi Roland, we're post 3.14-rc8 and hence I assume this will go to 3.15-rc1, 
and once there we will post it to -stable so it gets into 3.14.y

That nasty random deref wasn't causing instant crashes, but rather only 
when running on lengthy QA loops, so we missed it through the submission.

 drivers/infiniband/core/cm.c  |   17 -
 drivers/infiniband/core/cma.c |   17 -
 include/rdma/ib_cm.h  |1 -
 3 files changed, 0 insertions(+), 35 deletions(-)

diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 0601b9d..c323917 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -349,23 +349,6 @@ static void cm_init_av_for_response(struct cm_port *port, 
struct ib_wc *wc,
   grh, av-ah_attr);
 }
 
-int ib_update_cm_av(struct ib_cm_id *id, const u8 *smac, const u8 *alt_smac)
-{
-   struct cm_id_private *cm_id_priv;
-
-   cm_id_priv = container_of(id, struct cm_id_private, id);
-
-   if (smac != NULL)
-   memcpy(cm_id_priv-av.smac, smac, sizeof(cm_id_priv-av.smac));
-
-   if (alt_smac != NULL)
-   memcpy(cm_id_priv-alt_av.smac, alt_smac,
-  sizeof(cm_id_priv-alt_av.smac));
-
-   return 0;
-}
-EXPORT_SYMBOL(ib_update_cm_av);
-
 static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
 {
struct cm_device *cm_dev;
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 199958d..7e8c3d5 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1336,28 +1336,11 @@ static int cma_req_handler(struct ib_cm_id *cm_id, 
struct ib_cm_event *ib_event)
ret = conn_id-id.event_handler(conn_id-id, event);
if (ret)
goto err3;
-
-   if (is_iboe) {
-   if (ib_event-param.req_rcvd.primary_path != NULL)
-   rdma_addr_find_smac_by_sgid(
-   ib_event-param.req_rcvd.primary_path-sgid,
-   psmac, NULL);
-   else
-   psmac = NULL;
-   if (ib_event-param.req_rcvd.alternate_path != NULL)
-   rdma_addr_find_smac_by_sgid(
-   ib_event-param.req_rcvd.alternate_path-sgid,
-   palt_smac, NULL);
-   else
-   palt_smac = NULL;
-   }
/*
 * Acquire mutex to prevent user executing rdma_destroy_id()
 * while we're accessing the cm_id.
 */
mutex_lock(lock);
-   if (is_iboe)
-   ib_update_cm_av(cm_id, psmac, palt_smac);
if (cma_comp(conn_id, RDMA_CM_CONNECT) 
(conn_id-id.qp_type != IB_QPT_UD))
ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h
index f29e3a2..0e3ff30 100644
--- a/include/rdma/ib_cm.h
+++ b/include/rdma/ib_cm.h
@@ -601,5 +601,4 @@ struct ib_cm_sidr_rep_param {
 int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
struct ib_cm_sidr_rep_param *param);
 
-int ib_update_cm_av(struct ib_cm_id *id, const u8 *smac, const u8 *alt_smac);
 #endif /* IB_CM_H */
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH for-next] IB/core: Don't resolve passive side RoCE L2 address in cma req handler

2014-03-26 Thread Yann Droneaud
Hi,

Le mercredi 26 mars 2014 à 18:23 +0200, Or Gerlitz a écrit :
 From: Moni Shoua mo...@mellanox.com
 
 The code that resolves the passive side source mac within the rdma_cm 
 connection request handler was both redundant and buggy, remove it.
 
 It was redundant since later, when an RC QP is modified to RTR state 
 the resolution will take place in the ib_core module. It was buggy b/c
 this callback also deals with UD SIDR exchange under which we wrongly
 looked on the REQ member of the CM event and dereferenced a random value.
 

Is that a revert of some patch ?

 Signed-off-by: Moni Shoua mo...@mellanox.com
 Signed-off-by: Or Gerlitz ogerl...@mellanox.com
 ---
 
 Hi Roland, we're post 3.14-rc8 and hence I assume will go to 3.15-rc1 
 and once there we will post it to -stable to it gets into 3.14.y
 

Just add Cc: sta...@vger.kernel.org

See:

http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/Documentation/stable_kernel_rules.txt?id=v3.14-rc8#n36

Regards.

-- 
Yann Droneaud
OPTEYA


--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Kernel oops/panic with NFS over RDMA mount after disrupted Infiniband connection

2014-03-26 Thread Chuck Lever

On Mar 26, 2014, at 6:20 AM, rafael.reiter rafael.rei...@ims.co.at wrote:

 Hello,
 
 I am looking into a problem with NFS/RDMA with openSuse 12.3 and the 3.10.17 
 kernel. The following kernel oops, followed by a kernel panic, occurs on
 the client computer after 5-30 seconds when the connection between NFS server 
 and client is disrupted (e.g. by pulling the Infiniband cable on the
 server or using ibportstate to disable the port on the client) and the 
 mounted directory is accessed (cd, ls).
 
 
 The export on the server side is done with
 /data
 172.16.100.0/24(rw,wdelay,crossmnt,insecure,no_root_squash,no_subtree_check,fsid=0,mountpoint)
 
 Following command is used for mounting the NFSv4 share:
  mount -t nfs -o port=20049,rdma,vers=4.0,timeo=900 172.16.100.2:/ /mnt/
 
 
 [ 3336.995934] kernel tried to execute NX-protected page - exploit attempt?
 (uid: 0)
 [ 3337.003415] BUG: unable to handle kernel paging request at 880842900768
 [ 3337.010404] IP: [880842900768] 0x880842900767
 [ 3337.015658] PGD 1d7c067 PUD 85d4e1063 PMD 842f48063 PTE 800842900163
 [ 3337.022420] Oops: 0011 [#1] SMP
 [ 3337.025681] Modules linked in: xprtrdma(O) auth_rpcgss oid_registry nfsv4
 cpuid af_packet 8021q garp stp llc rdma_ucm(O) ib_ucm(O) rdma_cm(O) iw_cm(O)
 ib_ipoib(O) ib_cm(O) ib_uverbs(O) ib_umad(O) mlx4_en(O) mlx4_ib(O) ib_sa(O)
 ib_mad(O) ib_core(O) ib_addr(O) sr_mod cdrom usb_storage nvidia(PO) joydev
 usbhid mlx4_core(O) compat(O) adm1021 lm90 coretemp nouveau kvm_intel kvm
 crc32c_intel ghash_clmulni_intel aesni_intel ablk_helper cryptd lrw gf128mul
 glue_helper aes_x86_64 acpi_cpufreq iTCO_wdt iTCO_vendor_support microcode
 pcspkr ttm drm_kms_helper sb_edac edac_core isci drm i2c_i801 libsas ehci_pci
 ehci_hcd scsi_transport_sas mxm_wmi sg video usbcore lpc_ich ioatdma mfd_core
 usb_common shpchp pci_hotplug wmi mperf processor thermal_sys button edd fuse
 autofs4 xfs libcrc32c nfsv3 nfs fscache lockd nfs_acl sunrpc igb dca
 i2c_algo_bit ptp pps_core
 [ 3337.102467] CPU: 0 PID: 0 Comm: swapper/0 Tainted: P   O 
 3.10.17-ims2
 #2
 [ 3337.109863] Hardware name: Supermicro B9DRG-E/B9DRG-E, BIOS 3.0 09/04/2013
 [ 3337.116736] task: 81a11440 ti: 81a0 task.ti:
 81a0
 [ 3337.124218] RIP: 0010:[880842900768]  [880842900768]
 0x880842900767
 [ 3337.131892] RSP: 0018:88087fc03e88  EFLAGS: 00010282
 [ 3337.137208] RAX: 0286 RBX: 880842900768 RCX: 
 88085e2454a0
 [ 3337.144335] RDX: 88085e2454a0 RSI: 0286 RDI: 
 88085e245480
 [ 3337.151463] RBP: 88087fc03ea0 R08: 88085e24b170 R09: 
 0040
 [ 3337.158588] R10: 0003 R11: dead00100100 R12: 
 88085e245480
 [ 3337.165716] R13: 0006 R14: 0006 R15: 
 81a5db90
 [ 3337.172842] FS:  () GS:88087fc0()
 knlGS:
 [ 3337.180932] CS:  0010 DS:  ES:  CR0: 80050033
 [ 3337.186671] CR2: 880842900768 CR3: 01a0c000 CR4: 
 000407f0
 [ 3337.193809] DR0:  DR1:  DR2: 
 
 [ 3337.200934] DR3:  DR6: 0ff0 DR7: 
 0400
 [ 3337.208061] Stack:
 [ 3337.210073]  a04f7cbe a04fd388 
 88087fc03ec0
 [ 3337.217530]  81049c82 0001 81a050b0
 88087fc03f30
 [ 3337.224987]  81049870 81a01fd8 0001000b940f
 00202000
 [ 3337.232443] Call Trace:
 [ 3337.234903]  IRQ
 [ 3337.236838]  [a04f7cbe] ? rpcrdma_run_tasklet+0x7e/0xc0 
 [xprtrdma]
 [ 3337.244116]  [81049c82] tasklet_action+0x52/0xc0
 [ 3337.249611]  [81049870] __do_softirq+0xe0/0x220
 [ 3337.255014]  [8155cbac] call_softirq+0x1c/0x30
 [ 3337.260335]  [8100452d] do_softirq+0x4d/0x80
 [ 3337.265470]  [81049b05] irq_exit+0x95/0xa0
 [ 3337.270437]  [8100411e] do_IRQ+0x5e/0xd0
 [ 3337.275224]  [81553eaa] common_interrupt+0x6a/0x6a
 [ 3337.280884]  EOI
 [ 3337.282809]  [81069090] ? __hrtimer_start_range_ns+0x1c0/0x400
 [ 3337.289729]  [8141de86] ? cpuidle_enter_state+0x56/0xd0
 [ 3337.295824]  [8141de82] ? cpuidle_enter_state+0x52/0xd0
 [ 3337.301928]  [8141dfb6] cpuidle_idle_call+0xb6/0x200
 [ 3337.307764]  [8100aa39] arch_cpu_idle+0x9/0x20
 [ 3337.313087]  [81087cc0] cpu_startup_entry+0x80/0x200
 [ 3337.318924]  [815358a2] rest_init+0x72/0x80
 [ 3337.323984]  [81ac4e28] start_kernel+0x3b2/0x3bf
 [ 3337.329464]  [81ac4875] ? repair_env_string+0x5e/0x5e
 [ 3337.335386]  [81ac45a5] x86_64_start_reservations+0x2a/0x2c
 [ 3337.341820]  [81ac4675] x86_64_start_kernel+0xce/0xd2
 [ 3337.347732] Code: 88 ff ff e7 33 4f a0 ff ff ff ff 00 b0 dd 57 10 88 ff ff 
 a9
 be a9 be 10 00 00 00 00 01 00 00 00 00 00 00 20 00 00 00 00 00 00 00 d0 35 
 24
 5e 08 88 ff ff 98 54 24 5e 08 88 ff ff 

Re: [PATCH for-next] IB/core: Don't resolve passive side RoCE L2 address in cma req handler

2014-03-26 Thread Or Gerlitz
On Wed, Mar 26, 2014 at 6:23 PM, Or Gerlitz ogerl...@mellanox.com wrote:
 ---
 Hi Roland, we're post 3.14-rc8 and hence I assume will go to 3.15-rc1
 and once there we will post it to -stable to it gets into 3.14.y


So we have here some warnings on unused variables, I forgot to amend
that, sorry,  will send V1 for that.
--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH for-next] IB/core: Don't resolve passive side RoCE L2 address in cma req handler

2014-03-26 Thread Or Gerlitz
On Wed, Mar 26, 2014 at 7:10 PM, Yann Droneaud ydrone...@opteya.com wrote:

 Is that a revert of some patch ?

It's not a full revert, but I will pick up what you are probably
suggesting and mention
in the V1 change-log which commit this fixes, thanks

 Just add Cc: sta...@vger.kernel.org

I know that, but I prefer to send the patch myself to the stable mailing list
--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH opensm 1/2] All SA queries should validate the requester port under lock

2014-03-26 Thread Hal Rosenstock

It might collide with the sweep operations.

Signed-off-by: Alex Netes ale...@mellanox.com
Signed-off-by: Hal Rosenstock h...@mellanox.com
---
 opensm/osm_sa_guidinfo_record.c  |   69 +++--
 opensm/osm_sa_informinfo.c   |9 +++--
 opensm/osm_sa_lft_record.c   |5 ++-
 opensm/osm_sa_link_record.c  |5 ++-
 opensm/osm_sa_mcmember_record.c  |   12 ---
 opensm/osm_sa_mft_record.c   |5 ++-
 opensm/osm_sa_multipath_record.c |   33 +-
 opensm/osm_sa_node_record.c  |5 ++-
 opensm/osm_sa_path_record.c  |3 +-
 opensm/osm_sa_pkey_record.c  |5 ++-
 opensm/osm_sa_portinfo_record.c  |5 ++-
 opensm/osm_sa_service_record.c   |   20 +-
 opensm/osm_sa_slvl_record.c  |5 ++-
 opensm/osm_sa_sminfo_record.c|5 ++-
 opensm/osm_sa_sw_info_record.c   |5 ++-
 opensm/osm_sa_vlarb_record.c |5 ++-
 16 files changed, 120 insertions(+), 76 deletions(-)

diff --git a/opensm/osm_sa_guidinfo_record.c b/opensm/osm_sa_guidinfo_record.c
index 28f6c0f..1ad8cad 100644
--- a/opensm/osm_sa_guidinfo_record.c
+++ b/opensm/osm_sa_guidinfo_record.c
@@ -401,6 +401,7 @@ static void del_guidinfo(IN osm_sa_t *sa, IN osm_madw_t 
*p_madw,
block_num %d is higher than Max GUID Cap block %d 
for port GUID 0x% PRIx64 \n,
block_num, max_block, 
cl_ntoh64(p_port-p_physp-port_guid));
+   CL_PLOCK_RELEASE(sa-p_lock);
osm_sa_send_error(sa, p_madw,
  IB_SA_MAD_STATUS_NO_RECORDS);
return;
@@ -417,6 +418,7 @@ static void del_guidinfo(IN osm_sa_t *sa, IN osm_madw_t 
*p_madw,
 i++) {
/* can't delete block 0 index 0 (base guid is RO) for alias 
guid table */
if (i == 0  p_sa_mad-comp_mask  IB_GIR_COMPMASK_GID0) {
+   CL_PLOCK_RELEASE(sa-p_lock);
OSM_LOG(sa-p_log, OSM_LOG_DEBUG,
Not allowed to delete RO GID 0\n);
osm_sa_send_error(sa, p_madw,
@@ -436,6 +438,7 @@ static void del_guidinfo(IN osm_sa_t *sa, IN osm_madw_t 
*p_madw,
p_list_item = cl_qlist_next(p_list_item);
p_mcm_alias_guid = 
osm_mgrp_get_mcm_alias_guid(p_mcm_port-mgrp, del_alias_guid);
if (p_mcm_alias_guid) {
+   CL_PLOCK_RELEASE(sa-p_lock);
osm_sa_send_error(sa, p_madw,
  
IB_SA_MAD_STATUS_DENIED);
return;
@@ -481,6 +484,7 @@ static void del_guidinfo(IN osm_sa_t *sa, IN osm_madw_t 
*p_madw,
   sizeof(ib_guid_info_t));
 
 Exit:
+   CL_PLOCK_RELEASE(sa-p_lock);
gir_respond(sa, p_madw);
 }
 
@@ -504,6 +508,7 @@ static void set_guidinfo(IN osm_sa_t *sa, IN osm_madw_t 
*p_madw,
block_num %d is higher than Max GUID Cap block %d 
for port GUID 0x% PRIx64 \n,
block_num, max_block, 
cl_ntoh64(p_port-p_physp-port_guid));
+   CL_PLOCK_RELEASE(sa-p_lock);
osm_sa_send_error(sa, p_madw,
  IB_SA_MAD_STATUS_NO_RECORDS);
return;
@@ -516,6 +521,7 @@ static void set_guidinfo(IN osm_sa_t *sa, IN osm_madw_t 
*p_madw,
GUID table memory allocation failed for port 
GUID 0x% PRIx64 \n,
cl_ntoh64(p_port-p_physp-port_guid));
+   CL_PLOCK_RELEASE(sa-p_lock);
osm_sa_send_error(sa, p_madw,
  IB_SA_MAD_STATUS_NO_RESOURCES);
return;
@@ -539,6 +545,7 @@ static void set_guidinfo(IN osm_sa_t *sa, IN osm_madw_t 
*p_madw,
 i++) {
/* can't set block 0 index 0 (base guid is RO) for alias guid 
table */
if (i == 0  p_sa_mad-comp_mask  IB_GIR_COMPMASK_GID0) {
+   CL_PLOCK_RELEASE(sa-p_lock);
OSM_LOG(sa-p_log, OSM_LOG_DEBUG,
Not allowed to set RO GID 0\n);
osm_sa_send_error(sa, p_madw,
@@ -595,6 +602,7 @@ static void set_guidinfo(IN osm_sa_t *sa, IN osm_madw_t 
*p_madw,
for (j = 0; j  1000; j++) {
assigned_guid = 
sm_assigned_guid(sa-p_subn-opt.sm_assigned_guid);
if (!assigned_guid) {
+   CL_PLOCK_RELEASE(sa-p_lock);
OSM_LOG(sa-p_log, OSM_LOG_ERROR,
ERR 510E: No more assigned 
guids available\n);

[PATCH opensm 2/2] osm_sa_service_record.c: Improve locking

2014-03-26 Thread Hal Rosenstock
From: Alex Netes ale...@mellanox.com
Date: Tue, 25 Mar 2014 12:01:56 +0200

Read lock should be sufficient when treating GET method.

Signed-off-by: Alex Netes ale...@mellanox.com
Signed-off-by: Hal Rosenstock h...@mellanox.com
---
 opensm/osm_sa_service_record.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/opensm/osm_sa_service_record.c b/opensm/osm_sa_service_record.c
index 61c5892..cce0ad5 100644
--- a/opensm/osm_sa_service_record.c
+++ b/opensm/osm_sa_service_record.c
@@ -454,7 +454,7 @@ static void sr_rcv_process_get_method(osm_sa_t * sa, IN 
osm_madw_t * p_madw)
CL_ASSERT(p_madw);
 
/* Grab the lock */
-   cl_plock_excl_acquire(sa-p_lock);
+   cl_plock_acquire(sa-p_lock);
 
/* update the requester physical port */
p_req_physp = osm_get_physp_by_mad_addr(sa-p_log, sa-p_subn,
--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH V1 for-next] IB/core: Don't resolve passive side RoCE L2 address in cma req handler

2014-03-26 Thread Or Gerlitz
From: Moni Shoua mo...@mellanox.com

The code that resolves the passive side source mac within the rdma_cm 
connection request handler was both redundant and buggy, remove it.

It was redundant since later, when an RC QP is modified to RTR state 
the resolution will take place in the ib_core module. It was buggy b/c
this callback also deals with UD SIDR exchange under which we wrongly
looked on the REQ member of the CM event and dereferenced a random value.

Signed-off-by: Moni Shoua mo...@mellanox.com
Signed-off-by: Or Gerlitz ogerl...@mellanox.com
---

Hi Roland, we're post 3.14-rc8 and hence I assume this will go to 3.15-rc1,
and once there we will post it to -stable so it gets into 3.14.y

That nasty random deref wasn't causing instant crashes, but rather only 
when running on lengthy QA loops, so we missed it through the submission.


V1 changes: removed unused variables from cma_req_handler 


 drivers/infiniband/core/cm.c  |   17 -
 drivers/infiniband/core/cma.c |   26 --
 include/rdma/ib_cm.h  |1 -
 3 files changed, 0 insertions(+), 44 deletions(-)

diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 0601b9d..c323917 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -349,23 +349,6 @@ static void cm_init_av_for_response(struct cm_port *port, 
struct ib_wc *wc,
   grh, av-ah_attr);
 }
 
-int ib_update_cm_av(struct ib_cm_id *id, const u8 *smac, const u8 *alt_smac)
-{
-   struct cm_id_private *cm_id_priv;
-
-   cm_id_priv = container_of(id, struct cm_id_private, id);
-
-   if (smac != NULL)
-   memcpy(cm_id_priv-av.smac, smac, sizeof(cm_id_priv-av.smac));
-
-   if (alt_smac != NULL)
-   memcpy(cm_id_priv-alt_av.smac, alt_smac,
-  sizeof(cm_id_priv-alt_av.smac));
-
-   return 0;
-}
-EXPORT_SYMBOL(ib_update_cm_av);
-
 static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
 {
struct cm_device *cm_dev;
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 199958d..42c3058 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1284,15 +1284,6 @@ static int cma_req_handler(struct ib_cm_id *cm_id, 
struct ib_cm_event *ib_event)
struct rdma_id_private *listen_id, *conn_id;
struct rdma_cm_event event;
int offset, ret;
-   u8 smac[ETH_ALEN];
-   u8 alt_smac[ETH_ALEN];
-   u8 *psmac = smac;
-   u8 *palt_smac = alt_smac;
-   int is_iboe = ((rdma_node_get_transport(cm_id-device-node_type) ==
-   RDMA_TRANSPORT_IB) 
-  (rdma_port_get_link_layer(cm_id-device,
-   ib_event-param.req_rcvd.port) ==
-   IB_LINK_LAYER_ETHERNET));
 
listen_id = cm_id-context;
if (!cma_check_req_qp_type(listen_id-id, ib_event))
@@ -1336,28 +1327,11 @@ static int cma_req_handler(struct ib_cm_id *cm_id, 
struct ib_cm_event *ib_event)
ret = conn_id-id.event_handler(conn_id-id, event);
if (ret)
goto err3;
-
-   if (is_iboe) {
-   if (ib_event-param.req_rcvd.primary_path != NULL)
-   rdma_addr_find_smac_by_sgid(
-   ib_event-param.req_rcvd.primary_path-sgid,
-   psmac, NULL);
-   else
-   psmac = NULL;
-   if (ib_event-param.req_rcvd.alternate_path != NULL)
-   rdma_addr_find_smac_by_sgid(
-   ib_event-param.req_rcvd.alternate_path-sgid,
-   palt_smac, NULL);
-   else
-   palt_smac = NULL;
-   }
/*
 * Acquire mutex to prevent user executing rdma_destroy_id()
 * while we're accessing the cm_id.
 */
mutex_lock(lock);
-   if (is_iboe)
-   ib_update_cm_av(cm_id, psmac, palt_smac);
if (cma_comp(conn_id, RDMA_CM_CONNECT) 
(conn_id-id.qp_type != IB_QPT_UD))
ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h
index f29e3a2..0e3ff30 100644
--- a/include/rdma/ib_cm.h
+++ b/include/rdma/ib_cm.h
@@ -601,5 +601,4 @@ struct ib_cm_sidr_rep_param {
 int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
struct ib_cm_sidr_rep_param *param);
 
-int ib_update_cm_av(struct ib_cm_id *id, const u8 *smac, const u8 *alt_smac);
 #endif /* IB_CM_H */
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/3] RDMA/core: iWARP Port Mapper Overview

2014-03-26 Thread Tatyana Nikolova

Hello All,

This patch series adds iWARP Port Mapper (IWPM) Version 2 support
in RDMA/core, RDMA/nes driver and RDMA/cxgb4 driver.
The iWARP Port Mapper implementation is based on the port mapper specification
section in the Sockets Direct Protocol paper -
http://www.rdmaconsortium.org/home/draft-pinkerton-iwarp-sdp-v1.0.pdf 

Existing iWARP RDMA providers use the same IP address as the native TCP/IP
stack when creating RDMA connections. They need to ensure a mechanism to claim 
the TCP ports used for RDMA connections, thereby preventing TCP port collisions
when other host applications are consuming TCP ports. The iWARP Port Mapper
provides a standard mechanism to accomplish this. Without this service it is
possible for an RDMA application to bind/listen on the same port which is already
being used by a native TCP host application. If that happens the incoming TCP
connection data can be passed to the RDMA stack with error.

The iWARP Port Mapper solution doesn't contain any changes to the existing
network stack in the kernel space. All the changes are contained within the
infiniband tree and also in user space.

The iWARP Port Mapper service is implemented as a user space daemon process.
Interested readers could look at the source code of the IWPM service located at
http://git.openfabrics.org/git?p=~tnikolova/libiwpm-1.0.0/.git;a=summary

The iWARP driver (port mapper client) sends to the IWPM service the local IP
address and TCP port it has received from the RDMA application, when starting a
connection. The IWPM service performs a socket bind from user space to get
an available TCP port, called a mapped port, and communicates it back to the
client. In that sense, the IWPM service is used to map the TCP port, which the
RDMA application uses to any port available from the host TCP port space. The
mapped ports are used in iWARP RDMA connections to avoid collisions with native
TCP stack which is aware that these ports are taken. When an RDMA connection
using a mapped port is terminated, the client notifies the IWPM service, which
then releases the TCP port.

The message exchange between the IWPM service and the iWARP drivers
(between user space and kernel space) is implemented using netlink sockets.

This patch series adds iWarp driver support for sending/receiving/parsing
netlink messages, to make the communication with the IWPM service possible.

This is the second submission for the iWarp Port Mapper and
we have considered and incorporated feedback after the first submission
to improve the IWPM Version 2 patch series.

The following items are improvements and fixes to IWPM V2:

1) The IWPM functionality, common for both iWarp drivers (nes and cxgb4)
   is refactored from the drivers source files and is moved to new shared
   files in infiniband/core which are compiled as part of the iw_cm module.

2) Hash table data structure is implemented to store the drivers mapping
   information and enable efficient searching. (Hash table implementation
   is based on original work by Vipul Pandya)

3) Fix for a timer bug is provided as the unnecessary timer scheduling is
   removed.

The patches are built against Roland's infiniband tree for-next branch.

Thank you,

Tatyana Nikolova (2):
  RDMA/core: Add support for iWarp Port Mapper V2 user space service
  RDMA/nes: Add support for iWarp Port Mapper V2 user space service
Steve Wise (1):
  RDMA/cxgb4: Add support for iWarp Port Mapper V2 user space service

 drivers/infiniband/core/Makefile   |2 +-
 drivers/infiniband/core/cma.c  |3 +-
 drivers/infiniband/core/iwpm_msg.c |  691 
 drivers/infiniband/core/iwpm_util.c|  609 
 drivers/infiniband/core/iwpm_util.h|  238 +++
 drivers/infiniband/core/netlink.c  |   18 +-
 drivers/infiniband/hw/cxgb4/cm.c   |  180 +++--
 drivers/infiniband/hw/cxgb4/device.c   |   81 -
 drivers/infiniband/hw/cxgb4/iw_cxgb4.h |   44 ++
 drivers/infiniband/hw/nes/nes.c|   25 ++-
 drivers/infiniband/hw/nes/nes.h|3 +
 drivers/infiniband/hw/nes/nes_cm.c |  320 ---
 drivers/infiniband/hw/nes/nes_cm.h |   12 +-
 include/rdma/iw_portmap.h  |  199 +
 include/rdma/rdma_netlink.h|   23 +-
 include/uapi/rdma/rdma_netlink.h   |   96 +-
 16 files changed, 2431 insertions(+), 113 deletions(-)
 create mode 100644 drivers/infiniband/core/iwpm_msg.c
 create mode 100644 drivers/infiniband/core/iwpm_util.c
 create mode 100644 drivers/infiniband/core/iwpm_util.h
 create mode 100644 include/rdma/iw_portmap.h

--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/3] RDMA/nes: Add support for iWARP Port Mapper user space service (Version 2)

2014-03-26 Thread Tatyana Nikolova
Add support for iWarp Port Mapper (Version 2) 

Signed-off-by: Tatyana Nikolova tatyana.e.nikol...@intel.com
---
 drivers/infiniband/hw/nes/nes.c|   25 +++-
 drivers/infiniband/hw/nes/nes.h|3 +
 drivers/infiniband/hw/nes/nes_cm.c |  320 +---
 drivers/infiniband/hw/nes/nes_cm.h |   12 +-
 4 files changed, 296 insertions(+), 64 deletions(-)

diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c
index 353c7b0..3b2a6dc 100644
--- a/drivers/infiniband/hw/nes/nes.c
+++ b/drivers/infiniband/hw/nes/nes.c
@@ -68,7 +68,6 @@ MODULE_VERSION(DRV_VERSION);
 int max_mtu = 9000;
 int interrupt_mod_interval = 0;
 
-
 /* Interoperability */
 int mpa_version = 1;
 module_param(mpa_version, int, 0644);
@@ -112,6 +111,16 @@ static struct pci_device_id nes_pci_table[] = {
 
 MODULE_DEVICE_TABLE(pci, nes_pci_table);
 
+/* registered nes netlink callbacks */
+static struct ibnl_client_cbs nes_nl_cb_table[] = {
+   [RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb},
+   [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb},
+   [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb},
+   [RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb},
+   [RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb},
+   [RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb}
+};
+
 static int nes_inetaddr_event(struct notifier_block *, unsigned long, void *);
 static int nes_net_event(struct notifier_block *, unsigned long, void *);
 static int nes_notifiers_registered;
@@ -672,6 +681,17 @@ static int nes_probe(struct pci_dev *pcidev, const struct 
pci_device_id *ent)
}
nes_notifiers_registered++;
 
+   if (ibnl_add_client(RDMA_NL_NES, RDMA_NL_IWPM_NUM_OPS, nes_nl_cb_table))
+   printk(KERN_ERR PFX %s[%u]: Failed to add netlink callback\n,
+   __func__, __LINE__);
+
+   ret = iwpm_init(RDMA_NL_NES);
+   if (ret) {
+   printk(KERN_ERR PFX %s: port mapper initialization failed\n,
+   pci_name(pcidev));
+   goto bail7;
+   }
+
INIT_DELAYED_WORK(nesdev-work, nes_recheck_link_status);
 
/* Initialize network devices */
@@ -710,6 +730,7 @@ static int nes_probe(struct pci_dev *pcidev, const struct 
pci_device_id *ent)
 
nes_debug(NES_DBG_INIT, netdev_count=%d, 
nesadapter-netdev_count=%d\n,
nesdev-netdev_count, nesdev-nesadapter-netdev_count);
+   ibnl_remove_client(RDMA_NL_NES);
 
nes_notifiers_registered--;
if (nes_notifiers_registered == 0) {
@@ -773,6 +794,8 @@ static void nes_remove(struct pci_dev *pcidev)
nesdev-nesadapter-netdev_count--;
}
}
+   ibnl_remove_client(RDMA_NL_NES);
+   iwpm_exit(RDMA_NL_NES);
 
nes_notifiers_registered--;
if (nes_notifiers_registered == 0) {
diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h
index 33cc589..bd9d132 100644
--- a/drivers/infiniband/hw/nes/nes.h
+++ b/drivers/infiniband/hw/nes/nes.h
@@ -51,6 +51,8 @@
 #include rdma/ib_pack.h
 #include rdma/rdma_cm.h
 #include rdma/iw_cm.h
+#include rdma/rdma_netlink.h
+#include rdma/iw_portmap.h
 
 #define NES_SEND_FIRST_WRITE
 
@@ -130,6 +132,7 @@
 #define NES_DBG_IW_TX   0x0004
 #define NES_DBG_SHUTDOWN0x0008
 #define NES_DBG_PAU 0x0010
+#define NES_DBG_NLMSG   0x0020
 #define NES_DBG_RSVD1   0x1000
 #define NES_DBG_RSVD2   0x2000
 #define NES_DBG_RSVD3   0x4000
diff --git a/drivers/infiniband/hw/nes/nes_cm.c 
b/drivers/infiniband/hw/nes/nes_cm.c
index dfa9df4..6f09a72 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 - 2011 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2006 - 2014 Intel Corporation.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -59,6 +59,7 @@
 #include net/route.h
 #include net/ip_fib.h
 #include net/tcp.h
+#include linux/fcntl.h
 
 #include nes.h
 
@@ -166,7 +167,6 @@ int nes_rem_ref_cm_node(struct nes_cm_node *cm_node)
 {
return rem_ref_cm_node(cm_node-cm_core, cm_node);
 }
-
 /**
  * create_event
  */
@@ -482,11 +482,11 @@ static void form_cm_frame(struct sk_buff *skb,
iph-ttl = 0x40;
iph-protocol = 0x06;   /* IPPROTO_TCP */
 
-   iph-saddr = htonl(cm_node-loc_addr);
-   iph-daddr = htonl(cm_node-rem_addr);
+   iph-saddr = htonl(cm_node-mapped_loc_addr);
+   iph-daddr = htonl(cm_node-mapped_rem_addr);
 
-   tcph-source = htons(cm_node-loc_port);
-   tcph-dest = htons(cm_node-rem_port);
+   tcph-source = htons(cm_node-mapped_loc_port);
+   tcph-dest = htons(cm_node-mapped_rem_port);