[PATCH 4/8] iw_cxgb4: Max fastreg depth depends on DSGL support
From: Steve Wise sw...@opengridcomputing.com The max depth of a fastreg mr depends on whether the device supports DSGL or not. So compute it dynamically based on the device support and the module use_dsgl option. Signed-off-by: Steve Wise sw...@opengridcomputing.com --- drivers/infiniband/hw/cxgb4/provider.c | 2 +- drivers/infiniband/hw/cxgb4/qp.c | 3 ++- drivers/infiniband/hw/cxgb4/t4.h | 9 - 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 7e94c9a..deaaa21 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -287,7 +287,7 @@ static int c4iw_query_device(struct ib_device *ibdev, props-max_mr = c4iw_num_stags(dev-rdev); props-max_pd = T4_MAX_NUM_PD; props-local_ca_ack_delay = 0; - props-max_fast_reg_page_list_len = T4_MAX_FR_DEPTH; + props-max_fast_reg_page_list_len = t4_max_fr_depth(use_dsgl); return 0; } diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 8ad7d2e..0af82af 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -555,7 +555,8 @@ static int build_fastreg(struct t4_sq *sq, union t4_wr *wqe, int pbllen = roundup(wr-wr.fast_reg.page_list_len * sizeof(u64), 32); int rem; - if (wr-wr.fast_reg.page_list_len T4_MAX_FR_DEPTH) + if (wr-wr.fast_reg.page_list_len + t4_max_fr_depth(use_dsgl)) return -EINVAL; wqe-fr.qpbinde_to_dcacpu = 0; diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index 62017a7..1ba7a87 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -84,7 +84,14 @@ struct t4_status_page { sizeof(struct fw_ri_isgl)) / sizeof(struct fw_ri_sge)) #define T4_MAX_FR_IMMD ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_fr_nsmr_wr) - \ sizeof(struct fw_ri_immd)) ~31UL) -#define T4_MAX_FR_DEPTH (1024 / sizeof(u64)) +#define T4_MAX_FR_IMMD_DEPTH (T4_MAX_FR_IMMD / sizeof(u64)) +#define 
T4_MAX_FR_DSGL 1024 +#define T4_MAX_FR_DSGL_DEPTH (T4_MAX_FR_DSGL / sizeof(u64)) + +static inline int t4_max_fr_depth(int use_dsgl) +{ + return use_dsgl ? T4_MAX_FR_DSGL_DEPTH : T4_MAX_FR_IMMD_DEPTH; +} #define T4_RQ_NUM_SLOTS 2 #define T4_RQ_NUM_BYTES (T4_EQ_ENTRY_SIZE * T4_RQ_NUM_SLOTS) -- 1.8.4 -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/8] iw_cxgb4: rmb() after reading valid gen bit
From: Steve Wise sw...@opengridcomputing.com Some HW platforms can reorder read operations, so we must rmb() after we see a valid gen bit in a CQE but before we read any other fields from the CQE. Signed-off-by: Steve Wise sw...@opengridcomputing.com --- drivers/infiniband/hw/cxgb4/t4.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index e73ace7..62017a7 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -566,6 +566,8 @@ static inline int t4_next_hw_cqe(struct t4_cq *cq, struct t4_cqe **cqe) printk(KERN_ERR MOD cq overflow cqid %u\n, cq-cqid); BUG_ON(1); } else if (t4_valid_cqe(cq, cq-queue[cq-cidx])) { + /* Ensure CQE is flushed to memory */ + rmb(); *cqe = cq-queue[cq-cidx]; ret = 0; } else -- 1.8.4 -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/8] iw_cxgb4: endpoint timeout fixes
From: Steve Wise sw...@opengridcomputing.com 1) timedout endpoint processing can be starved. If there are continual CPL messages flowing into the driver, the endpoint timeout processing can be starved. This condition exposed the other bugs below. Solution: In process_work(), call process_timedout_eps() after each CPL is processed. 2) Connection events can be processed even though the endpoint is on the timeout list. If the endpoint is scheduled for timeout processing, then we must ignore MPA Start Requests and Replies. Solution: Change stop_ep_timer() to return 1 if the ep has already been queued for timeout processing. All the callers of stop_ep_timer() need to check this and act accordingly. There are just a few cases where the caller needs to do something different if stop_ep_timer() returns 1: 1) in process_mpa_reply(), ignore the reply and process_timeout() will abort the connection. 2) in process_mpa_request(), ignore the request and process_timeout() will abort the connection. It is ok for callers of stop_ep_timer() to abort the connection since that will leave the state in ABORTING or DEAD, and process_timeout() now ignores timeouts when the ep is in these states. 3) Double insertion on the timeout list. Since the endpoint timers are used for connection setup and teardown, we need to guard against the possibility that an endpoint is already on the timeout list. This is a rare condition and only seen under heavy load and in the presence of the above 2 bugs. Solution: In ep_timeout(), don't queue the endpoint if it is already on the queue. 
Signed-off-by: Steve Wise sw...@opengridcomputing.com --- drivers/infiniband/hw/cxgb4/cm.c | 89 +--- 1 file changed, 56 insertions(+), 33 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index fe5db3c..471614e 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -173,12 +173,15 @@ static void start_ep_timer(struct c4iw_ep *ep) add_timer(ep-timer); } -static void stop_ep_timer(struct c4iw_ep *ep) +static int stop_ep_timer(struct c4iw_ep *ep) { PDBG(%s ep %p stopping\n, __func__, ep); del_timer_sync(ep-timer); - if (!test_and_set_bit(TIMEOUT, ep-com.flags)) + if (!test_and_set_bit(TIMEOUT, ep-com.flags)) { c4iw_put_ep(ep-com); + return 0; + } + return 1; } static int c4iw_l2t_send(struct c4iw_rdev *rdev, struct sk_buff *skb, @@ -1165,12 +1168,11 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) PDBG(%s ep %p tid %u\n, __func__, ep, ep-hwtid); /* -* Stop mpa timer. If it expired, then the state has -* changed and we bail since ep_timeout already aborted -* the connection. +* Stop mpa timer. If it expired, then +* we ignore the MPA reply. process_timeout() +* will abort the connection. */ - stop_ep_timer(ep); - if (ep-com.state != MPA_REQ_SENT) + if (stop_ep_timer(ep)) return; /* @@ -1375,15 +1377,12 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) PDBG(%s ep %p tid %u\n, __func__, ep, ep-hwtid); - if (ep-com.state != MPA_REQ_WAIT) - return; - /* * If we get more than the supported amount of private data * then we must fail this connection. */ if (ep-mpa_pkt_len + skb-len sizeof(ep-mpa_pkt)) { - stop_ep_timer(ep); + (void)stop_ep_timer(ep); abort_connection(ep, skb, GFP_KERNEL); return; } @@ -1413,13 +1412,13 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) if (mpa-revision mpa_rev) { printk(KERN_ERR MOD %s MPA version mismatch. 
Local = %d, Received = %d\n, __func__, mpa_rev, mpa-revision); - stop_ep_timer(ep); + (void)stop_ep_timer(ep); abort_connection(ep, skb, GFP_KERNEL); return; } if (memcmp(mpa-key, MPA_KEY_REQ, sizeof(mpa-key))) { - stop_ep_timer(ep); + (void)stop_ep_timer(ep); abort_connection(ep, skb, GFP_KERNEL); return; } @@ -1430,7 +1429,7 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) * Fail if there's too much private data. */ if (plen MPA_MAX_PRIVATE_DATA) { - stop_ep_timer(ep); + (void)stop_ep_timer(ep); abort_connection(ep, skb, GFP_KERNEL); return; } @@ -1439,7 +1438,7 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) * If plen does not account for pkt size */ if (ep-mpa_pkt_len (sizeof(*mpa) + plen)) { -
[PATCH 6/8] iw_cxgb4: Initialize reserved fields in a FW work request
From: Steve Wise sw...@opengridcomputing.com Signed-off-by: Steve Wise sw...@opengridcomputing.com --- drivers/infiniband/hw/cxgb4/qp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 0af82af..e865fa4 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -425,6 +425,8 @@ static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe, default: return -EINVAL; } + wqe-send.r3 = 0; + wqe-send.r4 = 0; plen = 0; if (wr-num_sge) { -- 1.8.4 -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 0/8] Endpoint timeout fix, SQ flush and other misc. fixes for iw_cxgb4
Hi All, This patch series provides fixes related to endpoint timeout, one fix related to SQ flushing and other miscellaneous fixes for Chelsio T4/T5 adapters on iw_cxgb4. The patch series is created against 'infiniband' tree, 'for-next' branch, and includes patches on iw_cxgb4 driver. These patches were part of a bigger patch-series posted initially on netdev: https://www.mail-archive.com/linux-rdma@vger.kernel.org/msg19052.html Based on a suggestion from David Miller, we have split above patch-series into several smaller patch-series. This is the last of the smaller patch-series on iw-cxgb4. Since this patch-series contains purely iw_cxgb4 patches, we would like to request this patch series to get merged via Roland's 'infiniband' tree. We have included all the maintainers of respective drivers. Kindly review the change and let us know in case of any review comments. Thanks Hariprasad Shenai (1): iw_cxgb4: Use pr_warn_ratelimited Steve Wise (7): iw_cxgb4: endpoint timeout fixes iw_cxgb4: rmb() after reading valid gen bit iw_cxgb4: SQ flush fix iw_cxgb4: Max fastreg depth depends on DSGL support iw_cxgb4: Initialize reserved fields in a FW work request iw_cxgb4: Add missing debug stats iw_cxgb4: Use uninitialized_var() drivers/infiniband/hw/cxgb4/cm.c | 89 +- drivers/infiniband/hw/cxgb4/cq.c | 24 - drivers/infiniband/hw/cxgb4/mem.c | 6 ++- drivers/infiniband/hw/cxgb4/provider.c | 2 +- drivers/infiniband/hw/cxgb4/qp.c | 11 +++-- drivers/infiniband/hw/cxgb4/resource.c | 10 ++-- drivers/infiniband/hw/cxgb4/t4.h | 11 - 7 files changed, 95 insertions(+), 58 deletions(-) -- 1.8.4 -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 5/8] iw_cxgb4: Use pr_warn_ratelimited
Signed-off-by: Hariprasad Shenai haripra...@chelsio.com --- drivers/infiniband/hw/cxgb4/resource.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/resource.c b/drivers/infiniband/hw/cxgb4/resource.c index cdef4d7..94b5fd9 100644 --- a/drivers/infiniband/hw/cxgb4/resource.c +++ b/drivers/infiniband/hw/cxgb4/resource.c @@ -322,8 +322,8 @@ u32 c4iw_rqtpool_alloc(struct c4iw_rdev *rdev, int size) unsigned long addr = gen_pool_alloc(rdev-rqt_pool, size 6); PDBG(%s addr 0x%x size %d\n, __func__, (u32)addr, size 6); if (!addr) - printk_ratelimited(KERN_WARNING MOD %s: Out of RQT memory\n, - pci_name(rdev-lldi.pdev)); + pr_warn_ratelimited(MOD %s: Out of RQT memory\n, + pci_name(rdev-lldi.pdev)); mutex_lock(rdev-stats.lock); if (addr) { rdev-stats.rqt.cur += roundup(size 6, 1 MIN_RQT_SHIFT); -- 1.8.4 -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 8/8] iw_cxgb4: Use uninitialized_var()
From: Steve Wise sw...@opengridcomputing.com Signed-off-by: Steve Wise sw...@opengridcomputing.com --- drivers/infiniband/hw/cxgb4/cq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index e17b155..cfaa56a 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -672,7 +672,7 @@ skip_cqe: static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) { struct c4iw_qp *qhp = NULL; - struct t4_cqe cqe = {0, 0}, *rd_cqe; + struct t4_cqe uninitialized_var(cqe), *rd_cqe; struct t4_wq *wq; u32 credit = 0; u8 cqe_flushed; -- 1.8.4 -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 8/8] iw_cxgb4: Use uninitialized_var()
Le mercredi 26 mars 2014 à 18:53 +0530, Hariprasad Shenai a écrit : From: Steve Wise sw...@opengridcomputing.com What for ? Please describe the reason to use uninitialized_var() Signed-off-by: Steve Wise sw...@opengridcomputing.com --- drivers/infiniband/hw/cxgb4/cq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index e17b155..cfaa56a 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -672,7 +672,7 @@ skip_cqe: static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) { struct c4iw_qp *qhp = NULL; - struct t4_cqe cqe = {0, 0}, *rd_cqe; + struct t4_cqe uninitialized_var(cqe), *rd_cqe; struct t4_wq *wq; u32 credit = 0; u8 cqe_flushed; -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [PATCH 8/8] iw_cxgb4: Use uninitialized_var()
-Original Message- From: linux-rdma-ow...@vger.kernel.org [mailto:linux-rdma-ow...@vger.kernel.org] On Behalf Of Yann Droneaud Sent: Wednesday, March 26, 2014 9:34 AM To: Hariprasad Shenai Cc: linux-rdma@vger.kernel.org; rol...@purestorage.com; sw...@opengridcomputing.com; kuma...@chelsio.com; nirran...@chelsio.com Subject: Re: [PATCH 8/8] iw_cxgb4: Use uninitialized_var() Le mercredi 26 mars 2014 à 18:53 +0530, Hariprasad Shenai a écrit : From: Steve Wise sw...@opengridcomputing.com What for ? Please describe the reason to use uninitialized_var() Function poll_cq() fills out var cqe. The compiler doesn't know that so either you initialize cqe unnecessarily, or use uninitialized_var(). The poll path is performance-critical, and I hate to initialize variables without reason. Signed-off-by: Steve Wise sw...@opengridcomputing.com --- drivers/infiniband/hw/cxgb4/cq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index e17b155..cfaa56a 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -672,7 +672,7 @@ skip_cqe: static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) { struct c4iw_qp *qhp = NULL; - struct t4_cqe cqe = {0, 0}, *rd_cqe; + struct t4_cqe uninitialized_var(cqe), *rd_cqe; struct t4_wq *wq; u32 credit = 0; u8 cqe_flushed; -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] RDMA/cxgb4: set error code on kmalloc() failure
If kmalloc() fails in c4iw_alloc_ucontext(), the function leaves but does not set an error code in ret variable: it will return 0 to the caller. This patch set ret to -ENOMEM in such case. Cc: Steve Wise sw...@opengridcomputing.com Cc: Steve Wise sw...@chelsio.com Signed-off-by: Yann Droneaud ydrone...@opteya.com --- drivers/infiniband/hw/cxgb4/provider.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index e36d2a27c431..79429256023a 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -128,8 +128,10 @@ static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev, rhp-rdev.flags |= T4_STATUS_PAGE_DISABLED; } else { mm = kmalloc(sizeof(*mm), GFP_KERNEL); - if (!mm) + if (!mm) { + ret = -ENOMEM; goto err_free; + } uresp.status_page_size = PAGE_SIZE; -- 1.9.0 -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
FW: [PATCH] RDMA/cxgb4: set error code on kmalloc() failure
Acked-by: Steve Wise sw...@opengridcomputing.com Note: This fix applies only to net-next because the commit that introduced this is still pending in net-next: commit 05eb23893c2cf9502a9cec0c32e7f1d1ed2895c8 Author: Steve Wise sw...@opengridcomputing.com Date: Fri Mar 14 21:52:08 2014 +0530 cxgb4/iw_cxgb4: Doorbell Drop Avoidance Bug Fixes Dave, can you please merge this? Thanks, Steve. -Original Message- From: Yann Droneaud [mailto:ydrone...@opteya.com] Sent: Wednesday, March 26, 2014 10:05 AM To: Roland Dreier; Roland Dreier Cc: Yann Droneaud; linux-rdma@vger.kernel.org; Steve Wise; Steve Wise Subject: [PATCH] RDMA/cxgb4: set error code on kmalloc() failure If kmalloc() fails in c4iw_alloc_ucontext(), the function leaves but does not set an error code in ret variable: it will return 0 to the caller. This patch set ret to -ENOMEM in such case. Cc: Steve Wise sw...@opengridcomputing.com Cc: Steve Wise sw...@chelsio.com Signed-off-by: Yann Droneaud ydrone...@opteya.com --- drivers/infiniband/hw/cxgb4/provider.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index e36d2a27c431..79429256023a 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -128,8 +128,10 @@ static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev, rhp-rdev.flags |= T4_STATUS_PAGE_DISABLED; } else { mm = kmalloc(sizeof(*mm), GFP_KERNEL); - if (!mm) + if (!mm) { + ret = -ENOMEM; goto err_free; + } uresp.status_page_size = PAGE_SIZE; -- 1.9.0 -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH net-next 2/2] cxgb4/iw_cxgb4: Doorbell Drop Avoidance Bug Fixes
Le vendredi 14 mars 2014 à 21:52 +0530, Hariprasad Shenai a écrit : From: Steve Wise sw...@opengridcomputing.com [...] Signed-off-by: Steve Wise sw...@opengridcomputing.com --- drivers/infiniband/hw/cxgb4/device.c| 177 ++-- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 9 +- drivers/infiniband/hw/cxgb4/provider.c | 43 +- drivers/infiniband/hw/cxgb4/qp.c| 140 +-- drivers/infiniband/hw/cxgb4/t4.h| 6 + drivers/infiniband/hw/cxgb4/user.h | 5 + drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 1 + drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 87 +++- drivers/net/ethernet/chelsio/cxgb4/sge.c| 8 +- 9 files changed, 286 insertions(+), 190 deletions(-) [...] diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 7e94c9a..e36d2a2 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -106,15 +106,54 @@ static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev, { struct c4iw_ucontext *context; struct c4iw_dev *rhp = to_c4iw_dev(ibdev); + static int warned; + struct c4iw_alloc_ucontext_resp uresp; + int ret = 0; + struct c4iw_mm_entry *mm = NULL; PDBG(%s ibdev %p\n, __func__, ibdev); context = kzalloc(sizeof(*context), GFP_KERNEL); - if (!context) - return ERR_PTR(-ENOMEM); + if (!context) { + ret = -ENOMEM; + goto err; + } + c4iw_init_dev_ucontext(rhp-rdev, context-uctx); INIT_LIST_HEAD(context-mmaps); spin_lock_init(context-mmap_lock); + + if (udata-outlen sizeof(uresp)) { + if (!warned++) + pr_err(MOD Warning - downlevel libcxgb4 (non-fatal), device status page disabled.); + rhp-rdev.flags |= T4_STATUS_PAGE_DISABLED; + } else { + mm = kmalloc(sizeof(*mm), GFP_KERNEL); + if (!mm) + goto err_free; + OK, that's the origin of the missing error I've noticed in my latest review on linux-next. 
See http://marc.info/?i=1395846311-29288-1-git-send-email-ydrone...@opteya.com http://marc.info/?i=005b01cf4907$9adfa320$d09ee960 $@opengridcomputing.com Sorry, I've missed the opportunity to report it. + uresp.status_page_size = PAGE_SIZE; + + spin_lock(context-mmap_lock); + uresp.status_page_key = context-key; + context-key += PAGE_SIZE; + spin_unlock(context-mmap_lock); + Is it really necessary to spinlock here since context is local to the function ? + ret = ib_copy_to_udata(udata, uresp, sizeof(uresp)); + if (ret) + goto err_mm; + + mm-key = uresp.status_page_key; + mm-addr = virt_to_phys(rhp-rdev.status_page); + mm-len = PAGE_SIZE; + insert_mmap(context, mm); + } return context-ibucontext; +err_mm: + kfree(mm); +err_free: + kfree(context); +err: + return ERR_PTR(ret); } [...] diff --git a/drivers/infiniband/hw/cxgb4/user.h b/drivers/infiniband/hw/cxgb4/user.h index 32b754c..11ccd27 100644 --- a/drivers/infiniband/hw/cxgb4/user.h +++ b/drivers/infiniband/hw/cxgb4/user.h @@ -70,4 +70,9 @@ struct c4iw_create_qp_resp { __u32 qid_mask; __u32 flags; }; + +struct c4iw_alloc_ucontext_resp { + __u64 status_page_key; + __u32 status_page_size; +}; If this is going to be part of the ABI, mind add an explicit padding to align the structure on 64bits. Regards -- Yann Droneaud OPTEYA -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [PATCH net-next 2/2] cxgb4/iw_cxgb4: Doorbell Drop Avoidance Bug Fixes
+ uresp.status_page_size = PAGE_SIZE; + + spin_lock(context-mmap_lock); + uresp.status_page_key = context-key; + context-key += PAGE_SIZE; + spin_unlock(context-mmap_lock); + Is it really necessary to spinlock here since context is local to the function ? You're correct. [...] diff --git a/drivers/infiniband/hw/cxgb4/user.h b/drivers/infiniband/hw/cxgb4/user.h index 32b754c..11ccd27 100644 --- a/drivers/infiniband/hw/cxgb4/user.h +++ b/drivers/infiniband/hw/cxgb4/user.h @@ -70,4 +70,9 @@ struct c4iw_create_qp_resp { __u32 qid_mask; __u32 flags; }; + +struct c4iw_alloc_ucontext_resp { + __u64 status_page_key; + __u32 status_page_size; +}; If this is going to be part of the ABI, mind add an explicit padding to align the structure on 64bits. Sounds good. Can you provide patches for these? -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH for-next] IB/core: Don't resolve passive side RoCE L2 address in cma req handler
From: Moni Shoua mo...@mellanox.com The code that resolves the passive side source mac within the rdma_cm connection request handler was both redundant and buggy, remove it. It was redundant since later, when an RC QP is modified to RTR state the resolution will take place in the ib_core module. It was buggy b/c this callback also deals with UD SIDR exchange under which we wrongly looked on the REQ member of the CM event and dereferenced a random value. Signed-off-by: Moni Shoua mo...@mellanox.com Signed-off-by: Or Gerlitz ogerl...@mellanox.com --- Hi Roland, we're post 3.14-rc8 and hence I assume will go to 3.15-rc1 and once there we will post it to -stable to it gets into 3.14.y That nasty random deref wasn't causing instant crashes, but rather only when running on lengthy QA loops, so we missed it through the submission. drivers/infiniband/core/cm.c | 17 - drivers/infiniband/core/cma.c | 17 - include/rdma/ib_cm.h |1 - 3 files changed, 0 insertions(+), 35 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 0601b9d..c323917 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -349,23 +349,6 @@ static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc, grh, av-ah_attr); } -int ib_update_cm_av(struct ib_cm_id *id, const u8 *smac, const u8 *alt_smac) -{ - struct cm_id_private *cm_id_priv; - - cm_id_priv = container_of(id, struct cm_id_private, id); - - if (smac != NULL) - memcpy(cm_id_priv-av.smac, smac, sizeof(cm_id_priv-av.smac)); - - if (alt_smac != NULL) - memcpy(cm_id_priv-alt_av.smac, alt_smac, - sizeof(cm_id_priv-alt_av.smac)); - - return 0; -} -EXPORT_SYMBOL(ib_update_cm_av); - static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av) { struct cm_device *cm_dev; diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 199958d..7e8c3d5 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1336,28 +1336,11 
@@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) ret = conn_id-id.event_handler(conn_id-id, event); if (ret) goto err3; - - if (is_iboe) { - if (ib_event-param.req_rcvd.primary_path != NULL) - rdma_addr_find_smac_by_sgid( - ib_event-param.req_rcvd.primary_path-sgid, - psmac, NULL); - else - psmac = NULL; - if (ib_event-param.req_rcvd.alternate_path != NULL) - rdma_addr_find_smac_by_sgid( - ib_event-param.req_rcvd.alternate_path-sgid, - palt_smac, NULL); - else - palt_smac = NULL; - } /* * Acquire mutex to prevent user executing rdma_destroy_id() * while we're accessing the cm_id. */ mutex_lock(lock); - if (is_iboe) - ib_update_cm_av(cm_id, psmac, palt_smac); if (cma_comp(conn_id, RDMA_CM_CONNECT) (conn_id-id.qp_type != IB_QPT_UD)) ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h index f29e3a2..0e3ff30 100644 --- a/include/rdma/ib_cm.h +++ b/include/rdma/ib_cm.h @@ -601,5 +601,4 @@ struct ib_cm_sidr_rep_param { int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id, struct ib_cm_sidr_rep_param *param); -int ib_update_cm_av(struct ib_cm_id *id, const u8 *smac, const u8 *alt_smac); #endif /* IB_CM_H */ -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH for-next] IB/core: Don't resolve passive side RoCE L2 address in cma req handler
Hi, Le mercredi 26 mars 2014 à 18:23 +0200, Or Gerlitz a écrit : From: Moni Shoua mo...@mellanox.com The code that resolves the passive side source mac within the rdma_cm connection request handler was both redundant and buggy, remove it. It was redundant since later, when an RC QP is modified to RTR state the resolution will take place in the ib_core module. It was buggy b/c this callback also deals with UD SIDR exchange under which we wrongly looked on the REQ member of the CM event and dereferenced a random value. Is that a revert of some patch ? Signed-off-by: Moni Shoua mo...@mellanox.com Signed-off-by: Or Gerlitz ogerl...@mellanox.com --- Hi Roland, we're post 3.14-rc8 and hence I assume will go to 3.15-rc1 and once there we will post it to -stable to it gets into 3.14.y Just add Cc: sta...@vger.kernel.org See: http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/Documentation/stable_kernel_rules.txt?id=v3.14-rc8#n36 Regards. -- Yann Droneaud OPTEYA -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Kernel oops/panic with NFS over RDMA mount after disrupted Infiniband connection
On Mar 26, 2014, at 6:20 AM, rafael.reiter rafael.rei...@ims.co.at wrote: Hello, I am looking into a problem with NFS/RDMA with openSuse 12.3 and the 3.10.17 kernel. The following kernel oops, followed by a kernel panic, occurs on the client computer after 5-30 seconds when the connection between NFS server and client is disrupted (e.g. by pulling the Infiniband cable on the server or using ibportstate to disable the port on the client) and the mounted directory is accessed (cd, ls). The export on the server side is done with /data 172.16.100.0/24(rw,wdelay,crossmnt,insecure,no_root_squash,no_subtree_check,fsid=0,mountpoint) Following command is used for mounting the NFSv4 share: mount -t nfs -o port=20049,rdma,vers=4.0,timeo=900 172.16.100.2:/ /mnt/ [ 3336.995934] kernel tried to execute NX-protected page - exploit attempt? (uid: 0) [ 3337.003415] BUG: unable to handle kernel paging request at 880842900768 [ 3337.010404] IP: [880842900768] 0x880842900767 [ 3337.015658] PGD 1d7c067 PUD 85d4e1063 PMD 842f48063 PTE 800842900163 [ 3337.022420] Oops: 0011 [#1] SMP [ 3337.025681] Modules linked in: xprtrdma(O) auth_rpcgss oid_registry nfsv4 cpuid af_packet 8021q garp stp llc rdma_ucm(O) ib_ucm(O) rdma_cm(O) iw_cm(O) ib_ipoib(O) ib_cm(O) ib_uverbs(O) ib_umad(O) mlx4_en(O) mlx4_ib(O) ib_sa(O) ib_mad(O) ib_core(O) ib_addr(O) sr_mod cdrom usb_storage nvidia(PO) joydev usbhid mlx4_core(O) compat(O) adm1021 lm90 coretemp nouveau kvm_intel kvm crc32c_intel ghash_clmulni_intel aesni_intel ablk_helper cryptd lrw gf128mul glue_helper aes_x86_64 acpi_cpufreq iTCO_wdt iTCO_vendor_support microcode pcspkr ttm drm_kms_helper sb_edac edac_core isci drm i2c_i801 libsas ehci_pci ehci_hcd scsi_transport_sas mxm_wmi sg video usbcore lpc_ich ioatdma mfd_core usb_common shpchp pci_hotplug wmi mperf processor thermal_sys button edd fuse autofs4 xfs libcrc32c nfsv3 nfs fscache lockd nfs_acl sunrpc igb dca i2c_algo_bit ptp pps_core [ 3337.102467] CPU: 0 PID: 0 Comm: swapper/0 Tainted: P O 
3.10.17-ims2 #2 [ 3337.109863] Hardware name: Supermicro B9DRG-E/B9DRG-E, BIOS 3.0 09/04/2013 [ 3337.116736] task: 81a11440 ti: 81a0 task.ti: 81a0 [ 3337.124218] RIP: 0010:[880842900768] [880842900768] 0x880842900767 [ 3337.131892] RSP: 0018:88087fc03e88 EFLAGS: 00010282 [ 3337.137208] RAX: 0286 RBX: 880842900768 RCX: 88085e2454a0 [ 3337.144335] RDX: 88085e2454a0 RSI: 0286 RDI: 88085e245480 [ 3337.151463] RBP: 88087fc03ea0 R08: 88085e24b170 R09: 0040 [ 3337.158588] R10: 0003 R11: dead00100100 R12: 88085e245480 [ 3337.165716] R13: 0006 R14: 0006 R15: 81a5db90 [ 3337.172842] FS: () GS:88087fc0() knlGS: [ 3337.180932] CS: 0010 DS: ES: CR0: 80050033 [ 3337.186671] CR2: 880842900768 CR3: 01a0c000 CR4: 000407f0 [ 3337.193809] DR0: DR1: DR2: [ 3337.200934] DR3: DR6: 0ff0 DR7: 0400 [ 3337.208061] Stack: [ 3337.210073] a04f7cbe a04fd388 88087fc03ec0 [ 3337.217530] 81049c82 0001 81a050b0 88087fc03f30 [ 3337.224987] 81049870 81a01fd8 0001000b940f 00202000 [ 3337.232443] Call Trace: [ 3337.234903] IRQ [ 3337.236838] [a04f7cbe] ? rpcrdma_run_tasklet+0x7e/0xc0 [xprtrdma] [ 3337.244116] [81049c82] tasklet_action+0x52/0xc0 [ 3337.249611] [81049870] __do_softirq+0xe0/0x220 [ 3337.255014] [8155cbac] call_softirq+0x1c/0x30 [ 3337.260335] [8100452d] do_softirq+0x4d/0x80 [ 3337.265470] [81049b05] irq_exit+0x95/0xa0 [ 3337.270437] [8100411e] do_IRQ+0x5e/0xd0 [ 3337.275224] [81553eaa] common_interrupt+0x6a/0x6a [ 3337.280884] EOI [ 3337.282809] [81069090] ? __hrtimer_start_range_ns+0x1c0/0x400 [ 3337.289729] [8141de86] ? cpuidle_enter_state+0x56/0xd0 [ 3337.295824] [8141de82] ? cpuidle_enter_state+0x52/0xd0 [ 3337.301928] [8141dfb6] cpuidle_idle_call+0xb6/0x200 [ 3337.307764] [8100aa39] arch_cpu_idle+0x9/0x20 [ 3337.313087] [81087cc0] cpu_startup_entry+0x80/0x200 [ 3337.318924] [815358a2] rest_init+0x72/0x80 [ 3337.323984] [81ac4e28] start_kernel+0x3b2/0x3bf [ 3337.329464] [81ac4875] ? 
repair_env_string+0x5e/0x5e [ 3337.335386] [81ac45a5] x86_64_start_reservations+0x2a/0x2c [ 3337.341820] [81ac4675] x86_64_start_kernel+0xce/0xd2 [ 3337.347732] Code: 88 ff ff e7 33 4f a0 ff ff ff ff 00 b0 dd 57 10 88 ff ff a9 be a9 be 10 00 00 00 00 01 00 00 00 00 00 00 20 00 00 00 00 00 00 00 d0 35 24 5e 08 88 ff ff 98 54 24 5e 08 88 ff ff
Re: [PATCH for-next] IB/core: Don't resolve passive side RoCE L2 address in cma req handler
On Wed, Mar 26, 2014 at 6:23 PM, Or Gerlitz ogerl...@mellanox.com wrote: --- Hi Roland, we're post 3.14-rc8 and hence I assume will go to 3.15-rc1 and once there we will post it to -stable to it gets into 3.14.y So we have here some warnings on unused variables, I forgot to amend that, sorry, will send V1 for that. -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH for-next] IB/core: Don't resolve passive side RoCE L2 address in cma req handler
On Wed, Mar 26, 2014 at 7:10 PM, Yann Droneaud ydrone...@opteya.com wrote: Is that a revert of some patch ? It's not a full revert, but I will pick what you are probably suggesting and mention in the V1 change-log which commit this fixes, thanks Just add Cc: sta...@vger.kernel.org I know that but prefer to send the patch myself to the stable mailing list -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH opensm 1/2] All SA queries should validate the requester port under lock
It might collide with the sweep operations. Signed-off-by: Alex Netes ale...@mellanox.com Signed-off-by: Hal Rosenstock h...@mellanox.com --- opensm/osm_sa_guidinfo_record.c | 69 +++-- opensm/osm_sa_informinfo.c |9 +++-- opensm/osm_sa_lft_record.c |5 ++- opensm/osm_sa_link_record.c |5 ++- opensm/osm_sa_mcmember_record.c | 12 --- opensm/osm_sa_mft_record.c |5 ++- opensm/osm_sa_multipath_record.c | 33 +- opensm/osm_sa_node_record.c |5 ++- opensm/osm_sa_path_record.c |3 +- opensm/osm_sa_pkey_record.c |5 ++- opensm/osm_sa_portinfo_record.c |5 ++- opensm/osm_sa_service_record.c | 20 +- opensm/osm_sa_slvl_record.c |5 ++- opensm/osm_sa_sminfo_record.c|5 ++- opensm/osm_sa_sw_info_record.c |5 ++- opensm/osm_sa_vlarb_record.c |5 ++- 16 files changed, 120 insertions(+), 76 deletions(-) diff --git a/opensm/osm_sa_guidinfo_record.c b/opensm/osm_sa_guidinfo_record.c index 28f6c0f..1ad8cad 100644 --- a/opensm/osm_sa_guidinfo_record.c +++ b/opensm/osm_sa_guidinfo_record.c @@ -401,6 +401,7 @@ static void del_guidinfo(IN osm_sa_t *sa, IN osm_madw_t *p_madw, block_num %d is higher than Max GUID Cap block %d for port GUID 0x% PRIx64 \n, block_num, max_block, cl_ntoh64(p_port-p_physp-port_guid)); + CL_PLOCK_RELEASE(sa-p_lock); osm_sa_send_error(sa, p_madw, IB_SA_MAD_STATUS_NO_RECORDS); return; @@ -417,6 +418,7 @@ static void del_guidinfo(IN osm_sa_t *sa, IN osm_madw_t *p_madw, i++) { /* can't delete block 0 index 0 (base guid is RO) for alias guid table */ if (i == 0 p_sa_mad-comp_mask IB_GIR_COMPMASK_GID0) { + CL_PLOCK_RELEASE(sa-p_lock); OSM_LOG(sa-p_log, OSM_LOG_DEBUG, Not allowed to delete RO GID 0\n); osm_sa_send_error(sa, p_madw, @@ -436,6 +438,7 @@ static void del_guidinfo(IN osm_sa_t *sa, IN osm_madw_t *p_madw, p_list_item = cl_qlist_next(p_list_item); p_mcm_alias_guid = osm_mgrp_get_mcm_alias_guid(p_mcm_port-mgrp, del_alias_guid); if (p_mcm_alias_guid) { + CL_PLOCK_RELEASE(sa-p_lock); osm_sa_send_error(sa, p_madw, IB_SA_MAD_STATUS_DENIED); return; @@ -481,6 +484,7 @@ static 
void del_guidinfo(IN osm_sa_t *sa, IN osm_madw_t *p_madw, sizeof(ib_guid_info_t)); Exit: + CL_PLOCK_RELEASE(sa-p_lock); gir_respond(sa, p_madw); } @@ -504,6 +508,7 @@ static void set_guidinfo(IN osm_sa_t *sa, IN osm_madw_t *p_madw, block_num %d is higher than Max GUID Cap block %d for port GUID 0x% PRIx64 \n, block_num, max_block, cl_ntoh64(p_port-p_physp-port_guid)); + CL_PLOCK_RELEASE(sa-p_lock); osm_sa_send_error(sa, p_madw, IB_SA_MAD_STATUS_NO_RECORDS); return; @@ -516,6 +521,7 @@ static void set_guidinfo(IN osm_sa_t *sa, IN osm_madw_t *p_madw, GUID table memory allocation failed for port GUID 0x% PRIx64 \n, cl_ntoh64(p_port-p_physp-port_guid)); + CL_PLOCK_RELEASE(sa-p_lock); osm_sa_send_error(sa, p_madw, IB_SA_MAD_STATUS_NO_RESOURCES); return; @@ -539,6 +545,7 @@ static void set_guidinfo(IN osm_sa_t *sa, IN osm_madw_t *p_madw, i++) { /* can't set block 0 index 0 (base guid is RO) for alias guid table */ if (i == 0 p_sa_mad-comp_mask IB_GIR_COMPMASK_GID0) { + CL_PLOCK_RELEASE(sa-p_lock); OSM_LOG(sa-p_log, OSM_LOG_DEBUG, Not allowed to set RO GID 0\n); osm_sa_send_error(sa, p_madw, @@ -595,6 +602,7 @@ static void set_guidinfo(IN osm_sa_t *sa, IN osm_madw_t *p_madw, for (j = 0; j 1000; j++) { assigned_guid = sm_assigned_guid(sa-p_subn-opt.sm_assigned_guid); if (!assigned_guid) { + CL_PLOCK_RELEASE(sa-p_lock); OSM_LOG(sa-p_log, OSM_LOG_ERROR, ERR 510E: No more assigned guids available\n);
[PATCH opensm 2/2] osm_sa_service_record.c: Improve locking
From: Alex Netes ale...@mellanox.com Date: Tue, 25 Mar 2014 12:01:56 +0200 A read lock should be sufficient when handling the GET method. Signed-off-by: Alex Netes ale...@mellanox.com Signed-off-by: Hal Rosenstock h...@mellanox.com --- opensm/osm_sa_service_record.c |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/opensm/osm_sa_service_record.c b/opensm/osm_sa_service_record.c index 61c5892..cce0ad5 100644 --- a/opensm/osm_sa_service_record.c +++ b/opensm/osm_sa_service_record.c @@ -454,7 +454,7 @@ static void sr_rcv_process_get_method(osm_sa_t * sa, IN osm_madw_t * p_madw) CL_ASSERT(p_madw); /* Grab the lock */ - cl_plock_excl_acquire(sa-p_lock); + cl_plock_acquire(sa-p_lock); /* update the requester physical port */ p_req_physp = osm_get_physp_by_mad_addr(sa-p_log, sa-p_subn, -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH V1 for-next] IB/core: Don't resolve passive side RoCE L2 address in cma req handler
From: Moni Shoua mo...@mellanox.com The code that resolves the passive side source mac within the rdma_cm connection request handler was both redundant and buggy; remove it. It was redundant since later, when an RC QP is modified to RTR state, the resolution will take place in the ib_core module. It was buggy because this callback also deals with UD SIDR exchange under which we wrongly looked at the REQ member of the CM event and dereferenced a random value. Signed-off-by: Moni Shoua mo...@mellanox.com Signed-off-by: Or Gerlitz ogerl...@mellanox.com --- Hi Roland, we're post 3.14-rc8 and hence I assume this will go to 3.15-rc1 and once there we will post it to -stable so it gets into 3.14.y That nasty random deref wasn't causing instant crashes, but rather only when running on lengthy QA loops, so we missed it through the submission. V1 changes: removed unused variables from cma_req_handler drivers/infiniband/core/cm.c | 17 - drivers/infiniband/core/cma.c | 26 -- include/rdma/ib_cm.h |1 - 3 files changed, 0 insertions(+), 44 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 0601b9d..c323917 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -349,23 +349,6 @@ static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc, grh, av-ah_attr); } -int ib_update_cm_av(struct ib_cm_id *id, const u8 *smac, const u8 *alt_smac) -{ - struct cm_id_private *cm_id_priv; - - cm_id_priv = container_of(id, struct cm_id_private, id); - - if (smac != NULL) - memcpy(cm_id_priv-av.smac, smac, sizeof(cm_id_priv-av.smac)); - - if (alt_smac != NULL) - memcpy(cm_id_priv-alt_av.smac, alt_smac, - sizeof(cm_id_priv-alt_av.smac)); - - return 0; -} -EXPORT_SYMBOL(ib_update_cm_av); - static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av) { struct cm_device *cm_dev; diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 199958d..42c3058 100644 --- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c @@ -1284,15 +1284,6 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) struct rdma_id_private *listen_id, *conn_id; struct rdma_cm_event event; int offset, ret; - u8 smac[ETH_ALEN]; - u8 alt_smac[ETH_ALEN]; - u8 *psmac = smac; - u8 *palt_smac = alt_smac; - int is_iboe = ((rdma_node_get_transport(cm_id-device-node_type) == - RDMA_TRANSPORT_IB) - (rdma_port_get_link_layer(cm_id-device, - ib_event-param.req_rcvd.port) == - IB_LINK_LAYER_ETHERNET)); listen_id = cm_id-context; if (!cma_check_req_qp_type(listen_id-id, ib_event)) @@ -1336,28 +1327,11 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) ret = conn_id-id.event_handler(conn_id-id, event); if (ret) goto err3; - - if (is_iboe) { - if (ib_event-param.req_rcvd.primary_path != NULL) - rdma_addr_find_smac_by_sgid( - ib_event-param.req_rcvd.primary_path-sgid, - psmac, NULL); - else - psmac = NULL; - if (ib_event-param.req_rcvd.alternate_path != NULL) - rdma_addr_find_smac_by_sgid( - ib_event-param.req_rcvd.alternate_path-sgid, - palt_smac, NULL); - else - palt_smac = NULL; - } /* * Acquire mutex to prevent user executing rdma_destroy_id() * while we're accessing the cm_id. 
*/ mutex_lock(lock); - if (is_iboe) - ib_update_cm_av(cm_id, psmac, palt_smac); if (cma_comp(conn_id, RDMA_CM_CONNECT) (conn_id-id.qp_type != IB_QPT_UD)) ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h index f29e3a2..0e3ff30 100644 --- a/include/rdma/ib_cm.h +++ b/include/rdma/ib_cm.h @@ -601,5 +601,4 @@ struct ib_cm_sidr_rep_param { int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id, struct ib_cm_sidr_rep_param *param); -int ib_update_cm_av(struct ib_cm_id *id, const u8 *smac, const u8 *alt_smac); #endif /* IB_CM_H */ -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 0/3] RDMA/core: iWARP Port Mapper Overview
Hello All, This patch series adds iWARP Port Mapper (IWPM) Version 2 support to RDMA/core, the RDMA/nes driver and the RDMA/cxgb4 driver. The iWARP Port Mapper implementation is based on the port mapper specification section in the Sockets Direct Protocol paper - http://www.rdmaconsortium.org/home/draft-pinkerton-iwarp-sdp-v1.0.pdf Existing iWARP RDMA providers use the same IP address as the native TCP/IP stack when creating RDMA connections. They need a mechanism to claim the TCP ports used for RDMA connections, thereby preventing TCP port collisions when other host applications are consuming TCP ports. The iWARP Port Mapper provides a standard mechanism to accomplish this. Without this service it is possible for an RDMA application to bind/listen on the same port which is already being used by a native TCP host application. If that happens, the incoming TCP connection data can be passed to the RDMA stack in error. The iWARP Port Mapper solution doesn't contain any changes to the existing network stack in the kernel space. All the changes are contained within the infiniband tree and also in user space. The iWARP Port Mapper service is implemented as a user space daemon process. Interested readers could look at the source code of the IWPM service located at http://git.openfabrics.org/git?p=~tnikolova/libiwpm-1.0.0/.git;a=summary The iWARP driver (port mapper client) sends to the IWPM service the local IP address and TCP port it has received from the RDMA application, when starting a connection. The IWPM service performs a socket bind from user space to get an available TCP port, called a mapped port, and communicates it back to the client. In that sense, the IWPM service is used to map the TCP port which the RDMA application uses to any port available from the host TCP port space. The mapped ports are used in iWARP RDMA connections to avoid collisions with the native TCP stack, which is aware that these ports are taken.
When an RDMA connection using a mapped port is terminated, the client notifies the IWPM service, which then releases the TCP port. The message exchange between the IWPM service and the iWARP drivers (between user space and kernel space) is implemented using netlink sockets. This patch series adds iWARP driver support for sending/receiving/parsing netlink messages, to make the communication with the IWPM service possible. This is the second submission for the iWARP Port Mapper and we have considered and incorporated feedback from the first submission to improve the IWPM Version 2 patch series. The following items are improvements and fixes to IWPM V2: 1) The IWPM functionality, common to both iWARP drivers (nes and cxgb4), is refactored out of the drivers' source files and moved to new shared files in infiniband/core which are compiled as part of the iw_cm module. 2) A hash table data structure is implemented to store the drivers' mapping information and enable efficient searching. (Hash table implementation is based on original work by Vipul Pandya) 3) A fix for a timer bug is provided: the unnecessary timer scheduling is removed. The patches are built against Roland's infiniband tree for-next branch.
Thank you, Tatyana Nikolova (2): RDMA/core: Add support for iWarp Port Mapper V2 user space service RDMA/nes: Add support for iWarp Port Mapper V2 user space service Steve Wise (1): RDMA/cxgb4: Add support for iWarp Port Mapper V2 user space service drivers/infiniband/core/Makefile |2 +- drivers/infiniband/core/cma.c |3 +- drivers/infiniband/core/iwpm_msg.c | 691 drivers/infiniband/core/iwpm_util.c| 609 drivers/infiniband/core/iwpm_util.h| 238 +++ drivers/infiniband/core/netlink.c | 18 +- drivers/infiniband/hw/cxgb4/cm.c | 180 +++-- drivers/infiniband/hw/cxgb4/device.c | 81 - drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 44 ++ drivers/infiniband/hw/nes/nes.c| 25 ++- drivers/infiniband/hw/nes/nes.h|3 + drivers/infiniband/hw/nes/nes_cm.c | 320 --- drivers/infiniband/hw/nes/nes_cm.h | 12 +- include/rdma/iw_portmap.h | 199 + include/rdma/rdma_netlink.h| 23 +- include/uapi/rdma/rdma_netlink.h | 96 +- 16 files changed, 2431 insertions(+), 113 deletions(-) create mode 100644 drivers/infiniband/core/iwpm_msg.c create mode 100644 drivers/infiniband/core/iwpm_util.c create mode 100644 drivers/infiniband/core/iwpm_util.h create mode 100644 include/rdma/iw_portmap.h -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/3] RDMA/nes: Add support for iWARP Port Mapper user space service (Version 2)
Add support for iWarp Port Mapper (Version 2) Signed-off-by: Tatyana Nikolova tatyana.e.nikol...@intel.com --- drivers/infiniband/hw/nes/nes.c| 25 +++- drivers/infiniband/hw/nes/nes.h|3 + drivers/infiniband/hw/nes/nes_cm.c | 320 +--- drivers/infiniband/hw/nes/nes_cm.h | 12 +- 4 files changed, 296 insertions(+), 64 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index 353c7b0..3b2a6dc 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -68,7 +68,6 @@ MODULE_VERSION(DRV_VERSION); int max_mtu = 9000; int interrupt_mod_interval = 0; - /* Interoperability */ int mpa_version = 1; module_param(mpa_version, int, 0644); @@ -112,6 +111,16 @@ static struct pci_device_id nes_pci_table[] = { MODULE_DEVICE_TABLE(pci, nes_pci_table); +/* registered nes netlink callbacks */ +static struct ibnl_client_cbs nes_nl_cb_table[] = { + [RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb}, + [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb}, + [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb}, + [RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb}, + [RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb}, + [RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb} +}; + static int nes_inetaddr_event(struct notifier_block *, unsigned long, void *); static int nes_net_event(struct notifier_block *, unsigned long, void *); static int nes_notifiers_registered; @@ -672,6 +681,17 @@ static int nes_probe(struct pci_dev *pcidev, const struct pci_device_id *ent) } nes_notifiers_registered++; + if (ibnl_add_client(RDMA_NL_NES, RDMA_NL_IWPM_NUM_OPS, nes_nl_cb_table)) + printk(KERN_ERR PFX %s[%u]: Failed to add netlink callback\n, + __func__, __LINE__); + + ret = iwpm_init(RDMA_NL_NES); + if (ret) { + printk(KERN_ERR PFX %s: port mapper initialization failed\n, + pci_name(pcidev)); + goto bail7; + } + INIT_DELAYED_WORK(nesdev-work, nes_recheck_link_status); /* Initialize network 
devices */ @@ -710,6 +730,7 @@ static int nes_probe(struct pci_dev *pcidev, const struct pci_device_id *ent) nes_debug(NES_DBG_INIT, netdev_count=%d, nesadapter-netdev_count=%d\n, nesdev-netdev_count, nesdev-nesadapter-netdev_count); + ibnl_remove_client(RDMA_NL_NES); nes_notifiers_registered--; if (nes_notifiers_registered == 0) { @@ -773,6 +794,8 @@ static void nes_remove(struct pci_dev *pcidev) nesdev-nesadapter-netdev_count--; } } + ibnl_remove_client(RDMA_NL_NES); + iwpm_exit(RDMA_NL_NES); nes_notifiers_registered--; if (nes_notifiers_registered == 0) { diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h index 33cc589..bd9d132 100644 --- a/drivers/infiniband/hw/nes/nes.h +++ b/drivers/infiniband/hw/nes/nes.h @@ -51,6 +51,8 @@ #include rdma/ib_pack.h #include rdma/rdma_cm.h #include rdma/iw_cm.h +#include rdma/rdma_netlink.h +#include rdma/iw_portmap.h #define NES_SEND_FIRST_WRITE @@ -130,6 +132,7 @@ #define NES_DBG_IW_TX 0x0004 #define NES_DBG_SHUTDOWN0x0008 #define NES_DBG_PAU 0x0010 +#define NES_DBG_NLMSG 0x0020 #define NES_DBG_RSVD1 0x1000 #define NES_DBG_RSVD2 0x2000 #define NES_DBG_RSVD3 0x4000 diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index dfa9df4..6f09a72 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2014 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU @@ -59,6 +59,7 @@ #include net/route.h #include net/ip_fib.h #include net/tcp.h +#include linux/fcntl.h #include nes.h @@ -166,7 +167,6 @@ int nes_rem_ref_cm_node(struct nes_cm_node *cm_node) { return rem_ref_cm_node(cm_node-cm_core, cm_node); } - /** * create_event */ @@ -482,11 +482,11 @@ static void form_cm_frame(struct sk_buff *skb, iph-ttl = 0x40; iph-protocol = 0x06; /* IPPROTO_TCP */ - iph-saddr = htonl(cm_node-loc_addr); - iph-daddr = htonl(cm_node-rem_addr); + iph-saddr = htonl(cm_node-mapped_loc_addr); + iph-daddr = htonl(cm_node-mapped_rem_addr); - tcph-source = htons(cm_node-loc_port); - tcph-dest = htons(cm_node-rem_port); + tcph-source = htons(cm_node-mapped_loc_port); + tcph-dest = htons(cm_node-mapped_rem_port);