Re: [PATCH 2/4] cxgb4/iw_cxgb4: use firmware ord/ird resource limits

2014-07-13 Thread Yann Droneaud
Hi,

Le vendredi 11 juillet 2014 à 20:44 +0530, Hariprasad Shenai a écrit :
> diff --git a/drivers/infiniband/hw/cxgb4/qp.c
> b/drivers/infiniband/hw/cxgb4/qp.c
> index 6f74e0e..c911e96 100644
> --- a/drivers/infiniband/hw/cxgb4/qp.c
> +++ b/drivers/infiniband/hw/cxgb4/qp.c
> @@ -58,6 +58,30 @@ static int max_fr_immd = T4_MAX_FR_IMMD;
>  module_param(max_fr_immd, int, 0644);
>  MODULE_PARM_DESC(max_fr_immd, "fastreg threshold for using DSGL
> instead of immedate");
>  
> +static int alloc_ird(struct c4iw_dev *dev, u32 ird)
> +{
> +   int ret;
> +
> +   spin_lock_irq(&dev->lock);
> +   if (ird <= dev->avail_ird) {
> +   dev->avail_ird -= ird;
> +   ret = 0;
> +   } else {
> +   ret = -ENOMEM;
> +   pr_info("%s: device IRD resources exhausted\n",
> +   pci_name(dev->rdev.lldi.pdev));
> +   }
> +   spin_unlock_irq(&dev->lock);
> +   return ret;
> +}
> +

I would have written this function as:

static int alloc_ird(struct c4iw_dev *dev, u32 ird)
{
int ret = 0;

spin_lock_irq(&dev->lock);
if (ird <= dev->avail_ird)
dev->avail_ird -= ird;
else
ret = -ENOMEM;
spin_unlock_irq(&dev->lock);

if (ret)
dev_warn(&dev->rdev.lldi.pdev->dev,
 "device IRD resources exhausted\n");

return ret;
}

Regards.

-- 
Yann Droneaud
OPTEYA


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/4] cxgb4/iw_cxgb4: use firmware ord/ird resource limits

2014-07-11 Thread Hariprasad Shenai
Advertise a larger max read queue depth for qps, and gather the resource limits
from fw and use them to avoid exhausting all the resources.

Design:

cxgb4:

Obtain the max_ordird_qp and max_ird_adapter device params from FW
at init time and pass them up to the ULDs when they attach.  If these
parameters are not available, due to older firmware, then hard-code
the values based on the known values for older firmware.
iw_cxgb4:

Fix the c4iw_query_device() to report these correct values based on
adapter parameters.  ibv_query_device() will always return:

max_qp_rd_atom = max_qp_init_rd_atom = min(module_max, max_ordird_qp)
max_res_rd_atom = max_ird_adapter

Bump up the per qp max module option to 32, allowing it to be increased
by the user up to the device max of max_ordird_qp.  32 seems to be
sufficient to maximize throughput for streaming read benchmarks.

Fail connection setup if the negotiated IRD exhausts the available
adapter ird resources.  So the driver will track the amount of ird
resource in use and not send an RI_WR/INIT to FW that would reduce the
available ird resources below zero.

Signed-off-by: Steve Wise 
Signed-off-by: Hariprasad Shenai 
---
 drivers/infiniband/hw/cxgb4/cm.c|   80 ---
 drivers/infiniband/hw/cxgb4/device.c|2 +
 drivers/infiniband/hw/cxgb4/iw_cxgb4.h  |9 ++-
 drivers/infiniband/hw/cxgb4/provider.c  |6 +-
 drivers/infiniband/hw/cxgb4/qp.c|   53 +--
 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h  |3 +
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c |   18 +
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h  |2 +
 drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h   |2 +
 9 files changed, 141 insertions(+), 34 deletions(-)

diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index d62a0f9..df5bd3d 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -79,9 +79,10 @@ static int dack_mode = 1;
 module_param(dack_mode, int, 0644);
 MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=1)");
 
-int c4iw_max_read_depth = 8;
+uint c4iw_max_read_depth = 32;
 module_param(c4iw_max_read_depth, int, 0644);
-MODULE_PARM_DESC(c4iw_max_read_depth, "Per-connection max ORD/IRD 
(default=8)");
+MODULE_PARM_DESC(c4iw_max_read_depth,
+"Per-connection max ORD/IRD (default=32)");
 
 static int enable_tcp_timestamps;
 module_param(enable_tcp_timestamps, int, 0644);
@@ -813,6 +814,8 @@ static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff 
*skb,
if (mpa_rev_to_use == 2) {
mpa->private_data_size = htons(ntohs(mpa->private_data_size) +
   sizeof (struct 
mpa_v2_conn_params));
+   PDBG("%s initiator ird %u ord %u\n", __func__, ep->ird,
+ep->ord);
mpa_v2_params.ird = htons((u16)ep->ird);
mpa_v2_params.ord = htons((u16)ep->ord);
 
@@ -1182,8 +1185,8 @@ static int connect_request_upcall(struct c4iw_ep *ep)
sizeof(struct mpa_v2_conn_params);
} else {
/* this means MPA_v1 is used. Send max supported */
-   event.ord = c4iw_max_read_depth;
-   event.ird = c4iw_max_read_depth;
+   event.ord = cur_max_read_depth(ep->com.dev);
+   event.ird = cur_max_read_depth(ep->com.dev);
event.private_data_len = ep->plen;
event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
}
@@ -1247,6 +1250,8 @@ static int update_rx_credits(struct c4iw_ep *ep, u32 
credits)
return credits;
 }
 
+#define RELAXED_IRD_NEGOTIATION 1
+
 static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
 {
struct mpa_message *mpa;
@@ -1358,17 +1363,33 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct 
sk_buff *skb)
MPA_V2_IRD_ORD_MASK;
resp_ord = ntohs(mpa_v2_params->ord) &
MPA_V2_IRD_ORD_MASK;
+   PDBG("%s responder ird %u ord %u ep ird %u ord %u\n",
+__func__, resp_ird, resp_ord, ep->ird, ep->ord);
 
/*
 * This is a double-check. Ideally, below checks are
 * not required since ird/ord stuff has been taken
 * care of in c4iw_accept_cr
 */
-   if ((ep->ird < resp_ord) || (ep->ord > resp_ird)) {
+   if (ep->ird < resp_ord) {
+   if (RELAXED_IRD_NEGOTIATION && resp_ord <=
+   ep->com.dev->rdev.lldi.max_ordird_qp)
+   ep->ird = resp_ord;
+   else
+   insuff_ird = 1;
+   } else if (ep