[PATCH WIP 11/43] xprtrdma, svcrdma: Convert to ib_alloc_mr

2015-07-21 Thread Sagi Grimberg
Signed-off-by: Sagi Grimberg 
---
 net/sunrpc/xprtrdma/frwr_ops.c   | 6 +++---
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 63f282e..517efed 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -117,7 +117,7 @@ __frwr_recovery_worker(struct work_struct *work)
if (ib_dereg_mr(r->r.frmr.fr_mr))
goto out_fail;
 
-   r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(pd, depth);
+   r->r.frmr.fr_mr = ib_alloc_mr(pd, IB_MR_TYPE_FAST_REG, depth, 0);
if (IS_ERR(r->r.frmr.fr_mr))
goto out_fail;
 
@@ -148,7 +148,7 @@ __frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct 
ib_device *device,
struct rpcrdma_frmr *f = &r->r.frmr;
int rc;
 
-   f->fr_mr = ib_alloc_fast_reg_mr(pd, depth);
+   f->fr_mr = ib_alloc_mr(pd, IB_MR_TYPE_FAST_REG, depth, 0);
if (IS_ERR(f->fr_mr))
goto out_mr_err;
f->fr_pgl = ib_alloc_fast_reg_page_list(device, depth);
@@ -158,7 +158,7 @@ __frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct 
ib_device *device,
 
 out_mr_err:
rc = PTR_ERR(f->fr_mr);
-   dprintk("RPC:   %s: ib_alloc_fast_reg_mr status %i\n",
+   dprintk("RPC:   %s: ib_alloc_mr status %i\n",
__func__, rc);
return rc;
 
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c 
b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 6b36279..fd933d9 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -738,7 +738,7 @@ static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct 
svcxprt_rdma *xprt)
if (!frmr)
goto err;
 
-   mr = ib_alloc_fast_reg_mr(xprt->sc_pd, RPCSVC_MAXPAGES);
+   mr = ib_alloc_mr(xprt->sc_pd, IB_MR_TYPE_FAST_REG, RPCSVC_MAXPAGES, 0);
if (IS_ERR(mr))
goto err_free_frmr;
 
-- 
1.8.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH WIP 12/43] RDS: Convert to ib_alloc_mr

2015-07-21 Thread Sagi Grimberg
Signed-off-by: Sagi Grimberg 
---
 net/rds/iw_rdma.c | 5 +++--
 net/rds/iw_send.c | 5 +++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c
index dba8d08..dac0131 100644
--- a/net/rds/iw_rdma.c
+++ b/net/rds/iw_rdma.c
@@ -667,11 +667,12 @@ static int rds_iw_init_fastreg(struct rds_iw_mr_pool 
*pool,
struct ib_mr *mr;
int err;
 
-   mr = ib_alloc_fast_reg_mr(rds_iwdev->pd, pool->max_message_size);
+   mr = ib_alloc_mr(rds_iwdev->pd, IB_MR_TYPE_FAST_REG,
+pool->max_message_size, 0);
if (IS_ERR(mr)) {
err = PTR_ERR(mr);
 
-   printk(KERN_WARNING "RDS/IW: ib_alloc_fast_reg_mr failed 
(err=%d)\n", err);
+   printk(KERN_WARNING "RDS/IW: ib_alloc_mr failed (err=%d)\n", 
err);
return err;
}
 
diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c
index 334fe98..0d8e74b 100644
--- a/net/rds/iw_send.c
+++ b/net/rds/iw_send.c
@@ -153,9 +153,10 @@ void rds_iw_send_init_ring(struct rds_iw_connection *ic)
sge->length = sizeof(struct rds_header);
sge->lkey = 0;
 
-   send->s_mr = ib_alloc_fast_reg_mr(ic->i_pd, 
fastreg_message_size);
+   send->s_mr = ib_alloc_mr(ic->i_pd, IB_MR_TYPE_FAST_REG,
+fastreg_message_size, 0);
if (IS_ERR(send->s_mr)) {
-   printk(KERN_WARNING "RDS/IW: ib_alloc_fast_reg_mr 
failed\n");
+   printk(KERN_WARNING "RDS/IW: ib_alloc_mr failed\n");
break;
}
 
-- 
1.8.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH WIP 20/43] IB/core: Drop ib_alloc_fast_reg_mr

2015-07-21 Thread Sagi Grimberg
Fully replaced by a more generic and suitable
ib_alloc_mr

Signed-off-by: Sagi Grimberg 
---
 drivers/infiniband/core/verbs.c | 21 -
 include/rdma/ib_verbs.h | 11 ---
 2 files changed, 32 deletions(-)

diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 23d73bd..beed431 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1265,27 +1265,6 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
 }
 EXPORT_SYMBOL(ib_alloc_mr);
 
-struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len)
-{
-   struct ib_mr *mr;
-
-   if (!pd->device->alloc_fast_reg_mr)
-   return ERR_PTR(-ENOSYS);
-
-   mr = pd->device->alloc_fast_reg_mr(pd, max_page_list_len);
-
-   if (!IS_ERR(mr)) {
-   mr->device  = pd->device;
-   mr->pd  = pd;
-   mr->uobject = NULL;
-   atomic_inc(&pd->usecnt);
-   atomic_set(&mr->usecnt, 0);
-   }
-
-   return mr;
-}
-EXPORT_SYMBOL(ib_alloc_fast_reg_mr);
-
 struct ib_fast_reg_page_list *ib_alloc_fast_reg_page_list(struct ib_device 
*device,
  int max_page_list_len)
 {
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 5ec9a70..7a93e2d 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1661,8 +1661,6 @@ struct ib_device {
   enum ib_mr_type mr_type,
   u32 max_entries,
   u32 flags);
-   struct ib_mr * (*alloc_fast_reg_mr)(struct ib_pd *pd,
-  int max_page_list_len);
struct ib_fast_reg_page_list * (*alloc_fast_reg_page_list)(struct 
ib_device *device,
   int 
page_list_len);
void   (*free_fast_reg_page_list)(struct 
ib_fast_reg_page_list *page_list);
@@ -2803,15 +2801,6 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
  u32 flags);
 
 /**
- * ib_alloc_fast_reg_mr - Allocates memory region usable with the
- *   IB_WR_FAST_REG_MR send work request.
- * @pd: The protection domain associated with the region.
- * @max_page_list_len: requested max physical buffer list length to be
- *   used with fast register work requests for this MR.
- */
-struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len);
-
-/**
  * ib_alloc_fast_reg_page_list - Allocates a page list array
  * @device - ib device pointer.
  * @page_list_len - size of the page list array to be allocated.
-- 
1.8.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH WIP 17/43] nes: Drop nes_alloc_fast_reg_mr

2015-07-21 Thread Sagi Grimberg
Signed-off-by: Sagi Grimberg 
---
 drivers/infiniband/hw/nes/nes_verbs.c | 66 ---
 1 file changed, 66 deletions(-)

diff --git a/drivers/infiniband/hw/nes/nes_verbs.c 
b/drivers/infiniband/hw/nes/nes_verbs.c
index ac63763..752e6ea 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -447,71 +447,6 @@ static struct ib_mr *nes_alloc_mr(struct ib_pd *ibpd,
 }
 
 /*
- * nes_alloc_fast_reg_mr
- */
-static struct ib_mr *nes_alloc_fast_reg_mr(struct ib_pd *ibpd, int 
max_page_list_len)
-{
-   struct nes_pd *nespd = to_nespd(ibpd);
-   struct nes_vnic *nesvnic = to_nesvnic(ibpd->device);
-   struct nes_device *nesdev = nesvnic->nesdev;
-   struct nes_adapter *nesadapter = nesdev->nesadapter;
-
-   u32 next_stag_index;
-   u8 stag_key = 0;
-   u32 driver_key = 0;
-   int err = 0;
-   u32 stag_index = 0;
-   struct nes_mr *nesmr;
-   u32 stag;
-   int ret;
-   struct ib_mr *ibmr;
-/*
- * Note:  Set to always use a fixed length single page entry PBL.  This is to 
allow
- *  for the fast_reg_mr operation to always know the size of the PBL.
- */
-   if (max_page_list_len > (NES_4K_PBL_CHUNK_SIZE / sizeof(u64)))
-   return ERR_PTR(-E2BIG);
-
-   get_random_bytes(&next_stag_index, sizeof(next_stag_index));
-   stag_key = (u8)next_stag_index;
-   next_stag_index >>= 8;
-   next_stag_index %= nesadapter->max_mr;
-
-   err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs,
-nesadapter->max_mr, &stag_index,
-&next_stag_index, NES_RESOURCE_FAST_MR);
-   if (err)
-   return ERR_PTR(err);
-
-   nesmr = kzalloc(sizeof(*nesmr), GFP_KERNEL);
-   if (!nesmr) {
-   nes_free_resource(nesadapter, nesadapter->allocated_mrs, 
stag_index);
-   return ERR_PTR(-ENOMEM);
-   }
-
-   stag = stag_index << 8;
-   stag |= driver_key;
-   stag += (u32)stag_key;
-
-   nes_debug(NES_DBG_MR, "Allocating STag 0x%08X index = 0x%08X\n",
- stag, stag_index);
-
-   ret = alloc_fast_reg_mr(nesdev, nespd, stag, max_page_list_len);
-
-   if (ret == 0) {
-   nesmr->ibmr.rkey = stag;
-   nesmr->ibmr.lkey = stag;
-   nesmr->mode = IWNES_MEMREG_TYPE_FMEM;
-   ibmr = &nesmr->ibmr;
-   } else {
-   kfree(nesmr);
-   nes_free_resource(nesadapter, nesadapter->allocated_mrs, 
stag_index);
-   ibmr = ERR_PTR(-ENOMEM);
-   }
-   return ibmr;
-}
-
-/*
  * nes_alloc_fast_reg_page_list
  */
 static struct ib_fast_reg_page_list *nes_alloc_fast_reg_page_list(
@@ -4002,7 +3937,6 @@ struct nes_ib_device *nes_init_ofa_device(struct 
net_device *netdev)
nesibdev->ibdev.bind_mw = nes_bind_mw;
 
nesibdev->ibdev.alloc_mr = nes_alloc_mr;
-   nesibdev->ibdev.alloc_fast_reg_mr = nes_alloc_fast_reg_mr;
nesibdev->ibdev.alloc_fast_reg_page_list = nes_alloc_fast_reg_page_list;
nesibdev->ibdev.free_fast_reg_page_list = nes_free_fast_reg_page_list;
 
-- 
1.8.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH WIP 16/43] qib: Drop qib_alloc_fast_reg_mr

2015-07-21 Thread Sagi Grimberg
Signed-off-by: Sagi Grimberg 
---
 drivers/infiniband/hw/qib/qib_mr.c| 17 -
 drivers/infiniband/hw/qib/qib_verbs.c |  1 -
 drivers/infiniband/hw/qib/qib_verbs.h |  2 --
 3 files changed, 20 deletions(-)

diff --git a/drivers/infiniband/hw/qib/qib_mr.c 
b/drivers/infiniband/hw/qib/qib_mr.c
index 1522255..2a4afea 100644
--- a/drivers/infiniband/hw/qib/qib_mr.c
+++ b/drivers/infiniband/hw/qib/qib_mr.c
@@ -344,23 +344,6 @@ struct ib_mr *qib_alloc_mr(struct ib_pd *pd,
return &mr->ibmr;
 }
 
-/*
- * Allocate a memory region usable with the
- * IB_WR_FAST_REG_MR send work request.
- *
- * Return the memory region on success, otherwise return an errno.
- */
-struct ib_mr *qib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len)
-{
-   struct qib_mr *mr;
-
-   mr = alloc_mr(max_page_list_len, pd);
-   if (IS_ERR(mr))
-   return (struct ib_mr *)mr;
-
-   return &mr->ibmr;
-}
-
 struct ib_fast_reg_page_list *
 qib_alloc_fast_reg_page_list(struct ib_device *ibdev, int page_list_len)
 {
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c 
b/drivers/infiniband/hw/qib/qib_verbs.c
index 323666b..ef022a1 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -2236,7 +2236,6 @@ int qib_register_ib_device(struct qib_devdata *dd)
ibdev->reg_user_mr = qib_reg_user_mr;
ibdev->dereg_mr = qib_dereg_mr;
ibdev->alloc_mr = qib_alloc_mr;
-   ibdev->alloc_fast_reg_mr = qib_alloc_fast_reg_mr;
ibdev->alloc_fast_reg_page_list = qib_alloc_fast_reg_page_list;
ibdev->free_fast_reg_page_list = qib_free_fast_reg_page_list;
ibdev->alloc_fmr = qib_alloc_fmr;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h 
b/drivers/infiniband/hw/qib/qib_verbs.h
index 034510c..8fbd995 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -1037,8 +1037,6 @@ struct ib_mr *qib_alloc_mr(struct ib_pd *pd,
   u32 max_entries,
   u32 flags);
 
-struct ib_mr *qib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len);
-
 struct ib_fast_reg_page_list *qib_alloc_fast_reg_page_list(
struct ib_device *ibdev, int page_list_len);
 
-- 
1.8.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH WIP 26/43] qib: Allocate a private page list in ib_alloc_mr

2015-07-21 Thread Sagi Grimberg
Signed-off-by: Sagi Grimberg 
---
 drivers/infiniband/hw/qib/qib_mr.c| 9 +
 drivers/infiniband/hw/qib/qib_verbs.h | 2 ++
 2 files changed, 11 insertions(+)

diff --git a/drivers/infiniband/hw/qib/qib_mr.c 
b/drivers/infiniband/hw/qib/qib_mr.c
index 2a4afea..a58a347 100644
--- a/drivers/infiniband/hw/qib/qib_mr.c
+++ b/drivers/infiniband/hw/qib/qib_mr.c
@@ -303,6 +303,7 @@ int qib_dereg_mr(struct ib_mr *ibmr)
int ret = 0;
unsigned long timeout;
 
+   kfree(mr->pl);
qib_free_lkey(&mr->mr);
 
qib_put_mr(&mr->mr); /* will set completion if last */
@@ -341,7 +342,15 @@ struct ib_mr *qib_alloc_mr(struct ib_pd *pd,
if (IS_ERR(mr))
return (struct ib_mr *)mr;
 
+   mr->pl = kcalloc(max_entries, sizeof(u64), GFP_KERNEL);
+   if (!mr->pl)
+   goto err;
+
return &mr->ibmr;
+
+err:
+   qib_dereg_mr(&mr->ibmr);
+   return ERR_PTR(-ENOMEM);
 }
 
 struct ib_fast_reg_page_list *
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h 
b/drivers/infiniband/hw/qib/qib_verbs.h
index 8fbd995..c8062ae 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -330,6 +330,8 @@ struct qib_mr {
struct ib_mr ibmr;
struct ib_umem *umem;
struct qib_mregion mr;  /* must be last */
+   u64 *pl;
+   u32 npages;
 };
 
 /*
-- 
1.8.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH WIP 22/43] mlx4: Allocate a private page list in ib_alloc_mr

2015-07-21 Thread Sagi Grimberg
Signed-off-by: Sagi Grimberg 
---
 drivers/infiniband/hw/mlx4/mlx4_ib.h |  5 
 drivers/infiniband/hw/mlx4/mr.c  | 52 +---
 2 files changed, 54 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h 
b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 9220faf..a9a4a7f 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -120,6 +120,11 @@ struct mlx4_ib_mr {
struct ib_mribmr;
struct mlx4_mr  mmr;
struct ib_umem *umem;
+   u64 *pl;
+   __be64  *mpl;
+   dma_addr_t  pl_map;
+   u32 npages;
+   u32 max_pages;
 };
 
 struct mlx4_ib_mw {
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 121ee7f..01e16bc 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -271,11 +271,50 @@ release_mpt_entry:
return err;
 }
 
+static int
+mlx4_alloc_page_list(struct ib_device *device,
+struct mlx4_ib_mr *mr,
+ int max_entries)
+{
+   int size = max_entries * sizeof (u64);
+
+   mr->pl = kcalloc(max_entries, sizeof(u64), GFP_KERNEL);
+   if (!mr->pl)
+   return -ENOMEM;
+
+   mr->mpl = dma_alloc_coherent(device->dma_device, size,
+&mr->pl_map, GFP_KERNEL);
+   if (!mr->mpl)
+   goto err;
+
+   return 0;
+err:
+   kfree(mr->pl);
+
+   return -ENOMEM;
+}
+
+static void
+mlx4_free_page_list(struct mlx4_ib_mr *mr)
+{
+   struct ib_device *device = mr->ibmr.device;
+   int size = mr->max_pages * sizeof(u64);
+
+   kfree(mr->pl);
+   if (mr->mpl)
+   dma_free_coherent(device->dma_device, size,
+ mr->mpl, mr->pl_map);
+   mr->pl = NULL;
+   mr->mpl = NULL;
+}
+
 int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
 {
struct mlx4_ib_mr *mr = to_mmr(ibmr);
int ret;
 
+   mlx4_free_page_list(mr);
+
ret = mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr);
if (ret)
return ret;
@@ -371,18 +410,25 @@ struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
if (err)
goto err_free;
 
+   err = mlx4_alloc_page_list(pd->device, mr, max_entries);
+   if (err)
+   goto err_free_mr;
+
+   mr->max_pages = max_entries;
+
err = mlx4_mr_enable(dev->dev, &mr->mmr);
if (err)
-   goto err_mr;
+   goto err_free_pl;
 
mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
mr->umem = NULL;
 
return &mr->ibmr;
 
-err_mr:
+err_free_pl:
+   mlx4_free_page_list(mr);
+err_free_mr:
(void) mlx4_mr_free(dev->dev, &mr->mmr);
-
 err_free:
kfree(mr);
return ERR_PTR(err);
-- 
1.8.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH WIP 21/43] mlx5: Allocate a private page list in ib_alloc_mr

2015-07-21 Thread Sagi Grimberg
Signed-off-by: Sagi Grimberg 
---
 drivers/infiniband/hw/mlx5/mlx5_ib.h |  5 
 drivers/infiniband/hw/mlx5/mr.c  | 45 
 2 files changed, 50 insertions(+)

diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h 
b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index c2916f1..df5e959 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -315,6 +315,11 @@ enum mlx5_ib_mtt_access_flags {
 
 struct mlx5_ib_mr {
struct ib_mribmr;
+   u64 *pl;
+   __be64  *mpl;
+   dma_addr_t  pl_map;
+   int ndescs;
+   int max_descs;
struct mlx5_core_mr mmr;
struct ib_umem *umem;
struct mlx5_shared_mr_info  *smr_info;
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index c8de302..1075065 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1167,6 +1167,42 @@ error:
return err;
 }
 
+static int
+mlx5_alloc_page_list(struct ib_device *device,
+struct mlx5_ib_mr *mr, int ndescs)
+{
+   int size = ndescs * sizeof(u64);
+
+   mr->pl = kcalloc(ndescs, sizeof(u64), GFP_KERNEL);
+   if (!mr->pl)
+   return -ENOMEM;
+
+   mr->mpl = dma_alloc_coherent(device->dma_device, size,
+&mr->pl_map, GFP_KERNEL);
+   if (!mr->mpl)
+   goto err;
+
+   return 0;
+err:
+   kfree(mr->pl);
+
+   return -ENOMEM;
+}
+
+static void
+mlx5_free_page_list(struct mlx5_ib_mr *mr)
+{
+   struct ib_device *device = mr->ibmr.device;
+   int size = mr->max_descs * sizeof(u64);
+
+   kfree(mr->pl);
+   if (mr->mpl)
+   dma_free_coherent(device->dma_device, size,
+ mr->mpl, mr->pl_map);
+   mr->pl = NULL;
+   mr->mpl = NULL;
+}
+
 static int clean_mr(struct mlx5_ib_mr *mr)
 {
struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
@@ -1186,6 +1222,8 @@ static int clean_mr(struct mlx5_ib_mr *mr)
mr->sig = NULL;
}
 
+   mlx5_free_page_list(mr);
+
if (!umred) {
err = destroy_mkey(dev, mr);
if (err) {
@@ -1279,6 +1317,12 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
if (mr_type == IB_MR_TYPE_FAST_REG) {
access_mode = MLX5_ACCESS_MODE_MTT;
in->seg.log2_page_size = PAGE_SHIFT;
+
+   err = mlx5_alloc_page_list(pd->device, mr, ndescs);
+   if (err)
+   goto err_free_in;
+
+   mr->max_descs = ndescs;
} else if (mr_type == IB_MR_TYPE_SIGNATURE) {
u32 psv_index[2];
 
@@ -1335,6 +1379,7 @@ err_destroy_psv:
mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
 mr->sig->psv_wire.psv_idx);
}
+   mlx5_free_page_list(mr);
 err_free_sig:
kfree(mr->sig);
 err_free_in:
-- 
1.8.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH WIP 34/43] nes: Support the new memory registration API

2015-07-21 Thread Sagi Grimberg
Just duplicated the functions to take the needed
arguments from the private MR context. The old
fast_reg routines will be dropped later.

Signed-off-by: Sagi Grimberg 
---
 drivers/infiniband/hw/nes/nes_verbs.c | 85 +++
 1 file changed, 85 insertions(+)

diff --git a/drivers/infiniband/hw/nes/nes_verbs.c 
b/drivers/infiniband/hw/nes/nes_verbs.c
index 532496d..d5d8b01 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -465,6 +465,17 @@ err:
return ERR_PTR(-ENOMEM);
 }
 
+static int nes_map_mr_sg(struct ib_mr *ibmr,
+struct scatterlist *sg,
+unsigned short sg_nents)
+{
+   struct nes_mr *nesmr = to_nesmr(ibmr);
+
+   return ib_sg_to_pages(sg, sg_nents, nesmr->max_pages,
+ nesmr->pl, &nesmr->npages,
+ &ibmr->length, &ibmr->iova);
+}
+
 /*
  * nes_alloc_fast_reg_page_list
  */
@@ -3537,6 +3548,79 @@ static int nes_post_send(struct ib_qp *ibqp, struct 
ib_send_wr *ib_wr,
  wqe_misc);
break;
}
+   case IB_WR_FASTREG_MR:
+   {
+   int i;
+   struct nes_mr *mr = to_nesmr(ib_wr->wr.fastreg.mr);
+   int flags = mr->ibmr.access;
+   u64 *src_page_list = mr->pl;
+   u64 *dst_page_list = mr->mpl;
+
+   if (mr->npages > (NES_4K_PBL_CHUNK_SIZE / sizeof(u64))) 
{
+   nes_debug(NES_DBG_IW_TX, "SQ_FMR: bad 
page_list_len\n");
+   err = -EINVAL;
+   break;
+   }
+   wqe_misc = NES_IWARP_SQ_OP_FAST_REG;
+   set_wqe_64bit_value(wqe->wqe_words,
+   NES_IWARP_SQ_FMR_WQE_VA_FBO_LOW_IDX,
+   mr->ibmr.iova);
+   set_wqe_32bit_value(wqe->wqe_words,
+   NES_IWARP_SQ_FMR_WQE_LENGTH_LOW_IDX,
+   mr->ibmr.length);
+   set_wqe_32bit_value(wqe->wqe_words,
+   
NES_IWARP_SQ_FMR_WQE_LENGTH_HIGH_IDX, 0);
+   set_wqe_32bit_value(wqe->wqe_words,
+   NES_IWARP_SQ_FMR_WQE_MR_STAG_IDX,
+   ib_wr->wr.fastreg.key);
+
+   /* Set page size: currently only 4K*/
+   if (ib_wr->wr.fast_reg.page_shift == 12) {
+   wqe_misc |= NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_4K;
+   } else {
+   nes_debug(NES_DBG_IW_TX, "Invalid page shift,"
+ " ib_wr=%u, max=1\n", ib_wr->num_sge);
+   err = -EINVAL;
+   break;
+   }
+
+   /* Set access_flags */
+   wqe_misc |= 
NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_READ;
+   if (flags & IB_ACCESS_LOCAL_WRITE)
+   wqe_misc |= 
NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_WRITE;
+
+   if (flags & IB_ACCESS_REMOTE_WRITE)
+   wqe_misc |= 
NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_REMOTE_WRITE;
+
+   if (flags & IB_ACCESS_REMOTE_READ)
+   wqe_misc |= 
NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_REMOTE_READ;
+
+   if (flags & IB_ACCESS_MW_BIND)
+   wqe_misc |= 
NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_WINDOW_BIND;
+
+   /* Fill in PBL info: */
+   set_wqe_64bit_value(wqe->wqe_words,
+   
NES_IWARP_SQ_FMR_WQE_PBL_ADDR_LOW_IDX,
+   mr->mpl_addr);
+
+   set_wqe_32bit_value(wqe->wqe_words,
+   NES_IWARP_SQ_FMR_WQE_PBL_LENGTH_IDX,
+   mr->npages * 8);
+
+   for (i = 0; i < mr->npages; i++)
+   dst_page_list[i] = 
cpu_to_le64(src_page_list[i]);
+
+   nes_debug(NES_DBG_IW_TX, "SQ_FMR: iova_start: %llx, "
+ "length: %d, rkey: %0x, pgl_paddr: %llx, "
+ "page_list_len: %u, wqe_misc: %x\n",
+ (unsigned long long) mr->ibmr.iova,
+ mr->ibmr.length

[PATCH WIP 25/43] cxgb4: Allocate a private page list in ib_alloc_mr

2015-07-21 Thread Sagi Grimberg
Signed-off-by: Sagi Grimberg 
---
 drivers/infiniband/hw/cxgb4/iw_cxgb4.h |  4 
 drivers/infiniband/hw/cxgb4/mem.c  | 15 +++
 2 files changed, 19 insertions(+)

diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h 
b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 886be9c..e529ace 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -386,6 +386,10 @@ struct c4iw_mr {
struct c4iw_dev *rhp;
u64 kva;
struct tpt_attributes attr;
+   u64 *mpl;
+   dma_addr_t mpl_addr;
+   u32 max_mpl_len;
+   u32 mpl_len;
 };
 
 static inline struct c4iw_mr *to_c4iw_mr(struct ib_mr *ibmr)
diff --git a/drivers/infiniband/hw/cxgb4/mem.c 
b/drivers/infiniband/hw/cxgb4/mem.c
index 5ecf4aa..91aedce 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -864,6 +864,7 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd,
u32 mmid;
u32 stag = 0;
int ret = 0;
+   int length = roundup(max_entries * sizeof(u64), 32);
 
if (mr_type != IB_MR_TYPE_FAST_REG || flags)
return ERR_PTR(-EINVAL);
@@ -876,6 +877,14 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd,
goto err;
}
 
+   mhp->mpl = dma_alloc_coherent(&rhp->rdev.lldi.pdev->dev,
+ length, &mhp->mpl_addr, GFP_KERNEL);
+   if (!mhp->mpl) {
+   ret = -ENOMEM;
+   goto err_mpl;
+   }
+   mhp->max_mpl_len = length;
+
mhp->rhp = rhp;
ret = alloc_pbl(mhp, max_entries);
if (ret)
@@ -905,6 +914,9 @@ err2:
c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
  mhp->attr.pbl_size << 3);
 err1:
+   dma_free_coherent(&mhp->rhp->rdev.lldi.pdev->dev,
+ mhp->max_mpl_len, mhp->mpl, mhp->mpl_addr);
+err_mpl:
kfree(mhp);
 err:
return ERR_PTR(ret);
@@ -970,6 +982,9 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr)
rhp = mhp->rhp;
mmid = mhp->attr.stag >> 8;
remove_handle(rhp, &rhp->mmidr, mmid);
+   if (mhp->mpl)
+   dma_free_coherent(&mhp->rhp->rdev.lldi.pdev->dev,
+ mhp->max_mpl_len, mhp->mpl, mhp->mpl_addr);
dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
   mhp->attr.pbl_addr);
if (mhp->attr.pbl_size)
-- 
1.8.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH WIP 35/43] qib: Support the new memory registration API

2015-07-21 Thread Sagi Grimberg
Just duplicated the functions to take the needed
arguments from the private MR context. The old
fast_reg routines will be dropped later.

Signed-off-by: Sagi Grimberg 
---
 drivers/infiniband/hw/qib/qib_keys.c  | 56 +++
 drivers/infiniband/hw/qib/qib_mr.c| 11 +++
 drivers/infiniband/hw/qib/qib_verbs.c |  6 +++-
 drivers/infiniband/hw/qib/qib_verbs.h |  5 
 4 files changed, 77 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/qib/qib_keys.c 
b/drivers/infiniband/hw/qib/qib_keys.c
index ad843c7..557e6c2 100644
--- a/drivers/infiniband/hw/qib/qib_keys.c
+++ b/drivers/infiniband/hw/qib/qib_keys.c
@@ -385,3 +385,59 @@ bail:
spin_unlock_irqrestore(&rkt->lock, flags);
return ret;
 }
+
+/*
+ * Initialize the memory region specified by the work request.
+ */
+int qib_fastreg_mr(struct qib_qp *qp, struct ib_send_wr *wr)
+{
+   struct qib_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table;
+   struct qib_pd *pd = to_ipd(qp->ibqp.pd);
+   struct qib_mr *mr = to_imr(wr->wr.fastreg.mr);
+   struct qib_mregion *mrg;
+   u32 key = wr->wr.fastreg.key;
+   unsigned i, n, m;
+   int ret = -EINVAL;
+   unsigned long flags;
+   u64 *page_list;
+   size_t ps;
+
+   spin_lock_irqsave(&rkt->lock, flags);
+   if (pd->user || key == 0)
+   goto bail;
+
+   mrg = rcu_dereference_protected(
+   rkt->table[(key >> (32 - ib_qib_lkey_table_size))],
+   lockdep_is_held(&rkt->lock));
+   if (unlikely(mrg == NULL || qp->ibqp.pd != mrg->pd))
+   goto bail;
+
+   if (mr->npages > mrg->max_segs)
+   goto bail;
+
+   ps = 1UL << PAGE_SHIFT;
+   if (mr->ibmr.length > ps * mr->npages)
+   goto bail;
+
+   mrg->user_base = mr->ibmr.iova;
+   mrg->iova = mr->ibmr.iova;
+   mrg->lkey = key;
+   mrg->length = mr->ibmr.length;
+   mrg->access_flags = mr->ibmr.access;
+   page_list = mr->pl;
+   m = 0;
+   n = 0;
+   for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) {
+   mrg->map[m]->segs[n].vaddr = (void *) page_list[i];
+   mrg->map[m]->segs[n].length = ps;
+   if (++n == QIB_SEGSZ) {
+   m++;
+   n = 0;
+   }
+   }
+
+   ret = 0;
+bail:
+   spin_unlock_irqrestore(&rkt->lock, flags);
+   return ret;
+}
diff --git a/drivers/infiniband/hw/qib/qib_mr.c 
b/drivers/infiniband/hw/qib/qib_mr.c
index a58a347..a4986f0 100644
--- a/drivers/infiniband/hw/qib/qib_mr.c
+++ b/drivers/infiniband/hw/qib/qib_mr.c
@@ -353,6 +353,17 @@ err:
return ERR_PTR(-ENOMEM);
 }
 
+int qib_map_mr_sg(struct ib_mr *ibmr,
+ struct scatterlist *sg,
+ unsigned short sg_nents)
+{
+   struct qib_mr *mr = to_imr(ibmr);
+
+   return ib_sg_to_pages(sg, sg_nents, mr->mr.max_segs,
+ mr->pl, &mr->npages,
+ &ibmr->length, &ibmr->iova);
+}
+
 struct ib_fast_reg_page_list *
 qib_alloc_fast_reg_page_list(struct ib_device *ibdev, int page_list_len)
 {
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c 
b/drivers/infiniband/hw/qib/qib_verbs.c
index ef022a1..8561f90 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -361,7 +361,10 @@ static int qib_post_one_send(struct qib_qp *qp, struct 
ib_send_wr *wr,
 * undefined operations.
 * Make sure buffer is large enough to hold the result for atomics.
 */
-   if (wr->opcode == IB_WR_FAST_REG_MR) {
+   if (wr->opcode == IB_WR_FASTREG_MR) {
+   if (qib_fastreg_mr(qp, wr))
+   goto bail_inval;
+   } else if (wr->opcode == IB_WR_FAST_REG_MR) {
if (qib_fast_reg_mr(qp, wr))
goto bail_inval;
} else if (qp->ibqp.qp_type == IB_QPT_UC) {
@@ -2236,6 +2239,7 @@ int qib_register_ib_device(struct qib_devdata *dd)
ibdev->reg_user_mr = qib_reg_user_mr;
ibdev->dereg_mr = qib_dereg_mr;
ibdev->alloc_mr = qib_alloc_mr;
+   ibdev->map_mr_sg = qib_map_mr_sg;
ibdev->alloc_fast_reg_page_list = qib_alloc_fast_reg_page_list;
ibdev->free_fast_reg_page_list = qib_free_fast_reg_page_list;
ibdev->alloc_fmr = qib_alloc_fmr;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h 
b/drivers/infiniband/hw/qib/qib_verbs.h
index c8062ae..c7a3af5 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -1039,12 +1039,17 @@ struct ib_mr *qib_alloc_mr(struct ib_pd *pd,
   u32 max_entries,
   u32 flags);
 
+int qib_map_mr_sg(struct ib_mr *ibmr,

[PATCH WIP 23/43] ocrdma: Allocate a private page list in ib_alloc_mr

2015-07-21 Thread Sagi Grimberg
Signed-off-by: Sagi Grimberg 
---
 drivers/infiniband/hw/ocrdma/ocrdma.h   | 2 ++
 drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 9 +
 2 files changed, 11 insertions(+)

diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h 
b/drivers/infiniband/hw/ocrdma/ocrdma.h
index b396344..37deea2 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma.h
@@ -178,6 +178,8 @@ struct ocrdma_mr {
struct ib_mr ibmr;
struct ib_umem *umem;
struct ocrdma_hw_mr hwmr;
+   u64 *pl;
+   u32 npages;
 };
 
 struct ocrdma_stats {
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c 
b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index fb97db1..a764cb9 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -957,6 +957,7 @@ int ocrdma_dereg_mr(struct ib_mr *ib_mr)
 
(void) ocrdma_mbx_dealloc_lkey(dev, mr->hwmr.fr_mr, mr->hwmr.lkey);
 
+   kfree(mr->pl);
ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
 
/* it could be user registered memory. */
@@ -3003,6 +3004,12 @@ struct ib_mr *ocrdma_alloc_mr(struct ib_pd *ibpd,
if (!mr)
return ERR_PTR(-ENOMEM);
 
+   mr->pl = kcalloc(max_entries, sizeof(u64), GFP_KERNEL);
+   if (!mr->pl) {
+   status = -ENOMEM;
+   goto pl_err;
+   }
+
status = ocrdma_get_pbl_info(dev, mr, max_entries);
if (status)
goto pbl_err;
@@ -3026,6 +3033,8 @@ struct ib_mr *ocrdma_alloc_mr(struct ib_pd *ibpd,
 mbx_err:
ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
 pbl_err:
+   kfree(mr->pl);
+pl_err:
kfree(mr);
return ERR_PTR(-ENOMEM);
 }
-- 
1.8.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH WIP 27/43] nes: Allocate a private page list in ib_alloc_mr

2015-07-21 Thread Sagi Grimberg
Signed-off-by: Sagi Grimberg 
---
 drivers/infiniband/hw/nes/nes_verbs.c | 27 +++
 drivers/infiniband/hw/nes/nes_verbs.h |  5 +
 2 files changed, 32 insertions(+)

diff --git a/drivers/infiniband/hw/nes/nes_verbs.c 
b/drivers/infiniband/hw/nes/nes_verbs.c
index 752e6ea..532496d 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -51,6 +51,7 @@ atomic_t qps_created;
 atomic_t sw_qps_destroyed;
 
 static void nes_unregister_ofa_device(struct nes_ib_device *nesibdev);
+static int nes_dereg_mr(struct ib_mr *ib_mr);
 
 /**
  * nes_alloc_mw
@@ -443,7 +444,25 @@ static struct ib_mr *nes_alloc_mr(struct ib_pd *ibpd,
nes_free_resource(nesadapter, nesadapter->allocated_mrs, 
stag_index);
ibmr = ERR_PTR(-ENOMEM);
}
+
+   nesmr->pl = kcalloc(max_entries, sizeof(u64), GFP_KERNEL);
+   if (!nesmr->pl)
+   goto err;
+
+   nesmr->mpl = pci_alloc_consistent(nesdev->pcidev,
+ max_entries * sizeof(u64),
+ &nesmr->mpl_addr);
+   if (!nesmr->mpl_addr)
+   goto err;
+
+   nesmr->max_pages = max_entries;
+
return ibmr;
+
+err:
+   nes_dereg_mr(ibmr);
+
+   return ERR_PTR(-ENOMEM);
 }
 
 /*
@@ -2681,6 +2700,14 @@ static int nes_dereg_mr(struct ib_mr *ib_mr)
u16 major_code;
u16 minor_code;
 
+
+   kfree(nesmr->pl);
+   if (nesmr->mpl)
+   pci_free_consistent(nesdev->pcidev,
+   nesmr->max_pages * sizeof(u64),
+   nesmr->mpl,
+   nesmr->mpl_addr);
+
if (nesmr->region) {
ib_umem_release(nesmr->region);
}
diff --git a/drivers/infiniband/hw/nes/nes_verbs.h 
b/drivers/infiniband/hw/nes/nes_verbs.h
index 309b31c..e99aa69 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.h
+++ b/drivers/infiniband/hw/nes/nes_verbs.h
@@ -79,6 +79,11 @@ struct nes_mr {
u16   pbls_used;
u8mode;
u8pbl_4k;
+   u64   *pl;
+   u64   *mpl;
+   dma_addr_tmpl_addr;
+   u32   max_pages;
+   u32   npages;
 };
 
 struct nes_hw_pb {
-- 
1.8.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH WIP 24/43] cxgb3: Allocate a private page list in ib_alloc_mr

2015-07-21 Thread Sagi Grimberg
Signed-off-by: Sagi Grimberg 
---
 drivers/infiniband/hw/cxgb3/iwch_provider.c | 9 +
 drivers/infiniband/hw/cxgb3/iwch_provider.h | 2 ++
 2 files changed, 11 insertions(+)

diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c 
b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index af55b79..c9368e6 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -463,6 +463,7 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr)
return -EINVAL;
 
mhp = to_iwch_mr(ib_mr);
+   kfree(mhp->pl);
rhp = mhp->rhp;
mmid = mhp->attr.stag >> 8;
cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
@@ -817,6 +818,12 @@ static struct ib_mr *iwch_alloc_mr(struct ib_pd *pd,
if (!mhp)
goto err;
 
+   mhp->pl = kcalloc(max_entries, sizeof(u64), GFP_KERNEL);
+   if (!mhp->pl) {
+   ret = -ENOMEM;
+   goto pl_err;
+   }
+
mhp->rhp = rhp;
ret = iwch_alloc_pbl(mhp, max_entries);
if (ret)
@@ -843,6 +850,8 @@ err3:
 err2:
iwch_free_pbl(mhp);
 err1:
+   kfree(mhp->pl);
+pl_err:
kfree(mhp);
 err:
return ERR_PTR(ret);
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h 
b/drivers/infiniband/hw/cxgb3/iwch_provider.h
index 87c14b0..8e16da9 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h
@@ -77,6 +77,8 @@ struct iwch_mr {
struct iwch_dev *rhp;
u64 kva;
struct tpt_attributes attr;
+   u64 *pl;
+   u32 npages;
 };
 
 typedef struct iwch_mw iwch_mw_handle;
-- 
1.8.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH WIP 33/43] cxgb4: Support the new memory registration API

2015-07-21 Thread Sagi Grimberg
Just duplicated the functions to take the needed
arguments from the private MR context. The old
fast_reg routines will be dropped later.

Signed-off-by: Sagi Grimberg 
---
 drivers/infiniband/hw/cxgb4/iw_cxgb4.h |  3 ++
 drivers/infiniband/hw/cxgb4/mem.c  | 11 +
 drivers/infiniband/hw/cxgb4/provider.c |  1 +
 drivers/infiniband/hw/cxgb4/qp.c   | 75 +-
 4 files changed, 89 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h 
b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index e529ace..ce2bbf3 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -978,6 +978,9 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd,
enum ib_mr_type mr_type,
u32 max_entries,
u32 flags);
+int c4iw_map_mr_sg(struct ib_mr *ibmr,
+  struct scatterlist *sg,
+  unsigned short sg_nents);
 int c4iw_dealloc_mw(struct ib_mw *mw);
 struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
 struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start,
diff --git a/drivers/infiniband/hw/cxgb4/mem.c 
b/drivers/infiniband/hw/cxgb4/mem.c
index 91aedce..ea37fc7 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -922,6 +922,17 @@ err:
return ERR_PTR(ret);
 }
 
+int c4iw_map_mr_sg(struct ib_mr *ibmr,
+  struct scatterlist *sg,
+  unsigned short sg_nents)
+{
+   struct c4iw_mr *mhp = to_c4iw_mr(ibmr);
+
+   return ib_sg_to_pages(sg, sg_nents, mhp->max_mpl_len,
+ mhp->mpl, &mhp->mpl_len,
+ &ibmr->length, &ibmr->iova);
+}
+
 struct ib_fast_reg_page_list *c4iw_alloc_fastreg_pbl(struct ib_device *device,
 int page_list_len)
 {
diff --git a/drivers/infiniband/hw/cxgb4/provider.c 
b/drivers/infiniband/hw/cxgb4/provider.c
index 7746113..55dedad 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -557,6 +557,7 @@ int c4iw_register_device(struct c4iw_dev *dev)
dev->ibdev.bind_mw = c4iw_bind_mw;
dev->ibdev.dealloc_mw = c4iw_dealloc_mw;
dev->ibdev.alloc_mr = c4iw_alloc_mr;
+   dev->ibdev.map_mr_sg = c4iw_map_mr_sg;
dev->ibdev.alloc_fast_reg_page_list = c4iw_alloc_fastreg_pbl;
dev->ibdev.free_fast_reg_page_list = c4iw_free_fastreg_pbl;
dev->ibdev.attach_mcast = c4iw_multicast_attach;
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 6517e12..e5d1d99 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -605,10 +605,75 @@ static int build_rdma_recv(struct c4iw_qp *qhp, union 
t4_recv_wr *wqe,
return 0;
 }
 
-static int build_fastreg(struct t4_sq *sq, union t4_wr *wqe,
+static int build_fastreg2(struct t4_sq *sq, union t4_wr *wqe,
 struct ib_send_wr *wr, u8 *len16, u8 t5dev)
 {
+   struct c4iw_mr *mhp = to_c4iw_mr(wr->wr.fastreg.mr);
+   struct fw_ri_immd *imdp;
+   __be64 *p;
+   int i;
+   int pbllen = roundup(mhp->mpl_len * sizeof(u64), 32);
+   int rem;
+
+   if (mhp->mpl_len > t4_max_fr_depth(use_dsgl))
+   return -EINVAL;
+
+   wqe->fr.qpbinde_to_dcacpu = 0;
+   wqe->fr.pgsz_shift = PAGE_SHIFT - 12;
+   wqe->fr.addr_type = FW_RI_VA_BASED_TO;
+   wqe->fr.mem_perms = c4iw_ib_to_tpt_access(mhp->ibmr.access);
+   wqe->fr.len_hi = 0;
+   wqe->fr.len_lo = cpu_to_be32(mhp->ibmr.length);
+   wqe->fr.stag = cpu_to_be32(wr->wr.fastreg.key);
+   wqe->fr.va_hi = cpu_to_be32(mhp->ibmr.iova >> 32);
+   wqe->fr.va_lo_fbo = cpu_to_be32(mhp->ibmr.iova &
+   0x);
+
+   if (t5dev && use_dsgl && (pbllen > max_fr_immd)) {
+   struct fw_ri_dsgl *sglp;
+
+   for (i = 0; i < mhp->mpl_len; i++) {
+   mhp->mpl[i] = (__force 
u64)cpu_to_be64((u64)mhp->mpl[i]);
+   }
+
+   sglp = (struct fw_ri_dsgl *)(&wqe->fr + 1);
+   sglp->op = FW_RI_DATA_DSGL;
+   sglp->r1 = 0;
+   sglp->nsge = cpu_to_be16(1);
+   sglp->addr0 = cpu_to_be64(mhp->mpl_addr);
+   sglp->len0 = cpu_to_be32(pbllen);
+
+   *len16 = DIV_ROUND_UP(sizeof(wqe->fr) + sizeof(*sglp), 16);
+   } else {
+   imdp = (struct fw_ri_immd *)(&wqe->fr + 1);
+   imdp->op = FW_RI_DATA_IMMD;
+   imdp->r1 = 0;
+   imdp->r2 = 0;
+   imdp->immdlen = cpu_to_be32(pbllen);
+   p = (__be64 *)(imdp

[PATCH WIP 28/43] IB/core: Introduce new fast registration API

2015-07-21 Thread Sagi Grimberg
The new fast registration verb receives a struct
scatterlist and converts it to a page list under
the verbs API. The user is provided with a new
verb ib_map_mr_sg, and a helper to set the send work
request structure.

The drivers are handed with a generic helper that
converts a scatterlist into a vector of pages.
Given that some drivers have a shadow mapped page list,
I expect that drivers might use their own routines to
avoid the extra copies.

The new registration API is added with fast_reg for
now, but once all drivers and ULPs will be ported, we
can drop the old registration API.

Signed-off-by: Sagi Grimberg 
---
 drivers/infiniband/core/verbs.c | 123 
 include/rdma/ib_verbs.h |  37 
 2 files changed, 160 insertions(+)

diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index beed431..9875163 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1481,3 +1481,126 @@ int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
mr->device->check_mr_status(mr, check_mask, mr_status) : 
-ENOSYS;
 }
 EXPORT_SYMBOL(ib_check_mr_status);
+
+
+/**
+ * ib_map_mr_sg() - Populates MR with a dma mapped SG list
+ * @mr:memory region
+ * @sg:dma mapped scatterlist
+ * @sg_nents:  number of entries in sg
+ * @access:access permissions
+ *
+ * After this completes successfully, the memory region is ready
+ * for fast registration.
+ */
+int ib_map_mr_sg(struct ib_mr *mr,
+struct scatterlist *sg,
+unsigned short sg_nents,
+unsigned int access)
+{
+   int rc;
+
+   if (!mr->device->map_mr_sg)
+   return -ENOSYS;
+
+   rc = mr->device->map_mr_sg(mr, sg, sg_nents);
+   if (!rc)
+   mr->access = access;
+
+   return rc;
+}
+EXPORT_SYMBOL(ib_map_mr_sg);
+
+/**
+ * ib_sg_to_pages() - Convert a sg list to a page vector
+ * @dev:   ib device
+ * @sgl:   dma mapped scatterlist
+ * @sg_nents:  number of entries in sg
+ * @max_pages: maximum pages allowed
+ * @pages: output page vector
+ * @npages:output number of mapped pages
+ * @length:output total byte length
+ * @offset:output first byte offset
+ *
+ * Core service helper for drivers to convert a scatter
+ * list to a page vector. The assumption is that the
+ * sg must meet the following conditions:
+ * - Only the first sg is allowed to have an offset
+ * - All the elements are of the same size - PAGE_SIZE
+ * - The last element is allowed to have length less than
+ *   PAGE_SIZE
+ *
+ * If any of those conditions is not met, the routine will
+ * fail with EINVAL.
+ */
+int ib_sg_to_pages(struct scatterlist *sgl,
+  unsigned short sg_nents,
+  unsigned short max_pages,
+  u64 *pages, u32 *npages,
+  u32 *length, u64 *offset)
+{
+   struct scatterlist *sg;
+   u64 last_end_dma_addr = 0, last_page_addr = 0;
+   unsigned int last_page_off = 0;
+   int i, j = 0;
+
+   /* TODO: We can do better with huge pages */
+
+   *offset = sg_dma_address(&sgl[0]);
+   *length = 0;
+
+   for_each_sg(sgl, sg, sg_nents, i) {
+   u64 dma_addr = sg_dma_address(sg);
+   unsigned int dma_len = sg_dma_len(sg);
+   u64 end_dma_addr = dma_addr + dma_len;
+   u64 page_addr = dma_addr & PAGE_MASK;
+
+   *length += dma_len;
+
+   /* Fail we ran out of pages */
+   if (unlikely(j > max_pages))
+   return -EINVAL;
+
+   if (i && sg->offset) {
+   if (unlikely((last_end_dma_addr) != dma_addr)) {
+   /* gap - fail */
+   goto err;
+   }
+   if (last_page_off + dma_len < PAGE_SIZE) {
+   /* chunk this fragment with the last */
+   last_end_dma_addr += dma_len;
+   last_page_off += dma_len;
+   continue;
+   } else {
+   /* map starting from the next page */
+   page_addr = last_page_addr + PAGE_SIZE;
+   dma_len -= PAGE_SIZE - last_page_off;
+   }
+   }
+
+   do {
+   pages[j++] = page_addr;
+   page_addr += PAGE_SIZE;
+   } while (page_addr < end_dma_addr);
+
+   last_end_dma_addr = end_dma_addr;
+   last_page_addr = end_dma_addr & PAGE_MASK;
+   last_page_off = end_dma_addr & ~PAGE_MASK;
+   }
+
+   *npages = j;
+
+   return 0;
+err:
+   pr_err("RDMA ali

[PATCH WIP 42/43] iser: Accept arbitrary sg lists mapping if the device supports it

2015-07-21 Thread Sagi Grimberg
If the device supports arbitrary sg list mapping (device cap
IB_DEVICE_MAP_ARB_SG set) we allocate the memory regions with
IB_MR_MAP_ARB_SG and skip the bounce buffer work around.

Signed-off-by: Sagi Grimberg 
---
 drivers/infiniband/ulp/iser/iser_memory.c |  4 
 drivers/infiniband/ulp/iser/iser_verbs.c  | 20 
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/drivers/infiniband/ulp/iser/iser_memory.c 
b/drivers/infiniband/ulp/iser/iser_memory.c
index 094cf8a..690f840 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -781,6 +781,10 @@ iser_handle_unaligned_buf(struct iscsi_iser_task *task,
aligned_len = iser_data_buf_aligned_len(mem, device->ib_device,
iser_conn->scsi_sg_tablesize);
if (aligned_len != mem->dma_nents) {
+   if (device->dev_attr.device_cap_flags & IB_DEVICE_MAP_ARB_SG)
+   /* Arbitrary sg support, no need to bounce :) */
+   return 0;
+
err = fall_to_bounce_buf(task, mem, dir);
if (err)
return err;
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c 
b/drivers/infiniband/ulp/iser/iser_verbs.c
index 332f784..978e283 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -281,14 +281,18 @@ void iser_free_fmr_pool(struct ib_conn *ib_conn)
 }
 
 static int
-iser_alloc_reg_res(struct ib_device *ib_device,
+iser_alloc_reg_res(struct iser_device *device,
   struct ib_pd *pd,
   struct iser_reg_resources *res,
   unsigned int size)
 {
int ret;
+   int flags = 0;
 
-   res->mr = ib_alloc_mr(pd, IB_MR_TYPE_FAST_REG, size, 0);
+   if (device->dev_attr.device_cap_flags & IB_DEVICE_MAP_ARB_SG)
+   flags = IB_MR_MAP_ARB_SG;
+
+   res->mr = ib_alloc_mr(pd, IB_MR_TYPE_FAST_REG, size, flags);
if (IS_ERR(res->mr)) {
ret = PTR_ERR(res->mr);
iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret);
@@ -306,7 +310,7 @@ iser_free_reg_res(struct iser_reg_resources *rsc)
 }
 
 static int
-iser_alloc_pi_ctx(struct ib_device *ib_device,
+iser_alloc_pi_ctx(struct iser_device *device,
  struct ib_pd *pd,
  struct iser_fr_desc *desc,
  unsigned int size)
@@ -320,7 +324,7 @@ iser_alloc_pi_ctx(struct ib_device *ib_device,
 
pi_ctx = desc->pi_ctx;
 
-   ret = iser_alloc_reg_res(ib_device, pd, &pi_ctx->rsc, size);
+   ret = iser_alloc_reg_res(device, pd, &pi_ctx->rsc, size);
if (ret) {
iser_err("failed to allocate reg_resources\n");
goto alloc_reg_res_err;
@@ -353,7 +357,7 @@ iser_free_pi_ctx(struct iser_pi_context *pi_ctx)
 }
 
 static struct iser_fr_desc *
-iser_create_fastreg_desc(struct ib_device *ib_device,
+iser_create_fastreg_desc(struct iser_device *device,
 struct ib_pd *pd,
 bool pi_enable,
 unsigned int size)
@@ -365,12 +369,12 @@ iser_create_fastreg_desc(struct ib_device *ib_device,
if (!desc)
return ERR_PTR(-ENOMEM);
 
-   ret = iser_alloc_reg_res(ib_device, pd, &desc->rsc, size);
+   ret = iser_alloc_reg_res(device, pd, &desc->rsc, size);
if (ret)
goto reg_res_alloc_failure;
 
if (pi_enable) {
-   ret = iser_alloc_pi_ctx(ib_device, pd, desc, size);
+   ret = iser_alloc_pi_ctx(device, pd, desc, size);
if (ret)
goto pi_ctx_alloc_failure;
}
@@ -403,7 +407,7 @@ int iser_alloc_fastreg_pool(struct ib_conn *ib_conn,
spin_lock_init(&fr_pool->lock);
fr_pool->size = 0;
for (i = 0; i < cmds_max; i++) {
-   desc = iser_create_fastreg_desc(device->ib_device, device->pd,
+   desc = iser_create_fastreg_desc(device, device->pd,
ib_conn->pi_support, size);
if (IS_ERR(desc)) {
ret = PTR_ERR(desc);
-- 
1.8.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH WIP 43/43] iser: Move unaligned counter increment

2015-07-21 Thread Sagi Grimberg
We don't always use bounce buffers, yet we still update
this counter.

Signed-off-by: Sagi Grimberg 
---
 drivers/infiniband/ulp/iser/iser_memory.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/infiniband/ulp/iser/iser_memory.c 
b/drivers/infiniband/ulp/iser/iser_memory.c
index 690f840..4d3dc1c 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -487,11 +487,8 @@ static int fall_to_bounce_buf(struct iscsi_iser_task 
*iser_task,
  struct iser_data_buf *mem,
  enum iser_data_dir cmd_dir)
 {
-   struct iscsi_conn *iscsi_conn = iser_task->iser_conn->iscsi_conn;
struct iser_device *device = iser_task->iser_conn->ib_conn.device;
 
-   iscsi_conn->fmr_unalign_cnt++;
-
if (iser_debug_level > 0)
iser_data_buf_dump(mem, device->ib_device);
 
@@ -781,6 +778,7 @@ iser_handle_unaligned_buf(struct iscsi_iser_task *task,
aligned_len = iser_data_buf_aligned_len(mem, device->ib_device,
iser_conn->scsi_sg_tablesize);
if (aligned_len != mem->dma_nents) {
+   iser_conn->iscsi_conn->fmr_unalign_cnt++;
if (device->dev_attr.device_cap_flags & IB_DEVICE_MAP_ARB_SG)
/* Arbitrary sg support, no need to bounce :) */
return 0;
-- 
1.8.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH WIP 37/43] xprtrdma: Port to new memory registration API

2015-07-21 Thread Sagi Grimberg
Signed-off-by: Sagi Grimberg 
---
 net/sunrpc/xprtrdma/frwr_ops.c  | 80 ++---
 net/sunrpc/xprtrdma/xprt_rdma.h |  4 ++-
 2 files changed, 47 insertions(+), 37 deletions(-)

diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 517efed..e28246b 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -151,9 +151,13 @@ __frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct 
ib_device *device,
f->fr_mr = ib_alloc_mr(pd, IB_MR_TYPE_FAST_REG, depth, 0);
if (IS_ERR(f->fr_mr))
goto out_mr_err;
-   f->fr_pgl = ib_alloc_fast_reg_page_list(device, depth);
-   if (IS_ERR(f->fr_pgl))
+
+   f->sg = kcalloc(sizeof(*f->sg), depth, GFP_KERNEL);
+   if (IS_ERR(f->sg))
goto out_list_err;
+
+   sg_init_table(f->sg, depth);
+
return 0;
 
 out_mr_err:
@@ -163,7 +167,7 @@ out_mr_err:
return rc;
 
 out_list_err:
-   rc = PTR_ERR(f->fr_pgl);
+   rc = -ENOMEM;
dprintk("RPC:   %s: ib_alloc_fast_reg_page_list status %i\n",
__func__, rc);
ib_dereg_mr(f->fr_mr);
@@ -179,7 +183,7 @@ __frwr_release(struct rpcrdma_mw *r)
if (rc)
dprintk("RPC:   %s: ib_dereg_mr status %i\n",
__func__, rc);
-   ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
+   kfree(r->r.frmr.sg);
 }
 
 static int
@@ -320,10 +324,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct 
rpcrdma_mr_seg *seg,
struct ib_send_wr fastreg_wr, *bad_wr;
u8 key;
int len, pageoff;
-   int i, rc;
-   int seg_len;
-   u64 pa;
-   int page_no;
+   int i, rc, access;
 
mw = seg1->rl_mw;
seg1->rl_mw = NULL;
@@ -344,39 +345,46 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct 
rpcrdma_mr_seg *seg,
if (nsegs > ia->ri_max_frmr_depth)
nsegs = ia->ri_max_frmr_depth;
 
-   for (page_no = i = 0; i < nsegs;) {
-   rpcrdma_map_one(device, seg, direction);
-   pa = seg->mr_dma;
-   for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
-   frmr->fr_pgl->page_list[page_no++] = pa;
-   pa += PAGE_SIZE;
-   }
+   for (i = 0; i < nsegs;) {
+   sg_set_page(&frmr->sg[i], seg->mr_page,
+   seg->mr_len, offset_in_page(seg->mr_offset));
len += seg->mr_len;
-   ++seg;
++i;
-   /* Check for holes */
+   ++seg;
+
+   /* Check for holes - needed?? */
if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
break;
}
+
+   frmr->sg_nents = i;
+   frmr->dma_nents = ib_dma_map_sg(device, frmr->sg,
+   frmr->sg_nents, direction);
+   if (!frmr->dma_nents) {
+   pr_err("RPC:   %s: failed to dma map sg %p sg_nents %d\n",
+   __func__, frmr->sg, frmr->sg_nents);
+   return -ENOMEM;
+   }
+
dprintk("RPC:   %s: Using frmr %p to map %d segments (%d bytes)\n",
__func__, mw, i, len);
 
-   memset(&fastreg_wr, 0, sizeof(fastreg_wr));
-   fastreg_wr.wr_id = (unsigned long)(void *)mw;
-   fastreg_wr.opcode = IB_WR_FAST_REG_MR;
-   fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma + pageoff;
-   fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl;
-   fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
-   fastreg_wr.wr.fast_reg.page_list_len = page_no;
-   fastreg_wr.wr.fast_reg.length = len;
-   fastreg_wr.wr.fast_reg.access_flags = writing ?
-   IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
-   IB_ACCESS_REMOTE_READ;
mr = frmr->fr_mr;
+   access = writing ? IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
+  IB_ACCESS_REMOTE_READ;
+   rc = ib_map_mr_sg(mr, frmr->sg, frmr->sg_nents, access);
+   if (rc) {
+   pr_err("RPC:   %s: failed to map mr %p rc %d\n",
+   __func__, frmr->fr_mr, rc);
+   return rc;
+   }
+
key = (u8)(mr->rkey & 0x00FF);
ib_update_fast_reg_key(mr, ++key);
-   fastreg_wr.wr.fast_reg.rkey = mr->rkey;
+
+   memset(&fastreg_wr, 0, sizeof(fastreg_wr));
+   ib_set_fastreg_wr(mr, mr->rkey, (uintptr_t)mw, false, &fastreg_wr);
 
DECR_CQCOUNT(&r_xprt->rx_ep);
rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr);
@@ -385,15 +

[PATCH WIP 40/43] mlx5: Allocate private context for arbitrary scatterlist registration

2015-07-21 Thread Sagi Grimberg
Signed-off-by: Sagi Grimberg 
---
 drivers/infiniband/hw/mlx5/mlx5_ib.h |  6 ++-
 drivers/infiniband/hw/mlx5/mr.c  | 71 ++--
 2 files changed, 64 insertions(+), 13 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h 
b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 7017a1a..fb3ac22 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -315,11 +315,15 @@ enum mlx5_ib_mtt_access_flags {
 
 struct mlx5_ib_mr {
struct ib_mribmr;
-   u64 *pl;
+   union {
+   __be64  *pl;
+   struct mlx5_klm *klms;
+   };
__be64  *mpl;
dma_addr_t  pl_map;
int ndescs;
int max_descs;
+   int access_mode;
struct mlx5_core_mr mmr;
struct ib_umem *umem;
struct mlx5_shared_mr_info  *smr_info;
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 7a030a2..45209c7 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1168,6 +1168,40 @@ error:
 }
 
 static int
+mlx5_alloc_klm_list(struct ib_device *device,
+   struct mlx5_ib_mr *mr, int ndescs)
+{
+   int size = sizeof(struct mlx5_klm) * ndescs;
+
+   size += max_t(int, MLX5_UMR_ALIGN - ARCH_KMALLOC_MINALIGN, 0);
+   mr->klms = kzalloc(size, GFP_KERNEL);
+   if (!mr->klms)
+   return -ENOMEM;
+
+   mr->pl_map = dma_map_single(device->dma_device, mr->klms,
+   size, DMA_TO_DEVICE);
+   if (dma_mapping_error(device->dma_device, mr->pl_map))
+   goto err;
+
+   return 0;
+err:
+   kfree(mr->klms);
+
+   return -ENOMEM;
+}
+
+static void
+mlx5_free_klm_list(struct mlx5_ib_mr *mr)
+{
+   struct ib_device *device = mr->ibmr.device;
+   int size = mr->max_descs * sizeof(struct mlx5_klm);
+
+   kfree(mr->klms);
+   dma_unmap_single(device->dma_device, mr->pl_map, size, DMA_TO_DEVICE);
+   mr->klms = NULL;
+}
+
+static int
 mlx5_alloc_page_list(struct ib_device *device,
 struct mlx5_ib_mr *mr, int ndescs)
 {
@@ -1222,7 +1256,10 @@ static int clean_mr(struct mlx5_ib_mr *mr)
mr->sig = NULL;
}
 
-   mlx5_free_page_list(mr);
+   if (mr->access_mode == MLX5_ACCESS_MODE_MTT)
+   mlx5_free_page_list(mr);
+   else
+   mlx5_free_klm_list(mr);
 
if (!umred) {
err = destroy_mkey(dev, mr);
@@ -1293,10 +1330,10 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_create_mkey_mbox_in *in;
struct mlx5_ib_mr *mr;
-   int access_mode, err;
-   int ndescs = roundup(max_entries, 4);
+   int ndescs = ALIGN(max_entries, 4);
+   int err;
 
-   if (flags)
+   if (flags & ~IB_MR_MAP_ARB_SG)
return ERR_PTR(-EINVAL);
 
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
@@ -1315,13 +1352,20 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
 
if (mr_type == IB_MR_TYPE_FAST_REG) {
-   access_mode = MLX5_ACCESS_MODE_MTT;
-   in->seg.log2_page_size = PAGE_SHIFT;
+   if (flags & IB_MR_MAP_ARB_SG) {
+   mr->access_mode = MLX5_ACCESS_MODE_KLM;
 
-   err = mlx5_alloc_page_list(pd->device, mr, ndescs);
-   if (err)
-   goto err_free_in;
+   err = mlx5_alloc_klm_list(pd->device, mr, ndescs);
+   if (err)
+   goto err_free_in;
+   } else {
+   mr->access_mode = MLX5_ACCESS_MODE_MTT;
+   in->seg.log2_page_size = PAGE_SHIFT;
 
+   err = mlx5_alloc_page_list(pd->device, mr, ndescs);
+   if (err)
+   goto err_free_in;
+   }
mr->max_descs = ndescs;
} else if (mr_type == IB_MR_TYPE_SIGNATURE) {
u32 psv_index[2];
@@ -1341,7 +1385,7 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
if (err)
goto err_free_sig;
 
-   access_mode = MLX5_ACCESS_MODE_KLM;
+   mr->access_mode = MLX5_ACCESS_MODE_KLM;
mr->sig->psv_memory.psv_idx = psv_index[0];
mr->sig->psv_wire.psv_idx = psv_index[1];
 
@@ -1355,7 +1399,7 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
goto err_free_in;
}
 
-   in->seg.flags = MLX5_PERM_UMR_EN | access_mode;
+   in->seg.flags

[PATCH WIP 41/43] mlx5: Add arbitrary sg list support

2015-07-21 Thread Sagi Grimberg
If ib_alloc_mr is called with IB_MR_MAP_ARB_SG, the driver
allocates a private klm list instead of a private page list.

It also sets the UMR wqe correctly when posting the fast registration.

Also, expose device cap IB_DEVICE_MAP_ARB_SG

Signed-off-by: Sagi Grimberg 
---
 drivers/infiniband/hw/mlx5/main.c |  1 +
 drivers/infiniband/hw/mlx5/mr.c   | 30 ++
 drivers/infiniband/hw/mlx5/qp.c   | 31 ---
 3 files changed, 55 insertions(+), 7 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/main.c 
b/drivers/infiniband/hw/mlx5/main.c
index a90ef7a..2402563 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -249,6 +249,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
if (MLX5_CAP_GEN(mdev, xrc))
props->device_cap_flags |= IB_DEVICE_XRC;
props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
+   props->device_cap_flags |= IB_DEVICE_MAP_ARB_SG;
if (MLX5_CAP_GEN(mdev, sho)) {
props->device_cap_flags |= IB_DEVICE_SIGNATURE_HANDOVER;
/* At this stage no support for signature handover */
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 45209c7..836e717 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1519,12 +1519,42 @@ done:
return ret;
 }
 
+static int
+mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
+  struct scatterlist *sgl,
+  unsigned short sg_nents)
+{
+   struct scatterlist *sg = sgl;
+   u32 lkey = mr->ibmr.device->local_dma_lkey;
+   int i;
+
+   if (sg_nents > mr->max_descs)
+   return -EINVAL;
+
+   mr->ibmr.iova = sg_dma_address(sg);
+   mr->ibmr.length = 0;
+   mr->ndescs = sg_nents;
+
+   for (i = 0; i < sg_nents; i++) {
+   mr->klms[i].va = cpu_to_be64(sg_dma_address(sg));
+   mr->klms[i].bcount = cpu_to_be32(sg_dma_len(sg));
+   mr->klms[i].key = cpu_to_be32(lkey);
+   mr->ibmr.length += sg_dma_len(sg);
+   sg = sg_next(sg);
+   }
+
+   return 0;
+}
+
 int mlx5_ib_map_mr_sg(struct ib_mr *ibmr,
  struct scatterlist *sg,
  unsigned short sg_nents)
 {
struct mlx5_ib_mr *mr = to_mmr(ibmr);
 
+   if (mr->access_mode == MLX5_ACCESS_MODE_KLM)
+   return mlx5_ib_sg_to_klms(mr, sg, sg_nents);
+
return ib_sg_to_pages(sg, sg_nents, mr->max_descs,
  mr->pl, &mr->ndescs,
  &ibmr->length, &ibmr->iova);
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index f0a03aa..3fb0396 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -1909,6 +1909,10 @@ static void set_fastreg_umr_seg(struct 
mlx5_wqe_umr_ctrl_seg *umr,
 {
int ndescs = mr->ndescs;
 
+   if (mr->access_mode == MLX5_ACCESS_MODE_KLM)
+   /* KLMs take twice the size of MTTs */
+   ndescs *= 2;
+
memset(umr, 0, sizeof(*umr));
umr->flags = MLX5_UMR_CHECK_NOT_FREE;
umr->klm_octowords = get_klm_octo(ndescs);
@@ -2012,15 +2016,21 @@ static void set_fastreg_mkey_seg(struct mlx5_mkey_seg 
*seg,
 {
int ndescs = ALIGN(mr->ndescs, 8) >> 1;
 
+   if (mr->access_mode == MLX5_ACCESS_MODE_MTT)
+   seg->log2_page_size = PAGE_SHIFT;
+   else if (mr->access_mode == MLX5_ACCESS_MODE_KLM)
+   /* KLMs take twice the size of MTTs */
+   ndescs *= 2;
+
+
memset(seg, 0, sizeof(*seg));
-   seg->flags = get_umr_flags(mr->ibmr.access) | MLX5_ACCESS_MODE_MTT;
+   seg->flags = get_umr_flags(mr->ibmr.access) | mr->access_mode;
*writ = seg->flags & (MLX5_PERM_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE);
seg->qpn_mkey7_0 = cpu_to_be32((key & 0xff) | 0xff00);
seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL);
seg->start_addr = cpu_to_be64(mr->ibmr.iova);
seg->len = cpu_to_be64(mr->ibmr.length);
seg->xlt_oct_size = cpu_to_be32(ndescs);
-   seg->log2_page_size = PAGE_SHIFT;
 }
 
 static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr,
@@ -2069,12 +2079,19 @@ static void set_fastreg_ds(struct mlx5_wqe_data_seg 
*dseg,
   struct mlx5_ib_pd *pd,
   int writ)
 {
-   u64 perm = MLX5_EN_RD | (writ ? MLX5_EN_WR : 0);
-   int bcount = sizeof(u64) * mr->ndescs;
-   int i;
+   int bcount;
+
+   if (mr->access_mode == MLX5_ACCESS_MODE_MTT) {
+   u64 perm = MLX5_EN_RD | (writ ? MLX5_EN_WR : 0);
+   int i;
+
+   bcount = sizeof(u64) * mr->ndescs;
+

[PATCH WIP 39/43] IB/core: Add arbitrary sg_list support

2015-07-21 Thread Sagi Grimberg
Signed-off-by: Sagi Grimberg 
---
 include/rdma/ib_verbs.h | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index d543fee..cc83c39 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -133,6 +133,7 @@ enum ib_device_cap_flags {
IB_DEVICE_MANAGED_FLOW_STEERING = (1<<29),
IB_DEVICE_SIGNATURE_HANDOVER= (1<<30),
IB_DEVICE_ON_DEMAND_PAGING  = (1<<31),
+   IB_DEVICE_MAP_ARB_SG= (1ULL<<32),
 };
 
 enum ib_signature_prot_cap {
@@ -193,7 +194,7 @@ struct ib_device_attr {
u32 hw_ver;
int max_qp;
int max_qp_wr;
-   int device_cap_flags;
+   u64 device_cap_flags;
int max_sge;
int max_sge_rd;
int max_cq;
@@ -556,6 +557,11 @@ __attribute_const__ int ib_rate_to_mult(enum ib_rate rate);
  */
 __attribute_const__ int ib_rate_to_mbps(enum ib_rate rate);
 
+enum ib_mr_flags {
+   IB_MR_MAP_ARB_SG = 1,
+};
+
+
 enum ib_mr_type {
IB_MR_TYPE_FAST_REG,
IB_MR_TYPE_SIGNATURE,
-- 
1.8.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH WIP 30/43] mlx4: Support the new memory registration API

2015-07-21 Thread Sagi Grimberg
Just duplicated the functions to take the needed
arguments from the private MR context. The old
fast_reg routines will be dropped later.

Signed-off-by: Sagi Grimberg 
---
 drivers/infiniband/hw/mlx4/main.c|  1 +
 drivers/infiniband/hw/mlx4/mlx4_ib.h |  3 +++
 drivers/infiniband/hw/mlx4/mr.c  | 11 +++
 drivers/infiniband/hw/mlx4/qp.c  | 27 +++
 4 files changed, 42 insertions(+)

diff --git a/drivers/infiniband/hw/mlx4/main.c 
b/drivers/infiniband/hw/mlx4/main.c
index 829fcf4..f2d101c 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -2298,6 +2298,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->ib_dev.rereg_user_mr = mlx4_ib_rereg_user_mr;
ibdev->ib_dev.dereg_mr  = mlx4_ib_dereg_mr;
ibdev->ib_dev.alloc_mr  = mlx4_ib_alloc_mr;
+   ibdev->ib_dev.map_mr_sg = mlx4_ib_map_mr_sg;
ibdev->ib_dev.alloc_fast_reg_page_list = 
mlx4_ib_alloc_fast_reg_page_list;
ibdev->ib_dev.free_fast_reg_page_list  = 
mlx4_ib_free_fast_reg_page_list;
ibdev->ib_dev.attach_mcast  = mlx4_ib_mcg_attach;
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h 
b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index a9a4a7f..e5c7292 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -689,6 +689,9 @@ struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
   enum ib_mr_type mr_type,
   u32 max_entries,
   u32 flags);
+int mlx4_ib_map_mr_sg(struct ib_mr *ibmr,
+ struct scatterlist *sg,
+ unsigned short sg_nents);
 struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct 
ib_device *ibdev,
   int 
page_list_len);
 void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list);
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 01e16bc..9a86829 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -574,3 +574,14 @@ int mlx4_ib_fmr_dealloc(struct ib_fmr *ibfmr)
 
return err;
 }
+
+int mlx4_ib_map_mr_sg(struct ib_mr *ibmr,
+ struct scatterlist *sg,
+ unsigned short sg_nents)
+{
+   struct mlx4_ib_mr *mr = to_mmr(ibmr);
+
+   return ib_sg_to_pages(sg, sg_nents, mr->max_pages,
+ mr->pl, &mr->npages,
+ &ibmr->length, &ibmr->iova);
+}
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index c5a3a5f..492e799 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -2401,6 +2401,25 @@ static __be32 convert_access(int acc)
cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_READ);
 }
 
+static void set_fastreg_seg(struct mlx4_wqe_fmr_seg *fseg, struct ib_send_wr 
*wr)
+{
+   struct mlx4_ib_mr *mr = to_mmr(wr->wr.fastreg.mr);
+   int i;
+
+   for (i = 0; i < mr->npages; ++i)
+   mr->mpl[i] = cpu_to_be64(mr->pl[i] | MLX4_MTT_FLAG_PRESENT);
+
+   fseg->flags = convert_access(mr->ibmr.access);
+   fseg->mem_key   = cpu_to_be32(wr->wr.fastreg.key);
+   fseg->buf_list  = cpu_to_be64(mr->pl_map);
+   fseg->start_addr= cpu_to_be64(mr->ibmr.iova);
+   fseg->reg_len   = cpu_to_be64(mr->ibmr.length);
+   fseg->offset= 0; /* XXX -- is this just for ZBVA? */
+   fseg->page_size = cpu_to_be32(PAGE_SHIFT);
+   fseg->reserved[0]   = 0;
+   fseg->reserved[1]   = 0;
+}
+
 static void set_fmr_seg(struct mlx4_wqe_fmr_seg *fseg, struct ib_send_wr *wr)
 {
struct mlx4_ib_fast_reg_page_list *mfrpl = 
to_mfrpl(wr->wr.fast_reg.page_list);
@@ -2759,6 +2778,14 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct 
ib_send_wr *wr,
size += sizeof (struct mlx4_wqe_fmr_seg) / 16;
break;
 
+   case IB_WR_FASTREG_MR:
+   ctrl->srcrb_flags |=
+   cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
+   set_fastreg_seg(wqe, wr);
+   wqe  += sizeof (struct mlx4_wqe_fmr_seg);
+   size += sizeof (struct mlx4_wqe_fmr_seg) / 16;
+   break;
+
case IB_WR_BIND_MW:
ctrl->srcrb_flags |=
cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
-- 
1.8.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH WIP 18/43] cxgb4: Drop c4iw_alloc_fast_reg_mr

2015-07-21 Thread Sagi Grimberg
Signed-off-by: Sagi Grimberg 
---
 drivers/infiniband/hw/cxgb4/iw_cxgb4.h |  1 -
 drivers/infiniband/hw/cxgb4/mem.c  | 51 --
 drivers/infiniband/hw/cxgb4/provider.c |  1 -
 3 files changed, 53 deletions(-)

diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h 
b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 97b2568..886be9c 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -974,7 +974,6 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd,
enum ib_mr_type mr_type,
u32 max_entries,
u32 flags);
-struct ib_mr *c4iw_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth);
 int c4iw_dealloc_mw(struct ib_mw *mw);
 struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
 struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start,
diff --git a/drivers/infiniband/hw/cxgb4/mem.c 
b/drivers/infiniband/hw/cxgb4/mem.c
index 7ee01ce..5ecf4aa 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -910,57 +910,6 @@ err:
return ERR_PTR(ret);
 }
 
-struct ib_mr *c4iw_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth)
-{
-   struct c4iw_dev *rhp;
-   struct c4iw_pd *php;
-   struct c4iw_mr *mhp;
-   u32 mmid;
-   u32 stag = 0;
-   int ret = 0;
-
-   php = to_c4iw_pd(pd);
-   rhp = php->rhp;
-   mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
-   if (!mhp) {
-   ret = -ENOMEM;
-   goto err;
-   }
-
-   mhp->rhp = rhp;
-   ret = alloc_pbl(mhp, pbl_depth);
-   if (ret)
-   goto err1;
-   mhp->attr.pbl_size = pbl_depth;
-   ret = allocate_stag(&rhp->rdev, &stag, php->pdid,
-mhp->attr.pbl_size, mhp->attr.pbl_addr);
-   if (ret)
-   goto err2;
-   mhp->attr.pdid = php->pdid;
-   mhp->attr.type = FW_RI_STAG_NSMR;
-   mhp->attr.stag = stag;
-   mhp->attr.state = 1;
-   mmid = (stag) >> 8;
-   mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
-   if (insert_handle(rhp, &rhp->mmidr, mhp, mmid)) {
-   ret = -ENOMEM;
-   goto err3;
-   }
-
-   PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
-   return &(mhp->ibmr);
-err3:
-   dereg_mem(&rhp->rdev, stag, mhp->attr.pbl_size,
-  mhp->attr.pbl_addr);
-err2:
-   c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
- mhp->attr.pbl_size << 3);
-err1:
-   kfree(mhp);
-err:
-   return ERR_PTR(ret);
-}
-
 struct ib_fast_reg_page_list *c4iw_alloc_fastreg_pbl(struct ib_device *device,
 int page_list_len)
 {
diff --git a/drivers/infiniband/hw/cxgb4/provider.c 
b/drivers/infiniband/hw/cxgb4/provider.c
index 2885aba..7746113 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -557,7 +557,6 @@ int c4iw_register_device(struct c4iw_dev *dev)
dev->ibdev.bind_mw = c4iw_bind_mw;
dev->ibdev.dealloc_mw = c4iw_dealloc_mw;
dev->ibdev.alloc_mr = c4iw_alloc_mr;
-   dev->ibdev.alloc_fast_reg_mr = c4iw_alloc_fast_reg_mr;
dev->ibdev.alloc_fast_reg_page_list = c4iw_alloc_fastreg_pbl;
dev->ibdev.free_fast_reg_page_list = c4iw_free_fastreg_pbl;
dev->ibdev.attach_mcast = c4iw_multicast_attach;
-- 
1.8.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH WIP 29/43] mlx5: Support the new memory registration API

2015-07-21 Thread Sagi Grimberg
Just duplicated the functions to take the needed
arguments from the private MR context. The old
fast_reg routines will be dropped later.

Signed-off-by: Sagi Grimberg 
---
 drivers/infiniband/hw/mlx5/main.c|  1 +
 drivers/infiniband/hw/mlx5/mlx5_ib.h |  3 ++
 drivers/infiniband/hw/mlx5/mr.c  | 11 +
 drivers/infiniband/hw/mlx5/qp.c  | 90 
 4 files changed, 105 insertions(+)

diff --git a/drivers/infiniband/hw/mlx5/main.c 
b/drivers/infiniband/hw/mlx5/main.c
index ce75875..a90ef7a 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1503,6 +1503,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
dev->ib_dev.detach_mcast= mlx5_ib_mcg_detach;
dev->ib_dev.process_mad = mlx5_ib_process_mad;
dev->ib_dev.alloc_mr= mlx5_ib_alloc_mr;
+   dev->ib_dev.map_mr_sg   = mlx5_ib_map_mr_sg;
dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list;
dev->ib_dev.free_fast_reg_page_list  = mlx5_ib_free_fast_reg_page_list;
dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h 
b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index df5e959..7017a1a 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -582,6 +582,9 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
   enum ib_mr_type mr_type,
   u32 max_entries,
   u32 flags);
+int mlx5_ib_map_mr_sg(struct ib_mr *ibmr,
+ struct scatterlist *sg,
+ unsigned short sg_nents);
 struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct 
ib_device *ibdev,
   int 
page_list_len);
 void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list);
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 1075065..7a030a2 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1471,3 +1471,14 @@ int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 
check_mask,
 done:
return ret;
 }
+
+int mlx5_ib_map_mr_sg(struct ib_mr *ibmr,
+ struct scatterlist *sg,
+ unsigned short sg_nents)
+{
+   struct mlx5_ib_mr *mr = to_mmr(ibmr);
+
+   return ib_sg_to_pages(sg, sg_nents, mr->max_descs,
+ mr->pl, &mr->ndescs,
+ &ibmr->length, &ibmr->iova);
+}
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 203c8a4..f0a03aa 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -65,6 +65,7 @@ static const u32 mlx5_ib_opcode[] = {
[IB_WR_SEND_WITH_INV]   = MLX5_OPCODE_SEND_INVAL,
[IB_WR_LOCAL_INV]   = MLX5_OPCODE_UMR,
[IB_WR_FAST_REG_MR] = MLX5_OPCODE_UMR,
+   [IB_WR_FASTREG_MR]  = MLX5_OPCODE_UMR,
[IB_WR_MASKED_ATOMIC_CMP_AND_SWP]   = MLX5_OPCODE_ATOMIC_MASKED_CS,
[IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_MASKED_FA,
[MLX5_IB_WR_UMR]= MLX5_OPCODE_UMR,
@@ -1903,6 +1904,17 @@ static __be64 sig_mkey_mask(void)
return cpu_to_be64(result);
 }
 
+static void set_fastreg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr,
+   struct mlx5_ib_mr *mr)
+{
+   int ndescs = mr->ndescs;
+
+   memset(umr, 0, sizeof(*umr));
+   umr->flags = MLX5_UMR_CHECK_NOT_FREE;
+   umr->klm_octowords = get_klm_octo(ndescs);
+   umr->mkey_mask = frwr_mkey_mask();
+}
+
 static void set_frwr_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
 struct ib_send_wr *wr, int li)
 {
@@ -1994,6 +2006,23 @@ static u8 get_umr_flags(int acc)
MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN;
 }
 
+static void set_fastreg_mkey_seg(struct mlx5_mkey_seg *seg,
+struct mlx5_ib_mr *mr, u32 key,
+int *writ)
+{
+   int ndescs = ALIGN(mr->ndescs, 8) >> 1;
+
+   memset(seg, 0, sizeof(*seg));
+   seg->flags = get_umr_flags(mr->ibmr.access) | MLX5_ACCESS_MODE_MTT;
+   *writ = seg->flags & (MLX5_PERM_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE);
+   seg->qpn_mkey7_0 = cpu_to_be32((key & 0xff) | 0xffffff00);
+   seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL);
+   seg->start_addr = cpu_to_be64(mr->ibmr.iova);
+   seg->len = cpu_to_be64(mr->ibmr.length);
+   seg->xlt_oct_size = cpu_to_be32(ndescs);
+   seg->log2_page_size = PAGE_SHIFT;
+}
+
 static void set_mkey_segment(struct mlx5_mkey_seg *seg

[PATCH WIP 32/43] cxgb3: Support the new memory registration API

2015-07-21 Thread Sagi Grimberg
Just duplicated the functions to take the needed
arguments from the private MR context. The old
fast_reg routines will be dropped later.

Signed-off-by: Sagi Grimberg 
---
 drivers/infiniband/hw/cxgb3/iwch_provider.c | 12 
 drivers/infiniband/hw/cxgb3/iwch_qp.c   | 48 +
 2 files changed, 60 insertions(+)

diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c 
b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index c9368e6..b25cb6a 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -857,6 +857,17 @@ err:
return ERR_PTR(ret);
 }
 
+static int iwch_map_mr_sg(struct ib_mr *ibmr,
+ struct scatterlist *sg,
+ unsigned short sg_nents)
+{
+   struct iwch_mr *mhp = to_iwch_mr(ibmr);
+
+   return ib_sg_to_pages(sg, sg_nents, mhp->attr.pbl_size,
+ mhp->pl, &mhp->npages,
+ &ibmr->length, &ibmr->iova);
+}
+
 static struct ib_fast_reg_page_list *iwch_alloc_fastreg_pbl(
struct ib_device *device,
int page_list_len)
@@ -1455,6 +1466,7 @@ int iwch_register_device(struct iwch_dev *dev)
dev->ibdev.bind_mw = iwch_bind_mw;
dev->ibdev.dealloc_mw = iwch_dealloc_mw;
dev->ibdev.alloc_mr = iwch_alloc_mr;
+   dev->ibdev.map_mr_sg = iwch_map_mr_sg;
dev->ibdev.alloc_fast_reg_page_list = iwch_alloc_fastreg_pbl;
dev->ibdev.free_fast_reg_page_list = iwch_free_fastreg_pbl;
dev->ibdev.attach_mcast = iwch_multicast_attach;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c 
b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index b57c0be..2c30326 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -146,6 +146,49 @@ static int build_rdma_read(union t3_wr *wqe, struct 
ib_send_wr *wr,
return 0;
 }
 
+static int build_fastreg2(union t3_wr *wqe, struct ib_send_wr *wr,
+ u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq)
+{
+   struct iwch_mr *mhp = to_iwch_mr(wr->wr.fastreg.mr);
+   int i;
+   __be64 *p;
+
+   if (mhp->npages > T3_MAX_FASTREG_DEPTH)
+   return -EINVAL;
+   *wr_cnt = 1;
+   wqe->fastreg.stag = cpu_to_be32(wr->wr.fastreg.key);
+   wqe->fastreg.len = cpu_to_be32(mhp->ibmr.length);
+   wqe->fastreg.va_base_hi = cpu_to_be32(mhp->ibmr.iova >> 32);
+   wqe->fastreg.va_base_lo_fbo =
+   cpu_to_be32(mhp->ibmr.iova & 0xffffffff);
+   wqe->fastreg.page_type_perms = cpu_to_be32(
+   V_FR_PAGE_COUNT(mhp->npages) |
+   V_FR_PAGE_SIZE(PAGE_SHIFT - 12) |
+   V_FR_TYPE(TPT_VATO) |
+   V_FR_PERMS(iwch_ib_to_tpt_access(mhp->ibmr.access)));
+   p = &wqe->fastreg.pbl_addrs[0];
+   for (i = 0; i < mhp->npages; i++, p++) {
+
+   /* If we need a 2nd WR, then set it up */
+   if (i == T3_MAX_FASTREG_FRAG) {
+   *wr_cnt = 2;
+   wqe = (union t3_wr *)(wq->queue +
+   Q_PTR2IDX((wq->wptr+1), wq->size_log2));
+   build_fw_riwrh((void *)wqe, T3_WR_FASTREG, 0,
+  Q_GENBIT(wq->wptr + 1, wq->size_log2),
+  0, 1 + mhp->npages - T3_MAX_FASTREG_FRAG,
+  T3_EOP);
+
+   p = &wqe->pbl_frag.pbl_addrs[0];
+   }
+   *p = cpu_to_be64((u64)mhp->pl[i]);
+   }
+   *flit_cnt = 5 + mhp->npages;
+   if (*flit_cnt > 15)
+   *flit_cnt = 15;
+   return 0;
+}
+
 static int build_fastreg(union t3_wr *wqe, struct ib_send_wr *wr,
u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq)
 {
@@ -419,6 +462,11 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr 
*wr,
err = build_fastreg(wqe, wr, &t3_wr_flit_cnt,
 &wr_cnt, &qhp->wq);
break;
+   case IB_WR_FASTREG_MR:
+   t3_wr_opcode = T3_WR_FASTREG;
+   err = build_fastreg2(wqe, wr, &t3_wr_flit_cnt,
+&wr_cnt, &qhp->wq);
+   break;
case IB_WR_LOCAL_INV:
if (wr->send_flags & IB_SEND_FENCE)
t3_wr_flags |= T3_LOCAL_FENCE_FLAG;
-- 
1.8.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH WIP 19/43] cxgb3: Drop iwch_alloc_fast_reg_mr

2015-07-21 Thread Sagi Grimberg
Signed-off-by: Sagi Grimberg 
---
 drivers/infiniband/hw/cxgb3/iwch_provider.c | 47 -
 1 file changed, 47 deletions(-)

diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c 
b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index d0e9e2d..af55b79 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -848,52 +848,6 @@ err:
return ERR_PTR(ret);
 }
 
-static struct ib_mr *iwch_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth)
-{
-   struct iwch_dev *rhp;
-   struct iwch_pd *php;
-   struct iwch_mr *mhp;
-   u32 mmid;
-   u32 stag = 0;
-   int ret = 0;
-
-   php = to_iwch_pd(pd);
-   rhp = php->rhp;
-   mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
-   if (!mhp)
-   goto err;
-
-   mhp->rhp = rhp;
-   ret = iwch_alloc_pbl(mhp, pbl_depth);
-   if (ret)
-   goto err1;
-   mhp->attr.pbl_size = pbl_depth;
-   ret = cxio_allocate_stag(&rhp->rdev, &stag, php->pdid,
-mhp->attr.pbl_size, mhp->attr.pbl_addr);
-   if (ret)
-   goto err2;
-   mhp->attr.pdid = php->pdid;
-   mhp->attr.type = TPT_NON_SHARED_MR;
-   mhp->attr.stag = stag;
-   mhp->attr.state = 1;
-   mmid = (stag) >> 8;
-   mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
-   if (insert_handle(rhp, &rhp->mmidr, mhp, mmid))
-   goto err3;
-
-   PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
-   return &(mhp->ibmr);
-err3:
-   cxio_dereg_mem(&rhp->rdev, stag, mhp->attr.pbl_size,
-  mhp->attr.pbl_addr);
-err2:
-   iwch_free_pbl(mhp);
-err1:
-   kfree(mhp);
-err:
-   return ERR_PTR(ret);
-}
-
 static struct ib_fast_reg_page_list *iwch_alloc_fastreg_pbl(
struct ib_device *device,
int page_list_len)
@@ -1492,7 +1446,6 @@ int iwch_register_device(struct iwch_dev *dev)
dev->ibdev.bind_mw = iwch_bind_mw;
dev->ibdev.dealloc_mw = iwch_dealloc_mw;
dev->ibdev.alloc_mr = iwch_alloc_mr;
-   dev->ibdev.alloc_fast_reg_mr = iwch_alloc_fast_reg_mr;
dev->ibdev.alloc_fast_reg_page_list = iwch_alloc_fastreg_pbl;
dev->ibdev.free_fast_reg_page_list = iwch_free_fastreg_pbl;
dev->ibdev.attach_mcast = iwch_multicast_attach;
-- 
1.8.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH WIP 31/43] ocrdma: Support the new memory registration API

2015-07-21 Thread Sagi Grimberg
Just duplicated the functions to take the needed
arguments from the private MR context. The old
fast_reg routines will be dropped later.
---
 drivers/infiniband/hw/ocrdma/ocrdma_main.c  |  1 +
 drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 67 +
 drivers/infiniband/hw/ocrdma/ocrdma_verbs.h |  3 ++
 3 files changed, 71 insertions(+)

diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c 
b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 47d2814..2dd6b06 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -295,6 +295,7 @@ static int ocrdma_register_device(struct ocrdma_dev *dev)
dev->ibdev.reg_user_mr = ocrdma_reg_user_mr;
 
dev->ibdev.alloc_mr = ocrdma_alloc_mr;
+   dev->ibdev.map_mr_sg = ocrdma_map_mr_sg;
dev->ibdev.alloc_fast_reg_page_list = ocrdma_alloc_frmr_page_list;
dev->ibdev.free_fast_reg_page_list = ocrdma_free_frmr_page_list;
 
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c 
b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index a764cb9..0f32fc4 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -2121,6 +2121,59 @@ static int get_encoded_page_size(int pg_sz)
return i;
 }
 
+static int ocrdma_build_fr2(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
+  struct ib_send_wr *wr)
+{
+   u64 fbo;
+   struct ocrdma_ewqe_fr *fast_reg = (struct ocrdma_ewqe_fr *)(hdr + 1);
+   struct ocrdma_mr *mr = get_ocrdma_mr(wr->wr.fastreg.mr);
+   struct ocrdma_pbl *pbl_tbl = mr->hwmr.pbl_table;
+   struct ocrdma_pbe *pbe;
+   u32 wqe_size = sizeof(*fast_reg) + sizeof(*hdr);
+   int num_pbes = 0, i;
+
+   wqe_size = roundup(wqe_size, OCRDMA_WQE_ALIGN_BYTES);
+
+   hdr->cw |= (OCRDMA_FR_MR << OCRDMA_WQE_OPCODE_SHIFT);
+   hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT);
+
+   if (mr->ibmr.access & IB_ACCESS_LOCAL_WRITE)
+   hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_LOCAL_WR;
+   if (mr->ibmr.access & IB_ACCESS_REMOTE_WRITE)
+   hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_REMOTE_WR;
+   if (mr->ibmr.access & IB_ACCESS_REMOTE_READ)
+   hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_REMOTE_RD;
+   hdr->lkey = wr->wr.fastreg.key;
+   hdr->total_len = mr->ibmr.length;
+
+   fbo = mr->ibmr.iova - mr->pl[0];
+
+   fast_reg->va_hi = upper_32_bits(mr->ibmr.iova);
+   fast_reg->va_lo = (u32) (mr->ibmr.iova & 0xffffffff);
+   fast_reg->fbo_hi = upper_32_bits(fbo);
+   fast_reg->fbo_lo = (u32) fbo & 0xffffffff;
+   fast_reg->num_sges = mr->npages;
+   fast_reg->size_sge = get_encoded_page_size(1 << PAGE_SHIFT);
+
+   pbe = pbl_tbl->va;
+   for (i = 0; i < mr->npages; i++) {
+   u64 buf_addr = mr->pl[i];
+   pbe->pa_lo = cpu_to_le32((u32) (buf_addr & PAGE_MASK));
+   pbe->pa_hi = cpu_to_le32((u32) upper_32_bits(buf_addr));
+   num_pbes += 1;
+   pbe++;
+
+   /* if the pbl is full storing the pbes,
+* move to next pbl.
+   */
+   if (num_pbes == (mr->hwmr.pbl_size/sizeof(u64))) {
+   pbl_tbl++;
+   pbe = (struct ocrdma_pbe *)pbl_tbl->va;
+   }
+   }
+
+   return 0;
+}
 
 static int ocrdma_build_fr(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
   struct ib_send_wr *wr)
@@ -2248,6 +2301,9 @@ int ocrdma_post_send(struct ib_qp *ibqp, struct 
ib_send_wr *wr,
case IB_WR_FAST_REG_MR:
status = ocrdma_build_fr(qp, hdr, wr);
break;
+   case IB_WR_FASTREG_MR:
+   status = ocrdma_build_fr2(qp, hdr, wr);
+   break;
default:
status = -EINVAL;
break;
@@ -3221,3 +3277,14 @@ pbl_err:
kfree(mr);
return ERR_PTR(status);
 }
+
+int ocrdma_map_mr_sg(struct ib_mr *ibmr,
+struct scatterlist *sg,
+unsigned short sg_nents)
+{
+   struct ocrdma_mr *mr = get_ocrdma_mr(ibmr);
+
+   return ib_sg_to_pages(sg, sg_nents, mr->hwmr.num_pbes,
+ mr->pl, &mr->npages,
+ &ibmr->length, &ibmr->iova);
+}
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h 
b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
index d09ff8e..4c60eec 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
@@ -100,6 +100,9 @@ struct ib_mr *ocrdma_alloc_mr(struct ib_pd *pd,
  enum ib_mr_type mr_type,
  u32 max_entries,
  u32 flags);
+int ocrdma_map_mr_sg(struct ib_mr *ibmr,
+struct scatterlist *sg,

[PATCH WIP 38/43] iser-target: Port to new memory registration API

2015-07-21 Thread Sagi Grimberg
Signed-off-by: Sagi Grimberg 
---
 drivers/infiniband/ulp/isert/ib_isert.c | 116 ++--
 drivers/infiniband/ulp/isert/ib_isert.h |   2 -
 2 files changed, 19 insertions(+), 99 deletions(-)

diff --git a/drivers/infiniband/ulp/isert/ib_isert.c 
b/drivers/infiniband/ulp/isert/ib_isert.c
index 94395ce..af1c01d 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -486,10 +486,8 @@ isert_conn_free_fastreg_pool(struct isert_conn *isert_conn)
list_for_each_entry_safe(fr_desc, tmp,
 &isert_conn->fr_pool, list) {
list_del(&fr_desc->list);
-   ib_free_fast_reg_page_list(fr_desc->data_frpl);
ib_dereg_mr(fr_desc->data_mr);
if (fr_desc->pi_ctx) {
-   ib_free_fast_reg_page_list(fr_desc->pi_ctx->prot_frpl);
ib_dereg_mr(fr_desc->pi_ctx->prot_mr);
ib_dereg_mr(fr_desc->pi_ctx->sig_mr);
kfree(fr_desc->pi_ctx);
@@ -517,22 +515,13 @@ isert_create_pi_ctx(struct fast_reg_descriptor *desc,
return -ENOMEM;
}
 
-   pi_ctx->prot_frpl = ib_alloc_fast_reg_page_list(device,
-   ISCSI_ISER_SG_TABLESIZE);
-   if (IS_ERR(pi_ctx->prot_frpl)) {
-   isert_err("Failed to allocate prot frpl err=%ld\n",
- PTR_ERR(pi_ctx->prot_frpl));
-   ret = PTR_ERR(pi_ctx->prot_frpl);
-   goto err_pi_ctx;
-   }
-
pi_ctx->prot_mr = ib_alloc_mr(pd, IB_MR_TYPE_FAST_REG,
  ISCSI_ISER_SG_TABLESIZE, 0);
if (IS_ERR(pi_ctx->prot_mr)) {
isert_err("Failed to allocate prot frmr err=%ld\n",
  PTR_ERR(pi_ctx->prot_mr));
ret = PTR_ERR(pi_ctx->prot_mr);
-   goto err_prot_frpl;
+   goto err_pi_ctx;
}
desc->ind |= ISERT_PROT_KEY_VALID;
 
@@ -552,8 +541,6 @@ isert_create_pi_ctx(struct fast_reg_descriptor *desc,
 
 err_prot_mr:
ib_dereg_mr(pi_ctx->prot_mr);
-err_prot_frpl:
-   ib_free_fast_reg_page_list(pi_ctx->prot_frpl);
 err_pi_ctx:
kfree(pi_ctx);
 
@@ -564,34 +551,18 @@ static int
 isert_create_fr_desc(struct ib_device *ib_device, struct ib_pd *pd,
 struct fast_reg_descriptor *fr_desc)
 {
-   int ret;
-
-   fr_desc->data_frpl = ib_alloc_fast_reg_page_list(ib_device,
-
ISCSI_ISER_SG_TABLESIZE);
-   if (IS_ERR(fr_desc->data_frpl)) {
-   isert_err("Failed to allocate data frpl err=%ld\n",
- PTR_ERR(fr_desc->data_frpl));
-   return PTR_ERR(fr_desc->data_frpl);
-   }
-
fr_desc->data_mr = ib_alloc_mr(pd, IB_MR_TYPE_FAST_REG,
   ISCSI_ISER_SG_TABLESIZE, 0);
if (IS_ERR(fr_desc->data_mr)) {
isert_err("Failed to allocate data frmr err=%ld\n",
  PTR_ERR(fr_desc->data_mr));
-   ret = PTR_ERR(fr_desc->data_mr);
-   goto err_data_frpl;
+   return PTR_ERR(fr_desc->data_mr);
}
fr_desc->ind |= ISERT_DATA_KEY_VALID;
 
isert_dbg("Created fr_desc %p\n", fr_desc);
 
return 0;
-
-err_data_frpl:
-   ib_free_fast_reg_page_list(fr_desc->data_frpl);
-
-   return ret;
 }
 
 static int
@@ -2521,45 +2492,6 @@ unmap_cmd:
return ret;
 }
 
-static int
-isert_map_fr_pagelist(struct ib_device *ib_dev,
- struct scatterlist *sg_start, int sg_nents, u64 *fr_pl)
-{
-   u64 start_addr, end_addr, page, chunk_start = 0;
-   struct scatterlist *tmp_sg;
-   int i = 0, new_chunk, last_ent, n_pages;
-
-   n_pages = 0;
-   new_chunk = 1;
-   last_ent = sg_nents - 1;
-   for_each_sg(sg_start, tmp_sg, sg_nents, i) {
-   start_addr = ib_sg_dma_address(ib_dev, tmp_sg);
-   if (new_chunk)
-   chunk_start = start_addr;
-   end_addr = start_addr + ib_sg_dma_len(ib_dev, tmp_sg);
-
-   isert_dbg("SGL[%d] dma_addr: 0x%llx len: %u\n",
- i, (unsigned long long)tmp_sg->dma_address,
- tmp_sg->length);
-
-   if ((end_addr & ~PAGE_MASK) && i < last_ent) {
-   new_chunk = 0;
-   continue;
-   }
-   new_chunk = 1;
-
-   page = chunk_start & PAGE_MASK;
-   do {
-   fr_pl[n_pages++] = page;
-   isert_dbg("Mapped page_list[%d] page_addr: 0x%llx\n",
- 

[PATCH WIP 36/43] iser: Port to new fast registration api

2015-07-21 Thread Sagi Grimberg
Signed-off-by: Sagi Grimberg 
---
 drivers/infiniband/ulp/iser/iscsi_iser.h  |  6 +
 drivers/infiniband/ulp/iser/iser_memory.c | 40 ---
 drivers/infiniband/ulp/iser/iser_verbs.c  | 16 +
 3 files changed, 17 insertions(+), 45 deletions(-)

diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h 
b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 6c7efe6..88d0ffc 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -413,7 +413,6 @@ struct iser_device {
  *
  * @mr: memory region
  * @fmr_pool:   pool of fmrs
- * @frpl:   fast reg page list used by frwrs
  * @page_vec:   fast reg page list used by fmr pool
  * @mr_valid:   is mr valid indicator
  */
@@ -422,10 +421,7 @@ struct iser_reg_resources {
struct ib_mr *mr;
struct ib_fmr_pool   *fmr_pool;
};
-   union {
-   struct ib_fast_reg_page_list *frpl;
-   struct iser_page_vec *page_vec;
-   };
+   struct iser_page_vec *page_vec;
u8mr_valid:1;
 };
 
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c 
b/drivers/infiniband/ulp/iser/iser_memory.c
index d6d980b..094cf8a 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -732,19 +732,19 @@ static int iser_fast_reg_mr(struct iscsi_iser_task 
*iser_task,
struct iser_reg_resources *rsc,
struct iser_mem_reg *reg)
 {
-   struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
-   struct iser_device *device = ib_conn->device;
-   struct ib_mr *mr = rsc->mr;
-   struct ib_fast_reg_page_list *frpl = rsc->frpl;
struct iser_tx_desc *tx_desc = &iser_task->desc;
+   struct ib_mr *mr = rsc->mr;
struct ib_send_wr *wr;
-   int offset, size, plen;
-
-   plen = iser_sg_to_page_vec(mem, device->ib_device, frpl->page_list,
-  &offset, &size);
-   if (plen * SIZE_4K < size) {
-   iser_err("fast reg page_list too short to hold this SG\n");
-   return -EINVAL;
+   int err;
+   int access = IB_ACCESS_LOCAL_WRITE  |
+IB_ACCESS_REMOTE_WRITE |
+IB_ACCESS_REMOTE_READ;
+
+   err = ib_map_mr_sg(mr, mem->sg, mem->size, access);
+   if (err) {
+   iser_err("failed to map sg %p with %d entries\n",
+mem->sg, mem->dma_nents);
+   return err;
}
 
if (!rsc->mr_valid) {
@@ -753,24 +753,14 @@ static int iser_fast_reg_mr(struct iscsi_iser_task 
*iser_task,
}
 
wr = iser_tx_next_wr(tx_desc);
-   wr->opcode = IB_WR_FAST_REG_MR;
-   wr->wr_id = ISER_FASTREG_LI_WRID;
-   wr->send_flags = 0;
-   wr->wr.fast_reg.iova_start = frpl->page_list[0] + offset;
-   wr->wr.fast_reg.page_list = frpl;
-   wr->wr.fast_reg.page_list_len = plen;
-   wr->wr.fast_reg.page_shift = SHIFT_4K;
-   wr->wr.fast_reg.length = size;
-   wr->wr.fast_reg.rkey = mr->rkey;
-   wr->wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE  |
-   IB_ACCESS_REMOTE_WRITE |
-   IB_ACCESS_REMOTE_READ);
+   ib_set_fastreg_wr(mr, mr->rkey, ISER_FASTREG_LI_WRID,
+ false, wr);
rsc->mr_valid = 0;
 
reg->sge.lkey = mr->lkey;
reg->rkey = mr->rkey;
-   reg->sge.addr = frpl->page_list[0] + offset;
-   reg->sge.length = size;
+   reg->sge.addr = mr->iova;
+   reg->sge.length = mr->length;
 
iser_dbg("fast reg: lkey=0x%x, rkey=0x%x, addr=0x%llx,"
 " length=0x%x\n", reg->sge.lkey, reg->rkey,
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c 
b/drivers/infiniband/ulp/iser/iser_verbs.c
index ecc3265..332f784 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -288,35 +288,21 @@ iser_alloc_reg_res(struct ib_device *ib_device,
 {
int ret;
 
-   res->frpl = ib_alloc_fast_reg_page_list(ib_device, size);
-   if (IS_ERR(res->frpl)) {
-   ret = PTR_ERR(res->frpl);
-   iser_err("Failed to allocate ib_fast_reg_page_list err=%d\n",
-ret);
-   return PTR_ERR(res->frpl);
-   }
-
res->mr = ib_alloc_mr(pd, IB_MR_TYPE_FAST_REG, size, 0);
if (IS_ERR(res->mr)) {
ret = PTR_ERR(res->mr);
iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret);
-   goto fast_reg_mr_failure;
+   return

Re: [PATCH WIP 37/43] xprtrdma: Port to new memory registration API

2015-07-22 Thread Sagi Grimberg



+   for (i = 0; i < nsegs;) {
+   sg_set_page(&frmr->sg[i], seg->mr_page,
+   seg->mr_len, offset_in_page(seg->mr_offset));


Cautionary note: here we’re dealing with both the “contiguous
set of pages” case and the “small region of bytes in a single page”
case. See rpcrdma_convert_iovs(): sometimes RPC send or receive
buffers can be registered (RDMA_NOMSG).


I noticed that (I think). I think this is handled correctly.
What exactly is the caution note here?


mr = frmr->fr_mr;
+   access = writing ? IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
+  IB_ACCESS_REMOTE_READ;
+   rc = ib_map_mr_sg(mr, frmr->sg, frmr->sg_nents, access);


I like this (and the matching ib_dma_unmap_sg). But why wouldn’t
this function be called ib_dma_map_sg() ? The name ib_map_mr_sg()
had me thinking for a moment that this API actually posted the
FASTREG WR, but I see that it doesn’t.


Umm, ib_dma_map_sg is already taken :)

This is what I came up with, it maps the SG elements to the MR
private context.

I'd like to keep the post API for now. It will be possible to
to add a wrapper function that would do:
- dma_map_sg
- ib_map_mr_sg
- init fastreg send_wr
- post_send (maybe)



-   while (seg1->mr_nsegs--)
-   rpcrdma_unmap_one(ia->ri_device, seg++);
+   ib_dma_unmap_sg(ia->ri_device, frmr->sg, frmr->sg_nents, seg1->mr_dir);


->mr_dir was previously set by rpcrdma_map_one(), which you’ve replaced
with ib_map_mr_sg(). So maybe frwr_op_map() needs to save “direction”
in the rpcrdma_frmr.


Yep, that's correct, if I had turned on dma mapping debug it would shout
at me here...

Note, I added in the git repo a patch to allow arbitrary sg lists in
frwr_op_map() which would allow you to skip the holes check... seems to
work with mlx5...

I did notice that mlx4 gives a protection error after the
conversion... I'll look into that...

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH WIP 21/43] mlx5: Allocate a private page list in ib_alloc_mr

2015-07-22 Thread Sagi Grimberg

On 7/22/2015 7:46 PM, Christoph Hellwig wrote:

Just curious: what's the tradeoff between allocating the page list
in the core vs duplicating it in all the drivers?  Does the driver
variant give us any benefits?


It's not necessarily a page list... (i.e. a real scatterlist).
It will make more sense in patch 41/43.

Moreover, as I wrote in the cover-letter. I noticed that several
drivers keep shadows anyway for various reasons. For example mlx4
sets the page list with a preset-bit (related to ODP...) so at
registration time we see the loop:

for (i = 0; i < mr->npages; ++i)
mr->mpl[i] = cpu_to_be64(mr->pl[i] | MLX4_MTT_FLAG_PRESENT);

Given that this is not a single example, I'd expect drivers to skip this
duplication (hopefully).

Sagi.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH WIP 28/43] IB/core: Introduce new fast registration API

2015-07-22 Thread Sagi Grimberg

On 7/22/2015 7:50 PM, Christoph Hellwig wrote:

+/**
+ * ib_map_mr_sg() - Populates MR with a dma mapped SG list
+ * @mr:memory region
+ * @sg:dma mapped scatterlist
+ * @sg_nents:  number of entries in sg
+ * @access:access permissions


I know moving the access flags here was my idea originally, but I seem
convinced by your argument that it might fit in better with the posting
helper.  Or did someone else come up with a better argument that mine
for moving it here?


Not really. I was and still pretty indifferent about it...




+int ib_map_mr_sg(struct ib_mr *mr,
+struct scatterlist *sg,
+unsigned short sg_nents,
+unsigned int access)
+{
+   int rc;
+
+   if (!mr->device->map_mr_sg)
+   return -ENOSYS;
+
+   rc = mr->device->map_mr_sg(mr, sg, sg_nents);


Do we really need a driver callout here?  It seems like we should
just do the map here, and then either have a flag for the mlx5 indirect
mapping, or if you want to keep the abstraction add the method at that
point but make it optional, so that all the other drivers don't need the
boilerplate code.


I commented on this bit in another reply. I think that several drivers
will want to use their own mappings. But I can change that if it's not
the case...



Also it seems like this returns 0/-error.  How do callers like SRP
see that it only did a partial mapping and it needs another MR?


Umm, I think SRP would need to iterate over the sg list and pass partial
SGs to the mapping (I can add a break; statement if we met sg_nents)

It's not perfect, but the idea was not to do backflips here.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH WIP 01/43] IB: Modify ib_create_mr API

2015-07-22 Thread Sagi Grimberg

On 7/22/2015 7:44 PM, Christoph Hellwig wrote:

On Wed, Jul 22, 2015 at 10:34:05AM -0600, Jason Gunthorpe wrote:

+/**
+ * ib_alloc_mr() - Allocates a memory region
+ * @pd:protection domain associated with the region
+ * @mr_type:   memory region type
+ * @max_entries:   maximum registration entries available
+ * @flags: create flags
+ */


Can you update this comment to elaborate some more on what the
parameters are? 'max_entries' is the number of s/g elements or
something?


+enum ib_mr_type {
+   IB_MR_TYPE_FAST_REG,
+   IB_MR_TYPE_SIGNATURE,
  };


Sure would be nice to have some documentation for what these things
do..


Agreed on both counts.  Otherwise this looks pretty good to me.


I can add some more documentation here...
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH WIP 01/43] IB: Modify ib_create_mr API

2015-07-22 Thread Sagi Grimberg

On 7/22/2015 7:34 PM, Jason Gunthorpe wrote:

+/**
+ * ib_alloc_mr() - Allocates a memory region
+ * @pd:protection domain associated with the region
+ * @mr_type:   memory region type
+ * @max_entries:   maximum registration entries available
+ * @flags: create flags
+ */


Can you update this comment to elaborate some more on what the
parameters are? 'max_entries' is the number of s/g elements or
something?


+enum ib_mr_type {
+   IB_MR_TYPE_FAST_REG,
+   IB_MR_TYPE_SIGNATURE,
  };


Sure would be nice to have some documentation for what these things
do..


Do we want to pull ib_get_dma_mr() here with type IB_MR_TYPE_DMA?
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH WIP 01/43] IB: Modify ib_create_mr API

2015-07-22 Thread Sagi Grimberg

On 7/22/2015 8:01 PM, Jason Gunthorpe wrote:

On Wed, Jul 22, 2015 at 07:59:16PM +0300, Sagi Grimberg wrote:

Do we want to pull ib_get_dma_mr() here with type IB_MR_TYPE_DMA?


I want to get rid of ib_get_dma_mr...


That's why I asked :)

So I'll take it as a no...
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH WIP 02/43] IB/mlx4: Support ib_alloc_mr verb

2015-07-22 Thread Sagi Grimberg

On 7/22/2015 7:58 PM, Jason Gunthorpe wrote:

On Wed, Jul 22, 2015 at 09:55:02AM +0300, Sagi Grimberg wrote:


+struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
+  enum ib_mr_type mr_type,
+  u32 max_entries,
+  u32 flags)
+{


This is just a copy of mlx4_ib_alloc_fast_reg_mr with
this added:


+   if (mr_type != IB_MR_TYPE_FAST_REG || flags)
+   return ERR_PTR(-EINVAL);


Are all the driver updates the same? It looks like it.

I'd suggest shortening this patch series, have the core provide the
wrapper immediately:

struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
{
...

 if (pd->device->alloc_mr) {
mr = pd->device->alloc_mr(pd, mr_type, max_entries, flags);
 } else {
if (mr_type != IB_MR_TYPE_FAST_REG || flags ||
!ib_dev->alloc_fast_reg_mr)
return ERR_PTR(-ENOSYS);
mr = pd->device->alloc_fast_reg_mr(..);
 }
}

Then go through the series to remove ib_alloc_fast_reg_mr

Then go through one series to migrate the drivers from
alloc_fast_reg_mr to alloc_mr

Then entirely drop alloc_fast_reg_mr from the driver API.

That should be shorter and easier to read the driver diffs, which is
the major change here.


Yea, it would be better...

Thanks.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH WIP 39/43] IB/core: Add arbitrary sg_list support

2015-07-22 Thread Sagi Grimberg

On 7/22/2015 8:22 PM, Jason Gunthorpe wrote:

On Wed, Jul 22, 2015 at 09:55:39AM +0300, Sagi Grimberg wrote:

+enum ib_mr_flags {
+   IB_MR_MAP_ARB_SG = 1,
+};


Something about this just seems ugly. We are back to what we were
trying to avoid: Adding more types of MRs..

Is this really necessary? Do you really need to know the MR type when
the MR is created, or can the adaptor change types on the fly during
registration?

iSER for example has a rarely used corner case where it needs this,


I can tell you that its anything but a corner case. direct-io, bio
merges, FS operations and PI are examples where most of the sg lists
*will* be "gappy".

Trust me, it's fairly common to see those...


but it just turns on the feature unconditionally right away. This
incures 2x the overhead in the MR allocations and who knows what
performance impact on the adaptor side.


I ran various workloads with this, and performance seems to sustain.



It would be so much better if it could switch to this mode on a SG by
SG list basis.


It would, but unfortunately it can't.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH WIP 38/43] iser-target: Port to new memory registration API

2015-07-22 Thread Sagi Grimberg

On 7/22/2015 8:04 PM, Christoph Hellwig wrote:

@@ -2585,11 +2517,9 @@ isert_fast_reg_mr(struct isert_conn *isert_conn,
struct isert_device *device = isert_conn->device;
struct ib_device *ib_dev = device->ib_device;
struct ib_mr *mr;
struct ib_send_wr fr_wr, inv_wr;
struct ib_send_wr *bad_wr, *wr = NULL;
+   int ret;

if (mem->dma_nents == 1) {
sge->lkey = device->mr->lkey;
@@ -2600,40 +2530,32 @@ isert_fast_reg_mr(struct isert_conn *isert_conn,
return 0;
}

+   if (ind == ISERT_DATA_KEY_VALID)
/* Registering data buffer */
mr = fr_desc->data_mr;
+   else
/* Registering protection buffer */
mr = fr_desc->pi_ctx->prot_mr;

if (!(fr_desc->ind & ind)) {
isert_inv_rkey(&inv_wr, mr);
wr = &inv_wr;
}

+   ret = ib_map_mr_sg(mr, mem->sg, mem->nents, IB_ACCESS_LOCAL_WRITE);
+   if (ret) {
+   isert_err("failed to map sg %p with %d entries\n",
+mem->sg, mem->dma_nents);
+   return ret;
+   }
+
+   isert_dbg("Use fr_desc %p sg_nents %d offset %u\n",
+ fr_desc, mem->nents, mem->offset);
+
/* Prepare FASTREG WR */
memset(&fr_wr, 0, sizeof(fr_wr));
+   ib_set_fastreg_wr(mr, mr->lkey, ISER_FASTREG_LI_WRID,
+ false, &fr_wr);


Shouldn't ib_set_fastreg_wr take care of this memset?  Also it seems
instead of the singalled flag to it we might just set that or
other flags later if we really want to.


The reason I didn't put it in was that ib_send_wr is not a small struct
(92 bytes IIRC). So I'm a bit reluctant to add an unconditional memset.
Maybe it's better that the callers can carefully set it to save some
cycles?




  struct pi_context {
struct ib_mr   *prot_mr;
-   struct ib_fast_reg_page_list   *prot_frpl;
struct ib_mr   *sig_mr;
  };

  struct fast_reg_descriptor {
struct list_headlist;
struct ib_mr   *data_mr;
-   struct ib_fast_reg_page_list   *data_frpl;
u8  ind;
struct pi_context  *pi_ctx;


As a follow on it might be worth to just kill off the separate
pi_context structure here.


Yea we can do that..
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH WIP 00/43] New fast registration API

2015-07-22 Thread Sagi Grimberg

On 7/22/2015 8:10 PM, Christoph Hellwig wrote:

Thanks Sagi,

this looks pretty good in general, various nitpicks nonwithstanding.

The one thing I'm curious about is how we can support SRP with it's
multiple MR support without too much boilerplate code.  One option
would be that pass an array of MRs to the map routines, and while
most callers would just pass in one it would handle multiple for those
drivers that supply them.


We can do that, but I'd prefer not to pollute the API just for this
single use case. What we can do, is add a pool API that would take care
of that. But even then we might end up with different strategies as not
all ULPs can use it the same way (protocol constraints)...

Today SRP has this logic that registers multiple SG aligned partials.
We can just have it pass a partial SG list to what we have today instead
of building the page vectors...

Or if we can come up with something that will keep the API trivial, we
can take care of that too.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH WIP 01/43] IB: Modify ib_create_mr API

2015-07-23 Thread Sagi Grimberg

On 7/22/2015 10:05 PM, Jason Gunthorpe wrote:

On Wed, Jul 22, 2015 at 07:58:23PM +0300, Sagi Grimberg wrote:

On 7/22/2015 7:44 PM, Christoph Hellwig wrote:

On Wed, Jul 22, 2015 at 10:34:05AM -0600, Jason Gunthorpe wrote:

+/**
+ * ib_alloc_mr() - Allocates a memory region
+ * @pd:protection domain associated with the region
+ * @mr_type:   memory region type
+ * @max_entries:   maximum registration entries available
+ * @flags: create flags
+ */


Can you update this comment to elaborate some more on what the
parameters are? 'max_entries' is the number of s/g elements or
something?


+enum ib_mr_type {
+   IB_MR_TYPE_FAST_REG,
+   IB_MR_TYPE_SIGNATURE,
  };


Sure would be nice to have some documentation for what these things
do..


Agreed on both counts.  Otherwise this looks pretty good to me.


I can add some more documentation here...


So, I was wrong, 'max_entries' is the number of page entires, not
really the s/g element limit?


The max_entries stands for the maximum number of sg entries. Other than
that, the SG list must meet the requirements documented in ib_map_mr_sg.

The reason I named it max_entries is because these might not be pages but
real SG elements. It stands for maximum registration entries.

Do you have a better name?



In other words the ULP can submit at most max_entries*PAGE_SIZE bytes
for the non ARB_SG case

For the ARB_SG case.. It is some other more difficult computation?


Not really. The ULP needs to submit sg_nents < max_entries. The SG
list needs to meet the alignment requirements.

For ARB_SG, the condition is the same, but the SG is free from the
alignment constraints.



It is somewhat ugly to ask for this upfront as a hard limit..

Is there any reason we can't use a hint_prealloc_pages as the argument
here, and then realloc in the map routine if the hint turns out to be
too small for a particular s/g list?


The reason is that it is not possible. The memory key allocation
reserves resources in the device translation tables. realloc means
reallocating the memory key. In any event, this is not possible in
the IO path.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH WIP 01/43] IB: Modify ib_create_mr API

2015-07-23 Thread Sagi Grimberg

On 7/23/2015 12:30 PM, Christoph Hellwig wrote:

On Thu, Jul 23, 2015 at 12:57:34AM +, Hefty, Sean wrote:

+enum ib_mr_type {
+   IB_MR_TYPE_FAST_REG,
+   IB_MR_TYPE_SIGNATURE,


If we're going to go through the trouble of changing everything, I vote
for dropping the word 'fast'. It's a marketing term.  It's goofy.  And
the IB spec is goofy for using it.


So IB_MR_TYPE_MEM_REG?



Yes.  Especially as the infrastructure will be usable to support FMR
on legacy adapters as well except that instead of the ib_post_send it'll
need a call to the FMR code at the very end.

While we're at it  wonder if we should consolidate the type and the
flags field as well, as the split between the two is a little confusing.


I can do that.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH WIP 02/43] IB/mlx4: Support ib_alloc_mr verb

2015-07-23 Thread Sagi Grimberg

On 7/22/2015 9:54 PM, Jason Gunthorpe wrote:

On Wed, Jul 22, 2015 at 01:50:01PM -0500, Steve Wise wrote:


43 patches overflows my stack ;)  I agree with Jason's suggestion.


Sagi, you may as well just send the ib_alloc_mr rework as a series and
get it done with, I'd pass off on the core parts of v2.


I'll split that off from the rest.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH WIP 28/43] IB/core: Introduce new fast registration API

2015-07-23 Thread Sagi Grimberg

On 7/22/2015 8:44 PM, Jason Gunthorpe wrote:

On Wed, Jul 22, 2015 at 09:50:12AM -0700, Christoph Hellwig wrote:

+/**
+ * ib_map_mr_sg() - Populates MR with a dma mapped SG list
+ * @mr:memory region
+ * @sg:dma mapped scatterlist
+ * @sg_nents:  number of entries in sg
+ * @access:access permissions


I know moving the access flags here was my idea originally, but I seem
convinced by your argument that it might fit in better with the posting
helper.  Or did someone else come up with a better argument that mine
for moving it here?


I was hoping we'd move the DMA flush and translate into here and make
it mandatory. Is there any reason not to do that?


The reason I didn't added it in was so the ULPs can make sure they meet
the restrictions of ib_map_mr_sg(). Allow SRP to iterate on his
SG list set partials and iSER to detect gaps (they need to dma map
for that).




+int ib_map_mr_sg(struct ib_mr *mr,
+struct scatterlist *sg,
+unsigned short sg_nents,
+unsigned int access)
+{
+   int rc;
+
+   if (!mr->device->map_mr_sg)
+   return -ENOSYS;
+
+   rc = mr->device->map_mr_sg(mr, sg, sg_nents);


Do we really need a driver callout here?  It seems like we should


The call out makes sense to me..

The driver will convert the scatter list directly into whatever HW
representation it needs and prepare everything for posting. Every
driver has a different HW format, so it must be a callout.


Also it seems like this returns 0/-error.  How do callers like SRP
see that it only did a partial mapping and it needs another MR?


I would think it is an error to pass in more sg_nents than the MR was
created with, so SRP should never get a partial mapping as it should
never ask for more than max_entries.

(? Sagi, did I get the intent of this right?)


Error is returned when:
- sg_nents > max_entries
- sg has gaps
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH WIP 28/43] IB/core: Introduce new fast registration API

2015-07-23 Thread Sagi Grimberg

On 7/22/2015 9:02 PM, Jason Gunthorpe wrote:

On Wed, Jul 22, 2015 at 09:55:28AM +0300, Sagi Grimberg wrote:

+/**
+ * ib_map_mr_sg() - Populates MR with a dma mapped SG list
+ * @mr:memory region
+ * @sg:dma mapped scatterlist
+ * @sg_nents:  number of entries in sg
+ * @access:access permissions


Again, related to my prior comments, please have two of these:

ib_map_mr_sg_rkey()
ib_map_mr_sg_lkey()

So we force ULPs to think about what they are doing properly, and we
get a chance to actually force lkey to be local use only for IB.


The lkey/rkey decision is passed in the fastreg post_send().

ib_map_mr_sg is just a mapping API, not the registration itself.




+static inline void
+ib_set_fastreg_wr(struct ib_mr *mr,
+ u32 key,


The key should come from MR. Once the above is split then it is
obvious which key to use.


IMO, it's obvious as it is. I don't see why should anyone get it
wrong.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH WIP 37/43] xprtrdma: Port to new memory registration API

2015-07-23 Thread Sagi Grimberg

On 7/22/2015 10:21 PM, Steve Wise wrote:


On 7/22/2015 1:55 AM, Sagi Grimberg wrote:

Signed-off-by: Sagi Grimberg 
---
  net/sunrpc/xprtrdma/frwr_ops.c  | 80
++---
  net/sunrpc/xprtrdma/xprt_rdma.h |  4 ++-
  2 files changed, 47 insertions(+), 37 deletions(-)


Did you intend to change svcrdma as well?


All the ULPs need to convert. I didn't have a chance to convert
svcrdma yet. Want to take it?
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH WIP 38/43] iser-target: Port to new memory registration API

2015-07-23 Thread Sagi Grimberg

On 7/22/2015 8:57 PM, Jason Gunthorpe wrote:

On Wed, Jul 22, 2015 at 08:33:16PM +0300, Sagi Grimberg wrote:

memset(&fr_wr, 0, sizeof(fr_wr));
+   ib_set_fastreg_wr(mr, mr->lkey, ISER_FASTREG_LI_WRID,
+ false, &fr_wr);


Shouldn't ib_set_fastreg_wr take care of this memset?  Also it seems
instead of the singalled flag to it we might just set that or
other flags later if we really want to.


Seems reasonable.

If you want to micro optimize then just zero the few items that are
defined to be accessed for fastreg, no need to zero the whole
structure. Infact, you may have already done that, so just drop the
memset entirely.


I will.




The reason I didn't put it in was that ib_send_wr is not a small struct
(92 bytes IIRC). So I'm a bit reluctant to add an unconditional memset.
Maybe it's better that the callers can carefully set it to save some
cycles?


If you want to optimize this path, then Sean is right, move the post
into the driver and stop pretending that ib_post_send is a performance
API.

ib_post_fastreg_wr would be a function that needs 3 register passed
arguments and does a simple copy to the driver's actual sendq


That will require to take the SQ lock and write a doorbell for each
registration and post you want to do. I'm confident that constructing
a post chain with a single sq lock acquire and a single doorbell will
be much much better even with conditional jumps and memsets.

svcrdma, isert (and iser - not upstream yet) are doing it. I think that
others should do it too. My tests shows that this makes a difference in
small IO workloads.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH WIP 40/43] mlx5: Allocate private context for arbitrary scatterlist registration

2015-07-23 Thread Sagi Grimberg

On 7/23/2015 12:25 PM, Christoph Hellwig wrote:

On Wed, Jul 22, 2015 at 11:30:48AM -0600, Jason Gunthorpe wrote:

On Wed, Jul 22, 2015 at 09:55:40AM +0300, Sagi Grimberg wrote:

+   size += max_t(int, MLX5_UMR_ALIGN - ARCH_KMALLOC_MINALIGN, 0);
+   mr->klms = kzalloc(size, GFP_KERNEL);
+   if (!mr->klms)
+   return -ENOMEM;
+
+   mr->pl_map = dma_map_single(device->dma_device, mr->klms,
+   size, DMA_TO_DEVICE);


This is a misuse of the DMA API, you must call dma_map_single after
the memory is set by the CPU, not before.

The fast reg varient is using coherent allocations, which is OK..


It's fine as long as you dma_sync_*_for_{cpu,device} in the right
places, which is what a lot of drivers do for longer held allocations.


OK. I'll fix that.

Thanks.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH WIP 00/43] New fast registration API

2015-07-23 Thread Sagi Grimberg

On 7/23/2015 12:28 PM, Christoph Hellwig wrote:

On Wed, Jul 22, 2015 at 08:42:32PM +0300, Sagi Grimberg wrote:

We can do that, but I'd prefer not to pollute the API just for this
single use case. What we can do, is add a pool API that would take care
of that. But even then we might end up with different strategies as not
all ULPs can use it the same way (protocol constraints)...

Today SRP has this logic that registers multiple SG aligned partials.
We can just have it pass a partial SG list to what we have today instead
of building the page vectors...

Or if we can come up with something that will keep the API trivial, we
can take care of that too.



Supporting an array or list of MRs seems pretty easy.


I'm missing the simplicity here...


If you ignore the
weird fallback to the physical DMA case when an MR fails, the SRP memory
registration code isn't significantly more complex than that in iSER for
example.  And I think NFS needs the same support as well, as it allows
using additional MRs when detecting a gap.



This kinda changes the semantics a bit. With this we need to return a
value of how many MRs used to register. It will also make it a bit
sloppy as the actual mapping is driven from the drivers (which use their
internal buffers).

Don't you think that a separate pool API is better for addressing this?
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH WIP 37/43] xprtrdma: Port to new memory registration API

2015-07-23 Thread Sagi Grimberg

On 7/22/2015 7:04 PM, Chuck Lever wrote:


On Jul 22, 2015, at 11:41 AM, Sagi Grimberg  wrote:




+   for (i = 0; i < nsegs;) {
+   sg_set_page(&frmr->sg[i], seg->mr_page,
+   seg->mr_len, offset_in_page(seg->mr_offset));


Cautionary note: here we’re dealing with both the “contiguous
set of pages” case and the “small region of bytes in a single page”
case. See rpcrdma_convert_iovs(): sometimes RPC send or receive
buffers can be registered (RDMA_NOMSG).


I noticed that (I think). I think this is handled correctly.
What exactly is the caution note here?


Well the sg is turned into a page list below your API. Just
want to make sure that we have tested your xprtrdma alterations
with all the ULP possibilities. When you are further along I
can pull this and run my functional tests.



mr = frmr->fr_mr;
+   access = writing ? IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
+  IB_ACCESS_REMOTE_READ;
+   rc = ib_map_mr_sg(mr, frmr->sg, frmr->sg_nents, access);


I like this (and the matching ib_dma_unmap_sg). But why wouldn’t
this function be called ib_dma_map_sg() ? The name ib_map_mr_sg()
had me thinking for a moment that this API actually posted the
FASTREG WR, but I see that it doesn’t.


Umm, ib_dma_map_sg is already taken :)

This is what I came up with, it maps the SG elements to the MR
private context.

I'd like to keep the post API for now. It will be possible to
to add a wrapper function that would do:
- dma_map_sg
- ib_map_mr_sg
- init fastreg send_wr
- post_send (maybe)


Where xprtrdma might improve is by setting up all the FASTREG
WRs for one RPC with a single chain and post_send. We could do
that with your INDIR_MR concept, for example.


BTW, it would be great if you can play with it a little bit. I'm more
confident with the iSER part... I added two small fixes when I tested
with mlx4. It seems to work...





-   while (seg1->mr_nsegs--)
-   rpcrdma_unmap_one(ia->ri_device, seg++);
+   ib_dma_unmap_sg(ia->ri_device, frmr->sg, frmr->sg_nents, seg1->mr_dir);


->mr_dir was previously set by rpcrdma_map_one(), which you’ve replaced
with ib_map_mr_sg(). So maybe frwr_op_map() needs to save “direction”
in the rpcrdma_frmr.


Yep, that's correct, if I had turned on dma mapping debug it would shout
at me here...

Note, I added in the git repo a patch to allow arbitrary sg lists in
frwr_op_map() which would allow you to skip the holes check... seems to
work with mlx5...

I did notice that mlx4 gives a protection error after the conversion...
I'll look into that...


Should also get Steve and Devesh to try this with their adapters.


Ah, yes please. I've only compiled tested drivers other than mlx4, mlx5
which means there is a 99.9% (probably 100%) that it doesn't work.

It would be great to get help on porting the rest of the ULPs as well,
but that can wait until we converge on the API...
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 01/10] IB/core: Guarantee that a local_dma_lkey is available

2015-07-23 Thread Sagi Grimberg

On 7/23/2015 2:34 AM, Jason Gunthorpe wrote:

Every single ULP requires a local_dma_lkey to do anything with
a QP, so let us ensure one exists for every PD created.

If the driver can supply a global local_dma_lkey then use that, otherwise
ask the driver to create a local use all physical memory MR associated
with the new PD.

Signed-off-by: Jason Gunthorpe 
Reviewed-by: Sagi Grimberg 
Acked-by: Christoph Hellwig 
---
  drivers/infiniband/core/verbs.c | 40 +++-
  include/rdma/ib_verbs.h |  2 ++
  2 files changed, 37 insertions(+), 5 deletions(-)

diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index bac3fb406a74..1ddf06314f36 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -213,24 +213,54 @@ EXPORT_SYMBOL(rdma_port_get_link_layer);

  /* Protection domains */

+/* Return a pd for in-kernel use that has a local_dma_lkey which provides
+   local access to all physical memory. */


Why not kdoc style? we need to move the ib_verbs.h kdocs here anyway.
Might be a good chance to do that for ib_alloc_pd().


  struct ib_pd *ib_alloc_pd(struct ib_device *device)
  {
struct ib_pd *pd;
+   struct ib_device_attr devattr;
+   int rc;
+
+   rc = ib_query_device(device, &devattr);
+   if (rc)
+   return ERR_PTR(rc);

pd = device->alloc_pd(device, NULL, NULL);
+   if (IS_ERR(pd))
+   return pd;
+
+   pd->device = device;
+   pd->uobject = NULL;
+   pd->local_mr = NULL;
+   atomic_set(&pd->usecnt, 0);
+
+   if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
+   pd->local_dma_lkey = device->local_dma_lkey;
+   else {
+   struct ib_mr *mr;
+
+   mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE);
+   if (IS_ERR(mr)) {
+   ib_dealloc_pd(pd);
+   return (struct ib_pd *)mr;
+   }

-   if (!IS_ERR(pd)) {
-   pd->device  = device;
-   pd->uobject = NULL;
-   atomic_set(&pd->usecnt, 0);
+   pd->local_mr = mr;
+   pd->local_dma_lkey = pd->local_mr->lkey;
}
-
return pd;
  }
+
  EXPORT_SYMBOL(ib_alloc_pd);


You have an extra newline here.



  int ib_dealloc_pd(struct ib_pd *pd)
  {
+   if (pd->local_mr) {
+   if (ib_dereg_mr(pd->local_mr))
+   return -EBUSY;
+   pd->local_mr = NULL;
+   }
+
if (atomic_read(&pd->usecnt))
return -EBUSY;

diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 986fddb08579..cfda95d7b067 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1255,6 +1255,8 @@ struct ib_pd {
struct ib_device   *device;
struct ib_uobject  *uobject;
atomic_tusecnt; /* count all resources */
+   struct ib_mr   *local_mr;
+   u32 local_dma_lkey;


Maybe its better to place the local_dma_lkey in the first cacheline as
it is normally accessed in the hot path?

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 06/10] IB/iser: Use pd->local_dma_lkey

2015-07-23 Thread Sagi Grimberg

On 7/23/2015 2:34 AM, Jason Gunthorpe wrote:

Replace all lkeys with pd->local_dma_lkey. This driver does not support
iWarp, so this is safe.

The insecure use of ib_get_dma_mr is thus isolated to an rkey, and this
looks trivially fixed by forcing the use of registration in a future
patch.

Signed-off-by: Jason Gunthorpe 


Looks good.

Reviewed-by: Sagi Grimberg 
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 08/10] IB/srp: Use pd->local_dma_lkey

2015-07-23 Thread Sagi Grimberg

On 7/23/2015 2:34 AM, Jason Gunthorpe wrote:

Replace all lkeys with pd->local_dma_lkey. This driver does not support
iWarp, so this is safe.

The insecure use of ib_get_dma_mr is thus isolated to an rkey, and will
have to be fixed separately.

Signed-off-by: Jason Gunthorpe 


Looks good.

Reviewed-by: Sagi Grimberg 
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 07/10] iser-target: Remove ib_get_dma_mr calls

2015-07-23 Thread Sagi Grimberg

On 7/23/2015 2:34 AM, Jason Gunthorpe wrote:

The pd now has a local_dma_lkey member which completely replaces
ib_get_dma_mr, use it instead.

Signed-off-by: Jason Gunthorpe 


Looks good.

Reviewed-by: Sagi Grimberg 
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 09/10] ib_srpt: Remove ib_get_dma_mr calls

2015-07-23 Thread Sagi Grimberg

On 7/23/2015 2:34 AM, Jason Gunthorpe wrote:

The pd now has a local_dma_lkey member which completely replaces
ib_get_dma_mr, use it instead.

Signed-off-by: Jason Gunthorpe 


Looks good.

Reviewed-by: Sagi Grimberg 
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 00/10] IB: Replace safe uses for ib_get_dma_mr with pd->local_dma_lkey

2015-07-23 Thread Sagi Grimberg



Sagi, IB/iser should have special attention paid, as it is less clear to me if
it got everything.


It looks fine. I'll pull those as well.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH WIP 28/43] IB/core: Introduce new fast registration API

2015-07-23 Thread Sagi Grimberg

On 7/23/2015 7:14 PM, Jason Gunthorpe wrote:

On Thu, Jul 23, 2015 at 01:19:16PM +0300, Sagi Grimberg wrote:

Again, related to my prior comments, please have two of these:

ib_map_mr_sg_rkey()
ib_map_mr_sg_lkey()

So we force ULPs to think about what they are doing properly, and we
get a chance to actually force lkey to be local use only for IB.


The lkey/rkey decision is passed in the fastreg post_send().


That is too late to check the access flags.


Why? the access permissions are kept in the mr context?
I can move it to the post interface if it makes more sense.
the access is kind of out of place in the mapping routine anyway...

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH WIP 38/43] iser-target: Port to new memory registration API

2015-07-23 Thread Sagi Grimberg

On 7/23/2015 7:31 PM, Jason Gunthorpe wrote:

On Thu, Jul 23, 2015 at 01:27:23PM +0300, Sagi Grimberg wrote:

ib_post_fastreg_wr would be a function that needs 3 register passed
arguments and does a simple copy to the driver's actual sendq


That will require to take the SQ lock and write a doorbell for each
registration and post you want to do. I'm confident that constructing
a post chain with a single sq lock acquire and a single doorbell will
be much much better even with conditional jumps and memsets.


You are still thinking at a micro level, the ULP should be working at
a higher level and requesting the MR(s) and the actual work together
so the driver can run the whole chain of posts without extra stack
traffic, locking or doorbells.


But I'd also want to chain the subsequent RDMA(s) or SEND (with the
rkey(s) under the same post.

I'm sorry but the idea of handling memory region mapping (possibly more
than one), detecting gaps and deciding on the strategy of what to do
and who knows what else under the send queue lock doesn't seem like a
good idea, its a complete overkill IMO.

I don't mean to be negative about your ideas, I just don't think that
doing all the work in the drivers is going to get us to a better place.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 00/10] IB: Replace safe uses for ib_get_dma_mr with pd->local_dma_lkey

2015-07-26 Thread Sagi Grimberg



If we want security by default then I propose not only to change the default
value of register_always from false into true but also to change the default
value of prefer_fr from false into true such that fast registration becomes
the default instead of FMR.


Yes, I was frowning at that stuff too.. We are trying to get rid of
FMR, so nothing should prefer it over FRWR...

Sagi, perhaps that belongs in your MR unification series?


I don't see how this fits in.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH WIP 01/43] IB: Modify ib_create_mr API

2015-07-26 Thread Sagi Grimberg

On 7/23/2015 10:08 PM, Jason Gunthorpe wrote:

On Thu, Jul 23, 2015 at 01:07:56PM +0300, Sagi Grimberg wrote:

On 7/22/2015 10:05 PM, Jason Gunthorpe wrote:
The reason I named max_entries is because might might not be pages but
real SG elements. It stands for maximum registration entries.

Do you have a better name?


I wouldn't try and be both..

Use 'max_num_sg' and document that no aggregate scatterlist with
length larger than 'max_num_sg*PAGE_SIZE' or with more entries than
max_num_sg can be submitted?

Maybe document with ARB_SG that it is not length limited?


OK, I can do that.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH WIP 28/43] IB/core: Introduce new fast registration API

2015-07-26 Thread Sagi Grimberg


I would like to see the kdoc for ib_map_mr_sg explain exactly what is
required of the caller, maybe just hoist this bit from the
ib_sg_to_pages


I'll add the kdoc.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH WIP 28/43] IB/core: Introduce new fast registration API

2015-07-26 Thread Sagi Grimberg

On 7/23/2015 8:55 PM, Jason Gunthorpe wrote:

On Thu, Jul 23, 2015 at 01:15:16PM +0300, Sagi Grimberg wrote:

I was hoping we'd move the DMA flush and translate into here and make
it mandatory. Is there any reason not to do that?


The reason I didn't added it in was so the ULPs can make sure they meet
the restrictions of ib_map_mr_sg(). Allow SRP to iterate on his
SG list set partials and iSER to detect gaps (they need to dma map
for that).


The ULP can always get the sg list's virtual address to check for
gaps. Page aligned gaps are always OK.


I guess I can pull DMA mapping in there, but we will need an opposite
routine ib_umap_mr_sg() since it'll be weird if the ULP will do dma
unmap without doing the map...



BTW, the logic in ib_sg_to_pages should be checking that directly, as
coded, it won't work with swiotlb:

// Only the first SG entry can start unaligned
if (i && page_addr != dma_addr)
 return EINVAL;
// Only the last SG entry can end unaligned
if ((page_addr + dma_len) & PAGE_MASK != end_dma_addr)
  if (!is_last)
  return EINVAL;

Don't use sg->offset after dma mapping.

The biggest problem with checking the virtual address is
swiotlb. However, if swiotlb is used this API is basically broken as
swiotlb downgrades everything to a 2k alignment, which means we only
ever get 1 s/g entry.


Can you explain what you mean by "downgrades everything to a 2k
alignment"? If the ULP is responsible for PAGE_SIZE alignment, then
how would this get out of alignment with swiotlb?
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH WIP 28/43] IB/core: Introduce new fast registration API

2015-07-26 Thread Sagi Grimberg

On 7/23/2015 9:51 PM, Jason Gunthorpe wrote:

On Thu, Jul 23, 2015 at 07:47:14PM +0300, Sagi Grimberg wrote:


So we force ULPs to think about what they are doing properly, and we
get a chance to actually force lkey to be local use only for IB.


The lkey/rkey decision is passed in the fastreg post_send().


That is too late to check the access flags.


Why? the access permissions are kept in the mr context?


Sure, one could do if (key == mr->lkey) .. check lkey flags in the
post, but that seems silly considering we want the post inlined..


Why should we check the lkey/rkey access flags in the post?




I can move it to the post interface if it makes more sense.
the access is kind of out of place in the mapping routine anyway...


All the dma routines have an access equivalent during map, I don't
think it is out of place..

To my mind, the map is the point where the MR should crystallize into
an rkey or lkey MR, not at the post.


I'm not sure I understand why the lkey/rkey should be set at the map
routine. To me, it seems more natural to map_mr_sg and then either
register the lkey or the rkey.

It's easy enough to move the key arg to ib_map_mr_sg, but I don't see a
good reason why at the moment.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH V6 1/9] RDMA/iser: Limit sg tablesize and max_sectors to device fastreg max depth

2015-07-26 Thread Sagi Grimberg

On 7/24/2015 9:40 PM, Steve Wise wrote:




-Original Message-
From: Jason Gunthorpe [mailto:jguntho...@obsidianresearch.com]
Sent: Friday, July 24, 2015 11:41 AM
To: Steve Wise
Cc: dledf...@redhat.com; infinip...@intel.com; sa...@mellanox.com; 
ogerl...@mellanox.com; r...@mellanox.com; linux-
r...@vger.kernel.org; e...@mellanox.com; target-de...@vger.kernel.org; 
linux-...@vger.kernel.org; bfie...@fieldses.org
Subject: Re: [PATCH V6 1/9] RDMA/iser: Limit sg tablesize and max_sectors to 
device fastreg max depth

On Fri, Jul 24, 2015 at 11:18:21AM -0500, Steve Wise wrote:

Currently the sg tablesize, which dictates fast register page list
depth to use, does not take into account the limits of the rdma device.
So adjust it once we discover the device fastreg max depth limit.  Also
adjust the max_sectors based on the resulting sg tablesize.


Huh. How does this relate to the max_page_list_len argument:

  struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len)

Shouldn't max_fast_reg_page_list_len be checked during the above?

Ie does this still make sense:

drivers/infiniband/ulp/iser/iser_verbs.c:   desc->data_mr = 
ib_alloc_fast_reg_mr(pd, ISCSI_ISER_SG_TABLESIZE + 1);

?

The only ULP that checks this is SRP, so basically, all our ULPs are
probably quietly broken? cxgb3 has a limit of 10 (!?!?!!)



Yea seems like some drivers need to enforce this in ib_alloc_fast_reg_mr() as 
well as ib_alloc_fast_reg_page_list(), and ULPs need
to not exceed the device max.

I will fix iser to limit the mr and page_list allocation based on the device 
max.


Steve, I have a patch that addresses this in the pipe.
The patch is support for up to 8MB transfer size for 4.3 (hopefully).
So no need for you to tackle this.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH V6 1/9] RDMA/iser: Limit sg tablesize and max_sectors to device fastreg max depth

2015-07-26 Thread Sagi Grimberg

On 7/24/2015 10:14 PM, Jason Gunthorpe wrote:

On Fri, Jul 24, 2015 at 01:40:17PM -0500, Steve Wise wrote:

Huh. How does this relate to the max_page_list_len argument:

  struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len)

Shouldn't max_fast_reg_page_list_len be checked during the above?

Ie does this still make sense:

drivers/infiniband/ulp/iser/iser_verbs.c:   desc->data_mr = 
ib_alloc_fast_reg_mr(pd, ISCSI_ISER_SG_TABLESIZE + 1);

?

The only ULP that checks this is SRP, so basically, all our ULPs are
probably quietly broken? cxgb3 has a limit of 10 (!?!?!!)



Yea seems like some drivers need to enforce this in ib_alloc_fast_reg_mr() as 
well as ib_alloc_fast_reg_page_list(), and ULPs need
to not exceed the device max.


Great, Sagi, can you incorporate that in your series so that
ib_alloc_mr's max_entries is checked against
max_fast_reg_page_list_len and EINVAL's if it is too great?


Yes. I'll take care of that.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH V6 4/9] svcrdma: Use max_sge_rd for destination read depths

2015-07-26 Thread Sagi Grimberg



@@ -1059,6 +1062,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt 
*xprt)
ntohs(((struct sockaddr_in *)&newxprt->sc_cm_id->
   route.addr.dst_addr)->sin_port),
newxprt->sc_max_sge,
+   newxprt->sc_max_sge_rd,
newxprt->sc_sq_depth,
newxprt->sc_max_requests,
newxprt->sc_ord);



With the above patch change, we have no more users of the recently created 
rdma_cap_read_multi_sge().  Should I add a patch to remove it?


Yes please.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH V6 6/9] isert: Rename IO functions to more descriptive names

2015-07-26 Thread Sagi Grimberg

On 7/24/2015 7:18 PM, Steve Wise wrote:

This is in preparation for adding new FRMR-only IO handlers
for devices that support FRMR and not PI.


Steve,

I've given this some thought and I think we should avoid splitting
logic from PI and iWARP. The reason (other than code duplication) is
that currently the iser target support only up to 1MB IOs. I have some
code (not done yet) to support larger IOs by using multiple
registrations  per IO (with or without PI).
With a little tweaking I think we can get iwarp to fit in too...

So, do you mind if I take a crack at it?
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH V6 9/9] isert: Support iWARP transports using FRMRs

2015-07-26 Thread Sagi Grimberg

On 7/24/2015 10:24 PM, Jason Gunthorpe wrote:

On Fri, Jul 24, 2015 at 01:48:09PM -0500, Steve Wise wrote:

The use of FRWR for RDMA READ should be iWarp specific, IB shouldn't
pay that overhead. I am expecting to see a cap_rdma_read_rkey or
something in here ?


Ok.  But cap_rdma_read_rkey() doesn't really describe the
requirement.  The requirement is rkey + REMOTE_WRITE.  So it is more
like rdma_cap_read_requires_remote_write() which is ugly and too
long (but descriptive)...


I don't care much what name you pick, just jam something like this in
the description


I think we can just do if (signature || iwarp) use fastreg else
use local_dma_lkey.



  If set then RDMA_READ must be performed by mapping the local
  buffers through a rkey MR with ACCESS_REMOTE_WRITE enabled.
  The rkey of this MR should be passed in as the sg_lists's lkey for
  IB_WR_RDMA_READ_WITH_INV.


I think this would be an incremental patch and not as part of iwarp
support.

Question though, wouldn't it be better to do a single RDMA_READ to say
4 registered keys rather than RDMA_READ_WITH_INV for each?
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH V6 6/9] isert: Rename IO functions to more descriptive names

2015-07-26 Thread Sagi Grimberg

On 7/26/2015 1:43 PM, Christoph Hellwig wrote:

On Sun, Jul 26, 2015 at 01:08:16PM +0300, Sagi Grimberg wrote:

I've given this some thought and I think we should avoid splitting
logic from PI and iWARP. The reason (other than code duplication) is
that currently the iser target support only up to 1MB IOs. I have some
code (not done yet) to support larger IOs by using multiple
registrations  per IO (with or without PI).


Just curious: How is this going to work with iSER only having a single
rkey/offset/len field?



Good question,

On the wire iser sends a single rkey, but the target is allowed to
transfer the data however it wants to.

Say that the local target HCA supports only 32 pages (128K bytes for 4K
pages) registration and the initiator sent:
rkey=0x1234
address=0x
length=512K

The target would allocate a 512K buffer and:
register offset 0-128K to lkey=0x1
register offset 128K-256K to lkey=0x2
register offset 256K-384K to lkey=0x3
register offset 384K-512K to lkey=0x4

then constructs sg_list as:
sg_list[0] = {addr=buf, length=128K, lkey=0x1}
sg_list[1] = {addr=buf+128K, length=128K, lkey=0x2}
sg_list[2] = {addr=buf+256K, length=128K, lkey=0x3}
sg_list[3] = {addr=buf+384K, length=128K, lkey=0x4}

Then set rdma_read wr with:
rdma_r_wr.sg_list=&sg_list
rdma_r_wr.rdma.addr=0x
rdma_r_wr.rdma.rkey=0x1234

post_send(rdma_r_wr);

Ideally, the post contains a chain of all 4 registrations and the
rdma_read (and an opportunistic good scsi response).
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] mlx5: Expose correct page_size_cap in device attributes

2015-07-26 Thread Sagi Grimberg

On 7/24/2015 12:48 AM, Jason Gunthorpe wrote:

On Thu, Jul 23, 2015 at 05:41:38PM -0400, Doug Ledford wrote:


I assume this prevents the driver from working at all on certain arches
(like ppc with 64k page size)?


Nothing uses page_size_cap correctly, so it has no impact.

Sagi, that is a good point, your generic code for the cleanup series
really should check that PAGE_SIZE is in page_size_cap and at least
fail the mr allocation if it isn't...


Yea, that's doable...
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH V6 6/9] isert: Rename IO functions to more descriptive names

2015-07-26 Thread Sagi Grimberg

On 7/26/2015 6:53 PM, Christoph Hellwig wrote:

On Sun, Jul 26, 2015 at 02:00:51PM +0300, Sagi Grimberg wrote:

On the wire iser sends a single rkey, but the target is allowed to
transfer the data however it wants to.


So you're trying to get above the limit of a single RDMA READ, not
above the limit for memory registration in the initiator?


Correct.


 In that case your explanation makes sense, that's just not what I expected
to be the limiting factor.



In the initiator case, there is no way to support transfer size that
exceeds the device registration length capabilities (unless we start
using higher-order atomic allocations which we won't).
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH V6 6/9] isert: Rename IO functions to more descriptive names

2015-07-26 Thread Sagi Grimberg



Ideally, the post contains a chain of all 4 registrations and the
rdma_read (and an opportunistic good scsi response).


Just to be clear: This example is for IB only, correct?  IW would
require rkeys with REMOTE_WRITE and 4 read wrs.


My assumption is that it would depend on max_sge_rd.

IB only? iWARP by definition isn't capable of doing rdma_read to
more than one scatter? Anyway, we'll need to calculate the number
of RDMA_READs.


And you're ignoring invalidation wrs (or read-with-inv) in the example...


Yes, didn't want to inflate the example too much...
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Bug: Discover more than 1 iSER gives -- isert: isert_handle_wc: wr... status {9,1} vend_err {8a,d7) -- & -- conn error (1011)

2015-07-28 Thread Sagi Grimberg

On 7/28/2015 5:25 AM, james harvey wrote:

Two up to date arch systems.  Kernel 4.1.2 (Arch -2).

2 Mellanox MT25418 [ConnectX VPI PCIe 2.0 2.5GT/s - IB DDR / 10GigE]
(rev a0) running mlx4_core driver v2.2-1 (Feb, 2014.)  Both on most
recent firmware for PSID MT_04A0110002, FW Version 2.9.1000.  Systems
directly connected, no switches.  InfiniBand otherwise works great,
through VERY extensive testing.

Running OpenFabrics most recent releases of everything (release
versions, not git versions.)

Open-iscsi 2.0_873-6.

targetcli-fb 2.1.fb41-1, python-rtslib-fb 2.1.fb57-1, and
python-configshell-fb 1.1.fb18-1.



I can't discover more than 1 iSER device working at a time.  Using
IPoIB lets me discover as many as I want.


Hi James,

This is compatibility issue with open-iscsi user-space tools.
Since kernel 3.13 iser is capable of doing discovery over RDMA, but that
required a user-space tools adjustment that is missing in your
open-iscsi.

Are you using the open-iscsi package provided with your distro (and if
so, which one is it)? Given that you are using an upgraded kernel, you
need to upgrade your open-iscsi as well.

I think it would be best to just use the upstream version:
g...@github.com:mikechristie/open-iscsi.git

Cheers,
Sagi.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] xprtrdma: take vendor driver refcount at client

2015-07-28 Thread Sagi Grimberg

On 7/28/2015 2:01 AM, Devesh Sharma wrote:

Thanks Chuck Lever for the valuable feedback and suggestions.

This is a rework of the following patch sent almost a year back:
http://www.mail-archive.com/linux-rdma%40vger.kernel.org/msg20730.html

In presence of active mount if someone tries to rmmod vendor-driver, the
command remains stuck forever waiting for destruction of all rdma-cm-id.
in worst case client can crash during shutdown with active mounts.


Ouch, taking a reference on the module preventing it from unloading is
not very well behaved (putting it nicely). That's also breaking the
layering of ULPs <-> core <-> provider scheme.

Why not just cleanup everything upon DEVICE_REMOVAL?



The existing code assumes that ia->ri_id->device cannot change during
the lifetime of a transport. Lifting that assumption is a long chain
of work, and is in plan.

The community decided that preventing the hang right now is more
important than waiting for architectural changes.


Well, if you are putting a bandage here - the code should be documented
with a proper FIXME.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH RESEND 0/4] Use max_sge_rd device capability

2015-07-28 Thread Sagi Grimberg

On 7/28/2015 2:09 AM, Steve Wise wrote:

Resending because I forgot to cc linux-rdma :(

Some devices were not setting this capability, so fix those devices,
and svcrdma to use max_sge_rd.  Also remove rdma_cap_read_multi_sge()
since it isn't needed.

These patches were originally part of:

http://www.spinics.net/lists/linux-rdma/msg27436.html

They really aren't part of iSER/iWARP at all, so I've split
them out.

Bruce: This hits svcrdma, but I suggest they get merged via Doug's tree
to avoid any merge problems.

---


Looks good,

Reviewed-by: Sagi Grimberg 
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Potential lost receive WCs (was "[PATCH WIP 38/43]")

2015-07-30 Thread Sagi Grimberg



The drivers we have that don't dequeue all the CQEs are doing
something like NAPI polling and have other mechanisms to guarantee
progress. Don't copy something like budget without copying the other
mechanisms :)


OK, that makes total sense. Thanks for clarifying.


IIRC NAPI is soft-IRQ which chuck is trying to avoid.

Chuck, I think I was the one that commented on this. I observed a
situation in iser where the polling loop kept going continuously
without ever leaving the soft-IRQ context (high workload obviously).
In addition to the polling loop hogging the CPU, other CQs with the
same IRQ assignment were starved. So I suggested you should take care
of it in xprtrdma as well.

The correct approach is NAPI. There is an equivalent for storage which
is called blk_iopoll (block/blk-iopoll.c) which sort of has nothing
specific to block devices (also soft-IRQ context). I have attempted to
convert iser to use it, but I got some unpredictable latency jitters so
I stopped and haven't had a chance to pick it up since.

I still think that draining the CQ without respecting a quota is
wrong, even if driverX has a glitch there.

Sagi.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH WIP 28/43] IB/core: Introduce new fast registration API

2015-07-30 Thread Sagi Grimberg



Can you explain what you mean by "downgrades everything to a 2k
alignment"? If the ULP is responsible for PAGE_SIZE alignment, then
how would this get out of alignment with swiotlb?


swiotlb copies all DMA maps to a shared buffer below 4G so it can be
used with 32 bit devices.

The shared buffer is managed in a way that copies each s/g element to
a continuous 2k aligned subsection of the buffer.



Thanks for the explanation.


Basically, swiotlb realigns everything that passes through it.


So this won't ever happen if the ULP will DMA map the SG and check
for gaps right?

Also, is it interesting to support swiotlb even if we don't have
any devices that require it (and should we expect one to ever exist)?



The DMA API allows this, so ultimately, code has to check the dma
physical address when concerned about alignment.. But we should not
expect this to commonly fail.

So, something like..

   if (!ib_does_sgl_fit_in_mr(mr,sg))
  .. bounce buffer ..


I don't understand the need for this is we do the same thing
if the actual mapping fails...



   if (!ib_map_mr_sg(mr,sg)) // does dma mapping and checks it
  .. bounce buffer ..


Each ULP would want to do something different, iser
will bounce but srp would need to use multiple mrs, nfs will
split the request.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH for-4.3 02/15] IB: Modify ib_create_mr API

2015-07-30 Thread Sagi Grimberg
Use ib_alloc_mr with specific parameters.
Change the existing callers.

Signed-off-by: Sagi Grimberg 
---
 drivers/infiniband/core/verbs.c  | 31 --
 drivers/infiniband/hw/mlx5/main.c|  2 +-
 drivers/infiniband/hw/mlx5/mlx5_ib.h |  5 +++--
 drivers/infiniband/hw/mlx5/mr.c  | 17 ++-
 drivers/infiniband/ulp/iser/iser_verbs.c |  6 ++
 drivers/infiniband/ulp/isert/ib_isert.c  |  6 +-
 include/rdma/ib_verbs.h  | 37 +---
 7 files changed, 58 insertions(+), 46 deletions(-)

diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 003bb62..2ac599b 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1272,15 +1272,32 @@ int ib_dereg_mr(struct ib_mr *mr)
 }
 EXPORT_SYMBOL(ib_dereg_mr);
 
-struct ib_mr *ib_create_mr(struct ib_pd *pd,
-  struct ib_mr_init_attr *mr_init_attr)
+/**
+ * ib_alloc_mr() - Allocates a memory region
+ * @pd:protection domain associated with the region
+ * @mr_type:   memory region type
+ * @max_num_sg:maximum sg entries available for registration.
+ *
+ * Notes:
+ * Memory registeration page/sg lists must not exceed max_num_sg.
+ * For mr_type IB_MR_TYPE_MEM_REG, the total length cannot exceed
+ * max_num_sg * used_page_size.
+ *
+ */
+struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
+ enum ib_mr_type mr_type,
+ u32 max_num_sg)
 {
struct ib_mr *mr;
 
-   if (!pd->device->create_mr)
-   return ERR_PTR(-ENOSYS);
-
-   mr = pd->device->create_mr(pd, mr_init_attr);
+   if (pd->device->alloc_mr) {
+   mr = pd->device->alloc_mr(pd, mr_type, max_num_sg);
+   } else {
+   if (mr_type != IB_MR_TYPE_MEM_REG ||
+   !pd->device->alloc_fast_reg_mr)
+   return ERR_PTR(-ENOSYS);
+   mr = pd->device->alloc_fast_reg_mr(pd, max_num_sg);
+   }
 
if (!IS_ERR(mr)) {
mr->device  = pd->device;
@@ -1292,7 +1309,7 @@ struct ib_mr *ib_create_mr(struct ib_pd *pd,
 
return mr;
 }
-EXPORT_SYMBOL(ib_create_mr);
+EXPORT_SYMBOL(ib_alloc_mr);
 
 struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len)
 {
diff --git a/drivers/infiniband/hw/mlx5/main.c 
b/drivers/infiniband/hw/mlx5/main.c
index 46d1383..2c2a461 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1489,7 +1489,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
dev->ib_dev.attach_mcast= mlx5_ib_mcg_attach;
dev->ib_dev.detach_mcast= mlx5_ib_mcg_detach;
dev->ib_dev.process_mad = mlx5_ib_process_mad;
-   dev->ib_dev.create_mr   = mlx5_ib_create_mr;
+   dev->ib_dev.alloc_mr= mlx5_ib_alloc_mr;
dev->ib_dev.alloc_fast_reg_mr   = mlx5_ib_alloc_fast_reg_mr;
dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list;
dev->ib_dev.free_fast_reg_page_list  = mlx5_ib_free_fast_reg_page_list;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h 
b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 537f42e..3030abe 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -572,8 +572,9 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 
start, u64 length,
 int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index,
   int npages, int zap);
 int mlx5_ib_dereg_mr(struct ib_mr *ibmr);
-struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd,
-   struct ib_mr_init_attr *mr_init_attr);
+struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
+  enum ib_mr_type mr_type,
+  u32 max_num_sg);
 struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
int max_page_list_len);
 struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct 
ib_device *ibdev,
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 03cf74e..b0b68bb 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1246,14 +1246,15 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
return 0;
 }
 
-struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd,
-   struct ib_mr_init_attr *mr_init_attr)
+struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
+  enum ib_mr_type mr_type,
+  u32 max_num_sg)
 {
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_create_mkey_mbox_in *in;
struct mlx5_ib_mr *mr;
int access_mode, err;
-   int ndescs = roundup(mr_init_attr->max_reg_descriptors, 4);
+   int ndescs = roundup(max_num_

[PATCH for-4.3 00/15] Modify MR allocation API

2015-07-30 Thread Sagi Grimberg
This patch set is detached from my WIP for modifying our
fast registration kernel API. I incorporated some comments
from Jason and Christoph. The current set is a drop-in replacement
of ib_alloc_fast_reg_mr with ib_alloc_mr, which receives a memory
region type (which can be IB_MR_TYPE_MEM_REG for normal memory
registration, IB_MR_TYPE_SIGNATURE for a data-integrity capable
memory region, and a future arbitrary SG support capable memory
region).

Sagi Grimberg (15):
  IB/core: Get rid of redundant verb ib_destroy_mr
  IB: Modify ib_create_mr API
  IB/iser: Convert to ib_alloc_mr
  iser-target: Convert to ib_alloc_mr
  IB/srp: Convert to ib_alloc_mr
  xprtrdma, svcrdma: Convert to ib_alloc_mr
  RDS: Convert to ib_alloc_mr
  mlx5: Drop mlx5_ib_alloc_fast_reg_mr
  mlx4: Support ib_alloc_mr verb
  ocrdma: Support ib_alloc_mr verb
  iw_cxgb4: Support ib_alloc_mr verb
  cxgb3: Support ib_alloc_mr verb
  nes: Support ib_alloc_mr verb
  qib: Support ib_alloc_mr verb
  IB/core: Drop ib_alloc_fast_reg_mr

 drivers/infiniband/core/verbs.c |  62 +
 drivers/infiniband/hw/cxgb3/iwch_provider.c |  14 ++--
 drivers/infiniband/hw/cxgb4/iw_cxgb4.h  |   4 +-
 drivers/infiniband/hw/cxgb4/mem.c   |  12 +++-
 drivers/infiniband/hw/cxgb4/provider.c  |   2 +-
 drivers/infiniband/hw/mlx4/main.c   |   2 +-
 drivers/infiniband/hw/mlx4/mlx4_ib.h|   5 +-
 drivers/infiniband/hw/mlx4/mr.c |  11 ++-
 drivers/infiniband/hw/mlx5/main.c   |   4 +-
 drivers/infiniband/hw/mlx5/mlx5_ib.h|   8 +--
 drivers/infiniband/hw/mlx5/mr.c | 104 +++-
 drivers/infiniband/hw/nes/nes_verbs.c   |  19 +++--
 drivers/infiniband/hw/ocrdma/ocrdma_main.c  |   2 +-
 drivers/infiniband/hw/ocrdma/ocrdma_verbs.c |  11 ++-
 drivers/infiniband/hw/ocrdma/ocrdma_verbs.h |   4 +-
 drivers/infiniband/hw/qib/qib_mr.c  |   9 ++-
 drivers/infiniband/hw/qib/qib_verbs.c   |   2 +-
 drivers/infiniband/hw/qib/qib_verbs.h   |   4 +-
 drivers/infiniband/ulp/iser/iser_verbs.c|  15 ++--
 drivers/infiniband/ulp/isert/ib_isert.c |  14 ++--
 drivers/infiniband/ulp/srp/ib_srp.c |   3 +-
 include/rdma/ib_verbs.h |  58 
 net/rds/iw_rdma.c   |   5 +-
 net/rds/iw_send.c   |   5 +-
 net/sunrpc/xprtrdma/frwr_ops.c  |   6 +-
 net/sunrpc/xprtrdma/svc_rdma_transport.c|   2 +-
 26 files changed, 159 insertions(+), 228 deletions(-)

-- 
1.8.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH for-4.3 05/15] IB/srp: Convert to ib_alloc_mr

2015-07-30 Thread Sagi Grimberg
Signed-off-by: Sagi Grimberg 
---
 drivers/infiniband/ulp/srp/ib_srp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/ulp/srp/ib_srp.c 
b/drivers/infiniband/ulp/srp/ib_srp.c
index a800307..4cc2cd4 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -378,7 +378,8 @@ static struct srp_fr_pool *srp_create_fr_pool(struct 
ib_device *device,
INIT_LIST_HEAD(&pool->free_list);
 
for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
-   mr = ib_alloc_fast_reg_mr(pd, max_page_list_len);
+   mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
+max_page_list_len);
if (IS_ERR(mr)) {
ret = PTR_ERR(mr);
goto destroy_pool;
-- 
1.8.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH for-4.3 04/15] iser-target: Convert to ib_alloc_mr

2015-07-30 Thread Sagi Grimberg
Signed-off-by: Sagi Grimberg 
---
 drivers/infiniband/ulp/isert/ib_isert.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/ulp/isert/ib_isert.c 
b/drivers/infiniband/ulp/isert/ib_isert.c
index 23a793a..bf094cc 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -515,7 +515,8 @@ isert_create_pi_ctx(struct fast_reg_descriptor *desc,
goto err_pi_ctx;
}
 
-   pi_ctx->prot_mr = ib_alloc_fast_reg_mr(pd, ISCSI_ISER_SG_TABLESIZE);
+   pi_ctx->prot_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
+ ISCSI_ISER_SG_TABLESIZE);
if (IS_ERR(pi_ctx->prot_mr)) {
isert_err("Failed to allocate prot frmr err=%ld\n",
  PTR_ERR(pi_ctx->prot_mr));
@@ -562,7 +563,8 @@ isert_create_fr_desc(struct ib_device *ib_device, struct 
ib_pd *pd,
return PTR_ERR(fr_desc->data_frpl);
}
 
-   fr_desc->data_mr = ib_alloc_fast_reg_mr(pd, ISCSI_ISER_SG_TABLESIZE);
+   fr_desc->data_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
+  ISCSI_ISER_SG_TABLESIZE);
if (IS_ERR(fr_desc->data_mr)) {
isert_err("Failed to allocate data frmr err=%ld\n",
  PTR_ERR(fr_desc->data_mr));
-- 
1.8.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH for-4.3 03/15] IB/iser: Convert to ib_alloc_mr

2015-07-30 Thread Sagi Grimberg
Signed-off-by: Sagi Grimberg 
---
 drivers/infiniband/ulp/iser/iser_verbs.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c 
b/drivers/infiniband/ulp/iser/iser_verbs.c
index 9bef5a7..0a7ceb9 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -299,8 +299,8 @@ iser_alloc_pi_ctx(struct ib_device *ib_device, struct ib_pd 
*pd,
goto prot_frpl_failure;
}
 
-   pi_ctx->prot_mr = ib_alloc_fast_reg_mr(pd,
-   ISCSI_ISER_SG_TABLESIZE + 1);
+   pi_ctx->prot_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
+ ISCSI_ISER_SG_TABLESIZE + 1);
if (IS_ERR(pi_ctx->prot_mr)) {
ret = PTR_ERR(pi_ctx->prot_mr);
goto prot_mr_failure;
@@ -351,7 +351,8 @@ iser_create_fastreg_desc(struct ib_device *ib_device, 
struct ib_pd *pd,
return PTR_ERR(desc->data_frpl);
}
 
-   desc->data_mr = ib_alloc_fast_reg_mr(pd, ISCSI_ISER_SG_TABLESIZE + 1);
+   desc->data_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
+   ISCSI_ISER_SG_TABLESIZE + 1);
if (IS_ERR(desc->data_mr)) {
ret = PTR_ERR(desc->data_mr);
iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret);
-- 
1.8.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH for-4.3 01/15] IB/core: Get rid of redundant verb ib_destroy_mr

2015-07-30 Thread Sagi Grimberg
This was added with the thought of uniting all mr allocation
and deallocation routines, but the fact is we already have a single
deallocation routine, ib_dereg_mr.

And, move mlx5_ib_destroy_mr specific logic into mlx5_ib_dereg_mr
(includes only signature stuff for now).

And, fixup the only callers (iser/isert) accordingly.

Signed-off-by: Sagi Grimberg 
---
 drivers/infiniband/core/verbs.c  | 17 -
 drivers/infiniband/hw/mlx5/main.c|  1 -
 drivers/infiniband/hw/mlx5/mlx5_ib.h |  1 -
 drivers/infiniband/hw/mlx5/mr.c  | 43 ++--
 drivers/infiniband/ulp/iser/iser_verbs.c |  2 +-
 drivers/infiniband/ulp/isert/ib_isert.c  |  2 +-
 include/rdma/ib_verbs.h  | 10 
 7 files changed, 15 insertions(+), 61 deletions(-)

diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index bb561c8..003bb62 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1294,23 +1294,6 @@ struct ib_mr *ib_create_mr(struct ib_pd *pd,
 }
 EXPORT_SYMBOL(ib_create_mr);
 
-int ib_destroy_mr(struct ib_mr *mr)
-{
-   struct ib_pd *pd;
-   int ret;
-
-   if (atomic_read(&mr->usecnt))
-   return -EBUSY;
-
-   pd = mr->pd;
-   ret = mr->device->destroy_mr(mr);
-   if (!ret)
-   atomic_dec(&pd->usecnt);
-
-   return ret;
-}
-EXPORT_SYMBOL(ib_destroy_mr);
-
 struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len)
 {
struct ib_mr *mr;
diff --git a/drivers/infiniband/hw/mlx5/main.c 
b/drivers/infiniband/hw/mlx5/main.c
index 5cfb0b1..46d1383 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1486,7 +1486,6 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
dev->ib_dev.get_dma_mr  = mlx5_ib_get_dma_mr;
dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr;
dev->ib_dev.dereg_mr= mlx5_ib_dereg_mr;
-   dev->ib_dev.destroy_mr  = mlx5_ib_destroy_mr;
dev->ib_dev.attach_mcast= mlx5_ib_mcg_attach;
dev->ib_dev.detach_mcast= mlx5_ib_mcg_detach;
dev->ib_dev.process_mad = mlx5_ib_process_mad;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h 
b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 446d804..537f42e 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -572,7 +572,6 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 
start, u64 length,
 int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index,
   int npages, int zap);
 int mlx5_ib_dereg_mr(struct ib_mr *ibmr);
-int mlx5_ib_destroy_mr(struct ib_mr *ibmr);
 struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd,
struct ib_mr_init_attr *mr_init_attr);
 struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 4c92ca8..03cf74e 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1172,6 +1172,19 @@ static int clean_mr(struct mlx5_ib_mr *mr)
int umred = mr->umred;
int err;
 
+   if (mr->sig) {
+   if (mlx5_core_destroy_psv(dev->mdev,
+ mr->sig->psv_memory.psv_idx))
+   mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
+mr->sig->psv_memory.psv_idx);
+   if (mlx5_core_destroy_psv(dev->mdev,
+ mr->sig->psv_wire.psv_idx))
+   mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
+mr->sig->psv_wire.psv_idx);
+   kfree(mr->sig);
+   mr->sig = NULL;
+   }
+
if (!umred) {
err = destroy_mkey(dev, mr);
if (err) {
@@ -1319,36 +1332,6 @@ err_free:
return ERR_PTR(err);
 }
 
-int mlx5_ib_destroy_mr(struct ib_mr *ibmr)
-{
-   struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
-   struct mlx5_ib_mr *mr = to_mmr(ibmr);
-   int err;
-
-   if (mr->sig) {
-   if (mlx5_core_destroy_psv(dev->mdev,
- mr->sig->psv_memory.psv_idx))
-   mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
-mr->sig->psv_memory.psv_idx);
-   if (mlx5_core_destroy_psv(dev->mdev,
- mr->sig->psv_wire.psv_idx))
-   mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
-mr->sig->psv_wire.psv_idx);
-   kfree(mr->sig);
-   }
-
-   err = destroy_mkey(dev, mr);

[PATCH for-4.3 06/15] xprtrdma, svcrdma: Convert to ib_alloc_mr

2015-07-30 Thread Sagi Grimberg
Signed-off-by: Sagi Grimberg 
---
 net/sunrpc/xprtrdma/frwr_ops.c   | 6 +++---
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 63f282e..d6653f5 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -117,7 +117,7 @@ __frwr_recovery_worker(struct work_struct *work)
if (ib_dereg_mr(r->r.frmr.fr_mr))
goto out_fail;
 
-   r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(pd, depth);
+   r->r.frmr.fr_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, depth);
if (IS_ERR(r->r.frmr.fr_mr))
goto out_fail;
 
@@ -148,7 +148,7 @@ __frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct 
ib_device *device,
struct rpcrdma_frmr *f = &r->r.frmr;
int rc;
 
-   f->fr_mr = ib_alloc_fast_reg_mr(pd, depth);
+   f->fr_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, depth);
if (IS_ERR(f->fr_mr))
goto out_mr_err;
f->fr_pgl = ib_alloc_fast_reg_page_list(device, depth);
@@ -158,7 +158,7 @@ __frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct 
ib_device *device,
 
 out_mr_err:
rc = PTR_ERR(f->fr_mr);
-   dprintk("RPC:   %s: ib_alloc_fast_reg_mr status %i\n",
+   dprintk("RPC:   %s: ib_alloc_mr status %i\n",
__func__, rc);
return rc;
 
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c 
b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 6b36279..adcf904 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -738,7 +738,7 @@ static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct 
svcxprt_rdma *xprt)
if (!frmr)
goto err;
 
-   mr = ib_alloc_fast_reg_mr(xprt->sc_pd, RPCSVC_MAXPAGES);
+   mr = ib_alloc_mr(xprt->sc_pd, IB_MR_TYPE_MEM_REG, RPCSVC_MAXPAGES);
if (IS_ERR(mr))
goto err_free_frmr;
 
-- 
1.8.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH for-4.3 13/15] nes: Support ib_alloc_mr verb

2015-07-30 Thread Sagi Grimberg
Signed-off-by: Sagi Grimberg 
---
 drivers/infiniband/hw/nes/nes_verbs.c | 19 ++-
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/drivers/infiniband/hw/nes/nes_verbs.c 
b/drivers/infiniband/hw/nes/nes_verbs.c
index fbc43e5..44cb513 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -375,9 +375,11 @@ static int alloc_fast_reg_mr(struct nes_device *nesdev, 
struct nes_pd *nespd,
 }
 
 /*
- * nes_alloc_fast_reg_mr
+ * nes_alloc_mr
  */
-static struct ib_mr *nes_alloc_fast_reg_mr(struct ib_pd *ibpd, int 
max_page_list_len)
+static struct ib_mr *nes_alloc_mr(struct ib_pd *ibpd,
+ enum ib_mr_type mr_type,
+ u32 max_num_sg)
 {
struct nes_pd *nespd = to_nespd(ibpd);
struct nes_vnic *nesvnic = to_nesvnic(ibpd->device);
@@ -393,11 +395,18 @@ static struct ib_mr *nes_alloc_fast_reg_mr(struct ib_pd 
*ibpd, int max_page_list
u32 stag;
int ret;
struct ib_mr *ibmr;
+
+   if (mr_type != IB_MR_TYPE_MEM_REG)
+   return ERR_PTR(-EINVAL);
+
+   if (max_num_sg > (NES_4K_PBL_CHUNK_SIZE / sizeof(u64)))
+   return ERR_PTR(-E2BIG);
+
 /*
  * Note:  Set to always use a fixed length single page entry PBL.  This is to 
allow
  *  for the fast_reg_mr operation to always know the size of the PBL.
  */
-   if (max_page_list_len > (NES_4K_PBL_CHUNK_SIZE / sizeof(u64)))
+   if (max_num_sg > (NES_4K_PBL_CHUNK_SIZE / sizeof(u64)))
return ERR_PTR(-E2BIG);
 
get_random_bytes(&next_stag_index, sizeof(next_stag_index));
@@ -424,7 +433,7 @@ static struct ib_mr *nes_alloc_fast_reg_mr(struct ib_pd 
*ibpd, int max_page_list
nes_debug(NES_DBG_MR, "Allocating STag 0x%08X index = 0x%08X\n",
  stag, stag_index);
 
-   ret = alloc_fast_reg_mr(nesdev, nespd, stag, max_page_list_len);
+   ret = alloc_fast_reg_mr(nesdev, nespd, stag, max_num_sg);
 
if (ret == 0) {
nesmr->ibmr.rkey = stag;
@@ -3929,7 +3938,7 @@ struct nes_ib_device *nes_init_ofa_device(struct 
net_device *netdev)
nesibdev->ibdev.dealloc_mw = nes_dealloc_mw;
nesibdev->ibdev.bind_mw = nes_bind_mw;
 
-   nesibdev->ibdev.alloc_fast_reg_mr = nes_alloc_fast_reg_mr;
+   nesibdev->ibdev.alloc_mr = nes_alloc_mr;
nesibdev->ibdev.alloc_fast_reg_page_list = nes_alloc_fast_reg_page_list;
nesibdev->ibdev.free_fast_reg_page_list = nes_free_fast_reg_page_list;
 
-- 
1.8.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH for-4.3 08/15] mlx5: Drop mlx5_ib_alloc_fast_reg_mr

2015-07-30 Thread Sagi Grimberg
Signed-off-by: Sagi Grimberg 
---
 drivers/infiniband/hw/mlx5/main.c|  1 -
 drivers/infiniband/hw/mlx5/mlx5_ib.h |  2 --
 drivers/infiniband/hw/mlx5/mr.c  | 44 
 3 files changed, 47 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/main.c 
b/drivers/infiniband/hw/mlx5/main.c
index 2c2a461..41d6911 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1490,7 +1490,6 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
dev->ib_dev.detach_mcast= mlx5_ib_mcg_detach;
dev->ib_dev.process_mad = mlx5_ib_process_mad;
dev->ib_dev.alloc_mr= mlx5_ib_alloc_mr;
-   dev->ib_dev.alloc_fast_reg_mr   = mlx5_ib_alloc_fast_reg_mr;
dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list;
dev->ib_dev.free_fast_reg_page_list  = mlx5_ib_free_fast_reg_page_list;
dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h 
b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 3030abe..a5fa0b9 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -575,8 +575,6 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr);
 struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
   enum ib_mr_type mr_type,
   u32 max_num_sg);
-struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
-   int max_page_list_len);
 struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct 
ib_device *ibdev,
   int 
page_list_len);
 void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list);
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index b0b68bb..09e6ae6 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1339,50 +1339,6 @@ err_free:
return ERR_PTR(err);
 }
 
-struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
-   int max_page_list_len)
-{
-   struct mlx5_ib_dev *dev = to_mdev(pd->device);
-   struct mlx5_create_mkey_mbox_in *in;
-   struct mlx5_ib_mr *mr;
-   int err;
-
-   mr = kzalloc(sizeof(*mr), GFP_KERNEL);
-   if (!mr)
-   return ERR_PTR(-ENOMEM);
-
-   in = kzalloc(sizeof(*in), GFP_KERNEL);
-   if (!in) {
-   err = -ENOMEM;
-   goto err_free;
-   }
-
-   in->seg.status = MLX5_MKEY_STATUS_FREE;
-   in->seg.xlt_oct_size = cpu_to_be32((max_page_list_len + 1) / 2);
-   in->seg.qpn_mkey7_0 = cpu_to_be32(0xff << 8);
-   in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT;
-   in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
-   /*
-* TBD not needed - issue 197292 */
-   in->seg.log2_page_size = PAGE_SHIFT;
-
-   err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, sizeof(*in), NULL,
-   NULL, NULL);
-   kfree(in);
-   if (err)
-   goto err_free;
-
-   mr->ibmr.lkey = mr->mmr.key;
-   mr->ibmr.rkey = mr->mmr.key;
-   mr->umem = NULL;
-
-   return &mr->ibmr;
-
-err_free:
-   kfree(mr);
-   return ERR_PTR(err);
-}
-
 struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct 
ib_device *ibdev,
   int 
page_list_len)
 {
-- 
1.8.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH for-4.3 09/15] mlx4: Support ib_alloc_mr verb

2015-07-30 Thread Sagi Grimberg
Signed-off-by: Sagi Grimberg 
---
 drivers/infiniband/hw/mlx4/main.c|  2 +-
 drivers/infiniband/hw/mlx4/mlx4_ib.h |  5 +++--
 drivers/infiniband/hw/mlx4/mr.c  | 11 ---
 3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/infiniband/hw/mlx4/main.c 
b/drivers/infiniband/hw/mlx4/main.c
index 05166b7..9ab73a4 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -2294,7 +2294,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->ib_dev.reg_user_mr   = mlx4_ib_reg_user_mr;
ibdev->ib_dev.rereg_user_mr = mlx4_ib_rereg_user_mr;
ibdev->ib_dev.dereg_mr  = mlx4_ib_dereg_mr;
-   ibdev->ib_dev.alloc_fast_reg_mr = mlx4_ib_alloc_fast_reg_mr;
+   ibdev->ib_dev.alloc_mr  = mlx4_ib_alloc_mr;
ibdev->ib_dev.alloc_fast_reg_page_list = 
mlx4_ib_alloc_fast_reg_page_list;
ibdev->ib_dev.free_fast_reg_page_list  = 
mlx4_ib_free_fast_reg_page_list;
ibdev->ib_dev.attach_mcast  = mlx4_ib_mcg_attach;
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h 
b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 9066fc2..80228b2 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -679,8 +679,9 @@ struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum 
ib_mw_type type);
 int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
struct ib_mw_bind *mw_bind);
 int mlx4_ib_dealloc_mw(struct ib_mw *mw);
-struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd,
-   int max_page_list_len);
+struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
+  enum ib_mr_type mr_type,
+  u32 max_num_sg);
 struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct 
ib_device *ibdev,
   int 
page_list_len);
 void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list);
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index e0d2717..2542fd3 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -350,19 +350,24 @@ int mlx4_ib_dealloc_mw(struct ib_mw *ibmw)
return 0;
 }
 
-struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd,
-   int max_page_list_len)
+struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
+  enum ib_mr_type mr_type,
+  u32 max_num_sg)
 {
struct mlx4_ib_dev *dev = to_mdev(pd->device);
struct mlx4_ib_mr *mr;
int err;
 
+   if (mr_type != IB_MR_TYPE_MEM_REG ||
+   max_num_sg > MLX4_MAX_FAST_REG_PAGES)
+   return ERR_PTR(-EINVAL);
+
mr = kmalloc(sizeof *mr, GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
 
err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, 0, 0, 0,
-   max_page_list_len, 0, &mr->mmr);
+   max_num_sg, 0, &mr->mmr);
if (err)
goto err_free;
 
-- 
1.8.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH for-4.3 11/15] iw_cxgb4: Support ib_alloc_mr verb

2015-07-30 Thread Sagi Grimberg
Signed-off-by: Sagi Grimberg 
---
 drivers/infiniband/hw/cxgb4/iw_cxgb4.h |  4 +++-
 drivers/infiniband/hw/cxgb4/mem.c  | 12 +---
 drivers/infiniband/hw/cxgb4/provider.c |  2 +-
 3 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h 
b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index cc77844..c7bb38c 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -970,7 +970,9 @@ void c4iw_free_fastreg_pbl(struct ib_fast_reg_page_list 
*page_list);
 struct ib_fast_reg_page_list *c4iw_alloc_fastreg_pbl(
struct ib_device *device,
int page_list_len);
-struct ib_mr *c4iw_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth);
+struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd,
+   enum ib_mr_type mr_type,
+   u32 max_num_sg);
 int c4iw_dealloc_mw(struct ib_mw *mw);
 struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
 struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start,
diff --git a/drivers/infiniband/hw/cxgb4/mem.c 
b/drivers/infiniband/hw/cxgb4/mem.c
index cff815b..026b91e 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -853,7 +853,9 @@ int c4iw_dealloc_mw(struct ib_mw *mw)
return 0;
 }
 
-struct ib_mr *c4iw_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth)
+struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd,
+   enum ib_mr_type mr_type,
+   u32 max_num_sg)
 {
struct c4iw_dev *rhp;
struct c4iw_pd *php;
@@ -862,6 +864,10 @@ struct ib_mr *c4iw_alloc_fast_reg_mr(struct ib_pd *pd, int 
pbl_depth)
u32 stag = 0;
int ret = 0;
 
+   if (mr_type != IB_MR_TYPE_MEM_REG ||
+   max_num_sg > t4_max_fr_depth(use_dsgl))
+   return ERR_PTR(-EINVAL);
+
php = to_c4iw_pd(pd);
rhp = php->rhp;
mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
@@ -871,10 +877,10 @@ struct ib_mr *c4iw_alloc_fast_reg_mr(struct ib_pd *pd, 
int pbl_depth)
}
 
mhp->rhp = rhp;
-   ret = alloc_pbl(mhp, pbl_depth);
+   ret = alloc_pbl(mhp, max_num_sg);
if (ret)
goto err1;
-   mhp->attr.pbl_size = pbl_depth;
+   mhp->attr.pbl_size = max_num_sg;
ret = allocate_stag(&rhp->rdev, &stag, php->pdid,
 mhp->attr.pbl_size, mhp->attr.pbl_addr);
if (ret)
diff --git a/drivers/infiniband/hw/cxgb4/provider.c 
b/drivers/infiniband/hw/cxgb4/provider.c
index 6eee3d3..7746113 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -556,7 +556,7 @@ int c4iw_register_device(struct c4iw_dev *dev)
dev->ibdev.alloc_mw = c4iw_alloc_mw;
dev->ibdev.bind_mw = c4iw_bind_mw;
dev->ibdev.dealloc_mw = c4iw_dealloc_mw;
-   dev->ibdev.alloc_fast_reg_mr = c4iw_alloc_fast_reg_mr;
+   dev->ibdev.alloc_mr = c4iw_alloc_mr;
dev->ibdev.alloc_fast_reg_page_list = c4iw_alloc_fastreg_pbl;
dev->ibdev.free_fast_reg_page_list = c4iw_free_fastreg_pbl;
dev->ibdev.attach_mcast = c4iw_multicast_attach;
-- 
1.8.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH for-4.3 15/15] IB/core: Drop ib_alloc_fast_reg_mr

2015-07-30 Thread Sagi Grimberg
Fully replaced by a more generic and suitable
ib_alloc_mr.

Signed-off-by: Sagi Grimberg 
---
 drivers/infiniband/core/verbs.c | 32 +++-
 include/rdma/ib_verbs.h | 11 ---
 2 files changed, 3 insertions(+), 40 deletions(-)

diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 2ac599b..2e5fd89 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1290,36 +1290,10 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
 {
struct ib_mr *mr;
 
-   if (pd->device->alloc_mr) {
-   mr = pd->device->alloc_mr(pd, mr_type, max_num_sg);
-   } else {
-   if (mr_type != IB_MR_TYPE_MEM_REG ||
-   !pd->device->alloc_fast_reg_mr)
-   return ERR_PTR(-ENOSYS);
-   mr = pd->device->alloc_fast_reg_mr(pd, max_num_sg);
-   }
-
-   if (!IS_ERR(mr)) {
-   mr->device  = pd->device;
-   mr->pd  = pd;
-   mr->uobject = NULL;
-   atomic_inc(&pd->usecnt);
-   atomic_set(&mr->usecnt, 0);
-   }
-
-   return mr;
-}
-EXPORT_SYMBOL(ib_alloc_mr);
-
-struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len)
-{
-   struct ib_mr *mr;
-
-   if (!pd->device->alloc_fast_reg_mr)
+   if (!pd->device->alloc_mr)
return ERR_PTR(-ENOSYS);
 
-   mr = pd->device->alloc_fast_reg_mr(pd, max_page_list_len);
-
+   mr = pd->device->alloc_mr(pd, mr_type, max_num_sg);
if (!IS_ERR(mr)) {
mr->device  = pd->device;
mr->pd  = pd;
@@ -1330,7 +1304,7 @@ struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int 
max_page_list_len)
 
return mr;
 }
-EXPORT_SYMBOL(ib_alloc_fast_reg_mr);
+EXPORT_SYMBOL(ib_alloc_mr);
 
 struct ib_fast_reg_page_list *ib_alloc_fast_reg_page_list(struct ib_device 
*device,
  int max_page_list_len)
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 110044d..a2c328b 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1671,8 +1671,6 @@ struct ib_device {
struct ib_mr * (*alloc_mr)(struct ib_pd *pd,
   enum ib_mr_type mr_type,
   u32 max_num_sg);
-   struct ib_mr * (*alloc_fast_reg_mr)(struct ib_pd *pd,
-  int max_page_list_len);
struct ib_fast_reg_page_list * (*alloc_fast_reg_page_list)(struct 
ib_device *device,
   int 
page_list_len);
void   (*free_fast_reg_page_list)(struct 
ib_fast_reg_page_list *page_list);
@@ -2819,15 +2817,6 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
  u32 max_num_sg);
 
 /**
- * ib_alloc_fast_reg_mr - Allocates memory region usable with the
- *   IB_WR_FAST_REG_MR send work request.
- * @pd: The protection domain associated with the region.
- * @max_page_list_len: requested max physical buffer list length to be
- *   used with fast register work requests for this MR.
- */
-struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len);
-
-/**
  * ib_alloc_fast_reg_page_list - Allocates a page list array
  * @device - ib device pointer.
  * @page_list_len - size of the page list array to be allocated.
-- 
1.8.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH for-4.3 10/15] ocrdma: Support ib_alloc_mr verb

2015-07-30 Thread Sagi Grimberg
Signed-off-by: Sagi Grimberg 
---
 drivers/infiniband/hw/ocrdma/ocrdma_main.c  |  2 +-
 drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 11 ---
 drivers/infiniband/hw/ocrdma/ocrdma_verbs.h |  4 +++-
 3 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c 
b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index d98a707..62086ba 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -294,7 +294,7 @@ static int ocrdma_register_device(struct ocrdma_dev *dev)
dev->ibdev.dereg_mr = ocrdma_dereg_mr;
dev->ibdev.reg_user_mr = ocrdma_reg_user_mr;
 
-   dev->ibdev.alloc_fast_reg_mr = ocrdma_alloc_frmr;
+   dev->ibdev.alloc_mr = ocrdma_alloc_mr;
dev->ibdev.alloc_fast_reg_page_list = ocrdma_alloc_frmr_page_list;
dev->ibdev.free_fast_reg_page_list = ocrdma_free_frmr_page_list;
 
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c 
b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index 5bb61eb..7a5260f 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -2983,21 +2983,26 @@ int ocrdma_arm_cq(struct ib_cq *ibcq, enum 
ib_cq_notify_flags cq_flags)
return 0;
 }
 
-struct ib_mr *ocrdma_alloc_frmr(struct ib_pd *ibpd, int max_page_list_len)
+struct ib_mr *ocrdma_alloc_mr(struct ib_pd *ibpd,
+ enum ib_mr_type mr_type,
+ u32 max_num_sg)
 {
int status;
struct ocrdma_mr *mr;
struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
 
-   if (max_page_list_len > dev->attr.max_pages_per_frmr)
+   if (mr_type != IB_MR_TYPE_MEM_REG)
+   return ERR_PTR(-EINVAL);
+
+   if (max_num_sg > dev->attr.max_pages_per_frmr)
return ERR_PTR(-EINVAL);
 
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
 
-   status = ocrdma_get_pbl_info(dev, mr, max_page_list_len);
+   status = ocrdma_get_pbl_info(dev, mr, max_num_sg);
if (status)
goto pbl_err;
mr->hwmr.fr_mr = 1;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h 
b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
index b15c608..50d8d47 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
@@ -96,7 +96,9 @@ struct ib_mr *ocrdma_reg_kernel_mr(struct ib_pd *,
   int num_phys_buf, int acc, u64 *iova_start);
 struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *, u64 start, u64 length,
 u64 virt, int acc, struct ib_udata *);
-struct ib_mr *ocrdma_alloc_frmr(struct ib_pd *pd, int max_page_list_len);
+struct ib_mr *ocrdma_alloc_mr(struct ib_pd *pd,
+ enum ib_mr_type mr_type,
+ u32 max_num_sg);
 struct ib_fast_reg_page_list *ocrdma_alloc_frmr_page_list(struct ib_device
*ibdev,
int page_list_len);
-- 
1.8.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH for-4.3 14/15] qib: Support ib_alloc_mr verb

2015-07-30 Thread Sagi Grimberg
Signed-off-by: Sagi Grimberg 
---
 drivers/infiniband/hw/qib/qib_mr.c| 9 +++--
 drivers/infiniband/hw/qib/qib_verbs.c | 2 +-
 drivers/infiniband/hw/qib/qib_verbs.h | 4 +++-
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/infiniband/hw/qib/qib_mr.c 
b/drivers/infiniband/hw/qib/qib_mr.c
index c4473db..19220dc 100644
--- a/drivers/infiniband/hw/qib/qib_mr.c
+++ b/drivers/infiniband/hw/qib/qib_mr.c
@@ -327,11 +327,16 @@ out:
  *
  * Return the memory region on success, otherwise return an errno.
  */
-struct ib_mr *qib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len)
+struct ib_mr *qib_alloc_mr(struct ib_pd *pd,
+  enum ib_mr_type mr_type,
+  u32 max_num_sg)
 {
struct qib_mr *mr;
 
-   mr = alloc_mr(max_page_list_len, pd);
+   if (mr_type != IB_MR_TYPE_MEM_REG)
+   return ERR_PTR(-EINVAL);
+
+   mr = alloc_mr(max_num_sg, pd);
if (IS_ERR(mr))
return (struct ib_mr *)mr;
 
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c 
b/drivers/infiniband/hw/qib/qib_verbs.c
index a05d1a3..ef022a1 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -2235,7 +2235,7 @@ int qib_register_ib_device(struct qib_devdata *dd)
ibdev->reg_phys_mr = qib_reg_phys_mr;
ibdev->reg_user_mr = qib_reg_user_mr;
ibdev->dereg_mr = qib_dereg_mr;
-   ibdev->alloc_fast_reg_mr = qib_alloc_fast_reg_mr;
+   ibdev->alloc_mr = qib_alloc_mr;
ibdev->alloc_fast_reg_page_list = qib_alloc_fast_reg_page_list;
ibdev->free_fast_reg_page_list = qib_free_fast_reg_page_list;
ibdev->alloc_fmr = qib_alloc_fmr;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h 
b/drivers/infiniband/hw/qib/qib_verbs.h
index 1635572..3bc0caf 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -1032,7 +1032,9 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 
start, u64 length,
 
 int qib_dereg_mr(struct ib_mr *ibmr);
 
-struct ib_mr *qib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len);
+struct ib_mr *qib_alloc_mr(struct ib_pd *pd,
+  enum ib_mr_type mr_type,
+  u32 max_entries);
 
 struct ib_fast_reg_page_list *qib_alloc_fast_reg_page_list(
struct ib_device *ibdev, int page_list_len);
-- 
1.8.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH for-4.3 12/15] cxgb3: Support ib_alloc_mr verb

2015-07-30 Thread Sagi Grimberg
Signed-off-by: Sagi Grimberg 
---
 drivers/infiniband/hw/cxgb3/iwch_provider.c | 14 ++
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c 
b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index b1b7323..7e004c7 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -796,7 +796,9 @@ static int iwch_dealloc_mw(struct ib_mw *mw)
return 0;
 }
 
-static struct ib_mr *iwch_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth)
+static struct ib_mr *iwch_alloc_mr(struct ib_pd *pd,
+  enum ib_mr_type mr_type,
+  u32 max_num_sg)
 {
struct iwch_dev *rhp;
struct iwch_pd *php;
@@ -805,6 +807,10 @@ static struct ib_mr *iwch_alloc_fast_reg_mr(struct ib_pd 
*pd, int pbl_depth)
u32 stag = 0;
int ret = 0;
 
+   if (mr_type != IB_MR_TYPE_MEM_REG ||
+   max_num_sg > T3_MAX_FASTREG_DEPTH)
+   return ERR_PTR(-EINVAL);
+
php = to_iwch_pd(pd);
rhp = php->rhp;
mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
@@ -812,10 +818,10 @@ static struct ib_mr *iwch_alloc_fast_reg_mr(struct ib_pd 
*pd, int pbl_depth)
goto err;
 
mhp->rhp = rhp;
-   ret = iwch_alloc_pbl(mhp, pbl_depth);
+   ret = iwch_alloc_pbl(mhp, max_num_sg);
if (ret)
goto err1;
-   mhp->attr.pbl_size = pbl_depth;
+   mhp->attr.pbl_size = max_num_sg;
ret = cxio_allocate_stag(&rhp->rdev, &stag, php->pdid,
 mhp->attr.pbl_size, mhp->attr.pbl_addr);
if (ret)
@@ -1439,7 +1445,7 @@ int iwch_register_device(struct iwch_dev *dev)
dev->ibdev.alloc_mw = iwch_alloc_mw;
dev->ibdev.bind_mw = iwch_bind_mw;
dev->ibdev.dealloc_mw = iwch_dealloc_mw;
-   dev->ibdev.alloc_fast_reg_mr = iwch_alloc_fast_reg_mr;
+   dev->ibdev.alloc_mr = iwch_alloc_mr;
dev->ibdev.alloc_fast_reg_page_list = iwch_alloc_fastreg_pbl;
dev->ibdev.free_fast_reg_page_list = iwch_free_fastreg_pbl;
dev->ibdev.attach_mcast = iwch_multicast_attach;
-- 
1.8.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH for-4.3 07/15] RDS: Convert to ib_alloc_mr

2015-07-30 Thread Sagi Grimberg
Signed-off-by: Sagi Grimberg 
---
 net/rds/iw_rdma.c | 5 +++--
 net/rds/iw_send.c | 5 +++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c
index dba8d08..6a8fbd6 100644
--- a/net/rds/iw_rdma.c
+++ b/net/rds/iw_rdma.c
@@ -667,11 +667,12 @@ static int rds_iw_init_fastreg(struct rds_iw_mr_pool 
*pool,
struct ib_mr *mr;
int err;
 
-   mr = ib_alloc_fast_reg_mr(rds_iwdev->pd, pool->max_message_size);
+   mr = ib_alloc_mr(rds_iwdev->pd, IB_MR_TYPE_MEM_REG,
+pool->max_message_size);
if (IS_ERR(mr)) {
err = PTR_ERR(mr);
 
-   printk(KERN_WARNING "RDS/IW: ib_alloc_fast_reg_mr failed 
(err=%d)\n", err);
+   printk(KERN_WARNING "RDS/IW: ib_alloc_mr failed (err=%d)\n", 
err);
return err;
}
 
diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c
index 334fe98..86152ec 100644
--- a/net/rds/iw_send.c
+++ b/net/rds/iw_send.c
@@ -153,9 +153,10 @@ void rds_iw_send_init_ring(struct rds_iw_connection *ic)
sge->length = sizeof(struct rds_header);
sge->lkey = 0;
 
-   send->s_mr = ib_alloc_fast_reg_mr(ic->i_pd, 
fastreg_message_size);
+   send->s_mr = ib_alloc_mr(ic->i_pd, IB_MR_TYPE_MEM_REG,
+fastreg_message_size);
if (IS_ERR(send->s_mr)) {
-   printk(KERN_WARNING "RDS/IW: ib_alloc_fast_reg_mr 
failed\n");
+   printk(KERN_WARNING "RDS/IW: ib_alloc_mr failed\n");
break;
}
 
-- 
1.8.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 06/22] IB/iser: Fix possible bogus DMA unmapping

2015-07-30 Thread Sagi Grimberg
If the iser_initialize_task_headers() routine failed before
DMA mapping, we should not attempt to unmap in cleanup_task().

Signed-off-by: Sagi Grimberg 
---
 drivers/infiniband/ulp/iser/iscsi_iser.c | 12 
 drivers/infiniband/ulp/iser/iscsi_iser.h |  2 ++
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c 
b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 92b1020..e3cea61 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -200,6 +200,7 @@ iser_initialize_task_headers(struct iscsi_task *task,
goto out;
}
 
+   tx_desc->mapped = true;
tx_desc->dma_addr = dma_addr;
tx_desc->tx_sg[0].addr   = tx_desc->dma_addr;
tx_desc->tx_sg[0].length = ISER_HEADERS_LEN;
@@ -359,16 +360,19 @@ iscsi_iser_task_xmit(struct iscsi_task *task)
 static void iscsi_iser_cleanup_task(struct iscsi_task *task)
 {
struct iscsi_iser_task *iser_task = task->dd_data;
-   struct iser_tx_desc*tx_desc   = &iser_task->desc;
-   struct iser_conn   *iser_conn = task->conn->dd_data;
+   struct iser_tx_desc *tx_desc = &iser_task->desc;
+   struct iser_conn *iser_conn = task->conn->dd_data;
struct iser_device *device = iser_conn->ib_conn.device;
 
/* DEVICE_REMOVAL event might have already released the device */
if (!device)
return;
 
-   ib_dma_unmap_single(device->ib_device,
-   tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);
+   if (likely(tx_desc->mapped)) {
+   ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr,
+   ISER_HEADERS_LEN, DMA_TO_DEVICE);
+   tx_desc->mapped = false;
+   }
 
/* mgmt tasks do not need special cleanup */
if (!task->sc)
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h 
b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 262ba1f..d2b6caf 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -270,6 +270,7 @@ enum iser_desc_type {
  * sg[1] optionally points to either of immediate data
  * unsolicited data-out or control
  * @num_sge:   number sges used on this TX task
+ * @mapped:Is the task header mapped
  */
 struct iser_tx_desc {
struct iser_hdr  iser_header;
@@ -278,6 +279,7 @@ struct iser_tx_desc {
u64  dma_addr;
struct ib_sgetx_sg[2];
int  num_sge;
+   bool mapped;
 };
 
 #define ISER_RX_PAD_SIZE   (256 - (ISER_RX_PAYLOAD_SIZE + \
-- 
1.8.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 04/22] IB/iser: Fix missing return status check in iser_send_data_out

2015-07-30 Thread Sagi Grimberg
iser_initialize_task_headers() might fail.

Signed-off-by: Sagi Grimberg 
---
 drivers/infiniband/ulp/iser/iser_initiator.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c 
b/drivers/infiniband/ulp/iser/iser_initiator.c
index 2d02f04..174799e 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -454,7 +454,7 @@ int iser_send_data_out(struct iscsi_conn *conn,
unsigned long buf_offset;
unsigned long data_seg_len;
uint32_t itt;
-   int err = 0;
+   int err;
struct ib_sge *tx_dsg;
 
itt = (__force uint32_t)hdr->itt;
@@ -475,7 +475,9 @@ int iser_send_data_out(struct iscsi_conn *conn,
memcpy(&tx_desc->iscsi_header, hdr, sizeof(struct iscsi_hdr));
 
/* build the tx desc */
-   iser_initialize_task_headers(task, tx_desc);
+   err = iser_initialize_task_headers(task, tx_desc);
+   if (err)
+   goto send_data_out_error;
 
mem_reg = &iser_task->rdma_reg[ISER_DIR_OUT];
tx_dsg = &tx_desc->tx_sg[1];
@@ -502,7 +504,7 @@ int iser_send_data_out(struct iscsi_conn *conn,
 
 send_data_out_error:
kmem_cache_free(ig.desc_cache, tx_desc);
-   iser_err("conn %p failed err %d\n",conn, err);
+   iser_err("conn %p failed err %d\n", conn, err);
return err;
 }
 
-- 
1.8.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 09/22] IB/iser: Introduce struct iser_reg_resources

2015-07-30 Thread Sagi Grimberg
Have fast_reg_descriptor hold struct iser_reg_resources
(mr, frpl, valid flag). This will be useful when the
actual buffer registration routines will be passed with
the needed registration resources (i.e. iser_reg_resources)
without being aware of their nature (i.e. data or protection).

In order to achieve this, we remove reg_indicators flags container
and place specific flags (mr_valid) within iser_reg_resources struct.
We also place the sig_mr_valid and sig_protected flags in iser_pi_context.

This patch also modifies iser_fast_reg_mr to receive the
reg_resources instead of the fast_reg_descriptor and a data/protection
indicator.

Signed-off-by: Sagi Grimberg 
Signed-off-by: Adir Lev 
---
 drivers/infiniband/ulp/iser/iscsi_iser.h  |  36 ++-
 drivers/infiniband/ulp/iser/iser_memory.c |  35 +--
 drivers/infiniband/ulp/iser/iser_verbs.c  | 101 +-
 3 files changed, 91 insertions(+), 81 deletions(-)

diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h 
b/drivers/infiniband/ulp/iser/iscsi_iser.h
index d2b6caf..9cdfdbd 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -367,41 +367,45 @@ struct iser_device {
 #define ISER_CHECK_REFTAG  0x0f
 #define ISER_CHECK_APPTAG  0x30
 
-enum iser_reg_indicator {
-   ISER_DATA_KEY_VALID = 1 << 0,
-   ISER_PROT_KEY_VALID = 1 << 1,
-   ISER_SIG_KEY_VALID  = 1 << 2,
-   ISER_FASTREG_PROTECTED  = 1 << 3,
+/**
+ * struct iser_reg_resources - Fast registration recources
+ *
+ * @mr: memory region
+ * @frpl:   fast reg page list
+ * @mr_valid:   is mr valid indicator
+ */
+struct iser_reg_resources {
+   struct ib_mr *mr;
+   struct ib_fast_reg_page_list *frpl;
+   u8mr_valid:1;
 };
 
 /**
  * struct iser_pi_context - Protection information context
  *
- * @prot_mr:protection memory region
- * @prot_frpl:  protection fastreg page list
- * @sig_mr: signature feature enabled memory region
+ * @rsc: protection buffer registration resources
+ * @sig_mr:  signature enable memory region
+ * @sig_mr_valid:is sig_mr valid indicator
+ * @sig_protected:   is region protected indicator
  */
 struct iser_pi_context {
-   struct ib_mr   *prot_mr;
-   struct ib_fast_reg_page_list   *prot_frpl;
+   struct iser_reg_resources   rsc;
struct ib_mr   *sig_mr;
+   u8  sig_mr_valid:1;
+   u8  sig_protected:1;
 };
 
 /**
  * struct fast_reg_descriptor - Fast registration descriptor
  *
  * @list:   entry in connection fastreg pool
- * @data_mr:data memory region
- * @data_frpl:  data fastreg page list
+ * @rsc:data buffer registration resources
  * @pi_ctx: protection information context
- * @reg_indicators: fast registration indicators
  */
 struct fast_reg_descriptor {
struct list_head  list;
-   struct ib_mr *data_mr;
-   struct ib_fast_reg_page_list *data_frpl;
+   struct iser_reg_resources rsc;
struct iser_pi_context   *pi_ctx;
-   u8reg_indicators;
 };
 
 /**
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c 
b/drivers/infiniband/ulp/iser/iser_memory.c
index 56dd53b..e6516bc 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -647,13 +647,12 @@ iser_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr)
 
 static int
 iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
-   struct fast_reg_descriptor *desc,
+   struct iser_pi_context *pi_ctx,
struct iser_mem_reg *data_reg,
struct iser_mem_reg *prot_reg,
struct iser_mem_reg *sig_reg)
 {
struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
-   struct iser_pi_context *pi_ctx = desc->pi_ctx;
struct ib_send_wr sig_wr, inv_wr;
struct ib_send_wr *bad_wr, *wr = NULL;
struct ib_sig_attrs sig_attrs;
@@ -666,7 +665,7 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
 
iser_set_prot_checks(iser_task->sc, &sig_attrs.check_mask);
 
-   if (!(desc->reg_indicators & ISER_SIG_KEY_VALID)) {
+   if (!pi_ctx->sig_mr_valid) {
iser_inv_rkey(&inv_wr, pi_ctx->sig_mr);
wr = &inv_wr;
}
@@ -694,7 +693,7 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
iser_err("reg_sig_mr failed, ret:%d\n", ret);
goto err;
}
-   desc->reg_indicators &= ~ISER_SIG_KEY_VALID;
+   pi_ctx->sig_mr_valid = 0;
 
sig_reg->sge.lkey = pi_ctx->sig_mr->lkey;
sig_reg->rkey = pi_

[PATCH 12/22] IB/iser: Introduce iser_reg_ops

2015-07-30 Thread Sagi Grimberg
Move all the per-device function pointers to an easy
extensible iser_reg_ops structure that contains all
the iser registration operations.

Signed-off-by: Sagi Grimberg 
---
 drivers/infiniband/ulp/iser/iscsi_iser.h | 39 ++--
 drivers/infiniband/ulp/iser/iser_initiator.c | 16 ++--
 drivers/infiniband/ulp/iser/iser_memory.c| 35 +
 drivers/infiniband/ulp/iser/iser_verbs.c | 30 +
 4 files changed, 75 insertions(+), 45 deletions(-)

diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h 
b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 70bf6e7..9ce090c 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -326,6 +326,25 @@ struct iser_comp {
 };
 
 /**
+ * struct iser_device - Memory registration operations
+ * per-device registration schemes
+ *
+ * @alloc_reg_res: Allocate registration resources
+ * @free_reg_res:  Free registration resources
+ * @reg_rdma_mem:  Register memory buffers
+ * @unreg_rdma_mem:Un-register memory buffers
+ */
+struct iser_reg_ops {
+   int(*alloc_reg_res)(struct ib_conn *ib_conn,
+   unsigned cmds_max);
+   void   (*free_reg_res)(struct ib_conn *ib_conn);
+   int(*reg_rdma_mem)(struct iscsi_iser_task *iser_task,
+  enum iser_data_dir cmd_dir);
+   void   (*unreg_rdma_mem)(struct iscsi_iser_task *iser_task,
+enum iser_data_dir cmd_dir);
+};
+
+/**
  * struct iser_device - iSER device handle
  *
  * @ib_device: RDMA device
@@ -338,11 +357,7 @@ struct iser_comp {
  * @comps_used:Number of completion contexts used, Min between online
  * cpus and device max completion vectors
  * @comps: Dinamically allocated array of completion handlers
- * Memory registration pool Function pointers (FMR or Fastreg):
- * @iser_alloc_rdma_reg_res: Allocation of memory regions pool
- * @iser_free_rdma_reg_res:  Free of memory regions pool
- * @iser_reg_rdma_mem:   Memory registration routine
- * @iser_unreg_rdma_mem: Memory deregistration routine
+ * @reg_ops:   Registration ops
  */
 struct iser_device {
struct ib_device *ib_device;
@@ -354,13 +369,7 @@ struct iser_device {
int  refcount;
int  comps_used;
struct iser_comp *comps;
-   int  (*iser_alloc_rdma_reg_res)(struct ib_conn 
*ib_conn,
-   unsigned 
cmds_max);
-   void (*iser_free_rdma_reg_res)(struct ib_conn 
*ib_conn);
-   int  (*iser_reg_rdma_mem)(struct 
iscsi_iser_task *iser_task,
- enum iser_data_dir 
cmd_dir);
-   void (*iser_unreg_rdma_mem)(struct 
iscsi_iser_task *iser_task,
-   enum iser_data_dir 
cmd_dir);
+   struct iser_reg_ops  *reg_ops;
 };
 
 #define ISER_CHECK_GUARD   0xc0
@@ -563,6 +572,8 @@ extern int iser_debug_level;
 extern bool iser_pi_enable;
 extern int iser_pi_guard;
 
+int iser_assign_reg_ops(struct iser_device *device);
+
 int iser_send_control(struct iscsi_conn *conn,
  struct iscsi_task *task);
 
@@ -636,9 +647,9 @@ int  iser_initialize_task_headers(struct iscsi_task *task,
struct iser_tx_desc *tx_desc);
 int iser_alloc_rx_descriptors(struct iser_conn *iser_conn,
  struct iscsi_session *session);
-int iser_create_fmr_pool(struct ib_conn *ib_conn, unsigned cmds_max);
+int iser_alloc_fmr_pool(struct ib_conn *ib_conn, unsigned cmds_max);
 void iser_free_fmr_pool(struct ib_conn *ib_conn);
-int iser_create_fastreg_pool(struct ib_conn *ib_conn, unsigned cmds_max);
+int iser_alloc_fastreg_pool(struct ib_conn *ib_conn, unsigned cmds_max);
 void iser_free_fastreg_pool(struct ib_conn *ib_conn);
 u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
 enum iser_data_dir cmd_dir, sector_t *sector);
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c 
b/drivers/infiniband/ulp/iser/iser_initiator.c
index 42d6f42..88d8a89 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -73,7 +73,7 @@ static int iser_prepare_read_cmd(struct iscsi_task *task)
return err;
}
 
-   err = device->iser_reg_rdma_mem(iser_task, ISER_DIR_IN);
+   err = device->reg_ops->reg_rdma_mem(iser_task, ISER_DIR_IN);
if (err) {
iser_err("Failed to set up Data-IN RDMA\n");
return err;
@@ -128,7 +128,7 @@ iser_prepa

[PATCH 11/22] IB/iser: Remove dead code in fmr_pool alloc/free

2015-07-30 Thread Sagi Grimberg
In the past we always tried to allocate an fmr_pool
and if it failed on ENOSYS (not supported) then we continued
with dma mr. This is not the case anymore and if we tried to
allocate an fmr_pool then it is supported and we expect to succeed.

Also, the check whether the fmr_pool is allocated when free is
called is redundant, as we are guaranteed it exists.

Signed-off-by: Sagi Grimberg 
---
 drivers/infiniband/ulp/iser/iser_verbs.c | 26 ++
 1 file changed, 10 insertions(+), 16 deletions(-)

diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c 
b/drivers/infiniband/ulp/iser/iser_verbs.c
index f7828e3..2a0cb42 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -244,22 +244,18 @@ int iser_create_fmr_pool(struct ib_conn *ib_conn, 
unsigned cmds_max)
IB_ACCESS_REMOTE_READ);
 
ib_conn->fmr.pool = ib_create_fmr_pool(device->pd, ¶ms);
-   if (!IS_ERR(ib_conn->fmr.pool))
-   return 0;
+   if (IS_ERR(ib_conn->fmr.pool)) {
+   ret = PTR_ERR(ib_conn->fmr.pool);
+   iser_err("FMR allocation failed, err %d\n", ret);
+   goto err;
+   }
+
+   return 0;
 
-   /* no FMR => no need for page_vec */
+err:
kfree(ib_conn->fmr.page_vec);
ib_conn->fmr.page_vec = NULL;
-
-   ret = PTR_ERR(ib_conn->fmr.pool);
-   ib_conn->fmr.pool = NULL;
-   if (ret != -ENOSYS) {
-   iser_err("FMR allocation failed, err %d\n", ret);
-   return ret;
-   } else {
-   iser_warn("FMRs are not supported, using unaligned mode\n");
-   return 0;
-   }
+   return ret;
 }
 
 /**
@@ -270,9 +266,7 @@ void iser_free_fmr_pool(struct ib_conn *ib_conn)
iser_info("freeing conn %p fmr pool %p\n",
  ib_conn, ib_conn->fmr.pool);
 
-   if (ib_conn->fmr.pool != NULL)
-   ib_destroy_fmr_pool(ib_conn->fmr.pool);
-
+   ib_destroy_fmr_pool(ib_conn->fmr.pool);
ib_conn->fmr.pool = NULL;
 
kfree(ib_conn->fmr.page_vec);
-- 
1.8.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 01/22] IB/iser: Change some module parameters to be RO

2015-07-30 Thread Sagi Grimberg
From: Jenny Falkovich 

While we're at it, use permission defines instead
of octal values and rearrange a little bit.

Signed-off-by: Jenny Derzhavetz 
Signed-off-by: Sagi Grimberg 
---
 drivers/infiniband/ulp/iser/iscsi_iser.c | 31 +++
 1 file changed, 15 insertions(+), 16 deletions(-)

diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c 
b/drivers/infiniband/ulp/iser/iscsi_iser.c
index c969fc1..c7cea25 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -74,34 +74,33 @@
 
 #include "iscsi_iser.h"
 
+MODULE_DESCRIPTION("iSER (iSCSI Extensions for RDMA) Datamover");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Alex Nezhinsky, Dan Bar Dov, Or Gerlitz");
+MODULE_VERSION(DRV_VER);
+
 static struct scsi_host_template iscsi_iser_sht;
 static struct iscsi_transport iscsi_iser_transport;
 static struct scsi_transport_template *iscsi_iser_scsi_transport;
+static struct workqueue_struct *release_wq;
+struct iser_global ig;
+
+int iser_debug_level = 0;
+module_param_named(debug_level, iser_debug_level, int, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0 
(default:disabled)");
 
 static unsigned int iscsi_max_lun = 512;
 module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO);
+MODULE_PARM_DESC(max_lun, "Max LUNs to allow per session (default:512");
 
-int iser_debug_level = 0;
 bool iser_pi_enable = false;
-int iser_pi_guard = 1;
-
-MODULE_DESCRIPTION("iSER (iSCSI Extensions for RDMA) Datamover");
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_AUTHOR("Alex Nezhinsky, Dan Bar Dov, Or Gerlitz");
-MODULE_VERSION(DRV_VER);
-
-module_param_named(debug_level, iser_debug_level, int, 0644);
-MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0 
(default:disabled)");
-
-module_param_named(pi_enable, iser_pi_enable, bool, 0644);
+module_param_named(pi_enable, iser_pi_enable, bool, S_IRUGO);
 MODULE_PARM_DESC(pi_enable, "Enable T10-PI offload support 
(default:disabled)");
 
-module_param_named(pi_guard, iser_pi_guard, int, 0644);
+int iser_pi_guard;
+module_param_named(pi_guard, iser_pi_guard, int, S_IRUGO);
 MODULE_PARM_DESC(pi_guard, "T10-PI guard_type [deprecated]");
 
-static struct workqueue_struct *release_wq;
-struct iser_global ig;
-
 /*
  * iscsi_iser_recv() - Process a successfull recv completion
  * @conn: iscsi connection
-- 
1.8.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 00/22] iser patches for 4.3

2015-07-30 Thread Sagi Grimberg
This set is a resend that includes some extra patches that
piled up in the meantime.

I still have some patches in the pipe (including initiator/target
support for remote invalidate) but I'm targeting those to 4.4

This patch set includes:
- Small fixes for bugs encountered in testing
- Small fixes detected by static checkers
- Memory registration code path rework (consolidate to
  a single code path that branches only at the actual registration
  FRWR vs. FMR). This reduces code duplication that exists in current code.
- Larger IO transfer size support (up to 8MB at the moment) depending on
  the device capabilities.
- Optimize I/O path by chaining send work requests and posting them
  only once.

Adir Lev (1):
  IB/iser: Maintain connection fmr_pool under a single registration
descriptor

Jenny Falkovich (1):
  IB/iser: Change some module parameters to be RO

Sagi Grimberg (20):
  IB/iser: Change minor assignments and logging prints
  IB/iser: Remove '.' from log message
  IB/iser: Fix missing return status check in iser_send_data_out
  IB/iser: Get rid of un-maintained counters
  IB/iser: Fix possible bogus DMA unmapping
  IB/iser: Remove a redundant always-false condition
  IB/iser: Remove an unneeded print for unaligned memory
  IB/iser: Introduce struct iser_reg_resources
  IB/iser: Rename struct fast_reg_descriptor -> iser_fr_desc
  IB/iser: Remove dead code in fmr_pool alloc/free
  IB/iser: Introduce iser_reg_ops
  IB/iser: Move fastreg descriptor allocation to
iser_create_fastreg_desc
  IB/iser: Introduce iser registration pool struct
  IB/iser: Rename iser_reg_page_vec to iser_fast_reg_fmr
  IB/iser: Make reg_desc_get a per device routine
  IB/iser: Unify fast memory registration flows
  IB/iser: Pass registration pool a size parameter
  IB/iser: Support up to 8MB data transfer in a single command
  IB/iser: Add debug prints to the various memory registration methods
  IB/iser: Chain all iser transaction send work requests

 drivers/infiniband/ulp/iser/iscsi_iser.c |  89 +++--
 drivers/infiniband/ulp/iser/iscsi_iser.h | 206 
 drivers/infiniband/ulp/iser/iser_initiator.c |  34 +-
 drivers/infiniband/ulp/iser/iser_memory.c| 480 +++
 drivers/infiniband/ulp/iser/iser_verbs.c | 328 ++
 5 files changed, 645 insertions(+), 492 deletions(-)

-- 
1.8.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


<    1   2   3   4   5   6   7   8   9   10   >