Hi Wengang,

I was going through your patch set here, and it seems that you missed changing 
kfree to kvfree in mlx4_ib_destroy_srq().  With this patch applied, if srq->wrid 
is allocated by the vmalloc fallback, cleanup will still free it with kfree, 
which is a bug.

Thanks,
-matt




On 10/7/15, 10:27 PM, "linux-rdma-ow...@vger.kernel.org on behalf of Wengang 
Wang" <linux-rdma-ow...@vger.kernel.org on behalf of wen.gang.w...@oracle.com> 
wrote:

>There have been several reports of WR buffer allocation (kmalloc) failing.
>It failed on order-3 and/or order-4 contiguous page allocations. At the same
>time there was actually 100MB+ of free memory, but it was heavily fragmented.
>So fall back to vmalloc when kmalloc fails.
>
>Signed-off-by: Wengang Wang <wen.gang.w...@oracle.com>
>Acked-by: Or Gerlitz <ogerl...@mellanox.com>
>---
> drivers/infiniband/hw/mlx4/qp.c  | 19 +++++++++++++------
> drivers/infiniband/hw/mlx4/srq.c | 11 ++++++++---
> 2 files changed, 21 insertions(+), 9 deletions(-)
>
>diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
>index 4ad9be3..3ccbd3a 100644
>--- a/drivers/infiniband/hw/mlx4/qp.c
>+++ b/drivers/infiniband/hw/mlx4/qp.c
>@@ -34,6 +34,7 @@
> #include <linux/log2.h>
> #include <linux/slab.h>
> #include <linux/netdevice.h>
>+#include <linux/vmalloc.h>
> 
> #include <rdma/ib_cache.h>
> #include <rdma/ib_pack.h>
>@@ -786,8 +787,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
>               if (err)
>                       goto err_mtt;
> 
>-              qp->sq.wrid  = kmalloc(qp->sq.wqe_cnt * sizeof (u64), gfp);
>-              qp->rq.wrid  = kmalloc(qp->rq.wqe_cnt * sizeof (u64), gfp);
>+              qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof(u64), gfp);
>+              if (!qp->sq.wrid)
>+                      qp->sq.wrid = __vmalloc(qp->sq.wqe_cnt * sizeof(u64),
>+                                              gfp, PAGE_KERNEL);
>+              qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof(u64), gfp);
>+              if (!qp->rq.wrid)
>+                      qp->rq.wrid = __vmalloc(qp->rq.wqe_cnt * sizeof(u64),
>+                                              gfp, PAGE_KERNEL);
>               if (!qp->sq.wrid || !qp->rq.wrid) {
>                       err = -ENOMEM;
>                       goto err_wrid;
>@@ -874,8 +881,8 @@ err_wrid:
>               if (qp_has_rq(init_attr))
>                       mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db);
>       } else {
>-              kfree(qp->sq.wrid);
>-              kfree(qp->rq.wrid);
>+              kvfree(qp->sq.wrid);
>+              kvfree(qp->rq.wrid);
>       }
> 
> err_mtt:
>@@ -1050,8 +1057,8 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
>                                             &qp->db);
>               ib_umem_release(qp->umem);
>       } else {
>-              kfree(qp->sq.wrid);
>-              kfree(qp->rq.wrid);
>+              kvfree(qp->sq.wrid);
>+              kvfree(qp->rq.wrid);
>               if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
>                   MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))
>                       free_proxy_bufs(&dev->ib_dev, qp);
>diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
>index dce5dfe..8d133c4 100644
>--- a/drivers/infiniband/hw/mlx4/srq.c
>+++ b/drivers/infiniband/hw/mlx4/srq.c
>@@ -34,6 +34,7 @@
> #include <linux/mlx4/qp.h>
> #include <linux/mlx4/srq.h>
> #include <linux/slab.h>
>+#include <linux/vmalloc.h>
> 
> #include "mlx4_ib.h"
> #include "user.h"
>@@ -172,8 +173,12 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
> 
>               srq->wrid = kmalloc(srq->msrq.max * sizeof (u64), GFP_KERNEL);
>               if (!srq->wrid) {
>-                      err = -ENOMEM;
>-                      goto err_mtt;
>+                      srq->wrid = __vmalloc(srq->msrq.max * sizeof(u64),
>+                                            GFP_KERNEL, PAGE_KERNEL);
>+                      if (!srq->wrid) {
>+                              err = -ENOMEM;
>+                              goto err_mtt;
>+                      }
>               }
>       }
> 
>@@ -204,7 +209,7 @@ err_wrid:
>       if (pd->uobject)
>               mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &srq->db);
>       else
>-              kfree(srq->wrid);
>+              kvfree(srq->wrid);
> 
> err_mtt:
>       mlx4_mtt_cleanup(dev->dev, &srq->mtt);
>-- 
>2.1.0
>
>--
>To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
>the body of a message to majord...@vger.kernel.org
>More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to