[PATCH] IB/mlx4: Use vmalloc for WR buffers when needed

2015-09-23 Thread Wengang Wang
There are several hits that WR buffer allocation(kmalloc) failed.
It failed at order 3 and/or 4 contigous pages allocation. At the same time
there are actually 100MB+ free memory but well fragmented.
So try vmalloc when kmalloc failed.

Signed-off-by: Wengang Wang 
---
 drivers/infiniband/hw/mlx4/qp.c  | 34 --
 drivers/infiniband/hw/mlx4/srq.c | 14 +++---
 2 files changed, 39 insertions(+), 9 deletions(-)

diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 4ad9be3..da551e1 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -34,6 +34,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -786,8 +787,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, 
struct ib_pd *pd,
if (err)
goto err_mtt;
 
-   qp->sq.wrid  = kmalloc(qp->sq.wqe_cnt * sizeof (u64), gfp);
-   qp->rq.wrid  = kmalloc(qp->rq.wqe_cnt * sizeof (u64), gfp);
+   qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof(u64), gfp);
+   if (!qp->sq.wrid)
+   qp->sq.wrid = __vmalloc(qp->sq.wqe_cnt * sizeof(u64),
+   gfp, PAGE_KERNEL);
+   qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof(u64), gfp);
+   if (!qp->rq.wrid)
+   qp->rq.wrid = __vmalloc(qp->rq.wqe_cnt * sizeof(u64),
+   gfp, PAGE_KERNEL);
if (!qp->sq.wrid || !qp->rq.wrid) {
err = -ENOMEM;
goto err_wrid;
@@ -874,8 +881,15 @@ err_wrid:
if (qp_has_rq(init_attr))

mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db);
} else {
-   kfree(qp->sq.wrid);
-   kfree(qp->rq.wrid);
+   if (is_vmalloc_addr(qp->sq.wrid))
+   vfree(qp->sq.wrid);
+   else
+   kfree(qp->sq.wrid);
+
+   if (is_vmalloc_addr(qp->rq.wrid))
+   vfree(qp->rq.wrid);
+   else
+   kfree(qp->rq.wrid);
}
 
 err_mtt:
@@ -1050,8 +1064,16 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, 
struct mlx4_ib_qp *qp,
  &qp->db);
ib_umem_release(qp->umem);
} else {
-   kfree(qp->sq.wrid);
-   kfree(qp->rq.wrid);
+   if (is_vmalloc_addr(qp->sq.wrid))
+   vfree(qp->sq.wrid);
+   else
+   kfree(qp->sq.wrid);
+
+   if (is_vmalloc_addr(qp->rq.wrid))
+   vfree(qp->rq.wrid);
+   else
+   kfree(qp->rq.wrid);
+
if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))
free_proxy_bufs(&dev->ib_dev, qp);
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index dce5dfe..d0e8d40 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -34,6 +34,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "mlx4_ib.h"
 #include "user.h"
@@ -172,8 +173,12 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
 
srq->wrid = kmalloc(srq->msrq.max * sizeof (u64), GFP_KERNEL);
if (!srq->wrid) {
-   err = -ENOMEM;
-   goto err_mtt;
+   srq->wrid = __vmalloc(srq->msrq.max * sizeof(u64),
+ GFP_KERNEL, PAGE_KERNEL);
+   if (!srq->wrid) {
+   err = -ENOMEM;
+   goto err_mtt;
+   }
}
}
 
@@ -204,7 +209,10 @@ err_wrid:
if (pd->uobject)
mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), 
&srq->db);
else
-   kfree(srq->wrid);
+   if (is_vmalloc_addr(srq->wrid))
+   vfree(srq->wrid);
+   else
+   kfree(srq->wrid);
 
 err_mtt:
mlx4_mtt_cleanup(dev->dev, &srq->mtt);
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] IB/mlx4: Use vmalloc for WR buffers when needed

2015-09-24 Thread Wengang Wang
There are several hits that WR buffer allocation(kmalloc) failed.
It failed at order 3 and/or 4 contigous pages allocation. At the same time
there are actually 100MB+ free memory but well fragmented.
So try vmalloc when kmalloc failed.

Signed-off-by: Wengang Wang 
---
 drivers/infiniband/hw/mlx4/qp.c  | 20 ++--
 drivers/infiniband/hw/mlx4/srq.c | 11 ---
 2 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 4ad9be3..f152d8a 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -34,6 +34,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -786,8 +787,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, 
struct ib_pd *pd,
if (err)
goto err_mtt;
 
-   qp->sq.wrid  = kmalloc(qp->sq.wqe_cnt * sizeof (u64), gfp);
-   qp->rq.wrid  = kmalloc(qp->rq.wqe_cnt * sizeof (u64), gfp);
+   qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof(u64), gfp);
+   if (!qp->sq.wrid)
+   qp->sq.wrid = __vmalloc(qp->sq.wqe_cnt * sizeof(u64),
+   gfp, PAGE_KERNEL);
+   qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof(u64), gfp);
+   if (!qp->rq.wrid)
+   qp->rq.wrid = __vmalloc(qp->rq.wqe_cnt * sizeof(u64),
+   gfp, PAGE_KERNEL);
if (!qp->sq.wrid || !qp->rq.wrid) {
err = -ENOMEM;
goto err_wrid;
@@ -874,8 +881,8 @@ err_wrid:
if (qp_has_rq(init_attr))

mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db);
} else {
-   kfree(qp->sq.wrid);
-   kfree(qp->rq.wrid);
+   kvfree(qp->sq.wrid);
+   kvfree(qp->rq.wrid);
}
 
 err_mtt:
@@ -1050,8 +1057,9 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, 
struct mlx4_ib_qp *qp,
  &qp->db);
ib_umem_release(qp->umem);
} else {
-   kfree(qp->sq.wrid);
-   kfree(qp->rq.wrid);
+   kvfree(qp->sq.wrid);
+   kvfree(qp->rq.wrid);
+
if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))
free_proxy_bufs(&dev->ib_dev, qp);
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index dce5dfe..8d133c4 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -34,6 +34,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "mlx4_ib.h"
 #include "user.h"
@@ -172,8 +173,12 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
 
srq->wrid = kmalloc(srq->msrq.max * sizeof (u64), GFP_KERNEL);
if (!srq->wrid) {
-   err = -ENOMEM;
-   goto err_mtt;
+   srq->wrid = __vmalloc(srq->msrq.max * sizeof(u64),
+ GFP_KERNEL, PAGE_KERNEL);
+   if (!srq->wrid) {
+   err = -ENOMEM;
+   goto err_mtt;
+   }
}
}
 
@@ -204,7 +209,7 @@ err_wrid:
if (pd->uobject)
mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), 
&srq->db);
else
-   kfree(srq->wrid);
+   kvfree(srq->wrid);
 
 err_mtt:
mlx4_mtt_cleanup(dev->dev, &srq->mtt);
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] IB/mlx4: Use vmalloc for WR buffers when needed

2015-09-24 Thread Or Gerlitz

On 9/24/2015 9:52 AM, Wengang Wang wrote:

} else {
-   kfree(qp->sq.wrid);
-   kfree(qp->rq.wrid);
+   if (is_vmalloc_addr(qp->sq.wrid))
+   vfree(qp->sq.wrid);
+   else
+   kfree(qp->sq.wrid);
+
+   if (is_vmalloc_addr(qp->rq.wrid))
+   vfree(qp->rq.wrid);
+   else
+   kfree(qp->rq.wrid);
}


NO

just call kvfree, see commit 914efb0 "mlx4: don't duplicate kvfree()"
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] IB/mlx4: Use vmalloc for WR buffers when needed

2015-09-24 Thread Wengang Wang


在 2015年09月24日 16:07, Or Gerlitz 写道:

On 9/24/2015 9:52 AM, Wengang Wang wrote:

  } else {
-kfree(qp->sq.wrid);
-kfree(qp->rq.wrid);
+if (is_vmalloc_addr(qp->sq.wrid))
+vfree(qp->sq.wrid);
+else
+kfree(qp->sq.wrid);
+
+if (is_vmalloc_addr(qp->rq.wrid))
+vfree(qp->rq.wrid);
+else
+kfree(qp->rq.wrid);
  }


NO

just call kvfree, see commit 914efb0 "mlx4: don't duplicate kvfree()"


Yeap, will re-post.

thanks,
wengang

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] IB/mlx4: Use vmalloc for WR buffers when needed

2015-09-24 Thread Or Gerlitz
On Thu, Sep 24, 2015 at 1:49 PM, Wengang Wang  wrote:
> @@ -786,8 +787,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, 
> struct ib_pd *pd,
> if (err)
> goto err_mtt;
>
> -   qp->sq.wrid  = kmalloc(qp->sq.wqe_cnt * sizeof (u64), gfp);
> -   qp->rq.wrid  = kmalloc(qp->rq.wqe_cnt * sizeof (u64), gfp);
> +   qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof(u64), gfp);
> +   if (!qp->sq.wrid)
> +   qp->sq.wrid = __vmalloc(qp->sq.wqe_cnt * sizeof(u64),
> +   gfp, PAGE_KERNEL);

On other spots of mlx4, we're using vmalloc and not __vmalloc, any
pros/cons for going that way too here?
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] IB/mlx4: Use vmalloc for WR buffers when needed

2015-09-24 Thread Wengang Wang

Hi Or,

在 2015年09月24日 19:57, Or Gerlitz 写道:

On Thu, Sep 24, 2015 at 1:49 PM, Wengang Wang  wrote:

@@ -786,8 +787,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, 
struct ib_pd *pd,
 if (err)
 goto err_mtt;

-   qp->sq.wrid  = kmalloc(qp->sq.wqe_cnt * sizeof (u64), gfp);
-   qp->rq.wrid  = kmalloc(qp->rq.wqe_cnt * sizeof (u64), gfp);
+   qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof(u64), gfp);
+   if (!qp->sq.wrid)
+   qp->sq.wrid = __vmalloc(qp->sq.wqe_cnt * sizeof(u64),
+   gfp, PAGE_KERNEL);

On other spots of mlx4, we're using vmalloc and not __vmalloc, any
pros/cons for going that way too here?


vmalloc is just using GFP_KERNEL | __GFP_HIGHMEM, we can't pass in the 
flag gfp with it.  We should respect orginal code which needs to pass 
the flag.


thanks,
wengang

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] IB/mlx4: Use vmalloc for WR buffers when needed

2015-10-07 Thread Or Gerlitz

On 9/24/2015 1:49 PM, Wengang Wang wrote:

@@ -1050,8 +1057,9 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, 
struct mlx4_ib_qp *qp,
  &qp->db);
ib_umem_release(qp->umem);
} else {
-   kfree(qp->sq.wrid);
-   kfree(qp->rq.wrid);
+   kvfree(qp->sq.wrid);
+   kvfree(qp->rq.wrid);
+
if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))
free_proxy_bufs(&dev->ib_dev, qp);


when you fix issue A you don't conduct cleanup for issue B, remove the 
new line added here andadd


Acked-by: Or Gerlitz 

When you re-submit patches makes sure to maintain version, e.g the 
re-post would be [PATCH V2] (I believe

this is going to be the 3rd version, so V0... V1 ... and now V2)
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] IB/mlx4: Use vmalloc for WR buffers when needed

2015-10-07 Thread Leon Romanovsky
On Fri, Sep 25, 2015 at 08:51:22AM +0800, Wengang Wang wrote:
> Hi Or,
> 
> 在 2015年09月24日 19:57, Or Gerlitz 写道:
> >On Thu, Sep 24, 2015 at 1:49 PM, Wengang Wang  
> >wrote:
> >>@@ -786,8 +787,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, 
> >>struct ib_pd *pd,
> >> if (err)
> >> goto err_mtt;
> >>
> >>-   qp->sq.wrid  = kmalloc(qp->sq.wqe_cnt * sizeof (u64), gfp);
> >>-   qp->rq.wrid  = kmalloc(qp->rq.wqe_cnt * sizeof (u64), gfp);
> >>+   qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof(u64), gfp);
> >>+   if (!qp->sq.wrid)
> >>+   qp->sq.wrid = __vmalloc(qp->sq.wqe_cnt * 
> >>sizeof(u64),
> >>+   gfp, PAGE_KERNEL);
> >On other spots of mlx4, we're using vmalloc and not __vmalloc, any
> >pros/cons for going that way too here?
> 
> vmalloc is just using GFP_KERNEL | __GFP_HIGHMEM, we can't pass in the flag
> gfp with it.  We should respect orginal code which needs to pass the flag.
Additionally, I want to spot Or's attention on the following discussion
in MM-subsystem about kmalloc/vmalloc and general function to fallback
from one to another.

[1] [PATCH 2/7] mm: introduce kvmalloc and kvmalloc_node
https://lkml.org/lkml/2015/7/7/548
[2] [PATCH 0/7] mm: reliable memory allocation with kvmalloc
https://lkml.org/lkml/2015/7/7/545

> 
> thanks,
> wengang
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] IB/mlx4: Use vmalloc for WR buffers when needed

2015-10-07 Thread Or Gerlitz

On 10/8/2015 9:06 AM, Leon Romanovsky wrote:

Additionally, I want to spot Or's attention on the following discussion
in MM-subsystem about kmalloc/vmalloc and general function to fallback
from one to another.




too busy to read them now, if you have review comments speak now and 
provide them to the author.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] IB/mlx4: Use vmalloc for WR buffers when needed

2015-10-07 Thread Leon Romanovsky
On Thu, Oct 8, 2015 at 9:14 AM, Or Gerlitz  wrote:
> On 10/8/2015 9:06 AM, Leon Romanovsky wrote:
>>
>> Additionally, I want to spot Or's attention on the following discussion
>> in MM-subsystem about kmalloc/vmalloc and general function to fallback
>> from one to another.
>>
>>
>
> too busy to read them now, if you have review comments speak now and provide
> them to the author.
My comments to author that from my point of view this patch is a
correct to fix current behaviour.

The more general solution (I doubt if it is feasible) is to decrease
the dependency on high order allocations.

>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html