Re: [Qemu-devel] [PATCH v3 1/4] hw/rdma: Add SRQ support to backend layer

2019-04-07 Thread Yuval Shaia
On Sun, Apr 07, 2019 at 11:13:15AM +0300, Kamal Heib wrote:
> 
> 
> On 4/3/19 9:05 PM, Yuval Shaia wrote:
> > On Wed, Apr 03, 2019 at 02:33:40PM +0300, Kamal Heib wrote:
> >> Add the required functions and definitions to support shared receive
> >> queues (SRQs) in the backend layer.
> >>
> >> Signed-off-by: Kamal Heib 
> >> ---
> >>  hw/rdma/rdma_backend.c  | 116 +++-
> >>  hw/rdma/rdma_backend.h  |  12 
> >>  hw/rdma/rdma_backend_defs.h |   5 ++
> >>  hw/rdma/rdma_rm.c   |   2 +
> >>  hw/rdma/rdma_rm_defs.h  |   1 +
> >>  5 files changed, 134 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c
> >> index d1660b6474fa..04dfd63a573b 100644
> >> --- a/hw/rdma/rdma_backend.c
> >> +++ b/hw/rdma/rdma_backend.c
> >> @@ -40,6 +40,7 @@ typedef struct BackendCtx {
> >>  void *up_ctx;
> >>  struct ibv_sge sge; /* Used to save MAD recv buffer */
> >>  RdmaBackendQP *backend_qp; /* To maintain recv buffers */
> >> +RdmaBackendSRQ *backend_srq;
> >>  } BackendCtx;
> >>  
> >>  struct backend_umad {
> >> @@ -99,6 +100,7 @@ static int rdma_poll_cq(RdmaDeviceResources 
> >> *rdma_dev_res, struct ibv_cq *ibcq)
> >>  int i, ne, total_ne = 0;
> >>  BackendCtx *bctx;
> >>  struct ibv_wc wc[2];
> >> +RdmaProtectedGSList *cqe_ctx_list;
> >>  
> >>  qemu_mutex_lock(&rdma_dev_res->lock);
> >>  do {
> >> @@ -116,8 +118,13 @@ static int rdma_poll_cq(RdmaDeviceResources 
> >> *rdma_dev_res, struct ibv_cq *ibcq)
> >>  
> >>  comp_handler(bctx->up_ctx, &wc[i]);
> >>  
> >> -rdma_protected_gslist_remove_int32(&bctx->backend_qp->cqe_ctx_list,
> >> -   wc[i].wr_id);
> >> +if (bctx->backend_qp) {
> >> +cqe_ctx_list = &bctx->backend_qp->cqe_ctx_list;
> >> +} else {
> >> +cqe_ctx_list = &bctx->backend_srq->cqe_ctx_list;
> >> +}
> >> +
> >> +rdma_protected_gslist_remove_int32(cqe_ctx_list, wc[i].wr_id);
> >>  rdma_rm_dealloc_cqe_ctx(rdma_dev_res, wc[i].wr_id);
> >>  g_free(bctx);
> >>  }
> >> @@ -662,6 +669,60 @@ err_free_bctx:
> >>  g_free(bctx);
> >>  }
> >>  
> >> +void rdma_backend_post_srq_recv(RdmaBackendDev *backend_dev,
> >> +RdmaBackendSRQ *srq, struct ibv_sge *sge,
> >> +uint32_t num_sge, void *ctx)
> >> +{
> >> +BackendCtx *bctx;
> >> +struct ibv_sge new_sge[MAX_SGE];
> >> +uint32_t bctx_id;
> >> +int rc;
> >> +struct ibv_recv_wr wr = {}, *bad_wr;
> >> +
> >> +bctx = g_malloc0(sizeof(*bctx));
> >> +bctx->up_ctx = ctx;
> >> +bctx->backend_srq = srq;
> >> +
> >> +rc = rdma_rm_alloc_cqe_ctx(backend_dev->rdma_dev_res, &bctx_id, bctx);
> >> +if (unlikely(rc)) {
> >> +complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_NOMEM, ctx);
> >> +goto err_free_bctx;
> >> +}
> >> +
> >> +rdma_protected_gslist_append_int32(&srq->cqe_ctx_list, bctx_id);
> >> +
> >> +rc = build_host_sge_array(backend_dev->rdma_dev_res, new_sge, sge, num_sge,
> >> +  &backend_dev->rdma_dev_res->stats.rx_bufs_len);
> >> +if (rc) {
> >> +complete_work(IBV_WC_GENERAL_ERR, rc, ctx);
> >> +goto err_dealloc_cqe_ctx;
> >> +}
> >> +
> >> +wr.num_sge = num_sge;
> >> +wr.sg_list = new_sge;
> >> +wr.wr_id = bctx_id;
> >> +rc = ibv_post_srq_recv(srq->ibsrq, &wr, &bad_wr);
> >> +if (rc) {
> >> +rdma_error_report("ibv_post_srq_recv fail, srqn=0x%x, rc=%d, errno=%d",
> >> +  srq->ibsrq->handle, rc, errno);
> >> +complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_FAIL_BACKEND, ctx);
> >> +goto err_dealloc_cqe_ctx;
> >> +}
> >> +
> >> +atomic_inc(&backend_dev->rdma_dev_res->stats.missing_cqe);
> >> +backend_dev->rdma_dev_res->stats.rx_bufs++;
> >> +backend_dev->rdma_dev_res->stats.rx_srq++;
> > 
> > You should update function rdma_dump_device_counters with this new
> > counter.
> > 
> >> +
> >> +return;
> >> +
> >> +err_dealloc_cqe_ctx:
> >> +backend_dev->rdma_dev_res->stats.rx_bufs_err++;
> >> +rdma_rm_dealloc_cqe_ctx(backend_dev->rdma_dev_res, bctx_id);
> >> +
> >> +err_free_bctx:
> >> +g_free(bctx);
> >> +}
> >> +
> >>  int rdma_backend_create_pd(RdmaBackendDev *backend_dev, RdmaBackendPD *pd)
> >>  {
> >>  pd->ibpd = ibv_alloc_pd(backend_dev->context);
> >> @@ -938,6 +999,55 @@ void rdma_backend_destroy_qp(RdmaBackendQP *qp, 
> >> RdmaDeviceResources *dev_res)
> >>  rdma_protected_gslist_destroy(&qp->cqe_ctx_list);
> >>  }
> >>  
> >> +int rdma_backend_create_srq(RdmaBackendSRQ *srq, RdmaBackendPD *pd,
> >> +uint32_t max_wr, uint32_t max_sge,
> >> +uint32_t srq_limit)
> >> +{
> >> +struct ibv_srq_init_attr srq_init_attr = {};
> >> +
> >> +

Re: [Qemu-devel] [PATCH v3 1/4] hw/rdma: Add SRQ support to backend layer

2019-04-07 Thread Kamal Heib



On 4/3/19 9:05 PM, Yuval Shaia wrote:
> On Wed, Apr 03, 2019 at 02:33:40PM +0300, Kamal Heib wrote:
>> Add the required functions and definitions to support shared receive
>> queues (SRQs) in the backend layer.
>>
>> Signed-off-by: Kamal Heib 
>> ---
>>  hw/rdma/rdma_backend.c  | 116 +++-
>>  hw/rdma/rdma_backend.h  |  12 
>>  hw/rdma/rdma_backend_defs.h |   5 ++
>>  hw/rdma/rdma_rm.c   |   2 +
>>  hw/rdma/rdma_rm_defs.h  |   1 +
>>  5 files changed, 134 insertions(+), 2 deletions(-)
>>
>> diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c
>> index d1660b6474fa..04dfd63a573b 100644
>> --- a/hw/rdma/rdma_backend.c
>> +++ b/hw/rdma/rdma_backend.c
>> @@ -40,6 +40,7 @@ typedef struct BackendCtx {
>>  void *up_ctx;
>>  struct ibv_sge sge; /* Used to save MAD recv buffer */
>>  RdmaBackendQP *backend_qp; /* To maintain recv buffers */
>> +RdmaBackendSRQ *backend_srq;
>>  } BackendCtx;
>>  
>>  struct backend_umad {
>> @@ -99,6 +100,7 @@ static int rdma_poll_cq(RdmaDeviceResources 
>> *rdma_dev_res, struct ibv_cq *ibcq)
>>  int i, ne, total_ne = 0;
>>  BackendCtx *bctx;
>>  struct ibv_wc wc[2];
>> +RdmaProtectedGSList *cqe_ctx_list;
>>  
>>  qemu_mutex_lock(&rdma_dev_res->lock);
>>  do {
>> @@ -116,8 +118,13 @@ static int rdma_poll_cq(RdmaDeviceResources 
>> *rdma_dev_res, struct ibv_cq *ibcq)
>>  
>>  comp_handler(bctx->up_ctx, &wc[i]);
>>  
>> -rdma_protected_gslist_remove_int32(&bctx->backend_qp->cqe_ctx_list,
>> -   wc[i].wr_id);
>> +if (bctx->backend_qp) {
>> +cqe_ctx_list = &bctx->backend_qp->cqe_ctx_list;
>> +} else {
>> +cqe_ctx_list = &bctx->backend_srq->cqe_ctx_list;
>> +}
>> +
>> +rdma_protected_gslist_remove_int32(cqe_ctx_list, wc[i].wr_id);
>>  rdma_rm_dealloc_cqe_ctx(rdma_dev_res, wc[i].wr_id);
>>  g_free(bctx);
>>  }
>> @@ -662,6 +669,60 @@ err_free_bctx:
>>  g_free(bctx);
>>  }
>>  
>> +void rdma_backend_post_srq_recv(RdmaBackendDev *backend_dev,
>> +RdmaBackendSRQ *srq, struct ibv_sge *sge,
>> +uint32_t num_sge, void *ctx)
>> +{
>> +BackendCtx *bctx;
>> +struct ibv_sge new_sge[MAX_SGE];
>> +uint32_t bctx_id;
>> +int rc;
>> +struct ibv_recv_wr wr = {}, *bad_wr;
>> +
>> +bctx = g_malloc0(sizeof(*bctx));
>> +bctx->up_ctx = ctx;
>> +bctx->backend_srq = srq;
>> +
>> +rc = rdma_rm_alloc_cqe_ctx(backend_dev->rdma_dev_res, &bctx_id, bctx);
>> +if (unlikely(rc)) {
>> +complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_NOMEM, ctx);
>> +goto err_free_bctx;
>> +}
>> +
>> +rdma_protected_gslist_append_int32(&srq->cqe_ctx_list, bctx_id);
>> +
>> +rc = build_host_sge_array(backend_dev->rdma_dev_res, new_sge, sge, num_sge,
>> +  &backend_dev->rdma_dev_res->stats.rx_bufs_len);
>> +if (rc) {
>> +complete_work(IBV_WC_GENERAL_ERR, rc, ctx);
>> +goto err_dealloc_cqe_ctx;
>> +}
>> +
>> +wr.num_sge = num_sge;
>> +wr.sg_list = new_sge;
>> +wr.wr_id = bctx_id;
>> +rc = ibv_post_srq_recv(srq->ibsrq, &wr, &bad_wr);
>> +if (rc) {
>> +rdma_error_report("ibv_post_srq_recv fail, srqn=0x%x, rc=%d, errno=%d",
>> +  srq->ibsrq->handle, rc, errno);
>> +complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_FAIL_BACKEND, ctx);
>> +goto err_dealloc_cqe_ctx;
>> +}
>> +
>> +atomic_inc(&backend_dev->rdma_dev_res->stats.missing_cqe);
>> +backend_dev->rdma_dev_res->stats.rx_bufs++;
>> +backend_dev->rdma_dev_res->stats.rx_srq++;
> 
> You should update function rdma_dump_device_counters with this new
> counter.
> 
>> +
>> +return;
>> +
>> +err_dealloc_cqe_ctx:
>> +backend_dev->rdma_dev_res->stats.rx_bufs_err++;
>> +rdma_rm_dealloc_cqe_ctx(backend_dev->rdma_dev_res, bctx_id);
>> +
>> +err_free_bctx:
>> +g_free(bctx);
>> +}
>> +
>>  int rdma_backend_create_pd(RdmaBackendDev *backend_dev, RdmaBackendPD *pd)
>>  {
>>  pd->ibpd = ibv_alloc_pd(backend_dev->context);
>> @@ -938,6 +999,55 @@ void rdma_backend_destroy_qp(RdmaBackendQP *qp, 
>> RdmaDeviceResources *dev_res)
>>  rdma_protected_gslist_destroy(&qp->cqe_ctx_list);
>>  }
>>  
>> +int rdma_backend_create_srq(RdmaBackendSRQ *srq, RdmaBackendPD *pd,
>> +uint32_t max_wr, uint32_t max_sge,
>> +uint32_t srq_limit)
>> +{
>> +struct ibv_srq_init_attr srq_init_attr = {};
>> +
>> +srq_init_attr.attr.max_wr = max_wr;
>> +srq_init_attr.attr.max_sge = max_sge;
>> +srq_init_attr.attr.srq_limit = srq_limit;
>> +
>> +srq->ibsrq = ibv_create_srq(pd->ibpd, &srq_init_attr);
>> +if (!srq->ibsrq) {
>> +rdma_error_report("ibv_create_srq failed, errno=%d", errno);
>> +return -EIO;
>> +}
>> +
>> +

Re: [Qemu-devel] [PATCH v3 1/4] hw/rdma: Add SRQ support to backend layer

2019-04-03 Thread Yuval Shaia
On Wed, Apr 03, 2019 at 02:33:40PM +0300, Kamal Heib wrote:
> Add the required functions and definitions to support shared receive
> queues (SRQs) in the backend layer.
> 
> Signed-off-by: Kamal Heib 
> ---
>  hw/rdma/rdma_backend.c  | 116 +++-
>  hw/rdma/rdma_backend.h  |  12 
>  hw/rdma/rdma_backend_defs.h |   5 ++
>  hw/rdma/rdma_rm.c   |   2 +
>  hw/rdma/rdma_rm_defs.h  |   1 +
>  5 files changed, 134 insertions(+), 2 deletions(-)
> 
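For orientation, here is a minimal, hypothetical caller-side sketch of two of the new SRQ entry points (rdma_backend_create_srq() and rdma_backend_post_srq_recv()); the real call sites presumably arrive with the later patches in this series, and everything other than the RdmaBackend* types and rdma_backend_* functions is a placeholder:

    /* Hypothetical sketch only -- not part of this patch.  In real code the
     * SRQ would live in the device's resource tables, not on the stack. */
    static int example_srq_flow(RdmaBackendDev *backend_dev, RdmaBackendPD *pd,
                                struct ibv_sge *sge, void *ctx)
    {
        RdmaBackendSRQ srq;
        int rc;

        /* Create the shared receive queue on the backend device. */
        rc = rdma_backend_create_srq(&srq, pd, 256 /* max_wr */,
                                     4 /* max_sge */, 0 /* srq_limit */);
        if (rc) {
            return rc; /* -EIO when ibv_create_srq() fails */
        }

        /* Post one receive buffer to the SRQ; completion is reported through
         * comp_handler with the given ctx, like the per-QP recv path. */
        rdma_backend_post_srq_recv(backend_dev, &srq, sge, 1, ctx);

        return 0;
    }
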
> diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c
> index d1660b6474fa..04dfd63a573b 100644
> --- a/hw/rdma/rdma_backend.c
> +++ b/hw/rdma/rdma_backend.c
> @@ -40,6 +40,7 @@ typedef struct BackendCtx {
>  void *up_ctx;
>  struct ibv_sge sge; /* Used to save MAD recv buffer */
>  RdmaBackendQP *backend_qp; /* To maintain recv buffers */
> +RdmaBackendSRQ *backend_srq;
>  } BackendCtx;
>  
>  struct backend_umad {
> @@ -99,6 +100,7 @@ static int rdma_poll_cq(RdmaDeviceResources *rdma_dev_res, 
> struct ibv_cq *ibcq)
>  int i, ne, total_ne = 0;
>  BackendCtx *bctx;
>  struct ibv_wc wc[2];
> +RdmaProtectedGSList *cqe_ctx_list;
>  
>  qemu_mutex_lock(&rdma_dev_res->lock);
>  do {
> @@ -116,8 +118,13 @@ static int rdma_poll_cq(RdmaDeviceResources 
> *rdma_dev_res, struct ibv_cq *ibcq)
>  
>  comp_handler(bctx->up_ctx, &wc[i]);
>  
> -rdma_protected_gslist_remove_int32(&bctx->backend_qp->cqe_ctx_list,
> -   wc[i].wr_id);
> +if (bctx->backend_qp) {
> +cqe_ctx_list = &bctx->backend_qp->cqe_ctx_list;
> +} else {
> +cqe_ctx_list = &bctx->backend_srq->cqe_ctx_list;
> +}
> +
> +rdma_protected_gslist_remove_int32(cqe_ctx_list, wc[i].wr_id);
>  rdma_rm_dealloc_cqe_ctx(rdma_dev_res, wc[i].wr_id);
>  g_free(bctx);
>  }
> @@ -662,6 +669,60 @@ err_free_bctx:
>  g_free(bctx);
>  }
>  
> +void rdma_backend_post_srq_recv(RdmaBackendDev *backend_dev,
> +RdmaBackendSRQ *srq, struct ibv_sge *sge,
> +uint32_t num_sge, void *ctx)
> +{
> +BackendCtx *bctx;
> +struct ibv_sge new_sge[MAX_SGE];
> +uint32_t bctx_id;
> +int rc;
> +struct ibv_recv_wr wr = {}, *bad_wr;
> +
> +bctx = g_malloc0(sizeof(*bctx));
> +bctx->up_ctx = ctx;
> +bctx->backend_srq = srq;
> +
> +rc = rdma_rm_alloc_cqe_ctx(backend_dev->rdma_dev_res, &bctx_id, bctx);
> +if (unlikely(rc)) {
> +complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_NOMEM, ctx);
> +goto err_free_bctx;
> +}
> +
> +rdma_protected_gslist_append_int32(&srq->cqe_ctx_list, bctx_id);
> +
> +rc = build_host_sge_array(backend_dev->rdma_dev_res, new_sge, sge, num_sge,
> +  &backend_dev->rdma_dev_res->stats.rx_bufs_len);
> +if (rc) {
> +complete_work(IBV_WC_GENERAL_ERR, rc, ctx);
> +goto err_dealloc_cqe_ctx;
> +}
> +
> +wr.num_sge = num_sge;
> +wr.sg_list = new_sge;
> +wr.wr_id = bctx_id;
> +rc = ibv_post_srq_recv(srq->ibsrq, &wr, &bad_wr);
> +if (rc) {
> +rdma_error_report("ibv_post_srq_recv fail, srqn=0x%x, rc=%d, errno=%d",
> +  srq->ibsrq->handle, rc, errno);
> +complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_FAIL_BACKEND, ctx);
> +goto err_dealloc_cqe_ctx;
> +}
> +
> +atomic_inc(&backend_dev->rdma_dev_res->stats.missing_cqe);
> +backend_dev->rdma_dev_res->stats.rx_bufs++;
> +backend_dev->rdma_dev_res->stats.rx_srq++;

You should update function rdma_dump_device_counters with this new
counter.
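Something along these lines, for example (an illustrative sketch only; the surrounding lines stand in for whatever rdma_dump_device_counters() in hw/rdma/rdma_rm.c already prints, and the format string simply mirrors the neighbouring counters):

    void rdma_dump_device_counters(Monitor *mon, RdmaDeviceResources *dev_res)
    {
        /* ... existing counters ... */
        monitor_printf(mon, "\trx_bufs  : %" PRId64 "\n",
                       dev_res->stats.rx_bufs);
        monitor_printf(mon, "\trx_bufs_err  : %" PRId64 "\n",
                       dev_res->stats.rx_bufs_err);
        /* New: receive buffers posted through the shared receive queue */
        monitor_printf(mon, "\trx_srq   : %" PRId64 "\n",
                       dev_res->stats.rx_srq);
    }
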

> +
> +return;
> +
> +err_dealloc_cqe_ctx:
> +backend_dev->rdma_dev_res->stats.rx_bufs_err++;
> +rdma_rm_dealloc_cqe_ctx(backend_dev->rdma_dev_res, bctx_id);
> +
> +err_free_bctx:
> +g_free(bctx);
> +}
> +
>  int rdma_backend_create_pd(RdmaBackendDev *backend_dev, RdmaBackendPD *pd)
>  {
>  pd->ibpd = ibv_alloc_pd(backend_dev->context);
> @@ -938,6 +999,55 @@ void rdma_backend_destroy_qp(RdmaBackendQP *qp, 
> RdmaDeviceResources *dev_res)
>  rdma_protected_gslist_destroy(&qp->cqe_ctx_list);
>  }
>  
> +int rdma_backend_create_srq(RdmaBackendSRQ *srq, RdmaBackendPD *pd,
> +uint32_t max_wr, uint32_t max_sge,
> +uint32_t srq_limit)
> +{
> +struct ibv_srq_init_attr srq_init_attr = {};
> +
> +srq_init_attr.attr.max_wr = max_wr;
> +srq_init_attr.attr.max_sge = max_sge;
> +srq_init_attr.attr.srq_limit = srq_limit;
> +
> +srq->ibsrq = ibv_create_srq(pd->ibpd, &srq_init_attr);
> +if (!srq->ibsrq) {
> +rdma_error_report("ibv_create_srq failed, errno=%d", errno);
> +return -EIO;
> +}
> +
> +rdma_protected_gslist_init(&srq->cqe_ctx_list);
> +
> +return 0;
> +}
> +
> +int rdma_backend_query_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr 
> *srq_attr)
> +{
> +if (!srq->ibsrq) {
> +