Re: [Qemu-devel] [PATCH v3 1/4] hw/rdma: Add SRQ support to backend layer
On Sun, Apr 07, 2019 at 11:13:15AM +0300, Kamal Heib wrote: > > > On 4/3/19 9:05 PM, Yuval Shaia wrote: > > On Wed, Apr 03, 2019 at 02:33:40PM +0300, Kamal Heib wrote: > >> Add the required functions and definitions to support shared receive > >> queues (SRQs) in the backend layer. > >> > >> Signed-off-by: Kamal Heib > >> --- > >> hw/rdma/rdma_backend.c | 116 +++- > >> hw/rdma/rdma_backend.h | 12 > >> hw/rdma/rdma_backend_defs.h | 5 ++ > >> hw/rdma/rdma_rm.c | 2 + > >> hw/rdma/rdma_rm_defs.h | 1 + > >> 5 files changed, 134 insertions(+), 2 deletions(-) > >> > >> diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c > >> index d1660b6474fa..04dfd63a573b 100644 > >> --- a/hw/rdma/rdma_backend.c > >> +++ b/hw/rdma/rdma_backend.c > >> @@ -40,6 +40,7 @@ typedef struct BackendCtx { > >> void *up_ctx; > >> struct ibv_sge sge; /* Used to save MAD recv buffer */ > >> RdmaBackendQP *backend_qp; /* To maintain recv buffers */ > >> +RdmaBackendSRQ *backend_srq; > >> } BackendCtx; > >> > >> struct backend_umad { > >> @@ -99,6 +100,7 @@ static int rdma_poll_cq(RdmaDeviceResources > >> *rdma_dev_res, struct ibv_cq *ibcq) > >> int i, ne, total_ne = 0; > >> BackendCtx *bctx; > >> struct ibv_wc wc[2]; > >> +RdmaProtectedGSList *cqe_ctx_list; > >> > >> qemu_mutex_lock(&rdma_dev_res->lock); > >> do { > >> @@ -116,8 +118,13 @@ static int rdma_poll_cq(RdmaDeviceResources > >> *rdma_dev_res, struct ibv_cq *ibcq) > >> > >> comp_handler(bctx->up_ctx, &wc[i]); > >> > >> - > >> rdma_protected_gslist_remove_int32(&bctx->backend_qp->cqe_ctx_list, > >> - wc[i].wr_id); > >> +if (bctx->backend_qp) { > >> +cqe_ctx_list = &bctx->backend_qp->cqe_ctx_list; > >> +} else { > >> +cqe_ctx_list = &bctx->backend_srq->cqe_ctx_list; > >> +} > >> + > >> +rdma_protected_gslist_remove_int32(cqe_ctx_list, wc[i].wr_id); > >> rdma_rm_dealloc_cqe_ctx(rdma_dev_res, wc[i].wr_id); > >> g_free(bctx); > >> } > >> @@ -662,6 +669,60 @@ err_free_bctx: > >> g_free(bctx); > >> } > >> > >> +void rdma_backend_post_srq_recv(RdmaBackendDev 
*backend_dev, > >> +RdmaBackendSRQ *srq, struct ibv_sge *sge, > >> +uint32_t num_sge, void *ctx) > >> +{ > >> +BackendCtx *bctx; > >> +struct ibv_sge new_sge[MAX_SGE]; > >> +uint32_t bctx_id; > >> +int rc; > >> +struct ibv_recv_wr wr = {}, *bad_wr; > >> + > >> +bctx = g_malloc0(sizeof(*bctx)); > >> +bctx->up_ctx = ctx; > >> +bctx->backend_srq = srq; > >> + > >> +rc = rdma_rm_alloc_cqe_ctx(backend_dev->rdma_dev_res, &bctx_id, bctx); > >> +if (unlikely(rc)) { > >> +complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_NOMEM, ctx); > >> +goto err_free_bctx; > >> +} > >> + > >> +rdma_protected_gslist_append_int32(&srq->cqe_ctx_list, bctx_id); > >> + > >> +rc = build_host_sge_array(backend_dev->rdma_dev_res, new_sge, sge, > >> num_sge, > >> + > >> &backend_dev->rdma_dev_res->stats.rx_bufs_len); > >> +if (rc) { > >> +complete_work(IBV_WC_GENERAL_ERR, rc, ctx); > >> +goto err_dealloc_cqe_ctx; > >> +} > >> + > >> +wr.num_sge = num_sge; > >> +wr.sg_list = new_sge; > >> +wr.wr_id = bctx_id; > >> +rc = ibv_post_srq_recv(srq->ibsrq, &wr, &bad_wr); > >> +if (rc) { > >> +rdma_error_report("ibv_post_srq_recv fail, srqn=0x%x, rc=%d, > >> errno=%d", > >> + srq->ibsrq->handle, rc, errno); > >> +complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_FAIL_BACKEND, ctx); > >> +goto err_dealloc_cqe_ctx; > >> +} > >> + > >> +atomic_inc(&backend_dev->rdma_dev_res->stats.missing_cqe); > >> +backend_dev->rdma_dev_res->stats.rx_bufs++; > >> +backend_dev->rdma_dev_res->stats.rx_srq++; > > > > You should update function rdma_dump_device_counters with this new > > counter. 
> > > >> + > >> +return; > >> + > >> +err_dealloc_cqe_ctx: > >> +backend_dev->rdma_dev_res->stats.rx_bufs_err++; > >> +rdma_rm_dealloc_cqe_ctx(backend_dev->rdma_dev_res, bctx_id); > >> + > >> +err_free_bctx: > >> +g_free(bctx); > >> +} > >> + > >> int rdma_backend_create_pd(RdmaBackendDev *backend_dev, RdmaBackendPD *pd) > >> { > >> pd->ibpd = ibv_alloc_pd(backend_dev->context); > >> @@ -938,6 +999,55 @@ void rdma_backend_destroy_qp(RdmaBackendQP *qp, > >> RdmaDeviceResources *dev_res) > >> rdma_protected_gslist_destroy(&qp->cqe_ctx_list); > >> } > >> > >> +int rdma_backend_create_srq(RdmaBackendSRQ *srq, RdmaBackendPD *pd, > >> +uint32_t max_wr, uint32_t max_sge, > >> +uint32_t srq_limit) > >> +{ > >> +struct ibv_srq_init_attr srq_init_attr = {}; > >> + > >> +
Re: [Qemu-devel] [PATCH v3 1/4] hw/rdma: Add SRQ support to backend layer
On 4/3/19 9:05 PM, Yuval Shaia wrote: > On Wed, Apr 03, 2019 at 02:33:40PM +0300, Kamal Heib wrote: >> Add the required functions and definitions to support shared receive >> queues (SRQs) in the backend layer. >> >> Signed-off-by: Kamal Heib >> --- >> hw/rdma/rdma_backend.c | 116 +++- >> hw/rdma/rdma_backend.h | 12 >> hw/rdma/rdma_backend_defs.h | 5 ++ >> hw/rdma/rdma_rm.c | 2 + >> hw/rdma/rdma_rm_defs.h | 1 + >> 5 files changed, 134 insertions(+), 2 deletions(-) >> >> diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c >> index d1660b6474fa..04dfd63a573b 100644 >> --- a/hw/rdma/rdma_backend.c >> +++ b/hw/rdma/rdma_backend.c >> @@ -40,6 +40,7 @@ typedef struct BackendCtx { >> void *up_ctx; >> struct ibv_sge sge; /* Used to save MAD recv buffer */ >> RdmaBackendQP *backend_qp; /* To maintain recv buffers */ >> +RdmaBackendSRQ *backend_srq; >> } BackendCtx; >> >> struct backend_umad { >> @@ -99,6 +100,7 @@ static int rdma_poll_cq(RdmaDeviceResources >> *rdma_dev_res, struct ibv_cq *ibcq) >> int i, ne, total_ne = 0; >> BackendCtx *bctx; >> struct ibv_wc wc[2]; >> +RdmaProtectedGSList *cqe_ctx_list; >> >> qemu_mutex_lock(&rdma_dev_res->lock); >> do { >> @@ -116,8 +118,13 @@ static int rdma_poll_cq(RdmaDeviceResources >> *rdma_dev_res, struct ibv_cq *ibcq) >> >> comp_handler(bctx->up_ctx, &wc[i]); >> >> - >> rdma_protected_gslist_remove_int32(&bctx->backend_qp->cqe_ctx_list, >> - wc[i].wr_id); >> +if (bctx->backend_qp) { >> +cqe_ctx_list = &bctx->backend_qp->cqe_ctx_list; >> +} else { >> +cqe_ctx_list = &bctx->backend_srq->cqe_ctx_list; >> +} >> + >> +rdma_protected_gslist_remove_int32(cqe_ctx_list, wc[i].wr_id); >> rdma_rm_dealloc_cqe_ctx(rdma_dev_res, wc[i].wr_id); >> g_free(bctx); >> } >> @@ -662,6 +669,60 @@ err_free_bctx: >> g_free(bctx); >> } >> >> +void rdma_backend_post_srq_recv(RdmaBackendDev *backend_dev, >> +RdmaBackendSRQ *srq, struct ibv_sge *sge, >> +uint32_t num_sge, void *ctx) >> +{ >> +BackendCtx *bctx; >> +struct ibv_sge new_sge[MAX_SGE]; >> +uint32_t bctx_id; >> +int 
rc; >> +struct ibv_recv_wr wr = {}, *bad_wr; >> + >> +bctx = g_malloc0(sizeof(*bctx)); >> +bctx->up_ctx = ctx; >> +bctx->backend_srq = srq; >> + >> +rc = rdma_rm_alloc_cqe_ctx(backend_dev->rdma_dev_res, &bctx_id, bctx); >> +if (unlikely(rc)) { >> +complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_NOMEM, ctx); >> +goto err_free_bctx; >> +} >> + >> +rdma_protected_gslist_append_int32(&srq->cqe_ctx_list, bctx_id); >> + >> +rc = build_host_sge_array(backend_dev->rdma_dev_res, new_sge, sge, >> num_sge, >> + >> &backend_dev->rdma_dev_res->stats.rx_bufs_len); >> +if (rc) { >> +complete_work(IBV_WC_GENERAL_ERR, rc, ctx); >> +goto err_dealloc_cqe_ctx; >> +} >> + >> +wr.num_sge = num_sge; >> +wr.sg_list = new_sge; >> +wr.wr_id = bctx_id; >> +rc = ibv_post_srq_recv(srq->ibsrq, &wr, &bad_wr); >> +if (rc) { >> +rdma_error_report("ibv_post_srq_recv fail, srqn=0x%x, rc=%d, >> errno=%d", >> + srq->ibsrq->handle, rc, errno); >> +complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_FAIL_BACKEND, ctx); >> +goto err_dealloc_cqe_ctx; >> +} >> + >> +atomic_inc(&backend_dev->rdma_dev_res->stats.missing_cqe); >> +backend_dev->rdma_dev_res->stats.rx_bufs++; >> +backend_dev->rdma_dev_res->stats.rx_srq++; > > You should update function rdma_dump_device_counters with this new > counter. 
> >> + >> +return; >> + >> +err_dealloc_cqe_ctx: >> +backend_dev->rdma_dev_res->stats.rx_bufs_err++; >> +rdma_rm_dealloc_cqe_ctx(backend_dev->rdma_dev_res, bctx_id); >> + >> +err_free_bctx: >> +g_free(bctx); >> +} >> + >> int rdma_backend_create_pd(RdmaBackendDev *backend_dev, RdmaBackendPD *pd) >> { >> pd->ibpd = ibv_alloc_pd(backend_dev->context); >> @@ -938,6 +999,55 @@ void rdma_backend_destroy_qp(RdmaBackendQP *qp, >> RdmaDeviceResources *dev_res) >> rdma_protected_gslist_destroy(&qp->cqe_ctx_list); >> } >> >> +int rdma_backend_create_srq(RdmaBackendSRQ *srq, RdmaBackendPD *pd, >> +uint32_t max_wr, uint32_t max_sge, >> +uint32_t srq_limit) >> +{ >> +struct ibv_srq_init_attr srq_init_attr = {}; >> + >> +srq_init_attr.attr.max_wr = max_wr; >> +srq_init_attr.attr.max_sge = max_sge; >> +srq_init_attr.attr.srq_limit = srq_limit; >> + >> +srq->ibsrq = ibv_create_srq(pd->ibpd, &srq_init_attr); >> +if (!srq->ibsrq) { >> +rdma_error_report("ibv_create_srq failed, errno=%d", errno); >> +return -EIO; >> +} >> + >> +
Re: [Qemu-devel] [PATCH v3 1/4] hw/rdma: Add SRQ support to backend layer
On Wed, Apr 03, 2019 at 02:33:40PM +0300, Kamal Heib wrote: > Add the required functions and definitions to support shared receive > queues (SRQs) in the backend layer. > > Signed-off-by: Kamal Heib > --- > hw/rdma/rdma_backend.c | 116 +++- > hw/rdma/rdma_backend.h | 12 > hw/rdma/rdma_backend_defs.h | 5 ++ > hw/rdma/rdma_rm.c | 2 + > hw/rdma/rdma_rm_defs.h | 1 + > 5 files changed, 134 insertions(+), 2 deletions(-) > > diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c > index d1660b6474fa..04dfd63a573b 100644 > --- a/hw/rdma/rdma_backend.c > +++ b/hw/rdma/rdma_backend.c > @@ -40,6 +40,7 @@ typedef struct BackendCtx { > void *up_ctx; > struct ibv_sge sge; /* Used to save MAD recv buffer */ > RdmaBackendQP *backend_qp; /* To maintain recv buffers */ > +RdmaBackendSRQ *backend_srq; > } BackendCtx; > > struct backend_umad { > @@ -99,6 +100,7 @@ static int rdma_poll_cq(RdmaDeviceResources *rdma_dev_res, > struct ibv_cq *ibcq) > int i, ne, total_ne = 0; > BackendCtx *bctx; > struct ibv_wc wc[2]; > +RdmaProtectedGSList *cqe_ctx_list; > > qemu_mutex_lock(&rdma_dev_res->lock); > do { > @@ -116,8 +118,13 @@ static int rdma_poll_cq(RdmaDeviceResources > *rdma_dev_res, struct ibv_cq *ibcq) > > comp_handler(bctx->up_ctx, &wc[i]); > > - > rdma_protected_gslist_remove_int32(&bctx->backend_qp->cqe_ctx_list, > - wc[i].wr_id); > +if (bctx->backend_qp) { > +cqe_ctx_list = &bctx->backend_qp->cqe_ctx_list; > +} else { > +cqe_ctx_list = &bctx->backend_srq->cqe_ctx_list; > +} > + > +rdma_protected_gslist_remove_int32(cqe_ctx_list, wc[i].wr_id); > rdma_rm_dealloc_cqe_ctx(rdma_dev_res, wc[i].wr_id); > g_free(bctx); > } > @@ -662,6 +669,60 @@ err_free_bctx: > g_free(bctx); > } > > +void rdma_backend_post_srq_recv(RdmaBackendDev *backend_dev, > +RdmaBackendSRQ *srq, struct ibv_sge *sge, > +uint32_t num_sge, void *ctx) > +{ > +BackendCtx *bctx; > +struct ibv_sge new_sge[MAX_SGE]; > +uint32_t bctx_id; > +int rc; > +struct ibv_recv_wr wr = {}, *bad_wr; > + > +bctx = g_malloc0(sizeof(*bctx)); > +bctx->up_ctx = 
ctx; > +bctx->backend_srq = srq; > + > +rc = rdma_rm_alloc_cqe_ctx(backend_dev->rdma_dev_res, &bctx_id, bctx); > +if (unlikely(rc)) { > +complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_NOMEM, ctx); > +goto err_free_bctx; > +} > + > +rdma_protected_gslist_append_int32(&srq->cqe_ctx_list, bctx_id); > + > +rc = build_host_sge_array(backend_dev->rdma_dev_res, new_sge, sge, > num_sge, > + &backend_dev->rdma_dev_res->stats.rx_bufs_len); > +if (rc) { > +complete_work(IBV_WC_GENERAL_ERR, rc, ctx); > +goto err_dealloc_cqe_ctx; > +} > + > +wr.num_sge = num_sge; > +wr.sg_list = new_sge; > +wr.wr_id = bctx_id; > +rc = ibv_post_srq_recv(srq->ibsrq, &wr, &bad_wr); > +if (rc) { > +rdma_error_report("ibv_post_srq_recv fail, srqn=0x%x, rc=%d, > errno=%d", > + srq->ibsrq->handle, rc, errno); > +complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_FAIL_BACKEND, ctx); > +goto err_dealloc_cqe_ctx; > +} > + > +atomic_inc(&backend_dev->rdma_dev_res->stats.missing_cqe); > +backend_dev->rdma_dev_res->stats.rx_bufs++; > +backend_dev->rdma_dev_res->stats.rx_srq++; You should update function rdma_dump_device_counters with this new counter. 
> + > +return; > + > +err_dealloc_cqe_ctx: > +backend_dev->rdma_dev_res->stats.rx_bufs_err++; > +rdma_rm_dealloc_cqe_ctx(backend_dev->rdma_dev_res, bctx_id); > + > +err_free_bctx: > +g_free(bctx); > +} > + > int rdma_backend_create_pd(RdmaBackendDev *backend_dev, RdmaBackendPD *pd) > { > pd->ibpd = ibv_alloc_pd(backend_dev->context); > @@ -938,6 +999,55 @@ void rdma_backend_destroy_qp(RdmaBackendQP *qp, > RdmaDeviceResources *dev_res) > rdma_protected_gslist_destroy(&qp->cqe_ctx_list); > } > > +int rdma_backend_create_srq(RdmaBackendSRQ *srq, RdmaBackendPD *pd, > +uint32_t max_wr, uint32_t max_sge, > +uint32_t srq_limit) > +{ > +struct ibv_srq_init_attr srq_init_attr = {}; > + > +srq_init_attr.attr.max_wr = max_wr; > +srq_init_attr.attr.max_sge = max_sge; > +srq_init_attr.attr.srq_limit = srq_limit; > + > +srq->ibsrq = ibv_create_srq(pd->ibpd, &srq_init_attr); > +if (!srq->ibsrq) { > +rdma_error_report("ibv_create_srq failed, errno=%d", errno); > +return -EIO; > +} > + > +rdma_protected_gslist_init(&srq->cqe_ctx_list); > + > +return 0; > +} > + > +int rdma_backend_query_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr > *srq_attr) > +{ > +if (!srq->ibsrq) { > +