On Sun, Apr 07, 2019 at 11:13:15AM +0300, Kamal Heib wrote: > > > On 4/3/19 9:05 PM, Yuval Shaia wrote: > > On Wed, Apr 03, 2019 at 02:33:40PM +0300, Kamal Heib wrote: > >> Add the required functions and definitions to support shared receive > >> queues (SRQs) in the backend layer. > >> > >> Signed-off-by: Kamal Heib <kamalhe...@gmail.com> > >> --- > >> hw/rdma/rdma_backend.c | 116 +++++++++++++++++++++++++++++++++++- > >> hw/rdma/rdma_backend.h | 12 ++++ > >> hw/rdma/rdma_backend_defs.h | 5 ++ > >> hw/rdma/rdma_rm.c | 2 + > >> hw/rdma/rdma_rm_defs.h | 1 + > >> 5 files changed, 134 insertions(+), 2 deletions(-) > >> > >> diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c > >> index d1660b6474fa..04dfd63a573b 100644 > >> --- a/hw/rdma/rdma_backend.c > >> +++ b/hw/rdma/rdma_backend.c > >> @@ -40,6 +40,7 @@ typedef struct BackendCtx { > >> void *up_ctx; > >> struct ibv_sge sge; /* Used to save MAD recv buffer */ > >> RdmaBackendQP *backend_qp; /* To maintain recv buffers */ > >> + RdmaBackendSRQ *backend_srq; > >> } BackendCtx; > >> > >> struct backend_umad { > >> @@ -99,6 +100,7 @@ static int rdma_poll_cq(RdmaDeviceResources > >> *rdma_dev_res, struct ibv_cq *ibcq) > >> int i, ne, total_ne = 0; > >> BackendCtx *bctx; > >> struct ibv_wc wc[2]; > >> + RdmaProtectedGSList *cqe_ctx_list; > >> > >> qemu_mutex_lock(&rdma_dev_res->lock); > >> do { > >> @@ -116,8 +118,13 @@ static int rdma_poll_cq(RdmaDeviceResources > >> *rdma_dev_res, struct ibv_cq *ibcq) > >> > >> comp_handler(bctx->up_ctx, &wc[i]); > >> > >> - > >> rdma_protected_gslist_remove_int32(&bctx->backend_qp->cqe_ctx_list, > >> - wc[i].wr_id); > >> + if (bctx->backend_qp) { > >> + cqe_ctx_list = &bctx->backend_qp->cqe_ctx_list; > >> + } else { > >> + cqe_ctx_list = &bctx->backend_srq->cqe_ctx_list; > >> + } > >> + > >> + rdma_protected_gslist_remove_int32(cqe_ctx_list, wc[i].wr_id); > >> rdma_rm_dealloc_cqe_ctx(rdma_dev_res, wc[i].wr_id); > >> g_free(bctx); > >> } > >> @@ -662,6 +669,60 @@ err_free_bctx: > >> g_free(bctx); > >> } > >> > >> +void rdma_backend_post_srq_recv(RdmaBackendDev *backend_dev, > >> + RdmaBackendSRQ *srq, struct ibv_sge *sge, > >> + uint32_t num_sge, void *ctx) > >> +{ > >> + BackendCtx *bctx; > >> + struct ibv_sge new_sge[MAX_SGE]; > >> + uint32_t bctx_id; > >> + int rc; > >> + struct ibv_recv_wr wr = {}, *bad_wr; > >> + > >> + bctx = g_malloc0(sizeof(*bctx)); > >> + bctx->up_ctx = ctx; > >> + bctx->backend_srq = srq; > >> + > >> + rc = rdma_rm_alloc_cqe_ctx(backend_dev->rdma_dev_res, &bctx_id, bctx); > >> + if (unlikely(rc)) { > >> + complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_NOMEM, ctx); > >> + goto err_free_bctx; > >> + } > >> + > >> + rdma_protected_gslist_append_int32(&srq->cqe_ctx_list, bctx_id); > >> + > >> + rc = build_host_sge_array(backend_dev->rdma_dev_res, new_sge, sge, > >> num_sge, > >> + > >> &backend_dev->rdma_dev_res->stats.rx_bufs_len); > >> + if (rc) { > >> + complete_work(IBV_WC_GENERAL_ERR, rc, ctx); > >> + goto err_dealloc_cqe_ctx; > >> + } > >> + > >> + wr.num_sge = num_sge; > >> + wr.sg_list = new_sge; > >> + wr.wr_id = bctx_id; > >> + rc = ibv_post_srq_recv(srq->ibsrq, &wr, &bad_wr); > >> + if (rc) { > >> + rdma_error_report("ibv_post_srq_recv fail, srqn=0x%x, rc=%d, > >> errno=%d", > >> + srq->ibsrq->handle, rc, errno); > >> + complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_FAIL_BACKEND, ctx); > >> + goto err_dealloc_cqe_ctx; > >> + } > >> + > >> + atomic_inc(&backend_dev->rdma_dev_res->stats.missing_cqe); > >> + backend_dev->rdma_dev_res->stats.rx_bufs++; > >> + backend_dev->rdma_dev_res->stats.rx_srq++; > > > > You should update function rdma_dump_device_counters with this new > > counter. > > > >> + > >> + return; > >> + > >> +err_dealloc_cqe_ctx: > >> + backend_dev->rdma_dev_res->stats.rx_bufs_err++; > >> + rdma_rm_dealloc_cqe_ctx(backend_dev->rdma_dev_res, bctx_id); > >> + > >> +err_free_bctx: > >> + g_free(bctx); > >> +} > >> + > >> int rdma_backend_create_pd(RdmaBackendDev *backend_dev, RdmaBackendPD *pd) > >> { > >> pd->ibpd = ibv_alloc_pd(backend_dev->context); > >> @@ -938,6 +999,55 @@ void rdma_backend_destroy_qp(RdmaBackendQP *qp, > >> RdmaDeviceResources *dev_res) > >> rdma_protected_gslist_destroy(&qp->cqe_ctx_list); > >> } > >> > >> +int rdma_backend_create_srq(RdmaBackendSRQ *srq, RdmaBackendPD *pd, > >> + uint32_t max_wr, uint32_t max_sge, > >> + uint32_t srq_limit) > >> +{ > >> + struct ibv_srq_init_attr srq_init_attr = {}; > >> + > >> + srq_init_attr.attr.max_wr = max_wr; > >> + srq_init_attr.attr.max_sge = max_sge; > >> + srq_init_attr.attr.srq_limit = srq_limit; > >> + > >> + srq->ibsrq = ibv_create_srq(pd->ibpd, &srq_init_attr); > >> + if (!srq->ibsrq) { > >> + rdma_error_report("ibv_create_srq failed, errno=%d", errno); > >> + return -EIO; > >> + } > >> + > >> + rdma_protected_gslist_init(&srq->cqe_ctx_list); > >> + > >> + return 0; > >> +} > >> + > >> +int rdma_backend_query_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr > >> *srq_attr) > >> +{ > >> + if (!srq->ibsrq) { > >> + return -EINVAL; > >> + } > >> + > >> + return ibv_query_srq(srq->ibsrq, srq_attr); > >> +} > >> + > >> +int rdma_backend_modify_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr > >> *srq_attr, > >> + int srq_attr_mask) > >> +{ > >> + if (!srq->ibsrq) { > >> + return -EINVAL; > >> + } > >> + > >> + return ibv_modify_srq(srq->ibsrq, srq_attr, srq_attr_mask); > >> +} > >> + > >> +void rdma_backend_destroy_srq(RdmaBackendSRQ *srq, RdmaDeviceResources > >> *dev_res) > >> +{ > >> + if (srq->ibsrq) { > >> + ibv_destroy_srq(srq->ibsrq); > >> + } > >> + g_slist_foreach(srq->cqe_ctx_list.list, free_cqe_ctx, dev_res); > >> + rdma_protected_gslist_destroy(&srq->cqe_ctx_list); > >> +} > >> + > >> #define CHK_ATTR(req, dev, member, fmt) ({ \ > >> trace_rdma_check_dev_attr(#member, dev.member, req->member); \ > >> if (req->member > dev.member) { \ > >> @@ -960,6 +1070,7 @@ static int init_device_caps(RdmaBackendDev > >> *backend_dev, > >> } > >> > >> dev_attr->max_sge = MAX_SGE; > >> + dev_attr->max_srq_sge = MAX_SGE; > >> > >> CHK_ATTR(dev_attr, bk_dev_attr, max_mr_size, "%" PRId64); > >> CHK_ATTR(dev_attr, bk_dev_attr, max_qp, "%d"); > >> @@ -970,6 +1081,7 @@ static int init_device_caps(RdmaBackendDev > >> *backend_dev, > >> CHK_ATTR(dev_attr, bk_dev_attr, max_qp_rd_atom, "%d"); > >> CHK_ATTR(dev_attr, bk_dev_attr, max_qp_init_rd_atom, "%d"); > >> CHK_ATTR(dev_attr, bk_dev_attr, max_ah, "%d"); > >> + CHK_ATTR(dev_attr, bk_dev_attr, max_srq, "%d"); > >> > >> return 0; > >> } > >> diff --git a/hw/rdma/rdma_backend.h b/hw/rdma/rdma_backend.h > >> index 38056d97c7fc..cad7956d98e8 100644 > >> --- a/hw/rdma/rdma_backend.h > >> +++ b/hw/rdma/rdma_backend.h > >> @@ -114,4 +114,16 @@ void rdma_backend_post_recv(RdmaBackendDev > >> *backend_dev, > >> RdmaBackendQP *qp, uint8_t qp_type, > >> struct ibv_sge *sge, uint32_t num_sge, void > >> *ctx); > >> > >> +int rdma_backend_create_srq(RdmaBackendSRQ *srq, RdmaBackendPD *pd, > >> + uint32_t max_wr, uint32_t max_sge, > >> + uint32_t srq_limit); > >> +int rdma_backend_query_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr > >> *srq_attr); > >> +int rdma_backend_modify_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr > >> *srq_attr, > >> + int srq_attr_mask); > >> +void rdma_backend_destroy_srq(RdmaBackendSRQ *srq, > >> + RdmaDeviceResources *dev_res); > >> +void rdma_backend_post_srq_recv(RdmaBackendDev *backend_dev, > >> + RdmaBackendSRQ *srq, struct ibv_sge *sge, > >> + uint32_t num_sge, void *ctx); > >> + > >> #endif > >> diff --git a/hw/rdma/rdma_backend_defs.h b/hw/rdma/rdma_backend_defs.h > >> index 817153dc8cf4..0b55be35038d 100644 > >> --- a/hw/rdma/rdma_backend_defs.h > >> +++ b/hw/rdma/rdma_backend_defs.h > >> @@ -68,4 +68,9 @@ typedef struct RdmaBackendQP { > >> RdmaProtectedGSList cqe_ctx_list; > >> } RdmaBackendQP; > >> > >> +typedef struct RdmaBackendSRQ { > >> + struct ibv_srq *ibsrq; > >> + RdmaProtectedGSList cqe_ctx_list; > >> +} RdmaBackendSRQ; > >> + > >> #endif > >> diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c > >> index bac3b2f4a6c3..b683506b8616 100644 > >> --- a/hw/rdma/rdma_rm.c > >> +++ b/hw/rdma/rdma_rm.c > >> @@ -37,6 +37,8 @@ void rdma_dump_device_counters(Monitor *mon, > >> RdmaDeviceResources *dev_res) > >> dev_res->stats.tx_err); > >> monitor_printf(mon, "\trx_bufs : %" PRId64 "\n", > >> dev_res->stats.rx_bufs); > >> + monitor_printf(mon, "\trx_srq : %" PRId64 "\n", > >> + dev_res->stats.rx_srq);
[1] > >> monitor_printf(mon, "\trx_bufs_len : %" PRId64 "\n", > >> dev_res->stats.rx_bufs_len); > >> monitor_printf(mon, "\trx_bufs_err : %" PRId64 "\n", > >> diff --git a/hw/rdma/rdma_rm_defs.h b/hw/rdma/rdma_rm_defs.h > >> index c200d311de37..e774af528022 100644 > >> --- a/hw/rdma/rdma_rm_defs.h > >> +++ b/hw/rdma/rdma_rm_defs.h > >> @@ -106,6 +106,7 @@ typedef struct RdmaRmStats { > >> uint64_t rx_bufs; > >> uint64_t rx_bufs_len; > >> uint64_t rx_bufs_err; > >> + uint64_t rx_srq; > >> uint64_t completions; > >> uint64_t mad_tx; > >> uint64_t mad_tx_err; > > > > Please make a separate patch to update the function > > rdma_dump_device_counters. > > > > You mean a separate patch for introducing the "rx_srq" counter & update the > function rdma_dump_device_counters()? My bad, missed that ([1]). No need for a separate patch. > > > Besides that patch lgtm. > > > > Reviewed-by: Yuval Shaia <yuval.sh...@oracle.com> > > > >> -- > >> 2.20.1 > >> > >> >