Hi
From: Maxime Coquelin > On 1/29/20 11:09 AM, Matan Azrad wrote: > > As an arrangement to the vitrio queues creation, a 2 QPs and CQ may be > > created for the virtio queue. > > > > The design is to trigger an event for the guest and for the vdpa > > driver when a new CQE is posted by the HW after the packet transition. > > > > This patch add the basic operations to create and destroy the above HW > > objects and to trigger the CQE events when a new CQE is posted. > > > > Signed-off-by: Matan Azrad <ma...@mellanox.com> > > Acked-by: Viacheslav Ovsiienko <viachesl...@mellanox.com> > > --- > > drivers/common/mlx5/mlx5_prm.h | 4 + > > drivers/vdpa/mlx5/Makefile | 1 + > > drivers/vdpa/mlx5/meson.build | 1 + > > drivers/vdpa/mlx5/mlx5_vdpa.h | 89 ++++++++ > > drivers/vdpa/mlx5/mlx5_vdpa_event.c | 399 > > ++++++++++++++++++++++++++++++++++++ > > 5 files changed, 494 insertions(+) > > create mode 100644 drivers/vdpa/mlx5/mlx5_vdpa_event.c > > > > diff --git a/drivers/common/mlx5/mlx5_prm.h > > b/drivers/common/mlx5/mlx5_prm.h index b48cd0a..b533798 100644 > > --- a/drivers/common/mlx5/mlx5_prm.h > > +++ b/drivers/common/mlx5/mlx5_prm.h > > @@ -392,6 +392,10 @@ struct mlx5_cqe { > > /* CQE format value. */ > > #define MLX5_COMPRESSED 0x3 > > > > +/* CQ doorbell cmd types. */ > > +#define MLX5_CQ_DBR_CMD_SOL_ONLY (1 << 24) #define > > +MLX5_CQ_DBR_CMD_ALL (0 << 24) > > + > > /* Action type of header modification. */ enum { > > MLX5_MODIFICATION_TYPE_SET = 0x1, > > diff --git a/drivers/vdpa/mlx5/Makefile b/drivers/vdpa/mlx5/Makefile > > index 5472797..7f13756 100644 > > --- a/drivers/vdpa/mlx5/Makefile > > +++ b/drivers/vdpa/mlx5/Makefile > > @@ -9,6 +9,7 @@ LIB = librte_pmd_mlx5_vdpa.a # Sources. > > SRCS-$(CONFIG_RTE_LIBRTE_MLX5_VDPA_PMD) += mlx5_vdpa.c > > SRCS-$(CONFIG_RTE_LIBRTE_MLX5_VDPA_PMD) += mlx5_vdpa_mem.c > > +SRCS-$(CONFIG_RTE_LIBRTE_MLX5_VDPA_PMD) += mlx5_vdpa_event.c > > > > # Basic CFLAGS. 
> > CFLAGS += -O3 > > diff --git a/drivers/vdpa/mlx5/meson.build > > b/drivers/vdpa/mlx5/meson.build index 7e5dd95..c609f7c 100644 > > --- a/drivers/vdpa/mlx5/meson.build > > +++ b/drivers/vdpa/mlx5/meson.build > > @@ -13,6 +13,7 @@ deps += ['hash', 'common_mlx5', 'vhost', 'bus_pci', > > 'eal', 'sched'] sources = files( > > 'mlx5_vdpa.c', > > 'mlx5_vdpa_mem.c', > > + 'mlx5_vdpa_event.c', > > ) > > cflags_options = [ > > '-std=c11', > > diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.h > > b/drivers/vdpa/mlx5/mlx5_vdpa.h index e27baea..30030b7 100644 > > --- a/drivers/vdpa/mlx5/mlx5_vdpa.h > > +++ b/drivers/vdpa/mlx5/mlx5_vdpa.h > > @@ -9,9 +9,40 @@ > > > > #include <rte_vdpa.h> > > #include <rte_vhost.h> > > +#include <rte_spinlock.h> > > +#include <rte_interrupts.h> > > > > #include <mlx5_glue.h> > > #include <mlx5_devx_cmds.h> > > +#include <mlx5_prm.h> > > + > > + > > +#define MLX5_VDPA_INTR_RETRIES 256 > > +#define MLX5_VDPA_INTR_RETRIES_USEC 1000 > > + > > +struct mlx5_vdpa_cq { > > + uint16_t log_desc_n; > > + uint32_t cq_ci:24; > > + uint32_t arm_sn:2; > > + rte_spinlock_t sl; > > + struct mlx5_devx_obj *cq; > > + struct mlx5dv_devx_umem *umem_obj; > > + union { > > + volatile void *umem_buf; > > + volatile struct mlx5_cqe *cqes; > > + }; > > + volatile uint32_t *db_rec; > > + uint64_t errors; > > +}; > > + > > +struct mlx5_vdpa_event_qp { > > + struct mlx5_vdpa_cq cq; > > + struct mlx5_devx_obj *fw_qp; > > + struct mlx5_devx_obj *sw_qp; > > + struct mlx5dv_devx_umem *umem_obj; > > + void *umem_buf; > > + volatile uint32_t *db_rec; > > +}; > > > > struct mlx5_vdpa_query_mr { > > SLIST_ENTRY(mlx5_vdpa_query_mr) next; @@ -34,6 +65,10 @@ > struct > > mlx5_vdpa_priv { > > uint32_t gpa_mkey_index; > > struct ibv_mr *null_mr; > > struct rte_vhost_memory *vmem; > > + uint32_t eqn; > > + struct mlx5dv_devx_event_channel *eventc; > > + struct mlx5dv_devx_uar *uar; > > + struct rte_intr_handle intr_handle; > > SLIST_HEAD(mr_list, mlx5_vdpa_query_mr) mr_list; }; > > > > @@ 
-57,4 +92,58 @@ struct mlx5_vdpa_priv { > > */ > > int mlx5_vdpa_mem_register(struct mlx5_vdpa_priv *priv); > > > > + > > +/** > > + * Create an event QP and all its related resources. > > + * > > + * @param[in] priv > > + * The vdpa driver private structure. > > + * @param[in] desc_n > > + * Number of descriptors. > > + * @param[in] callfd > > + * The guest notification file descriptor. > > + * @param[in/out] eqp > > + * Pointer to the event QP structure. > > + * > > + * @return > > + * 0 on success, -1 otherwise and rte_errno is set. > > + */ > > +int mlx5_vdpa_event_qp_create(struct mlx5_vdpa_priv *priv, uint16_t > desc_n, > > + int callfd, struct mlx5_vdpa_event_qp *eqp); > > + > > +/** > > + * Destroy an event QP and all its related resources. > > + * > > + * @param[in/out] eqp > > + * Pointer to the event QP structure. > > + */ > > +void mlx5_vdpa_event_qp_destroy(struct mlx5_vdpa_event_qp *eqp); > > + > > +/** > > + * Release all the event global resources. > > + * > > + * @param[in] priv > > + * The vdpa driver private structure. > > + */ > > +void mlx5_vdpa_event_qp_global_release(struct mlx5_vdpa_priv *priv); > > + > > +/** > > + * Setup CQE event. > > + * > > + * @param[in] priv > > + * The vdpa driver private structure. > > + * > > + * @return > > + * 0 on success, a negative errno value otherwise and rte_errno is set. > > + */ > > +int mlx5_vdpa_cqe_event_setup(struct mlx5_vdpa_priv *priv); > > + > > +/** > > + * Unset CQE event . > > + * > > + * @param[in] priv > > + * The vdpa driver private structure. 
> > + */ > > +void mlx5_vdpa_cqe_event_unset(struct mlx5_vdpa_priv *priv); > > + > > #endif /* RTE_PMD_MLX5_VDPA_H_ */ > > diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_event.c > > b/drivers/vdpa/mlx5/mlx5_vdpa_event.c > > new file mode 100644 > > index 0000000..35518ad > > --- /dev/null > > +++ b/drivers/vdpa/mlx5/mlx5_vdpa_event.c > > @@ -0,0 +1,399 @@ > > +/* SPDX-License-Identifier: BSD-3-Clause > > + * Copyright 2019 Mellanox Technologies, Ltd */ #include <unistd.h> > > +#include <stdint.h> #include <fcntl.h> > > + > > +#include <rte_malloc.h> > > +#include <rte_errno.h> > > +#include <rte_lcore.h> > > +#include <rte_atomic.h> > > +#include <rte_common.h> > > +#include <rte_io.h> > > + > > +#include <mlx5_common.h> > > + > > +#include "mlx5_vdpa_utils.h" > > +#include "mlx5_vdpa.h" > > + > > + > > +void > > +mlx5_vdpa_event_qp_global_release(struct mlx5_vdpa_priv *priv) { > > + if (priv->uar) { > > + mlx5_glue->devx_free_uar(priv->uar); > > + priv->uar = NULL; > > + } > > + if (priv->eventc) { > > + mlx5_glue->devx_destroy_event_channel(priv->eventc); > > + priv->eventc = NULL; > > + } > > + priv->eqn = 0; > > +} > > + > > +/* Prepare all the global resources for all the event objects.*/ > > +static int mlx5_vdpa_event_qp_global_prepare(struct mlx5_vdpa_priv > > +*priv) { > > + uint32_t lcore; > > + > > + if (priv->eventc) > > + return 0; > > + lcore = (uint32_t)rte_lcore_to_cpu_id(-1); > > + if (mlx5_glue->devx_query_eqn(priv->ctx, lcore, &priv->eqn)) { > > + rte_errno = errno; > > + DRV_LOG(ERR, "Failed to query EQ number %d.", rte_errno); > > + return -1; > > + } > > + priv->eventc = mlx5_glue->devx_create_event_channel(priv->ctx, > > + > MLX5DV_DEVX_CREATE_EVENT_CHANNEL_FLAGS_OMIT_EV_DATA); > > + if (!priv->eventc) { > > + rte_errno = errno; > > + DRV_LOG(ERR, "Failed to create event channel %d.", > > + rte_errno); > > + goto error; > > + } > > + priv->uar = mlx5_glue->devx_alloc_uar(priv->ctx, 0); > > + if (!priv->uar) { > > + rte_errno = errno; > > + 
DRV_LOG(ERR, "Failed to allocate UAR."); > > + goto error; > > + } > > + return 0; > > +error: > > + mlx5_vdpa_event_qp_global_release(priv); > > + return -1; > > +} > > + > > +static void > > +mlx5_vdpa_cq_destroy(struct mlx5_vdpa_cq *cq) { > > + if (cq->cq) > > + claim_zero(mlx5_devx_cmd_destroy(cq->cq)); > > + if (cq->umem_obj) > > + claim_zero(mlx5_glue->devx_umem_dereg(cq- > >umem_obj)); > > + if (cq->umem_buf) > > + rte_free((void *)(uintptr_t)cq->umem_buf); > > + memset(cq, 0, sizeof(*cq)); > > +} > > + > > +static inline void > > +mlx5_vdpa_cq_arm(struct mlx5_vdpa_priv *priv, struct mlx5_vdpa_cq > > +*cq) { > > + const unsigned int cqe_mask = (1 << cq->log_desc_n) - 1; > > + uint32_t arm_sn = cq->arm_sn << MLX5_CQ_SQN_OFFSET; > > + uint32_t cq_ci = cq->cq_ci & MLX5_CI_MASK & cqe_mask; > > + uint32_t doorbell_hi = arm_sn | MLX5_CQ_DBR_CMD_ALL | cq_ci; > > + uint64_t doorbell = ((uint64_t)doorbell_hi << 32) | cq->cq->id; > > + uint64_t db_be = rte_cpu_to_be_64(doorbell); > > + uint32_t *addr = RTE_PTR_ADD(priv->uar->base_addr, > > +MLX5_CQ_DOORBELL); > > + > > + rte_io_wmb(); > > + cq->db_rec[MLX5_CQ_ARM_DB] = rte_cpu_to_be_32(doorbell_hi); > > + rte_wmb(); > > +#ifdef RTE_ARCH_64 > > + *(uint64_t *)addr = db_be; > > +#else > > + *(uint32_t *)addr = db_be; > > + rte_io_wmb(); > > + *((uint32_t *)addr + 1) = db_be >> 32; #endif > > + cq->arm_sn++; > > +} > > + > > +static int > > +mlx5_vdpa_cq_create(struct mlx5_vdpa_priv *priv, uint16_t log_desc_n, > > + int callfd, struct mlx5_vdpa_cq *cq) { > > + struct mlx5_devx_cq_attr attr; > > + size_t pgsize = sysconf(_SC_PAGESIZE); > > + uint32_t umem_size; > > + int ret; > > + uint16_t event_nums[1] = {0}; > > + > > + cq->log_desc_n = log_desc_n; > > + umem_size = sizeof(struct mlx5_cqe) * (1 << log_desc_n) + > > + sizeof(*cq->db_rec) > * 2; > > + cq->umem_buf = rte_zmalloc(__func__, umem_size, 4096); > > + if (!cq->umem_buf) { > > + DRV_LOG(ERR, "Failed to allocate memory for CQ."); > > + rte_errno = ENOMEM; > > + 
return -ENOMEM; > > + } > > + cq->umem_obj = mlx5_glue->devx_umem_reg(priv->ctx, > > + (void *)(uintptr_t)cq- > >umem_buf, > > + umem_size, > > + IBV_ACCESS_LOCAL_WRITE); > > + if (!cq->umem_obj) { > > + DRV_LOG(ERR, "Failed to register umem for CQ."); > > + goto error; > > + } > > + attr.q_umem_valid = 1; > > + attr.db_umem_valid = 1; > > + attr.use_first_only = 0; > > + attr.overrun_ignore = 0; > > + attr.uar_page_id = priv->uar->page_id; > > + attr.q_umem_id = cq->umem_obj->umem_id; > > + attr.q_umem_offset = 0; > > + attr.db_umem_id = cq->umem_obj->umem_id; > > + attr.db_umem_offset = sizeof(struct mlx5_cqe) * (1 << log_desc_n); > > + attr.eqn = priv->eqn; > > + attr.log_cq_size = log_desc_n; > > + attr.log_page_size = rte_log2_u32(pgsize); > > + cq->cq = mlx5_devx_cmd_create_cq(priv->ctx, &attr); > > + if (!cq->cq) > > + goto error; > > + cq->db_rec = RTE_PTR_ADD(cq->umem_buf, > (uintptr_t)attr.db_umem_offset); > > + cq->cq_ci = 0; > > + rte_spinlock_init(&cq->sl); > > + /* Subscribe CQ event to the event channel controlled by the driver. > */ > > + ret = mlx5_glue->devx_subscribe_devx_event(priv->eventc, cq->cq- > >obj, > > + sizeof(event_nums), > > + event_nums, > > + (uint64_t)(uintptr_t)cq); > > + if (ret) { > > + DRV_LOG(ERR, "Failed to subscribe CQE event."); > > + rte_errno = errno; > > + goto error; > > + } > > + /* Subscribe CQ event to the guest FD only if it is not in poll mode. */ > > + if (callfd != -1) { > > + ret = mlx5_glue->devx_subscribe_devx_event_fd(priv- > >eventc, > > + callfd, > > + cq->cq->obj, 0); > > + if (ret) { > > + DRV_LOG(ERR, "Failed to subscribe CQE event fd."); > > + rte_errno = errno; > > + goto error; > > + } > > + } > > + /* First arming. 
*/ > > + mlx5_vdpa_cq_arm(priv, cq); > > + return 0; > > +error: > > + mlx5_vdpa_cq_destroy(cq); > > + return -1; > > +} > > + > > +static inline void __rte_unused > > +mlx5_vdpa_cq_poll(struct mlx5_vdpa_priv *priv __rte_unused, > > + struct mlx5_vdpa_cq *cq) > > +{ > > + struct mlx5_vdpa_event_qp *eqp = > > + container_of(cq, struct > mlx5_vdpa_event_qp, cq); > > + const unsigned int cqe_size = 1 << cq->log_desc_n; > > + const unsigned int cqe_mask = cqe_size - 1; > > + int ret; > > + > > + do { > > + volatile struct mlx5_cqe *cqe = cq->cqes + (cq->cq_ci & > > + cqe_mask); > > + > > + ret = check_cqe(cqe, cqe_size, cq->cq_ci); > > + switch (ret) { > > + case MLX5_CQE_STATUS_ERR: > > + cq->errors++; > > + /*fall-through*/ > > + case MLX5_CQE_STATUS_SW_OWN: > > + cq->cq_ci++; > > + break; > > + case MLX5_CQE_STATUS_HW_OWN: > > + default: > > + break; > > + } > > + } while (ret != MLX5_CQE_STATUS_HW_OWN); > > Isn't there a risk of endless loop here? No. The maximum number of iterations is the CQ size, since the HW cannot write more CQEs before the doorbell record is updated. > > > + rte_io_wmb(); > > + /* Ring CQ doorbell record. */ > > + cq->db_rec[0] = rte_cpu_to_be_32(cq->cq_ci); > > + rte_io_wmb(); > > + /* Ring SW QP doorbell record. */ > > + eqp->db_rec[0] = rte_cpu_to_be_32(cq->cq_ci + cqe_size); } > > +