From: Matan Azrad
> On 1/29/20 11:09 AM, Matan Azrad wrote:
> > The HW virtq object represents an emulated context for a VIRTIO_NET
> > virtqueue which is created and managed by a VIRTIO_NET driver, as
> > defined in the VIRTIO specification.
> >
> > Add support to prepare and release all the basic HW resources needed
> > for user virtq emulation according to the rte_vhost configuration.
> >
> > This patch prepares the basic configurations needed by DevX commands
> > to create a virtq.
> >
> > Add new file mlx5_vdpa_virtq.c to manage virtq operations.
> >
> > Signed-off-by: Matan Azrad <ma...@mellanox.com>
> > Acked-by: Viacheslav Ovsiienko <viachesl...@mellanox.com>
> > ---
> >  drivers/vdpa/mlx5/Makefile          |   1 +
> >  drivers/vdpa/mlx5/meson.build       |   1 +
> >  drivers/vdpa/mlx5/mlx5_vdpa.c       |   1 +
> >  drivers/vdpa/mlx5/mlx5_vdpa.h       |  36 ++++++
> >  drivers/vdpa/mlx5/mlx5_vdpa_virtq.c | 212 ++++++++++++++++++++++++++++++++++++
> >  5 files changed, 251 insertions(+)
> >  create mode 100644 drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
> >
> > diff --git a/drivers/vdpa/mlx5/Makefile b/drivers/vdpa/mlx5/Makefile
> > index 7f13756..353e262 100644
> > --- a/drivers/vdpa/mlx5/Makefile
> > +++ b/drivers/vdpa/mlx5/Makefile
> > @@ -10,6 +10,7 @@ LIB = librte_pmd_mlx5_vdpa.a
> >  SRCS-$(CONFIG_RTE_LIBRTE_MLX5_VDPA_PMD) += mlx5_vdpa.c
> >  SRCS-$(CONFIG_RTE_LIBRTE_MLX5_VDPA_PMD) += mlx5_vdpa_mem.c
> >  SRCS-$(CONFIG_RTE_LIBRTE_MLX5_VDPA_PMD) += mlx5_vdpa_event.c
> > +SRCS-$(CONFIG_RTE_LIBRTE_MLX5_VDPA_PMD) += mlx5_vdpa_virtq.c
> >
> >  # Basic CFLAGS.
> >  CFLAGS += -O3
> > diff --git a/drivers/vdpa/mlx5/meson.build b/drivers/vdpa/mlx5/meson.build
> > index c609f7c..e017f95 100644
> > --- a/drivers/vdpa/mlx5/meson.build
> > +++ b/drivers/vdpa/mlx5/meson.build
> > @@ -14,6 +14,7 @@ sources = files(
> >     'mlx5_vdpa.c',
> >     'mlx5_vdpa_mem.c',
> >     'mlx5_vdpa_event.c',
> > +   'mlx5_vdpa_virtq.c',
> >  )
> >  cflags_options = [
> >     '-std=c11',
> > diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.c b/drivers/vdpa/mlx5/mlx5_vdpa.c
> > index c67f93d..4d30b35 100644
> > --- a/drivers/vdpa/mlx5/mlx5_vdpa.c
> > +++ b/drivers/vdpa/mlx5/mlx5_vdpa.c
> > @@ -229,6 +229,7 @@
> >             goto error;
> >     }
> >     SLIST_INIT(&priv->mr_list);
> > +   SLIST_INIT(&priv->virtq_list);
> >     pthread_mutex_lock(&priv_list_lock);
> >     TAILQ_INSERT_TAIL(&priv_list, priv, next);
> >     pthread_mutex_unlock(&priv_list_lock);
> > diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.h b/drivers/vdpa/mlx5/mlx5_vdpa.h
> > index 30030b7..a7e2185 100644
> > --- a/drivers/vdpa/mlx5/mlx5_vdpa.h
> > +++ b/drivers/vdpa/mlx5/mlx5_vdpa.h
> > @@ -53,6 +53,19 @@ struct mlx5_vdpa_query_mr {
> >     int is_indirect;
> >  };
> >
> > +struct mlx5_vdpa_virtq {
> > +   SLIST_ENTRY(mlx5_vdpa_virtq) next;
> > +   uint16_t index;
> > +   uint16_t vq_size;
> > +   struct mlx5_devx_obj *virtq;
> > +   struct mlx5_vdpa_event_qp eqp;
> > +   struct {
> > +           struct mlx5dv_devx_umem *obj;
> > +           void *buf;
> > +           uint32_t size;
> > +   } umems[3];
> > +};
> > +
> >  struct mlx5_vdpa_priv {
> >     TAILQ_ENTRY(mlx5_vdpa_priv) next;
> >     int id; /* vDPA device id. */
> > @@ -69,6 +82,10 @@ struct mlx5_vdpa_priv {
> >     struct mlx5dv_devx_event_channel *eventc;
> >     struct mlx5dv_devx_uar *uar;
> >     struct rte_intr_handle intr_handle;
> > +   struct mlx5_devx_obj *td;
> > +   struct mlx5_devx_obj *tis;
> > +   uint16_t nr_virtqs;
> > +   SLIST_HEAD(virtq_list, mlx5_vdpa_virtq) virtq_list;
> >     SLIST_HEAD(mr_list, mlx5_vdpa_query_mr) mr_list;
> >  };
> >
> > @@ -146,4 +163,23 @@ int mlx5_vdpa_event_qp_create(struct mlx5_vdpa_priv *priv, uint16_t desc_n,
> >   */
> >  void mlx5_vdpa_cqe_event_unset(struct mlx5_vdpa_priv *priv);
> >
> > +/**
> > + * Release all the virtqs and all their related resources.
> > + *
> > + * @param[in] priv
> > + *   The vdpa driver private structure.
> > + */
> > +void mlx5_vdpa_virtqs_release(struct mlx5_vdpa_priv *priv);
> > +
> > +/**
> > + * Create all the HW virtqs resources and all their related resources.
> > + *
> > + * @param[in] priv
> > + *   The vdpa driver private structure.
> > + *
> > + * @return
> > + *   0 on success, a negative errno value otherwise and rte_errno is set.
> > + */
> > +int mlx5_vdpa_virtqs_prepare(struct mlx5_vdpa_priv *priv);
> > +
> >  #endif /* RTE_PMD_MLX5_VDPA_H_ */
> > diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c b/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
> > new file mode 100644
> > index 0000000..781bccf
> > --- /dev/null
> > +++ b/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
> > @@ -0,0 +1,212 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright 2019 Mellanox Technologies, Ltd
> > + */
> > +#include <string.h>
> > +
> > +#include <rte_malloc.h>
> > +#include <rte_errno.h>
> > +
> > +#include <mlx5_common.h>
> > +
> > +#include "mlx5_vdpa_utils.h"
> > +#include "mlx5_vdpa.h"
> > +
> > +
> > +static int
> > +mlx5_vdpa_virtq_unset(struct mlx5_vdpa_virtq *virtq)
> > +{
> > +   int i;
> > +
> > +   if (virtq->virtq) {
> > +           claim_zero(mlx5_devx_cmd_destroy(virtq->virtq));
> > +           virtq->virtq = NULL;
> > +   }
> > +   for (i = 0; i < 3; ++i) {
> > +           if (virtq->umems[i].obj)
> > +                   claim_zero(mlx5_glue->devx_umem_dereg
> > +                                           (virtq->umems[i].obj));
> > +           if (virtq->umems[i].buf)
> > +                   rte_free(virtq->umems[i].buf);
> > +   }
> > +   memset(&virtq->umems, 0, sizeof(virtq->umems));
> > +   if (virtq->eqp.fw_qp)
> > +           mlx5_vdpa_event_qp_destroy(&virtq->eqp);
> > +   return 0;
> > +}
> > +
> > +void
> > +mlx5_vdpa_virtqs_release(struct mlx5_vdpa_priv *priv)
> > +{
> > +   struct mlx5_vdpa_virtq *entry;
> > +   struct mlx5_vdpa_virtq *next;
> > +
> > +   entry = SLIST_FIRST(&priv->virtq_list);
> > +   while (entry) {
> > +           next = SLIST_NEXT(entry, next);
> > +           mlx5_vdpa_virtq_unset(entry);
> > +           SLIST_REMOVE(&priv->virtq_list, entry, mlx5_vdpa_virtq, next);
> > +           rte_free(entry);
> > +           entry = next;
> > +   }
> > +   SLIST_INIT(&priv->virtq_list);
> > +   if (priv->tis) {
> > +           claim_zero(mlx5_devx_cmd_destroy(priv->tis));
> > +           priv->tis = NULL;
> > +   }
> > +   if (priv->td) {
> > +           claim_zero(mlx5_devx_cmd_destroy(priv->td));
> > +           priv->td = NULL;
> > +   }
> > +}
> > +
> > +static uint64_t
> > +mlx5_vdpa_hva_to_gpa(struct rte_vhost_memory *mem, uint64_t hva)
> > +{
> > +   struct rte_vhost_mem_region *reg;
> > +   uint32_t i;
> > +   uint64_t gpa = 0;
> > +
> > +   for (i = 0; i < mem->nregions; i++) {
> > +           reg = &mem->regions[i];
> > +           if (hva >= reg->host_user_addr &&
> > +               hva < reg->host_user_addr + reg->size) {
> > +                   gpa = hva - reg->host_user_addr + reg->guest_phys_addr;
> > +                   break;
> > +           }
> > +   }
> > +   return gpa;
> > +}
> 
> I think you may need a third parameter for the size to map.
> Otherwise, you would be vulnerable to CVE-2018-1059.

Yes, I just read it and understood that the virtio descriptor queue/packet
data may be non-contiguous in guest physical memory, and maybe even undefined
here in the rte_vhost library. Is that right?

Don't you think rte_vhost should validate it? At least that all the queue
memory is mapped?

Can you elaborate on why this may happen? A QEMU bug?

In any case, from Mellanox's perspective it is OK, at least for the packet
data: if the guest tries to access a physical address which is not mapped,
the packet will be ignored by the HW.
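
For the rings themselves, if a size check is what is needed, an untested
sketch of mlx5_vdpa_hva_to_gpa() with a size parameter could look like the
following — the exact byte sizes the callers should pass (derived from
vq.size and the virtio ring layout) are an assumption here, not part of the
patch:

static uint64_t
mlx5_vdpa_hva_to_gpa(struct rte_vhost_memory *mem, uint64_t hva,
		     uint64_t size)
{
	struct rte_vhost_mem_region *reg;
	uint32_t i;
	uint64_t gpa = 0;

	for (i = 0; i < mem->nregions; i++) {
		reg = &mem->regions[i];
		/* The whole [hva, hva + size) range must fit in one region. */
		if (hva >= reg->host_user_addr &&
		    hva + size <= reg->host_user_addr + reg->size) {
			gpa = hva - reg->host_user_addr + reg->guest_phys_addr;
			break;
		}
	}
	return gpa;
}

This way a ring that spans two vhost memory regions resolves to no GPA at
all instead of a partially valid one, and each caller in
mlx5_vdpa_virtq_setup() would fail cleanly.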
> > +
> > +static int
> > +mlx5_vdpa_virtq_setup(struct mlx5_vdpa_priv *priv,
> > +                 struct mlx5_vdpa_virtq *virtq, int index)
> > +{
> > +   struct rte_vhost_vring vq;
> > +   struct mlx5_devx_virtq_attr attr = {0};
> > +   uint64_t gpa;
> > +   int ret;
> > +   int i;
> > +   uint16_t last_avail_idx;
> > +   uint16_t last_used_idx;
> > +
> > +   ret = rte_vhost_get_vhost_vring(priv->vid, index, &vq);
> > +   if (ret)
> > +           return -1;
> > +   virtq->index = index;
> > +   virtq->vq_size = vq.size;
> > +   /*
> > +    * No need for event QP creation when the guest is in poll mode or
> > +    * when the capability allows it.
> > +    */
> > +   attr.event_mode = vq.callfd != -1 || !(priv->caps.event_mode & (1 <<
> > +                           MLX5_VIRTQ_EVENT_MODE_NO_MSIX)) ?
> > +                           MLX5_VIRTQ_EVENT_MODE_QP :
> > +                           MLX5_VIRTQ_EVENT_MODE_NO_MSIX;
> > +   if (attr.event_mode == MLX5_VIRTQ_EVENT_MODE_QP) {
> > +           ret = mlx5_vdpa_event_qp_create(priv, vq.size, vq.callfd,
> > +                                           &virtq->eqp);
> > +           if (ret) {
> > +                   DRV_LOG(ERR, "Failed to create event QPs for virtq %d.",
> > +                           index);
> > +                   return -1;
> > +           }
> > +           attr.qp_id = virtq->eqp.fw_qp->id;
> > +   } else {
> > +           DRV_LOG(INFO, "Virtq %d is, for sure, working by poll mode, no"
> > +                   " need event QPs and event mechanism.", index);
> > +   }
> > +   /* Setup 3 UMEMs for each virtq. */
> > +   for (i = 0; i < 3; ++i) {
> > +           virtq->umems[i].size = priv->caps.umems[i].a * vq.size +
> > +                                  priv->caps.umems[i].b;
> > +           virtq->umems[i].buf = rte_zmalloc(__func__,
> > +                                             virtq->umems[i].size, 4096);
> > +           if (!virtq->umems[i].buf) {
> > +                   DRV_LOG(ERR, "Cannot allocate umem %d memory for virtq"
> > +                           " %u.", i, index);
> > +                   goto error;
> > +           }
> > +           virtq->umems[i].obj = mlx5_glue->devx_umem_reg(priv->ctx,
> > +                                                   virtq->umems[i].buf,
> > +                                                   virtq->umems[i].size,
> > +                                                   IBV_ACCESS_LOCAL_WRITE);
> > +           if (!virtq->umems[i].obj) {
> > +                   DRV_LOG(ERR, "Failed to register umem %d for virtq %u.",
> > +                           i, index);
> > +                   goto error;
> > +           }
> > +           attr.umems[i].id = virtq->umems[i].obj->umem_id;
> > +           attr.umems[i].offset = 0;
> > +           attr.umems[i].size = virtq->umems[i].size;
> > +   }
> > +   gpa = mlx5_vdpa_hva_to_gpa(priv->vmem, (uint64_t)(uintptr_t)vq.desc);
> > +   if (!gpa) {
> > +           DRV_LOG(ERR, "Fail to get GPA for descriptor ring.");
> > +           goto error;
> > +   }
> > +   attr.desc_addr = gpa;
> > +   gpa = mlx5_vdpa_hva_to_gpa(priv->vmem, (uint64_t)(uintptr_t)vq.used);
> > +   if (!gpa) {
> > +           DRV_LOG(ERR, "Fail to get GPA for used ring.");
> > +           goto error;
> > +   }
> > +   attr.used_addr = gpa;
> > +   gpa = mlx5_vdpa_hva_to_gpa(priv->vmem, (uint64_t)(uintptr_t)vq.avail);
> > +   if (!gpa) {
> > +           DRV_LOG(ERR, "Fail to get GPA for available ring.");
> > +           goto error;
> > +   }
> > +   attr.available_addr = gpa;
> > +   rte_vhost_get_vring_base(priv->vid, index, &last_avail_idx,
> > +                            &last_used_idx);
> > +   DRV_LOG(INFO, "vid %d: Init last_avail_idx=%d, last_used_idx=%d for "
> > +           "virtq %d.", priv->vid, last_avail_idx, last_used_idx, index);
> > +   attr.hw_available_index = last_avail_idx;
> > +   attr.hw_used_index = last_used_idx;
> > +   attr.q_size = vq.size;
> > +   attr.mkey = priv->gpa_mkey_index;
> > +   attr.tis_id = priv->tis->id;
> > +   attr.queue_index = index;
> > +   virtq->virtq = mlx5_devx_cmd_create_virtq(priv->ctx, &attr);
> > +   if (!virtq->virtq)
> > +           goto error;
> > +   return 0;
> > +error:
> > +   mlx5_vdpa_virtq_unset(virtq);
> > +   return -1;
> > +}
> > +
> > +int
> > +mlx5_vdpa_virtqs_prepare(struct mlx5_vdpa_priv *priv)
> > +{
> > +   struct mlx5_devx_tis_attr tis_attr = {0};
> > +   struct mlx5_vdpa_virtq *virtq;
> > +   uint32_t i;
> > +   uint16_t nr_vring = rte_vhost_get_vring_num(priv->vid);
> > +
> > +   priv->td = mlx5_devx_cmd_create_td(priv->ctx);
> > +   if (!priv->td) {
> > +           DRV_LOG(ERR, "Failed to create transport domain.");
> > +           return -rte_errno;
> > +   }
> > +   tis_attr.transport_domain = priv->td->id;
> > +   priv->tis = mlx5_devx_cmd_create_tis(priv->ctx, &tis_attr);
> > +   if (!priv->tis) {
> > +           DRV_LOG(ERR, "Failed to create TIS.");
> > +           goto error;
> > +   }
> > +   for (i = 0; i < nr_vring; i++) {
> > +           virtq = rte_zmalloc(__func__, sizeof(*virtq), 0);
> > +           if (!virtq || mlx5_vdpa_virtq_setup(priv, virtq, i)) {
> > +                   if (virtq)
> > +                           rte_free(virtq);
> > +                   goto error;
> > +           }
> > +           SLIST_INSERT_HEAD(&priv->virtq_list, virtq, next);
> > +   }
> > +   priv->nr_virtqs = nr_vring;
> > +   return 0;
> > +error:
> > +   mlx5_vdpa_virtqs_release(priv);
> > +   return -1;
> > +}
> >
