> -----Original Message-----
> From: Maxime Coquelin <maxime.coque...@redhat.com>
> Sent: Friday, June 18, 2021 10:04 PM
> To: dev@dpdk.org; david.march...@redhat.com; Xia, Chenbo 
> <chenbo....@intel.com>
> Cc: Maxime Coquelin <maxime.coque...@redhat.com>
> Subject: [PATCH v6 6/7] vhost: allocate all data on same node as virtqueue
> 
> This patch saves the NUMA node the virtqueue is allocated
> on at init time, in order to allocate all other data on the
> same node.
> 
> Most of the data is allocated before numa_realloc() is called
> and will therefore be reallocated properly, but some data, such
> as the log cache, is most likely allocated afterwards.
> 
> For the virtio device metadata, we decide to allocate it
> on the same node as VQ 0.
> 
> Signed-off-by: Maxime Coquelin <maxime.coque...@redhat.com>
> ---
>  lib/vhost/vhost.c      | 34 ++++++++++++++++------------------
>  lib/vhost/vhost.h      |  1 +
>  lib/vhost/vhost_user.c | 41 ++++++++++++++++++++++++++++-------------
>  3 files changed, 45 insertions(+), 31 deletions(-)
> 
> diff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c
> index c96f6335c8..0000cd3297 100644
> --- a/lib/vhost/vhost.c
> +++ b/lib/vhost/vhost.c
> @@ -261,7 +261,7 @@ vhost_alloc_copy_ind_table(struct virtio_net *dev, struct
> vhost_virtqueue *vq,
>       uint64_t src, dst;
>       uint64_t len, remain = desc_len;
> 
> -     idesc = rte_malloc(__func__, desc_len, 0);
> +     idesc = rte_malloc_socket(__func__, desc_len, 0, vq->numa_node);
>       if (unlikely(!idesc))
>               return NULL;
> 
> @@ -549,6 +549,7 @@ static void
>  init_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
>  {
>       struct vhost_virtqueue *vq;
> +     int numa_node = SOCKET_ID_ANY;
> 
>       if (vring_idx >= VHOST_MAX_VRING) {
>               VHOST_LOG_CONFIG(ERR,
> @@ -570,6 +571,15 @@ init_vring_queue(struct virtio_net *dev, uint32_t
> vring_idx)
>       vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;
>       vq->notif_enable = VIRTIO_UNINITIALIZED_NOTIF;
> 
> +#ifdef RTE_LIBRTE_VHOST_NUMA
> +     if (get_mempolicy(&numa_node, NULL, 0, vq, MPOL_F_NODE | MPOL_F_ADDR)) {
> +             VHOST_LOG_CONFIG(ERR, "(%d) failed to query numa node: %s\n",
> +                     dev->vid, rte_strerror(errno));
> +             numa_node = SOCKET_ID_ANY;
> +     }
> +#endif
> +     vq->numa_node = numa_node;
> +
>       vhost_user_iotlb_init(dev, vring_idx);
>  }
> 
> @@ -1616,7 +1626,6 @@ int rte_vhost_async_channel_register(int vid, uint16_t
> queue_id,
>       struct vhost_virtqueue *vq;
>       struct virtio_net *dev = get_device(vid);
>       struct rte_vhost_async_features f;
> -     int node;
> 
>       if (dev == NULL || ops == NULL)
>               return -1;
> @@ -1651,20 +1660,9 @@ int rte_vhost_async_channel_register(int vid, uint16_t
> queue_id,
>               goto reg_out;
>       }
> 
> -#ifdef RTE_LIBRTE_VHOST_NUMA
> -     if (get_mempolicy(&node, NULL, 0, vq, MPOL_F_NODE | MPOL_F_ADDR)) {
> -             VHOST_LOG_CONFIG(ERR,
> -                     "unable to get numa information in async register. "
> -                     "allocating async buffer memory on the caller thread
> node\n");
> -             node = SOCKET_ID_ANY;
> -     }
> -#else
> -     node = SOCKET_ID_ANY;
> -#endif
> -
>       vq->async_pkts_info = rte_malloc_socket(NULL,
>                       vq->size * sizeof(struct async_inflight_info),
> -                     RTE_CACHE_LINE_SIZE, node);
> +                     RTE_CACHE_LINE_SIZE, vq->numa_node);
>       if (!vq->async_pkts_info) {
>               vhost_free_async_mem(vq);
>               VHOST_LOG_CONFIG(ERR,
> @@ -1675,7 +1673,7 @@ int rte_vhost_async_channel_register(int vid, uint16_t
> queue_id,
> 
>       vq->it_pool = rte_malloc_socket(NULL,
>                       VHOST_MAX_ASYNC_IT * sizeof(struct rte_vhost_iov_iter),
> -                     RTE_CACHE_LINE_SIZE, node);
> +                     RTE_CACHE_LINE_SIZE, vq->numa_node);
>       if (!vq->it_pool) {
>               vhost_free_async_mem(vq);
>               VHOST_LOG_CONFIG(ERR,
> @@ -1686,7 +1684,7 @@ int rte_vhost_async_channel_register(int vid, uint16_t
> queue_id,
> 
>       vq->vec_pool = rte_malloc_socket(NULL,
>                       VHOST_MAX_ASYNC_VEC * sizeof(struct iovec),
> -                     RTE_CACHE_LINE_SIZE, node);
> +                     RTE_CACHE_LINE_SIZE, vq->numa_node);
>       if (!vq->vec_pool) {
>               vhost_free_async_mem(vq);
>               VHOST_LOG_CONFIG(ERR,
> @@ -1698,7 +1696,7 @@ int rte_vhost_async_channel_register(int vid, uint16_t
> queue_id,
>       if (vq_is_packed(dev)) {
>               vq->async_buffers_packed = rte_malloc_socket(NULL,
>                       vq->size * sizeof(struct vring_used_elem_packed),
> -                     RTE_CACHE_LINE_SIZE, node);
> +                     RTE_CACHE_LINE_SIZE, vq->numa_node);
>               if (!vq->async_buffers_packed) {
>                       vhost_free_async_mem(vq);
>                       VHOST_LOG_CONFIG(ERR,
> @@ -1709,7 +1707,7 @@ int rte_vhost_async_channel_register(int vid, uint16_t
> queue_id,
>       } else {
>               vq->async_descs_split = rte_malloc_socket(NULL,
>                       vq->size * sizeof(struct vring_used_elem),
> -                     RTE_CACHE_LINE_SIZE, node);
> +                     RTE_CACHE_LINE_SIZE, vq->numa_node);
>               if (!vq->async_descs_split) {
>                       vhost_free_async_mem(vq);
>                       VHOST_LOG_CONFIG(ERR,
> diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
> index 8078ddff79..8ffe387556 100644
> --- a/lib/vhost/vhost.h
> +++ b/lib/vhost/vhost.h
> @@ -164,6 +164,7 @@ struct vhost_virtqueue {
> 
>       uint16_t                batch_copy_nb_elems;
>       struct batch_copy_elem  *batch_copy_elems;
> +     int                     numa_node;
>       bool                    used_wrap_counter;
>       bool                    avail_wrap_counter;
> 
> diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
> index d6ec4000c3..d8ec087dfc 100644
> --- a/lib/vhost/vhost_user.c
> +++ b/lib/vhost/vhost_user.c
> @@ -433,10 +433,10 @@ vhost_user_set_vring_num(struct virtio_net **pdev,
>       if (vq_is_packed(dev)) {
>               if (vq->shadow_used_packed)
>                       rte_free(vq->shadow_used_packed);
> -             vq->shadow_used_packed = rte_malloc(NULL,
> +             vq->shadow_used_packed = rte_malloc_socket(NULL,
>                               vq->size *
>                               sizeof(struct vring_used_elem_packed),
> -                             RTE_CACHE_LINE_SIZE);
> +                             RTE_CACHE_LINE_SIZE, vq->numa_node);
>               if (!vq->shadow_used_packed) {
>                       VHOST_LOG_CONFIG(ERR,
>                                       "failed to allocate memory for shadow 
> used
> ring.\n");
> @@ -447,9 +447,9 @@ vhost_user_set_vring_num(struct virtio_net **pdev,
>               if (vq->shadow_used_split)
>                       rte_free(vq->shadow_used_split);
> 
> -             vq->shadow_used_split = rte_malloc(NULL,
> +             vq->shadow_used_split = rte_malloc_socket(NULL,
>                               vq->size * sizeof(struct vring_used_elem),
> -                             RTE_CACHE_LINE_SIZE);
> +                             RTE_CACHE_LINE_SIZE, vq->numa_node);
> 
>               if (!vq->shadow_used_split) {
>                       VHOST_LOG_CONFIG(ERR,
> @@ -460,9 +460,9 @@ vhost_user_set_vring_num(struct virtio_net **pdev,
> 
>       if (vq->batch_copy_elems)
>               rte_free(vq->batch_copy_elems);
> -     vq->batch_copy_elems = rte_malloc(NULL,
> +     vq->batch_copy_elems = rte_malloc_socket(NULL,
>                               vq->size * sizeof(struct batch_copy_elem),
> -                             RTE_CACHE_LINE_SIZE);
> +                             RTE_CACHE_LINE_SIZE, vq->numa_node);
>       if (!vq->batch_copy_elems) {
>               VHOST_LOG_CONFIG(ERR,
>                       "failed to allocate memory for batching copy.\n");
> @@ -505,6 +505,9 @@ numa_realloc(struct virtio_net *dev, int index)
>               return dev;
>       }
> 
> +     if (node == vq->numa_node)
> +             goto out_dev_realloc;
> +
>       vq = rte_realloc_socket(vq, sizeof(*vq), 0, node);
>       if (!vq) {
>               VHOST_LOG_CONFIG(ERR, "Failed to realloc virtqueue %d on
> node %d\n",
> @@ -559,6 +562,10 @@ numa_realloc(struct virtio_net *dev, int index)
>               vq->log_cache = lc;
>       }
> 
> +     vq->numa_node = node;
> +
> +out_dev_realloc:
> +
>       if (dev->flags & VIRTIO_DEV_RUNNING)
>               return dev;
> 
> @@ -1213,7 +1220,7 @@ vhost_user_set_mem_table(struct virtio_net **pdev,
> struct VhostUserMsg *msg,
>       struct virtio_net *dev = *pdev;
>       struct VhostUserMemory *memory = &msg->payload.memory;
>       struct rte_vhost_mem_region *reg;
> -
> +     int numa_node = SOCKET_ID_ANY;
>       uint64_t mmap_offset;
>       uint32_t i;
> 
> @@ -1253,13 +1260,21 @@ vhost_user_set_mem_table(struct virtio_net **pdev,
> struct VhostUserMsg *msg,
>               for (i = 0; i < dev->nr_vring; i++)
>                       vhost_user_iotlb_flush_all(dev->virtqueue[i]);
> 
> +     /*
> +      * If VQ 0 has already been allocated, try to allocate on the same
> +      * NUMA node. It can be reallocated later in numa_realloc().
> +      */
> +     if (dev->nr_vring > 0)
> +             numa_node = dev->virtqueue[0]->numa_node;
> +
>       dev->nr_guest_pages = 0;
>       if (dev->guest_pages == NULL) {
>               dev->max_guest_pages = 8;
> -             dev->guest_pages = rte_zmalloc(NULL,
> +             dev->guest_pages = rte_zmalloc_socket(NULL,
>                                       dev->max_guest_pages *
>                                       sizeof(struct guest_page),
> -                                     RTE_CACHE_LINE_SIZE);
> +                                     RTE_CACHE_LINE_SIZE,
> +                                     numa_node);
>               if (dev->guest_pages == NULL) {
>                       VHOST_LOG_CONFIG(ERR,
>                               "(%d) failed to allocate memory "
> @@ -1269,8 +1284,8 @@ vhost_user_set_mem_table(struct virtio_net **pdev,
> struct VhostUserMsg *msg,
>               }
>       }
> 
> -     dev->mem = rte_zmalloc("vhost-mem-table", sizeof(struct 
> rte_vhost_memory)
> +
> -             sizeof(struct rte_vhost_mem_region) * memory->nregions, 0);
> +     dev->mem = rte_zmalloc_socket("vhost-mem-table", sizeof(struct
> rte_vhost_memory) +
> +             sizeof(struct rte_vhost_mem_region) * memory->nregions, 0,
> numa_node);
>       if (dev->mem == NULL) {
>               VHOST_LOG_CONFIG(ERR,
>                       "(%d) failed to allocate memory for dev->mem\n",
> @@ -2193,9 +2208,9 @@ vhost_user_set_log_base(struct virtio_net **pdev, struct
> VhostUserMsg *msg,
>               rte_free(vq->log_cache);
>               vq->log_cache = NULL;
>               vq->log_cache_nb_elem = 0;
> -             vq->log_cache = rte_zmalloc("vq log cache",
> +             vq->log_cache = rte_malloc_socket("vq log cache",
>                               sizeof(struct log_cache_entry) * 
> VHOST_LOG_CACHE_NR,
> -                             0);
> +                             0, vq->numa_node);
>               /*
>                * If log cache alloc fail, don't fail migration, but no
>                * caching will be done, which will impact performance
> --
> 2.31.1

Reviewed-by: Chenbo Xia <chenbo....@intel.com>

Reply via email to