On 7/28/20 5:28 AM, patrick...@intel.com wrote:
> From: Patrick Fu <patrick...@intel.com>
> 
> Async copy fails when a single ring buffer vector is split across
> multiple physical pages. This happens because the current hpa address
> translation function doesn't handle multi-page buffers. This patch
> implements a new gpa-to-hpa conversion function which returns the hpa
> of the first host page hit, along with the length mapped on that page.
> The async data path repeatedly calls this new function to construct a
> multi-segment async copy descriptor for ring buffers crossing physical
> page boundaries.
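
As an illustration of the splitting described above, here is a minimal,
self-contained sketch. All names in it (toy_gpa_to_first_hpa, toy_split,
TOY_PAGE_SIZE, the identity-plus-offset "mapping") are hypothetical
simplifications, not the patch code; they only mimic the shape of
gpa_to_first_hpa() and the segmentation loop in async_mbuf_to_desc().

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    #define TOY_PAGE_SIZE 4096ULL

    struct toy_seg {
            uint64_t hpa;
            uint64_t len;
    };

    /*
     * Toy stand-in for gpa_to_first_hpa(): return a fake hpa for 'gpa' and
     * report in *hpa_size how many of the requested bytes are contiguous
     * on the first host page hit. The "mapping" is identity plus a
     * constant offset, purely for demonstration.
     */
    static uint64_t
    toy_gpa_to_first_hpa(uint64_t gpa, uint64_t gpa_size, uint64_t *hpa_size)
    {
            uint64_t page_end = (gpa & ~(TOY_PAGE_SIZE - 1)) + TOY_PAGE_SIZE;
            uint64_t avail = page_end - gpa;

            *hpa_size = gpa_size <= avail ? gpa_size : avail;
            return gpa + 0x100000ULL; /* pretend host physical offset */
    }

    /*
     * Split [gpa, gpa + len) into per-page copy segments, the way the
     * async data path fills its src/dst iovec entries.
     */
    static int
    toy_split(uint64_t gpa, uint64_t len, struct toy_seg *segs, int max_segs)
    {
            int n = 0;
            uint64_t mapped_len;

            while (len && n < max_segs) {
                    segs[n].hpa = toy_gpa_to_first_hpa(gpa, len, &mapped_len);
                    segs[n].len = mapped_len;
                    gpa += mapped_len;
                    len -= mapped_len;
                    n++;
            }
            return n;
    }

    int
    main(void)
    {
            struct toy_seg segs[8];
            /* 6000-byte buffer starting 1000 bytes before a page boundary:
             * expect three segments of 1000, 4096 and 904 bytes. */
            int i, n = toy_split(3 * TOY_PAGE_SIZE - 1000, 6000, segs, 8);

            for (i = 0; i < n; i++)
                    printf("seg %d: hpa=0x%" PRIx64 " len=%" PRIu64 "\n",
                           i, segs[i].hpa, segs[i].len);
            return 0;
    }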
> 
> Fixes: cd6760da1076 ("vhost: introduce async enqueue for split ring")
> 
> Signed-off-by: Patrick Fu <patrick...@intel.com>
> ---
> v2:
>  - change commit message and title
>  - the v1 patch used the CPU to copy multi-page buffers; the v2 patch
>    splits the copy into multiple async copy segments whenever possible
> 
> v3:
>  - added fixline
> 
> v4:
>  - fix missed translation of a gpa region whose length equals the host
>    page size
> 
>  lib/librte_vhost/vhost.h      | 50 +++++++++++++++++++++++++++++++++++
>  lib/librte_vhost/virtio_net.c | 40 +++++++++++++++++-----------
>  2 files changed, 75 insertions(+), 15 deletions(-)
> 
> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> index 0f7212f88..05c202a57 100644
> --- a/lib/librte_vhost/vhost.h
> +++ b/lib/librte_vhost/vhost.h
> @@ -616,6 +616,56 @@ gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size)
>       return 0;
>  }
>  
> +static __rte_always_inline rte_iova_t
> +gpa_to_first_hpa(struct virtio_net *dev, uint64_t gpa,
> +     uint64_t gpa_size, uint64_t *hpa_size)
> +{
> +     uint32_t i;
> +     struct guest_page *page;
> +     struct guest_page key;
> +
> +     *hpa_size = gpa_size;
> +     if (dev->nr_guest_pages >= VHOST_BINARY_SEARCH_THRESH) {
> +             key.guest_phys_addr = gpa & ~(dev->guest_pages[0].size - 1);
> +             page = bsearch(&key, dev->guest_pages, dev->nr_guest_pages,
> +                            sizeof(struct guest_page), guest_page_addrcmp);
> +             if (page) {
> +                     if (gpa + gpa_size <=
> +                                     page->guest_phys_addr + page->size) {
> +                             return gpa - page->guest_phys_addr +
> +                                     page->host_phys_addr;
> +                     } else if (gpa < page->guest_phys_addr +
> +                                             page->size) {
> +                             *hpa_size = page->guest_phys_addr +
> +                                     page->size - gpa;
> +                             return gpa - page->guest_phys_addr +
> +                                     page->host_phys_addr;
> +                     }
> +             }
> +     } else {
> +             for (i = 0; i < dev->nr_guest_pages; i++) {
> +                     page = &dev->guest_pages[i];
> +
> +                     if (gpa >= page->guest_phys_addr) {
> +                             if (gpa + gpa_size <=
> +                                     page->guest_phys_addr + page->size) {
> +                                     return gpa - page->guest_phys_addr +
> +                                             page->host_phys_addr;
> +                             } else if (gpa < page->guest_phys_addr +
> +                                                     page->size) {
> +                                     *hpa_size = page->guest_phys_addr +
> +                                             page->size - gpa;
> +                                     return gpa - page->guest_phys_addr +
> +                                             page->host_phys_addr;
> +                             }
> +                     }
> +             }
> +     }
> +
> +     *hpa_size = 0;
> +     return 0;
> +}
> +
>  static __rte_always_inline uint64_t
>  hva_to_gpa(struct virtio_net *dev, uint64_t vva, uint64_t len)
>  {
> diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
> index 95a0bc19f..124a33a10 100644
> --- a/lib/librte_vhost/virtio_net.c
> +++ b/lib/librte_vhost/virtio_net.c
> @@ -980,6 +980,7 @@ async_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
>       struct batch_copy_elem *batch_copy = vq->batch_copy_elems;
>       struct virtio_net_hdr_mrg_rxbuf tmp_hdr, *hdr = NULL;
>       int error = 0;
> +     uint64_t mapped_len;
>  
>       uint32_t tlen = 0;
>       int tvec_idx = 0;
> @@ -1072,24 +1073,31 @@ async_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
>  
>               cpy_len = RTE_MIN(buf_avail, mbuf_avail);
>  
> -             if (unlikely(cpy_len >= cpy_threshold)) {
> -                     hpa = (void *)(uintptr_t)gpa_to_hpa(dev,
> -                                     buf_iova + buf_offset, cpy_len);
> +             while (unlikely(cpy_len && cpy_len >= cpy_threshold)) {
> +                     hpa = (void *)(uintptr_t)gpa_to_first_hpa(dev,
> +                                     buf_iova + buf_offset,
> +                                     cpy_len, &mapped_len);
>  
> -                     if (unlikely(!hpa)) {
> -                             error = -1;
> -                             goto out;
> -                     }
> +                     if (unlikely(!hpa || mapped_len < cpy_threshold))
> +                             break;
>  
>                       async_fill_vec(src_iovec + tvec_idx,
>                               (void *)(uintptr_t)rte_pktmbuf_iova_offset(m,
> -                                             mbuf_offset), cpy_len);
> +                             mbuf_offset), (size_t)mapped_len);
>  
> -                     async_fill_vec(dst_iovec + tvec_idx, hpa, cpy_len);
> +                     async_fill_vec(dst_iovec + tvec_idx,
> +                                     hpa, (size_t)mapped_len);
>  
> -                     tlen += cpy_len;
> +                     tlen += (uint32_t)mapped_len;
> +                     cpy_len -= (uint32_t)mapped_len;
> +                     mbuf_avail  -= (uint32_t)mapped_len;
> +                     mbuf_offset += (uint32_t)mapped_len;
> +                     buf_avail  -= (uint32_t)mapped_len;
> +                     buf_offset += (uint32_t)mapped_len;
>                       tvec_idx++;
> -             } else {
> +             }
> +
> +             if (likely(cpy_len)) {
>                       if (unlikely(vq->batch_copy_nb_elems >= vq->size)) {
>                               rte_memcpy(
>                               (void *)((uintptr_t)(buf_addr + buf_offset)),
> @@ -1112,10 +1120,12 @@ async_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
>                       }
>               }
>  
> -             mbuf_avail  -= cpy_len;
> -             mbuf_offset += cpy_len;
> -             buf_avail  -= cpy_len;
> -             buf_offset += cpy_len;
> +             if (cpy_len) {
> +                     mbuf_avail  -= cpy_len;
> +                     mbuf_offset += cpy_len;
> +                     buf_avail  -= cpy_len;
> +                     buf_offset += cpy_len;
> +             }

Is it really necessary to check that the copy length is not 0?

Thanks,
Maxime

>       }
>  
>  out:
> 
