[dpdk-dev] [PATCH v3 1/5] vhost: rewrite enqueue
On Mon, Aug 22, 2016 at 11:35:47AM +0200, Maxime Coquelin wrote: > >-virtio_enqueue_offload(m, _hdr.hdr); > >-copy_virtio_net_hdr(dev, desc_addr, virtio_hdr); > >+/* handle virtio header */ > >+virtio_hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr; > >+virtio_enqueue_offload(mbuf, &(virtio_hdr->hdr)); > Parenthesis around virtio_hdr->hdr shouldn't be needed. > > vhost_log_write(dev, desc->addr, dev->vhost_hlen); > >-PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0); > Looks like you remove the PRINT_PACKET calls. > Does it impact performance? Yes, it does. But it's only enabled for debug mode. Besides that, it's just a NOOP. > In any case, it should be mentionned in the commit message. Agreed. But for this case, we should not remove it: it breaks the debug-ability. --yliu
[dpdk-dev] [PATCH v3 1/5] vhost: rewrite enqueue
Hi Maxime, Thanks very much for the detailed review. > -Original Message- > From: Maxime Coquelin [mailto:maxime.coquelin at redhat.com] > Sent: Monday, August 22, 2016 5:36 PM > To: Wang, Zhihong ; dev at dpdk.org > Cc: yuanhan.liu at linux.intel.com > Subject: Re: [PATCH v3 1/5] vhost: rewrite enqueue > > > > On 08/19/2016 07:43 AM, Zhihong Wang wrote: > > This patch implements the vhost logic from scratch into a single function > > designed for high performance and better maintainability. > > > > --- > > Changes in v3: > > > > 1. Rewrite enqueue and delete the obsolete in the same patch. > > > > Signed-off-by: Zhihong Wang > > --- > > lib/librte_vhost/vhost_rxtx.c | 537 > > +- > > 1 file changed, 160 insertions(+), 377 deletions(-) > > > > diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c > > index 08a73fd..b09a9c3 100644 > > --- a/lib/librte_vhost/vhost_rxtx.c > > +++ b/lib/librte_vhost/vhost_rxtx.c > > @@ -91,7 +91,7 @@ is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t > qp_nb) > > return (is_tx ^ (idx & 1)) == 0 && idx < qp_nb * VIRTIO_QNUM; > > } > > > > -static void > > +static inline void __attribute__((always_inline)) > > virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr > *net_hdr) > > { > > if (m_buf->ol_flags & PKT_TX_L4_MASK) { > > @@ -125,427 +125,210 @@ virtio_enqueue_offload(struct rte_mbuf *m_buf, > struct virtio_net_hdr *net_hdr) > > } > > } > > > > -static inline void > > -copy_virtio_net_hdr(struct virtio_net *dev, uint64_t desc_addr, > > - struct virtio_net_hdr_mrg_rxbuf hdr) > > +static inline uint32_t __attribute__((always_inline)) > > +loop_check(struct vhost_virtqueue *vq, uint16_t avail_idx, uint32_t > > pkt_left) > Creating a function just for doing this doesn't make much sense. > And the function name doesn't help. > I think you should just remove this function. Okay. > > > { > > - if (dev->vhost_hlen == sizeof(struct virtio_net_hdr_mrg_rxbuf)) > > - *(struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr = hdr; > > - else > > - *(struct virtio_net_hdr *)(uintptr_t)desc_addr = hdr.hdr; > > + if (pkt_left == 0 || avail_idx == vq->last_used_idx) > > + return 1; > > + > > + return 0; > > } > > > > -static inline int __attribute__((always_inline)) > > -copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, > > - struct rte_mbuf *m, uint16_t desc_idx) > > +static inline uint32_t __attribute__((always_inline)) > > +enqueue_packet(struct virtio_net *dev, struct vhost_virtqueue *vq, > > + uint16_t avail_idx, struct rte_mbuf *mbuf, > > + uint32_t is_mrg_rxbuf) > > { > > - uint32_t desc_avail, desc_offset; > > - uint32_t mbuf_avail, mbuf_offset; > > - uint32_t cpy_len; > > + struct virtio_net_hdr_mrg_rxbuf *virtio_hdr; > > struct vring_desc *desc; > > - uint64_t desc_addr; > > - struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0}; > > - > > - desc = >desc[desc_idx]; > > + uint64_t desc_addr = 0; > > + uint32_t desc_chain_head = 0; > > + uint32_t desc_chain_len = 0; > > + uint32_t desc_current = 0; > > + uint32_t desc_offset = 0; > > + uint32_t mbuf_len = 0; > > + uint32_t mbuf_avail = 0; > > + uint32_t copy_len = 0; > > + uint32_t extra_buffers = 0; > > + uint32_t used_idx_round = 0; > Most of these variables don't need to be initialized. Okay. > > > + > > + /* start with the first mbuf of the packet */ > > + mbuf_len = rte_pktmbuf_data_len(mbuf); > > + mbuf_avail = mbuf_len; > > + > > + /* get the current desc */ > > + desc_current = vq->avail->ring[(vq->last_used_idx) & (vq->size - 1)]; > > + desc_chain_head = desc_current; > > + desc = >desc[desc_current]; > > desc_addr = gpa_to_vva(dev, desc->addr); > > - /* > > -* Checking of 'desc_addr' placed outside of 'unlikely' macro to avoid > > -* performance issue with some versions of gcc (4.8.4 and 5.3.0) which > > -* otherwise stores offset on the stack instead of in a register. > > -*/ > > - if (unlikely(desc->len < dev->vhost_hlen) || !desc_addr) > > - return -1; > > - > > - rte_prefetch0((void *)(uintptr_t)desc_addr); > > + if (unlikely(!desc_addr)) > > + goto error; > > > > - virtio_enqueue_offload(m, _hdr.hdr); > > - copy_virtio_net_hdr(dev, desc_addr, virtio_hdr); > > + /* handle virtio header */ > > + virtio_hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr; > > + virtio_enqueue_offload(mbuf, &(virtio_hdr->hdr)); > Parenthesis around virtio_hdr->hdr shouldn't be needed. > > vhost_log_write(dev, desc->addr, dev->vhost_hlen); > > - PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0); > Looks like you remove the PRINT_PACKET calls. > Does it impact performance? > In any case, it should be mentionned in the commit message. Will add this. > > > - > >
[dpdk-dev] [PATCH v3 1/5] vhost: rewrite enqueue
On 08/19/2016 07:43 AM, Zhihong Wang wrote: > This patch implements the vhost logic from scratch into a single function > designed for high performance and better maintainability. > > --- > Changes in v3: > > 1. Rewrite enqueue and delete the obsolete in the same patch. > > Signed-off-by: Zhihong Wang > --- > lib/librte_vhost/vhost_rxtx.c | 537 > +- > 1 file changed, 160 insertions(+), 377 deletions(-) > > diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c > index 08a73fd..b09a9c3 100644 > --- a/lib/librte_vhost/vhost_rxtx.c > +++ b/lib/librte_vhost/vhost_rxtx.c > @@ -91,7 +91,7 @@ is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t > qp_nb) > return (is_tx ^ (idx & 1)) == 0 && idx < qp_nb * VIRTIO_QNUM; > } > > -static void > +static inline void __attribute__((always_inline)) > virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr > *net_hdr) > { > if (m_buf->ol_flags & PKT_TX_L4_MASK) { > @@ -125,427 +125,210 @@ virtio_enqueue_offload(struct rte_mbuf *m_buf, struct > virtio_net_hdr *net_hdr) > } > } > > -static inline void > -copy_virtio_net_hdr(struct virtio_net *dev, uint64_t desc_addr, > - struct virtio_net_hdr_mrg_rxbuf hdr) > +static inline uint32_t __attribute__((always_inline)) > +loop_check(struct vhost_virtqueue *vq, uint16_t avail_idx, uint32_t pkt_left) Creating a function just for doing this doesn't make much sense. And the function name doesn't help. I think you should just remove this function. > { > - if (dev->vhost_hlen == sizeof(struct virtio_net_hdr_mrg_rxbuf)) > - *(struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr = hdr; > - else > - *(struct virtio_net_hdr *)(uintptr_t)desc_addr = hdr.hdr; > + if (pkt_left == 0 || avail_idx == vq->last_used_idx) > + return 1; > + > + return 0; > } > > -static inline int __attribute__((always_inline)) > -copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, > - struct rte_mbuf *m, uint16_t desc_idx) > +static inline uint32_t __attribute__((always_inline)) > +enqueue_packet(struct virtio_net *dev, struct vhost_virtqueue *vq, > + uint16_t avail_idx, struct rte_mbuf *mbuf, > + uint32_t is_mrg_rxbuf) > { > - uint32_t desc_avail, desc_offset; > - uint32_t mbuf_avail, mbuf_offset; > - uint32_t cpy_len; > + struct virtio_net_hdr_mrg_rxbuf *virtio_hdr; > struct vring_desc *desc; > - uint64_t desc_addr; > - struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0}; > - > - desc = >desc[desc_idx]; > + uint64_t desc_addr = 0; > + uint32_t desc_chain_head = 0; > + uint32_t desc_chain_len = 0; > + uint32_t desc_current = 0; > + uint32_t desc_offset = 0; > + uint32_t mbuf_len = 0; > + uint32_t mbuf_avail = 0; > + uint32_t copy_len = 0; > + uint32_t extra_buffers = 0; > + uint32_t used_idx_round = 0; Most of these variables don't need to be initialized. > + > + /* start with the first mbuf of the packet */ > + mbuf_len = rte_pktmbuf_data_len(mbuf); > + mbuf_avail = mbuf_len; > + > + /* get the current desc */ > + desc_current = vq->avail->ring[(vq->last_used_idx) & (vq->size - 1)]; > + desc_chain_head = desc_current; > + desc = >desc[desc_current]; > desc_addr = gpa_to_vva(dev, desc->addr); > - /* > - * Checking of 'desc_addr' placed outside of 'unlikely' macro to avoid > - * performance issue with some versions of gcc (4.8.4 and 5.3.0) which > - * otherwise stores offset on the stack instead of in a register. > - */ > - if (unlikely(desc->len < dev->vhost_hlen) || !desc_addr) > - return -1; > - > - rte_prefetch0((void *)(uintptr_t)desc_addr); > + if (unlikely(!desc_addr)) > + goto error; > > - virtio_enqueue_offload(m, _hdr.hdr); > - copy_virtio_net_hdr(dev, desc_addr, virtio_hdr); > + /* handle virtio header */ > + virtio_hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr; > + virtio_enqueue_offload(mbuf, &(virtio_hdr->hdr)); Parenthesis around virtio_hdr->hdr shouldn't be needed. > vhost_log_write(dev, desc->addr, dev->vhost_hlen); > - PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0); Looks like you remove the PRINT_PACKET calls. Does it impact performance? In any case, it should be mentionned in the commit message. > - > desc_offset = dev->vhost_hlen; > - desc_avail = desc->len - dev->vhost_hlen; > + desc_chain_len = desc_offset; > + desc_addr += desc_offset; > + if (is_mrg_rxbuf) > + virtio_hdr->num_buffers = 1; > > - mbuf_avail = rte_pktmbuf_data_len(m); > - mbuf_offset = 0; > - while (mbuf_avail != 0 || m->next != NULL) { > - /* done with current mbuf, fetch next */ > - if (mbuf_avail == 0) { > -
[dpdk-dev] [PATCH v3 1/5] vhost: rewrite enqueue
This patch implements the vhost logic from scratch into a single function designed for high performance and better maintainability. --- Changes in v3: 1. Rewrite enqueue and delete the obsolete in the same patch. Signed-off-by: Zhihong Wang --- lib/librte_vhost/vhost_rxtx.c | 537 +- 1 file changed, 160 insertions(+), 377 deletions(-) diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c index 08a73fd..b09a9c3 100644 --- a/lib/librte_vhost/vhost_rxtx.c +++ b/lib/librte_vhost/vhost_rxtx.c @@ -91,7 +91,7 @@ is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t qp_nb) return (is_tx ^ (idx & 1)) == 0 && idx < qp_nb * VIRTIO_QNUM; } -static void +static inline void __attribute__((always_inline)) virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr) { if (m_buf->ol_flags & PKT_TX_L4_MASK) { @@ -125,427 +125,210 @@ virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr) } } -static inline void -copy_virtio_net_hdr(struct virtio_net *dev, uint64_t desc_addr, - struct virtio_net_hdr_mrg_rxbuf hdr) +static inline uint32_t __attribute__((always_inline)) +loop_check(struct vhost_virtqueue *vq, uint16_t avail_idx, uint32_t pkt_left) { - if (dev->vhost_hlen == sizeof(struct virtio_net_hdr_mrg_rxbuf)) - *(struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr = hdr; - else - *(struct virtio_net_hdr *)(uintptr_t)desc_addr = hdr.hdr; + if (pkt_left == 0 || avail_idx == vq->last_used_idx) + return 1; + + return 0; } -static inline int __attribute__((always_inline)) -copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, - struct rte_mbuf *m, uint16_t desc_idx) +static inline uint32_t __attribute__((always_inline)) +enqueue_packet(struct virtio_net *dev, struct vhost_virtqueue *vq, + uint16_t avail_idx, struct rte_mbuf *mbuf, + uint32_t is_mrg_rxbuf) { - uint32_t desc_avail, desc_offset; - uint32_t mbuf_avail, mbuf_offset; - uint32_t cpy_len; + struct virtio_net_hdr_mrg_rxbuf *virtio_hdr; struct vring_desc *desc; - uint64_t desc_addr; - struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0}; - - desc = >desc[desc_idx]; + uint64_t desc_addr = 0; + uint32_t desc_chain_head = 0; + uint32_t desc_chain_len = 0; + uint32_t desc_current = 0; + uint32_t desc_offset = 0; + uint32_t mbuf_len = 0; + uint32_t mbuf_avail = 0; + uint32_t copy_len = 0; + uint32_t extra_buffers = 0; + uint32_t used_idx_round = 0; + + /* start with the first mbuf of the packet */ + mbuf_len = rte_pktmbuf_data_len(mbuf); + mbuf_avail = mbuf_len; + + /* get the current desc */ + desc_current = vq->avail->ring[(vq->last_used_idx) & (vq->size - 1)]; + desc_chain_head = desc_current; + desc = >desc[desc_current]; desc_addr = gpa_to_vva(dev, desc->addr); - /* -* Checking of 'desc_addr' placed outside of 'unlikely' macro to avoid -* performance issue with some versions of gcc (4.8.4 and 5.3.0) which -* otherwise stores offset on the stack instead of in a register. -*/ - if (unlikely(desc->len < dev->vhost_hlen) || !desc_addr) - return -1; - - rte_prefetch0((void *)(uintptr_t)desc_addr); + if (unlikely(!desc_addr)) + goto error; - virtio_enqueue_offload(m, _hdr.hdr); - copy_virtio_net_hdr(dev, desc_addr, virtio_hdr); + /* handle virtio header */ + virtio_hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr; + virtio_enqueue_offload(mbuf, &(virtio_hdr->hdr)); vhost_log_write(dev, desc->addr, dev->vhost_hlen); - PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0); - desc_offset = dev->vhost_hlen; - desc_avail = desc->len - dev->vhost_hlen; + desc_chain_len = desc_offset; + desc_addr += desc_offset; + if (is_mrg_rxbuf) + virtio_hdr->num_buffers = 1; - mbuf_avail = rte_pktmbuf_data_len(m); - mbuf_offset = 0; - while (mbuf_avail != 0 || m->next != NULL) { - /* done with current mbuf, fetch next */ - if (mbuf_avail == 0) { - m = m->next; - - mbuf_offset = 0; - mbuf_avail = rte_pktmbuf_data_len(m); + /* start copy from mbuf to desc */ + while (1) { + /* get the next mbuf if the current done */ + if (!mbuf_avail) { + if (mbuf->next) { + mbuf = mbuf->next; + mbuf_len = rte_pktmbuf_data_len(mbuf); + mbuf_avail = mbuf_len; + } else +