On 05/12/2014 16:20, Konstantin Ananyev wrote:
> That's an alternative way to fix the problem described in the patch:
> http://dpdk.org/ml/archives/dev/2014-December/009394.html.
> The main difference is:
> - move the buf_len field out of the region covered by the rearm_data marker.
> - make ixgbe_recv_pkts_vec() not touch the buf_len field at all
> (consistent with how all the other RX functions behave).
>
> Signed-off-by: Konstantin Ananyev <konstantin.ananyev at intel.com>
> ---
>   lib/librte_mbuf/rte_mbuf.h            |  7 +++++--
>   lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c | 20 +++++++++++++++-----
>   2 files changed, 20 insertions(+), 7 deletions(-)
>
> diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
> index 2e5fce5..bb88318 100644
> --- a/lib/librte_mbuf/rte_mbuf.h
> +++ b/lib/librte_mbuf/rte_mbuf.h
> @@ -179,6 +179,8 @@ const char *rte_get_tx_ol_flag_name(uint64_t mask);
>   typedef void    *MARKER[0];   /**< generic marker for a point in a structure */
>   typedef uint64_t MARKER64[0]; /**< marker that allows us to overwrite 8 bytes
>                                  * with a single assignment */
> +typedef uint8_t MARKER8[0];   /**< generic marker with 1B alignment */
> +
>   /**
>    * The generic rte_mbuf, containing a packet mbuf.
>    */
> @@ -188,9 +190,10 @@ struct rte_mbuf {
>       void *buf_addr;           /**< Virtual address of segment buffer. */
>       phys_addr_t buf_physaddr; /**< Physical address of segment buffer. */
>
> -     /* next 8 bytes are initialised on RX descriptor rearm */
> -     MARKER64 rearm_data;
>       uint16_t buf_len;         /**< Length of segment buffer. */
> +
> +     /* next 6 bytes are initialised on RX descriptor rearm */
> +     MARKER8 rearm_data;
>       uint16_t data_off;
>
>       /**
> diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c
> index 579bc46..d5fc0cc 100644
> --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c
> +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c
> @@ -79,13 +79,22 @@ ixgbe_rxq_rearm(struct igb_rx_queue *rxq)
>       /* Initialize the mbufs in vector, process 2 mbufs in one loop */
>       for (i = 0; i < RTE_IXGBE_RXQ_REARM_THRESH; i += 2, rxep += 2) {
>               __m128i vaddr0, vaddr1;
> +             uintptr_t p0, p1;
>
>               mb0 = rxep[0].mbuf;
>               mb1 = rxep[1].mbuf;
>
> -             /* flush mbuf with pkt template */
> -             mb0->rearm_data[0] = rxq->mbuf_initializer;
> -             mb1->rearm_data[0] = rxq->mbuf_initializer;
> +             /*
> +              * Flush mbuf with pkt template.
> +              * Data to be rearmed is 6 bytes long.
> +              * Though, RX will overwrite ol_flags that are coming next
> +              * anyway. So overwrite whole 8 bytes with one store:
> +              * 6 bytes of rearm_data plus first 2 bytes of ol_flags.
> +              */
> +             p0 = (uintptr_t)&mb0->rearm_data;
> +             *(uint64_t *)p0 = rxq->mbuf_initializer;
> +             p1 = (uintptr_t)&mb1->rearm_data;
> +             *(uint64_t *)p1 = rxq->mbuf_initializer;
>
>               /* load buf_addr(lo 64bit) and buf_physaddr(hi 64bit) */
>               vaddr0 = _mm_loadu_si128((__m128i *)&(mb0->buf_addr));
> @@ -732,14 +741,15 @@ static struct ixgbe_txq_ops vec_txq_ops = {
>   int
>   ixgbe_rxq_vec_setup(struct igb_rx_queue *rxq)
>   {
> +     uintptr_t p;
>       struct rte_mbuf mb_def = { .buf_addr = 0 }; /* zeroed mbuf */
>
>       mb_def.nb_segs = 1;
>       mb_def.data_off = RTE_PKTMBUF_HEADROOM;
> -     mb_def.buf_len = rxq->mb_pool->elt_size - sizeof(struct rte_mbuf);
>       mb_def.port = rxq->port_id;
>       rte_mbuf_refcnt_set(&mb_def, 1);
> -     rxq->mbuf_initializer = *((uint64_t *)&mb_def.rearm_data);
> +     p = (uintptr_t)&mb_def.rearm_data;
> +     rxq->mbuf_initializer = *(uint64_t *)p;
>       return 0;
>   }
>
>

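The patch introduces unaligned 64-bit writes (rearm_data now follows the
2-byte buf_len field, so it is no longer 8-byte aligned), but we can assume
there is no performance penalty for this on Intel hardware, correct?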





Reply via email to