On 05/12/2014 16:20, Konstantin Ananyev wrote: > That's an alternative way to fix the problem described in the patch: > http://dpdk.org/ml/archives/dev/2014-December/009394.html. > The main difference is: > - move buf_len fields out of rearm_data marker. > - make ixgbe_recv_pkts_vec() not touch buf_len field at all > (as all other RX functions behave). > > Signed-off-by: Konstantin Ananyev <konstantin.ananyev at intel.com> > --- > lib/librte_mbuf/rte_mbuf.h | 7 +++++-- > lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c | 20 +++++++++++++++----- > 2 files changed, 20 insertions(+), 7 deletions(-) > > diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h > index 2e5fce5..bb88318 100644 > --- a/lib/librte_mbuf/rte_mbuf.h > +++ b/lib/librte_mbuf/rte_mbuf.h > @@ -179,6 +179,8 @@ const char *rte_get_tx_ol_flag_name(uint64_t mask); > typedef void *MARKER[0]; /**< generic marker for a point in a > structure */ > typedef uint64_t MARKER64[0]; /**< marker that allows us to overwrite 8 > bytes > * with a single assignment */ > +typedef uint8_t MARKER8[0]; /**< generic marker with 1B alignment */ > + > /** > * The generic rte_mbuf, containing a packet mbuf. > */ > @@ -188,9 +190,10 @@ struct rte_mbuf { > void *buf_addr; /**< Virtual address of segment buffer. */ > phys_addr_t buf_physaddr; /**< Physical address of segment buffer. */ > > - /* next 8 bytes are initialised on RX descriptor rearm */ > - MARKER64 rearm_data; > uint16_t buf_len; /**< Length of segment buffer. 
*/ > + > + /* next 6 bytes are initialised on RX descriptor rearm */ > + MARKER8 rearm_data; > uint16_t data_off; > > /** > diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c > b/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c > index 579bc46..d5fc0cc 100644 > --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c > +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c > @@ -79,13 +79,22 @@ ixgbe_rxq_rearm(struct igb_rx_queue *rxq) > /* Initialize the mbufs in vector, process 2 mbufs in one loop */ > for (i = 0; i < RTE_IXGBE_RXQ_REARM_THRESH; i += 2, rxep += 2) { > __m128i vaddr0, vaddr1; > + uintptr_t p0, p1; > > mb0 = rxep[0].mbuf; > mb1 = rxep[1].mbuf; > > - /* flush mbuf with pkt template */ > - mb0->rearm_data[0] = rxq->mbuf_initializer; > - mb1->rearm_data[0] = rxq->mbuf_initializer; > + /* > + * Flush mbuf with pkt template. > + * Data to be rearmed is 6 bytes long. > + * Though, RX will overwrite ol_flags that are coming next > + * anyway. So overwrite whole 8 bytes with one load: > + * 6 bytes of rearm_data plus first 2 bytes of ol_flags. > + */ > + p0 = (uintptr_t)&mb0->rearm_data; > + *(uint64_t *)p0 = rxq->mbuf_initializer; > + p1 = (uintptr_t)&mb1->rearm_data; > + *(uint64_t *)p1 = rxq->mbuf_initializer; > > /* load buf_addr(lo 64bit) and buf_physaddr(hi 64bit) */ > vaddr0 = _mm_loadu_si128((__m128i *)&(mb0->buf_addr)); > @@ -732,14 +741,15 @@ static struct ixgbe_txq_ops vec_txq_ops = { > int > ixgbe_rxq_vec_setup(struct igb_rx_queue *rxq) > { > + uintptr_t p; > struct rte_mbuf mb_def = { .buf_addr = 0 }; /* zeroed mbuf */ > > mb_def.nb_segs = 1; > mb_def.data_off = RTE_PKTMBUF_HEADROOM; > - mb_def.buf_len = rxq->mb_pool->elt_size - sizeof(struct rte_mbuf); > mb_def.port = rxq->port_id; > rte_mbuf_refcnt_set(&mb_def, 1); > - rxq->mbuf_initializer = *((uint64_t *)&mb_def.rearm_data); > + p = (uintptr_t)&mb_def.rearm_data; > + rxq->mbuf_initializer = *(uint64_t *)p; > return 0; > } > >
The patch introduces unaligned writes (the 8-byte stores through rearm_data, which now follows the 2-byte buf_len field), but we can assume there is no performance penalty on Intel hardware, correct?