Use a non-volatile uint64_t pointer for the stores to the descriptor
ring. This allows the compiler to merge the stores as it sees fit.

Signed-off-by: Bruce Richardson <[email protected]>
---
 drivers/net/intel/common/tx_scalar_fns.h | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)
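
A minimal stand-alone sketch of the intent (not part of the patch; the
two-qword struct and function names below are hypothetical, not the
driver's real descriptor type): writing the descriptor through a
volatile pointer forces two separate, ordered 64-bit stores, while
casting the qualifier away lets the compiler combine them into a single
wider store where the target supports it.

#include <stdint.h>

/* hypothetical two-quadword descriptor, for illustration only */
struct desc { uint64_t qw0, qw1; };

static inline void
write_desc_volatile(volatile struct desc *d, uint64_t qw0, uint64_t qw1)
{
	/* volatile accesses: the compiler must emit two distinct stores */
	d->qw0 = qw0;
	d->qw1 = qw1;
}

static inline void
write_desc_plain(volatile struct desc *d, uint64_t qw0, uint64_t qw1)
{
	/* drop the volatile qualifier (write_txd() does this via
	 * RTE_CAST_PTR); the compiler is then free to merge the two
	 * 64-bit stores into one 16-byte store, e.g. movups on x86
	 * or stp on arm64 */
	uint64_t *qw = (uint64_t *)(uintptr_t)d;

	qw[0] = qw0;
	qw[1] = qw1;
}

At -O2 and above, gcc and clang will typically keep two stores for
write_desc_volatile() but may emit a single vector store for
write_desc_plain().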

diff --git a/drivers/net/intel/common/tx_scalar_fns.h b/drivers/net/intel/common/tx_scalar_fns.h
index 7b643fcf44..95e9acbe60 100644
--- a/drivers/net/intel/common/tx_scalar_fns.h
+++ b/drivers/net/intel/common/tx_scalar_fns.h
@@ -184,6 +184,15 @@ struct ci_timesstamp_queue_fns {
        write_ts_tail_t write_ts_tail;
 };
 
+static inline void
+write_txd(volatile void *txd, uint64_t qw0, uint64_t qw1)
+{
+       uint64_t *txd_qw = RTE_CAST_PTR(void *, txd);
+
+       txd_qw[0] = rte_cpu_to_le_64(qw0);
+       txd_qw[1] = rte_cpu_to_le_64(qw1);
+}
+
 static inline uint16_t
 ci_xmit_pkts(struct ci_tx_queue *txq,
             struct rte_mbuf **tx_pkts,
@@ -313,8 +322,7 @@ ci_xmit_pkts(struct ci_tx_queue *txq,
                                txe->mbuf = NULL;
                        }
 
-                       ctx_txd[0] = cd_qw0;
-                       ctx_txd[1] = cd_qw1;
+                       write_txd(ctx_txd, cd_qw0, cd_qw1);
 
                        txe->last_id = tx_last;
                        tx_id = txe->next_id;
@@ -361,12 +369,12 @@ ci_xmit_pkts(struct ci_tx_queue *txq,
 
                        while ((ol_flags & (RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_UDP_SEG)) &&
                                        unlikely(slen > CI_MAX_DATA_PER_TXD)) {
-                               txd->buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
-                               txd->cmd_type_offset_bsz = rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DATA |
+                               const uint64_t cmd_type_offset_bsz = CI_TX_DESC_DTYPE_DATA |
                                        ((uint64_t)td_cmd << CI_TXD_QW1_CMD_S) |
                                        ((uint64_t)td_offset << CI_TXD_QW1_OFFSET_S) |
                                        ((uint64_t)CI_MAX_DATA_PER_TXD << CI_TXD_QW1_TX_BUF_SZ_S) |
-                                       ((uint64_t)td_tag << CI_TXD_QW1_L2TAG1_S));
+                                       ((uint64_t)td_tag << CI_TXD_QW1_L2TAG1_S);
+                               write_txd(txd, buf_dma_addr, cmd_type_offset_bsz);
 
                                buf_dma_addr += CI_MAX_DATA_PER_TXD;
                                slen -= CI_MAX_DATA_PER_TXD;
@@ -382,12 +390,12 @@ ci_xmit_pkts(struct ci_tx_queue *txq,
                        if (m_seg->next == NULL)
                                td_cmd |= CI_TX_DESC_CMD_EOP;
 
-                       txd->buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
-                       txd->cmd_type_offset_bsz = rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DATA |
+                       const uint64_t cmd_type_offset_bsz = CI_TX_DESC_DTYPE_DATA |
                                ((uint64_t)td_cmd << CI_TXD_QW1_CMD_S) |
                                ((uint64_t)td_offset << CI_TXD_QW1_OFFSET_S) |
                                ((uint64_t)slen << CI_TXD_QW1_TX_BUF_SZ_S) |
-                               ((uint64_t)td_tag << CI_TXD_QW1_L2TAG1_S));
+                               ((uint64_t)td_tag << CI_TXD_QW1_L2TAG1_S);
+                       write_txd(txd, buf_dma_addr, cmd_type_offset_bsz);
 
                        txe->last_id = tx_last;
                        tx_id = txe->next_id;
-- 
2.51.0
