From: Pavan Nikhilesh <pbhagavat...@marvell.com>

Improve single flow performance by moving the point of coherence
to the end of transmit sequence.

Signed-off-by: Pavan Nikhilesh <pbhagavat...@marvell.com>
---
V2:
* replace rte_cio_wmb with rte_io_wmb

 drivers/event/octeontx2/otx2_worker.h | 35 +++++++++++++++++----------
 drivers/net/octeontx2/otx2_tx.h       | 18 ++++++++++++++
 2 files changed, 40 insertions(+), 13 deletions(-)

diff --git a/drivers/event/octeontx2/otx2_worker.h b/drivers/event/octeontx2/otx2_worker.h
index 80dfe3e73..757fa6fe5 100644
--- a/drivers/event/octeontx2/otx2_worker.h
+++ b/drivers/event/octeontx2/otx2_worker.h
@@ -247,15 +247,6 @@ otx2_ssogws_head_wait(struct otx2_ssogws *ws)
 #endif
 }
 
-static __rte_always_inline void
-otx2_ssogws_order(struct otx2_ssogws *ws, const uint8_t wait_flag)
-{
-       if (wait_flag)
-               otx2_ssogws_head_wait(ws);
-
-       rte_io_wmb();
-}
-
 static __rte_always_inline const struct otx2_eth_txq *
 otx2_ssogws_xtract_meta(struct rte_mbuf *m,
                        const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT])
@@ -287,10 +278,9 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event ev[],
                return otx2_sec_event_tx(ws, ev, m, txq, flags);
        }
 
-       rte_prefetch_non_temporal(&txq_data[m->port][0]);
        /* Perform header writes before barrier for TSO */
        otx2_nix_xmit_prepare_tso(m, flags);
-       otx2_ssogws_order(ws, !ev->sched_type);
+       rte_io_wmb();
        txq = otx2_ssogws_xtract_meta(m, txq_data);
        otx2_ssogws_prepare_pkt(txq, m, cmd, flags);
 
@@ -298,12 +288,31 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event ev[],
                const uint16_t segdw = otx2_nix_prepare_mseg(m, cmd, flags);
                otx2_nix_xmit_prepare_tstamp(cmd, &txq->cmd[0],
                                             m->ol_flags, segdw, flags);
-               otx2_nix_xmit_mseg_one(cmd, txq->lmt_addr, txq->io_addr, segdw);
+               if (!ev->sched_type) {
+                       otx2_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw);
+                       otx2_ssogws_head_wait(ws);
+                       if (otx2_nix_xmit_submit_lmt(txq->io_addr) == 0)
+                               otx2_nix_xmit_mseg_one(cmd, txq->lmt_addr,
+                                                      txq->io_addr, segdw);
+               } else {
+                       otx2_nix_xmit_mseg_one(cmd, txq->lmt_addr, txq->io_addr,
+                                              segdw);
+               }
        } else {
                /* Passing no of segdw as 4: HDR + EXT + SG + SMEM */
                otx2_nix_xmit_prepare_tstamp(cmd, &txq->cmd[0],
                                             m->ol_flags, 4, flags);
-               otx2_nix_xmit_one(cmd, txq->lmt_addr, txq->io_addr, flags);
+
+               if (!ev->sched_type) {
+                       otx2_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags);
+                       otx2_ssogws_head_wait(ws);
+                       if (otx2_nix_xmit_submit_lmt(txq->io_addr) == 0)
+                               otx2_nix_xmit_one(cmd, txq->lmt_addr,
+                                                 txq->io_addr, flags);
+               } else {
+                       otx2_nix_xmit_one(cmd, txq->lmt_addr, txq->io_addr,
+                                         flags);
+               }
        }
 
        otx2_write64(0, ws->swtag_flush_op);
diff --git a/drivers/net/octeontx2/otx2_tx.h b/drivers/net/octeontx2/otx2_tx.h
index 3c4317092..caf170fd1 100644
--- a/drivers/net/octeontx2/otx2_tx.h
+++ b/drivers/net/octeontx2/otx2_tx.h
@@ -383,6 +383,18 @@ otx2_nix_xmit_one(uint64_t *cmd, void *lmt_addr,
        } while (lmt_status == 0);
 }
 
+static __rte_always_inline void
+otx2_nix_xmit_prep_lmt(uint64_t *cmd, void *lmt_addr, const uint32_t flags)
+{
+       otx2_lmt_mov(lmt_addr, cmd, otx2_nix_tx_ext_subs(flags));
+}
+
+static __rte_always_inline uint64_t
+otx2_nix_xmit_submit_lmt(const rte_iova_t io_addr)
+{
+       return otx2_lmt_submit(io_addr);
+}
+
 static __rte_always_inline uint16_t
 otx2_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
 {
@@ -453,6 +465,12 @@ otx2_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
        return segdw;
 }
 
+static __rte_always_inline void
+otx2_nix_xmit_mseg_prep_lmt(uint64_t *cmd, void *lmt_addr, uint16_t segdw)
+{
+       otx2_lmt_mov_seg(lmt_addr, (const void *)cmd, segdw);
+}
+
 static __rte_always_inline void
 otx2_nix_xmit_mseg_one(uint64_t *cmd, void *lmt_addr,
                       rte_iova_t io_addr, uint16_t segdw)
-- 
2.18.0

Reply via email to