On Thu, Feb 10, 2022 at 6:51 PM <pbhagavat...@marvell.com> wrote: > > From: Pavan Nikhilesh <pbhagavat...@marvell.com> > > Store and reuse the workslot's TT, GRP and HEAD status > instead of re-reading it from GWC, as each GWC read imposes > additional latency. > > Signed-off-by: Pavan Nikhilesh <pbhagavat...@marvell.com>
Series Acked-by: Jerin Jacob <jer...@marvell.com> Series Applied to dpdk-next-net-eventdev/for-main. Thanks > --- > Depends-on: 21590 > > v4 Changes: > - Update commit title for 3/3 > > v3 Changes: > - Split and rebase patches. > > v2 Changes: > - Rebase. > - Fix incorrect use of RoC API > > drivers/common/cnxk/roc_sso.h | 14 ++++++++------ > drivers/event/cnxk/cn10k_worker.h | 16 +++++++++------- > drivers/event/cnxk/cn9k_worker.h | 6 +++--- > drivers/event/cnxk/cnxk_eventdev.h | 2 ++ > drivers/event/cnxk/cnxk_worker.h | 11 +++++++---- > drivers/net/cnxk/cn10k_tx.h | 12 ++++++------ > 6 files changed, 35 insertions(+), 26 deletions(-) > > diff --git a/drivers/common/cnxk/roc_sso.h b/drivers/common/cnxk/roc_sso.h > index 27d49c6c68..ab7cee1c60 100644 > --- a/drivers/common/cnxk/roc_sso.h > +++ b/drivers/common/cnxk/roc_sso.h > @@ -54,12 +54,13 @@ struct roc_sso { > uint8_t reserved[ROC_SSO_MEM_SZ] __plt_cache_aligned; > } __plt_cache_aligned; > > -static __plt_always_inline void > -roc_sso_hws_head_wait(uintptr_t tag_op) > +static __plt_always_inline uint64_t > +roc_sso_hws_head_wait(uintptr_t base) > { > -#ifdef RTE_ARCH_ARM64 > + uintptr_t tag_op = base + SSOW_LF_GWS_TAG; > uint64_t tag; > > +#if defined(__aarch64__) > asm volatile(PLT_CPU_FEATURE_PREAMBLE > " ldr %[tag], [%[tag_op]] \n" > " tbnz %[tag], 35, done%= \n" > @@ -71,10 +72,11 @@ roc_sso_hws_head_wait(uintptr_t tag_op) > : [tag] "=&r"(tag) > : [tag_op] "r"(tag_op)); > #else > - /* Wait for the SWTAG/SWTAG_FULL operation */ > - while (!(plt_read64(tag_op) & BIT_ULL(35))) > - ; > + do { > + tag = plt_read64(tag_op); > + } while (!(tag & BIT_ULL(35))); > #endif > + return tag; > } > > /* SSO device initialization */ > diff --git a/drivers/event/cnxk/cn10k_worker.h > b/drivers/event/cnxk/cn10k_worker.h > index ff08b2d974..ada230ea1d 100644 > --- a/drivers/event/cnxk/cn10k_worker.h > +++ b/drivers/event/cnxk/cn10k_worker.h > @@ -40,8 +40,7 @@ cn10k_sso_hws_fwd_swtag(struct cn10k_sso_hws *ws, const > struct 
rte_event *ev) > { > const uint32_t tag = (uint32_t)ev->event; > const uint8_t new_tt = ev->sched_type; > - const uint8_t cur_tt = > - CNXK_TT_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_WQE0)); > + const uint8_t cur_tt = CNXK_TT_FROM_TAG(ws->gw_rdata); > > /* CNXK model > * cur_tt/new_tt SSO_TT_ORDERED SSO_TT_ATOMIC SSO_TT_UNTAGGED > @@ -81,7 +80,7 @@ cn10k_sso_hws_forward_event(struct cn10k_sso_hws *ws, > const uint8_t grp = ev->queue_id; > > /* Group hasn't changed, Use SWTAG to forward the event */ > - if (CNXK_GRP_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_WQE0)) == grp) > + if (CNXK_GRP_FROM_TAG(ws->gw_rdata) == grp) > cn10k_sso_hws_fwd_swtag(ws, ev); > else > /* > @@ -211,6 +210,7 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct > rte_event *ev, > } while (gw.u64[0] & BIT_ULL(63)); > mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf)); > #endif > + ws->gw_rdata = gw.u64[0]; > gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 | > (gw.u64[0] & (0x3FFull << 36)) << 4 | > (gw.u64[0] & 0xffffffff); > @@ -405,7 +405,8 @@ NIX_RX_FASTPATH_MODES > RTE_SET_USED(timeout_ticks); > \ > if (ws->swtag_req) { > \ > ws->swtag_req = 0; > \ > - cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); > \ > + ws->gw_rdata = cnxk_sso_hws_swtag_wait( > \ > + ws->base + SSOW_LF_GWS_WQE0); > \ > return 1; > \ > } > \ > return cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); > \ > @@ -424,7 +425,8 @@ NIX_RX_FASTPATH_MODES > uint64_t iter; > \ > if (ws->swtag_req) { > \ > ws->swtag_req = 0; > \ > - cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); > \ > + ws->gw_rdata = cnxk_sso_hws_swtag_wait( > \ > + ws->base + SSOW_LF_GWS_WQE0); > \ > return ret; > \ > } > \ > ret = cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); > \ > @@ -507,8 +509,8 @@ cn10k_sso_tx_one(struct cn10k_sso_hws *ws, struct > rte_mbuf *m, uint64_t *cmd, > else > pa = txq->io_addr | ((segdw - 1) << 4); > > - if (!sched_type) > - roc_sso_hws_head_wait(ws->base + SSOW_LF_GWS_TAG); > + if 
(!CNXK_TAG_IS_HEAD(ws->gw_rdata) && !sched_type) > + ws->gw_rdata = roc_sso_hws_head_wait(ws->base); > > roc_lmt_submit_steorl(lmt_id, pa); > } > diff --git a/drivers/event/cnxk/cn9k_worker.h > b/drivers/event/cnxk/cn9k_worker.h > index 303b04c215..8455272005 100644 > --- a/drivers/event/cnxk/cn9k_worker.h > +++ b/drivers/event/cnxk/cn9k_worker.h > @@ -700,7 +700,7 @@ cn9k_sso_hws_xmit_sec_one(const struct cn9k_eth_txq *txq, > uint64_t base, > > /* Head wait if needed */ > if (base) > - roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); > + roc_sso_hws_head_wait(base); > > /* ESN */ > outb_priv = roc_nix_inl_onf_ipsec_outb_sa_sw_rsvd((void *)sa); > @@ -793,7 +793,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event > *ev, uint64_t *cmd, > flags); > if (!CNXK_TT_FROM_EVENT(ev->event)) { > cn9k_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, > segdw); > - roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); > + roc_sso_hws_head_wait(base); > cn9k_sso_txq_fc_wait(txq); > if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0) > cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr, > @@ -806,7 +806,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event > *ev, uint64_t *cmd, > cn9k_nix_xmit_prepare_tstamp(txq, cmd, m->ol_flags, 4, flags); > if (!CNXK_TT_FROM_EVENT(ev->event)) { > cn9k_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags); > - roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG); > + roc_sso_hws_head_wait(base); > cn9k_sso_txq_fc_wait(txq); > if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0) > cn9k_nix_xmit_one(cmd, txq->lmt_addr, > diff --git a/drivers/event/cnxk/cnxk_eventdev.h > b/drivers/event/cnxk/cnxk_eventdev.h > index b26df58588..ab58508590 100644 > --- a/drivers/event/cnxk/cnxk_eventdev.h > +++ b/drivers/event/cnxk/cnxk_eventdev.h > @@ -47,6 +47,7 @@ > #define CNXK_CLR_SUB_EVENT(x) (~(0xffu << 20) & x) > #define CNXK_GRP_FROM_TAG(x) (((x) >> 36) & 0x3ff) > #define CNXK_SWTAG_PEND(x) (BIT_ULL(62) & x) > +#define CNXK_TAG_IS_HEAD(x) (BIT_ULL(35) & x) > > #define 
CN9K_SSOW_GET_BASE_ADDR(_GW) ((_GW)-SSOW_LF_GWS_OP_GET_WORK0) > > @@ -123,6 +124,7 @@ struct cnxk_sso_evdev { > > struct cn10k_sso_hws { > uint64_t base; > + uint64_t gw_rdata; > /* PTP timestamp */ > struct cnxk_timesync_info *tstamp; > void *lookup_mem; > diff --git a/drivers/event/cnxk/cnxk_worker.h > b/drivers/event/cnxk/cnxk_worker.h > index 9f9ceab8a1..7de03f3fbb 100644 > --- a/drivers/event/cnxk/cnxk_worker.h > +++ b/drivers/event/cnxk/cnxk_worker.h > @@ -52,11 +52,11 @@ cnxk_sso_hws_swtag_flush(uint64_t tag_op, uint64_t > flush_op) > plt_write64(0, flush_op); > } > > -static __rte_always_inline void > +static __rte_always_inline uint64_t > cnxk_sso_hws_swtag_wait(uintptr_t tag_op) > { > -#ifdef RTE_ARCH_ARM64 > uint64_t swtp; > +#ifdef RTE_ARCH_ARM64 > > asm volatile(PLT_CPU_FEATURE_PREAMBLE > " ldr %[swtb], [%[swtp_loc]] \n" > @@ -70,9 +70,12 @@ cnxk_sso_hws_swtag_wait(uintptr_t tag_op) > : [swtp_loc] "r"(tag_op)); > #else > /* Wait for the SWTAG/SWTAG_FULL operation */ > - while (plt_read64(tag_op) & BIT_ULL(62)) > - ; > + do { > + swtp = plt_read64(tag_op); > + } while (swtp & BIT_ULL(62)); > #endif > + > + return swtp; > } > > #endif > diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h > index 4ae6bbf517..ec6366168c 100644 > --- a/drivers/net/cnxk/cn10k_tx.h > +++ b/drivers/net/cnxk/cn10k_tx.h > @@ -905,8 +905,8 @@ cn10k_nix_xmit_pkts(void *tx_queue, uint64_t *ws, struct > rte_mbuf **tx_pkts, > lnum++; > } > > - if (flags & NIX_TX_VWQE_F) > - roc_sso_hws_head_wait(ws[0]); > + if ((flags & NIX_TX_VWQE_F) && !(ws[1] & BIT_ULL(35))) > + ws[1] = roc_sso_hws_head_wait(ws[0]); > > left -= burst; > tx_pkts += burst; > @@ -1041,8 +1041,8 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, uint64_t *ws, > } > } > > - if (flags & NIX_TX_VWQE_F) > - roc_sso_hws_head_wait(ws[0]); > + if ((flags & NIX_TX_VWQE_F) && !(ws[1] & BIT_ULL(35))) > + ws[1] = roc_sso_hws_head_wait(ws[0]); > > left -= burst; > tx_pkts += burst; > @@ -2582,8 +2582,8 @@ 
cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws, > if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)) > wd.data[0] >>= 16; > > - if (flags & NIX_TX_VWQE_F) > - roc_sso_hws_head_wait(ws[0]); > + if ((flags & NIX_TX_VWQE_F) && !(ws[1] & BIT_ULL(35))) > + ws[1] = roc_sso_hws_head_wait(ws[0]); > > left -= burst; > > -- > 2.17.1 >