Add scalar version of Rx burst support for cn20k.

Signed-off-by: Nithin Dabilpuram <ndabilpu...@marvell.com>
Signed-off-by: Jerin Jacob <jer...@marvell.com>
Signed-off-by: Rahul Bhansali <rbhans...@marvell.com>
Signed-off-by: Pavan Nikhilesh <pbhagavat...@marvell.com>
---
 drivers/net/cnxk/cn20k_ethdev.c    | 126 +++++++++
 drivers/net/cnxk/cn20k_rx.h        | 394 ++++++++++++++++++++++++++++-
 drivers/net/cnxk/cn20k_rx_select.c |   6 +-
 drivers/net/cnxk/cn20k_rxtx.h      | 156 ++++++++++++
 4 files changed, 674 insertions(+), 8 deletions(-)
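
Patch note (not part of the change): a minimal usage sketch of how the new scalar Rx path is exercised once cn20k_eth_set_rx_function() installs one of the generated cn20k_nix_recv_pkts_* variants as the device's rx_pkt_burst. The port/queue ids and BURST_SIZE below are hypothetical.

    #include <rte_ethdev.h>
    #include <rte_mbuf.h>

    #define BURST_SIZE 32 /* hypothetical burst size */

    static void
    poll_rx_once(uint16_t port_id, uint16_t queue_id)
    {
            struct rte_mbuf *pkts[BURST_SIZE];
            uint16_t nb_rx, i;

            /* rte_eth_rx_burst() invokes the driver's rx_pkt_burst, i.e. the
             * scalar cn20k_nix_recv_pkts() instance selected from the
             * configured Rx offload flags.
             */
            nb_rx = rte_eth_rx_burst(port_id, queue_id, pkts, BURST_SIZE);
            for (i = 0; i < nb_rx; i++)
                    rte_pktmbuf_free(pkts[i]); /* application processing goes here */
    }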

diff --git a/drivers/net/cnxk/cn20k_ethdev.c b/drivers/net/cnxk/cn20k_ethdev.c
index 4b2f04ba31..cad7b1316a 100644
--- a/drivers/net/cnxk/cn20k_ethdev.c
+++ b/drivers/net/cnxk/cn20k_ethdev.c
@@ -330,6 +330,33 @@ cn20k_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid, uint16_t nb_
        return 0;
 }
 
+static void
+cn20k_nix_rx_queue_meta_aura_update(struct rte_eth_dev *eth_dev)
+{
+       struct cnxk_eth_dev *dev = cnxk_eth_pmd_priv(eth_dev);
+       struct cnxk_eth_rxq_sp *rxq_sp;
+       struct cn20k_eth_rxq *rxq;
+       struct roc_nix_rq *rq;
+       int i;
+
+       /* Update Aura handle for fastpath rx queues */
+       for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
+               rq = &dev->rqs[i];
+               rxq = eth_dev->data->rx_queues[i];
+               rxq->meta_aura = rq->meta_aura_handle;
+               rxq->meta_pool = dev->nix.meta_mempool;
+               /* Assume meta packet from normal aura if meta aura is not setup
+                */
+               if (!rxq->meta_aura) {
+                       rxq_sp = cnxk_eth_rxq_to_sp(rxq);
+                       rxq->meta_aura = rxq_sp->qconf.mp->pool_id;
+                       rxq->meta_pool = (uintptr_t)rxq_sp->qconf.mp;
+               }
+       }
+       /* Store mempool in lookup mem */
+       cnxk_nix_lookup_mem_metapool_set(dev);
+}
+
 static int
 cn20k_nix_tx_queue_stop(struct rte_eth_dev *eth_dev, uint16_t qidx)
 {
@@ -371,6 +398,74 @@ cn20k_nix_configure(struct rte_eth_dev *eth_dev)
        return 0;
 }
 
+/* Function to enable ptp config for VFs */
+static void
+nix_ptp_enable_vf(struct rte_eth_dev *eth_dev)
+{
+       struct cnxk_eth_dev *dev = cnxk_eth_pmd_priv(eth_dev);
+
+       if (nix_recalc_mtu(eth_dev))
+               plt_err("Failed to set MTU size for ptp");
+
+       dev->rx_offload_flags |= NIX_RX_OFFLOAD_TSTAMP_F;
+
+       /* Setting up the function pointers as per new offload flags */
+       cn20k_eth_set_rx_function(eth_dev);
+       cn20k_eth_set_tx_function(eth_dev);
+}
+
+static uint16_t
+nix_ptp_vf_burst(void *queue, struct rte_mbuf **mbufs, uint16_t pkts)
+{
+       struct cn20k_eth_rxq *rxq = queue;
+       struct cnxk_eth_rxq_sp *rxq_sp;
+       struct rte_eth_dev *eth_dev;
+
+       RTE_SET_USED(mbufs);
+       RTE_SET_USED(pkts);
+
+       rxq_sp = cnxk_eth_rxq_to_sp(rxq);
+       eth_dev = rxq_sp->dev->eth_dev;
+       nix_ptp_enable_vf(eth_dev);
+
+       return 0;
+}
+
+static int
+cn20k_nix_ptp_info_update_cb(struct roc_nix *nix, bool ptp_en)
+{
+       struct cnxk_eth_dev *dev = (struct cnxk_eth_dev *)nix;
+       struct rte_eth_dev *eth_dev;
+       struct cn20k_eth_rxq *rxq;
+       int i;
+
+       if (!dev)
+               return -EINVAL;
+
+       eth_dev = dev->eth_dev;
+       if (!eth_dev)
+               return -EINVAL;
+
+       dev->ptp_en = ptp_en;
+
+       for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
+               rxq = eth_dev->data->rx_queues[i];
+               rxq->mbuf_initializer = cnxk_nix_rxq_mbuf_setup(dev);
+       }
+
+       if (roc_nix_is_vf_or_sdp(nix) && !(roc_nix_is_sdp(nix)) && !(roc_nix_is_lbk(nix))) {
+               /* In case of VF, setting of MTU cannot be done directly in this
+                * function as this is running as part of MBOX request(PF->VF)
+                * and MTU setting also requires MBOX message to be
+                * sent(VF->PF)
+                */
+               eth_dev->rx_pkt_burst = nix_ptp_vf_burst;
+               rte_mb();
+       }
+
+       return 0;
+}
+
 static int
 cn20k_nix_timesync_enable(struct rte_eth_dev *eth_dev)
 {
@@ -451,11 +546,21 @@ cn20k_nix_dev_start(struct rte_eth_dev *eth_dev)
        if (rc)
                return rc;
 
+       /* Update VF about data off shifted by 8 bytes if PTP already
+        * enabled in PF owning this VF
+        */
+       if (dev->ptp_en && (!roc_nix_is_pf(nix) && (!roc_nix_is_sdp(nix))))
+               nix_ptp_enable_vf(eth_dev);
+
        /* Setting up the rx[tx]_offload_flags due to change
         * in rx[tx]_offloads.
         */
        dev->rx_offload_flags |= nix_rx_offload_flags(eth_dev);
        dev->tx_offload_flags |= nix_tx_offload_flags(eth_dev);
+
+       if (dev->rx_offload_flags & NIX_RX_OFFLOAD_SECURITY_F)
+               cn20k_nix_rx_queue_meta_aura_update(eth_dev);
+
        /* Set flags for Rx Inject feature */
        if (roc_idev_nix_rx_inject_get(nix->port_id))
                dev->rx_offload_flags |= NIX_RX_SEC_REASSEMBLY_F;
@@ -621,6 +726,20 @@ nix_tm_ops_override(void)
        if (init_once)
                return;
        init_once = 1;
+
+       /* Update platform specific ops */
+}
+
+static void
+npc_flow_ops_override(void)
+{
+       static int init_once;
+
+       if (init_once)
+               return;
+       init_once = 1;
+
+       /* Update platform specific ops */
 }
 
 static int
@@ -633,6 +752,7 @@ static int
 cn20k_nix_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 {
        struct rte_eth_dev *eth_dev;
+       struct cnxk_eth_dev *dev;
        int rc;
 
        rc = roc_plt_init();
@@ -643,6 +763,7 @@ cn20k_nix_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 
        nix_eth_dev_ops_override();
        nix_tm_ops_override();
+       npc_flow_ops_override();
 
        /* Common probe */
        rc = cnxk_nix_probe(pci_drv, pci_dev);
@@ -665,6 +786,11 @@ cn20k_nix_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
                return 0;
        }
 
+       dev = cnxk_eth_pmd_priv(eth_dev);
+
+       /* Register up msg callbacks for PTP information */
+       roc_nix_ptp_info_cb_register(&dev->nix, cn20k_nix_ptp_info_update_cb);
+
        return 0;
 }
 
diff --git a/drivers/net/cnxk/cn20k_rx.h b/drivers/net/cnxk/cn20k_rx.h
index 2cb77c0b46..22abf7bbd8 100644
--- a/drivers/net/cnxk/cn20k_rx.h
+++ b/drivers/net/cnxk/cn20k_rx.h
@@ -29,8 +29,397 @@
 #define NIX_RX_VWQE_F     BIT(13)
 #define NIX_RX_MULTI_SEG_F BIT(14)
 
+#define CNXK_NIX_CQ_ENTRY_SZ 128
+#define NIX_DESCS_PER_LOOP   4
+#define CQE_CAST(x)         ((struct nix_cqe_hdr_s *)(x))
+#define CQE_SZ(x)           ((x) * CNXK_NIX_CQ_ENTRY_SZ)
+
+#define CQE_PTR_OFF(b, i, o, f)                                                                    \
+       (((f) & NIX_RX_VWQE_F) ? (uint64_t *)(((uintptr_t)((uint64_t *)(b))[i]) + (o)) :           \
+                                (uint64_t *)(((uintptr_t)(b)) + CQE_SZ(i) + (o)))
+#define CQE_PTR_DIFF(b, i, o, f)                                                                   \
+       (((f) & NIX_RX_VWQE_F) ? (uint64_t *)(((uintptr_t)((uint64_t *)(b))[i]) - (o)) :           \
+                                (uint64_t *)(((uintptr_t)(b)) + CQE_SZ(i) - (o)))
+
+#define NIX_RX_SEC_UCC_CONST                                                                       \
+       ((RTE_MBUF_F_RX_IP_CKSUM_BAD >> 1) |                                                       \
+        ((RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_GOOD) >> 1) << 8 |                 \
+        ((RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_BAD) >> 1) << 16 |                 \
+        ((RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_GOOD) >> 1) << 32 |                \
+        ((RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_GOOD) >> 1) << 48)
+
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+static inline void
+nix_mbuf_validate_next(struct rte_mbuf *m)
+{
+       if (m->nb_segs == 1 && m->next) {
+               rte_panic("mbuf->next[%p] valid when mbuf->nb_segs is %d", m->next, m->nb_segs);
+       }
+}
+#else
+static inline void
+nix_mbuf_validate_next(struct rte_mbuf *m)
+{
+       RTE_SET_USED(m);
+}
+#endif
+
 #define NIX_RX_SEC_REASSEMBLY_F (NIX_RX_REAS_F | NIX_RX_OFFLOAD_SECURITY_F)
 
+static inline rte_eth_ip_reassembly_dynfield_t *
+cnxk_ip_reassembly_dynfield(struct rte_mbuf *mbuf, int ip_reassembly_dynfield_offset)
+{
+       return RTE_MBUF_DYNFIELD(mbuf, ip_reassembly_dynfield_offset,
+                                rte_eth_ip_reassembly_dynfield_t *);
+}
+
+union mbuf_initializer {
+       struct {
+               uint16_t data_off;
+               uint16_t refcnt;
+               uint16_t nb_segs;
+               uint16_t port;
+       } fields;
+       uint64_t value;
+};
+
+static __rte_always_inline uint64_t
+nix_clear_data_off(uint64_t oldval)
+{
+       union mbuf_initializer mbuf_init = {.value = oldval};
+
+       mbuf_init.fields.data_off = 0;
+       return mbuf_init.value;
+}
+
+static __rte_always_inline struct rte_mbuf *
+nix_get_mbuf_from_cqe(void *cq, const uint64_t data_off)
+{
+       rte_iova_t buff;
+
+       /* Skip CQE, NIX_RX_PARSE_S and SG HDR(9 DWORDs) and peek buff addr */
+       buff = *((rte_iova_t *)((uint64_t *)cq + 9));
+       return (struct rte_mbuf *)(buff - data_off);
+}
+
+static __rte_always_inline uint32_t
+nix_ptype_get(const void *const lookup_mem, const uint64_t in)
+{
+       const uint16_t *const ptype = lookup_mem;
+       const uint16_t lh_lg_lf = (in & 0xFFF0000000000000) >> 52;
+       const uint16_t tu_l2 = ptype[(in & 0x000FFFF000000000) >> 36];
+       const uint16_t il4_tu = ptype[PTYPE_NON_TUNNEL_ARRAY_SZ + lh_lg_lf];
+
+       return (il4_tu << PTYPE_NON_TUNNEL_WIDTH) | tu_l2;
+}
+
+static __rte_always_inline uint32_t
+nix_rx_olflags_get(const void *const lookup_mem, const uint64_t in)
+{
+       const uint32_t *const ol_flags =
+               (const uint32_t *)((const uint8_t *)lookup_mem + PTYPE_ARRAY_SZ);
+
+       return ol_flags[(in & 0xfff00000) >> 20];
+}
+
+static inline uint64_t
+nix_update_match_id(const uint16_t match_id, uint64_t ol_flags, struct rte_mbuf *mbuf)
+{
+       /* There is no separate bit to check whether match_id
+        * is valid, and no flag to identify whether it came from an
+        * RTE_FLOW_ACTION_TYPE_FLAG or an RTE_FLOW_ACTION_TYPE_MARK
+        * action. The former case is addressed by treating 0 as an
+        * invalid value and incrementing/decrementing match_id in pair
+        * when MARK is activated. The latter case is addressed by
+        * defining CNXK_FLOW_MARK_DEFAULT as the value for
+        * RTE_FLOW_ACTION_TYPE_MARK.
+        * This translates to not using
+        * CNXK_FLOW_ACTION_FLAG_DEFAULT - 1 and
+        * CNXK_FLOW_ACTION_FLAG_DEFAULT for match_id,
+        * i.e. valid mark_ids range from
+        * 0 to CNXK_FLOW_ACTION_FLAG_DEFAULT - 2.
+        */
+       if (likely(match_id)) {
+               ol_flags |= RTE_MBUF_F_RX_FDIR;
+               if (match_id != CNXK_FLOW_ACTION_FLAG_DEFAULT) {
+                       ol_flags |= RTE_MBUF_F_RX_FDIR_ID;
+                       mbuf->hash.fdir.hi = match_id - 1;
+               }
+       }
+
+       return ol_flags;
+}
+
+static __rte_always_inline void
+nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf, uint64_t rearm,
+                   uintptr_t cpth, uintptr_t sa_base, const uint16_t flags)
+{
+       const rte_iova_t *iova_list;
+       uint16_t later_skip = 0;
+       struct rte_mbuf *head;
+       const rte_iova_t *eol;
+       uint8_t nb_segs;
+       uint16_t sg_len;
+       int64_t len;
+       uint64_t sg;
+       uintptr_t p;
+
+       (void)cpth;
+       (void)sa_base;
+
+       sg = *(const uint64_t *)(rx + 1);
+       nb_segs = (sg >> 48) & 0x3;
+
+       if (nb_segs == 1)
+               return;
+
+       len = rx->pkt_lenm1 + 1;
+
+       mbuf->pkt_len = len - (flags & NIX_RX_OFFLOAD_TSTAMP_F ? CNXK_NIX_TIMESYNC_RX_OFFSET : 0);
+       mbuf->nb_segs = nb_segs;
+       head = mbuf;
+       mbuf->data_len =
+               (sg & 0xFFFF) - (flags & NIX_RX_OFFLOAD_TSTAMP_F ? CNXK_NIX_TIMESYNC_RX_OFFSET : 0);
+       eol = ((const rte_iova_t *)(rx + 1) + ((rx->desc_sizem1 + 1) << 1));
+
+       len -= mbuf->data_len;
+       sg = sg >> 16;
+       /* Skip SG_S and first IOVA*/
+       iova_list = ((const rte_iova_t *)(rx + 1)) + 2;
+       nb_segs--;
+
+       later_skip = (uintptr_t)mbuf->buf_addr - (uintptr_t)mbuf;
+
+       while (nb_segs) {
+               mbuf->next = (struct rte_mbuf *)(*iova_list - later_skip);
+               mbuf = mbuf->next;
+
+               RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool, (void **)&mbuf, 1, 1);
+
+               sg_len = sg & 0XFFFF;
+
+               mbuf->data_len = sg_len;
+               sg = sg >> 16;
+               p = (uintptr_t)&mbuf->rearm_data;
+               *(uint64_t *)p = rearm & ~0xFFFF;
+               nb_segs--;
+               iova_list++;
+
+               if (!nb_segs && (iova_list + 1 < eol)) {
+                       sg = *(const uint64_t *)(iova_list);
+                       nb_segs = (sg >> 48) & 0x3;
+                       head->nb_segs += nb_segs;
+                       iova_list = (const rte_iova_t *)(iova_list + 1);
+               }
+       }
+}
+
+static __rte_always_inline void
+cn20k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag, struct rte_mbuf *mbuf,
+                     const void *lookup_mem, const uint64_t val, const uintptr_t cpth,
+                     const uintptr_t sa_base, const uint16_t flag)
+{
+       const union nix_rx_parse_u *rx = (const union nix_rx_parse_u *)((const uint64_t *)cq + 1);
+       const uint64_t w1 = *(const uint64_t *)rx;
+       uint16_t len = rx->pkt_lenm1 + 1;
+       uint64_t ol_flags = 0;
+       uintptr_t p;
+
+       if (flag & NIX_RX_OFFLOAD_PTYPE_F)
+               mbuf->packet_type = nix_ptype_get(lookup_mem, w1);
+       else
+               mbuf->packet_type = 0;
+
+       if (flag & NIX_RX_OFFLOAD_RSS_F) {
+               mbuf->hash.rss = tag;
+               ol_flags |= RTE_MBUF_F_RX_RSS_HASH;
+       }
+
+       /* Skip rx ol flags extraction for Security packets */
+       ol_flags |= (uint64_t)nix_rx_olflags_get(lookup_mem, w1);
+
+       if (flag & NIX_RX_OFFLOAD_VLAN_STRIP_F) {
+               if (rx->vtag0_gone) {
+                       ol_flags |= RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED;
+                       mbuf->vlan_tci = rx->vtag0_tci;
+               }
+               if (rx->vtag1_gone) {
+                       ol_flags |= RTE_MBUF_F_RX_QINQ | RTE_MBUF_F_RX_QINQ_STRIPPED;
+                       mbuf->vlan_tci_outer = rx->vtag1_tci;
+               }
+       }
+
+       if (flag & NIX_RX_OFFLOAD_MARK_UPDATE_F)
+               ol_flags = nix_update_match_id(rx->match_id, ol_flags, mbuf);
+
+       mbuf->ol_flags = ol_flags;
+       mbuf->pkt_len = len;
+       mbuf->data_len = len;
+       p = (uintptr_t)&mbuf->rearm_data;
+       *(uint64_t *)p = val;
+
+       if (flag & NIX_RX_MULTI_SEG_F)
+               /*
+                * For multi segment packets, mbuf length correction according
+                * to Rx timestamp length will be handled later during
+                * timestamp data process.
+                * Hence, timestamp flag argument is not required.
+                */
+               nix_cqe_xtract_mseg(rx, mbuf, val, cpth, sa_base, flag & ~NIX_RX_OFFLOAD_TSTAMP_F);
+}
+
+static inline uint16_t
+nix_rx_nb_pkts(struct cn20k_eth_rxq *rxq, const uint64_t wdata, const uint16_t pkts,
+              const uint32_t qmask)
+{
+       uint32_t available = rxq->available;
+
+       /* Update the available count if cached value is not enough */
+       if (unlikely(available < pkts)) {
+               uint64_t reg, head, tail;
+
+               /* Use LDADDA version to avoid reorder */
+               reg = roc_atomic64_add_sync(wdata, rxq->cq_status);
+               /* CQ_OP_STATUS operation error */
+               if (reg & BIT_ULL(NIX_CQ_OP_STAT_OP_ERR) || reg & BIT_ULL(NIX_CQ_OP_STAT_CQ_ERR))
+                       return 0;
+
+               tail = reg & 0xFFFFF;
+               head = (reg >> 20) & 0xFFFFF;
+               if (tail < head)
+                       available = tail - head + qmask + 1;
+               else
+                       available = tail - head;
+
+               rxq->available = available;
+       }
+
+       return RTE_MIN(pkts, available);
+}
+
+static __rte_always_inline void
+cn20k_nix_mbuf_to_tstamp(struct rte_mbuf *mbuf, struct cnxk_timesync_info *tstamp,
+                        const uint8_t ts_enable, uint64_t *tstamp_ptr)
+{
+       if (ts_enable) {
+               mbuf->pkt_len -= CNXK_NIX_TIMESYNC_RX_OFFSET;
+               mbuf->data_len -= CNXK_NIX_TIMESYNC_RX_OFFSET;
+
+               /* Reading the rx timestamp inserted by CGX, viz at
+                * starting of the packet data.
+                */
+               *tstamp_ptr = ((*tstamp_ptr >> 32) * NSEC_PER_SEC) + (*tstamp_ptr & 0xFFFFFFFFUL);
+               *cnxk_nix_timestamp_dynfield(mbuf, tstamp) = rte_be_to_cpu_64(*tstamp_ptr);
+               /* RTE_MBUF_F_RX_IEEE1588_TMST flag needs to be set only in case
+                * PTP packets are received.
+                */
+               if (mbuf->packet_type == RTE_PTYPE_L2_ETHER_TIMESYNC) {
+                       tstamp->rx_tstamp = *cnxk_nix_timestamp_dynfield(mbuf, tstamp);
+                       tstamp->rx_ready = 1;
+                       mbuf->ol_flags |= RTE_MBUF_F_RX_IEEE1588_PTP | RTE_MBUF_F_RX_IEEE1588_TMST |
+                                         tstamp->rx_tstamp_dynflag;
+               }
+       }
+}
+
+static __rte_always_inline uint16_t
+cn20k_nix_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts, const uint16_t flags)
+{
+       struct cn20k_eth_rxq *rxq = rx_queue;
+       const uint64_t mbuf_init = rxq->mbuf_initializer;
+       const void *lookup_mem = rxq->lookup_mem;
+       const uint64_t data_off = rxq->data_off;
+       const uintptr_t desc = rxq->desc;
+       const uint64_t wdata = rxq->wdata;
+       const uint32_t qmask = rxq->qmask;
+       uint16_t packets = 0, nb_pkts;
+       uint32_t head = rxq->head;
+       struct nix_cqe_hdr_s *cq;
+       struct rte_mbuf *mbuf;
+       uint64_t sa_base = 0;
+       uintptr_t cpth = 0;
+
+       nb_pkts = nix_rx_nb_pkts(rxq, wdata, pkts, qmask);
+
+       while (packets < nb_pkts) {
+               /* Prefetch N desc ahead */
+               rte_prefetch_non_temporal((void *)(desc + (CQE_SZ((head + 2) & qmask))));
+               cq = (struct nix_cqe_hdr_s *)(desc + CQE_SZ(head));
+
+               mbuf = nix_get_mbuf_from_cqe(cq, data_off);
+
+               /* Mark mempool obj as "get" as it is alloc'ed by NIX */
+               RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool, (void **)&mbuf, 1, 1);
+
+               cn20k_nix_cqe_to_mbuf(cq, cq->tag, mbuf, lookup_mem, mbuf_init, cpth, sa_base,
+                                     flags);
+               cn20k_nix_mbuf_to_tstamp(mbuf, rxq->tstamp, (flags & NIX_RX_OFFLOAD_TSTAMP_F),
+                                        (uint64_t *)((uint8_t *)mbuf + data_off));
+               rx_pkts[packets++] = mbuf;
+               roc_prefetch_store_keep(mbuf);
+               head++;
+               head &= qmask;
+       }
+
+       rxq->head = head;
+       rxq->available -= nb_pkts;
+
+       /* Free all the CQs that we've processed */
+       plt_write64((wdata | nb_pkts), rxq->cq_door);
+
+       return nb_pkts;
+}
+
+static __rte_always_inline uint16_t
+cn20k_nix_flush_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts,
+                         const uint16_t flags)
+{
+       struct cn20k_eth_rxq *rxq = rx_queue;
+       const uint64_t mbuf_init = rxq->mbuf_initializer;
+       const void *lookup_mem = rxq->lookup_mem;
+       const uint64_t data_off = rxq->data_off;
+       const uint64_t wdata = rxq->wdata;
+       const uint32_t qmask = rxq->qmask;
+       const uintptr_t desc = rxq->desc;
+       uint16_t packets = 0, nb_pkts;
+       uint16_t lmt_id __rte_unused;
+       uint32_t head = rxq->head;
+       struct nix_cqe_hdr_s *cq;
+       struct rte_mbuf *mbuf;
+       uint64_t sa_base = 0;
+       uintptr_t cpth = 0;
+
+       nb_pkts = nix_rx_nb_pkts(rxq, wdata, pkts, qmask);
+
+       while (packets < nb_pkts) {
+               /* Prefetch N desc ahead */
+               rte_prefetch_non_temporal((void *)(desc + (CQE_SZ((head + 2) & qmask))));
+               cq = (struct nix_cqe_hdr_s *)(desc + CQE_SZ(head));
+
+               mbuf = nix_get_mbuf_from_cqe(cq, data_off);
+
+               /* Mark mempool obj as "get" as it is alloc'ed by NIX */
+               RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool, (void **)&mbuf, 1, 1);
+
+               cn20k_nix_cqe_to_mbuf(cq, cq->tag, mbuf, lookup_mem, mbuf_init, cpth, sa_base,
+                                     flags);
+               cn20k_nix_mbuf_to_tstamp(mbuf, rxq->tstamp, (flags & NIX_RX_OFFLOAD_TSTAMP_F),
+                                        (uint64_t *)((uint8_t *)mbuf + data_off));
+               rx_pkts[packets++] = mbuf;
+               roc_prefetch_store_keep(mbuf);
+               head++;
+               head &= qmask;
+       }
+
+       rxq->head = head;
+       rxq->available -= nb_pkts;
+
+       /* Free all the CQs that we've processed */
+       plt_write64((wdata | nb_pkts), rxq->cq_door);
+
+       return nb_pkts;
+}
+
 #define RSS_F    NIX_RX_OFFLOAD_RSS_F
 #define PTYPE_F          NIX_RX_OFFLOAD_PTYPE_F
 #define CKSUM_F          NIX_RX_OFFLOAD_CHECKSUM_F
@@ -220,10 +609,7 @@ NIX_RX_FASTPATH_MODES
        uint16_t __rte_noinline __rte_hot fn(void *rx_queue, struct rte_mbuf **rx_pkts,            \
                                             uint16_t pkts)                                        \
        {                                                                                          \
-               RTE_SET_USED(rx_queue);                                                            \
-               RTE_SET_USED(rx_pkts);                                                             \
-               RTE_SET_USED(pkts);                                                                \
-               return 0;                                                                          \
+               return cn20k_nix_recv_pkts(rx_queue, rx_pkts, pkts, (flags));                      \
        }
 
 #define NIX_RX_RECV_MSEG(fn, flags) NIX_RX_RECV(fn, flags | NIX_RX_MULTI_SEG_F)
diff --git a/drivers/net/cnxk/cn20k_rx_select.c b/drivers/net/cnxk/cn20k_rx_select.c
index 82e06a62ef..25c79434cd 100644
--- a/drivers/net/cnxk/cn20k_rx_select.c
+++ b/drivers/net/cnxk/cn20k_rx_select.c
@@ -22,10 +22,8 @@ pick_rx_func(struct rte_eth_dev *eth_dev, const eth_rx_burst_t rx_burst[NIX_RX_O
 static uint16_t __rte_noinline __rte_hot __rte_unused
 cn20k_nix_flush_rx(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts)
 {
-       RTE_SET_USED(rx_queue);
-       RTE_SET_USED(rx_pkts);
-       RTE_SET_USED(pkts);
-       return 0;
+       const uint16_t flags = NIX_RX_MULTI_SEG_F | NIX_RX_REAS_F | NIX_RX_OFFLOAD_SECURITY_F;
+       return cn20k_nix_flush_recv_pkts(rx_queue, rx_pkts, pkts, flags);
 }
 
 #if defined(RTE_ARCH_ARM64)
diff --git a/drivers/net/cnxk/cn20k_rxtx.h b/drivers/net/cnxk/cn20k_rxtx.h
index 5cc445d4b1..03eaf34d64 100644
--- a/drivers/net/cnxk/cn20k_rxtx.h
+++ b/drivers/net/cnxk/cn20k_rxtx.h
@@ -83,7 +83,163 @@ struct cn20k_eth_rxq {
        struct cnxk_timesync_info *tstamp;
 } __plt_cache_aligned;
 
+/* Private data in sw rsvd area of struct roc_ot_ipsec_inb_sa */
+struct cn20k_inb_priv_data {
+       void *userdata;
+       int reass_dynfield_off;
+       int reass_dynflag_bit;
+       struct cnxk_eth_sec_sess *eth_sec;
+};
+
+struct cn20k_sec_sess_priv {
+       union {
+               struct {
+                       uint32_t sa_idx;
+                       uint8_t inb_sa : 1;
+                       uint8_t outer_ip_ver : 1;
+                       uint8_t mode : 1;
+                       uint8_t roundup_byte : 5;
+                       uint8_t roundup_len;
+                       uint16_t partial_len : 10;
+                       uint16_t chksum : 2;
+                       uint16_t dec_ttl : 1;
+                       uint16_t nixtx_off : 1;
+                       uint16_t rsvd : 2;
+               };
+
+               uint64_t u64;
+       };
+} __rte_packed;
+
 #define LMT_OFF(lmt_addr, lmt_num, offset)                                                         \
        (void *)((uintptr_t)(lmt_addr) + ((uint64_t)(lmt_num) << ROC_LMT_LINE_SIZE_LOG2) + (offset))
 
+static inline uint16_t
+nix_tx_compl_nb_pkts(struct cn20k_eth_txq *txq, const uint64_t wdata, const uint32_t qmask)
+{
+       uint16_t available = txq->tx_compl.available;
+
+       /* Update the available count if cached value is not enough */
+       if (!unlikely(available)) {
+               uint64_t reg, head, tail;
+
+               /* Use LDADDA version to avoid reorder */
+               reg = roc_atomic64_add_sync(wdata, txq->tx_compl.cq_status);
+               /* CQ_OP_STATUS operation error */
+               if (reg & BIT_ULL(NIX_CQ_OP_STAT_OP_ERR) || reg & BIT_ULL(NIX_CQ_OP_STAT_CQ_ERR))
+                       return 0;
+
+               tail = reg & 0xFFFFF;
+               head = (reg >> 20) & 0xFFFFF;
+               if (tail < head)
+                       available = tail - head + qmask + 1;
+               else
+                       available = tail - head;
+
+               txq->tx_compl.available = available;
+       }
+       return available;
+}
+
+static inline void
+handle_tx_completion_pkts(struct cn20k_eth_txq *txq, uint8_t mt_safe)
+{
+#define CNXK_NIX_CQ_ENTRY_SZ 128
+#define CQE_SZ(x)           ((x) * CNXK_NIX_CQ_ENTRY_SZ)
+
+       uint16_t tx_pkts = 0, nb_pkts;
+       const uintptr_t desc = txq->tx_compl.desc_base;
+       const uint64_t wdata = txq->tx_compl.wdata;
+       const uint32_t qmask = txq->tx_compl.qmask;
+       uint32_t head = txq->tx_compl.head;
+       struct nix_cqe_hdr_s *tx_compl_cq;
+       struct nix_send_comp_s *tx_compl_s0;
+       struct rte_mbuf *m_next, *m;
+
+       if (mt_safe)
+               rte_spinlock_lock(&txq->tx_compl.ext_buf_lock);
+
+       nb_pkts = nix_tx_compl_nb_pkts(txq, wdata, qmask);
+       while (tx_pkts < nb_pkts) {
+               rte_prefetch_non_temporal((void *)(desc + (CQE_SZ((head + 2) & qmask))));
+               tx_compl_cq = (struct nix_cqe_hdr_s *)(desc + CQE_SZ(head));
+               tx_compl_s0 = (struct nix_send_comp_s *)((uint64_t *)tx_compl_cq + 1);
+               m = txq->tx_compl.ptr[tx_compl_s0->sqe_id];
+               while (m->next != NULL) {
+                       m_next = m->next;
+                       rte_pktmbuf_free_seg(m);
+                       m = m_next;
+               }
+               rte_pktmbuf_free_seg(m);
+               txq->tx_compl.ptr[tx_compl_s0->sqe_id] = NULL;
+
+               head++;
+               head &= qmask;
+               tx_pkts++;
+       }
+       txq->tx_compl.head = head;
+       txq->tx_compl.available -= nb_pkts;
+
+       plt_write64((wdata | nb_pkts), txq->tx_compl.cq_door);
+
+       if (mt_safe)
+               rte_spinlock_unlock(&txq->tx_compl.ext_buf_lock);
+}
+
+static __rte_always_inline uint64_t
+cn20k_cpt_tx_steor_data(void)
+{
+       /* We have two CPT instructions per LMTLine TODO */
+       const uint64_t dw_m1 = ROC_CN10K_TWO_CPT_INST_DW_M1;
+       uint64_t data;
+
+       /* This will be moved to addr area */
+       data = dw_m1 << 16;
+       data |= dw_m1 << 19;
+       data |= dw_m1 << 22;
+       data |= dw_m1 << 25;
+       data |= dw_m1 << 28;
+       data |= dw_m1 << 31;
+       data |= dw_m1 << 34;
+       data |= dw_m1 << 37;
+       data |= dw_m1 << 40;
+       data |= dw_m1 << 43;
+       data |= dw_m1 << 46;
+       data |= dw_m1 << 49;
+       data |= dw_m1 << 52;
+       data |= dw_m1 << 55;
+       data |= dw_m1 << 58;
+       data |= dw_m1 << 61;
+
+       return data;
+}
+
+static __rte_always_inline void
+cn20k_nix_sec_steorl(uintptr_t io_addr, uint32_t lmt_id, uint8_t lnum, uint8_t loff, uint8_t shft)
+{
+       uint64_t data;
+       uintptr_t pa;
+
+       /* Check if there is any CPT instruction to submit */
+       if (!lnum && !loff)
+               return;
+
+       data = cn20k_cpt_tx_steor_data();
+       /* Update lmtline use for partial end line */
+       if (loff) {
+               data &= ~(0x7ULL << shft);
+               /* Update it to half full i.e 64B */
+               data |= (0x3UL << shft);
+       }
+
+       pa = io_addr | ((data >> 16) & 0x7) << 4;
+       data &= ~(0x7ULL << 16);
+       /* Update lines - 1 that contain valid data */
+       data |= ((uint64_t)(lnum + loff - 1)) << 12;
+       data |= (uint64_t)lmt_id;
+
+       /* STEOR */
+       roc_lmt_submit_steorl(data, pa);
+}
+
 #endif /* __CN20K_RXTX_H__ */
-- 
2.34.1
